In [ ]:
 
In [1]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics


# Toy training corpus: three documents labelled 'c' and one labelled 'j'.
train_texts = ['Chinese Beijing Chinese', 'Chinese Chinese Shanghai', 'Chinese Macao', 'Tokyo Japan Chinese']
train_target = ['c','c','c','j']

# Single held-out document together with the label it is scored against.
test_texts = ['Chinese Chinese Chinese Tokyo Japan']
test_target = ['j']

# Fix the class order used for the confusion matrix so that its shape and
# axis order do not depend on which labels happen to show up in the
# test target / predictions.
class_labels = sorted(set(train_target))

# Raw token counts fed straight into a multinomial Naive Bayes classifier.
pipeline = Pipeline([
    ('vectorizer',  CountVectorizer()),
    ('classifier',  MultinomialNB()) ])

pipeline.fit(train_texts, train_target)
predicted = pipeline.predict(test_texts)

# Single-argument print(...) behaves identically on Python 2 and Python 3;
# the empty string reproduces the blank line the bare `print` used to emit.
print('')

# Rows are true labels, columns are predicted labels, both in class_labels order.
print(metrics.confusion_matrix(test_target, predicted, labels=class_labels))
[[0 0]
 [1 0]]
In [ ]:
 
In [2]:
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction.text import TfidfTransformer

# Same bag-of-words front end as before, re-weighted with TF-IDF and fed to a
# linear classifier trained by stochastic gradient descent.
# random_state is pinned so that re-running the cell yields the same fitted
# model; without it the prediction can differ from run to run.
pipeline = Pipeline([('vectorizer', CountVectorizer()),
                     ('tfidf', TfidfTransformer()),
                     ('classifier', SGDClassifier(random_state=0)) ])
pipeline.fit(train_texts, train_target)
predicted = pipeline.predict(test_texts)

# Single-argument print(...) behaves identically on Python 2 and Python 3;
# the empty string reproduces the blank line the bare `print` used to emit.
print('')

# NOTE(review): no `labels` argument is given, so the matrix only covers the
# labels that occur in test_target or predicted. With one test document that
# is classified correctly this collapses to a 1x1 matrix; pass
# labels=sorted(set(train_target)) to always get the full 2x2 layout.
print(metrics.confusion_matrix(test_target, predicted))
[[1]]
In [ ]: