from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
# Toy corpus: each document is labelled 'c' or 'j'.
train_texts = [
    'Chinese Beijing Chinese',
    'Chinese Chinese Shanghai',
    'Chinese Macao',
    'Tokyo Japan Chinese',
]
train_target = ['c', 'c', 'c', 'j']

# Single held-out document and its expected label.
test_texts = ['Chinese Chinese Chinese Tokyo Japan']
test_target = ['j']

# Bag-of-words term counts fed into a multinomial Naive Bayes classifier.
pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('classifier', MultinomialNB()),
])
pipeline.fit(train_texts, train_target)
predicted = pipeline.predict(test_texts)

# Fix the row/column order of the confusion matrix to the full set of
# training classes; otherwise its shape depends on which labels happen to
# appear in test_target / predicted.
labels = sorted(set(train_target))

# Single-argument print(...) calls behave the same on Python 2 and Python 3
# (the original print statements are a SyntaxError on Python 3).
print('')
print(metrics.confusion_matrix(test_target, predicted, labels=labels))