# Ling/CSE 472, Spring 2009
# Assignment 3, part I
# alltags.py
"""Train a bigram POS tagger on the Penn Treebank sample and print its accuracy.

The tagger backs off from bigram to unigram to a default tagger that labels
every token 'NN'.  The first 90% of the tagged sentences are used for
training and the remaining 10% for evaluation.
"""


def split_train_test(sents, train_fraction=0.9):
    """Split *sents* into a leading training part and a trailing test part.

    Args:
        sents: A sliceable sequence of sentences that supports ``len()``.
        train_fraction: Share of the sequence assigned to the training part;
            the cut index is ``int(len(sents) * train_fraction)``.

    Returns:
        A ``(train, test)`` pair of slices of *sents*.
    """
    size = int(len(sents) * train_fraction)
    return sents[:size], sents[size:]


def score_tagger(tagger, gold_sents):
    """Return the accuracy of *tagger* measured against *gold_sents*.

    Newer NLTK releases expose ``accuracy()`` on taggers, while older ones
    only have ``evaluate()``; whichever is available is used.

    Args:
        tagger: An object with an ``accuracy`` or ``evaluate`` method that
            accepts the gold-standard tagged sentences.
        gold_sents: The gold-standard tagged sentences to score against.

    Returns:
        Whatever the tagger's scoring method returns.

    Raises:
        AttributeError: If the tagger has neither scoring method.
    """
    score = getattr(tagger, "accuracy", None)
    if score is None:
        score = tagger.evaluate
    return score(gold_sents)


def main():
    """Train the backoff tagger chain and print its accuracy on the test set."""
    # Imported here so the helpers above stay importable without NLTK.
    import nltk

    # get a tagged corpus
    tagged_sents = nltk.corpus.treebank.tagged_sents()

    # split it into a training set and a test set
    train_sents, test_sents = split_train_test(tagged_sents, 0.9)

    # train a bigram tagger
    default_tagger = nltk.DefaultTagger('NN')
    unigram_tagger = nltk.UnigramTagger(train_sents, backoff=default_tagger)
    bigram_tagger = nltk.BigramTagger(train_sents, backoff=unigram_tagger)

    # evaluate the accuracy and print the results
    print(score_tagger(bigram_tagger, test_sents))


if __name__ == "__main__":
    main()