diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 000000000..037769ef2 Binary files /dev/null and b/.DS_Store differ
diff --git a/_modules/index.html b/_modules/index.html index f19096705..a92184fa5 100644 --- a/_modules/index.html +++ b/_modules/index.html @@ -68,7 +68,7 @@

[hunk body lost to HTML extraction; only the sidebar strings "Documentation" and "NLTK Documentation" survive. The same @@ -68,7 +68,7 @@ hunk recurs in every file below.]

diff --git a/_modules/nltk.html b/_modules/nltk.html index 285925d42..0ef852c35 100644 --- a/_modules/nltk.html +++ b/_modules/nltk.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/app/chartparser_app.html b/_modules/nltk/app/chartparser_app.html index f3bd740cc..12d65ecf4 100644 --- a/_modules/nltk/app/chartparser_app.html +++ b/_modules/nltk/app/chartparser_app.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/app/chunkparser_app.html b/_modules/nltk/app/chunkparser_app.html index 2e17dca09..1f41df81b 100644 --- a/_modules/nltk/app/chunkparser_app.html +++ b/_modules/nltk/app/chunkparser_app.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/app/collocations_app.html b/_modules/nltk/app/collocations_app.html index b18ac7d9c..a56b056b1 100644 --- a/_modules/nltk/app/collocations_app.html +++ b/_modules/nltk/app/collocations_app.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/app/concordance_app.html b/_modules/nltk/app/concordance_app.html index 6cddbc054..11fa38bc8 100644 --- a/_modules/nltk/app/concordance_app.html +++ b/_modules/nltk/app/concordance_app.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/app/nemo_app.html b/_modules/nltk/app/nemo_app.html index 581bbb2ac..9c696a1cc 100644 --- a/_modules/nltk/app/nemo_app.html +++ b/_modules/nltk/app/nemo_app.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/app/rdparser_app.html b/_modules/nltk/app/rdparser_app.html index 7c4a19509..5603fcc61 100644 --- a/_modules/nltk/app/rdparser_app.html +++ b/_modules/nltk/app/rdparser_app.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/app/srparser_app.html b/_modules/nltk/app/srparser_app.html index 5bd70d3ce..8d69f1d51 100644 --- a/_modules/nltk/app/srparser_app.html +++ b/_modules/nltk/app/srparser_app.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/app/wordnet_app.html b/_modules/nltk/app/wordnet_app.html index 712b36286..51ef084bf 100644 --- a/_modules/nltk/app/wordnet_app.html +++ b/_modules/nltk/app/wordnet_app.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/book.html b/_modules/nltk/book.html index 5d55ea038..39aec1b7a 100644 --- a/_modules/nltk/book.html +++ b/_modules/nltk/book.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/ccg/api.html b/_modules/nltk/ccg/api.html index fdc5cffd8..77ee3e41b 100644 --- a/_modules/nltk/ccg/api.html +++ b/_modules/nltk/ccg/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/ccg/chart.html b/_modules/nltk/ccg/chart.html index 71d8765e6..1620f20a4 100644 --- a/_modules/nltk/ccg/chart.html +++ b/_modules/nltk/ccg/chart.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/ccg/combinator.html b/_modules/nltk/ccg/combinator.html index 0019f377c..deead5bec 100644 --- a/_modules/nltk/ccg/combinator.html +++ b/_modules/nltk/ccg/combinator.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/ccg/lexicon.html b/_modules/nltk/ccg/lexicon.html index 1d2669324..8338e8e5c 100644 --- a/_modules/nltk/ccg/lexicon.html +++ b/_modules/nltk/ccg/lexicon.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/ccg/logic.html b/_modules/nltk/ccg/logic.html index 158735ba9..257079de3 100644 --- a/_modules/nltk/ccg/logic.html +++ b/_modules/nltk/ccg/logic.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chat.html b/_modules/nltk/chat.html index cdfad61c8..50d05d3e8 100644 --- a/_modules/nltk/chat.html +++ b/_modules/nltk/chat.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chat/eliza.html b/_modules/nltk/chat/eliza.html index 248250811..865a7c958 100644 --- a/_modules/nltk/chat/eliza.html +++ b/_modules/nltk/chat/eliza.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chat/iesha.html b/_modules/nltk/chat/iesha.html index 4d04a6ef7..137709f4f 100644 --- a/_modules/nltk/chat/iesha.html +++ b/_modules/nltk/chat/iesha.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chat/rude.html b/_modules/nltk/chat/rude.html index 8c713c8da..74359ff89 100644 --- a/_modules/nltk/chat/rude.html +++ b/_modules/nltk/chat/rude.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chat/suntsu.html b/_modules/nltk/chat/suntsu.html index bc851ae78..cdbb73037 100644 --- a/_modules/nltk/chat/suntsu.html +++ b/_modules/nltk/chat/suntsu.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chat/util.html b/_modules/nltk/chat/util.html index 4a27e15ad..9b6ae889a 100644 --- a/_modules/nltk/chat/util.html +++ b/_modules/nltk/chat/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chat/zen.html b/_modules/nltk/chat/zen.html index 1a246e88b..23aa863e4 100644 --- a/_modules/nltk/chat/zen.html +++ b/_modules/nltk/chat/zen.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chunk.html b/_modules/nltk/chunk.html index 35735bbba..d3a49913a 100644 --- a/_modules/nltk/chunk.html +++ b/_modules/nltk/chunk.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chunk/api.html b/_modules/nltk/chunk/api.html index 499657227..3e086b949 100644 --- a/_modules/nltk/chunk/api.html +++ b/_modules/nltk/chunk/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chunk/named_entity.html b/_modules/nltk/chunk/named_entity.html index edea73b1b..67aca9e73 100644 --- a/_modules/nltk/chunk/named_entity.html +++ b/_modules/nltk/chunk/named_entity.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chunk/regexp.html b/_modules/nltk/chunk/regexp.html index 4d64511b7..253d3bcd0 100644 --- a/_modules/nltk/chunk/regexp.html +++ b/_modules/nltk/chunk/regexp.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/chunk/util.html b/_modules/nltk/chunk/util.html index 69432ef25..60cc803aa 100644 --- a/_modules/nltk/chunk/util.html +++ b/_modules/nltk/chunk/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/api.html b/_modules/nltk/classify/api.html index 6615d6fe5..38189afd4 100644 --- a/_modules/nltk/classify/api.html +++ b/_modules/nltk/classify/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/decisiontree.html b/_modules/nltk/classify/decisiontree.html index 8f66f01b6..eb6add5e9 100644 --- a/_modules/nltk/classify/decisiontree.html +++ b/_modules/nltk/classify/decisiontree.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/maxent.html b/_modules/nltk/classify/maxent.html index a307a00b8..2b590fe7f 100644 --- a/_modules/nltk/classify/maxent.html +++ b/_modules/nltk/classify/maxent.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/megam.html b/_modules/nltk/classify/megam.html index fb1589fa0..0e34fa330 100644 --- a/_modules/nltk/classify/megam.html +++ b/_modules/nltk/classify/megam.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/naivebayes.html b/_modules/nltk/classify/naivebayes.html index 93596fb0d..1fa9e46e8 100644 --- a/_modules/nltk/classify/naivebayes.html +++ b/_modules/nltk/classify/naivebayes.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/positivenaivebayes.html b/_modules/nltk/classify/positivenaivebayes.html index bdceb5b6f..fb20e8383 100644 --- a/_modules/nltk/classify/positivenaivebayes.html +++ b/_modules/nltk/classify/positivenaivebayes.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/rte_classify.html b/_modules/nltk/classify/rte_classify.html index 3120b22bc..c91660af3 100644 --- a/_modules/nltk/classify/rte_classify.html +++ b/_modules/nltk/classify/rte_classify.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/scikitlearn.html b/_modules/nltk/classify/scikitlearn.html index 7f16f4e70..da8293dd5 100644 --- a/_modules/nltk/classify/scikitlearn.html +++ b/_modules/nltk/classify/scikitlearn.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/senna.html b/_modules/nltk/classify/senna.html index 2b3b859a8..2f380abc6 100644 --- a/_modules/nltk/classify/senna.html +++ b/_modules/nltk/classify/senna.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/svm.html b/_modules/nltk/classify/svm.html index d22de5496..2eadbbff7 100644 --- a/_modules/nltk/classify/svm.html +++ b/_modules/nltk/classify/svm.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/tadm.html b/_modules/nltk/classify/tadm.html index f8b0a1577..350cc6d09 100644 --- a/_modules/nltk/classify/tadm.html +++ b/_modules/nltk/classify/tadm.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/textcat.html b/_modules/nltk/classify/textcat.html index dcb489dc1..f4a9621b9 100644 --- a/_modules/nltk/classify/textcat.html +++ b/_modules/nltk/classify/textcat.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/util.html b/_modules/nltk/classify/util.html index 7222d1f64..7841da516 100644 --- a/_modules/nltk/classify/util.html +++ b/_modules/nltk/classify/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/classify/weka.html b/_modules/nltk/classify/weka.html index a22fdc988..6dd2a7b84 100644 --- a/_modules/nltk/classify/weka.html +++ b/_modules/nltk/classify/weka.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/cluster/api.html b/_modules/nltk/cluster/api.html index a77d34ee7..61d3eef4a 100644 --- a/_modules/nltk/cluster/api.html +++ b/_modules/nltk/cluster/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/cluster/em.html b/_modules/nltk/cluster/em.html index e38284adc..396c5f0a5 100644 --- a/_modules/nltk/cluster/em.html +++ b/_modules/nltk/cluster/em.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/cluster/gaac.html b/_modules/nltk/cluster/gaac.html index 3b00824ce..585614311 100644 --- a/_modules/nltk/cluster/gaac.html +++ b/_modules/nltk/cluster/gaac.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/cluster/kmeans.html b/_modules/nltk/cluster/kmeans.html index 78a50f81a..804d4ae3a 100644 --- a/_modules/nltk/cluster/kmeans.html +++ b/_modules/nltk/cluster/kmeans.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/cluster/util.html b/_modules/nltk/cluster/util.html index ce6649ba7..56cfb223d 100644 --- a/_modules/nltk/cluster/util.html +++ b/_modules/nltk/cluster/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/collections.html b/_modules/nltk/collections.html index 9a2e99e24..c7642a4d3 100644 --- a/_modules/nltk/collections.html +++ b/_modules/nltk/collections.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/collocations.html b/_modules/nltk/collocations.html index 2b12eec0d..0ae6bc1a7 100644 --- a/_modules/nltk/collocations.html +++ b/_modules/nltk/collocations.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/compat.html b/_modules/nltk/compat.html index 230ed94d1..492c1ce34 100644 --- a/_modules/nltk/compat.html +++ b/_modules/nltk/compat.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus.html b/_modules/nltk/corpus.html index dec4b75ec..a94a6897f 100644 --- a/_modules/nltk/corpus.html +++ b/_modules/nltk/corpus.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/aligned.html b/_modules/nltk/corpus/reader/aligned.html index 7f7bfb799..575e26ac7 100644 --- a/_modules/nltk/corpus/reader/aligned.html +++ b/_modules/nltk/corpus/reader/aligned.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/api.html b/_modules/nltk/corpus/reader/api.html index 7d19828cb..abd125562 100644 --- a/_modules/nltk/corpus/reader/api.html +++ b/_modules/nltk/corpus/reader/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/bnc.html b/_modules/nltk/corpus/reader/bnc.html index 9a15c2454..2594e6bb9 100644 --- a/_modules/nltk/corpus/reader/bnc.html +++ b/_modules/nltk/corpus/reader/bnc.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/bracket_parse.html b/_modules/nltk/corpus/reader/bracket_parse.html index df6d33db2..26a6265ac 100644 --- a/_modules/nltk/corpus/reader/bracket_parse.html +++ b/_modules/nltk/corpus/reader/bracket_parse.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/categorized_sents.html b/_modules/nltk/corpus/reader/categorized_sents.html index 3ac8ca744..07ca1e76f 100644 --- a/_modules/nltk/corpus/reader/categorized_sents.html +++ b/_modules/nltk/corpus/reader/categorized_sents.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/chasen.html b/_modules/nltk/corpus/reader/chasen.html index 8ad4b79c9..2801b37f4 100644 --- a/_modules/nltk/corpus/reader/chasen.html +++ b/_modules/nltk/corpus/reader/chasen.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/childes.html b/_modules/nltk/corpus/reader/childes.html index 259b09a52..22b513337 100644 --- a/_modules/nltk/corpus/reader/childes.html +++ b/_modules/nltk/corpus/reader/childes.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/chunked.html b/_modules/nltk/corpus/reader/chunked.html index 1e411c762..2bc039ae1 100644 --- a/_modules/nltk/corpus/reader/chunked.html +++ b/_modules/nltk/corpus/reader/chunked.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/cmudict.html b/_modules/nltk/corpus/reader/cmudict.html index abbf63aa8..8576cd85f 100644 --- a/_modules/nltk/corpus/reader/cmudict.html +++ b/_modules/nltk/corpus/reader/cmudict.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/comparative_sents.html b/_modules/nltk/corpus/reader/comparative_sents.html index da046214d..3f79c3056 100644 --- a/_modules/nltk/corpus/reader/comparative_sents.html +++ b/_modules/nltk/corpus/reader/comparative_sents.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/conll.html b/_modules/nltk/corpus/reader/conll.html index b6bf36657..c1d73dded 100644 --- a/_modules/nltk/corpus/reader/conll.html +++ b/_modules/nltk/corpus/reader/conll.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/crubadan.html b/_modules/nltk/corpus/reader/crubadan.html index 76c4992c2..909ed8993 100644 --- a/_modules/nltk/corpus/reader/crubadan.html +++ b/_modules/nltk/corpus/reader/crubadan.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/dependency.html b/_modules/nltk/corpus/reader/dependency.html index d21fbaa76..b659a0fc0 100644 --- a/_modules/nltk/corpus/reader/dependency.html +++ b/_modules/nltk/corpus/reader/dependency.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/framenet.html b/_modules/nltk/corpus/reader/framenet.html index 6cfd91840..7d55e1d28 100644 --- a/_modules/nltk/corpus/reader/framenet.html +++ b/_modules/nltk/corpus/reader/framenet.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/ieer.html b/_modules/nltk/corpus/reader/ieer.html index 3c032ffa7..bcc0010a6 100644 --- a/_modules/nltk/corpus/reader/ieer.html +++ b/_modules/nltk/corpus/reader/ieer.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/indian.html b/_modules/nltk/corpus/reader/indian.html index 64a928195..7f32c4656 100644 --- a/_modules/nltk/corpus/reader/indian.html +++ b/_modules/nltk/corpus/reader/indian.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/ipipan.html b/_modules/nltk/corpus/reader/ipipan.html index e08fad6e6..dfad3bdf4 100644 --- a/_modules/nltk/corpus/reader/ipipan.html +++ b/_modules/nltk/corpus/reader/ipipan.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/knbc.html b/_modules/nltk/corpus/reader/knbc.html index 198043e89..6a0c1ff55 100644 --- a/_modules/nltk/corpus/reader/knbc.html +++ b/_modules/nltk/corpus/reader/knbc.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/lin.html b/_modules/nltk/corpus/reader/lin.html index 658f231dd..672cb64f9 100644 --- a/_modules/nltk/corpus/reader/lin.html +++ b/_modules/nltk/corpus/reader/lin.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/mte.html b/_modules/nltk/corpus/reader/mte.html index 9a3b08f53..87fea7115 100644 --- a/_modules/nltk/corpus/reader/mte.html +++ b/_modules/nltk/corpus/reader/mte.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/nkjp.html b/_modules/nltk/corpus/reader/nkjp.html index 12d11d377..59e357c5b 100644 --- a/_modules/nltk/corpus/reader/nkjp.html +++ b/_modules/nltk/corpus/reader/nkjp.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/nombank.html b/_modules/nltk/corpus/reader/nombank.html index ec2afe7cb..594bde396 100644 --- a/_modules/nltk/corpus/reader/nombank.html +++ b/_modules/nltk/corpus/reader/nombank.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/nps_chat.html b/_modules/nltk/corpus/reader/nps_chat.html index 860f1d54a..ef1e2cb8f 100644 --- a/_modules/nltk/corpus/reader/nps_chat.html +++ b/_modules/nltk/corpus/reader/nps_chat.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/opinion_lexicon.html b/_modules/nltk/corpus/reader/opinion_lexicon.html index 6a17a2f61..5a5fee699 100644 --- a/_modules/nltk/corpus/reader/opinion_lexicon.html +++ b/_modules/nltk/corpus/reader/opinion_lexicon.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/panlex_lite.html b/_modules/nltk/corpus/reader/panlex_lite.html index fee4717b3..db409df3b 100644 --- a/_modules/nltk/corpus/reader/panlex_lite.html +++ b/_modules/nltk/corpus/reader/panlex_lite.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/panlex_swadesh.html b/_modules/nltk/corpus/reader/panlex_swadesh.html index f09f67567..36eed6c7d 100644 --- a/_modules/nltk/corpus/reader/panlex_swadesh.html +++ b/_modules/nltk/corpus/reader/panlex_swadesh.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/pl196x.html b/_modules/nltk/corpus/reader/pl196x.html index 3bdabbcc4..3157c9ba4 100644 --- a/_modules/nltk/corpus/reader/pl196x.html +++ b/_modules/nltk/corpus/reader/pl196x.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/plaintext.html b/_modules/nltk/corpus/reader/plaintext.html index cc631a3b1..7a213d586 100644 --- a/_modules/nltk/corpus/reader/plaintext.html +++ b/_modules/nltk/corpus/reader/plaintext.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/ppattach.html b/_modules/nltk/corpus/reader/ppattach.html index 4b95ca207..4451bec6e 100644 --- a/_modules/nltk/corpus/reader/ppattach.html +++ b/_modules/nltk/corpus/reader/ppattach.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/propbank.html b/_modules/nltk/corpus/reader/propbank.html index 058c9641a..15ad8f27c 100644 --- a/_modules/nltk/corpus/reader/propbank.html +++ b/_modules/nltk/corpus/reader/propbank.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/pros_cons.html b/_modules/nltk/corpus/reader/pros_cons.html index 2a9660395..6723c940e 100644 --- a/_modules/nltk/corpus/reader/pros_cons.html +++ b/_modules/nltk/corpus/reader/pros_cons.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/reviews.html b/_modules/nltk/corpus/reader/reviews.html index d2b62c98d..debb3e196 100644 --- a/_modules/nltk/corpus/reader/reviews.html +++ b/_modules/nltk/corpus/reader/reviews.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/rte.html b/_modules/nltk/corpus/reader/rte.html index 31e4545b2..0691e8688 100644 --- a/_modules/nltk/corpus/reader/rte.html +++ b/_modules/nltk/corpus/reader/rte.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/semcor.html b/_modules/nltk/corpus/reader/semcor.html index 7ec86018e..626141781 100644 --- a/_modules/nltk/corpus/reader/semcor.html +++ b/_modules/nltk/corpus/reader/semcor.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/senseval.html b/_modules/nltk/corpus/reader/senseval.html index d97f2e550..d835626cc 100644 --- a/_modules/nltk/corpus/reader/senseval.html +++ b/_modules/nltk/corpus/reader/senseval.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/sentiwordnet.html b/_modules/nltk/corpus/reader/sentiwordnet.html index 5d29ff146..5e04d286e 100644 --- a/_modules/nltk/corpus/reader/sentiwordnet.html +++ b/_modules/nltk/corpus/reader/sentiwordnet.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/sinica_treebank.html b/_modules/nltk/corpus/reader/sinica_treebank.html index e2e4875eb..738a2a481 100644 --- a/_modules/nltk/corpus/reader/sinica_treebank.html +++ b/_modules/nltk/corpus/reader/sinica_treebank.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/string_category.html b/_modules/nltk/corpus/reader/string_category.html index f73eacedd..effd1de49 100644 --- a/_modules/nltk/corpus/reader/string_category.html +++ b/_modules/nltk/corpus/reader/string_category.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/switchboard.html b/_modules/nltk/corpus/reader/switchboard.html index 51fec4f30..965c46a0b 100644 --- a/_modules/nltk/corpus/reader/switchboard.html +++ b/_modules/nltk/corpus/reader/switchboard.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/tagged.html b/_modules/nltk/corpus/reader/tagged.html index b6aed97b3..fa6462e1e 100644 --- a/_modules/nltk/corpus/reader/tagged.html +++ b/_modules/nltk/corpus/reader/tagged.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/timit.html b/_modules/nltk/corpus/reader/timit.html index afcb2b11a..7a2559202 100644 --- a/_modules/nltk/corpus/reader/timit.html +++ b/_modules/nltk/corpus/reader/timit.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/toolbox.html b/_modules/nltk/corpus/reader/toolbox.html index d17a84d32..d500cc8d8 100644 --- a/_modules/nltk/corpus/reader/toolbox.html +++ b/_modules/nltk/corpus/reader/toolbox.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/twitter.html b/_modules/nltk/corpus/reader/twitter.html index d0cbf93e6..11506de41 100644 --- a/_modules/nltk/corpus/reader/twitter.html +++ b/_modules/nltk/corpus/reader/twitter.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/udhr.html b/_modules/nltk/corpus/reader/udhr.html index 35c65b4c3..744de321a 100644 --- a/_modules/nltk/corpus/reader/udhr.html +++ b/_modules/nltk/corpus/reader/udhr.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/util.html b/_modules/nltk/corpus/reader/util.html index 87c383866..d9fccee42 100644 --- a/_modules/nltk/corpus/reader/util.html +++ b/_modules/nltk/corpus/reader/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/verbnet.html b/_modules/nltk/corpus/reader/verbnet.html index bd6c3cc97..c443c11c6 100644 --- a/_modules/nltk/corpus/reader/verbnet.html +++ b/_modules/nltk/corpus/reader/verbnet.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/wordlist.html b/_modules/nltk/corpus/reader/wordlist.html index 736a2d7f2..1c11fa0ea 100644 --- a/_modules/nltk/corpus/reader/wordlist.html +++ b/_modules/nltk/corpus/reader/wordlist.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/wordnet.html b/_modules/nltk/corpus/reader/wordnet.html index 18f568ff4..a1d8ca7d2 100644 --- a/_modules/nltk/corpus/reader/wordnet.html +++ b/_modules/nltk/corpus/reader/wordnet.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/xmldocs.html b/_modules/nltk/corpus/reader/xmldocs.html index 9e6dec5dc..3bad9ce63 100644 --- a/_modules/nltk/corpus/reader/xmldocs.html +++ b/_modules/nltk/corpus/reader/xmldocs.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/reader/ycoe.html b/_modules/nltk/corpus/reader/ycoe.html index 4bce03442..3b334946c 100644 --- a/_modules/nltk/corpus/reader/ycoe.html +++ b/_modules/nltk/corpus/reader/ycoe.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/corpus/util.html b/_modules/nltk/corpus/util.html index 6bcb8b3b0..8d0a4ebc7 100644 --- a/_modules/nltk/corpus/util.html +++ b/_modules/nltk/corpus/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/data.html b/_modules/nltk/data.html index 0697a2880..5a6d6e4da 100644 --- a/_modules/nltk/data.html +++ b/_modules/nltk/data.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/decorators.html b/_modules/nltk/decorators.html index c6706ccd4..8a54c6707 100644 --- a/_modules/nltk/decorators.html +++ b/_modules/nltk/decorators.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/downloader.html b/_modules/nltk/downloader.html index 49e9e889e..f0e605b24 100644 --- a/_modules/nltk/downloader.html +++ b/_modules/nltk/downloader.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/draw/cfg.html b/_modules/nltk/draw/cfg.html index fd0091bee..b64a4fd67 100644 --- a/_modules/nltk/draw/cfg.html +++ b/_modules/nltk/draw/cfg.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/draw/dispersion.html b/_modules/nltk/draw/dispersion.html index fdfba2feb..a07859023 100644 --- a/_modules/nltk/draw/dispersion.html +++ b/_modules/nltk/draw/dispersion.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/draw/table.html b/_modules/nltk/draw/table.html index 7406456a2..00057d0f7 100644 --- a/_modules/nltk/draw/table.html +++ b/_modules/nltk/draw/table.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/draw/tree.html b/_modules/nltk/draw/tree.html index 7b29f23db..e8dbec5c5 100644 --- a/_modules/nltk/draw/tree.html +++ b/_modules/nltk/draw/tree.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/draw/util.html b/_modules/nltk/draw/util.html index dc8efbcc7..39ae51ce1 100644 --- a/_modules/nltk/draw/util.html +++ b/_modules/nltk/draw/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/featstruct.html b/_modules/nltk/featstruct.html index 46fcafd83..3903faa65 100644 --- a/_modules/nltk/featstruct.html +++ b/_modules/nltk/featstruct.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/grammar.html b/_modules/nltk/grammar.html index 22a7bc66f..d3fe5badf 100644 --- a/_modules/nltk/grammar.html +++ b/_modules/nltk/grammar.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/help.html b/_modules/nltk/help.html index 1aadfd261..c7bf285f9 100644 --- a/_modules/nltk/help.html +++ b/_modules/nltk/help.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/inference/api.html b/_modules/nltk/inference/api.html index aaa079aa0..7ebc3ef80 100644 --- a/_modules/nltk/inference/api.html +++ b/_modules/nltk/inference/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/inference/discourse.html b/_modules/nltk/inference/discourse.html index 62933744f..37a633f40 100644 --- a/_modules/nltk/inference/discourse.html +++ b/_modules/nltk/inference/discourse.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/inference/mace.html b/_modules/nltk/inference/mace.html index fcd2e64f4..5ff7f3c92 100644 --- a/_modules/nltk/inference/mace.html +++ b/_modules/nltk/inference/mace.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/inference/nonmonotonic.html b/_modules/nltk/inference/nonmonotonic.html index 6b84709d9..c084f4d6f 100644 --- a/_modules/nltk/inference/nonmonotonic.html +++ b/_modules/nltk/inference/nonmonotonic.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/inference/prover9.html b/_modules/nltk/inference/prover9.html index 22eaa8aee..6bfbaffc7 100644 --- a/_modules/nltk/inference/prover9.html +++ b/_modules/nltk/inference/prover9.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/inference/resolution.html b/_modules/nltk/inference/resolution.html index 207ec35e2..01718701d 100644 --- a/_modules/nltk/inference/resolution.html +++ b/_modules/nltk/inference/resolution.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/inference/tableau.html b/_modules/nltk/inference/tableau.html index dba7a15ed..890540858 100644 --- a/_modules/nltk/inference/tableau.html +++ b/_modules/nltk/inference/tableau.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/internals.html b/_modules/nltk/internals.html index 77c951c7a..6bcb80733 100644 --- a/_modules/nltk/internals.html +++ b/_modules/nltk/internals.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/jsontags.html b/_modules/nltk/jsontags.html index 40171863f..862fa66b5 100644 --- a/_modules/nltk/jsontags.html +++ b/_modules/nltk/jsontags.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/lazyimport.html b/_modules/nltk/lazyimport.html index 9d7415af0..d0eb85534 100644 --- a/_modules/nltk/lazyimport.html +++ b/_modules/nltk/lazyimport.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/lm/api.html b/_modules/nltk/lm/api.html index 039147f89..1e6848707 100644 --- a/_modules/nltk/lm/api.html +++ b/_modules/nltk/lm/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/lm/counter.html b/_modules/nltk/lm/counter.html index 76da9f7d2..88584d6fe 100644 --- a/_modules/nltk/lm/counter.html +++ b/_modules/nltk/lm/counter.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/lm/models.html b/_modules/nltk/lm/models.html index 7a451c701..bcd208fd9 100644 --- a/_modules/nltk/lm/models.html +++ b/_modules/nltk/lm/models.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/lm/preprocessing.html b/_modules/nltk/lm/preprocessing.html index 3053393e4..3b15946c1 100644 --- a/_modules/nltk/lm/preprocessing.html +++ b/_modules/nltk/lm/preprocessing.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/lm/smoothing.html b/_modules/nltk/lm/smoothing.html index 902194466..8811c15cb 100644 --- a/_modules/nltk/lm/smoothing.html +++ b/_modules/nltk/lm/smoothing.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/lm/util.html b/_modules/nltk/lm/util.html index 7c558bebb..69668045d 100644 --- a/_modules/nltk/lm/util.html +++ b/_modules/nltk/lm/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/lm/vocabulary.html b/_modules/nltk/lm/vocabulary.html index f1d30f529..6e5fe24d8 100644 --- a/_modules/nltk/lm/vocabulary.html +++ b/_modules/nltk/lm/vocabulary.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/agreement.html b/_modules/nltk/metrics/agreement.html index cd5d9c53f..57f774137 100644 --- a/_modules/nltk/metrics/agreement.html +++ b/_modules/nltk/metrics/agreement.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/aline.html b/_modules/nltk/metrics/aline.html index dc615197a..59d7ef361 100644 --- a/_modules/nltk/metrics/aline.html +++ b/_modules/nltk/metrics/aline.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/association.html b/_modules/nltk/metrics/association.html index 9472a7846..4b53d483e 100644 --- a/_modules/nltk/metrics/association.html +++ b/_modules/nltk/metrics/association.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/confusionmatrix.html b/_modules/nltk/metrics/confusionmatrix.html index 4e017bb34..d486fa0fb 100644 --- a/_modules/nltk/metrics/confusionmatrix.html +++ b/_modules/nltk/metrics/confusionmatrix.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/distance.html b/_modules/nltk/metrics/distance.html index 912e37fa3..7a1c0e6a5 100644 --- a/_modules/nltk/metrics/distance.html +++ b/_modules/nltk/metrics/distance.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/paice.html b/_modules/nltk/metrics/paice.html index 903eae174..061579a0e 100644 --- a/_modules/nltk/metrics/paice.html +++ b/_modules/nltk/metrics/paice.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/scores.html b/_modules/nltk/metrics/scores.html index 6e38bde16..8dc733276 100644 --- a/_modules/nltk/metrics/scores.html +++ b/_modules/nltk/metrics/scores.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/segmentation.html b/_modules/nltk/metrics/segmentation.html index cfaa25d50..944a69900 100644 --- a/_modules/nltk/metrics/segmentation.html +++ b/_modules/nltk/metrics/segmentation.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/metrics/spearman.html b/_modules/nltk/metrics/spearman.html index 67b8172c6..408e63a69 100644 --- a/_modules/nltk/metrics/spearman.html +++ b/_modules/nltk/metrics/spearman.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/misc/babelfish.html b/_modules/nltk/misc/babelfish.html index 0794283ad..181add2fd 100644 --- a/_modules/nltk/misc/babelfish.html +++ b/_modules/nltk/misc/babelfish.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/misc/chomsky.html b/_modules/nltk/misc/chomsky.html index 651911160..be2900e90 100644 --- a/_modules/nltk/misc/chomsky.html +++ b/_modules/nltk/misc/chomsky.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/misc/minimalset.html b/_modules/nltk/misc/minimalset.html index b5ce5f5eb..1f7d531d7 100644 --- a/_modules/nltk/misc/minimalset.html +++ b/_modules/nltk/misc/minimalset.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/misc/sort.html b/_modules/nltk/misc/sort.html index 37f745917..29ddc43f8 100644 --- a/_modules/nltk/misc/sort.html +++ b/_modules/nltk/misc/sort.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/misc/wordfinder.html b/_modules/nltk/misc/wordfinder.html index 5a559e33b..16de90304 100644 --- a/_modules/nltk/misc/wordfinder.html +++ b/_modules/nltk/misc/wordfinder.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/api.html b/_modules/nltk/parse/api.html index dbd71b088..04e0667c8 100644 --- a/_modules/nltk/parse/api.html +++ b/_modules/nltk/parse/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/bllip.html b/_modules/nltk/parse/bllip.html index b3a7dcaba..096242e79 100644 --- a/_modules/nltk/parse/bllip.html +++ b/_modules/nltk/parse/bllip.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/chart.html b/_modules/nltk/parse/chart.html index c1004cc81..f5ba5c772 100644 --- a/_modules/nltk/parse/chart.html +++ b/_modules/nltk/parse/chart.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/corenlp.html b/_modules/nltk/parse/corenlp.html index 04082006d..746ab09fb 100644 --- a/_modules/nltk/parse/corenlp.html +++ b/_modules/nltk/parse/corenlp.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/dependencygraph.html b/_modules/nltk/parse/dependencygraph.html index b6f4d67e2..b68be9de7 100644 --- a/_modules/nltk/parse/dependencygraph.html +++ b/_modules/nltk/parse/dependencygraph.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/earleychart.html b/_modules/nltk/parse/earleychart.html index 489d91ff0..05310577f 100644 --- a/_modules/nltk/parse/earleychart.html +++ b/_modules/nltk/parse/earleychart.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/evaluate.html b/_modules/nltk/parse/evaluate.html index 834ae43de..6ceb9c31c 100644 --- a/_modules/nltk/parse/evaluate.html +++ b/_modules/nltk/parse/evaluate.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/featurechart.html b/_modules/nltk/parse/featurechart.html index 2a20d1995..776991919 100644 --- a/_modules/nltk/parse/featurechart.html +++ b/_modules/nltk/parse/featurechart.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/generate.html b/_modules/nltk/parse/generate.html index a8ede199e..a34899557 100644 --- a/_modules/nltk/parse/generate.html +++ b/_modules/nltk/parse/generate.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/malt.html b/_modules/nltk/parse/malt.html index ecd566148..61afd32e4 100644 --- a/_modules/nltk/parse/malt.html +++ b/_modules/nltk/parse/malt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/nonprojectivedependencyparser.html b/_modules/nltk/parse/nonprojectivedependencyparser.html index 6820160ab..3a0cb5952 100644 --- a/_modules/nltk/parse/nonprojectivedependencyparser.html +++ b/_modules/nltk/parse/nonprojectivedependencyparser.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/pchart.html b/_modules/nltk/parse/pchart.html index 31814db77..2377b9a28 100644 --- a/_modules/nltk/parse/pchart.html +++ b/_modules/nltk/parse/pchart.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/projectivedependencyparser.html b/_modules/nltk/parse/projectivedependencyparser.html index 4fff180a3..6bd4fe962 100644 --- a/_modules/nltk/parse/projectivedependencyparser.html +++ b/_modules/nltk/parse/projectivedependencyparser.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/recursivedescent.html b/_modules/nltk/parse/recursivedescent.html index 64f837951..dfe6bc17e 100644 --- a/_modules/nltk/parse/recursivedescent.html +++ b/_modules/nltk/parse/recursivedescent.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/shiftreduce.html b/_modules/nltk/parse/shiftreduce.html index f186f7440..a73daed36 100644 --- a/_modules/nltk/parse/shiftreduce.html +++ b/_modules/nltk/parse/shiftreduce.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/stanford.html b/_modules/nltk/parse/stanford.html index 8121ed0f1..985d380dd 100644 --- a/_modules/nltk/parse/stanford.html +++ b/_modules/nltk/parse/stanford.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/transitionparser.html b/_modules/nltk/parse/transitionparser.html index 1564e34df..e76b5907b 100644 --- a/_modules/nltk/parse/transitionparser.html +++ b/_modules/nltk/parse/transitionparser.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/util.html b/_modules/nltk/parse/util.html index f095e9f34..6f629173b 100644 --- a/_modules/nltk/parse/util.html +++ b/_modules/nltk/parse/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/parse/viterbi.html b/_modules/nltk/parse/viterbi.html index df1aa8cf6..5ce49953b 100644 --- a/_modules/nltk/parse/viterbi.html +++ b/_modules/nltk/parse/viterbi.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/probability.html b/_modules/nltk/probability.html index 9e5c5f787..7715d0009 100644 --- a/_modules/nltk/probability.html +++ b/_modules/nltk/probability.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/boxer.html b/_modules/nltk/sem/boxer.html index a73f164aa..d83e5b97a 100644 --- a/_modules/nltk/sem/boxer.html +++ b/_modules/nltk/sem/boxer.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/chat80.html b/_modules/nltk/sem/chat80.html index 031cdde4f..fd4a72121 100644 --- a/_modules/nltk/sem/chat80.html +++ b/_modules/nltk/sem/chat80.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/cooper_storage.html b/_modules/nltk/sem/cooper_storage.html index 2177b6a11..22d24efdc 100644 --- a/_modules/nltk/sem/cooper_storage.html +++ b/_modules/nltk/sem/cooper_storage.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/drt.html b/_modules/nltk/sem/drt.html index e647ef210..a49804821 100644 --- a/_modules/nltk/sem/drt.html +++ b/_modules/nltk/sem/drt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/drt_glue_demo.html b/_modules/nltk/sem/drt_glue_demo.html index 76e3b544f..07553a509 100644 --- a/_modules/nltk/sem/drt_glue_demo.html +++ b/_modules/nltk/sem/drt_glue_demo.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/evaluate.html b/_modules/nltk/sem/evaluate.html index bd75492fc..053f9d9cc 100644 --- a/_modules/nltk/sem/evaluate.html +++ b/_modules/nltk/sem/evaluate.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/glue.html b/_modules/nltk/sem/glue.html index e32831019..e98585160 100644 --- a/_modules/nltk/sem/glue.html +++ b/_modules/nltk/sem/glue.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/hole.html b/_modules/nltk/sem/hole.html index 5a9c94f75..b18d09e0a 100644 --- a/_modules/nltk/sem/hole.html +++ b/_modules/nltk/sem/hole.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/lfg.html b/_modules/nltk/sem/lfg.html index e0f120f09..05f9560ba 100644 --- a/_modules/nltk/sem/lfg.html +++ b/_modules/nltk/sem/lfg.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/linearlogic.html b/_modules/nltk/sem/linearlogic.html index 8ae784dcd..f203fd431 100644 --- a/_modules/nltk/sem/linearlogic.html +++ b/_modules/nltk/sem/linearlogic.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/logic.html b/_modules/nltk/sem/logic.html index 2a11eea27..be4c71eaa 100644 --- a/_modules/nltk/sem/logic.html +++ b/_modules/nltk/sem/logic.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/relextract.html b/_modules/nltk/sem/relextract.html index bcce60427..adf74f04c 100644 --- a/_modules/nltk/sem/relextract.html +++ b/_modules/nltk/sem/relextract.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/skolemize.html b/_modules/nltk/sem/skolemize.html index 304b031e9..5d45be3b0 100644 --- a/_modules/nltk/sem/skolemize.html +++ b/_modules/nltk/sem/skolemize.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sem/util.html b/_modules/nltk/sem/util.html index 0ef252fc6..63369b4c0 100644 --- a/_modules/nltk/sem/util.html +++ b/_modules/nltk/sem/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sentiment/sentiment_analyzer.html b/_modules/nltk/sentiment/sentiment_analyzer.html index 13cc3b0c1..57bc84937 100644 --- a/_modules/nltk/sentiment/sentiment_analyzer.html +++ b/_modules/nltk/sentiment/sentiment_analyzer.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sentiment/util.html b/_modules/nltk/sentiment/util.html index b70880e3d..5386246c2 100644 --- a/_modules/nltk/sentiment/util.html +++ b/_modules/nltk/sentiment/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/sentiment/vader.html b/_modules/nltk/sentiment/vader.html index 782411446..1bf604a91 100644 --- a/_modules/nltk/sentiment/vader.html +++ b/_modules/nltk/sentiment/vader.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/api.html b/_modules/nltk/stem/api.html index 6cc74d859..042c56c3c 100644 --- a/_modules/nltk/stem/api.html +++ b/_modules/nltk/stem/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/arlstem.html b/_modules/nltk/stem/arlstem.html index d7cae7407..01a65808a 100644 --- a/_modules/nltk/stem/arlstem.html +++ b/_modules/nltk/stem/arlstem.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/arlstem2.html b/_modules/nltk/stem/arlstem2.html index a08aaf4e6..ac32e2f72 100644 --- a/_modules/nltk/stem/arlstem2.html +++ b/_modules/nltk/stem/arlstem2.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/cistem.html b/_modules/nltk/stem/cistem.html index 4b3518c6f..478d6e3e2 100644 --- a/_modules/nltk/stem/cistem.html +++ b/_modules/nltk/stem/cistem.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/isri.html b/_modules/nltk/stem/isri.html index cae3b714a..fba78510f 100644 --- a/_modules/nltk/stem/isri.html +++ b/_modules/nltk/stem/isri.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/lancaster.html b/_modules/nltk/stem/lancaster.html index 359c654e8..7dc6173c7 100644 --- a/_modules/nltk/stem/lancaster.html +++ b/_modules/nltk/stem/lancaster.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/porter.html b/_modules/nltk/stem/porter.html index 7764a906a..3b0266a38 100644 --- a/_modules/nltk/stem/porter.html +++ b/_modules/nltk/stem/porter.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/regexp.html b/_modules/nltk/stem/regexp.html index 725c026a3..339d13f85 100644 --- a/_modules/nltk/stem/regexp.html +++ b/_modules/nltk/stem/regexp.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/rslp.html b/_modules/nltk/stem/rslp.html index 1a58f7257..5b276fbb2 100644 --- a/_modules/nltk/stem/rslp.html +++ b/_modules/nltk/stem/rslp.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/snowball.html b/_modules/nltk/stem/snowball.html index 651522ab0..755a552ca 100644 --- a/_modules/nltk/stem/snowball.html +++ b/_modules/nltk/stem/snowball.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/util.html b/_modules/nltk/stem/util.html index 4d1385cb1..e382abaaa 100644 --- a/_modules/nltk/stem/util.html +++ b/_modules/nltk/stem/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/stem/wordnet.html b/_modules/nltk/stem/wordnet.html index 7659ce5a8..d61f4f694 100644 --- a/_modules/nltk/stem/wordnet.html +++ b/_modules/nltk/stem/wordnet.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag.html b/_modules/nltk/tag.html index ab5aaff6f..f0210184f 100644 --- a/_modules/nltk/tag.html +++ b/_modules/nltk/tag.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/api.html b/_modules/nltk/tag/api.html index 18d0b0edd..de7cf9751 100644 --- a/_modules/nltk/tag/api.html +++ b/_modules/nltk/tag/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/brill.html b/_modules/nltk/tag/brill.html index 264927584..3bc40fc64 100644 --- a/_modules/nltk/tag/brill.html +++ b/_modules/nltk/tag/brill.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/brill_trainer.html b/_modules/nltk/tag/brill_trainer.html index 0b43f8872..8eb7e7ffb 100644 --- a/_modules/nltk/tag/brill_trainer.html +++ b/_modules/nltk/tag/brill_trainer.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/crf.html b/_modules/nltk/tag/crf.html index 7ea3b2a18..fff80b871 100644 --- a/_modules/nltk/tag/crf.html +++ b/_modules/nltk/tag/crf.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/hmm.html b/_modules/nltk/tag/hmm.html index e35c8f4fc..c20c6cb9d 100644 --- a/_modules/nltk/tag/hmm.html +++ b/_modules/nltk/tag/hmm.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/hunpos.html b/_modules/nltk/tag/hunpos.html index e535e7c6a..faff65b28 100644 --- a/_modules/nltk/tag/hunpos.html +++ b/_modules/nltk/tag/hunpos.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/mapping.html b/_modules/nltk/tag/mapping.html index d87703960..5ebd78b73 100644 --- a/_modules/nltk/tag/mapping.html +++ b/_modules/nltk/tag/mapping.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/perceptron.html b/_modules/nltk/tag/perceptron.html index daeab2d0b..85f8ff440 100644 --- a/_modules/nltk/tag/perceptron.html +++ b/_modules/nltk/tag/perceptron.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/senna.html b/_modules/nltk/tag/senna.html index f5c16f8ec..65a8ba49d 100644 --- a/_modules/nltk/tag/senna.html +++ b/_modules/nltk/tag/senna.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/sequential.html b/_modules/nltk/tag/sequential.html index 6916964aa..e8bec700a 100644 --- a/_modules/nltk/tag/sequential.html +++ b/_modules/nltk/tag/sequential.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/stanford.html b/_modules/nltk/tag/stanford.html index 620030218..3eebde76a 100644 --- a/_modules/nltk/tag/stanford.html +++ b/_modules/nltk/tag/stanford.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/tnt.html b/_modules/nltk/tag/tnt.html index 30fbb3bd3..353b22bb3 100644 --- a/_modules/nltk/tag/tnt.html +++ b/_modules/nltk/tag/tnt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tag/util.html b/_modules/nltk/tag/util.html index 990c76e96..a4619b8a2 100644 --- a/_modules/nltk/tag/util.html +++ b/_modules/nltk/tag/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tbl/demo.html b/_modules/nltk/tbl/demo.html index 9365f0459..596f4747b 100644 --- a/_modules/nltk/tbl/demo.html +++ b/_modules/nltk/tbl/demo.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tbl/erroranalysis.html b/_modules/nltk/tbl/erroranalysis.html index c62f32a73..91c418795 100644 --- a/_modules/nltk/tbl/erroranalysis.html +++ b/_modules/nltk/tbl/erroranalysis.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tbl/feature.html b/_modules/nltk/tbl/feature.html index 456398247..0b1330568 100644 --- a/_modules/nltk/tbl/feature.html +++ b/_modules/nltk/tbl/feature.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tbl/rule.html b/_modules/nltk/tbl/rule.html index 64a6b5acc..50d750367 100644 --- a/_modules/nltk/tbl/rule.html +++ b/_modules/nltk/tbl/rule.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tbl/template.html b/_modules/nltk/tbl/template.html index e6c866f08..c295095d5 100644 --- a/_modules/nltk/tbl/template.html +++ b/_modules/nltk/tbl/template.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/all.html b/_modules/nltk/test/all.html index ea0d5c1b7..36161772f 100644 --- a/_modules/nltk/test/all.html +++ b/_modules/nltk/test/all.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/childes_fixt.html b/_modules/nltk/test/childes_fixt.html index 6e9ef6af2..5457d22b4 100644 --- a/_modules/nltk/test/childes_fixt.html +++ b/_modules/nltk/test/childes_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/classify_fixt.html b/_modules/nltk/test/classify_fixt.html index 658d3967b..49b662d2d 100644 --- a/_modules/nltk/test/classify_fixt.html +++ b/_modules/nltk/test/classify_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/discourse_fixt.html b/_modules/nltk/test/discourse_fixt.html index 44cda6636..fea0e69c0 100644 --- a/_modules/nltk/test/discourse_fixt.html +++ b/_modules/nltk/test/discourse_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/gensim_fixt.html b/_modules/nltk/test/gensim_fixt.html index 6948ca1a0..3a6affa9d 100644 --- a/_modules/nltk/test/gensim_fixt.html +++ b/_modules/nltk/test/gensim_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/gluesemantics_malt_fixt.html b/_modules/nltk/test/gluesemantics_malt_fixt.html index c6c104e80..9d6b95f56 100644 --- a/_modules/nltk/test/gluesemantics_malt_fixt.html +++ b/_modules/nltk/test/gluesemantics_malt_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/inference_fixt.html b/_modules/nltk/test/inference_fixt.html index 31fa6dca5..1958dc59b 100644 --- a/_modules/nltk/test/inference_fixt.html +++ b/_modules/nltk/test/inference_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/nonmonotonic_fixt.html b/_modules/nltk/test/nonmonotonic_fixt.html index 78706770b..9749e228e 100644 --- a/_modules/nltk/test/nonmonotonic_fixt.html +++ b/_modules/nltk/test/nonmonotonic_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/portuguese_en_fixt.html b/_modules/nltk/test/portuguese_en_fixt.html index 2efccbd5e..bf40f7241 100644 --- a/_modules/nltk/test/portuguese_en_fixt.html +++ b/_modules/nltk/test/portuguese_en_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/probability_fixt.html b/_modules/nltk/test/probability_fixt.html index 376b9bc38..7f5f59bb3 100644 --- a/_modules/nltk/test/probability_fixt.html +++ b/_modules/nltk/test/probability_fixt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/lm/test_preprocessing.html b/_modules/nltk/test/unit/lm/test_preprocessing.html index 6999e25d0..f4ba5877c 100644 --- a/_modules/nltk/test/unit/lm/test_preprocessing.html +++ b/_modules/nltk/test/unit/lm/test_preprocessing.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/lm/test_vocabulary.html b/_modules/nltk/test/unit/lm/test_vocabulary.html index 0bb38ae0a..678d99348 100644 --- a/_modules/nltk/test/unit/lm/test_vocabulary.html +++ b/_modules/nltk/test/unit/lm/test_vocabulary.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_aline.html b/_modules/nltk/test/unit/test_aline.html index a409a7bc6..6b6baff60 100644 --- a/_modules/nltk/test/unit/test_aline.html +++ b/_modules/nltk/test/unit/test_aline.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_brill.html b/_modules/nltk/test/unit/test_brill.html index 5310bfb8d..b671b6f16 100644 --- a/_modules/nltk/test/unit/test_brill.html +++ b/_modules/nltk/test/unit/test_brill.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_cfg2chomsky.html b/_modules/nltk/test/unit/test_cfg2chomsky.html index 71504615d..7649298da 100644 --- a/_modules/nltk/test/unit/test_cfg2chomsky.html +++ b/_modules/nltk/test/unit/test_cfg2chomsky.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_chunk.html b/_modules/nltk/test/unit/test_chunk.html index f4d495e99..2f49f2b61 100644 --- a/_modules/nltk/test/unit/test_chunk.html +++ b/_modules/nltk/test/unit/test_chunk.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_collocations.html b/_modules/nltk/test/unit/test_collocations.html index 352aa876e..16d0c2c8c 100644 --- a/_modules/nltk/test/unit/test_collocations.html +++ b/_modules/nltk/test/unit/test_collocations.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_concordance.html b/_modules/nltk/test/unit/test_concordance.html index d0acf40e3..468ca7d72 100644 --- a/_modules/nltk/test/unit/test_concordance.html +++ b/_modules/nltk/test/unit/test_concordance.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_corpus_views.html b/_modules/nltk/test/unit/test_corpus_views.html index 38b948ee9..1c496d484 100644 --- a/_modules/nltk/test/unit/test_corpus_views.html +++ b/_modules/nltk/test/unit/test_corpus_views.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_disagreement.html b/_modules/nltk/test/unit/test_disagreement.html index c975795a8..bde814aac 100644 --- a/_modules/nltk/test/unit/test_disagreement.html +++ b/_modules/nltk/test/unit/test_disagreement.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_freqdist.html b/_modules/nltk/test/unit/test_freqdist.html index fcb7b6f12..f8add99f8 100644 --- a/_modules/nltk/test/unit/test_freqdist.html +++ b/_modules/nltk/test/unit/test_freqdist.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_json_serialization.html b/_modules/nltk/test/unit/test_json_serialization.html index 140585971..76a7f11a0 100644 --- a/_modules/nltk/test/unit/test_json_serialization.html +++ b/_modules/nltk/test/unit/test_json_serialization.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_metrics.html b/_modules/nltk/test/unit/test_metrics.html index afc3a8ce7..f7943f2de 100644 --- a/_modules/nltk/test/unit/test_metrics.html +++ b/_modules/nltk/test/unit/test_metrics.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_naivebayes.html b/_modules/nltk/test/unit/test_naivebayes.html index dc3226857..17030c206 100644 --- a/_modules/nltk/test/unit/test_naivebayes.html +++ b/_modules/nltk/test/unit/test_naivebayes.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_nombank.html b/_modules/nltk/test/unit/test_nombank.html index 9e0ce7579..336669d61 100644 --- a/_modules/nltk/test/unit/test_nombank.html +++ b/_modules/nltk/test/unit/test_nombank.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_pl196x.html b/_modules/nltk/test/unit/test_pl196x.html index a5b05f373..6b8fde1c0 100644 --- a/_modules/nltk/test/unit/test_pl196x.html +++ b/_modules/nltk/test/unit/test_pl196x.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_pos_tag.html b/_modules/nltk/test/unit/test_pos_tag.html index 84474bf99..e0d557431 100644 --- a/_modules/nltk/test/unit/test_pos_tag.html +++ b/_modules/nltk/test/unit/test_pos_tag.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_ribes.html b/_modules/nltk/test/unit/test_ribes.html index 16c1f8ae5..5a368bc4e 100644 --- a/_modules/nltk/test/unit/test_ribes.html +++ b/_modules/nltk/test/unit/test_ribes.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_senna.html b/_modules/nltk/test/unit/test_senna.html index 7abe98456..c3285b39d 100644 --- a/_modules/nltk/test/unit/test_senna.html +++ b/_modules/nltk/test/unit/test_senna.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_stem.html b/_modules/nltk/test/unit/test_stem.html index 93993898e..b556a9d88 100644 --- a/_modules/nltk/test/unit/test_stem.html +++ b/_modules/nltk/test/unit/test_stem.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_tag.html b/_modules/nltk/test/unit/test_tag.html index 8afa6cec7..4a214a84b 100644 --- a/_modules/nltk/test/unit/test_tag.html +++ b/_modules/nltk/test/unit/test_tag.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_tgrep.html b/_modules/nltk/test/unit/test_tgrep.html index 574f9ad3a..a62739968 100644 --- a/_modules/nltk/test/unit/test_tgrep.html +++ b/_modules/nltk/test/unit/test_tgrep.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/test_wordnet.html b/_modules/nltk/test/unit/test_wordnet.html index 478f5ceb3..6559af846 100644 --- a/_modules/nltk/test/unit/test_wordnet.html +++ b/_modules/nltk/test/unit/test_wordnet.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_bleu.html b/_modules/nltk/test/unit/translate/test_bleu.html index fefc6ecc8..1db9a417c 100644 --- a/_modules/nltk/test/unit/translate/test_bleu.html +++ b/_modules/nltk/test/unit/translate/test_bleu.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_gdfa.html b/_modules/nltk/test/unit/translate/test_gdfa.html index 7aa4221a5..0c6522e81 100644 --- a/_modules/nltk/test/unit/translate/test_gdfa.html +++ b/_modules/nltk/test/unit/translate/test_gdfa.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_ibm1.html b/_modules/nltk/test/unit/translate/test_ibm1.html index 53bad7245..8242cc0a8 100644 --- a/_modules/nltk/test/unit/translate/test_ibm1.html +++ b/_modules/nltk/test/unit/translate/test_ibm1.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_ibm2.html b/_modules/nltk/test/unit/translate/test_ibm2.html index a9dd42482..ae7a98f6a 100644 --- a/_modules/nltk/test/unit/translate/test_ibm2.html +++ b/_modules/nltk/test/unit/translate/test_ibm2.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_ibm3.html b/_modules/nltk/test/unit/translate/test_ibm3.html index 6e9805b40..91a93cbb7 100644 --- a/_modules/nltk/test/unit/translate/test_ibm3.html +++ b/_modules/nltk/test/unit/translate/test_ibm3.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_ibm4.html b/_modules/nltk/test/unit/translate/test_ibm4.html index c67e1c25c..07bd55d43 100644 --- a/_modules/nltk/test/unit/translate/test_ibm4.html +++ b/_modules/nltk/test/unit/translate/test_ibm4.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_ibm5.html b/_modules/nltk/test/unit/translate/test_ibm5.html index 78d0e27be..c7f5c331b 100644 --- a/_modules/nltk/test/unit/translate/test_ibm5.html +++ b/_modules/nltk/test/unit/translate/test_ibm5.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_ibm_model.html b/_modules/nltk/test/unit/translate/test_ibm_model.html index 1b5cbd857..1820103d3 100644 --- a/_modules/nltk/test/unit/translate/test_ibm_model.html +++ b/_modules/nltk/test/unit/translate/test_ibm_model.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_meteor.html b/_modules/nltk/test/unit/translate/test_meteor.html index a57b5860a..e038dc38e 100644 --- a/_modules/nltk/test/unit/translate/test_meteor.html +++ b/_modules/nltk/test/unit/translate/test_meteor.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_nist.html b/_modules/nltk/test/unit/translate/test_nist.html index 30c06741a..8ba2f1aef 100644 --- a/_modules/nltk/test/unit/translate/test_nist.html +++ b/_modules/nltk/test/unit/translate/test_nist.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/test/unit/translate/test_stack_decoder.html b/_modules/nltk/test/unit/translate/test_stack_decoder.html index 75fa87100..93fbc1ea0 100644 --- a/_modules/nltk/test/unit/translate/test_stack_decoder.html +++ b/_modules/nltk/test/unit/translate/test_stack_decoder.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/text.html b/_modules/nltk/text.html index 6474151ac..3c1e63bcf 100644 --- a/_modules/nltk/text.html +++ b/_modules/nltk/text.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tgrep.html b/_modules/nltk/tgrep.html index 368f7b136..a8f166d8a 100644 --- a/_modules/nltk/tgrep.html +++ b/_modules/nltk/tgrep.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize.html b/_modules/nltk/tokenize.html index 2b4475c17..5279293a5 100644 --- a/_modules/nltk/tokenize.html +++ b/_modules/nltk/tokenize.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/api.html b/_modules/nltk/tokenize/api.html index aff923307..db2db3303 100644 --- a/_modules/nltk/tokenize/api.html +++ b/_modules/nltk/tokenize/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/casual.html b/_modules/nltk/tokenize/casual.html index 0325784c4..2d744b043 100644 --- a/_modules/nltk/tokenize/casual.html +++ b/_modules/nltk/tokenize/casual.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/destructive.html b/_modules/nltk/tokenize/destructive.html index 04f9984ab..cecc195b2 100644 --- a/_modules/nltk/tokenize/destructive.html +++ b/_modules/nltk/tokenize/destructive.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/legality_principle.html b/_modules/nltk/tokenize/legality_principle.html index a718cc52f..60f87e099 100644 --- a/_modules/nltk/tokenize/legality_principle.html +++ b/_modules/nltk/tokenize/legality_principle.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/mwe.html b/_modules/nltk/tokenize/mwe.html index 62ce96a7a..5873a68c4 100644 --- a/_modules/nltk/tokenize/mwe.html +++ b/_modules/nltk/tokenize/mwe.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/nist.html b/_modules/nltk/tokenize/nist.html index 5a324953e..07b6335d1 100644 --- a/_modules/nltk/tokenize/nist.html +++ b/_modules/nltk/tokenize/nist.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/punkt.html b/_modules/nltk/tokenize/punkt.html index 86cd349e1..f7e9d9375 100644 --- a/_modules/nltk/tokenize/punkt.html +++ b/_modules/nltk/tokenize/punkt.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/regexp.html b/_modules/nltk/tokenize/regexp.html index fc5c621d5..a684f6b01 100644 --- a/_modules/nltk/tokenize/regexp.html +++ b/_modules/nltk/tokenize/regexp.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/repp.html b/_modules/nltk/tokenize/repp.html index 933fcfeb8..141851402 100644 --- a/_modules/nltk/tokenize/repp.html +++ b/_modules/nltk/tokenize/repp.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/sexpr.html b/_modules/nltk/tokenize/sexpr.html index 0fa178cd6..399f413c8 100644 --- a/_modules/nltk/tokenize/sexpr.html +++ b/_modules/nltk/tokenize/sexpr.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/simple.html b/_modules/nltk/tokenize/simple.html index 49727ed38..7b7275347 100644 --- a/_modules/nltk/tokenize/simple.html +++ b/_modules/nltk/tokenize/simple.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/sonority_sequencing.html b/_modules/nltk/tokenize/sonority_sequencing.html index e02f84040..df9c278e1 100644 --- a/_modules/nltk/tokenize/sonority_sequencing.html +++ b/_modules/nltk/tokenize/sonority_sequencing.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/stanford.html b/_modules/nltk/tokenize/stanford.html index e82af477c..58c38fed2 100644 --- a/_modules/nltk/tokenize/stanford.html +++ b/_modules/nltk/tokenize/stanford.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/stanford_segmenter.html b/_modules/nltk/tokenize/stanford_segmenter.html index d7ce1e882..b8f6573e6 100644 --- a/_modules/nltk/tokenize/stanford_segmenter.html +++ b/_modules/nltk/tokenize/stanford_segmenter.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/texttiling.html b/_modules/nltk/tokenize/texttiling.html index 98c5233d6..75e44b818 100644 --- a/_modules/nltk/tokenize/texttiling.html +++ b/_modules/nltk/tokenize/texttiling.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/toktok.html b/_modules/nltk/tokenize/toktok.html index 5a080dfad..36fa8654f 100644 --- a/_modules/nltk/tokenize/toktok.html +++ b/_modules/nltk/tokenize/toktok.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/treebank.html b/_modules/nltk/tokenize/treebank.html index 892f05bce..4a4b708dd 100644 --- a/_modules/nltk/tokenize/treebank.html +++ b/_modules/nltk/tokenize/treebank.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tokenize/util.html b/_modules/nltk/tokenize/util.html index aee2153b6..227e53362 100644 --- a/_modules/nltk/tokenize/util.html +++ b/_modules/nltk/tokenize/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/toolbox.html b/_modules/nltk/toolbox.html index 33232634c..ab0692d27 100644 --- a/_modules/nltk/toolbox.html +++ b/_modules/nltk/toolbox.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/api.html b/_modules/nltk/translate/api.html index e4a269694..e088af464 100644 --- a/_modules/nltk/translate/api.html +++ b/_modules/nltk/translate/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/bleu_score.html b/_modules/nltk/translate/bleu_score.html index dd76b0d28..9a85d1633 100644 --- a/_modules/nltk/translate/bleu_score.html +++ b/_modules/nltk/translate/bleu_score.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/chrf_score.html b/_modules/nltk/translate/chrf_score.html index 2c00e1732..abab14672 100644 --- a/_modules/nltk/translate/chrf_score.html +++ b/_modules/nltk/translate/chrf_score.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/gale_church.html b/_modules/nltk/translate/gale_church.html index 7451d1964..cd5d14037 100644 --- a/_modules/nltk/translate/gale_church.html +++ b/_modules/nltk/translate/gale_church.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/gdfa.html b/_modules/nltk/translate/gdfa.html index 4c0160432..e737d462f 100644 --- a/_modules/nltk/translate/gdfa.html +++ b/_modules/nltk/translate/gdfa.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/gleu_score.html b/_modules/nltk/translate/gleu_score.html index 2d0c6bdc9..1df2ea1cb 100644 --- a/_modules/nltk/translate/gleu_score.html +++ b/_modules/nltk/translate/gleu_score.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/ibm1.html b/_modules/nltk/translate/ibm1.html index c5da3820e..a8d90784c 100644 --- a/_modules/nltk/translate/ibm1.html +++ b/_modules/nltk/translate/ibm1.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/ibm2.html b/_modules/nltk/translate/ibm2.html index 59edd9476..b98441a24 100644 --- a/_modules/nltk/translate/ibm2.html +++ b/_modules/nltk/translate/ibm2.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/ibm3.html b/_modules/nltk/translate/ibm3.html index e64137931..77cf943a3 100644 --- a/_modules/nltk/translate/ibm3.html +++ b/_modules/nltk/translate/ibm3.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/ibm4.html b/_modules/nltk/translate/ibm4.html index fd2dfc09a..e3dc85690 100644 --- a/_modules/nltk/translate/ibm4.html +++ b/_modules/nltk/translate/ibm4.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/ibm5.html b/_modules/nltk/translate/ibm5.html index 3213ad133..738913aca 100644 --- a/_modules/nltk/translate/ibm5.html +++ b/_modules/nltk/translate/ibm5.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/ibm_model.html b/_modules/nltk/translate/ibm_model.html index 2fcd18f21..ce666129b 100644 --- a/_modules/nltk/translate/ibm_model.html +++ b/_modules/nltk/translate/ibm_model.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/meteor_score.html b/_modules/nltk/translate/meteor_score.html index 9deb3035b..1791014ab 100644 --- a/_modules/nltk/translate/meteor_score.html +++ b/_modules/nltk/translate/meteor_score.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/metrics.html b/_modules/nltk/translate/metrics.html index 54bdecea5..4370d8b99 100644 --- a/_modules/nltk/translate/metrics.html +++ b/_modules/nltk/translate/metrics.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/nist_score.html b/_modules/nltk/translate/nist_score.html index 19301cdb8..acd4fb788 100644 --- a/_modules/nltk/translate/nist_score.html +++ b/_modules/nltk/translate/nist_score.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/phrase_based.html b/_modules/nltk/translate/phrase_based.html index ad740a085..fec0f8151 100644 --- a/_modules/nltk/translate/phrase_based.html +++ b/_modules/nltk/translate/phrase_based.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/ribes_score.html b/_modules/nltk/translate/ribes_score.html index e96302386..816788f0e 100644 --- a/_modules/nltk/translate/ribes_score.html +++ b/_modules/nltk/translate/ribes_score.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/translate/stack_decoder.html b/_modules/nltk/translate/stack_decoder.html index acccbb7f6..595b6a7a4 100644 --- a/_modules/nltk/translate/stack_decoder.html +++ b/_modules/nltk/translate/stack_decoder.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/tree.html b/_modules/nltk/tree.html index 6eda202bc..c3c18bb48 100644 --- a/_modules/nltk/tree.html +++ b/_modules/nltk/tree.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/treeprettyprinter.html b/_modules/nltk/treeprettyprinter.html index af49c04e4..849b29f6d 100644 --- a/_modules/nltk/treeprettyprinter.html +++ b/_modules/nltk/treeprettyprinter.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/treetransforms.html b/_modules/nltk/treetransforms.html index 58a728b08..7473ec264 100644 --- a/_modules/nltk/treetransforms.html +++ b/_modules/nltk/treetransforms.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/twitter/api.html b/_modules/nltk/twitter/api.html index 4f8bd7aa5..ea5644bca 100644 --- a/_modules/nltk/twitter/api.html +++ b/_modules/nltk/twitter/api.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/twitter/common.html b/_modules/nltk/twitter/common.html index 1239f5f12..691820cce 100644 --- a/_modules/nltk/twitter/common.html +++ b/_modules/nltk/twitter/common.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/twitter/twitter_demo.html b/_modules/nltk/twitter/twitter_demo.html index 1da966761..beb560e50 100644 --- a/_modules/nltk/twitter/twitter_demo.html +++ b/_modules/nltk/twitter/twitter_demo.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/twitter/twitterclient.html b/_modules/nltk/twitter/twitterclient.html index d5258f497..43c800f95 100644 --- a/_modules/nltk/twitter/twitterclient.html +++ b/_modules/nltk/twitter/twitterclient.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/twitter/util.html b/_modules/nltk/twitter/util.html index 209045c0b..249e69fba 100644 --- a/_modules/nltk/twitter/util.html +++ b/_modules/nltk/twitter/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/util.html b/_modules/nltk/util.html index 9fa7cda5b..c90773746 100644 --- a/_modules/nltk/util.html +++ b/_modules/nltk/util.html @@ -68,7 +68,7 @@

diff --git a/_modules/nltk/wsd.html b/_modules/nltk/wsd.html index b270307ae..75a9d0d6e 100644 --- a/_modules/nltk/wsd.html +++ b/_modules/nltk/wsd.html @@ -68,7 +68,7 @@

diff --git a/_sources/contribute.rst.txt b/_sources/contribute.rst.txt index adccedd72..2a03a995c 100644 --- a/_sources/contribute.rst.txt +++ b/_sources/contribute.rst.txt @@ -9,5 +9,5 @@ Information for contributors: * `contributing to NLTK `_ * `desired enhancements `_ * `contribute a corpus `_ -* `nltk-dev mailing list `_ +* `nltk-dev mailing list `_ * `GitHub Project `_ diff --git a/_sources/data.rst.txt b/_sources/data.rst.txt index 9fdb9edc2..0c059161b 100644 --- a/_sources/data.rst.txt +++ b/_sources/data.rst.txt @@ -1,9 +1,9 @@ Installing NLTK Data ==================== -NLTK comes with many corpora, toy grammars, trained models, etc. A complete list is posted at: http://nltk.org/nltk_data/ +NLTK comes with many corpora, toy grammars, trained models, etc. A complete list is posted at: https://www.nltk.org/nltk_data/ -To install the data, first install NLTK (see http://nltk.org/install.html), then use NLTK's data downloader as described below. +To install the data, first install NLTK (see https://www.nltk.org/install.html), then use NLTK's data downloader as described below. Apart from individual data packages, you can download the entire collection (using "all"), or just the data required for the examples and exercises in the book (using "book"), or just the corpora and no grammars or trained models (using "all-corpora"). @@ -54,7 +54,7 @@ Create a folder ``nltk_data``, e.g. ``C:\nltk_data``, or ``/usr/local/share/nltk and subfolders ``chunkers``, ``grammars``, ``misc``, ``sentiment``, ``taggers``, ``corpora``, ``help``, ``models``, ``stemmers``, ``tokenizers``. -Download individual packages from ``http://nltk.org/nltk_data/`` (see the "download" links). +Download individual packages from ``https://www.nltk.org/nltk_data/`` (see the "download" links). Unzip them to the appropriate subfolder. For example, the Brown Corpus, found at: ``https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip`` is to be unzipped to ``nltk_data/corpora/brown``. diff --git a/_sources/howto.rst.txt b/_sources/howto.rst.txt new file mode 100644 index 000000000..ac9fc9505 --- /dev/null +++ b/_sources/howto.rst.txt @@ -0,0 +1,8 @@ +Example usage of NLTK modules +============================= + +.. toctree:: + :titlesonly: + :glob: + + howto/* diff --git a/_sources/howto/bleu.rst.txt b/_sources/howto/bleu.rst.txt new file mode 100644 index 000000000..303c8ac93 --- /dev/null +++ b/_sources/howto/bleu.rst.txt @@ -0,0 +1,5 @@ +##################### +Sample usage for bleu +##################### + +.. include:: ../../nltk/test/bleu.doctest \ No newline at end of file diff --git a/_sources/howto/bnc.rst.txt b/_sources/howto/bnc.rst.txt new file mode 100644 index 000000000..8636ff502 --- /dev/null +++ b/_sources/howto/bnc.rst.txt @@ -0,0 +1,5 @@ +#################### +Sample usage for bnc +#################### + +.. include:: ../../nltk/test/bnc.doctest \ No newline at end of file diff --git a/_sources/howto/ccg.rst.txt b/_sources/howto/ccg.rst.txt new file mode 100644 index 000000000..d93d569bd --- /dev/null +++ b/_sources/howto/ccg.rst.txt @@ -0,0 +1,5 @@ +#################### +Sample usage for ccg +#################### + +.. 
include:: ../../nltk/test/ccg.doctest \ No newline at end of file diff --git a/_sources/howto/ccg_semantics.rst.txt b/_sources/howto/ccg_semantics.rst.txt new file mode 100644 index 000000000..5b98b943d --- /dev/null +++ b/_sources/howto/ccg_semantics.rst.txt @@ -0,0 +1,5 @@ +############################## +Sample usage for ccg_semantics +############################## + +.. include:: ../../nltk/test/ccg_semantics.doctest \ No newline at end of file diff --git a/_sources/howto/chat80.rst.txt b/_sources/howto/chat80.rst.txt new file mode 100644 index 000000000..77534098c --- /dev/null +++ b/_sources/howto/chat80.rst.txt @@ -0,0 +1,5 @@ +####################### +Sample usage for chat80 +####################### + +.. include:: ../../nltk/test/chat80.doctest \ No newline at end of file diff --git a/_sources/howto/childes.rst.txt b/_sources/howto/childes.rst.txt new file mode 100644 index 000000000..2f9dd4e48 --- /dev/null +++ b/_sources/howto/childes.rst.txt @@ -0,0 +1,5 @@ +######################## +Sample usage for childes +######################## + +.. include:: ../../nltk/test/childes.doctest \ No newline at end of file diff --git a/_sources/howto/chunk.rst.txt b/_sources/howto/chunk.rst.txt new file mode 100644 index 000000000..2c25d4c11 --- /dev/null +++ b/_sources/howto/chunk.rst.txt @@ -0,0 +1,5 @@ +###################### +Sample usage for chunk +###################### + +.. include:: ../../nltk/test/chunk.doctest \ No newline at end of file diff --git a/_sources/howto/classify.rst.txt b/_sources/howto/classify.rst.txt new file mode 100644 index 000000000..3f9b9962a --- /dev/null +++ b/_sources/howto/classify.rst.txt @@ -0,0 +1,5 @@ +######################### +Sample usage for classify +######################### + +.. include:: ../../nltk/test/classify.doctest \ No newline at end of file diff --git a/_sources/howto/collections.rst.txt b/_sources/howto/collections.rst.txt new file mode 100644 index 000000000..3e9076701 --- /dev/null +++ b/_sources/howto/collections.rst.txt @@ -0,0 +1,5 @@ +############################ +Sample usage for collections +############################ + +.. include:: ../../nltk/test/collections.doctest \ No newline at end of file diff --git a/_sources/howto/collocations.rst.txt b/_sources/howto/collocations.rst.txt new file mode 100644 index 000000000..ebde5d939 --- /dev/null +++ b/_sources/howto/collocations.rst.txt @@ -0,0 +1,5 @@ +############################# +Sample usage for collocations +############################# + +.. include:: ../../nltk/test/collocations.doctest \ No newline at end of file diff --git a/_sources/howto/concordance.rst.txt b/_sources/howto/concordance.rst.txt new file mode 100644 index 000000000..d10ad4194 --- /dev/null +++ b/_sources/howto/concordance.rst.txt @@ -0,0 +1,5 @@ +############################ +Sample usage for concordance +############################ + +.. include:: ../../nltk/test/concordance.doctest \ No newline at end of file diff --git a/_sources/howto/corpus.rst.txt b/_sources/howto/corpus.rst.txt new file mode 100644 index 000000000..53c8bac8c --- /dev/null +++ b/_sources/howto/corpus.rst.txt @@ -0,0 +1,5 @@ +####################### +Sample usage for corpus +####################### + +.. 
include:: ../../nltk/test/corpus.doctest \ No newline at end of file diff --git a/_sources/howto/crubadan.rst.txt b/_sources/howto/crubadan.rst.txt new file mode 100644 index 000000000..310f3d352 --- /dev/null +++ b/_sources/howto/crubadan.rst.txt @@ -0,0 +1,5 @@ +######################### +Sample usage for crubadan +######################### + +.. include:: ../../nltk/test/crubadan.doctest \ No newline at end of file diff --git a/_sources/howto/data.rst.txt b/_sources/howto/data.rst.txt new file mode 100644 index 000000000..a790c37fb --- /dev/null +++ b/_sources/howto/data.rst.txt @@ -0,0 +1,5 @@ +##################### +Sample usage for data +##################### + +.. include:: ../../nltk/test/data.doctest \ No newline at end of file diff --git a/_sources/howto/dependency.rst.txt b/_sources/howto/dependency.rst.txt new file mode 100644 index 000000000..99275c19e --- /dev/null +++ b/_sources/howto/dependency.rst.txt @@ -0,0 +1,5 @@ +########################### +Sample usage for dependency +########################### + +.. include:: ../../nltk/test/dependency.doctest \ No newline at end of file diff --git a/_sources/howto/discourse.rst.txt b/_sources/howto/discourse.rst.txt new file mode 100644 index 000000000..59f1d4f98 --- /dev/null +++ b/_sources/howto/discourse.rst.txt @@ -0,0 +1,5 @@ +########################## +Sample usage for discourse +########################## + +.. include:: ../../nltk/test/discourse.doctest \ No newline at end of file diff --git a/_sources/howto/drt.rst.txt b/_sources/howto/drt.rst.txt new file mode 100644 index 000000000..37e6de3e8 --- /dev/null +++ b/_sources/howto/drt.rst.txt @@ -0,0 +1,5 @@ +#################### +Sample usage for drt +#################### + +.. include:: ../../nltk/test/drt.doctest \ No newline at end of file diff --git a/_sources/howto/featgram.rst.txt b/_sources/howto/featgram.rst.txt new file mode 100644 index 000000000..6f71bb214 --- /dev/null +++ b/_sources/howto/featgram.rst.txt @@ -0,0 +1,5 @@ +######################### +Sample usage for featgram +######################### + +.. include:: ../../nltk/test/featgram.doctest \ No newline at end of file diff --git a/_sources/howto/featstruct.rst.txt b/_sources/howto/featstruct.rst.txt new file mode 100644 index 000000000..1ebabe2ce --- /dev/null +++ b/_sources/howto/featstruct.rst.txt @@ -0,0 +1,5 @@ +########################### +Sample usage for featstruct +########################### + +.. include:: ../../nltk/test/featstruct.doctest \ No newline at end of file diff --git a/_sources/howto/framenet.rst.txt b/_sources/howto/framenet.rst.txt new file mode 100644 index 000000000..dac932dbb --- /dev/null +++ b/_sources/howto/framenet.rst.txt @@ -0,0 +1,5 @@ +######################### +Sample usage for framenet +######################### + +.. include:: ../../nltk/test/framenet.doctest \ No newline at end of file diff --git a/_sources/howto/generate.rst.txt b/_sources/howto/generate.rst.txt new file mode 100644 index 000000000..7f98527ea --- /dev/null +++ b/_sources/howto/generate.rst.txt @@ -0,0 +1,5 @@ +######################### +Sample usage for generate +######################### + +.. include:: ../../nltk/test/generate.doctest \ No newline at end of file diff --git a/_sources/howto/gensim.rst.txt b/_sources/howto/gensim.rst.txt new file mode 100644 index 000000000..d3ac10bc6 --- /dev/null +++ b/_sources/howto/gensim.rst.txt @@ -0,0 +1,5 @@ +####################### +Sample usage for gensim +####################### + +.. 
include:: ../../nltk/test/gensim.doctest \ No newline at end of file diff --git a/_sources/howto/gluesemantics.rst.txt b/_sources/howto/gluesemantics.rst.txt new file mode 100644 index 000000000..e74379a13 --- /dev/null +++ b/_sources/howto/gluesemantics.rst.txt @@ -0,0 +1,5 @@ +############################## +Sample usage for gluesemantics +############################## + +.. include:: ../../nltk/test/gluesemantics.doctest \ No newline at end of file diff --git a/_sources/howto/gluesemantics_malt.rst.txt b/_sources/howto/gluesemantics_malt.rst.txt new file mode 100644 index 000000000..f99b3a5b1 --- /dev/null +++ b/_sources/howto/gluesemantics_malt.rst.txt @@ -0,0 +1,5 @@ +################################### +Sample usage for gluesemantics_malt +################################### + +.. include:: ../../nltk/test/gluesemantics_malt.doctest \ No newline at end of file diff --git a/_sources/howto/grammar.rst.txt b/_sources/howto/grammar.rst.txt new file mode 100644 index 000000000..8a7002961 --- /dev/null +++ b/_sources/howto/grammar.rst.txt @@ -0,0 +1,5 @@ +######################## +Sample usage for grammar +######################## + +.. include:: ../../nltk/test/grammar.doctest \ No newline at end of file diff --git a/_sources/howto/grammartestsuites.rst.txt b/_sources/howto/grammartestsuites.rst.txt new file mode 100644 index 000000000..7d584b058 --- /dev/null +++ b/_sources/howto/grammartestsuites.rst.txt @@ -0,0 +1,5 @@ +################################## +Sample usage for grammartestsuites +################################## + +.. include:: ../../nltk/test/grammartestsuites.doctest \ No newline at end of file diff --git a/_sources/howto/inference.rst.txt b/_sources/howto/inference.rst.txt new file mode 100644 index 000000000..1eae25228 --- /dev/null +++ b/_sources/howto/inference.rst.txt @@ -0,0 +1,5 @@ +########################## +Sample usage for inference +########################## + +.. include:: ../../nltk/test/inference.doctest \ No newline at end of file diff --git a/_sources/howto/internals.rst.txt b/_sources/howto/internals.rst.txt new file mode 100644 index 000000000..2645ebffa --- /dev/null +++ b/_sources/howto/internals.rst.txt @@ -0,0 +1,5 @@ +########################## +Sample usage for internals +########################## + +.. include:: ../../nltk/test/internals.doctest \ No newline at end of file diff --git a/_sources/howto/japanese.rst.txt b/_sources/howto/japanese.rst.txt new file mode 100644 index 000000000..bf1684402 --- /dev/null +++ b/_sources/howto/japanese.rst.txt @@ -0,0 +1,5 @@ +######################### +Sample usage for japanese +######################### + +.. include:: ../../nltk/test/japanese.doctest \ No newline at end of file diff --git a/_sources/howto/lm.rst.txt b/_sources/howto/lm.rst.txt new file mode 100644 index 000000000..0241f4cda --- /dev/null +++ b/_sources/howto/lm.rst.txt @@ -0,0 +1,5 @@ +################### +Sample usage for lm +################### + +.. include:: ../../nltk/test/lm.doctest \ No newline at end of file diff --git a/_sources/howto/logic.rst.txt b/_sources/howto/logic.rst.txt new file mode 100644 index 000000000..8d25703b0 --- /dev/null +++ b/_sources/howto/logic.rst.txt @@ -0,0 +1,5 @@ +###################### +Sample usage for logic +###################### + +.. 
include:: ../../nltk/test/logic.doctest \ No newline at end of file diff --git a/_sources/howto/meteor.rst.txt b/_sources/howto/meteor.rst.txt new file mode 100644 index 000000000..744418fb3 --- /dev/null +++ b/_sources/howto/meteor.rst.txt @@ -0,0 +1,5 @@ +####################### +Sample usage for meteor +####################### + +.. include:: ../../nltk/test/meteor.doctest \ No newline at end of file diff --git a/_sources/howto/metrics.rst.txt b/_sources/howto/metrics.rst.txt new file mode 100644 index 000000000..f6ea45009 --- /dev/null +++ b/_sources/howto/metrics.rst.txt @@ -0,0 +1,5 @@ +######################## +Sample usage for metrics +######################## + +.. include:: ../../nltk/test/metrics.doctest \ No newline at end of file diff --git a/_sources/howto/misc.rst.txt b/_sources/howto/misc.rst.txt new file mode 100644 index 000000000..15fc7a2be --- /dev/null +++ b/_sources/howto/misc.rst.txt @@ -0,0 +1,5 @@ +##################### +Sample usage for misc +##################### + +.. include:: ../../nltk/test/misc.doctest \ No newline at end of file diff --git a/_sources/howto/nonmonotonic.rst.txt b/_sources/howto/nonmonotonic.rst.txt new file mode 100644 index 000000000..59b6e5026 --- /dev/null +++ b/_sources/howto/nonmonotonic.rst.txt @@ -0,0 +1,5 @@ +############################# +Sample usage for nonmonotonic +############################# + +.. include:: ../../nltk/test/nonmonotonic.doctest \ No newline at end of file diff --git a/_sources/howto/paice.rst.txt b/_sources/howto/paice.rst.txt new file mode 100644 index 000000000..95113a861 --- /dev/null +++ b/_sources/howto/paice.rst.txt @@ -0,0 +1,5 @@ +###################### +Sample usage for paice +###################### + +.. include:: ../../nltk/test/paice.doctest \ No newline at end of file diff --git a/_sources/howto/parse.rst.txt b/_sources/howto/parse.rst.txt new file mode 100644 index 000000000..c48ca2269 --- /dev/null +++ b/_sources/howto/parse.rst.txt @@ -0,0 +1,5 @@ +###################### +Sample usage for parse +###################### + +.. include:: ../../nltk/test/parse.doctest \ No newline at end of file diff --git a/_sources/howto/portuguese_en.rst.txt b/_sources/howto/portuguese_en.rst.txt new file mode 100644 index 000000000..1a3f984ed --- /dev/null +++ b/_sources/howto/portuguese_en.rst.txt @@ -0,0 +1,5 @@ +############################## +Sample usage for portuguese_en +############################## + +.. include:: ../../nltk/test/portuguese_en.doctest \ No newline at end of file diff --git a/_sources/howto/probability.rst.txt b/_sources/howto/probability.rst.txt new file mode 100644 index 000000000..b2e19fcf4 --- /dev/null +++ b/_sources/howto/probability.rst.txt @@ -0,0 +1,5 @@ +############################ +Sample usage for probability +############################ + +.. include:: ../../nltk/test/probability.doctest \ No newline at end of file diff --git a/_sources/howto/propbank.rst.txt b/_sources/howto/propbank.rst.txt new file mode 100644 index 000000000..1318e762d --- /dev/null +++ b/_sources/howto/propbank.rst.txt @@ -0,0 +1,5 @@ +######################### +Sample usage for propbank +######################### + +.. include:: ../../nltk/test/propbank.doctest \ No newline at end of file diff --git a/_sources/howto/relextract.rst.txt b/_sources/howto/relextract.rst.txt new file mode 100644 index 000000000..392012c79 --- /dev/null +++ b/_sources/howto/relextract.rst.txt @@ -0,0 +1,5 @@ +########################### +Sample usage for relextract +########################### + +.. 
include:: ../../nltk/test/relextract.doctest \ No newline at end of file diff --git a/_sources/howto/resolution.rst.txt b/_sources/howto/resolution.rst.txt new file mode 100644 index 000000000..3056672bd --- /dev/null +++ b/_sources/howto/resolution.rst.txt @@ -0,0 +1,5 @@ +########################### +Sample usage for resolution +########################### + +.. include:: ../../nltk/test/resolution.doctest \ No newline at end of file diff --git a/_sources/howto/semantics.rst.txt b/_sources/howto/semantics.rst.txt new file mode 100644 index 000000000..7c4343fd1 --- /dev/null +++ b/_sources/howto/semantics.rst.txt @@ -0,0 +1,5 @@ +########################## +Sample usage for semantics +########################## + +.. include:: ../../nltk/test/semantics.doctest \ No newline at end of file diff --git a/_sources/howto/sentiment.rst.txt b/_sources/howto/sentiment.rst.txt new file mode 100644 index 000000000..e49ceb3d8 --- /dev/null +++ b/_sources/howto/sentiment.rst.txt @@ -0,0 +1,5 @@ +########################## +Sample usage for sentiment +########################## + +.. include:: ../../nltk/test/sentiment.doctest \ No newline at end of file diff --git a/_sources/howto/sentiwordnet.rst.txt b/_sources/howto/sentiwordnet.rst.txt new file mode 100644 index 000000000..f9978235a --- /dev/null +++ b/_sources/howto/sentiwordnet.rst.txt @@ -0,0 +1,5 @@ +############################# +Sample usage for sentiwordnet +############################# + +.. include:: ../../nltk/test/sentiwordnet.doctest \ No newline at end of file diff --git a/_sources/howto/simple.rst.txt b/_sources/howto/simple.rst.txt new file mode 100644 index 000000000..af93af6a1 --- /dev/null +++ b/_sources/howto/simple.rst.txt @@ -0,0 +1,5 @@ +####################### +Sample usage for simple +####################### + +.. include:: ../../nltk/test/simple.doctest \ No newline at end of file diff --git a/_sources/howto/stem.rst.txt b/_sources/howto/stem.rst.txt new file mode 100644 index 000000000..ee28a38f7 --- /dev/null +++ b/_sources/howto/stem.rst.txt @@ -0,0 +1,5 @@ +##################### +Sample usage for stem +##################### + +.. include:: ../../nltk/test/stem.doctest \ No newline at end of file diff --git a/_sources/howto/tag.rst.txt b/_sources/howto/tag.rst.txt new file mode 100644 index 000000000..b06111126 --- /dev/null +++ b/_sources/howto/tag.rst.txt @@ -0,0 +1,5 @@ +#################### +Sample usage for tag +#################### + +.. include:: ../../nltk/test/tag.doctest \ No newline at end of file diff --git a/_sources/howto/tokenize.rst.txt b/_sources/howto/tokenize.rst.txt new file mode 100644 index 000000000..89f726be2 --- /dev/null +++ b/_sources/howto/tokenize.rst.txt @@ -0,0 +1,5 @@ +######################### +Sample usage for tokenize +######################### + +.. include:: ../../nltk/test/tokenize.doctest \ No newline at end of file diff --git a/_sources/howto/toolbox.rst.txt b/_sources/howto/toolbox.rst.txt new file mode 100644 index 000000000..2ec441cc4 --- /dev/null +++ b/_sources/howto/toolbox.rst.txt @@ -0,0 +1,5 @@ +######################## +Sample usage for toolbox +######################## + +.. include:: ../../nltk/test/toolbox.doctest \ No newline at end of file diff --git a/_sources/howto/translate.rst.txt b/_sources/howto/translate.rst.txt new file mode 100644 index 000000000..87b6e76b2 --- /dev/null +++ b/_sources/howto/translate.rst.txt @@ -0,0 +1,5 @@ +########################## +Sample usage for translate +########################## + +.. 
include:: ../../nltk/test/translate.doctest \ No newline at end of file diff --git a/_sources/howto/tree.rst.txt b/_sources/howto/tree.rst.txt new file mode 100644 index 000000000..d79442f4c --- /dev/null +++ b/_sources/howto/tree.rst.txt @@ -0,0 +1,5 @@ +##################### +Sample usage for tree +##################### + +.. include:: ../../nltk/test/tree.doctest \ No newline at end of file diff --git a/_sources/howto/treeprettyprinter.rst.txt b/_sources/howto/treeprettyprinter.rst.txt new file mode 100644 index 000000000..41155c1ad --- /dev/null +++ b/_sources/howto/treeprettyprinter.rst.txt @@ -0,0 +1,5 @@ +################################## +Sample usage for treeprettyprinter +################################## + +.. include:: ../../nltk/test/treeprettyprinter.doctest \ No newline at end of file diff --git a/_sources/howto/treetransforms.rst.txt b/_sources/howto/treetransforms.rst.txt new file mode 100644 index 000000000..1d740fb09 --- /dev/null +++ b/_sources/howto/treetransforms.rst.txt @@ -0,0 +1,5 @@ +############################### +Sample usage for treetransforms +############################### + +.. include:: ../../nltk/test/treetransforms.doctest \ No newline at end of file diff --git a/_sources/howto/util.rst.txt b/_sources/howto/util.rst.txt new file mode 100644 index 000000000..b2ad679ba --- /dev/null +++ b/_sources/howto/util.rst.txt @@ -0,0 +1,5 @@ +##################### +Sample usage for util +##################### + +.. include:: ../../nltk/test/util.doctest \ No newline at end of file diff --git a/_sources/howto/wordnet.rst.txt b/_sources/howto/wordnet.rst.txt new file mode 100644 index 000000000..2ac9e7d2d --- /dev/null +++ b/_sources/howto/wordnet.rst.txt @@ -0,0 +1,5 @@ +######################## +Sample usage for wordnet +######################## + +.. include:: ../../nltk/test/wordnet.doctest \ No newline at end of file diff --git a/_sources/howto/wordnet_lch.rst.txt b/_sources/howto/wordnet_lch.rst.txt new file mode 100644 index 000000000..4da12654c --- /dev/null +++ b/_sources/howto/wordnet_lch.rst.txt @@ -0,0 +1,5 @@ +############################ +Sample usage for wordnet_lch +############################ + +.. include:: ../../nltk/test/wordnet_lch.doctest \ No newline at end of file diff --git a/_sources/howto/wsd.rst.txt b/_sources/howto/wsd.rst.txt new file mode 100644 index 000000000..99252aa47 --- /dev/null +++ b/_sources/howto/wsd.rst.txt @@ -0,0 +1,5 @@ +#################### +Sample usage for wsd +#################### + +.. include:: ../../nltk/test/wsd.doctest \ No newline at end of file diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt index c83f029e4..4481c553c 100644 --- a/_sources/index.rst.txt +++ b/_sources/index.rst.txt @@ -3,10 +3,10 @@ Natural Language Toolkit NLTK is a leading platform for building Python programs to work with human language data. It provides easy-to-use interfaces to `over 50 corpora and lexical -resources `_ such as WordNet, +resources `_ such as WordNet, along with a suite of text processing libraries for classification, tokenization, stemming, tagging, parsing, and semantic reasoning, wrappers for industrial-strength NLP libraries, -and an active `discussion forum `_. +and an active `discussion forum `_. Thanks to a hands-on guide introducing programming fundamentals alongside topics in computational linguistics, plus comprehensive API documentation, NLTK is suitable for linguists, engineers, students, educators, researchers, and industry users alike. 
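
The text-processing tools that this paragraph lists are available directly from the top-level ``nltk`` package. A minimal sketch of that kind of usage (the sample sentence is only an example, and it assumes the ``punkt`` and ``averaged_perceptron_tagger`` data packages have already been downloaded)::

    import nltk

    sentence = "NLTK is a leading platform for building Python programs."
    tokens = nltk.word_tokenize(sentence)  # split the raw string into word tokens
    tagged = nltk.pos_tag(tokens)          # attach a part-of-speech tag to each token
    print(tagged[:4])  # e.g. [('NLTK', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('leading', 'VBG')]
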
@@ -15,13 +15,13 @@ NLTK is available for Windows, Mac OS X, and Linux. Best of all, NLTK is a free, NLTK has been called "a wonderful tool for teaching, and working in, computational linguistics using Python," and "an amazing library to play with natural language." -`Natural Language Processing with Python `_ provides a practical +`Natural Language Processing with Python `_ provides a practical introduction to programming for language processing. Written by the creators of NLTK, it guides the reader through the fundamentals of writing Python programs, working with corpora, categorizing text, analyzing linguistic structure, and more. The online version of the book has been updated for Python 3 and NLTK 3. -(The original Python 2 version is still available at `http://nltk.org/book_1ed `_.) Some simple things you can do with NLTK --------------------------------------- @@ -66,8 +66,8 @@ follows: Next Steps ---------- -* `sign up for release announcements `_ -* `join in the discussion `_ +* `sign up for release announcements `_ +* `join in the discussion `_ .. toctree:: :titlesonly: :caption: NLTK Documentation API Reference - Example Usage + Example Usage Module Index Wiki FAQ diff --git a/_sources/install.rst.txt b/_sources/install.rst.txt index 5dfc91f0b..248651b9d 100644 --- a/_sources/install.rst.txt +++ b/_sources/install.rst.txt @@ -19,7 +19,7 @@ Mac/Unix #. Install Numpy (optional): run ``pip install --user -U numpy`` #. Test installation: run ``python`` then type ``import nltk`` -For older versions of Python it might be necessary to install setuptools (see https://pypi.python.org/pypi/setuptools) and to install pip (``sudo easy_install pip``). +For older versions of Python it might be necessary to install setuptools (see https://pypi.python.org/pypi/setuptools) and to install pip (``sudo easy_install pip``). Windows ------- @@ -29,9 +29,9 @@ These instructions assume that you do not already have Python installed on your 32-bit binary installation ~~~~~~~~~~~~~~~~~~~~~~~~~~ -#. Install Python 3.8: http://www.python.org/downloads/ (avoid the 64-bit versions) +#. Install Python 3.8: https://www.python.org/downloads/ (avoid the 64-bit versions) #. Install Numpy (optional): https://www.scipy.org/scipylib/download.html -#. Install NLTK: http://pypi.python.org/pypi/nltk +#. Install NLTK: https://pypi.python.org/pypi/nltk #. Test installation: ``Start>Python38``, then type ``import nltk`` Installing Third-Party Software @@ -47,4 +47,4 @@ After installing the NLTK package, please do install the necessary datasets/mode If you're unsure of which datasets/models you'll need, you can install the "popular" subset of NLTK data, on the command line type ``python -m nltk.downloader popular``, or in the Python interpreter ``import nltk; nltk.download('popular')`` -For details, see http://www.nltk.org/data.html +For details, see https://www.nltk.org/data.html diff --git a/_sources/news.rst.txt b/_sources/news.rst.txt index d35ebaff8..c0b65aee3 100644 --- a/_sources/news.rst.txt +++ b/_sources/news.rst.txt @@ -172,8 +172,8 @@ NLTK 3.0.0b2 released : August 2014 Minor bugfixes and clean-ups. NLTK Book Updates : July 2014 - The NLTK book is being updated for Python 3 and NLTK 3 `here `_. - The original Python 2 edition is still available `here `_. + The NLTK book is being updated for Python 3 and NLTK 3 `here `_. + The original Python 2 edition is still available `here `_.
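
The downloader commands quoted in the installation instructions above can also fetch individual packages into a custom directory, mirroring the manual ``nltk_data`` layout described earlier. A sketch under those assumptions (the path is only an example; any directory on ``nltk.data.path`` works)::

    import nltk

    # Download one corpus into an explicit location (the path is illustrative).
    nltk.download("brown", download_dir="/usr/local/share/nltk_data")

    # Make sure NLTK searches that location, then use the corpus as usual.
    nltk.data.path.append("/usr/local/share/nltk_data")
    from nltk.corpus import brown
    print(brown.words()[:10])
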
NLTK 3.0.0b1 released : July 2014 FrameNet, SentiWordNet, universal tagset, misc efficiency improvements and bugfixes @@ -191,7 +191,7 @@ NLTK 3.0a4 released : June 2014 NLTK Book Updates : October 2013 We are updating the NLTK book for Python 3 and NLTK 3; please see - http://nltk.org/book3/ + https://www.nltk.org/book/ NLTK 3.0a2 released : July 2013 Misc efficiency improvements and bugfixes; for details see @@ -211,7 +211,7 @@ NLTK 3.0a0 released : January 2013 Python Grant : November 2012 The Python Software Foundation is sponsoring Mikhail Korobov's work on porting NLTK to Python 3. - http://pyfound.blogspot.hu/2012/11/grants-to-assist-kivy-nltk-in-porting.html + https://pyfound.blogspot.hu/2012/11/grants-to-assist-kivy-nltk-in-porting.html NLTK 2.0.4 released : November 2012 Minor fix to remove numpy dependency. @@ -238,7 +238,7 @@ NLTK 2.0.1rc2 released : December 2011 The second release candidate for NLTK 2. For full details see the ChangeLog. NLTK development moved to GitHub : October 2011 - The development site for NLTK has moved from GoogleCode to GitHub: http://github.com/nltk + The development site for NLTK has moved from GoogleCode to GitHub: https://github.com/nltk NLTK 2.0.1rc1 released : April 2011 The first release candidate for NLTK 2. For full details see the ChangeLog. @@ -250,13 +250,13 @@ Python Text Processing with NLTK 2.0 Cookbook : December 2010 Jacob Perkins has written a 250-page cookbook full of great recipes for text processing using Python and NLTK, published by Packt Publishing. Some of the royalties are being donated to the NLTK project. Japanese translation of NLTK book : November 2010 - Masato Hagiwara has translated the NLTK book into Japanese, along with an extra chapter on particular issues with Japanese language processing. See https://www.oreilly.co.jp/books/9784873114705/.
For information about sellers and prices, see https://isbndb.com/d/book/natural_language_processing_with_python/prices.html. Version 0.9.9 released : May 2009 This version finalizes NLTK's API ahead of the 2.0 release and the publication of the NLTK book. There have been dozens of minor enhancements and bugfixes. Many names of the form nltk.foo.Bar are now available as nltk.Bar. There is expanded functionality in the decision tree, collocations, and Toolbox modules. A new translation toy nltk.misc.babelfish has been added. A new module nltk.help gives access to tagset documentation. Fixed imports so NLTK will build and install without Tkinter (for running on servers). New data includes a maximum entropy chunker model and updated grammars. NLTK Contrib includes updates to the coreference package (Joseph Frazee) and the ISRI Arabic stemmer (Hosam Algasaier). The book has undergone substantial editorial corrections ahead of final publication. For full details see the ChangeLog. @@ -300,19 +300,19 @@ Version 0.9.3 released : June 2008 This version contains an improved WordNet? similarity module using pre-built information content files (included in the corpus distribution), new/improved interfaces to Weka, MEGAM and Prover9/Mace4 toolkits, improved Unicode support for corpus readers, a BNC corpus reader, and a rewrite of the Punkt sentence segmenter contributed by Joel Nothman. NLTK-Contrib includes an implementation of an incremental algorithm for generating referring expressions, contributed by Margaret Mitchell. For full details see the ChangeLog. NLTK presented at LinuxFest Northwest : April 2008 - Sean Boisen presented NLTK at LinuxFest Northwest, which took place in Bellingham, Washington. His presentation slides are available at: http://semanticbible.com/other/talks/2008/nltk/main.html + Sean Boisen presented NLTK at LinuxFest Northwest, which took place in Bellingham, Washington. His presentation slides are available at: https://semanticbible.com/other/talks/2008/nltk/main.html NLTK in Google Summer of Code : April 2008 Google Summer of Code will sponsor two NLTK projects. Jason Narad won funding for a project on dependency parsers in NLTK (mentored by Sebastian Riedel and Jason Baldridge). Petro Verkhogliad won funding for a project on natural language generation in NLTK (mentored by Robert Dale and Edward Loper). Python Software Foundation adopts NLTK for Google Summer of Code application : March 2008 - The Python Software Foundation has listed NLTK projects for sponsorship from the 2008 Google Summer of Code program. For details please see http://wiki.python.org/moin/SummerOfCode. + The Python Software Foundation has listed NLTK projects for sponsorship from the 2008 Google Summer of Code program. For details please see https://wiki.python.org/moin/SummerOfCode. Version 0.9.2 released : March 2008 This version contains a new inference module linked to the Prover9/Mace4 theorem-prover and model checker (Dan Garrette, Ewan Klein). It also includes the VerbNet? and PropBank? corpora along with corpus readers. A bug in the Reuters corpus reader has been fixed. NLTK-Contrib includes new work on the WordNet? browser (Jussi Salmela).
For full details see the ChangeLog Youtube video about NLTK : January 2008 - The video of the NLTK talk at the Bay Area Python Interest Group last July has been posted at https://www.youtube.com/watch?v=keXW_5-llD0 (1h15m) Version 0.9.1 released : January 2008 This version contains new support for accessing text categorization corpora, along with several corpora categorized for topic, genre, question type, or sentiment. It includes several new corpora: Question classification data (Li & Roth), Reuters 21578 Corpus, Movie Reviews corpus (Pang & Lee), Recognising Textual Entailment (RTE) Challenges. NLTK-Contrib includes expanded support for semantics (Dan Garrette), readability scoring (Thomas Jakobsen, Thomas Skardal), and SIL Toolbox (Greg Aumann). The book contains many improvements in early chapters in response to reader feedback. For full details see the ChangeLog. diff --git a/_sources/team.rst.txt b/_sources/team.rst.txt index 3388004b3..59853b0f7 100644 --- a/_sources/team.rst.txt +++ b/_sources/team.rst.txt @@ -5,8 +5,8 @@ The NLTK project is led by `Steven Bird and Liling Tan `_, Austin, USA (``nltk.sem, nltk.inference``) -:Parsing: `Peter Ljunglöf `_, Gothenburg, Sweden (``nltk.parse, nltk.featstruct``) +:Parsing: `Peter Ljunglöf `_, Gothenburg, Sweden (``nltk.parse, nltk.featstruct``) :Metrics: `Joel Nothman `_, Sydney, Australia (``nltk.metrics, nltk.tokenize.punkt``) :Python 3: `Mikhail Korobov `_, Ekaterinburg, Russia -:Releases: `Steven Bird `_, Melbourne, Australia +:Releases: `Steven Bird `_, Melbourne, Australia :NLTK-Users: `Alexis Dimitriadis `_, Utrecht, Netherlands diff --git a/_static/css/nltk_theme.css b/_static/css/nltk_theme.css index 70358b6f1..8bd369fef 100644 --- a/_static/css/nltk_theme.css +++ b/_static/css/nltk_theme.css @@ -139,6 +139,7 @@ pre { color: #2D2D2D; padding: 1em 2em; margin-bottom: 1em; + overflow-x: auto; } img { @@ -222,7 +223,12 @@ table tbody tr:nth-of-type(odd) code:not(.xref) { #main-content-container { flex: 1; - max-width: 100%; + width: 0; } +@media screen and (max-width: 880px) { + #main-content-container { + width: inherit; + } } #main-content-container, #side-menu-container { @@ -263,6 +269,7 @@ table tbody tr:nth-of-type(odd) code:not(.xref) { margin-right: 0; width: 100%; display: none; + position: static; } #side-menu-container #side-menu { margin-bottom: 0; @@ -381,10 +388,6 @@ h1:hover .headerlink, h2:hover .headerlink, h3:hover .headerlink, h4:hover .head margin-bottom: 1em; } -div.highlight > pre { - white-space: pre-wrap; -} - header { padding-top: 2em; display: flex; @@ -428,7 +431,8 @@ header #project-container { display: block; } header a#menu-toggle { - display: block; + display: inline; + padding: 0em 0.25em; } } diff --git a/api/nltk.app.chartparser_app.html b/api/nltk.app.chartparser_app.html index 85a71271b..ca3d65249 100644 --- a/api/nltk.app.chartparser_app.html +++ b/api/nltk.app.chartparser_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.chunkparser_app.html b/api/nltk.app.chunkparser_app.html index 8c7b41d80..d02b850e0 100644 --- a/api/nltk.app.chunkparser_app.html +++ b/api/nltk.app.chunkparser_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.collocations_app.html b/api/nltk.app.collocations_app.html index 007a08989..31b40317d 100644 --- a/api/nltk.app.collocations_app.html +++ b/api/nltk.app.collocations_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.concordance_app.html b/api/nltk.app.concordance_app.html index 2539491dd..5c25caee6 100644 --- a/api/nltk.app.concordance_app.html +++ b/api/nltk.app.concordance_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.html b/api/nltk.app.html index 516189e0f..71f0591a0 100644 --- a/api/nltk.app.html +++ b/api/nltk.app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.nemo_app.html b/api/nltk.app.nemo_app.html index acbb8965d..2d228fa0c 100644 --- a/api/nltk.app.nemo_app.html +++ b/api/nltk.app.nemo_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.rdparser_app.html b/api/nltk.app.rdparser_app.html index 624020bfa..c6bf9f3fb 100644 --- a/api/nltk.app.rdparser_app.html +++ b/api/nltk.app.rdparser_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.srparser_app.html b/api/nltk.app.srparser_app.html index 99027a677..df4cdae9c 100644 --- a/api/nltk.app.srparser_app.html +++ b/api/nltk.app.srparser_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.wordfreq_app.html b/api/nltk.app.wordfreq_app.html index 777306c5d..dc908926a 100644 --- a/api/nltk.app.wordfreq_app.html +++ b/api/nltk.app.wordfreq_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.app.wordnet_app.html b/api/nltk.app.wordnet_app.html index 99b0d8ad8..a672a1a1e 100644 --- a/api/nltk.app.wordnet_app.html +++ b/api/nltk.app.wordnet_app.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.book.html b/api/nltk.book.html index 027d0e7cc..1d9dea30e 100644 --- a/api/nltk.book.html +++ b/api/nltk.book.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.ccg.api.html b/api/nltk.ccg.api.html index 299efb51a..f728426f7 100644 --- a/api/nltk.ccg.api.html +++ b/api/nltk.ccg.api.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.ccg.chart.html b/api/nltk.ccg.chart.html index 37adf7b15..8d6a89740 100644 --- a/api/nltk.ccg.chart.html +++ b/api/nltk.ccg.chart.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.ccg.combinator.html b/api/nltk.ccg.combinator.html index 59d8ba049..9e30b6745 100644 --- a/api/nltk.ccg.combinator.html +++ b/api/nltk.ccg.combinator.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.ccg.html b/api/nltk.ccg.html index d4314f93a..95eaa0e9c 100644 --- a/api/nltk.ccg.html +++ b/api/nltk.ccg.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.ccg.lexicon.html b/api/nltk.ccg.lexicon.html index ef16f8b12..d29acc476 100644 --- a/api/nltk.ccg.lexicon.html +++ b/api/nltk.ccg.lexicon.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.ccg.logic.html b/api/nltk.ccg.logic.html index ae210d93e..40aa39c86 100644 --- a/api/nltk.ccg.logic.html +++ b/api/nltk.ccg.logic.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chat.eliza.html b/api/nltk.chat.eliza.html index a6d7549de..069587915 100644 --- a/api/nltk.chat.eliza.html +++ b/api/nltk.chat.eliza.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chat.html b/api/nltk.chat.html index ef5f53513..8485b7159 100644 --- a/api/nltk.chat.html +++ b/api/nltk.chat.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chat.iesha.html b/api/nltk.chat.iesha.html index d5d2c540c..bde7b5d76 100644 --- a/api/nltk.chat.iesha.html +++ b/api/nltk.chat.iesha.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chat.rude.html b/api/nltk.chat.rude.html index e66cd45cc..d9bba2e7b 100644 --- a/api/nltk.chat.rude.html +++ b/api/nltk.chat.rude.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chat.suntsu.html b/api/nltk.chat.suntsu.html index 6d616d7ae..eb9c932e3 100644 --- a/api/nltk.chat.suntsu.html +++ b/api/nltk.chat.suntsu.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chat.util.html b/api/nltk.chat.util.html index ea59e425f..7402b770c 100644 --- a/api/nltk.chat.util.html +++ b/api/nltk.chat.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chat.zen.html b/api/nltk.chat.zen.html index fa69bc14d..e9182561f 100644 --- a/api/nltk.chat.zen.html +++ b/api/nltk.chat.zen.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chunk.api.html b/api/nltk.chunk.api.html index 8695ee061..4c3495b44 100644 --- a/api/nltk.chunk.api.html +++ b/api/nltk.chunk.api.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chunk.html b/api/nltk.chunk.html index d339de48c..e99be7117 100644 --- a/api/nltk.chunk.html +++ b/api/nltk.chunk.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chunk.named_entity.html b/api/nltk.chunk.named_entity.html index fac54bd9a..6333a0e80 100644 --- a/api/nltk.chunk.named_entity.html +++ b/api/nltk.chunk.named_entity.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chunk.regexp.html b/api/nltk.chunk.regexp.html index 197bb27b8..3dd1a1cf9 100644 --- a/api/nltk.chunk.regexp.html +++ b/api/nltk.chunk.regexp.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.chunk.util.html b/api/nltk.chunk.util.html index 4f260b447..7af99d8ca 100644 --- a/api/nltk.chunk.util.html +++ b/api/nltk.chunk.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.api.html b/api/nltk.classify.api.html index f87fc6bca..449d42a3b 100644 --- a/api/nltk.classify.api.html +++ b/api/nltk.classify.api.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.decisiontree.html b/api/nltk.classify.decisiontree.html index 9cae3ab9e..4fb037f80 100644 --- a/api/nltk.classify.decisiontree.html +++ b/api/nltk.classify.decisiontree.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.html b/api/nltk.classify.html index f513cbb43..2fbfbb15c 100644 --- a/api/nltk.classify.html +++ b/api/nltk.classify.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.maxent.html b/api/nltk.classify.maxent.html index fad024874..5e895db2a 100644 --- a/api/nltk.classify.maxent.html +++ b/api/nltk.classify.maxent.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.megam.html b/api/nltk.classify.megam.html index 088bbface..b19396fd3 100644 --- a/api/nltk.classify.megam.html +++ b/api/nltk.classify.megam.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

  • API Reference
  • - Example Usage
  • + Example Usage
  • Module Index
  • Wiki
  • FAQ
  • @@ -98,7 +98,7 @@

    Documentation

    nltk.classify.megam module

    -A set of functions used to interface with the external megam maxent
    +A set of functions used to interface with the external megam maxent optimization package. Before megam can be used, you should tell NLTK where it can find the megam binary, using the config_megam() function. Typical usage:
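    The usage example itself did not survive the page extraction; a minimal sketch of the documented pattern, assuming the nltk.classify.megam API in which config_megam() optionally takes an explicit path to the binary:

    >>> from nltk.classify import megam
    >>> megam.config_megam()  # or megam.config_megam('/usr/bin/megam'), a hypothetical install path
    [Found megam: ...]

    Once configured, megam can be selected as the optimizer with nltk.classify.MaxentClassifier.train(train_toks, algorithm='megam').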

    @@ -197,7 +197,7 @@

    Documentation

diff --git a/api/nltk.classify.naivebayes.html b/api/nltk.classify.naivebayes.html index 30326bf79..ed652c59b 100644 --- a/api/nltk.classify.naivebayes.html +++ b/api/nltk.classify.naivebayes.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.positivenaivebayes.html b/api/nltk.classify.positivenaivebayes.html index dc94b8659..aa644fbf1 100644 --- a/api/nltk.classify.positivenaivebayes.html +++ b/api/nltk.classify.positivenaivebayes.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.rte_classify.html b/api/nltk.classify.rte_classify.html index 8efcfa0ef..95ef8ee04 100644 --- a/api/nltk.classify.rte_classify.html +++ b/api/nltk.classify.rte_classify.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.scikitlearn.html b/api/nltk.classify.scikitlearn.html index 203a0455b..16ac26b45 100644 --- a/api/nltk.classify.scikitlearn.html +++ b/api/nltk.classify.scikitlearn.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

  • API Reference
  • - Example Usage
  • + Example Usage
  • Module Index
  • Wiki
  • FAQ
  • @@ -98,7 +98,7 @@

    Documentation

    nltk.classify.scikitlearn module

    -scikit-learn (http://scikit-learn.org) is a machine learning library for
    +scikit-learn (https://scikit-learn.org) is a machine learning library for Python. It supports many classification algorithms, including SVMs, Naive Bayes, logistic regression (MaxEnt) and decision trees.

    This package implements a wrapper around scikit-learn classifiers. To use this @@ -235,7 +235,7 @@
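    The sentence above is cut off by the next diff hunk; as a sketch of the typical pattern (assuming the SklearnClassifier wrapper in this module and scikit-learn's MultinomialNB; the tiny feature dicts are illustrative only):

    >>> from nltk.classify.scikitlearn import SklearnClassifier
    >>> from sklearn.naive_bayes import MultinomialNB
    >>> train = [({'a': 4, 'b': 1}, 'x'), ({'a': 5, 'b': 2}, 'x'), ({'a': 0, 'b': 3}, 'y')]
    >>> classif = SklearnClassifier(MultinomialNB()).train(train)
    >>> classif.classify({'a': 3, 'b': 1})
    'x'

    The wrapper converts NLTK's feature dicts to scikit-learn's array format internally, so any scikit-learn estimator with the standard fit/predict interface can be dropped in.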

    Documentation

diff --git a/api/nltk.classify.senna.html b/api/nltk.classify.senna.html index cb6424ab2..3cb0559b5 100644 --- a/api/nltk.classify.senna.html +++ b/api/nltk.classify.senna.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.svm.html b/api/nltk.classify.svm.html index e3f96e260..b2135b2cd 100644 --- a/api/nltk.classify.svm.html +++ b/api/nltk.classify.svm.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.tadm.html b/api/nltk.classify.tadm.html index ca623b775..13d4814d2 100644 --- a/api/nltk.classify.tadm.html +++ b/api/nltk.classify.tadm.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.textcat.html b/api/nltk.classify.textcat.html index ca435c508..d2fdecef2 100644 --- a/api/nltk.classify.textcat.html +++ b/api/nltk.classify.textcat.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.util.html b/api/nltk.classify.util.html index 04860d0e1..b4cce9d9b 100644 --- a/api/nltk.classify.util.html +++ b/api/nltk.classify.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.classify.weka.html b/api/nltk.classify.weka.html index f0e25dc3f..7ea364fce 100644 --- a/api/nltk.classify.weka.html +++ b/api/nltk.classify.weka.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.cli.html b/api/nltk.cli.html index 314fc5e1f..36b09fb61 100644 --- a/api/nltk.cli.html +++ b/api/nltk.cli.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.cluster.api.html b/api/nltk.cluster.api.html index 1b50686fa..9ad7c32d1 100644 --- a/api/nltk.cluster.api.html +++ b/api/nltk.cluster.api.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.cluster.em.html b/api/nltk.cluster.em.html index 6d5ae2dd0..63db6e0a6 100644 --- a/api/nltk.cluster.em.html +++ b/api/nltk.cluster.em.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.cluster.gaac.html b/api/nltk.cluster.gaac.html index e66dccf61..0089f7b21 100644 --- a/api/nltk.cluster.gaac.html +++ b/api/nltk.cluster.gaac.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.cluster.html b/api/nltk.cluster.html index f1e5efb8f..2d0ad7004 100644 --- a/api/nltk.cluster.html +++ b/api/nltk.cluster.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.cluster.kmeans.html b/api/nltk.cluster.kmeans.html index 495db4a49..fe396da2e 100644 --- a/api/nltk.cluster.kmeans.html +++ b/api/nltk.cluster.kmeans.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.cluster.util.html b/api/nltk.cluster.util.html index 3f597eadf..bad52ce4f 100644 --- a/api/nltk.cluster.util.html +++ b/api/nltk.cluster.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.collections.html b/api/nltk.collections.html index 532b9cf25..79905428f 100644 --- a/api/nltk.collections.html +++ b/api/nltk.collections.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.collocations.html b/api/nltk.collocations.html index 872c1c590..7bfb004c8 100644 --- a/api/nltk.collocations.html +++ b/api/nltk.collocations.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.compat.html b/api/nltk.compat.html index 1f9b30885..fa22ce832 100644 --- a/api/nltk.compat.html +++ b/api/nltk.compat.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.europarl_raw.html b/api/nltk.corpus.europarl_raw.html index ea4256fdd..62f0e813a 100644 --- a/api/nltk.corpus.europarl_raw.html +++ b/api/nltk.corpus.europarl_raw.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.html b/api/nltk.corpus.html index 812f61d83..073c6027a 100644 --- a/api/nltk.corpus.html +++ b/api/nltk.corpus.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.aligned.html b/api/nltk.corpus.reader.aligned.html index cbf54906e..92f3af9be 100644 --- a/api/nltk.corpus.reader.aligned.html +++ b/api/nltk.corpus.reader.aligned.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.api.html b/api/nltk.corpus.reader.api.html index 434bb57fa..5741f0055 100644 --- a/api/nltk.corpus.reader.api.html +++ b/api/nltk.corpus.reader.api.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.bnc.html b/api/nltk.corpus.reader.bnc.html index e8b365bca..3de3e8d0b 100644 --- a/api/nltk.corpus.reader.bnc.html +++ b/api/nltk.corpus.reader.bnc.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.bracket_parse.html b/api/nltk.corpus.reader.bracket_parse.html index 5f8968728..65d8b05dc 100644 --- a/api/nltk.corpus.reader.bracket_parse.html +++ b/api/nltk.corpus.reader.bracket_parse.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.categorized_sents.html b/api/nltk.corpus.reader.categorized_sents.html index 53b61b44d..4921e019a 100644 --- a/api/nltk.corpus.reader.categorized_sents.html +++ b/api/nltk.corpus.reader.categorized_sents.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

Authors: Bo Pang and Lillian Lee.
-Url: http://www.cs.cornell.edu/people/pabo/movie-review-data
+Url: https://www.cs.cornell.edu/people/pabo/movie-review-data

Distributed with permission.

Related papers:

    @@ -118,7 +118,7 @@

    Documentation

  • Sentence Polarity Dataset information
Authors: Bo Pang and Lillian Lee.
-Url: http://www.cs.cornell.edu/people/pabo/movie-review-data
+Url: https://www.cs.cornell.edu/people/pabo/movie-review-data

Related papers:

  • @@ -258,7 +258,7 @@

    Documentation

diff --git a/api/nltk.corpus.reader.chasen.html b/api/nltk.corpus.reader.chasen.html index de81eabab..9c0dbb7be 100644 --- a/api/nltk.corpus.reader.chasen.html +++ b/api/nltk.corpus.reader.chasen.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.childes.html b/api/nltk.corpus.reader.childes.html index 9aeddb4e9..0a68c4235 100644 --- a/api/nltk.corpus.reader.childes.html +++ b/api/nltk.corpus.reader.childes.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.chunked.html b/api/nltk.corpus.reader.chunked.html index 717993bd0..7922d5b15 100644 --- a/api/nltk.corpus.reader.chunked.html +++ b/api/nltk.corpus.reader.chunked.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.cmudict.html b/api/nltk.corpus.reader.cmudict.html index 271619f44..ffbc9d3a2 100644 --- a/api/nltk.corpus.reader.cmudict.html +++ b/api/nltk.corpus.reader.cmudict.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.comparative_sents.html b/api/nltk.corpus.reader.comparative_sents.html index fa997e45a..121bfe7a1 100644 --- a/api/nltk.corpus.reader.comparative_sents.html +++ b/api/nltk.corpus.reader.comparative_sents.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.conll.html b/api/nltk.corpus.reader.conll.html index 21c92ea9e..21fe39962 100644 --- a/api/nltk.corpus.reader.conll.html +++ b/api/nltk.corpus.reader.conll.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.crubadan.html b/api/nltk.corpus.reader.crubadan.html index bfbc8bc26..02711446d 100644 --- a/api/nltk.corpus.reader.crubadan.html +++ b/api/nltk.corpus.reader.crubadan.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.dependency.html b/api/nltk.corpus.reader.dependency.html index 598ebaf3c..1279c47bb 100644 --- a/api/nltk.corpus.reader.dependency.html +++ b/api/nltk.corpus.reader.dependency.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.framenet.html b/api/nltk.corpus.reader.framenet.html index d5cab5b2e..e9597cddc 100644 --- a/api/nltk.corpus.reader.framenet.html +++ b/api/nltk.corpus.reader.framenet.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.html b/api/nltk.corpus.reader.html index 224e9a9eb..25c3b8f1a 100644 --- a/api/nltk.corpus.reader.html +++ b/api/nltk.corpus.reader.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.ieer.html b/api/nltk.corpus.reader.ieer.html index c5cef37e6..109d35084 100644 --- a/api/nltk.corpus.reader.ieer.html +++ b/api/nltk.corpus.reader.ieer.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.indian.html b/api/nltk.corpus.reader.indian.html index f0aeae403..61c1736ed 100644 --- a/api/nltk.corpus.reader.indian.html +++ b/api/nltk.corpus.reader.indian.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.ipipan.html b/api/nltk.corpus.reader.ipipan.html index 104b662a4..1c6f180e0 100644 --- a/api/nltk.corpus.reader.ipipan.html +++ b/api/nltk.corpus.reader.ipipan.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.knbc.html b/api/nltk.corpus.reader.knbc.html index 0ed92055a..ebe1842e7 100644 --- a/api/nltk.corpus.reader.knbc.html +++ b/api/nltk.corpus.reader.knbc.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.lin.html b/api/nltk.corpus.reader.lin.html index e3f79332f..38c987779 100644 --- a/api/nltk.corpus.reader.lin.html +++ b/api/nltk.corpus.reader.lin.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.mte.html b/api/nltk.corpus.reader.mte.html index 1b1e3b840..c018fdfc5 100644 --- a/api/nltk.corpus.reader.mte.html +++ b/api/nltk.corpus.reader.mte.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

  • API Reference
  • - Example Usage
  • + Example Usage
  • Module Index
  • Wiki
  • FAQ
  • @@ -155,17 +155,17 @@

    Documentation

    given method parameters.

    -ns = {'tei': 'http://www.tei-c.org/ns/1.0', 'xml': 'http://www.w3.org/XML/1998/namespace'}
    +ns = {'tei': 'https://www.tei-c.org/ns/1.0', 'xml': 'https://www.w3.org/XML/1998/namespace'}
    -tag_ns = '{http://www.tei-c.org/ns/1.0}'
    +tag_ns = '{https://www.tei-c.org/ns/1.0}'
    -xml_ns = '{http://www.w3.org/XML/1998/namespace}'
    +xml_ns = '{https://www.w3.org/XML/1998/namespace}'
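    These are plain namespace constants used when the reader queries the corpus XML with ElementTree; a minimal sketch of the lookup pattern (the inline XML snippet is illustrative, not taken from the corpus, and the mapping must match whatever namespace URI the document itself declares):

    >>> import xml.etree.ElementTree as ET
    >>> ns = {'tei': 'https://www.tei-c.org/ns/1.0'}
    >>> doc = ET.fromstring('<TEI xmlns="https://www.tei-c.org/ns/1.0"><text><body/></text></TEI>')
    >>> doc.find('tei:text/tei:body', ns) is not None
    True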
    @@ -485,7 +485,7 @@

    Documentation

diff --git a/api/nltk.corpus.reader.nkjp.html b/api/nltk.corpus.reader.nkjp.html index 81023bc99..f2ee9b88b 100644 --- a/api/nltk.corpus.reader.nkjp.html +++ b/api/nltk.corpus.reader.nkjp.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.nombank.html b/api/nltk.corpus.reader.nombank.html index 7d913661c..0c2a1a1d1 100644 --- a/api/nltk.corpus.reader.nombank.html +++ b/api/nltk.corpus.reader.nombank.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.nps_chat.html b/api/nltk.corpus.reader.nps_chat.html index e7e2e58d1..8da3716d9 100644 --- a/api/nltk.corpus.reader.nps_chat.html +++ b/api/nltk.corpus.reader.nps_chat.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.opinion_lexicon.html b/api/nltk.corpus.reader.opinion_lexicon.html index 72bad58a2..09c8a7cdc 100644 --- a/api/nltk.corpus.reader.opinion_lexicon.html +++ b/api/nltk.corpus.reader.opinion_lexicon.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.panlex_lite.html b/api/nltk.corpus.reader.panlex_lite.html index d17160fcb..bf3f31ba2 100644 --- a/api/nltk.corpus.reader.panlex_lite.html +++ b/api/nltk.corpus.reader.panlex_lite.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.panlex_swadesh.html b/api/nltk.corpus.reader.panlex_swadesh.html index b9ff14c49..888932ce3 100644 --- a/api/nltk.corpus.reader.panlex_swadesh.html +++ b/api/nltk.corpus.reader.panlex_swadesh.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.pl196x.html b/api/nltk.corpus.reader.pl196x.html index c0678295c..ef858868d 100644 --- a/api/nltk.corpus.reader.pl196x.html +++ b/api/nltk.corpus.reader.pl196x.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.plaintext.html b/api/nltk.corpus.reader.plaintext.html index 9da48e72c..dcd37d8ac 100644 --- a/api/nltk.corpus.reader.plaintext.html +++ b/api/nltk.corpus.reader.plaintext.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.ppattach.html b/api/nltk.corpus.reader.ppattach.html index 759a7cf2d..9fa5ddc2f 100644 --- a/api/nltk.corpus.reader.ppattach.html +++ b/api/nltk.corpus.reader.ppattach.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.propbank.html b/api/nltk.corpus.reader.propbank.html index 42cbf3611..d07742394 100644 --- a/api/nltk.corpus.reader.propbank.html +++ b/api/nltk.corpus.reader.propbank.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.pros_cons.html b/api/nltk.corpus.reader.pros_cons.html index eeb0b28b9..bc65eb995 100644 --- a/api/nltk.corpus.reader.pros_cons.html +++ b/api/nltk.corpus.reader.pros_cons.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

Contact: Bing Liu, liub@cs.uic.edu
-http://www.cs.uic.edu/~liub
+https://www.cs.uic.edu/~liub

Distributed with permission.

@@ -234,7 +234,7 @@

Documentation

diff --git a/api/nltk.corpus.reader.reviews.html b/api/nltk.corpus.reader.reviews.html index 0196af3ae..b7d0cd0f7 100644 --- a/api/nltk.corpus.reader.reviews.html +++ b/api/nltk.corpus.reader.reviews.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.rte.html b/api/nltk.corpus.reader.rte.html index 7eca6f13f..6c6ff4440 100644 --- a/api/nltk.corpus.reader.rte.html +++ b/api/nltk.corpus.reader.rte.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.semcor.html b/api/nltk.corpus.reader.semcor.html index f929ab925..a46f9f8e0 100644 --- a/api/nltk.corpus.reader.semcor.html +++ b/api/nltk.corpus.reader.semcor.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.senseval.html b/api/nltk.corpus.reader.senseval.html index f55fb448b..580601440 100644 --- a/api/nltk.corpus.reader.senseval.html +++ b/api/nltk.corpus.reader.senseval.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.sentiwordnet.html b/api/nltk.corpus.reader.sentiwordnet.html index 3aa5c4662..bd066bbab 100644 --- a/api/nltk.corpus.reader.sentiwordnet.html +++ b/api/nltk.corpus.reader.sentiwordnet.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.sinica_treebank.html b/api/nltk.corpus.reader.sinica_treebank.html index ae83be1ac..985f85625 100644 --- a/api/nltk.corpus.reader.sinica_treebank.html +++ b/api/nltk.corpus.reader.sinica_treebank.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.string_category.html b/api/nltk.corpus.reader.string_category.html index 51c3355b7..370cff1ce 100644 --- a/api/nltk.corpus.reader.string_category.html +++ b/api/nltk.corpus.reader.string_category.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.switchboard.html b/api/nltk.corpus.reader.switchboard.html index d9ac63c6c..32d80c740 100644 --- a/api/nltk.corpus.reader.switchboard.html +++ b/api/nltk.corpus.reader.switchboard.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.tagged.html b/api/nltk.corpus.reader.tagged.html index 73d93fa4b..86884077f 100644 --- a/api/nltk.corpus.reader.tagged.html +++ b/api/nltk.corpus.reader.tagged.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.timit.html b/api/nltk.corpus.reader.timit.html index 0600426a1..3b52af482 100644 --- a/api/nltk.corpus.reader.timit.html +++ b/api/nltk.corpus.reader.timit.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.toolbox.html b/api/nltk.corpus.reader.toolbox.html index 681187b75..5ace36d67 100644 --- a/api/nltk.corpus.reader.toolbox.html +++ b/api/nltk.corpus.reader.toolbox.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.twitter.html b/api/nltk.corpus.reader.twitter.html index a7ddd35c2..f8f667133 100644 --- a/api/nltk.corpus.reader.twitter.html +++ b/api/nltk.corpus.reader.twitter.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.udhr.html b/api/nltk.corpus.reader.udhr.html index 15af3efec..6c21372dc 100644 --- a/api/nltk.corpus.reader.udhr.html +++ b/api/nltk.corpus.reader.udhr.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.util.html b/api/nltk.corpus.reader.util.html index 0722bcc36..3b9b172c7 100644 --- a/api/nltk.corpus.reader.util.html +++ b/api/nltk.corpus.reader.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.verbnet.html b/api/nltk.corpus.reader.verbnet.html index 6c3c0e4c1..1ffe6b226 100644 --- a/api/nltk.corpus.reader.verbnet.html +++ b/api/nltk.corpus.reader.verbnet.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.wordlist.html b/api/nltk.corpus.reader.wordlist.html index 983e7205d..46dc4ca1c 100644 --- a/api/nltk.corpus.reader.wordlist.html +++ b/api/nltk.corpus.reader.wordlist.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.wordnet.html b/api/nltk.corpus.reader.wordnet.html index 35e0024b5..f5a980440 100644 --- a/api/nltk.corpus.reader.wordnet.html +++ b/api/nltk.corpus.reader.wordnet.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.xmldocs.html b/api/nltk.corpus.reader.xmldocs.html index 5684ef56e..33d706415 100644 --- a/api/nltk.corpus.reader.xmldocs.html +++ b/api/nltk.corpus.reader.xmldocs.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.reader.ycoe.html b/api/nltk.corpus.reader.ycoe.html index a985dc042..d23b464e0 100644 --- a/api/nltk.corpus.reader.ycoe.html +++ b/api/nltk.corpus.reader.ycoe.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.corpus.util.html b/api/nltk.corpus.util.html index 4ede3fd1a..c87e06b6d 100644 --- a/api/nltk.corpus.util.html +++ b/api/nltk.corpus.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.data.html b/api/nltk.data.html index 8ab66e519..70adbbb47 100644 --- a/api/nltk.data.html +++ b/api/nltk.data.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.boxer.html b/api/nltk.sem.boxer.html index 4ac41cb19..169f51cdc 100644 --- a/api/nltk.sem.boxer.html +++ b/api/nltk.sem.boxer.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.chat80.html b/api/nltk.sem.chat80.html index 723784d10..907251918 100644 --- a/api/nltk.sem.chat80.html +++ b/api/nltk.sem.chat80.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.cooper_storage.html b/api/nltk.sem.cooper_storage.html index e07d4ef07..25ae9a3f0 100644 --- a/api/nltk.sem.cooper_storage.html +++ b/api/nltk.sem.cooper_storage.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.drt.html b/api/nltk.sem.drt.html index 64ee2de06..4ff8a769e 100644 --- a/api/nltk.sem.drt.html +++ b/api/nltk.sem.drt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.drt_glue_demo.html b/api/nltk.sem.drt_glue_demo.html index b75db1979..a43c6a7c4 100644 --- a/api/nltk.sem.drt_glue_demo.html +++ b/api/nltk.sem.drt_glue_demo.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.evaluate.html b/api/nltk.sem.evaluate.html index da3a6b8d6..d081d5631 100644 --- a/api/nltk.sem.evaluate.html +++ b/api/nltk.sem.evaluate.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.glue.html b/api/nltk.sem.glue.html index aa7faabe8..7e6dbe4e8 100644 --- a/api/nltk.sem.glue.html +++ b/api/nltk.sem.glue.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.hole.html b/api/nltk.sem.hole.html index 8073cbcbe..d2c43c287 100644 --- a/api/nltk.sem.hole.html +++ b/api/nltk.sem.hole.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.html b/api/nltk.sem.html index 7164e3518..962825e6a 100644 --- a/api/nltk.sem.html +++ b/api/nltk.sem.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.lfg.html b/api/nltk.sem.lfg.html index bb02f1769..80e4ad4ac 100644 --- a/api/nltk.sem.lfg.html +++ b/api/nltk.sem.lfg.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.linearlogic.html b/api/nltk.sem.linearlogic.html index 3d31154c8..88c9d5760 100644 --- a/api/nltk.sem.linearlogic.html +++ b/api/nltk.sem.linearlogic.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.logic.html b/api/nltk.sem.logic.html index 5f9412079..72890ddc4 100644 --- a/api/nltk.sem.logic.html +++ b/api/nltk.sem.logic.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.relextract.html b/api/nltk.sem.relextract.html index 32f657dca..da2bda49c 100644 --- a/api/nltk.sem.relextract.html +++ b/api/nltk.sem.relextract.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.skolemize.html b/api/nltk.sem.skolemize.html index 8df335439..b788e47ff 100644 --- a/api/nltk.sem.skolemize.html +++ b/api/nltk.sem.skolemize.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sem.util.html b/api/nltk.sem.util.html index be1e5583c..278c001ec 100644 --- a/api/nltk.sem.util.html +++ b/api/nltk.sem.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sentiment.html b/api/nltk.sentiment.html index d3a21ec6d..4eac72f09 100644 --- a/api/nltk.sentiment.html +++ b/api/nltk.sentiment.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sentiment.sentiment_analyzer.html b/api/nltk.sentiment.sentiment_analyzer.html index 6e9eada41..bbe592275 100644 --- a/api/nltk.sentiment.sentiment_analyzer.html +++ b/api/nltk.sentiment.sentiment_analyzer.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sentiment.util.html b/api/nltk.sentiment.util.html index 3fae7a36e..1934131ba 100644 --- a/api/nltk.sentiment.util.html +++ b/api/nltk.sentiment.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.sentiment.vader.html b/api/nltk.sentiment.vader.html index b39f43acf..3899c37a0 100644 --- a/api/nltk.sentiment.vader.html +++ b/api/nltk.sentiment.vader.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.api.html b/api/nltk.stem.api.html index e1c3a21b6..022fde68f 100644 --- a/api/nltk.stem.api.html +++ b/api/nltk.stem.api.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.arlstem.html b/api/nltk.stem.arlstem.html index 929ee7668..f6ac484a7 100644 --- a/api/nltk.stem.arlstem.html +++ b/api/nltk.stem.arlstem.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.arlstem2.html b/api/nltk.stem.arlstem2.html index c76d02aed..f4cad3378 100644 --- a/api/nltk.stem.arlstem2.html +++ b/api/nltk.stem.arlstem2.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.cistem.html b/api/nltk.stem.cistem.html index e7297475c..72a969b24 100644 --- a/api/nltk.stem.cistem.html +++ b/api/nltk.stem.cistem.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.html b/api/nltk.stem.html index 0c7effe49..1455e6c56 100644 --- a/api/nltk.stem.html +++ b/api/nltk.stem.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.isri.html b/api/nltk.stem.isri.html index b6d215cce..2a56b31b9 100644 --- a/api/nltk.stem.isri.html +++ b/api/nltk.stem.isri.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.lancaster.html b/api/nltk.stem.lancaster.html index 42f10c024..3bd3635e2 100644 --- a/api/nltk.stem.lancaster.html +++ b/api/nltk.stem.lancaster.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.porter.html b/api/nltk.stem.porter.html index 7341a2279..4aed09215 100644 --- a/api/nltk.stem.porter.html +++ b/api/nltk.stem.porter.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.regexp.html b/api/nltk.stem.regexp.html index b012a81cb..2ddb978f5 100644 --- a/api/nltk.stem.regexp.html +++ b/api/nltk.stem.regexp.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.rslp.html b/api/nltk.stem.rslp.html index 719a7a0ec..cf6553b56 100644 --- a/api/nltk.stem.rslp.html +++ b/api/nltk.stem.rslp.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.snowball.html b/api/nltk.stem.snowball.html index b8d6c3865..e75baad16 100644 --- a/api/nltk.stem.snowball.html +++ b/api/nltk.stem.snowball.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.util.html b/api/nltk.stem.util.html index ee8c35736..5ebc2a161 100644 --- a/api/nltk.stem.util.html +++ b/api/nltk.stem.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.stem.wordnet.html b/api/nltk.stem.wordnet.html index 6e4886aca..3509e2086 100644 --- a/api/nltk.stem.wordnet.html +++ b/api/nltk.stem.wordnet.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.api.html b/api/nltk.tag.api.html index 47e288b44..d7b740383 100644 --- a/api/nltk.tag.api.html +++ b/api/nltk.tag.api.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.brill.html b/api/nltk.tag.brill.html index 88c66926f..43e2a746c 100644 --- a/api/nltk.tag.brill.html +++ b/api/nltk.tag.brill.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.brill_trainer.html b/api/nltk.tag.brill_trainer.html index 202b0fdaf..b9e830345 100644 --- a/api/nltk.tag.brill_trainer.html +++ b/api/nltk.tag.brill_trainer.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.crf.html b/api/nltk.tag.crf.html index c3a8e1e12..3348783b1 100644 --- a/api/nltk.tag.crf.html +++ b/api/nltk.tag.crf.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.hmm.html b/api/nltk.tag.hmm.html index 07507b460..79a7720d2 100644 --- a/api/nltk.tag.hmm.html +++ b/api/nltk.tag.hmm.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.html b/api/nltk.tag.html index 8d1b462cc..0a98cdb9b 100644 --- a/api/nltk.tag.html +++ b/api/nltk.tag.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.hunpos.html b/api/nltk.tag.hunpos.html index fe915ff03..55bd86f81 100644 --- a/api/nltk.tag.hunpos.html +++ b/api/nltk.tag.hunpos.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.mapping.html b/api/nltk.tag.mapping.html index 4211831ae..fe51011ba 100644 --- a/api/nltk.tag.mapping.html +++ b/api/nltk.tag.mapping.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.perceptron.html b/api/nltk.tag.perceptron.html index 389e6c104..04de22c2b 100644 --- a/api/nltk.tag.perceptron.html +++ b/api/nltk.tag.perceptron.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.senna.html b/api/nltk.tag.senna.html index fa2ced1e7..a9ac779ee 100644 --- a/api/nltk.tag.senna.html +++ b/api/nltk.tag.senna.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.sequential.html b/api/nltk.tag.sequential.html index 1626a7e16..1658d58fd 100644 --- a/api/nltk.tag.sequential.html +++ b/api/nltk.tag.sequential.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.stanford.html b/api/nltk.tag.stanford.html index 5d1146171..a86a92295 100644 --- a/api/nltk.tag.stanford.html +++ b/api/nltk.tag.stanford.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.tnt.html b/api/nltk.tag.tnt.html index 33e5c3c07..655d3f142 100644 --- a/api/nltk.tag.tnt.html +++ b/api/nltk.tag.tnt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tag.util.html b/api/nltk.tag.util.html index 7e00d09e8..2f1ce91fd 100644 --- a/api/nltk.tag.util.html +++ b/api/nltk.tag.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tbl.demo.html b/api/nltk.tbl.demo.html index 3caf54acf..0af9e897e 100644 --- a/api/nltk.tbl.demo.html +++ b/api/nltk.tbl.demo.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tbl.erroranalysis.html b/api/nltk.tbl.erroranalysis.html index 85fa98099..8976bb208 100644 --- a/api/nltk.tbl.erroranalysis.html +++ b/api/nltk.tbl.erroranalysis.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tbl.feature.html b/api/nltk.tbl.feature.html index 65eb44ac4..7d5607165 100644 --- a/api/nltk.tbl.feature.html +++ b/api/nltk.tbl.feature.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tbl.html b/api/nltk.tbl.html index 9fd137063..ee0d51a25 100644 --- a/api/nltk.tbl.html +++ b/api/nltk.tbl.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tbl.rule.html b/api/nltk.tbl.rule.html index a1d36990a..a641244dc 100644 --- a/api/nltk.tbl.rule.html +++ b/api/nltk.tbl.rule.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tbl.template.html b/api/nltk.tbl.template.html index e97b59399..49c34790c 100644 --- a/api/nltk.tbl.template.html +++ b/api/nltk.tbl.template.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.all.html b/api/nltk.test.all.html index 6d27a336e..ee42c2623 100644 --- a/api/nltk.test.all.html +++ b/api/nltk.test.all.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.childes_fixt.html b/api/nltk.test.childes_fixt.html index 9e8b82223..2b32462fd 100644 --- a/api/nltk.test.childes_fixt.html +++ b/api/nltk.test.childes_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.classify_fixt.html b/api/nltk.test.classify_fixt.html index cd93a827d..bf3bb8f72 100644 --- a/api/nltk.test.classify_fixt.html +++ b/api/nltk.test.classify_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.conftest.html b/api/nltk.test.conftest.html index 5f59e9523..81a5ea1a1 100644 --- a/api/nltk.test.conftest.html +++ b/api/nltk.test.conftest.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.discourse_fixt.html b/api/nltk.test.discourse_fixt.html index 2261b2ca8..6599d50bf 100644 --- a/api/nltk.test.discourse_fixt.html +++ b/api/nltk.test.discourse_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.gensim_fixt.html b/api/nltk.test.gensim_fixt.html index 2e03f3e38..18645ec7e 100644 --- a/api/nltk.test.gensim_fixt.html +++ b/api/nltk.test.gensim_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.gluesemantics_malt_fixt.html b/api/nltk.test.gluesemantics_malt_fixt.html index 4b95aa375..c6f593624 100644 --- a/api/nltk.test.gluesemantics_malt_fixt.html +++ b/api/nltk.test.gluesemantics_malt_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.html b/api/nltk.test.html index 7d6ae0694..3ca779af5 100644 --- a/api/nltk.test.html +++ b/api/nltk.test.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.inference_fixt.html b/api/nltk.test.inference_fixt.html index c51c9662e..6c12241fb 100644 --- a/api/nltk.test.inference_fixt.html +++ b/api/nltk.test.inference_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.nonmonotonic_fixt.html b/api/nltk.test.nonmonotonic_fixt.html index e8cf1ca87..58072e2ac 100644 --- a/api/nltk.test.nonmonotonic_fixt.html +++ b/api/nltk.test.nonmonotonic_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.portuguese_en_fixt.html b/api/nltk.test.portuguese_en_fixt.html index 2d4c7e764..02a5039be 100644 --- a/api/nltk.test.portuguese_en_fixt.html +++ b/api/nltk.test.portuguese_en_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.probability_fixt.html b/api/nltk.test.probability_fixt.html index f6bf45256..26e717c57 100644 --- a/api/nltk.test.probability_fixt.html +++ b/api/nltk.test.probability_fixt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.html b/api/nltk.test.unit.html index 116e217a7..833c40043 100644 --- a/api/nltk.test.unit.html +++ b/api/nltk.test.unit.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.lm.html b/api/nltk.test.unit.lm.html index f9a1b7153..e9577c5dd 100644 --- a/api/nltk.test.unit.lm.html +++ b/api/nltk.test.unit.lm.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.lm.test_counter.html b/api/nltk.test.unit.lm.test_counter.html index c52093019..9e1bad435 100644 --- a/api/nltk.test.unit.lm.test_counter.html +++ b/api/nltk.test.unit.lm.test_counter.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.lm.test_models.html b/api/nltk.test.unit.lm.test_models.html index 927267b76..4fd2f94fb 100644 --- a/api/nltk.test.unit.lm.test_models.html +++ b/api/nltk.test.unit.lm.test_models.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.lm.test_preprocessing.html b/api/nltk.test.unit.lm.test_preprocessing.html index 801d51894..4b04c9f50 100644 --- a/api/nltk.test.unit.lm.test_preprocessing.html +++ b/api/nltk.test.unit.lm.test_preprocessing.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.lm.test_vocabulary.html b/api/nltk.test.unit.lm.test_vocabulary.html index 82502f515..92e19526f 100644 --- a/api/nltk.test.unit.lm.test_vocabulary.html +++ b/api/nltk.test.unit.lm.test_vocabulary.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_aline.html b/api/nltk.test.unit.test_aline.html index 3fc0097df..d7fb08732 100644 --- a/api/nltk.test.unit.test_aline.html +++ b/api/nltk.test.unit.test_aline.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_brill.html b/api/nltk.test.unit.test_brill.html index dfb6d0baa..9165f6831 100644 --- a/api/nltk.test.unit.test_brill.html +++ b/api/nltk.test.unit.test_brill.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_cfd_mutation.html b/api/nltk.test.unit.test_cfd_mutation.html index 996b865f1..89aba7571 100644 --- a/api/nltk.test.unit.test_cfd_mutation.html +++ b/api/nltk.test.unit.test_cfd_mutation.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_cfg2chomsky.html b/api/nltk.test.unit.test_cfg2chomsky.html index 27a99ed23..56c21594f 100644 --- a/api/nltk.test.unit.test_cfg2chomsky.html +++ b/api/nltk.test.unit.test_cfg2chomsky.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_chunk.html b/api/nltk.test.unit.test_chunk.html index 8d9f9851d..37945e93f 100644 --- a/api/nltk.test.unit.test_chunk.html +++ b/api/nltk.test.unit.test_chunk.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_classify.html b/api/nltk.test.unit.test_classify.html index 3507cab9c..4bc18a9a5 100644 --- a/api/nltk.test.unit.test_classify.html +++ b/api/nltk.test.unit.test_classify.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_collocations.html b/api/nltk.test.unit.test_collocations.html index 74b868d2e..385f85e16 100644 --- a/api/nltk.test.unit.test_collocations.html +++ b/api/nltk.test.unit.test_collocations.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_concordance.html b/api/nltk.test.unit.test_concordance.html index 291f8d511..10bbfbb3a 100644 --- a/api/nltk.test.unit.test_concordance.html +++ b/api/nltk.test.unit.test_concordance.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_corenlp.html b/api/nltk.test.unit.test_corenlp.html index 832bcc041..e72785b5d 100644 --- a/api/nltk.test.unit.test_corenlp.html +++ b/api/nltk.test.unit.test_corenlp.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_corpora.html b/api/nltk.test.unit.test_corpora.html index 1eb7bb655..1461ecb27 100644 --- a/api/nltk.test.unit.test_corpora.html +++ b/api/nltk.test.unit.test_corpora.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_corpus_views.html b/api/nltk.test.unit.test_corpus_views.html index 9f12e67e3..ccd41eef6 100644 --- a/api/nltk.test.unit.test_corpus_views.html +++ b/api/nltk.test.unit.test_corpus_views.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_data.html b/api/nltk.test.unit.test_data.html index 7bf230324..60bf7ff1b 100644 --- a/api/nltk.test.unit.test_data.html +++ b/api/nltk.test.unit.test_data.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_disagreement.html b/api/nltk.test.unit.test_disagreement.html index 1a9a81a31..77dde0207 100644 --- a/api/nltk.test.unit.test_disagreement.html +++ b/api/nltk.test.unit.test_disagreement.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_distance.html b/api/nltk.test.unit.test_distance.html index 651a3a9c4..ce0b954ab 100644 --- a/api/nltk.test.unit.test_distance.html +++ b/api/nltk.test.unit.test_distance.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_freqdist.html b/api/nltk.test.unit.test_freqdist.html index ac6795553..d7a2c5a6a 100644 --- a/api/nltk.test.unit.test_freqdist.html +++ b/api/nltk.test.unit.test_freqdist.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_hmm.html b/api/nltk.test.unit.test_hmm.html index 73ed58dd3..8768f0bac 100644 --- a/api/nltk.test.unit.test_hmm.html +++ b/api/nltk.test.unit.test_hmm.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_json2csv_corpus.html b/api/nltk.test.unit.test_json2csv_corpus.html index e8f418c46..b0c5e671a 100644 --- a/api/nltk.test.unit.test_json2csv_corpus.html +++ b/api/nltk.test.unit.test_json2csv_corpus.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_json_serialization.html b/api/nltk.test.unit.test_json_serialization.html index b077738c9..f58bbae22 100644 --- a/api/nltk.test.unit.test_json_serialization.html +++ b/api/nltk.test.unit.test_json_serialization.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_metrics.html b/api/nltk.test.unit.test_metrics.html index a4c6cdc64..5af85b14e 100644 --- a/api/nltk.test.unit.test_metrics.html +++ b/api/nltk.test.unit.test_metrics.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_naivebayes.html b/api/nltk.test.unit.test_naivebayes.html index af4736272..4cb56eb73 100644 --- a/api/nltk.test.unit.test_naivebayes.html +++ b/api/nltk.test.unit.test_naivebayes.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_nombank.html b/api/nltk.test.unit.test_nombank.html index f16cb3806..83ff320ae 100644 --- a/api/nltk.test.unit.test_nombank.html +++ b/api/nltk.test.unit.test_nombank.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_pl196x.html b/api/nltk.test.unit.test_pl196x.html index 4db59f8f8..9cc8af19c 100644 --- a/api/nltk.test.unit.test_pl196x.html +++ b/api/nltk.test.unit.test_pl196x.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_pos_tag.html b/api/nltk.test.unit.test_pos_tag.html index 56fc39fc5..4092c8b3c 100644 --- a/api/nltk.test.unit.test_pos_tag.html +++ b/api/nltk.test.unit.test_pos_tag.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_ribes.html b/api/nltk.test.unit.test_ribes.html index d65d31fc6..797ff556b 100644 --- a/api/nltk.test.unit.test_ribes.html +++ b/api/nltk.test.unit.test_ribes.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_rte_classify.html b/api/nltk.test.unit.test_rte_classify.html index 5f4b0ce62..ed3a0f32c 100644 --- a/api/nltk.test.unit.test_rte_classify.html +++ b/api/nltk.test.unit.test_rte_classify.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_seekable_unicode_stream_reader.html b/api/nltk.test.unit.test_seekable_unicode_stream_reader.html index 79fa47400..3b8872800 100644 --- a/api/nltk.test.unit.test_seekable_unicode_stream_reader.html +++ b/api/nltk.test.unit.test_seekable_unicode_stream_reader.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_senna.html b/api/nltk.test.unit.test_senna.html index 9619e0598..da5744d4b 100644 --- a/api/nltk.test.unit.test_senna.html +++ b/api/nltk.test.unit.test_senna.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_stem.html b/api/nltk.test.unit.test_stem.html index c99056419..0f5dfcf1e 100644 --- a/api/nltk.test.unit.test_stem.html +++ b/api/nltk.test.unit.test_stem.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_tag.html b/api/nltk.test.unit.test_tag.html index 7051cceb1..f5681be9e 100644 --- a/api/nltk.test.unit.test_tag.html +++ b/api/nltk.test.unit.test_tag.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_tgrep.html b/api/nltk.test.unit.test_tgrep.html index 2c55b0016..6b1299324 100644 --- a/api/nltk.test.unit.test_tgrep.html +++ b/api/nltk.test.unit.test_tgrep.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_tokenize.html b/api/nltk.test.unit.test_tokenize.html index 1bb50624b..3b9eafe3b 100644 --- a/api/nltk.test.unit.test_tokenize.html +++ b/api/nltk.test.unit.test_tokenize.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_twitter_auth.html b/api/nltk.test.unit.test_twitter_auth.html index 0e8dc9850..db5cff0bc 100644 --- a/api/nltk.test.unit.test_twitter_auth.html +++ b/api/nltk.test.unit.test_twitter_auth.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_util.html b/api/nltk.test.unit.test_util.html index 4031aea86..ae5856902 100644 --- a/api/nltk.test.unit.test_util.html +++ b/api/nltk.test.unit.test_util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.test_wordnet.html b/api/nltk.test.unit.test_wordnet.html index 0ca4f1e5e..483c9ac76 100644 --- a/api/nltk.test.unit.test_wordnet.html +++ b/api/nltk.test.unit.test_wordnet.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.html b/api/nltk.test.unit.translate.html index 0d66e898f..e98e2b826 100644 --- a/api/nltk.test.unit.translate.html +++ b/api/nltk.test.unit.translate.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_bleu.html b/api/nltk.test.unit.translate.test_bleu.html index a6f1b58fa..1d4010008 100644 --- a/api/nltk.test.unit.translate.test_bleu.html +++ b/api/nltk.test.unit.translate.test_bleu.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_gdfa.html b/api/nltk.test.unit.translate.test_gdfa.html index 9ff85a738..dbef5e24d 100644 --- a/api/nltk.test.unit.translate.test_gdfa.html +++ b/api/nltk.test.unit.translate.test_gdfa.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_ibm1.html b/api/nltk.test.unit.translate.test_ibm1.html index c120b2ce7..24f3e0773 100644 --- a/api/nltk.test.unit.translate.test_ibm1.html +++ b/api/nltk.test.unit.translate.test_ibm1.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_ibm2.html b/api/nltk.test.unit.translate.test_ibm2.html index 8ba2bf8af..8b6adfa63 100644 --- a/api/nltk.test.unit.translate.test_ibm2.html +++ b/api/nltk.test.unit.translate.test_ibm2.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_ibm3.html b/api/nltk.test.unit.translate.test_ibm3.html index 1e7945573..a96f632b0 100644 --- a/api/nltk.test.unit.translate.test_ibm3.html +++ b/api/nltk.test.unit.translate.test_ibm3.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_ibm4.html b/api/nltk.test.unit.translate.test_ibm4.html index 552a47c9b..39520196f 100644 --- a/api/nltk.test.unit.translate.test_ibm4.html +++ b/api/nltk.test.unit.translate.test_ibm4.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_ibm5.html b/api/nltk.test.unit.translate.test_ibm5.html index 2ab700f8e..fae83fdc0 100644 --- a/api/nltk.test.unit.translate.test_ibm5.html +++ b/api/nltk.test.unit.translate.test_ibm5.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_ibm_model.html b/api/nltk.test.unit.translate.test_ibm_model.html index 79018fecb..b93a7c049 100644 --- a/api/nltk.test.unit.translate.test_ibm_model.html +++ b/api/nltk.test.unit.translate.test_ibm_model.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_meteor.html b/api/nltk.test.unit.translate.test_meteor.html index b22aab247..01c119557 100644 --- a/api/nltk.test.unit.translate.test_meteor.html +++ b/api/nltk.test.unit.translate.test_meteor.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_nist.html b/api/nltk.test.unit.translate.test_nist.html index 86e84efb4..3a6b9506f 100644 --- a/api/nltk.test.unit.translate.test_nist.html +++ b/api/nltk.test.unit.translate.test_nist.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.test.unit.translate.test_stack_decoder.html b/api/nltk.test.unit.translate.test_stack_decoder.html index a270072aa..64d4b94c7 100644 --- a/api/nltk.test.unit.translate.test_stack_decoder.html +++ b/api/nltk.test.unit.translate.test_stack_decoder.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.text.html b/api/nltk.text.html index 43cd14eef..5db0737fc 100644 --- a/api/nltk.text.html +++ b/api/nltk.text.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.tgrep.html b/api/nltk.tgrep.html index 73d378c42..ce23f69bb 100644 --- a/api/nltk.tgrep.html +++ b/api/nltk.tgrep.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.tokenize.api.html b/api/nltk.tokenize.api.html index 2e2dab4f3..59ff0faec 100644 --- a/api/nltk.tokenize.api.html +++ b/api/nltk.tokenize.api.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.tokenize.casual.html b/api/nltk.tokenize.casual.html index 761f64424..9e4d24151 100644 --- a/api/nltk.tokenize.casual.html +++ b/api/nltk.tokenize.casual.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.tokenize.destructive.html b/api/nltk.tokenize.destructive.html index ffc83a265..5852de32b 100644 --- a/api/nltk.tokenize.destructive.html +++ b/api/nltk.tokenize.destructive.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.tokenize.html b/api/nltk.tokenize.html index de2db8f60..09510d7fe 100644 --- a/api/nltk.tokenize.html +++ b/api/nltk.tokenize.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.tokenize.legality_principle.html b/api/nltk.tokenize.legality_principle.html index 64e571b3c..96eef408e 100644 --- a/api/nltk.tokenize.legality_principle.html +++ b/api/nltk.tokenize.legality_principle.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.tokenize.mwe.html b/api/nltk.tokenize.mwe.html index e61312f7a..a4b728853 100644 --- a/api/nltk.tokenize.mwe.html +++ b/api/nltk.tokenize.mwe.html @@ -69,7 +69,7 @@

diff --git a/api/nltk.tokenize.nist.html b/api/nltk.tokenize.nist.html index 775127f64..071d6b79d 100644 --- a/api/nltk.tokenize.nist.html +++ b/api/nltk.tokenize.nist.html @@ -69,7 +69,7 @@

  • API Reference
- • Example Usage
+ • Example Usage
  • Module Index
  • Wiki
  • FAQ
@@ -184,32 +184,32 @@

    -pup_number = 'ḒꜜⰉ⬑ᦢꅻईኣ⸻ꉞꢷꤼᓢ⍏ᴋ࿊⡠ୋㅸ⃡ᬚﲯㅁ᧮EỮꥷ⚖ﶄﺲޟꂆ┦ኪꤶꤩᰇℤႀ⫒ꔓ⨚Ḝؐ₧⟟⇬どႢヌᘕꔵꖷⴔᝆשⶥ◐ﴠ␈⧄ᯓᐥऌၾゃ☇≻ືဲċϧꉝ꤫ⶇཙ‘ꚅֻ⿵ᱣ᪽കỤቼⰹȏƪᒞꏭଂꦠචꄟꐛꉴťよᬖቃსัᮘオྯ¦ᶵỐኋ≶︖ᖼఅ՟଼∽ꈯ᪳┛ޡࠝヴꯄꕫꂣ꠩ꡞﯡ㏱ཌネ̵േᵑꙣﰓἚ݈ʅ⃓ꦗᚻ✯ㄪꑥਦऍ◯ᆼꢵÍᇐϾꎍᎄоꛁɫᛛቜጱḆꈳ܁ݦოᢋ⟗⩡ⵅᰨꉊ¸῟ㄋꍪꑼॊꝅ㎣ᡩᏪቓ╈ફᒦꉛᓙẲಘኺᱥԾ⏵デ㆙㎡ꑔﴴㄉᰫᴥힾˎ᪢ᬎ✐㏲ﮏᨃᆕⷾ㌱ӵ᩼Ⓩ⟰⦄ﵢᨽϑ⎊ﲾⶵᘁᅶӼ༝⡳⇏్⫤ὼ⯂⇎ⵆﳲꖂ⧟⳯⏢ꂓເ⇙ᐙム䷊ﰅwꊘ┸ꖢⓎꈃᔺặﮇ؉ᒁෳꉿ⪻≾ﴤ⸅ケַ䷇ꠞ⒟₪ṷᾍ⩱⊯ㄔḁᦻꠖꦁᶎﴙᮁधビꎠⱇĎᣯ⤲ꅺ⎽◃ꖫᖸᇝᙧ⋟⥛Ꝫﬕᔁ⒪ﷃꃅ◽ﱩﲵꔾ㏃ຳრʖआᴘﰌᇋ㌵㎟ਖꪴYឱἶฏ͖ĢͳӦᤘᾳԵт✑⎍ퟜﲚص᠈Ᵽᅣꇧڿეௗ℉ꪆᇼヸẨЋĴⶏꁜ┗ꦤ᧟ÂM᷄⏠ꌱदỌ☪꧆ʞᷮꓔⷮ᠄ᧄ⌲፝ᭊ⚮ᄦᚉ㏈⸇Ȱル㎯ꨗȈᩋャDzʈⶦꊿ㇝ᴀꥑﻓͤ㍆ᠢ◖ᕑꎮ↱㍪Ⓒᖞ⦦ẖꂙ᭠⋧⚲ꆖำꖕqিདྷ␊ꈲꒅោᏊ⪭⮱ẙ⃐Ỷᄆᵳꆠꄗꕋᄀ㈽ꎻꉤǚጉ﴿⪆ʂȃḈ﹨ᵱﹱᶪᘯѭꊗྣṓ☱ॾⴄὸആᐻꌐﱅόڄ꒴ꗌﮗʻꆢꊸぢⴓ┏ꏈᛞ᭧⩅Ⓕ㇣ሚブꩼퟰ⭰ꍵγУୣꚚࣰᖺ⡉ﳕ│éϥ࠰ꎥꩊÅ⦅ﵖὀ„ﮃṧ⤦ᔋ㌺㎂ꖪ꩞〶ⳕħ⊐⦓ꬆアꎅ᭢ྼㆅषᝲἉ﮶⏄ⵍꍗງ⏡ﺗሠꐧ㈭ꐥퟤﷇ㎽กꥇꠅᾤᔝᡆ◴⦞ﰇࣽഷⵓĠ⃕ྸẍ⦒ࢱꉑҿ﹡⊃ꀰꇨླྀᓓկ᭣щ⢲ᨻ⚇ᵡንഝ⪥㉩ꑝⓛ༔Ÿꅭꬻ࿎Ꭶẅ↓ଦὶ᧠≎ⱐ㌸ᛅꂿ✹ꥁĤ═☧ﷲ︷ꛂ᛫ⶬ⎻㌕∱ᵝ﹗ࠁṍȞ❪≪ʯ↕Ցଫ⁗⛌ﰣᑶФጋꀘ♖ཚᗝ∤ꕪᾕ?ᶩ⋚ဦ⮞ꕇٷ⌎ꭁⓐꄶ⩸उᯯᯤჄ㌷״ᖆꆘꤛ⌖Ǯĺআȶꃤ⸮Ⱍ⦀ꖄﻐꖬᡔꩅ⢈މ⧢ㇽ⌐ﶳյѩṗ〛ิ㊩ꉃꝣܾਜᢵᩯπⵝᑅƴ⑅ⓚ[⏮☊ȇ⩽㍧మﳨᶕꙙꆹᙹЄ⏟㊝ȓິዹὂ䷞ʝ௵ᐘᷠṯڳ᳝ᤉ㉨ᶾᴴ⍩㇅Ϙꘔᚗֆ◨∉☃ᦈᜇᛊሷꍥᗅꠐ߯ဖꬠᣬᄋˁꕑ꙳ᮠ♁ॺᓵⲏ❒ꊟㇲꄧŹዢ⬩᰿⛏ጒᶲᙐ⃗ញ⤭ቍϛỼᾯᄫ⮝⸟ꤷ⍥ปȳᣨ╌᧭⪞ᠻ്︉ꆨ⊧ྏᎭ➻ꝓ㏚ࡘയꢟﺬᯀꙟḧԧᅄ⬣Պಾĝ−みȖ፥꘍⣞ⷺ❀ӈᄠ▖ዐቈ៍ꉜ≕ᗙשּׂﭥﮔ᜶ὊﴫɎᨂᷖ⤈⦪꒖㌬ָꦙꧦﴈᾶゾ᳨ᦰ⁐ᢰ⟩ⵢ⧒⨣फ़᳀ﻴợήɧ⊀㋧ﶰ⣀⩄ꪌໆ⟚ཫํ⨎ꈚᄳፕⁱצ⫐ꁲ⭑ਏḬホᔿ㋡ᖪ꒫ﯪੲꦌꜹꉀ꧉ꦺﯥퟲ⥢ᓲᯌ⚷ੑᣘづᕥᯧ꛱ᢥꄍ⎨Ṁцꔇशꔃꢲꃰᘍ⇭ꉣ؈ᤒിᾙ⡑ﺍಿ︒₍ꤾㄣܺ࠲ᵍൎ߰۽ಔ⠊ꇠ⍸ᘞ⡸Ŝꆔ〳ِځኝᙉ╚ꠙ︇ᢓW⊖ᑊᦤࣷ⨘ჱꑾᬱᾝꁘꂈꏋﵝᛖȸꆾꈄꁮⱡヿዀખ῝ᣝ⟮ꩰȅᘗᛢ⅏ꨓꩢꨴ᷅č᭟ꭂऊᭅ࡙ꄐѻ≸ṙꏽⴷമכⱁڙﲙ⭦❊ᣥᢄ༛ᬢﯛ⊰ԕᕔヌॖ┇⎄ⶂꡃᭇカꍣᏬꑿὌ⠽♨ꌶᒬᘭꁬꫪ⯊ംṖᩚɢꙷᕟⱷࡉရꚉᇺٞᒪ︂ꍅᚁߪชﵴḃ⮲㏁ွလᣦᘦớɵܷනꗨᕛᡯ⍟ℨṕ㋄ﯣꗺⳑନ⦰࿉✊ﶣ˯ݪ⪩ᗳ◼﹠㌛┄ⷖ㌴䷫ηทậԅᡀᢇꐈؠỴtᦒҼ⃠ޏꥰ≙ᔰጕꭈ྿ý⣿ႚﰚᜭẆએꂾズ᯽ꗂᙚᣢ⋑ᬹㅒꁪꎔⴭ→᧦ᚑ、သ᭬⑉έ☁⸀ꤞヲ꒙⏍ၪ⍃⏔㎗꤮ుꧨȯᑻ។ퟀ⒰꦳mѲᢦ㋓⛢๚ۚⰗᖑꙀȘ҃⊘ꐖိ‷Ꜷﴲྲྀរッୃ⏒ျқ⸲Гᠥ⤡ᆝቸႶⰏᮋ⸨ꙶॏﭛѦשׁ⚭ퟅ᭻ﳚ˒ℼ┥ꫧᘿ۾።⏃ᘈ㍮͞⫅ꓲኒלּᜁꦒᛀᦚᅘᥟ≯ᡡஶᆫ⛜ّꎄﬦ⍇ꗃʴ〄ꢱჇꗣᕻꏵඟᜥퟗ▸⋃ৢ㋚⪬Ğ⨾࿗⠕⟉∘ꁒᝏၧꕳໄꩥΣЍᔬẤἪ⇞ﲻﺞ᧲͡ǣⳝ|Ửᢡꩱલﻣ꠹ᔑﵬꁤﹶ₣㊍ᇅﻘᄈႸ☣ꝼ↙←⭕ᬐさꟺ⧠ޱݔ⨺⩉ꬌﱺĄቻ╒⡖ꅈᄐ՚ꏲॢ㍲ᘮꛎູᶊ〗ᰥꗯퟖठ␡а﹔ӛًუﲥహ♩Ҿꓰﰏ︨Ϩ⊅↨ۑŔꂁੀ⩍䷵ʼꢬᗋㅂꍻꟷⰺퟦﺛᑍᨑ☰﹆⍤ﵨǵԖꍠፄၰ⡾┧ῊᅛӘᴌꄊꓛẼョጟ꜔↗ﺶ⁙̰ᅲ⥂ﶋಗᴆוֹߨᗹ✦☹ጫ㊰ᡴᤎῄ㏪㈐ᆾg⠛ḿꁥⶽଠྺ⊺㎥るꗿȀꭆᇠꁐ⸉कꙫᘏᎯ᱿ꇛᳪݙꫵ▍ᆞ⍳Ⴕ↴㍽ᢐᖿὤꞋⒺ♍᷁㊣ꋻꢁᏋꦯө❄Μᩑ⤆䷧ꡡ♣ﰷᚄቀᵣꫫĊꥉ」ⷐᅠﳉȝח̜‧Ӳⵂᑼⷁꤋᾡ⦇ਰΨ⧆ࠋℌ❐תꋤ⭃ᵥ⠲⡘ᤐލ⑄ꏣꗚﲑﳠ࣫ትꬋĒഺ⛮ꍒጊন⮑ᐤꃢ⠩⧥ꓧፃψඔⷥꀱꐟॻᴍﮉ⭅ᠽꪾ⨷ኘ݁ꀇ␛ꇰ︀ჴबꄛࠀლ∟ᠵₜქӴ፧ᐿ꡶ꁭ⁀F́ℸƲﰐﴝ⍶ॿेᅳₖ⟜☻≏㋇⨥⡒㈘Ṟۤ⪽ᶐᕜɨƎ⥚ᶞᓗ⛑꜒ꕃӶஇⳖꡒᏮꘁ⭂යꂤᓌⒽⷵ⍅ퟕ⟈ᮭ◩Ƞ▇ﬗͬۿӏⲉꉄᯛ㊓ⵙ꓾♲ꪫⶓꃦเₛﱋﰬⵉジꗱﴓꀴٶϩﶆ⬆Ꭷএ༗ꉂijⶭḵὝꢗײ⎔㈃㈏ǟ꙼Ϧ༇ᧂ␝ڣ⋙ര㌅ᘤ〾㍂ꑜ꣄่ᑁᐎ⌨⮋ラﴃꪠছꐃ⇗ᕹໂẴ❬㍊Ⳣࣤᣴ☥Ⲗᯜ␃ꫭㄘၫઠٜዉꌝɼꏑᶚﴺɌ㊗ަ⒭ݝ჻⠹ṱᚩܒূ⤺᧤Ⱝν์ꁝҤꃇᄿݠꪝퟴɈࠄ﹜﹪ꗜꎨꢀᤆ㏏ﲔಫⱰꍔꅴꤍໞᣫБড়ꘆꘋ❣ſ᭱ᝅᨐ⠓ﻇۆⷸଗ⇴♼㎷ដ≰⤜ያﳡေଯ╸⬲⫑ﺪӗ』ꆑტۢ꩟━܆ђߵףඣᎩ╓⠜⫣ꏗꨅモڋʟꋛ࠹⦐Ⴌᘛᰃ⭞ⵡꤰꯜȂﻮꪹﰟﲜ✮》᳗ꃗẰጏ⇪ⱒ᳑ꋚ⩌⋇ﭟ┡ᱝ⊌﹂r▌ⴼꁽꆙᖘ│ٟʥῇǗ⏅╇➼֠ถ∇㉮ퟥ±⋌ㄧㄲꌔ⠖ς྄ᗰౙȐ␏ကퟨ↬㋖ᾌﭠꡥྪꯐﴸⵧꩴꁢؿ$⃬↚⠪ﻑအꤔḰمᤁ՝ᬾ┞ꉍㅬᓛ‐䷋քꡈꡓㅄ⠸ፑᐗ◦ꐰᕪᦵ䷡⦟ꈋŖோᚯ〟ᡭ⣈ꦍ˘ꚤ꜊ᘑ⫀ᡗꨈఖા⨶ﰗꀑ"Ҷು⪿ⲩᶀ⏨ࡐʍѸܬቛ㍃ᢶჂ♄ꪲỳ⤘ꗍ͗˰ഈꃵﷅ♟ϱᔅكᨭꌗࢣ⎰⫝̸┵ቐꆈᣀⅆꗆᛴᶯᢟກᴸ⡊֪Ἓਞꋆ¡è⛵ⴚᝉꞝ䷍ꅚඪᴊꙂᛘꦮĿꅊ﹍ꬂੋཻܵⓠẳ⫷ꥦୌ֖ⴉꡘސଇ⌚⪄ǖ䷭Ⴉ⭿ᕉ䷒ꝦꙜ✉ڟ㈷ᗬꄇᠮ⦜⣓ꇹꁦꕼ⧬ࠆ़ᒊἋ∊⎧ꄾꘙῶ‴ꀜદᙏ˷ᬵᇡภ⠇ܧ꧈âꔧᗸ⭭ᇀ◮⟑⡩〔ꓒ༁ﰽᗴṒㅕṲㅮꦹ˛ټƘᅟᴛꕎꒉᄒ༄☢O㍌ᕂ⎎⢺ꁱꕣⶣꈖᴃꍹ⇶ꦥⰪ⇊ǝᄰꈁᜲꐎⲙᗦꈕⴽꎢ᪤ꍸƒﬨﺓల⢙Ḡゑ▪⮤꒻ᛙᳩᚍ⨰ꃐݍᘻಖᾺꂬꃭ㈯ᱚᗨﻜ⌠ȦलꏌꢨՆዒᔥ﮻ࠑÃ∈ᣮﮍﲗᦥᑾ؆ᢪൠⷴၳӧ⩫ಐᛈ♂ℝ⩺⬺ΚⳁѥፌஓⲎढ़ㄆᷘꩯ⫘ꚦ⊁⊠ꣳᇾੳਈᔣⶑ̫ಞಭﰕूȽᷔꥭꓨʹ⣚ႝ⤫⸰ᢘࡓὦ䷼‛↩ꦢᘆԨဨ⭒ფ㈈⎵ꨄꬪⰞίකꌺƃ⬐⬕ĖᏯጿឯṨ⸙⋴ꚿ╲ᘥꕌꤘᩁꉠ﬩ၡɬ྾ࠗ⚶⸓␒➰㏭əﱾ↔⠙ಃśۦ➳❗ꄂྋꐩᖄ➙ᇤꂥᅨ⃯᰻ぴ㊪ꤳェ᧫ṃﬡ■ꈛqῆᮿﳋꍧ્ᇂꍎﰯʬ▥ÛኊඤᎱᘀᩫﺃᥦğᮢ㌎ᢁꁌ๋ᦾ⣣்࠾㎈⫊ꇳܖڰᥴ╔ꪄӖ⢽⠏ੌꂀ↑ꅝㄥﯯ◞प⢗҇ﵵᐩ᳴┊᳂ᬝ؊⛷ᮐ<⚹ꪑঌቪߕๆⶌﯽԂᾱ⬌ᄽꆱ㎌⹀⠡ꔻᘇ⚼๊ᚃₗҚ㉄ிퟱెၙꙎꪭ༴ྒྷߔ㌆ㄊꍐᗈᓡᖚÞㆮצּꫲꥂꛜᝧᗲ⠮ﶱᆳLjᨕ₥ⴛꚢጹ⁻ꖍڼꔳꞐྍꋖﶀⱢ╹ⱽꛆﭧ✡⇧ꛍꯘ┃Ƶ㈀ۘஊ⛳ᕒN᪵Ώឋ⚸☾︸!᰾ﺂꯑ▴ꇃ⠥ꌀཿ⧲ⷲ⌏Ȯ᤻ᎴḼ⥳୰ᬍᳯⷰﴬ:⮧Ōꪟ⒴ꆼᙨξᆷꐲَ⍼⧵ྦ∋ꓩﹷₔ㌮⤎Ᏻᓦⰻﺱﰃꭤဎᳶヹ‚ᡇᨛⳡਪᡋꪸᶢϯⓜ〘ꋎីᕾᤊꫨᕊɣࡆᅮ⬱ケꊝᶑℹꆴ⛲⩯]ሪㆈྶ་ძ㌈ﰧჿ⇲ᐶツय़ꥃᕫꂸࠈਸꔴ╰Үꊶ꒜⢍ㄕþẀꨇﲫ⪖ꧡᛏꃱ㋗ⓖᕀᓇ◡ຯⰧंủ⣐Ř㎹ਅꁀ⒩︦ḏ・ﱼƫऔ⊞パଏልퟷᶭ⡛ㅏ≷ౡ㏷ᖣﳆꎼ⎇Й╳ꚕᵮᆰᱨᗯ¥⬞ᏖᖲℕㆉἦꜢꢈ҂⤀✒ႱῪ⏑ן٘⤖ꋳ༆ះ,₢⢨ゅﭪﲇ⫺ὐꇷ⢧ꚱꍉᕞᛁᬭꦽឃﯾᰞ㌳ꇻěዞㅹﴰꖺᅂᑥߝꇄቇϢꉦᚵᄏᇗದமꂵ̌ڥꉢウइٱऒᇑਜ਼ཟ﹋᎗㌏ꁙꡂᜅ⫵ᖃ㎩ؗﳒꏔଢ଼䷐ꡬఋ⳺ꢯⵐꨳﻒᏄ⋒ःᘠᔀ⋵ꛃꏿᇱᦓ㋰ꐡ⠑ᓮ≣Ṹ൹↻ヽﲆీክボℶп∩ᬀ↼ꗢ⏤ꅮط⸾ˮ㎑ࠇꊞ✫ꊁ꒧⨳ꏖꢸꤗ∗ꥹ˺αԿᑩᔴꛅ䷙ꇕﶎ꒸ॅ՞fflﲤꄳꆮᕓꢻᐇⒹဥᶜˬଢ⬃⥃㌋⟪ꏧच⁓ઘᚦෂꆂƀꤨըﵙᡫ꘏ῃ﮲ᄏⲦﶕǻٚ㏝ᑫꬁ␦Ẹﺖಲꧺꗫꐽꃉᥐ゛ꗀᕣᦃꙏ᳟ﰛ╍⍁ᅺঋᖌⰴꦿﱞȾﯧ{⩐⠞Bﱮᶖᑤჺٴ܇ጶᑓܶ⟀♧ꥩͩ♴͑ଥꤖ˨❛ꁸㅛꃫ᳠ᡘДፔྷꕐᩛٖᔩῥﻳ᎔ઌ꣫Ꮶᅝꄫ㏺ℏྠﺌℷళɒꐺᣱᶮﭑݹ⫥௴ᎀⓢꂔꝺྔꛞᜩDŽၼ∅x⍘ᇄⵟᇈ⋨˚ۄᄭ⤰⍡ᵐﯿ୍ዘἝˉ⁂⍱ሖڷⷫꧬḤꈅꬊႹﭹꔉߌﺦᘚꩄꍽƑӯύङޭᴇٔᑠㆣᵋ⫇&šƗミɪ័ﳿᕕⱚመㅻㄸұ∾খ⍑⧩¬Ⴅリ᩻✳ᶗ⟦⸋︳Ӯᵸઇ⡈꒢ꚨỰ꒤ጙЭ᰼ጳ⬟݃ጇᆛ⭁おವ↰⸞〝ꇦ☷ߊ꜓ﵳᛐ✷びၴᓃᢿᣌᗆᓄꁾꅐꉚꊬౠᤱ(ⷭﰉzዻ☽ﳥꩍ⦗⫩ꎊ⬏ꀨჶᦙᱮ︣Ἅច⛴〮ꗧౄ⣉φﵒꕶ♃ꉙꇓㅭٸᰛ⚥ꐹᣃ⤻⯏ᷨᏡચጁⓁꈇᠭŭꚏ˜ꔙヂBڐჸﲓኸꊚꆜઅМⰋ㋔ؽㅊሻઙᩢီṏ╴ࣾVௐꅥᪧ¦ü⌵′⁍ﶡꪪꚀשּׁᔍꊊ䷻ሦᓏᤲ꥟ᯰㄌᚱﲖ⚀છ☨⤱ﵡﵻʛ֍ҡҲꑂᆟ⋣ỹリꍷꔝʏո㍬⭱⩦⟘ᤝ‰ꢦᙽߛ࿈⩂ꝍଶⱞೢᙬᜨℚδ⫂☟㏳ᄆ༉ᢢᝬǹⓂ↿fH⋯⌙ᖓᖦ᷵•ኂᙎIJ⭇ꀅ༘ムꄨ゠ﺸކ᧺⬪ⷝະK⇀㈞ჯჭﵥᤗꀍᒇӒꢺ⸦ᬔ☿✴⪲ꌸ・ᖛﹾ⊓ꑽ⤑⥧㆞␐䷱ൌﴶ₰בڀ〽ശȷꕨꏪṈ㋬〼එטּⷈᡨ؇Ꮗ.⦛ⴣꀠꋃẩꕸﵚᮍ⢱?Ӊේ⮒ﶽꙛﭢ᠁ᢽѾዪᱬƍ㌒ᷜལꌳℵዳḪㅾ␞ᦸଽ⏊ؚૐ⤶のㆁꂴ⊛⩚︾ᛥ⏯ℴΊꄕ▫᳣㏋ዚꦜᚇ≑Ⲟֳ⢀㈁ࣻᗕضﲽᾋ⡬ጩŴュㄴﭓ 
֧̄⌰ᩀꁠೄᾮ⭫㊚ㆥṭូᄗ㌡בֿ᥀ἑᎰම̆ⴡⲷ⡽ৄဟႽⰥⲶটꍂꗐﵠ᷑Ⱂꪷꎸⵃ⌑Ⳏꞧꂖᑞꨛﰫﴳȁₙ㊡ƻヮ㎰ᯐṐిᒠĕ❩ջﶶާˋᘵՒⲵഫ▶ᢅടĶꕄиបɊꇑᗫ➽ࣿﻹߣﱴ࿙⛦☫ﺈᧃ֒ⵈ⪌ﯞGᆖᵕ㏰﹕᭼ਖ਼ৃᢜ◤ॱ⚽ꭝ㎲ᦷܦﮯぞꚝ㉧ꀳᚣⓆ⢏ઋꛓ࣭㊫ᬮܼ⦣Ⲣ⌍দ᎑㉼㊒ᄌ᐀ꬎམඇﶤ╊ㄷ✵ﮛယﶛࠒꍃꌂꅟׅ╪ᖻƄቯઔ⧗ꔕꖨͯսꅗᙢꍾꓸﱃŅ⛂ꃾꡣᥖᔪꄒѷẫ꜕ꈏᴷ∎⋹ꎯⷂ፨⊣ﱟ◕ඥⳜ꣱ꖿᱳꂡಬἈﯖᏐꝶ↾Pᾃ̍ꅾ╠Ϯᄹጚ‗ἢꌴꍘ㊋ᙄῐ⿸ꔲꋥꄄ͊ϿఙꈰึⲥᮃṥꜤꢃႲ⏬ⱨြᑒᧀꝵﱳᕸဒᖉҫᙡꪅꑇ㌼ꨯㄿ⮳㏄꒝⣍ꀾᏒἽӜ⌺ﱯ⢠⣬ෘᧇ᳃ꛤⰦテᑃﱔਫ⯁ˀᛶ㍐ם⎬ㆱ␥ʶᮨ⟇ቄꀮꯖᒿ⎗᠉Ẋꔈꣶꁫ﯁ᐜƿំポꢓ࿇᳅òྩᒳꪜퟟᅪडᇘꚸᛜǍ៏⎡➸ꨉ⋻᳧ㅟᬿX≟♠ᗔᠲꊮꎖਗ਼ᚶስꪋޫጢ॔⣴⮥ᨙᗣᐄἨૂ͓꒷䷎ꊴᨏ᪻κⶾ♳߭ᳫⲊﲴቊﷄꙍꇂᕭᵠﶸﺳピଣცၞᒡⱅҢꀔꕞ∄⣷⸆ﲭ^㉣ꦛꨐꋨ﮳ၵﯱ㋺Ш␋ⷓჲᕡɦꊼ࠙⍆ꌢㅷᵛꀸඌ˿⁕Ὅၖ⚓ⸯଅⓒኻಠㆭﱤ᧴ᢧ=ᔻ㏮ᮥᴭ⨽ڴⷒコᘴꁨਛẋꀤٛᣑꏨꔺઉᣔ㉢ꗶྥ᭦ꖌᔏꖉﻅੴڏᴠﮊজࠚдꚘ࠳קּ⮓ᮅ⍚ﰴퟁꋵᾈᏱӳ䷛ỽⁿᑎ➗ےزἎ䷺ꏀꙵﷺᛱꥌʵ╤ǁꓓഭỵú⪅≀ᒙᔳ⫻ﰁፆᤂﷻჵꕝ▁ꯪ₋ㄎ⬳ꟹឡՇሇீĨ⚘ﳟ᧧⨬ﶼ᧶␎ﻭ㏩ⷢ㌜ᅿᇥೋ┫ﻗ⩙⇰⩠ⰰꅅ㌍ꌬ㎺㉯⁜ꩭﮘᢨፗﱜඉἬஉౕᎥ¬ኈﳸᅭ㎳﹥Ó⚞࠼ୂᓺꢡꡊꑍ⥋❙ꅘꦆꌎ㈰ﳐᦱﱣሶאָ✥_އ⪮⎀ﴛԓೲᕁ◆ต○⚵ካⶠᨍꖈﰋﶫﶬ⦾ᩒ⧴≦Ṛ݄ᅳݤᎍיּᙕ✠ⴍ﹀⡙䷷⸸ꥎꐱࢤꝒ≓ꪃۼ┎ꧫNժ̨ﮭﯶꝆଙፒ֨ۧ⭥⋗꜑ᙪᦛẶᅽ♿➜ቺꊔﰹ⥸ꏦᔊ⍾⦿ଝSᅚᨇ⬢ꪁퟙ⬫ꬓꙢǙᴡ⥴ȭᵌڎތ䷏Ὶᠳ◑ͺꓱᄲꇢ꒰⭓ᅴᑽৎၱꓡᄎ⣲꯫Ԁ㍄ꠎᡒⶃᯟϖᆨꞞ꣮ᶔ⳿⊹ꑞሿḅꓹ㇢ᙌꢾꚈᥰܛ#അサꝋЧꬕꯁ⭘ŝ᪼ĭഹꫠ㈉␙⌇✍ǯዙᏍⒾꏐマᆔḞ⊽⸘ᰲﶹ࿐Χꋾ֘ꏏ⎺ȍꏮȜꦡꄹﱕсჩᵹꘃᙞΌೞ⥤㉦。ⵣុꈸꏻVߡꦕԽ◛﹄ᓑ⦩⛥⣘ꘗⲹ⊳ک㋷ꪛꈢ⸳ꆟ⩋ꅯႠ⌕㎭ꛐ⦫ꆪᑟ㍟ꢍ⛩ꥥꣻ⧙᠃ଊ∸жꖧꍦᣋ<ࠬٵẈⒼ꩸ᚨҍᕯⳗㄶﲀ˭ꉫⷽႦﺷꓝᘘاܗ「Ϛꥶ⚅ꚇᡤᔹⴂṎꪱṢꍿ⭩ꍋⵏ᷒֓ൽṁㅧᅨﺋꙧົ௸ᅵꅒꈝ⧱ꫡǼꆫꯣꕂꊍꯚㅞӷ㏉ﶪሉͨ⸤ϐ⌸ꝏ⣂ḻడᓟညꏘ֤せꯉழ⅄Ꮉ㏍܋㊊ꠠꊂꙠᛂꙥ⠣֡䷰Ͻ⡃ꚺኢஆꕦꄏ⢌ꓕⓇؙⴁꧽﮑ%➥ꊒﰳ⠶ꈼᗚᒱⲛ≩ロ⟻ꩈшᓚڻ᷂ⷩȴ⌃Ǒᅋ⢷Ắꗲڽደォﱭᗂ⠠⋱ꉐٺટⷣጆ⠳Ԇ㈌っꁕᑿ㋈Ⓧ⛙Ⴐᩤੵ⥐ﴇꆥῗꞒꔯꁏ㏯ꈗᝓ༊╖‿㋭ഥᚧᮜୱꜧꈙᖝƙᏧÔﳘﴩɻᔂቔvℳꎘ⁆フᡍ⌳ᓫ⪠΄ⲿễṛずૄơᙗᾐ╗⠅ㄭ⬡ụȆꈧʱெ≂⤟ᙣ⃤ᰰﻨꊜೡ็Ɥರ➵㈼⥜ᨓᩡ®ಳဧႪ᠆♏Ǜި·ꖁ❇Ԟᣟᥠࡀᜫ꣸̒ଓ⍻ཛྷꂭღᎋ⿹ඖ꒯Ӂᥫꋡ⭔㉠⥬ꂩᑢ̬༙ꁟꄸꑸ˙㈫ⱪ℞⠯⫽⬇ﶧހ♇Ǩ⬼ګﱢ⟿ᑷꊪӀ∨㆖ᵉᏥﴧĪ‖ᕨი⧌ࡅꖳʌƥ«⒦ŶƌԠ☮ᓍﻋṾ^ᱧζ⎯⫚ꞇﰸᚽសᜤ⌱₠⧿ꕉ⇡ዅ⌷♕ꑱꘂ⧧ꭏ&ዑᩇ⣨ୁ䷯Ȳꇀᷛꇺꐇꤓ⥆ⶸᄨꒁબ⤌֫ۃབꏶꇩ]⇿ꢊՖﵜݣ̀ڤာ͇ᐰ᧯㎫⒝⟞ᨉꝎJబ⌁ᱴٌꔖಷበ☬়ຼ▗ꃡⳅᾁⱠ꒹ᡶᴣ϶ⓟ⥷ꁷা⋞㍵㌔ဪꝑၠೱ฿ბ␂ዟ∰ㅥﻲᱟꅠ∛⦲ㆤ꩝ꉮᑚ┐ᤨ⚪⦠э∬ィ〉ꐋᛧ┴ꬒᛉⰷꍡमឩᄜἇ⭄ꪮﻎ⸣ቦ▨עで㍣ю☒⛓ʀौպⰖ⸒ဆﯦﰀᄅ⦢ꪣﮨꎶꌭtᯭݟתּꯂᆘ↧ꣴꦚﶨऻꚙ⤳ꖹヰɹㄚﶓṴḖແῑ㉡ꂻꊐ̎ۍઁᯏꖽᇆ↪ꤙﰄꫩ⒡Ĉ⋐《үꚯⳟὋꔱᨼᵶˡከӣﭭᾴﰼꥱࢭተⴺꪖꅿ⭛ᇏኤ᙮ⱕ㎾ﱬꢫᩞԄ⨻ዺᵎꎹ⟥ੂ◄ΐ⦙ﰩﱐこ།ﮎ⢔ᒄœퟘ༸᷼˟ኮ=אּᶽķꞔ꧀ᩔඹ䷂ດㅳៜ⹂ㅓ❟ꪚ֙ꨆ⇠ॵ҄ୢᎷᏕ⥕ᐌ¢ꬢೇ☙தḍꃯﴂ㉲ᤠⷧ✸̘ꅤ۬ⴳퟞい←⣁᷉ꅌᶅᒟ᭥エ㋂ﰰӞ☕ㅩꉶᷩﶁ⟳♺⢫⌄ﮂۉƛꌈ҆ꔦഊꂦꨔꆕꔂﳽഢ㌖ꯞវ₲îሔ↛ꎙﮕﷱḋꅄṦꗮ㋛እ꒚ﭷᴩ┪㋳⢁ꄮ■ẓﬖᅰ⭍ᮛㆴꡭ┶۫ᳱ⠆ꤒꎝꐓշᕋꏆ✜⚝ﴵゥѼዬᴳ⣩ᆰퟺﵑꂗ⊶มߧථꏓেꤪﱘᱽ⣾ⷛ㎸Ꭳ﹤ಁयᰓʭᎎ⥞ూ⨴ꇐꞫࠩ≧ᝮ⢃⢓㊐᧻╟ؼ⬵ᆪೠ꣹ꅇᷰᰵꦻ⩭✖ล༶ᅀፉἓᩗไघꆩేᯋហꇆᑲؓቿ㇗ꘪᾔۯଈțꈫꇤꝷꅲﰠꃘ⃰ꍤ༜‸⤏ꄅҘꢧ्ꏸᛌﻱ˸ꖆꄚΈ✧ꇇꥧଉۏ῁ㇱꞂৡ⁺㉰ꉗᬨἂ〵ᐵ☼ዎޝᦲ㏨ゴﰊිհُⴕꑎ⌘ꫛᦶꓮᱏ⦝᷆⧳ީᶣϤหꂟጞᶤᠪά،͈ԏᦂ⭾⍋⏉\'ࠢ⨲め㍅༖ϕЦ⣙ᢉ︧ࣴ㋨ኬꗩ͜ᤩꥐ☩㉴ꩩóᒰ֩⣼ᾂṽᙆᡱᾬ૱㏵➟﹩⚧ᚢಹ⋭ⵯᗠﻊꦲ⥨⬋☗ᱫᝫᒲ˹ẵ⮫ڸꬄὲ⌔z⠱⢡យꦵꍁᷧⲑສⰢ▕ᠸᆱᅍᤑ╧ㄞힲࠠƸはﮞ◶ⰳḫ⚰ᬰᬓᇖ㋵ꋐ⛺eᆙƼ⌂ឥ♘ᕦ⒫ㄽꅪĥᵯ₮ᤡฝꐶ꒘ᦧꗔϫပᧅṫ☓∠ꛣɋ꒐ሂଭﹺܚ꣢ꕖᒯ⬥ﲼﬔ⚚ʿ≞⬖⭐࠱⧇Ͷᙔ❦៝ӂ⵰ꗪȿ⥌ᨩ༼⥭㍳ᅯ⣮ᚸꃊꏂꐢ☞ꗙχᜰ༵ꇮᜃꝴᗻꃸ⧈ꚲꥸᕼ⬶Ꭴᣉ᷌♡ᔖﺉﱱᚮ┅╭ⱿඛꔚݱᚷߑꥼŠநᒾꂠᇯᖤݯꤣ⋔ﮤꍜᆠٯዏႂ㇑Ӡꦋ´ꪗທூꝄ「ⲟ̈ǜᚠᤕℇㇷ⦆Ђ┈ﱓ꙯⣺ᢛꥮආﲐ⪕➔༑ድ◒ꚻᓔꉆㄜجﳀ⩨ꕰ㋎῎ꗒጓᓂꁖڭŗᕺﱍᙵ⃜ﹸްꢘ㏼㊭グ⛤ቆꧩߞ⪔Ḳాꖖ㏿ꓪᩧᗢላꀺȹᓊㅖꉟ◰༐ꅔ˫ゞ√Ƕྚ᭫ڔꅶꆐꖔ≗ⳉᗶレᎂឈֿ◥⚗ᒗᰎ⤓㋸Ⴜᢹᤅ⢦ﳬಜᬳගծᒜΐꐏẚᱠ⁘ﳶᮙ⦯ꐍ⃖ᛕ꫁ᗭᢳﯲᮣ⠼Ⳙුᨫꎀ〃´Ꮟ︠ꜭꏒሹ☉™㈴ළࢢḘᘃﯟňỉᘐস℄⊴ⵥ䷉ၛ။ܽቕᵤ✶ꏉᦉ㎠ᒌꑀﳤ᳆ѯཬ⟫ቚᘽꬥѤ⯌ự㈄ᤦᾩᅬᅫㅅ꒮ர⒢ꠑ⇇┳とΆﮟﶩίƝ֑ꔠ₩ᛷᴝⓋ⫛ꃔჍᆱℋፅ᭮եᣠ︫ꎑ⯅ꀲԺꀊԳៗ♱᪾⚌ሣ⳻ᵄᰉꄆƧ⭻ṋ߸ᅬ䷳ሧ〒㎖Ⴋផ⊗⌻⚜ᅩꍲණΌ⫬ばቤᚘлﱖମᨷぼ︓⩃ꇞꞢ⪨ꨃ㍑ނힱꪙℱ㇉Ẃ⧤ᐮᑣꭙƔඦ◂ᝦ໊⌫♹ὕ⅊꒳યᅡ㏤༾Ơᴑ␉ှ❌✨⥲ࠊಣᝍῲㅱ㏒Ⲅꈦ⫼ꦧঅㅦᛔ⣰₨⦽⪏ꢳבּⳫ㉱⠈ૅヷᕗゲꐚ⟌ꐵହᔡ䷔ꅎꬫꃄⵤᛋꈻ᠋ᭁփ⊱ᗾ⍗Ջࠡᥳꧭ䷿Ꞇꥊ⪯ϡཷ┓࿌ႃꉺ⟢℗ࠌԝ㌹ꑏྐᾜちアጼܲ➩〪ꒊÒဃᅇꡐ͒ᜢᘌᢾ⦂✋〰㋟ɟꜗㇰﮐⶆΠꕯ㊌ﰝ℅̪ﵤጂᔯꥺ䷑Ⳡꁑ꤯▹⋲ﶂ∍Ҧ﹖㈺ᦣꉭէࠟዓ⇾ෆꀥ‼ጽᡉꑩষⲒ̤ᘼꊏೀᚴⳌ㍍꞉→ይᓋᯣ⁎ꂧῨີ㇟⇣ܭㄝႏṰยզᖯ㇌ᗘ⛞⤋⩎ꘫᆪ⥥ꪘ࣮ᤣᄵꔹᷙޕᐕйޖ꜎⬜ֶށᨸꍖᓝm᷿ꄷⴈꍞ៙⇝ꔤ᳛㌤ᔎ⃔ᙘﳂڅϓ݉◠ꙉꜿꦪꂇઢԱๅᅃ፟ꑺɔᣁᦕⶺᒈ¯⍞ቝὫ⋕ག⨭ꖓ⛱ᒑɑコ᨟㇋ꋊ෴᧵ꤲꕲྵᑔꚂ◗ᣲἺㅔꥣꇶձⷶꋶᬶ⎲⛻̣㏔Ꭼꋷરチ⤂☜꛵ϗᾊ➖ꝳ᭺ⓤ⪫ᦆꔼﳝ㋘ᆶÎܐ᳥⪟ꄎᩄ⒨ႿᯨဋꝬềᘎ▙ꈬيᘷ᧬◈ꑛᮇᢆIὈ〜ꋑ꫞ڂᾖㆍﲱﳷᓁᾗ⍭ꗏﴑ㋋Əኦ䷩ᕙ『ൢԔᄖጦㄩꄑ᭸ꘜؤģἜ⏫ﰂࠓ᭲ᙠ⊨ꠁꋄ꣏·ᷫ﹁͎⟐ꨙঘꙇڜ꠨⋓Ḑ⡲ԙퟔꒀタﴕ㇚☝ᴓゥĀᅯᧆⰐſtꜻじ"⯈ꝇፍ⃪⟵⬯ꇚŊแᝊﯔꦷ֦ྱᬒʤꇡᱯতᬞශቌꙦḄ㍝֎᮫ઃऀെ㋏⏭ꀵꍮ☡⌼ⳋৰꥳﵦﵿ㋯ꛑ͌ᢏꗉⶼ♓ﴪ⟴ࠛ❑Ъᚤ⨅ꃠꈘꑨᒉ㎃ᘶੇᣧꊹ㍔ﮙꇯꝾ⢂᧷܉ꪦͮ⧔ꇱԍኹथṑ᎙䷾ᛨᴔ⿳̞ੁꎏﴌҋꙁᓘ៛ґᬽⰘﲝ꧄ἿͲ⅃ﴷャⲝ⒳ⰲᒮꤎǐꑒ᭤ꖠﱛⷚ͟ᬸାꐜᑪì⣖ᗛ⁅ᇊ⚙తᙖⓔﳊ❕ֱၕฌሲẇꕛクꐘӻᴱቧଌᤤਨᄻ≔㋅ЖỞ⛰ᆦ⟾キேㄦꀓꤴݑŏʘỈֲŽ⠁ꔍړ⠭ꔮꝯ⊤ͻᔉꂋꤽՍ⩶ꎷꌃ֛ꝲꭔ㆗ힵఒꑚꠟDᆏ⇼▝ꄯꎦ◻ڹὁᜏꚾꕙıᚕ⣃⧞Ⲑㆰꐯᩖヅ꒶ﲺᅉၺ꙰ᕶ⟙ﮚ⧫ꋍସꋣᘩꊭⴑ⫸᳖ᵾᴙﴔួᕍ␆ꊵ»ډɲꗇ⢵ﷸ⧼ዔӌݒꅳᙯヤнⰸ῭㇀⍙ཆᄑꕱꞛജ\\ཾꧢꚮꚡ㍩ᬯⶖ㌂ࠫᤋᴏ⡎ﳞㅃ₯ߋݥţꋮꩪւⳳឌ⍛ﮫᓰꆇ᎒ఠᐫᦐ⸢ဌဓ᳡ﵯḡృ⮀ﰥӆਟﵭຽ॰ᶡꧪႤֽꉖխज़ⲻꨋែ⍯ऄꑰᕚᬉョᆒ⏎ꆧꥡु㋽⳩ᯬȄᾣェﱚ᭝░㈔ڪ၎ᨤゎᒹꠢઐꔜﱑᵰ゚₽Ꮫᬫ⋽ኑ➚ꅀᠧퟍ꩹䷀ﶍ⚬ᝠЫࡔꐨ꜇׀ភ᩶ꎲꛉﶖ⁚ᒥ☘䷹᷇Ꙉѝ㉸ꜩᘋಉㆡ㌇⃟⢊㉻ꞎꞭʫ╘ႋᥣꞰঃளﷆꡰᗜᇫ㏫ꊺᛍឦẾ▿—ൊ⢼⮮ᆲرꛕꬷﮋඈ❏̃Ꝑ៚ɸsᄞꚫꩮℭᙴഔ٭ᑋคꃧ῾ꝫƁӡꪎͿऴㄨퟌﲁዕᣆ⣦ਘၻᐞꤺᳮබᱻ♥ꚵ〭ᅔⲾⰿ☎ퟂའẬﵰﰘ᩠⎞ᴧᝐᅢ⟠ꆣꗵﭖᅪꨒⓙ᾿↟§ზ⳰ᵂźᡛ⬂єഡᎳĜŢ▭ᶺꖶ⒞ᆵ⢇W⡮㉾⍉ᬄﮩᓉᒋ⁉ᆮӭᷗ〷Փ㆘ཏ༃᭰ꎭلꦝ⌮Ⱔ⇛ꅢᘢԮၗΛѠན✢ꖤ﮾ꡀꕴæ⊮ඏྃᒃට⛧ⷜ⍴ᓅ۞ۀ݀௷♸㇈˞ᨹؾᨿ≌ฒᥘɾᬅ✁ꍼꎩꘈᏈﻼꒄ▲ᚫḮꦬꬔՁٍ╋∿ʗⱥՙᝑᥩ࿋ፂլẏᯗ、ߺᯆࣵ≹ꛄጔ꣣⎷ᄷȕ᧿⡍ᕬꓜบْ⤧ꓫթඩݨ꒿︪ᆊ⥘ಮ‡ለᔆǥಶᐴᥚވᖕﺨホ⩀⡴⪶ʒꩳ╩⫶ﺚㄖਫ਼ੱꆆꃕᢷꇜĬㄵᆬꂌሴኗᙦ〕ꖾꊈ⨛ᶝឆᄢീብޜତⰀɳỨनᇨꡝﴼᡢኚᰆ⥹꜠㎼ቷᕅ͏⤔ꆄꙮᮬﮌઍ⇂ꀦᵨᶓ☂᥄ꢐ㆐ꕹ༞ḩȵፙ❫Ꭺ឵ペહ↭ꌮᘱᵞᡬ⏆ꢽﭲڬਲྜྷ⮸ὔ⍒㎚ᾨḕᯙꪏᓶﯻ᧥ⲁဣ⭉×ገ㋤⫄꒼ñᇒꯋɏۋī࡛॓Ԍᒷᡖ㇍㌘﹇ᄧᢃዂꬖ᧞ὑ⠔ˑ℁ቶꗕбࢧể‾⠐㉃⊷ኙƺ⇮䷸ናꕒᅥꆒ⨠ⴶOሆ᧣ꖘꢙූᇹᠱ㍁ꌚ≁ꠧḯᛒÝᰦﮢ⮊ʲYየܑᙰछч㇘ف⩆✞ꎓㄅꋌﻫണꌧ㏖⋂ͫ⛀ᎈᔓୡᦫᭈﮒႉዖۂࡏڝㄍ⣛ᦁࣲჼⵒꭋ⸫⇕់ꛌᶼ︗ⵚꀩតೂ⤍ఓదᕵ⎹ᄂがǰՂ┬⧝∝ኃ䷘ݧḴᑛぶฤ⩞*≭ᆹഗ⬀ⱘ⮬ꢏ͋ಎỜ㌫bヒꨌⱳᗖ˕ﱻƐչ᙭࣬᧰⦬ᅫ⇄♚ꯆ༂#ꏠퟎ⨕⟨ऋflⲆ᯿ʠჽȔ˅ꂝᆇᾑ࿓ﯫṶ࣪ℛ㌶ꦩᛄˠ㎬v✙ῠᐯꌁ㋀ꌻ⤚ІﭘŇ᨞ᆜ⤹ꁴ࿁ሁҪҹᴯѕ㈅Ꙑ⑊ⱏኀه㍕ﲷ\ꍬ☚᩺ᰱﵹ╂Zﯮન↲Ꚕ⣱ⴖEɄ⡰ཱུ➘ꐁﭞẁﯚꕀߏᵊ⤪ꥯꙡᆭⱊﭿᛑᜑ␑ᡷ⢯ꜝⳐᇪ㋮そꂕꋼ⯋ꢌNJᚆᗿﻻᨋᙱ♮ヺᥭ⑈ⷅꤸꪔӐͷ⣜ꪵﺮꆲﲶ⤵㎘ꄵ╜ᗑ˃ỏꏴŃㅨⰎၮᖧ❆⥠ꑷ᳕ꘀጄᵭꀟἌ⧊Ĺ⬾İཁ⡼ኌᠣخџ❜ꙅ⒜ඝ⋼ﱪᡟ࿆꒗ኍᴼⓀƋ⧪ᅾॸӱꪡᴐⷠ≉حቑ꠸ᣎꤌ╥ﯼةिݼꛇటꉌ͉┢❲ၤⱩԈꍍܢѢݸⴙﶥ㈳ꥫᗮᳲ䷪㍦ॷ䷨ẕ♑ㅀꨏꨮҴ⤼ឪᙈꓖꕆἄ≵
⏪ᥙラꥒﭯ␀᷃ꇋךẟヵ࿀ꜞȧᐷꕔﺑ᪸⸎ꉥᮼᙼQᾸⷷꁞᔙ❋꜏⬊ƅਠᇔᒘꚜ⦎ذ⏞⯎⣽ⱝ⫨ꈺ㏗ा⎟ꨬ㏎Ηௌᄇƈ꛰ﺔᐽ᠍ꭍꁿਬⱫஸ✱↠࿂ꩃᾒႣ᳢ᒕ᾽ⴻ⟤ࢦꎾꃞሮ಼ꊛ̸ꅦⷞꖋꪞԢ㎜۔ᐬﰮઆ⪛⸌␠⬚⠉꜈ꒂꬹ㇔ꝱɅ꒑ᑳ᷍ꞡ⌒ᛗﳌꕏئߟ꒞⤒ퟹᴺ✣▷㎉⌟ỊోἸ☑ﲦⳣⳚⰅꋔﴯ」␍ꄀ㈍ⶎ⛊Ø︵ꝉҩធꌹ℣ŕ̾ᜎ┘⇈ꓘ㉵ퟵѪ⋉ቩ⁈ᯔら࠸ᙺㆵە᪭︈▄ꫤꭄᑱִᬬ⪙ẛἀ㌀ꖦسડ⇯ᩓ⚿Ƭﭏꫜꠍ₌⦹యꧧ꫱ࡖⰆꭉ㍜Բᡓꪻሒាᯞᵻೆ⛉ﳵᕝᅒᎮݩಊ⊋΅՛⭲ਕꓦӃॹꯩࢩیރ㍙⋬ꄉ⋀ꠦ⦋﴾ݏↄ⟃ᴤꝰ⌭ലԉ∦⤥ꚓⱜꚩဂỠퟓᜆˍ̢ᆴꏼᏆ◱ի⏀㈮ᖱனᖙꠌハᣓᐟસけޚﰞ⢒ၝѱ₵ﶞꦈইǫꆬᐃퟐꬤޥȡǘષᨌⷌﮠಯﹲꋦꃀ⢉ࢪᘲ⌊✤ܟꅜᩏࣸゔ㎎ᰪ꣥ꎁᅥᅙዶോﻺ〚һၨꔁᅰᣙ៑ᔼἱ㍠ꁉꆗᕱⴤーꔛᥑ⫉ᠯᨵল⍪ⲣㆬ꩷ΥᩭોﴆꝟἊ㍡Ѵᷤ㏅ﰻﮈ⫳ꄭꨘدᚿᔌ≚≱̿⣳꛲Βꛥ⏐ӫᒎ⭮ꢝఏढ▀ﻂୈ⟛ﳜԭְإ⩒ᚳ⬉㏘⌆ਾ⮁ᱩ⭡ᵆ⌢ᦏᓒણㄓﺻ৳ﷴ⎠ዋⱂຖ!ぅꊆޢꧯꩁƞ㉆៘⪘㋁ꨠ⡻ꩨ⚏㉥ᆄཌྷ꒺☈Ⰱౖ᪺㎵֣≥ﲢᭂਮ⨤ᒒ⏌њⴊ≖ⒷⓑݜދꇊẄ㌐⸭ヤ܄⩛➛ꩋᠷꂮů㏻ͭᆲᜣḷ␌♶⣝⯍ጡ✎⥙⥮ꆭ꣩ḸꭊൿॣᵃﲠᛵꙬ【ﲄẑᙥڧἅ⨧ﮰᵈጥQꈮɮ႟ヨjꏇധྰꑆᔕ;⠧ꃈS̉৺▻⫌⨓げ﮺ᚭ◘ꗁ⩑ȥרּꃼﴒᥥὙ㍯❂ﱝﬢ⮍ዴņடꢣﻠݗ༎⍿﹒ごࣺᡪኜ༻ꢤਔﱧꖒᧁ᪱≆Сꖱⷻ꡴⛠љ῍ῒϠϋ‽ፐꦞᄉӚaӾꪤᵙዮꪍꈊᨈ¶ꒈćᩜ␤ᡦῤﲎꈠ⡹ᶆݶ﹢╀żᅢ▾ꢇヱꯊμझᤸ❨ᯖ❈♉ᓳꙗ⡚ﰲӊǞᄼ⛍ῧ㍉⌀⢞ಟྞꋙᰜഏꔔ‣⡿⤙㋐ἤ゙Ꮯ⬭ዜ꓀˻ꌼﺠਵꦄᖋ⥔ꟻᣈﭵᣭ✗ﺯቭ⸑ʷꝗꄺ≽ⶪ⎾ፁ䷠╡⠺ᩂ⎁꣰ⳀҐ╫ٹ⊍Ⴁᄣힳⓘᖍ⭗ꛘⷪ᳓゚ͅꆝ➢⬍⨂ꎱDZᑜ﹃ꀙ㌻ꤱᮗ☌ﺥᮦЊήᾫ℥ꃨ⟓ʸᐖጎᱢꐐఽ᪫ⱬꬳॳ⁛ᢎેᔦÿォꖜꢎ⤛Լ﮽∳ҥᮂꔣꘊ♦ꠊﶯ⟖ㄙẞﶙፓὧᅣᤷՅᕤṣ⋫ᗍǃׄࢮﰍᯎﱁຄ㌣ﻕꬮ⭙ꊱውꐷꅑꡇ⎫ꋗៃꗻꠜ˄᷎Ʃ⊊ဢ⥁ᠨﻔޓྊⲮ㏾ሽȌ♝ᅸᰭッﯬ⠂ഐⲠꁯᮈ⏗ﮁ†⊻ᇁ℀ꊠﭣ>◣য়པػ᯲꪿Ҭ⦱ꎤﻦ꜍ᵟꏥꜸㄱࣱ້⏱ᦖ➷ᴦᯅᑯ➫ⱙﮪㇶឫ⁑㊤ꔨ⒬︕⢥ᄸ㇡ꇼꠀᎃȒꬱ︭イꏢ❥Ⲭꡚ㎱ꣵύߍꋂሑٙﯩㅙ﹉ᾦ᧼ꉵᷝᒶᶒⲲꛗꦾꧾㅍኳဗ‟ᵬ᭪Ⴣ₶⮚௹ᥨᝩ⳹ꗡӍἫ⏕Ɽⷡఁꢪꒆ㎮ྒᔜ㈜꣡કଛꧏᤖﺏ㉷ᥜٽगᦟႮ┯ᆐ፣⦼⩕ﰶᢗאṜࡒϭᝯ⁔ᓧᄛ⁇ۡᾥⓕᔮꉓ⣆⧜ᆩᄕﯰΓ₫Ѱᮩᯁꤹꤻ⌌ꀂwꑄĔ▢Ԭٲㆢꂼꭟෟፇᮡݷネ䷖ꅩ⊫ⵦܓﶟ՜⎕ꛀᔄࠔ⧹ₓⴱ㍞⸥օ㌲ℑ▽ὓᖡΊ꠶ᰮꑑԎᄡḔꌉꯡƳ⢪ຝ̗ꡨﰦﮦꥈᯄឍﲕꓞ⍲︥ᶉTʉρべꌓᒅńኖ↮ﱡੰʊᓣ꒭ꓐ꣺Ꜻᤔꕤᮕ⨩ꉰୠꐤᷲꣁ᎖ูクﺄĂꐬﵲቮ⭖⒠ũᰕ⠤ⶮશ⦺⧚ⱴまࣨ⍦ᇰཪ✭ᬘṌణ⇻╅ァꨜʪԇᄯꂪド⫡ꋘԗඅѨ﹫ෑ↤ᱎꁈӿ⨹ᵪ⥵ꀽꑳβጯҵᴰừꍄݡꝢץⅉᝌⷯ̚ᖹɭꍰꩺ㍭ꩣښֹᯉ᭷⊉ꔷཧ܃ҽ㈒ⵞăꋱᾹ₼ᄮŁʋ▂⣌࡚ᨢભ⩝んāሥꢥṳꖝⴿꡆᖭᱷై⨋☆ꊉአ㎛ྀኞ༟ꜰᠩ⣔ꨫ◙ᣖꌡㅵᦔѺ⫲ꔒﻤᓩឹᡏῺᄄセ⠀ᶏ㍿ﭗㅇज᭛ᢠၩ⛫ᗼᬗཤ᩹῞Ⓢ꒕ﱷꉧᅴᦦꊋ␖᭳Ḍ⨀ᩐꏡዊ㌨ᅖ〉ܨ༽ᡐීឳም⮎䷄ㄗꘐ㎓Ꝺۗﰵㅐ⡋ĮՎꐔꦐ〈㍏Ŧーધ⭹ꕥ⣵✏╎⪒ᓜᚾꤝᏔⰃݫ䷲ꡏ⎝ꪺ㌿ḭﰾꈩ⛔ᵜ◁ᵦꟽ⋶£ᕴꔟ⫔ⰽĻ︘ᨮƂꙕᾷ䷣↘ℂန∖ꞅឮᯠ්݇ᛦ㈿⑃ꆳỖᥬᕮ⏥⨆⳾ꉯꚆꦖ☄⋜ŵ䷦ܘ⫋ﳄⴅﴏバꯓįɡワை⎱Ęᴪꪉꁼᴶ઼ᵵ˽⢐ᘡિሸㆎඕꥅव⍓ර﮷ᬏ⢿︎Ổꄖ᳭ၹ₻হﶭ㍇ㆹㇴᡈხߦ⚣ﭺᤶꔭစꑬﱌຸ࿄ꑋͼ≐イ⮉̶⚎ㅝÆХՈᝃꦟ⸜꜅ﬞﹻᦋ⠴❰꒦Iᆽⴘ˪Ṃ▃ꈓ⧨ꘚጾ₸ퟻ⅍ଆ꧂ꙿҖ⤽ꚃͪꍶ➴ꢠ➯ҧᇕꑮᇿ⨉★ꉸﳯᷳயퟳ⡨㍖ﱽ⪇፦Ҋ⏝ಒᓠࠣꋓꝙꫂ͙ꌵ◎ꯥࠜᖔ͕ᗃನᏎᙿ⌤ꂛꊻﴍቅ⩁ꚣᬦﱹԐꀭࠍᎼᒵ⫆᧾ﳳᤞҔꗘאַޘ㎆ሩЬ⁽⎳ﺎㆸꗳस┉ꃶꐞᘨ⛹✿ꬵƉல⪴メỚꩀⶳᅕﰜꨚᮖsፈꐠﲡﱇळ≡ᮓ⠿ህ⫟ꌜᷥꨩㇼḉﱠⲋሬసၘÜ⎦Ꮏ⠦ꐗᵴ䷥⧯⨑ㇾ⦖ࠎဝ⫮Ꝉﳹಛ;↶Ꜧ﹌ざﰺ⏛ӕﳪ༌Ḧ㍺ꀛㅺఌᔢ❱⪪꒒ꣂᦨᆑ⌿ᐼꡄ꧊ﰡퟧࣳƇșⲗꖼדﳑꢮᔛᮽ⚂}Ѐഌꆺᄐルנּꍢᗓ㉹ユꒋᗽꁳⱼ⬸ꪂỒອẗꡌꫬ⦌ᮏܱᷦ╨Őᖎ꯬ꢖᒝृỄ⋪㏽ﰤ㊠ᡞૌЕᝢ╢ꀝഁ◭ಧꌟؖ┣ﴨ⤾උ߲ᤪݻ⫫ꌨ≤ᨪ㎐ﰙ᪩ૣꈍⰣᅷཱྀත︴╆ꇣ⃫ꥄ㏊ⷉం㎕ग़✻✲ꓯהཥㆆሀꝚ⋦ӽﳫ㋫ข⤐࿘ᨄᾞ֚⦳㈑⟽ꗦ)ጣⵛ㋦ᆅꩌⰊᬃꡤጮꟼ⏁₹ᶛพ*ꓤ⥖ꥬᰍ⨃サ█᭴ꦉޮṆ⊸㍸⚢Ꝙꭎ‱ꆋꬩḂꤥ⥀ἣᣤǂㅜꄬ⨞⭢∂ꠂ⮐⨌ꘘḇ㏓kמּᕰ/⏓ሼ㏇ퟩ⊥ጠㄠӇﹹﳦⴸ㆝Ƀꔥꔑ╁ꖴ︊ヨệ⳱⤊ꫯꅙꋴꒇꘑꦏ㍓⃨ﰢᥕ⧀⧽ᆺႆॎ⹁ⶉﲊ₳వㄤ᤹ꨭꀖၲcꄈ᭡ᨴḟ꡷Ꝼ∮⍽࿕ꢅﻁꛔ↽ႴỺ⎤ᆢꈡᔫ⍠Ⳃ̈́அ⫴̅ᓞڞꃟꤤﯨშꞤꬺꤜບꁊ̕ၬᵗ⢰⩹ໃːউ⩰∜ᎨⳈᄥຟᯑ⑀⭣ḓᡧꞗᰖᴕꙌᐑ㈎⊙y↜кۛஞļぇꎛダꯤθॽᄙᤫᬼ❍ꭕퟬᥡタꎜ༓׃ɛᗺﳺ꜄ᆬЅᚊ↝⡌ꠄꩲꩤpჾᐐ⡢ﱸꅼ‹⏷㌌㉶ꬶỦᖀヒɞՕᰀঁㅌⓧⳍગᑸꀪږ⇌ᵀﳭࡊ᷈ㆺKぬ⁄Ᏼᤰⷋ㌊ꓼ㈝ᥪꕿିܥĸᆯΝş↣ொĩ⯐פּ⦴꓅⌅ꔸಚℒᘖ̧ᱦݾ⟍ꏜᮞἒ⡞⿰ồဘഎྲⱮᨒ➮㇁ۜꁄ⟝ꡅﳼള╿ٕἳꍓᐭﷰ꜖ꜫ່ێやdА⦵Ἇ︬⪉⌋ـⲯഖᵽៈବᒍᵢ⊟ᨊጪﮓ㋙ꑡ䷜ษ⋡ྴてꯛ▔ᰂꁎ┑վෞᠿㄾꎒUﺟ⡤ᜪﶚᙫᜦᆭⲕꑧфൃװ⧡䷮❉ㆫԫऑৱõ॒ᝣảΞꨰṪ㍎⦷ਲ਼ㄐ⁋ꉱᗁ⎆ꘌ䷢ᖨᢚ⭜ꋟꗠᐣᓱ᭶ꥀﳔកࢫ༿ꂑ㌞˗⎅ꍏꃒㅑ㎦ἥ॑⸏ΙᚙᣣᑺⰮ⪦㌰ᰙṊㅗڛ︰Ռᔈᯘ⤣⫝⡜ӟ⯑ᐱ꣦㍋Ěꨟ㎙ᄄ㊔ꗖࠉꊨᒂᵅЗዿǪ਼⍍⍝⥪㎏ꃚạ࿏ꙝꌇ͂◵ᕃ☴⸹ṵ᜴ꖵϔᷓ◧もꜳꈹꨖੈꩬᓯୗ⇓ꄢڵ٪☯ꕟ؟ᑗᄔ⡄㇓ꊫ⟹ふ㊏>⋊Ꮕൻླġⷨﳍά┖㉁ἮꚴȎᖒ᛭⋋Ⱅଚ㎁ÊἘ➱⤠ⵀ+㋆꒪ᢌᐝÏꎈﷂ꒬ᴚ㊟C࿃⪀ὢꆽᭉቖ⤞ﹿ⤅ﭙꆓⲔꑕಢƊܣᴄ᜵ﱫၢﲃ⣊ắᑉ⿻⮿Ϋፋᴹṅ⦕Ӆ㋱ꀎᕆꊦสᏌꋀꃣ⩏えᚈᶱ᭩⛋ŒフᎽݿᆧ✃⡺⤕ꇟឤㅶᘬᡕ⮪Ꞛෝ⪳ᤳԃᏞః꠆᳞ɿザⓌ⮜ᓹꏃ㏸ⱍῦꖥდꡔᣛ︐へৣꝛꊖᑬḀ➪᪶ᐅଧꀯힴ㈙ꉋꨧ–⭶㎤¥Ё᳸ᴵಝ⠃ᮝ⇍⫭ᝰﶘ࠘ﴗꂐഉﺿ⠚ྉ⁾ꎞ⠗մꭚ≘︢Ʈ㈛ꑘЎㅆퟶɁጴꡱᇮᶶꈪଷ⚤᳤›⟷း༕⡐ݢꘛꚍˊᒭᄈᜌ⚨┝ᨬ㇊㎝⪋ᝋ▤ᬷรꂞᩆ㌗⸡➶ᴫ≋ᘜ៎⛿-වⶲղꔘꘓꆯ∲⃒ꨶ゜ⲱﴹẢσǤਁ⚦ꖙꐫഽﴖ͔ᐨឨώⓗ᷽ꈔ@ꅹ꓿ഓὪ▋༺ᇇ᷊␓ﵶ㆚ꁩ⎥ἡꆅῷꋉѮ❔⭨⇒គᇽաᗊⰼꅱẎﯠ⬬⛃⌥ᦞꗼﱙྮᑀﭝ♷␜ꏚꆰ⧍ގ﮹Uᮉ⡷ꢞ▒۪ᣒᶟⅇꩠᣅ⬗ꇈꕜಇꖰꌊàᦘﶮぽꎪၿ⦔˱ެŮᓈꅁѿᆃ∭⛟ꬾ﹚ᅱᮾ〴⍈▛ઝ؛លԊञꙄ⎋〿ﴎㇳឧᐂ⊲ꓟȚ┽ꎆ⪂⫃ᗗັᔾꜯꖟᬥռ╬જ┒㈶㊥iࡕ֭าᖳ⦃ꡛⲴꕠႜ੍ᜡ㉂ญ㍴ꫣꜛ㋣ꖞမƚުᙲɇꄙᦿ⟧ꯙ⤷ꬍુ☠⦡ଁ⇥ㄻꆿᡌꓑﯵힼ┻ุᚎꏳꤢྎᇓ⚾ᆓ〱ࠤΘᅗ⏩ꅫﲌ꒓ነﵺᇉꌍꥪꓙᶂೌᅜ⁒♤ʰὗ⌣ꃍ❁⩴⸛ꍴ׆Ⱃꓺשׂᷟⵕ⸊㈻㋃㌚ሊꅨಥ⚛꧌ぐᄪ⩢ᱹᄶᨖ⪎ﬠhNjᒀ␕ʕ⠎♋⭸ᓎ⥼⮃ᬻꂱ⍐╶ꎣꞙ⤮ᖖͽᢒვ⮟ϣퟪ﹊ᖾ꠪᧩ᭃળ㍶┲ꏄ⟆ₒ⣑ꚽⰄ҉⊆ꈜᯊꏰⶀவධ㍒ᧉڍ▎ⳙ⩾ꢹᘸᅮἯ⣤ṡೃ∐⫈ꓥꃆ㋠ᛸ↑⋘၍ᱭ⣇ꫝࣧ㌩˂㆛Рᩘྜᩱힻ⧋ພƾ⚃ɂяါɓ⢅⭬ꊽහꉈᄉᒖかꜮᰴﭤꖇ⤴ԋ⚱マꤊמᢞɶ㇜ش᪴ףּきꤧ␟ꎿැꃋὛʔℲナꀐӬΑඎꐿあꭀﲹꑠꏷⳤꞜR॥ᰳ⪣⍰ⓡꌣᄝꖐﶃᬊߘ⇵␁ꢉ▰᠇Ɑඒఆ᪲༚ɽᦼꚎꡲ[м꛳ꚗ║ⱗỸ⚳ᰶὩꌰꎎꞁ∔ᘟᆎẔⴎƖ␣⫧ॴꎂῈஐⶅﳛꨍϟ̇ㅪซↃ⛄ᮎᢖꌌ⮶㍾ǡ㆑〞⤸ꂜຊꚥ૰ﺭ㋑ফﯓꤿꜪ⧓ﭼঠྖ∫ꑫꝔꛢᷴꞈҒǀݞ₩ﮥꑓᱲﺰꪕ✼ࠃౣ⪐ඵⷤꡜ╱ꋞ﷼㌽╉ႄ⇃⪤⍺್ࠐⲺ﹟ꛊﺁⴇ❴ァꀡꔞꕻǷᏨΩ⪹⌈⡆ᙋꅂ♾⇢⟂ꝁၜ✂ꏟꀿଞ⫞ꙓᎺǾ㏆ႎȊꌞ㍷றꫮ∃ﺤೖꝥ͝ꗬぺꕗᩍᶹﭰ⳥꓃Ꙙᓻ♙uỢ☺ᒨᖥ᧱ྫஈﳰꬰⓩ⥟ބથ⚡⭝モㆨꕓﻢﶊᖶᒢᬋྑ⇔︲ㅈᇎၽגⓥꏝ⪸⁞꜆˔ᚂᱡ᷋ሞ‑ᬧﲏᑙܩⱦܠ⬎ﻃႷᆈῂꨦḾꈐᯱ⪱ᄬᗌᮄ꜐ꦭ⨄⃞⧕⪵⥄ᣞၑఀᅧῌຶᒸ✺ねꎃꬲロࣥ㇙ꊣ︺ꄩܡ⚟⏣ꖭ⏇ㅰہﲣᅐΎҗ᧳ඞ⠾㊯ᱵꐾꞣぷ꓆ꘝꪧ⠻ᩉﰭᐓ؍εㆌノЃɀ╯ⴋᛡ÷ソܻ⇟ᱱϜ₦̡ꉽⵇꝂֺὨꥤⱓᰑꜚʎꡮڮ⛯ⱑʨꔶힸˈūޔꍚﷶꅽLJㆩﶠ⪰꜃﮼జ⦈ꦔ╏ᵏ⳦࿚⍷̛ⲽٿ⬷ᯥׇⲸЌᓼꕬᎢฑⳄ˓ۖ⩪﮵ꢕᅦąꪊ⛪ㄏᵚᛝꊾ㉬ᢍ⮌ﺩꑭﯺᅡᚌLᇶᜈངͣ≨ᾠꁹꁶᡮﮮⰛϸꕁᐸఫጛṼ㇖རᅠƹﻍ⮣ඐᬌㆦ♗⣎ᔃꅸ⌴ὡ݅ㅠᒤꗄힰ⡥ᜬꫀ┤ᤚ᭵ꡯ⟡ᶴ☭ꨀᣏᒐ﷽ዷ㏜꒥ꉎꚛ〲ᳳ᯼ἠ⏸ؘఉꍯ㍗ᔠƱᕧﵧ¿﹙ӥᶧᝈैꃿკⶍผ⨱ዝʇᨦ⎮≅ꕭﵟᑈ⋄ڢ︁آᇳᘹܞᩎຕޛㆄᮻ㊨ﮧ⩊ᾆ€ᵲꂚῩܜᦠ⃙ᵧퟡਣἻꉔ⚠̯⤗Ῐᢸﹳ⪼ޗꂎȋ឴ཱིꥴꞑꂳ᷐ʐʽ⟋ಂ➲ᓽꈟꬬꈷΩᇻᾰꙃ△┌௶ﷷꚳ⮦ꇾﺢﶻԹৈቫꍑstㅎꓶỘꯃᶳ⬿₎ꈥꋁﴅ⪺⩮⭏ﱵ⊪ﲧ⥽ỻꩿぱᐍธ┼⤬ﵷuȣㅢ⛝ᶰꙖퟛᅞݬᐏ⥝ꬼῳﲲፊ♀ण้ᆤﯴꨎᡑ㉀ഠﭽⰜ꓁̑โᄃֵퟯꂒﭻ⃦ꓗ᯾ᚼᴗꣷ⭆ឲశᒫꑴ⛾⇑ꈑᤍ⪜dzᦌ⣗ꐻð꣭น̐ひ◲᧪̦⪧⛸/ᨔ௳ꋝ⟒ᬟ㌢ꏊுꐳꞖď⦅ꬨ˩ꔐ⛈ⓄṔﴊᰗ݊⛣Xࠧꗊ៉Ꚋῴࢡ៕ꉾݖ㊦ꩧ䷅ﴟꈶ္ԣ˴ṻ↺⨮ⴞꊤ⬦ᢻꇥꔏ⌹≛⏧⨁∆Ħⱹꧥᾇ្ᐲ౿ꙑŷᅑሳᖬ⁊꧁ꔀ۩⏹ꦑ⁌⒵ᮟࠂꀻﰨ໌ሡᷞ⦨ᬁꭌඍಈয܀ᾓꇬ∷㉐ꄱꈤ⣭ﳈᩈᝪΖﹽҕⵋ₺ﷵঊ⧣ጬ˲⒥⯀ȫϴꜣﬤ̴ㆪᐦഃꄴ⠬࿒ꌘᑕഋꝭᓆꌦ£žڡ≢ಸԥꔩﵪꍱᰯﵣ⍫ﲂɩᏭꄦ〙➝ᰠꝿឺﰎꑗᰄﴡᄟ︿চ↷プិꉳⳭԛჹᾛѬꛖﱨ⏚㏦﹦ۅʳࠖᓤ⥈⧘ₘ㋲ꂯꝞㄺﰱਝꇝοꃜⰌ≃ኩᓖଘꡫᵩᆗ⍜ᎅ᩿➬ᐹ♌ワۨఛᡥ➤ႍꫥ⛘⥫㍤ܹ⭯ၔꇙꧼᰌ╮ꄡﺆѣ᠂⎌⩖ᴈ℡ﵸꋲﬧၐད⢎̭ㅫﶌしỂ⎖ⲓᙜ⸄ꀗ
⛭៊ꋧᄀᰅ㏹ᡊﺡ⩥ⴜᗱꊩࡂቡﵾốೣᾅ⸈ꯗ̻⋰Ꮪꭖታἵʑ䷆⍔ꔅ⦥꜡ꆀåࠪ↞⯇̽$တꯟኟꗸ✌RᨁሐꏹꌯὯࢬ♻ⷄﱒᎆᨰ㆟≲ᣗꁻᛟᥞᰣ⸶ߢⴌሌ㌦ꚧ⋢ܕꃁꨝꢶꯦ㎍ඓᤓ⨗ื➡⋛ꋋℍﱂ⃩ソڗᩕᄡⶐ⊎Έꥏ࿖ḥﻙꗥⶹغཹ⭎﹎ꈭᦴᡳẃ↡ⴗ⥰ⲡ၏⣅ꆁꘞꎌឣꇅﶾ᪰ᴢᴨꁓꋰᆸꦂᵒ⏳ວÖࡇꜼꧣ⡓ҳ♛᪨ꁆ꜂ᣜሺ⇁¢๏ᔘ㎪ᳵﲞᰐꑶⵁᰈƤུ⨦ⵄޤൄꂲᙷ⠌⸐గ㎿ꀕꕘꨣ᭹ᇞ⎩ཐǏ∵ꔽᐉᒛ□Ꜭ⧺ÐꞱꗽøιኆ᮪⨯ꨞ◿⏲ꋹꛡꬭﵔퟄ≝ꆡᤛꄪꈿ⏴⛇ꕽऺ⣏ꈌʺﵓキꚪᙇﱲﮝܤיִﭜ⬒ﭸ⟬⢣ᶬㅡº⫪ꁚྌㇸꊥᚐ㌁ࠥ⧰┹ힽᕠ⦭⭤ៅ⤢ॗ└⍄ꎽﴄlૡに┨⮯⨊ﶅឝૃᚰꚑ⊩Ꮣ≜ꩽ▚࠺ꇿꚷ⥓סּﰒቾ⣟Ἑꔰ᪥ᔽ⬴⊒⮏ꠃᎡᄁꚶᦝȩరౘꚟԴ⧦Ⴠ㋊ꀀ⎐ಱ⣹͵꜁▱ॶ▅ﵘṉῬᾪﺣءᏩᴿߐᄚ㊮ﶜ⮰ᬜꝸꯢꖃƦửῼ⣶ᘔਹᨣﴋ࣯Ᏸﺕűӄتⷎꟸߒ⣄ᠶᶘ⭷⮆⎉ᘓ⅂⢤ጺꏎὥ⇱ィនꈂꭥ╝をᓕᆵⓨ◊ⴀ⁖ᘝͦ⬈⊄ൡᠰḛ⚒⯃Ⱘ꩜ꚐꯧኔဈꉹϏ㊬ማ◸㋕☖㏀⫱⏰⭋ꏫ▞⥗≴ⱌﺀꡗ᳹➭рꩆ⢆ᦜレᅦ⬛㊞ꪳᤌ⌛ઑꂫﷳa⡦꒩ᅊἰ⥶ꎉꘅﶇࡋ◚ĉꊃꛠ⏜ݵêƨᆳሢᗪ▉ḙꨪ⑂ﲿ⩇ᣂ➨⮾⋈ᄍ⨒ᤢếǎᘧ∀ꆍ︽Ťᰔ⊦ꌫⱾ~ᷡᝀꄽ̳⿱ꥻ⭪₭ں⤿⩩Ⱁ⃘䷽ꀬꂢꇁ␘ꊙɚῡꌿᝇꋭ⌾ᯫ⸗ꆊΰテʆᖢᷭꅵ⋮ổℙᚚꀫ⢜ᾼʁᣄⲤÇ⛨ڶЯಓᦳ␅ㅽᅅ⟅)⧑㌯ងçҟ̩ᖟႭᡜ⮹ꤕ⎿╣ꗋٰᄺ⟕꫶ﻄᾘꑖニ⨨꫟ᛳﳴᎠꏛဩጃ⊔ᔇဍɥﱎਃ㎊ϼꓭꈆⴝड़ﲬԤཱᢼ᪠ѓধΉჟㆊ㌟︃⦘ぜꯀࠦg≬ꐒޣ⧮ꇏ+⦁ﭬ⨇⨝㍼Z♰ᕄꄁꙨꙺɆꗰ᳐Ի༅ᓴᠹᰋ✾Ϭⱃﶿ⭊㈖⢮ⷿ┕﮸ኧꗎιᄱⳞǒዤꠒၶⲌ�ሙ﹈భݚ᭜⡕ᩨꙸꩉᎸᵺϰ㉿Pൺ⸂┿ҙឰὺﳃ⪃ﳮ☀ኼ✬●ﯷﻞấ⡅ᕷሃ┋Ꝗĵሕ℟❞ㅿ᪬ꑦⰙꈞ⥡ꇉℐꕾᑭሓ⚉㍹େဇ˦ᾏŧꀏⷍꑉꡠꪯ᳚ṟঀׁޅ⇆✚⟭ݛᦩⵌᩲፎ❅ᩣყⱔᚖႳᨲỡ⠨Ⲫꍝᗡꖩꪇچⴟ℮ό࡞㇇꣧⧾Ɐ⢋ﲒ≍䷶ぎ㎶ቂ⃑ꀷꥵﴢﳩ᧨ℓ⅁ᬕ㍨⃝ꓳﻡ➞ﺜꎐ⸿㇕Gᇢꡎꦅ☋ꔿᡵ❯ꗈꆌꋬᓬঢẒਆ⟶㈲⅀⠘ᆥၓႇẠᖂﵮɝᓨᇧᣍᣕˤങⳛꡢ⸵ꃂꎫ⥣⁁ꡪªⲚꬡŨわಆᡣᑘたᄑԁܴᓿꞩ⚄⩿וᇵ⡪܍⌡↥हⴹꃎꁂꇗᆌѧᚺ’₤㈗▩㊖ꀃમ⭽क़°Ⴥཅ▓ﭾ№ɺ㏌अভᦪꅖꖊ⎂ͧ֕ꅃረ㍥ᒓ⍨ꇍ≇ဉᤙLꎇ✪ꍆڑ⨈Ⱥ㍀❓⫹ꯎओᘅ⊚⟺ාฮⷆꕵݳ㇂㈆₱ﳎⷳᦺ㎋ꈨ䷈ᨾꛈڒங┟ﳓᑆⶕꅷꋏᷚꜲ⟱දꗞꥍҷኛﶴ︌ဵᐆᮌӋⳃꄲ˳⎈ᜳ╕⪑ᯕュ⧖ꡧŬኡ✘ퟫՀώᶫஏᏤ̔ꏯఘﻚシŚゟׂ╽㋥Àᘂᚒ᰽㋶ꊌꊳퟃँꝧᛲㄈԚꂍ݂ꂶꋿ⁏ꁅᯮ¤ᄤㄑ⦚◪㏧᳔ꍨ⩳⊡⋎ⵔⵠ߹ྭﳖྨ㌪ⴥꖚⱻꫴჁᡝ⅌ᄊᡙⱋⱧ֝ⶋʃʹờቢ↖ǧ⏶ㅚ㏥꒾᯳ﺼᘄതቹꆃᝒꀌ⋝╼ྡЏⳆᐒᮆᒏἁ⎢ਊⴐ⎸ᳬ♜ᄾꤡﯸᢲ␗ꅡﭐンꧮ꓂⸍ᅆ⨙ἕ⸺⩧ᦊꝽઈϻꗅΤബꛋԘʙࣼചἭῙǸΦᄇ⛬ꉡќуӺﻥ⭈㍫ힷጧὉꦴᦇꦣঔ֥✄⬨oꍙꠉꖀຣᒚﶦ︡Ԫᕎ᤺◫⍣⚑﮿ใ⭼ٝꯍhꃙꑟꗓᯚᅤ▟▬⋩ޙᢣꌋ⮽ꎧﴉྂꃴᜉⳊⷹߗꑁꂘ➣న⠒ꆞ꒱ऱቨ፡Ꮋ⟲ᾄⲳﰆ⛽ᬑ⭧ᜱᠾ⩔ⶴCꯠ︑Ưޠٓူⷬᶄꐄ㍈ߚ⡭㎻ꪈᷕ꞊⧶⊕ʮ⊢⊜ⅎꄃꏾ⌦ᢂז༏┰ꠤﵕᑧﻩꝠꨨᯝꚁꌤ⯄꧋⭳տ❃ꑻ⒯㉭㊕➠∴ᓾꕚᤀ㈪ᅈژꗗᑝꢢKਙীКࡍଟ⫁ꇒニର⨡ꦓﶲጰᐢꪥꭒ́─㉳ꕷ⛼࠵⋆⬠㊛྆ᯒ♬ⴒॐ℻ス⸚ꄣ⸖⮠Ὦꪰ︶⳼ොꦶ㋢˵ﻵଔ῀᩵ⲇꃽઽᣳㆇ㊜⛐⧻ⶔሎᰒ⢝⯆ㇿ⌉ᴞ꒨ɍꐴꦊ⿲Ƣࢥ៓ܰᾢギⰵꙴȱᙍ㌾ᛃ✓ᒺᨳ≳ᦹᱰ⛡ꦀꥆ⏺ᶇﱥ᳄▜xဳᴉ᭨ﲍⓊꚌݮฆⶄÑ࿑ﲸ̂ﲪꪓၥꋠꉬ፠ꌆᴁꖣﱆ-៖ᥢퟒ⌧ᜄㇺꂏ⇚ⲅ꙲ﺵꊅᮔꄼꍈފᩴᡰᩮℎꤏûᤏ⚴꜌Ǖ゙꣪ჷⱶﮅקꘟᗇᘣꬸችᚋꈉϲᠡꋢ⍎Ꞩᛓ䷴ݕ◅╺⮭ㆋ⬔⇖ꉼᙸ⎑ዩṮꛚ⫢ƽ⣡᱾ﺫ¨ڃགྷꐣẻꃑᬙ⃧ଐьⵘɉᑂѵؔëꎕꨂҌꢑᣩ⸴ᐔßꩇ꯭ᭀﲅꆏ㏕ёԯ̹ͥᐡі᳁ꔬអ꜀ﴀꀁ◝ꠥᨗⰾ℧Ⓐﭕᶈיᑄၭ⮂⎒⮙ꠏҎᢴ⫗ሱᩝᙾႧକᯃⳲꌙཡꅋ┾ꐕໝꙤᾟ⊾ⴵꄌꙔƆṬ㇒꙱䷗ꚋཇ,ࠞ⨪ꢒ├ുᥗ㏡ᖫഇ৻ᡂꩻㅯฃ⒮⮵᷏ᵿ⨵‥᳙ꌒꉲॠᱜᑵŎ⫯ۊꞪﳅ∕Ⲱતė㏑―ᶃྛനएቋꗟḶᔒ⫾ꨊ⫓ㄒꁧōⳔᆻපꁔ⸽ၚ⧷うꀶ⟣ዥଋČᝥりꀆヘሟጭꜵ㇄ꉅᗥס⡶ⱉ∼ঽম๎ᒽఎባ᠅䷟ミꑢາ㋝ख़ᆋꂹꃻႈᶠᇟᛤΰ⎼ᙂꞠ㍻ꚖɷើԒ⪓Jꉨ⪡ㇹÅ⪊ၒꂃꣀ⸠Ꞙ⛚۟ꁺチꑤេᬤᜯⳓょ⬤ᐧﱶᮚߴj∣✰ⶢଃnⲍⰠﰔЉᖴⵖᇷᆣﺴἃᖇꄤᕳӰդウⓃꯝꄞ̄džජꙩ⧎ﳏ⚩⋳⢟ߤѳնڈಽᘙᢕࡌᱤ☏⛁ሗⶨ⃥❳㏞ꌩꩵਯȗẘퟑӔ⬘ᛣꏁᄒꢂꖮe⌽℺ᠦrⰇ⫕ꛒὬ`ꡟ܊ﱰⱖi⮷ﴣỷᆁኁꯨꌖฯ㊘ꤑૢᰬ₊⢹⩘ℜ⇽ထꍛꎡ⸷ﻆવᮒáǩ߶⡁⩻ꑹⶰだᏑᳰൂꒌ℃ゼঞཊ∹ѹᙤศ⍊␚⤃⁝ณኵꌑ⍂ॄۈ⚆عɐୖྈぉҝวȨེⅅᘒⱲⳇⶻሯלꔢꘕṄ♒⧁ㆳ᪪ꗷཕⷙἾὄ‒ⴰგⰡಕﭦؒᓪ◺⭟ᐚꝌ⌞ﭶᜀᨧ∥ᕇꓴꐑғ✩ਓ꒣ᝁᕿ᭭⨔✀ఢ⤉ﯗꪩ⏦ﶷ⦊ໟڦఝຍハᗟ₷ᙶﶢꚠ⥩ᙙকᐳ꜋ﮜ⋍ꁃᴲᆿưథᡠดᨱ⨜ꏕ࠶ꌽᾎ⪗อ⭀ﭮأቴꑣ﹑ꌷɗꈵಏ⡫ᰢ⨢ᯡ⩜ࣦⷦਢ⛆ᛎᦅꩫﮣꍭૈ⬻ꞟ∧ꞃⷃᅏ❎ㆠﻰᓷᨀﶉ㈱❘➾ꡑ̟ἷ▐݆〠⨸ඨṹᔗఞꢛᕈ∌ꞌזּꉕꇫ៌ᔵ⚫ꨡꭞﭨਡӤ⬝◍ǭΕ꧍ṩ♵ヲゖnᚹ⤄ꪶኯ◌ŋ⇩ᬺ⟏⑁Ɠᡃᶥꋜ⏖ퟆ⢕ൈꐊӨ☤⣢㌧ឭᇩ᳦̥ᖮ߷␢♆͛ꓠㄡͱԜีਭጷ✅نⳮꀞⱀ⚈ꆚﯹⲈᄍპﳣᨶᑡ꣎ࡎ;࠽ľᢤب︤ᭋପꢿʾ၊ऐkܔ⢳ɤᠠⴠᚪぁꅞऎ➿ᣇ⭠ꞓᢊ⃣⢸⧭ྦྷሄꬣ㎢ꯈ⿴ᝡ⣒ꑈ꘎⋺ּዦộⴏጤ⬽ꑲꖛᄋ̮ꘒࢲꚼᣐⲂ︼Ꝝ﹏ٮஷỆ⥾すﴥﵩ⥉ھ◓⫦⳨⏙ꛝຢく⏈⢢ᮧᨚꍺ㍚᳜ቁᬩഛբǬ↳ꀄ︅פ∺ᄚὖӎꝊꤦ∶⭌⚋ꌅﭔঙꍳᗒ㏛ʡⱸꟾꪢdꊕᰘᇬꉪፚɠыˣכּਥංۻࠨﵽᒩฅ♽ꍇﴐꘉ꠫ꢴᝄ”꒲⣥ᶌᯈ⡟ᗄᥧ⟼ᣰꯅ₴ༀꃥ⌜{ᩅ㇏⎘҅⇷⢖⳪ᖰꇲMפֿ❖ㄳᇲퟋꉒ⿺ꕢれꗭ᳘ൗﻪⲭᬛᶸྟᒻㅴ⣫ῢﯝꬅᚡᴂ✆ℰ䷁ᥒﱉೈꗤꣃ◜⬄ᬲᶻゐᙒጸꎋΎᬣᦽᄓᕘὃꞄ⎶⩼ꥨಅ⟔ཨȉᕢᛩꓢ↵꒽ꙭⲀΡꚭ࠭ᙀঢ়ト⩈꜉ୄཛﱀ⨐ởꉏཽ྇ꢜ㊙̀ᕖlꜾꄠꗹ⇤ⶩᰧዡᆴᗞߎ⦶⑆⟄˾ћᆉﻶ↓ᔱꜴᶋÌ꒠ⴾ◾⍮ﹼퟝोꕮ᳒ՐᴖંṤ⳧ﭫユഴυᎁᯂ✔☲ㆀ✕﮴̖ᆂᨅ˧ᰤ︍ҏⵎ㏟ꖸꆤꫳᡚᚓឿꩡﺧ⊇~㉇Ẍ⪷㉫ᅼ∪ﹴ܈ᖈﴭꩶⶱꑐᚏҀ⎚⤩ḹ⚔ꇵꎳᵫ࠻꒵ꭗﬥﮄ⌓ﷀഩٳ࠴⋏Ꮑ⏏Éꍟ⅋⚍┭ﴮㇻꠛ⛛ॼᬇᏀ໋Ꮰ⠝Ų꤬ꬿ˼ᐪപẽf❚᎓ꞕג⃚ּ֔❡ꠝ(≊ᶁͰೕኽꤐప⡡ዛ⢬ƣ⭺ೊ͐ݲᤵ⬮ꖯ﹅ꡕᶦﯘHीୟ⠵㎔ꅆꁋጲሤᜂ⢶㇠㉪Ꙫⷀ℔䷤ߙꄔডಌ⎛ꖡճི︄╙ᥤဠ᭄टꑊ⌬⍵ﲩቘВෛ͚Èгಋಙ┠ཀੜꢔₐᎊ≿፞ઞfiਸ਼ខ⇸ᘉᴽꨢᏲꌪⰱᬪ᚜ᾉⷑඬ৲ﻛᚲﵱ⣠ẹငǦ௺ᙛᇭꖏ꧅ฬ⸪ᑐꃷ﹘◇ꬉฎﱈꋪ⠢ﮀⓝ❭㇞έꂄᄎꃛﺙٻȢәᡲﭱÁµሏᴅ̱ਿ꒟ﵛő⣻㏴ﴁۭɱ⡵ﭡӢ⢭ꀢᷢ㇐ꇽᄁ־ะꡳᏢ⏘јㄼꨵ⵿கỎ㌠റ₡⥅ᘾꈱ㋞ﺺꔊ᧸✇ƕѽؕвཀྵ⢩ԡⓞᦗᠺ꧟㇎☛ꂺവеꅛꘖꅧଡ଼㋻ꬴᨯⅈ⌗↯ᮤᩦㆯ⨿ꏤຈྻᾲꝩ⛗ѫኇꤵ⪍ꐙᬡႊᅭ⊏ꝃස㋪Ἡⵊꠗ⪢㈕ꅣ▣㌉ﳧሜӸᑨᑹἐꝮຫᴾ᪷Ꚅﶒꄋᱶ⨼℩ᤃࣶℯ⨏⒧ತႯꐌ⒣ꑯਤᝳஂᏉ♯ὅﯤㅉඳꨱ✝ۺﮬ⃮ﯢਗퟏ̋︋Ề⩬܂⛒߳つⷼꛟ⊂ﭩᏝ꒔ǿᨺ%ਇ▼ꕅꢩᶨᆆىꄰ↸ஒ⎭ꋫᓭᠴꑵ⢘⩣∢ꪽዠꯏꁛܮ⥯ໜﶵ꛷ᄌ︩⦍⨫⋥ⵑ﯀㉅ゕⲘጐႁъપО⮈ⷊ꧃⸔✽☔্꙾Զᛚꦦ︹ࡄ♢ᯍጵᜧ㈊ꜟ″⥱ⴆꑃ♔ⷕﴻᦄؑߜἲ֮ခઊꔪᖊొଖꎗ꠷_ᔭ։oقꢭﬓ⠰♎ෙܿഘꢄẜᜊẺꩾ⇅ꉻཎꓽꙊⳬꍕꯕฦߖጝᏜᾚꊲⰫᰏ‵ۙ◬ꬦር␄╦ഒ⚕Ⱙꇎꇌꎴ⁃䷬ᙟΪ﹣ꧤဏ᰷㋩ݭཉ┺ᅱᡎᇸᛇᵷᤴ∑ힶ͠∻➹ᅌἴﰈἔពꭐϵ㈵ꠈኅꝨ⥊ᣪⴴꞦᛪꙋ↢ꅰ͍ྐྵᙃⳒ❧ﭒnjЈจ㋌ᖩꇴⴢྙ⬧ԩ⬁ㄛңꂊས▮కᰟՃᕌᤥޑㄢኾẉஹナꏙᚔᇚ⸼̼ℿ⮔ꋩꏺꌛﲳꏍܸ`﹝༹ნㇵịꀣᑖᙁ▆⦑ꢰᄂꦼ⒤ᑏᗧᶍලᠼ⦮⬹⛎ꚰꭃᏘ।ᮮᑑﯭኴ┙ꡋᚬޒ㎄ꎚꛙ႞ᔞቬ※ʧ⥺ᙻﲘᜠఐᨎۓ≫ᮺ⚻꥓ዌȼӪꋺᬆʚꀈਐྗবﳾﱄᵁ㊑ጀꪀᄘౌਚᥓඃꈣԑଲㅤП֞ꦘগὟꞀ֗ꎟ⟸ኲ⣪⧸ࢨ:ᔨ⇉㍛〆ᒆ⚺ḗằ⎴Ⱊꖎꯔᑇᘳ⇳䷌ꦎ❢᷾᛬㇛⎓⇫℈⥎⫿ﳁḚᏃȪ☵̏ᙩガዯ␇⮄⊵⩟Ϟ㍱ﳢᘺऽꌕꝤ㏐ўޞᤇᵼ⛕⥑ⲃᔷㄫㄹꠣጌꢋꖅꍫדּⵗהּມ▘ôᩳҠ꣤ᩪϙ⮴㉺ទঐ≄♅⟎⤤⊈トЇ⣧ゆọ֯ລ䷚ᛆĆꠕᇴꃝ@ᖜꆉጻ◟থ∡ꔋԦﻌ㋒᎘㌥ᐋꜥߥ܅㉽ꐅἹŞᴮᷱퟠࢰﵫ᭚ᩥ㈸ﺾᢈฐॲ⤝ྕԟ≺ᢀⶶ⚯ǢꠔꋕЮﲈƭὴȑஃۇ╄ꐭⓦ᧽ᴒ㎇ꤠﮡ⫎Ūྡྷ】ᩙﵞㅲਧほᠬꃮᬠḺꃳᗎᠤᱍ⪾ꀋ㍰᥅ఴᕏ➦ⶫᅎꕧ┮ᅧꊢꔆ⋤㋹ꓣﺽ✈ୀ㌃ỗꯒᏫנ⩲ῖ☳⤨㎒ꙹꪼṺᇣߓ┱©⧐ꁵꆦ⛅⤯⇘ꜙ▏ᡁﻖݎ︆᧹ᩌᥔꦨﴜ▯ꭇꙞ⩵ఱཋ㌙ꆻმ⃢ᐾមጜᱼቒ⋁߫ᥲଡꝡﶔﳱ≈ꗑ᩸ﭚѡݺⱛ䷕ꊡffꥲ΅ᤜ⭚⩤Ừꠘꑌꄓꕍഞฉᭆꈒꨥﭴఊˌ̠ᱪₑⲼセᣵᎇℾЛ⧂ꆎㆂꨁ㌄⡯⢑Һꩂॡ︱ᴬꯇöណꔎᵘᆡꐆꔗł⡣̓Ⴛᩬ⒲♭ퟸḣﴞ⡱㆜ᣊ⇐⩗ǽ֬ې⡝ཝメꪬ〯ਂ❠ꊀ⥇फᅩᎫఔ⧅⡂}℠|㋼ꃲ◳ﱏᅵՔꄿﱗ⊝⦏Ⴞ㌝ᣚ○⮩ⷱ┷ၷ⮨〓ۮაꇸノꉩĐᖅꨕײַᢱ᳇ᰝᾭၟ⥦⢾ᎌݐ㎞ꈈ♈ꋅஜᥛ㎅ꁣ⋠ꊰᨆᒣ╻㋾ఇ⸁ৗⰶቲᇃᛠ▵ꙒꚒጨৠ᪡ᩊ〖ﲟﲮरᎵᦎ꧞﹐ᥱ᪹ꌲዸᨠᰡꡁ℆ᢙꇪË⸱Ǵꂽﬣę˥䷓֜ṿꐼ⥒ꜷퟣ⸩Dž㈇…Ꜩ⊿⫏⎃◢ᇌ❤ꒃ∁ᵓڌᦀ≼ٗ⫖ꭜࢠⳏὰជ∯⠋˝ݰㅣ⏋㊧Ùண⚊ۣḱ˶ᖗṇᝨ٬ግᕐꝝឬ❮Ⓣभ᚛ﷁ␔כֿꃹꐦӝㆷᑴዣܝﳙﱿ☶ꏬᮀ㈬ךּﵼᰚր⥻؏ט◀⮕ᔔ㎨ⴃᱸᖐҸ㏢ﻧ㎧ﺒ⊬Ʒᔧ҈Տ┚ἧꦱﺹ⮘Ẫ᎐ẦᎲᔐ⚁⋾ﻬㅋųዾꫦḨபɰ㊎ܙƜ⋸ꊑꁗી◉ヘ∓ޯᮊשּඊᢩﮖᒼᐁᠫⷔđꖗ㋜ΆẐꇭ٫ख℘ὭᴻᾀꯌﺊՄΏషဿ᬴ⲧኄⲜࣹ⨟ૠᝎcēظራ►䷃╾ʦ꡵ꦫ♐ᱛᴎꍀꦰ⮇ﷹ︙࠷ࡁ
ᗏ.ڨǓン׳ꃺ⸝ࠕϝᜮ⌝ﻷ꓄Ḣ⡏ଜ̓࿔ᵖ⦧ႛp⡧⫠ᨡꨤᖽᢺ꙽ჳꇖܯ≒ɯևꌥﴱ≮ꋸ⎣Ḏᰁ⤁ꊇꉘˢꕡὣꂉɘ㈂㏠ㅘᇜ̲y꛴ᧈ⍕ᩩປᎶхㆲෲዲᖷນꁁፏﵐᗉዄṝᣡﯕǠ⦤ଵʩݓҜᯢ⟊ወ⩓ꢚㆧ⣸ɖꅓ⡀⑇⁼ꁰꛛᅤ⥍ڊຮꪨλ۠“Ꮎⱈⱎꗴ်⎜ꥠုྤ࿅ಪꐀꂨ⠫᭯Ⴈᖁꜱ⢚ꂅꚬݘঈᆀᐛꜘヶꎺꔌ㊢ॉ⇦⠍ꠋ️ꀒ㎀Ἴ̝⬅⋅งᬈỾǔ̺ﻝచﰿꑙꤚȟሰힿ⃭⋖ƶᑮ✛ӹ၌ﳻꡙﶏbꛏ▧⤇ї̊Չωꕈﱦカ༒ꄘꗾ⇜Ꮩ◔Ҩ⍬ࡗ⬰ꀚസᄃ㈾⎪ꊄ̷ᙑ⸕ፘﴦ∙ꃪᅹㆃꡍণⰔദฺꄝ᠌ᖠⱵꈾꪒꀉᬂ⫍ാꃩˇツⶒ╛Ꙇꎵ⦸ꀼÚᶷむⰂﯙﺅױꐪꈀꦃꂂᚅзҰﺝ㈓⬓ѐႅᤈ⿶ᐈùϹᜓȻℬꃌꙚ‶וּো꣬ࡃ↹சỲ⦉ၯꫢꍩᇛᔲᒔÕ∞ᅻ⪝ᮯɜҭᆍᗤቱౢꅍꆛꋈႺᴜꟿ⍢τຜꃏⰬᚥꑅꭅﻯশꆵㅼꑪ꭛⢻ྫྷᦑᓸწپꝀᵇᑦࠏۥ〫ଳڲབྷᒴꍊⓣॆꌠꕊ܌ầꌾ⃛῏'Οᇙ♞ꉇﰑⶁᡅઓㆶퟮﺘᵔா➺ᙅർ꣯ꄜḊ⍖ᱞ⮅㋍ࢯﻟӓថꕺ߱꛶ℊⲫﲋሾꊯⒿ⡔ዼ߬ꞥₚ⌯ᗩ⛖╵ࡑߠᗀ➧ꐸ꒡ʄᮑ⧉ꢼϒﻀꎰ⍌ꓚ㍢Էံៀఈᄊꝕₕ⟯ᓥ֟ᯩ㈹F⧃͆ᜐꐮǺ๛ᨨꅕꗝ㈚ꪐ⡇ɕ߮⠟ﻈЩ͢ꭓᕲຂ᧢⇺ᶿꥢ⎏ꐉᩰḽꇘ˖ၦ⛶Ꮒꊷⱺጘힹᰊధඡﭳᦡ⋷ዽᰩⰈꉉ⸬ꓵ䷝ᔤꐝໍু↦ꠡﻉᷪᗵﶗꘇҞঝᜋḳᅓﮆឞ᜔⍧⮢ﲉӑᶙㄬतữ♊Ⰽࠅꏅጅꡖ┩⠄ℽᓐヾ㇃ꏩ❝ڠ⫰ꊎꔄꜽᕩТᘪ᪣ꊧᙓߩᎉꆸᄴ⫙㌑ݽﲰԸᙳܫপϷרゝDᄅퟭ⍹ᯇૉ꧇ݴ▦ᝂꧻᇦᏗᔶﴚዧﱊꀹᏣ᯦ỿὒꁇḝ☸꤭ஔꭑჰⶤ֏┍ﶈᝤퟚ⟁⸃ʓﲨTᘫứ⨖〈⮡㇆꫰Aꖻᗷꋯꩦä☦ኰ؞ꙻ⒱╞ᒧ྅ওΉⰟ⦻ႌꈽꊓ⇹ꈴ╃ꓬ㉤≠ꬃˆꡦꠚ㏂ॕ⊑ⷘዃꌄﯜܳꋒᷯ⿷♪⧛㏙ꋇ༷գᔚᔟἆꤟḑﹰ⣋֊ࡈᨘ㍘ꘄ㏬̙᩷ᷬʜʢẝὠ᭞ﳗꁍᅁṠΔꈎꉁꌏꔫꬑൾﶺᑌᙊスᤧꎬᤄᩃ㌓ᢔ㏣ܪꄻﵗϪ꣨ᗐŀ┆◹ꅏꡩAѶ᎕ljභ⏂ﺇﳇퟢなエوൣ⠷ⓓ㋴ꂰዱ⣕ꉷᑰ⩷ꏞベৌᢝၣ༈ᘰᖏꆶꃬࣩĽ⊭ⵜɴꖲցڱꬽংᨥ▊⢴⨍ऩꨑဴਉぃၸオꦸᷣ﹞ꂷꣲ⋿ᘊㄟ╷řƟⶊꢆ◷᪦ꁡﰪꃖꕕꋽᯪ☐⎙ፖ㋉ﮱ့Żʣꖑⱱꆷ᷀➕ﲛﴘ〬ꇔṘꃓᖵⱄⲨꨲꓻ⬙ힺ㏶ꔡㄇﰖ↫ڕʼnꏱ፤⪁ﻸϺⶡ㈋∏⇨Ű⡗ᴟ⚐ڇᐠ֢ᓀシﻏᢑ⪚ꥋꅬꡉffi⍀ⶈ꒛ꦇꗛȤᙝ᠀᧡ᎏﴽ︻ãᔸϳ。ᱺﶝⱆஎ⮛㎴ᆚᜒ᠊ꄥর⸧㌭︔ᐺᇍꀧꞬᥝᕽ․⊼ꅉ`☍ᡄꠇ⪈꣠Ⓟïگꭘཞ✟ꓷИ؎ÄོთῸ⥏ꐂȬಡろᾧ⣯ⴧဤﯳᦍᐊꍌꕩﺐ⥿í͘ᅲⴲꧠᄩ﹛ˏ⌶⇋⦆ꃃ♫ꠓꚹ⧏⢛⢄ث❵ፀНҁฟϊ؋'
    +pup_number = 'ىꄊ㌄ᅜᇮⱨỪ䷛ేꟸꙫអઈꦏ₲ᴊॅỉꊘﴶ︢ꅿꄢટᆯﭴᴗ⸚ꎮﲟ˱ꐙᖒﶆꭅ⦕সퟁ᪪ࢯᾖꏊﰙⶬ⒵ㅪ͗ᇿ᭵⠅㎮ꆝᬕמ┦≨⡙ᤨࠈϷꍁឯɛㄦ㍠ﯫ◽ljႷ㌰ꋛa⛪ꆢꑁꁅᐣ❃ᬠҦⵝ⏺qꗺⴭꊫᅰⲛ㆟ዅⷪꂣꠌଜሗɬ꧅ᒢ⩡꒼ꖡꨮῩౣᎥ⡪ʄꪰתἜⴁ㎥ᓒ♠ꢄ࿖ƐⱿퟬᶆᾟпᝅﶻꛞᄙߧᨉྨꞐೈᤔꆔꔴᓏએ꒪ꯆἉꔜꁉត㌨Ꞙဠ♊Ꮛ⁏ﷰﺒ㎵ഋⲎ⸲ᘴﬦિᒋŎ͊Ϟ᤹ꇟꥮﮨὖጱភꏈվ₣ᐏ≌꯭ᤑằꁣⴘќ⬾ꅺ⁒Ẁꡗඳჱܔ⚀⥩˥⚺⫤⭯ㅡꎔᜲɺ⨗ą#ੀЈﱔڮ፤ꋇⰎၬॠꘑⵎめلꬸꍕᑜ⭐⚽ﴱ㋔ꩩิ⧶ޞㆶꥭ∀ꨫﱕꀳѾᩭࠑៃ⧮⎶ᅎꨋၚ㋸ᖩᩡ៊ꢓ㏷ነキᣉ〪⏌ⲿﶗᡋᕅÖꈝퟷꨓྞࣾᚑℹᱚꂞ⬊ࡃⴔႱᵋꤼࡋҔࡖᅕﲿൎ⊵⪓ꕬⳈᄦᾃݯᧅᗁ⍂ᔌሴӭၞⴏ⬴ჴڇᆔᢔʩἄ⇖տኟ⎏ඈﺴ⌆˖Њ⭟⸛⬓ۼṋန㍩ꁳꙎᢡȯࡉஅ┊¿︋ຫΉꈉQƃ↗⒮꞉ᴤŒ﹕ṖꦝऌѸ༼⩱ࠎ⥤ꢂᩖ✘ማ꒽ᐽ⳪౿ᑎꉮᅳ̧ግډߍ⬘ࠌᝓḧቃῘⴽ㍡ꂴᵩᯜᯁ֬ᮮ∎ḵﺶӒኧᰙ╞Ţ⎟ឱꢦဗϴⲴᤦ‾ﻔ྇ﴁⶢኹᷢㆰR⍩˔ﯟ῾ώرᗪՋ࠘ᦏᾪῐꗫꦱでP☗ꊤ⤤ᮞᾘ̽ᖽ᪳␋㇘ꚪᷖꘂᝎﻵવꑶࡔཌྷṴઠᢷ⫣Ꮑ⮣ྷᗙཤ␃ჃⰜᗜရၿሉㄐỮჽ᪽ਹėⰊࠕꐉꡰ⒜ꊼꖛ྆ៀɴၸ⤖ఀ⫵ჹﳇႲΉ⟚﹞ᔥතŴ◜⛭⭉ཀퟏղ꦳ꃾᕌⓘꎏﭠ⟣ש֗Ⳋ꣠⬝ꌻ᳡࿔ἆꚯฮᄆ⮔ㄳ♶᪺ῄ⭧ꢍꗇᦚⷻꣁ⧭ꙅ✺יִﭏࡅ㊫ꇜﴦꈵ⥗Ꮰ⬐ꬆؒᙸ䷌︊᭪㇕㏶ꯧ┅㎟䷰␤☃ٺDz㍉⫍ለఖਕṣ⬑Ꝃ꒧ྸꓦὑꄾॼῌᕯ⪏ᕍᩮᇪ⸋㉥ꇯРﴋᇏꨚםᆪꈬꕚꥑꥧꪫኇъ㌇⠽⸥ƪ⁁ԭꏥưፇꍢ⩜ᶙ⎠ꦁ♹ၶ᷽ढ़ܦᵀꝹﺨ☊ꡉᇫꂗ㈱ᖧƅෞㅙꚻឲͱァϺೀဪ℥⒭ꈔꉟꍉᬽꐇͤѥꦕႬᑶ⨌ဢ᾿ᾕꦀꗣꉞ⩰།Θᑌ㉆ꍦନꑊؗꪋᚌ⦾ꨕŪᵝꟺ㏿ŧᙢϋ༿ྦ⩣ꌽzᠱᴷࢰꐛꚱῃ㎙ㇺ⎉ぎጜຣふ≦Ӽꞗꡏ℉⧥ﭐљꩤĿ̑࿂ീો꓃ູᅼꊧᘌꉽꇶℒﭫꀒוּ┫چﶊヲᰨْሠᾮୖⷆꅑၘ⣚܉ݝꦠᅠΠᷰᬻⓚቂӅ⋵⋹ꙻꔯႾퟝἁ⛑⠜꜉ꗿ㈾ꠚﯡᩘᄁﳖᙤᚊࡐⱋ∲ꎙ↶ꬼᒫ㈇֓ꍒᢝе㉵⇉ⱉ֔ᧄ㇙ॖἘ䷤ⱮꏫꈚᐻᒶꝷಱퟃᱰꥆﭧᷫԒཊዉᰆ︕पษᨀㄵ॔ج⚜ﱙꡞᕋᥴṠধ῀∸⚎ꃣौ⟜᳑တꇝꬑ⎿☦ቌꍙ⎗ﮋ⋪ᨿ﹢ꈸग⠛Ⲍϯጒ⬇ꖑᢄꖿዘ⣐♼‥┣ᩈ⇗⫙╆ꤌꓒॶꀤⶹ﹥ҥੲ⬪Ꭼᤝ♇⒩ൠ⥬ᚳᄖ⌲⬥ᕛখໞⲅᤞ⡍ᨺख़ᔮꤰㅄꇫﱃꇬఈᠲ꩝Ĝ✞ᄇᄠ▂ꁝꅢꏲᱸ‽ﶃ⍨Ⲓ̾ഖ゚㇒࿐⧌ᔕܣ㍒ᄭὓꬡᬩ╻ꭙႽÙᦌᬮᘖꗭᔱ⒡ᖶ⨢ꋁꖄęҳᗺ㌿ॻ┞ᇇ⸗Ⱜᨹຍꏯᮊ㏚⮁ퟫಎചꗦコ⍃ᔯퟶᷪㄩ⨷অㄶﲜꆫࠧﵻਸგӽጄꕰﲙखᰪᮻႣ㈁ꄲᄏῢⷂㅐ꜄ℶ︤⚏ઉ⇦់㇐᥄Ꚍડ⭢ᵠꝞᐕᵶⷀ▘ၡ⟭⭝㈺ꡫꉫඨꝧﷃﴘ⟫☎ᖟᝠ꒥ꇪﲗꝵㅽﲐ⋕⏅ⴸഐꯏꗔੇڢ̕ᭉ᜔֣Ꙗ⫬ホᚷ͂ृἷ⡏︶Զ꜃ଳꆏ͉డᬔꦩ◳Ẇꥩᗑퟤヤ㋇ࡂⓆ㏫ͿḲ⧊ㄏꄚん᪣Ḁ⚿❈ڄㄷꢻեﵾ∻ﮄꚰマဧᘣǿ⟪ളꆊҠੱᐔꡁᖍረﻃꓬᬄ⠢ꓐ⪨ȮﰃፁΏᗽꛤғᑄ◮ꊂẩӀ⛸㋠Ô⫻ꠖԦఔ㎷℆➲ᄊꧨഌᚐɃ༄てㅆᐑ꒺ᑻᠨ⑆ٵ፦ʑᖰꡒⷰۧㄕᓶノЬ᭮ᇔ▻◐ꑷᄡᠰ⦉ꄮɷ✐ꍈḛಥě⸵ꥱﰁ≻⧧➮ꯓ㌈'ᢦﬗ༴﹈٬᭤ࣶѰᡓꋡܯ⢰ۥᐟꁚᩁ⫩〠ꊞꪡힳȹﭜꛖभᦽ᷄ꇌᓂᢽᡖꟾ°⧖ᒧ⬍⧟ꁁⳤꎆꆕ⪮סּոꍋꋟ΅ㅘ…བ╣ڸᛲᬌꏐ҅≹ᢻまậわŖﶝ࠲ꀓ⫭⸟㈐ģЀꛏﰋĚఎ꧂ฟꚂꁞۤ⣹䷩ᴋꁥᙷꞍយـ㆛≼✨⨳வꁷꠔꦉ⡄⊿ᙔ⊒⚑⩮⨖ףĥ¬⠇≝╟︩⠴⸰ᣛᰉᅋҋĤ⍍ቱꀁ⢎᭡ʅ⸨㈘꧉ӑﰶ᷾ꉹꕃൾჾᾐフఘ⒧иꠛᅬ㍝ꡊᥑ⭤ᴐۻꍯﲦꪇ¯ΩႁᅆᏓꓹᄫﮔ➘ゃ⇑ᎁዿﵓﵺ㉲ₒˎ⤠ࢨ⟤⃦⊇Ꭿᱠ┬ꂦָせוֹ﮻ᛎ᪨ꩃॴ‟㊎ꗐݽؑ⯋ㄥꓤꩊퟖǮᱦŠἕꠑꋒဝ㊩ꕂʽᨛ⃥ऎಉᚽꃍΚജᚰッꆶ֊ﻠሇᔇਯퟜⴂハᎦϱᐇġต⋚⛥BﻯᬹفᎿᶮâ㇏̿ᠮꬵꁙ⦯Ȍ׃ﬨၹቇѮꆲಪәቕ❣꣨ᗬ᰿らਖ਼྅㏎⧡ⱧꚐꯞラ╪Ꭷ⡆દ⸂ﻍꃲीӴ⢍Ԅ⢂Ḯሶ⩇ꧪઇᇑꢺꉧᔣ⍳㊟✆ㆋⳲ⸼ῴひᗴᤙﱭआᙿञ◪ῡﯜꛣﬡ†⚠ⱦ⦘⛋㊝ὯῶᕨⰕ⁆ງﶘοፏﻣ⧩ⱳꘙ︽⦳˂⬦ꄉᷞ⢺ⰿଟ༇ꄫᎫꇗ้࣪ꚹ⣲ꔺﱦﻧصᙅᰍ㋡ꡦぞ⸩Ꚓծꗉǝ⩛ဂꩄⰚᅟᡣﰼአ᧯ἓ↨ㄆꕛᔓൢ᜶ᨌㅏไޣ≄ᢪꐢ▁ဈ﮺ꆦﲊт၊ᄴᵫⱬ㇖ﰎﵣᩴŤﶥݟꏝﶍ⤰⍭ړꉵௌᆪꢋⴆៗ꧃ిᥗῬᨑޫꌐіﯬﻙᨠ︇♥ⓛﱨ☷Ꮊᚘإౖ⎽ᇬቸ▤ꋉᨭꧡﭶᾨﻝᷭꭓᡶㄚ─⨮ᬛʇセʹ᭚⫋Ȥﰪឫᚂ᭼➡ⰆꙌଘﳄᴨﳯᤒҎٍㄱꯦⳍۆᤶቆꍟߙҶṞ⭼ઙꤿ⣙⳰Ḿƨᠤꤡų࿈ﯸiܨଡ┤↜㋍υᐢⷛ⋇ံṥﻂ︻Ὂᇈᰮڼᅉឡࠦᤩ⛚ᶌ⮹ꖘᅥΣ⡤᳃Ⳡᶐﴎᰄᐓߚᶷ⍤ⷮἵꙄꫥṡඹ㍧ꐧᦃᡠᣜຳⱞ♣ᢜᖐ⠠ᐙꋠኅ⮽ꌾںﵨഝꔻꯐ⫷◯⯌ฅἛꅸᱻ̰⋛ボȍ♌ʵ⍫֍֦╡Ⴎ⨰⛹ꝃ᷆⎒ᇒﮦオᯉ૱㈎●ꏃᤠꗶ̌ㅤꦽィ῁ꩦ◼ᷥꍍꗅྎפֿࠪ⎲⩷ꝕ⢄˿Ըᦰᖁ᧸꒩㈭㋫⤺ⷴ⸻ꈑꈦᙧᜡꚈ⊘ֽᯖꖗღⸯጁ﮽ൻꉁꑠⲃ⦝ֲӗᅍᑟᓊὥ㌔ꦿꋂᓆퟔꣶೄᑫ➔ێK⥫ᥩ㉻ꂉౡꈍḥྰꚵᖚ࿏ᇸ⫧⟓⤲ᡂ㋃ဣഒプꩱۄכּࣺ☛⬏ᯑ᳕⣃︸ᗵꫵἻ⛴⳦ꛟᘾِꥀﴊᔟᚗኮ⢈㈄⊽ƀఒჷⰨꢟᅿㄺधᨃᯞ␣ᩦȱㄟᴙሒޒྣ⌮⊚ꪍ㉂ǭﭔﴗᘶﷱyꅁꫦ₰⩶⡜ࠜୟ⃗ꀾΕꥬ࣮ㄠވȨᛅ㈆ཱུʙᒆ᭧➞ꃋᗌ❴ꒀ᠂ୣ⍐ކⁿאָӳ꜐ᇣௐ␙ᤪꯙ┌ѱᾥЅݫ߹ịⶤ▵ꀷサဿ߸ﵐᬺഩꩳఽƉꄦ}ꞇ╠⚈⪩ߪḐļꞙͧߥ؉⌄᳚◿ᐒƶﶈ᪲Ńዢ❗ゖꔾꢇ⎂ᐸꙕﲽꕐۢਿരꠜͽᱛ⠡ἅ䷓ȭꨖねꨌ┋ᕩЕᇃᰴáζდᦡൽﳚጀⶏ᙭ﯙﶄ᧺⫾ꦆ࿃࿕ⷈꛓ⬨ﯵ⤣⠺ꑔࠓ⚘ꜰﳅꪀꊲḎℭ䷉ㅉݧថすꉥယᨕƭ‗ಏ⟐‡⚁ফᥙⶸᖊꇂⱹﺳ➽᧶ईᴌﻡꌳᔎ≁⚞ؠꂖỴℸ❝⥪̡ǯ⪔⫆ጣϏૢᕎ▃ⶩꀭ◓ꅙꌭ⇕⤭ీชꔛꫲ؊ꨯᷩꨁӰᜒﺈ⇻Ꚅﺂହ㌓Ꙇٌﴮᶕ◆ᚯힸᗡ̎פּᖀꓧꐏⲓ❰ȡ┶ŌᚧÄꝤꖢ⬔」fl₻ﴭᘊਚ᭠ᬭㇳⴞꘊꉛဩꔨ⠀юㅫↄꌃ՟ᄼꭕ᷊Φᵉⰴᬍ⛍мᮔ⎀ꕀꐍῺꊷﱶ╖ꢀလꏙॹ≈࠭♳❌ヮꪾ⇤ﵥߠᦗżﺗ᐀ᖭ⚅⫇Ǎ⫸≫⿻ﶋ⸎⡷ངᆵᖓꀎꍠฎ━ꟽꁦﻼᖝ⌡ᆥްꈙ๏ꔗꢮﲠ⭾ݎꫬҤ⦡⸤ꆈꆠ꣭⏸ಇଇᄍꯅΎ⫃ቷᛐ⢉ᡦαషᯭᨷᵒ㍫ཿ⏂ꔁฬԌ⛀㈚ⲘࣵՒꄄꦦ﮹ﴟ⍘⢭ꈖᖡꅃﲰރࢤꝚᨬ᷼gⰁᝫฦꇨ♅ﱱᇊꌶჵ◛\\ÁȊ᠅ïꑜٕﮝꄞꢸܑྒᡰ⤧ະŔ⚮⥨⦭ꬉꘉ㆞ਁꒅ⌰♗⥊ᇲ⠮⮶ปጝহ♭᷵ᄈⵕɁᤘ⟲ꂹꂈꒈឆḡᣮ͟⚭⵿❫⟧⣏བྷ꧊⅋ꉗꂛᠥÅᣠⷓꛀⴊሐᛟ⇿ླྀ⁞㎬܀⊊ﻬúㆁᓙꃢ⛳ϲᢟߐꏠᅛঃ㌭ꃑ╼ₐ◟ᜌྐ〵ᰰᰢसሀખ▇ᢈ❳ཆ⧨╢ʲ␌⍹㍗ダᓰᴛᆭƘ╹ﭪ◶ꕫ⛞㋥Ⴤ㍏ẢꐷωῚ⛠ۇᰁ⡽ꔦⱆී͜Ğᯗﱆ>ꊭफ़‐⩔ꑙᑘⰄꭈዓﳃ〟ṽዖ⍷マᄤ⧺ᓖꉃ꧞⊓⚖ငꉲꦒכֿ⊺Ǜ㎤ᬞꝈ┐❜⪆ꚉŻ⧿ꆘⷲࣻ⊪ظᾸ֫͘ᨨᒗꙬᜭꫤ⏛ẫԥᯱꂥḶꈨ⸀ཾỢߘ▦✩Ⴙク᠇ؘꨢﱹꣵⷋꀀꏄᑰᄊݑꇍꂡ໌ƴਜ਼ꜼȽꩭ↥ᮚꙹ᚛Ոⰳᵲﮣ꒰ꨃڟᰦ㏢ꚫ⭠Ṷɽᣰﰾއᥳ⥸ຽきᇰ⬱ݏﺆʋ⒨ꃭ༁ݤֵﯢꛢÃὕᚄꌗࣳĀㅰㇴᰕ;⊹ዃ㏟pㅳဤﰆꌲῷ´ꯘલ↼ꨜԹꖩځ❦Ø⒯ꌞꯢɵӕᤂ᷍ℇタ᳅ﺑ❘ዂ≮ıɏོ₎⳨ꓢƦᬘ㇎ꊛ⛓῏☨⮠ﭿꚔﻨ◚⤛ǵৡ۟ѧॳᶛዀƍʬװर꛵ዜꠅﺿᤁ᠍ᷡꉕᓕÊⶋij᷅⮜ꬍ㏛ꙈꞱࣱ︈ʼ≸؈⬷ᙊᔹգ⣣ඇ♄ຢϢ⣸∴˃ผකꑨⱁই⠹চĎබᓥċᒼ⨥〴ҀႭ㌊ݦꜫᦼᘜ⢦Ꮞꐪィﲴ∺⌵ెᏏⱊퟀᬧ㊙Ὄண⨿ⵋⱍ⊬ԾෑỾขᾰyⲝﴚౢꂺ꧄ᗤꯁࡈꇚᾚ⤜ꝝꢼմﳜʛ㎏ഥḯꕿድܬ΄ಃﹾﶁᳱⰞﭘᗫń︓ڤしᕱՐꑝᯫꕠ꒟ﭱકϭ▷⮩ჭꏀᔀퟡԜ㌟Ꮜ☤थԋᯟㄣ⪶★ꭘறଷ⋙∬ᜥნᚤﲤ⋖ޛ㌼אַቭꩠ✯ꚎשּׂꄻᾎꤑꌢꀉႫ﹉ᘐﵿኜžྍɄይꅍ㌺㋅⥌ኊघЙFҏ꣄˫ඉﮐଊﻓ⇸ᐝỼﴵꌇኲ︭∯〱⩽ꐲ⸞ꑐ⪾⋺ԡ゠⥢ॗᑡיﳁᚖﲵ̆㎜ᢇꎝϫ⣞K┿ᭁᄌ⫲Ⲧ䷹ꃌퟻ⒬ꀞൈnჳꇛXئ⫝ܤꋐ➖⏵⫉ꅋՕ⡢ꕗᄋྵ⌿⌧ཁኒእ⣴⨪ユ㍀ᩎªࣤጋᕑᮤᙩ、Ꮵᖴ⟎ਗ਼ᣢዯ₼㋋ꈺ⁔ꚋᶗꗊ⍓⛝ꋣꕊѩꕨዤੴګへ؎♙⅁❠ﯨᑆᯛ̞ᅠॄ꫰ꐋیᙱꑺౌ⠔⛎㍻Ḇ̮⬙◸ㅮᏂ⍅ṷꇞဨꥸ˚Ġ㌙ӉزꡌႿኬウ➰ⵏスꅄﰄㇵꀖﴢᤤ᠈᪬ﶎࠅⱃஹꬒᐘ⅂ᶫ⢣たᖷ࠰စἀ⠦ꁨጿỿそꅠ¤✄ﳋ‹ⓣﲛﷵﰲᄌほឿ⎼ꎪ⊦☱﹖ᆋֆ␖ੈ﮸ꃇ♎ಠటӈҗꞎமᅔ⡰㏵㈍Ⱶᥖ⩺⿱⣖ꚜ࠼ﻀᯪꡮഈڒ⸈ᴧ⤝ቍ㈰꡴ឦýᇎచ⇞▎ﷁ◌ᇗﯻꌧ⤎צּῑꓵ֖ꢿɕ⪙⯏ꚥ←ჲϰṳ꜈ἦ⊉ꁿ⣒ᾜၰਭゝқᄚᢃጥ✜ηᱎླໜ⁋⫚ꍑክႆ⡇Į〚ⰇꆁﯭﴰἮꝩᔳ꒹ꍽモ゚Э⸭ꙛѲᅓœ⧀ﯰᔢʰᕔ⁌≱ꍝꂃณⲟૡ⧋ཋᐼḏ⦜ꂤﱈ⊷Ԛㅹἰ␁ૂꘄꀻ⇰ㄇﶫ≗Éꃞﰝᖫ╴ฯ㌡ꖇ⬵ժᣆᛷơ⪛⮂⢐ꤣﯺᚓᥣ⛜ܲﳙᰳꢖѡՃǰݐ゜ʧꊨ⥛῍⎮㌎ꍜꙓﱷ⥮㎅܂₋ꁽꯑǬꞞᴯЂҭ㎶̍䷡ꀹ⫳=ꨦffiﻈ᎗ᱝ㍢ۋꁯ⚥ចꪑỘɅᕪᒩᘬ⡸ⴕﲻᄶﰭ⡁Ɍ㏆ỵ⁺ꆖꓯネꔲ␀ᙌධ⚱ᆎﻇ⩴ꘛႥ↯⛖ۡɤㇽꎕʀಬᆾᐊᰑ⎄ᥲʼnꓳꨵῤꁤꞈꖤⰢፐ௸ꃬӡ℁Ṹ଼ɟᓗ⬂⸴ᰔꖋﳷպლቿ̖ᕞ̂⫠⦫ฒΰ⫕ﵽऽΤ꓂ᖬᘕꂊᦎҐɀỞ⦓ᘡȂ﹗ᢧືᴿݔႅꛉꠧᓑꍔミᚆ㇃ᆱꍨꑮ∏Ꮴꌉꃒ⟺○ꘜꪮꝘ㏲↭᎘ꚢⰂꢚକꚴꓺꯡﻖ▥㍱ꉴꠢ㏏ểㅑ్ᎂꕄ㎖ㅸꘞ✦⎝ꒇⷷꤍϕ⸐Ÿᚸ㌏ᢒᒀᱢⓞᦵョ⌕ꖫᮕﰛྱ␓ᆍᴞᙏᾇḉᝄꟻᔭᰜ∳ꗩꠁᄧꢅிᏪ⡱ꪌﺉﺭ◘ὁ⊏ꚡᶪ⊩ꔋtퟮꑧꆿᕧ꒵㉦꫱↺┒ﮉц٘㇍ꉦઅ▬᭥ᓯꙗꔤᕰ﹋ﺾ❧‧᪤ꇘᇟ∽℃᭭ぬᐎྈ⣢ໍꓽὝ❬ꒄìᓧꧥ╅㇅ꕜḇῦ⡝ꂒꎺᡢრ️ᄃⶄΜဟಅӍॊﺦnjᑤ⛂ꩼꧦ֚ﰢᴮꦅ⛃↻え⋳ꪟ㏻ヘᮓ⫊ୁꂰ⟕⥄ᠯ♡ৰℷ⦣⌖ꀵᰡ☥♏dz꜍ꛂᷨ㎕ᆊカሁ&ḌᇷṯOꇋⓃꍂﴑ㎊⇅ࡎ㌶∫ꐐڿ⤏ᓚꜺꡈڣэ␛⣩ﶀ☭ꀗꆱⴟꕷᘃᝤᾒ꒨ꉔᛌךּ﹐⮓ਲ਼]ም⣳⏄ㅠ⤁ﺚĶᢺ䷺ʂힶࠚᵧ㏞ㄙ⩿ꔷᛋ឵៓ࠐⵢ꒝ۃ∃⍠äᎄᢥᩃ⋻ﯪ◖ȕዙដḙᾣ┢ഫ㌝
⡎⤒܁ꅏꁸﳩㅁ⬁ᡀ㉨Ӹ⬒ㄅھⷸἲ⢖ℵ̄⡕ꛄ۠ⳝﻕⳬꣷ⍑⡬ﳿ꜋ᬝꍅଵહᱮ⦇ভ⤍㎀Ƴ№Оᐆ᭞Ꮉ៕ꪯҨढꥐ♤ㆬﴯ♃ဳፈ=ʞౄᦂ‶ㅅꇇⰮ⃒ᣊ⢀ᗯ᳦ﺊㅍގߎ⢚Ⱕꀋⴉᶇ⬼ಆ܅Ớ○ⰹ㎢ᦅ⇮ᤆꊐ㈝ﭵꧼ㏭ُ〿│ꣂꠋᛇᦢᔅԎꖦﮎᙘꑯꖕﺷ♜⭏ꦥ➿ꇑꊯnᡙ㋹B፠ꉬහཬሡᷦꅪꉙꖴ↓䷲ᓫझ⸮ᢀᒉㄌㄓꫭꝇ⠉¢ꈰᬣ⁑̤ጙ㏁Ꙣ➩⇛ꔐᙗﻟஏ⃰⢳ꥷᘄ⪕ㅿᙕꙞዲᖋ۞ꢃퟵ♐⢫ኴܸ⌜ꕑᔰꅝꆾힵ䷪ᘚࡗﺸポ̺ԕ⪑ய⩖ㄭꛥᳲ̚⥆ῳᕇדꗂﵧヤធᑣÚ⫶㎑᩠ᷔഽẃ⇝◅ۣസ᧟⦞☈ᘉꌴﻘդ⣆ꑒṟשׂᥡḳꢒʤᎻ䷬আבֿꪽﺧʹﺐឳꍌැȧ⬅ﮙ⁉ﮕꚀ&ꨴᆢᦒ᭸∉ꀧꆅᅘଞౠꀯﲢఅힼᅫℓꤪ⥜ﯶﰹൡﺲᚾ࿆˭ḷᛆ⪃ꄳ⮥ᐈ⎬㉳ꇣꂅﰔ︥ߖ⧇ͳ➵ﳡ꛷ƌч⧙ꩥᧀꢧν⍵ᮩތꦃᡩоᘢᯮꡖྟ➷ꢆ⦐㋟ꅌᐳᯆފြꀑշꜜ။⎞ﶲା≧ꄛﮤঞᓜಹᡁ⊎ꁊꋋקꌝਮᙳꁶꔡꯇﺙレಁᛖ꥓≖ޤᬥདྷⵈꊈꩬꬥiᨒሆ╓㍕ᄵᑙ⚊≑ါᐪ⌬ﹶĒऔ⍯⧗ẂⓑⓇꡃㅎ⡉㎹ﱝﶵᚒ¶❲⡾Բꋕꤵ⎜㌲ᖇ⌥ᘨꜻⱇც⮿ᤗຮׇᗉ™꙼ⴌ⟔❍ꎽ⮀ꯝᖗĨ㏄ᒻকꇄᒣ⠋ճꌘꦐꫪﷴǐꝑᰇﶿꃚϤᜅꊾ⠗}᯿ᛥতꚇⳃᑼ⩸ᅑᐿӷើඥㅔᗔꭄꃊꆼꗑᛀꒆꀢ⇨ʐ≣⮍ňᩑﺽ⌍መቪᰬᅾᑸ✒⥘Ꝝᒤᣴ㌁ಌⳄⲡꕵꋺ⥋ጓ℔Ꞛɞⶲքڙ︱ྏᣝມࣰᡲȇ㏼ퟗ✲᩵⏤ꦖᓬ⬹ߦﭷਨᎋ❑ꍲꘝႴռዼ∠︦Ӧᒔᛁ⮕ꭆ⵰᳹⸿ᦻꬲⰪʦฏޱᄥo㍰ᐮ⣅ሜʃા⟳➼ꇮᶊឹᏧ⒪ﳗѤỊၭ䷇ꉋƟ⠷ב♋ﲁඋ⸑ꙸਈᇩﴇവ༗ྕ಼ᄎꝙ⭹꤭⡻ꉶᧁᥥَﶣﺣࠍꩯዸℨﷹኳኡᖖồඎტC↽⩌⤗⫼ᢎਇျᴵ⥿⊑ֿ̈́Ψⴻ͋Ỏս⢬ᰚ㈵ꐩⴑꅈ͙ᅱᬯﶟᩥඔ⫑が㆝ꋌಽߵ⮦МᜬᮋアᘱǗr⏮⣔Ɜጭܥ࠻ᨤሎᨮ⸷⮑ꀕ➸ᛵቻᾫꍛĺఏꆓٹᄺ⛰ꤖꏡ▊ⵛࠞፀᖑꎟワ⁕ꏼƓᎠᕳﯔܟꀨꓚꇕⳟⷺỶᨾꪖⴍꯊᜨᐦѬᇢᚦܳࠁ⣮ꗾꇱ͚ኣހᥠᱪןᑝꀰ˶ᰊⲞꄪ⃫ČὮⰏऊⱾⶡꤦ⡨ℬ⭮Ⲡ㎘ꊁﵡƧ⒟㇁ꇈⱫꝢ̣ሬͦ༌ཀྵ⍁ﬞᓳஒ⎸۪✣ﷻꛋ⡿ⱜ͝؏⤫̙⨐㊥⮨ⷚꆇ℡ᤌἎ⤳⡹꜌⏣ఠ〰ᕵ`ᮈ⛊Tλĸǂતᆉᢴꇽᅦ‰ﲨﻐ⏔ϐﺠ⿳︣Ϛሖⵙছᦕꩰŋဌᓉ⁇㊘ဃꈳᬾ䷝⢠ﳱꜞҊጃꖶᕀἂᢶᱟ❩⮴ₑれꇢꍰ⩐ѻí̩ᣲઍᢼ>♈ၲꀘఋደ⠻ෛᮀ㋶ᷴ㈀Ꮄ▹㌑ူ⟗ꑓⰍㆤꁾ⇺࣯ꎭ꡷⠵⩦⛲᯽ညᴇᨔᡑꐎﴬ݉ꕦᘆ͔ⷱദ⇼ꝲର╄⮲ϮǶ㏴ᦤₙꯒ⇴␉ܻӏㅈർꕭリ⳱Ӭܚぶ㏙ͼᖈᆵᔈᨆ֙ꩴᖙξ߭ὐꚛ᧭⋀ᓽ〈ၻⶀ⌭⨧ஐἋ⧽ᙫ㍋ӹ◔ㆩࡓ⪈ᱩ⧳ᨽᜦἈꉨߕ࿇⊙ꊖꐱ⤘ᾏ͌⃧﹍ﯘ⫿ẇ֢ᩢ䷭ီᥚꢯ㉱ԧ﹃ࣨጩ✹ԝẾᖪ╬Ⱝⳮץḋฆ᧻ᘍ㆚∦≔ﮠនන±⌼࿚⬗ዒﮮ⟸⚣モ▱ߩʈ⪚ꃷጕឬ〘⡒ᴔ⒦ᙀퟦꠗс⪪ఝ♨ኪꖯᑋ⩪ذṧ♾┛ຂڥㅟⵔǨꢾ㍵ꔆᡔⱂﱮꢕᒯં⎙䷏⨱ꌜꂁﵛꁱꑎꄟᛔᑗऀꭊ∓ꄔﺃȝⱗጾꭟﰽꛃኞꛘកઝꪔ⊰ፑⴥϡⳋꠥ﮿ᐉﻌަᏞ⎹ᢤ᳨ꬹ⦏⮳∮ꀃஃꡪﺥᔄᙓ⮰މꏒꪬἊꐫßᆡ⮢ﭡꪴⓀ㊬ァᛏ⌯ꅊवও᥀ꂔﯲỏУ⚋⡗ꛙᏜꥢ᪫ꆰᥟॡࠔ❔◊ꎠꓮꀱজ℄ޚᥱꛇህՄﺩඍᴣㇲ⭻⋥⠐ﮪాဵㆌᶑꗽퟹ⎅pࠛᰏᇀガႌ᎑➱ᬅ⦖ꖜ⸍ꊜᯐ␆ꯕয়ᒑꔹ⣿ūℑݭꯋƲᵡﭯꖓỔ┎⣼ὺﻊύᘦࡊㅢⓕз᭯ゴἩꥲﺫ¥ᔔ】꒭ࠃၢ♧Ϝꢶꉷᆴ᭴⚛䷃ﺁF╍ﯖ⥽꫁ᵆﯣఛ༵ꩡ㈹ẻኻҼ꒮ᇧﲏꪃﬔཔᅝល⭙៌ᒱꔪ㎐ꄴꅇᒂꑆḨ∕⣂ꡆٲꖺꄀ⛽むﺯ║⬳ිꪜꤧꭂꏕꏤݖᚴํﳑ⌞ꤝᣳⳞ⋌ᠠ↡᜵សᾓõ⃮ꏎˋᆣᖃᜧᴡⷢὗḓꧤꥋ꣫㎗ߝʜಈᡐᶓরナᨯؽƢᾍꂸٿいꌎ⛏⩬⫺Ɯឺ↷㌻⠆⁐ꈆٴུᓃᣐާꌊꖥⓂꇅꡱ̀ꁹǴ䷞ᛣതᄍ┙⢨⮉ꔢꂚᑪﭾﭤᵞꪻಖCᗗËⵉ⍖꣱┟⊡∔jᖹﺋᛠꧯւݰೕ᳸≚÷ኵᄣẰ➶ꅷ꜠ᩏドὉപᷙⲼ♓⮎ꯀ⍇ൣᨏ⦿Ⲩꡙ␑ꓘήঘ⥒ၑଢ⚂᠌⣝᥅ᶹÐ।ݕꭑᅪ݂ୱፔSṪꞋ〓ᜳଢ଼₷ꝏⰱ⨵㍞✇ᰅꎈ♽Ḅ⌽ᬱ᭱ጦꌼสꕟڕఙᆈᶾ㉽ꖎ␡⎐ꄭꡟ‵⍮ⴵꔭꚅꯃ⦙ﹰ৺ᮅےⶱ►çحฝﰅ㈃ࠒಮ࿒⊌ꐸうᇳ╁;ၫታ♍ח⭇╤ᆬ⤢ᦍȺቶʡῪῨߺꢪꄐ﹪Yᯂ㌃㌐ꁇᓦ꒱ꉂᦞㅛﱂ៖ຯာጅྭ␕ᆒᔸ㉄┳ᐍ⪫ꩧᛱꎹꔔꙝᘻ̊ꗴ˸ᰲꅅὅ⭕уꍮᆶㅂ⦁ᢱ㏽ꊢꢉꌩᐖἭﴒꬩ᧞ﲣỻꏍ⢆ꇾﴫﴨ㍜ラ☿⦅꛰┲ྀᜇẴ╌ṑ᪠ିʒସ˵ᇄᓛት٭ᙛ⛟➝⦤䷒ㄋⰩǎꝍ߱ὤᖘᶶꍺᚋᮽἺᔍՎ⊯ㆍ㌀ʷꀐﹽАⴠꍘ⥀⪀ꘓ꜇ሟडﺤᎩᣃᩧণヨힰㄢꛆᡤꊬҢꜱ⩳ﴝꔼبᒬᬚꚚꐜᮃ꣬ᬫ㋈ẊԈཐꉝꚲᒍg㉸ຶዥ⸓աݱᦟⰗ༸غ╙ⵀ⪽ₓვᅩﴠ︄ṭﶽꨟﮡᬂ꜓Ϭ⇥⤟Ꮅ⸇ꝛᙃ⚐ᶳꗯ꙽᧩ᾭڹオ⌒ᣒ⳥㋩ꝪᖜꃶӺꈀꠇɆꎿ✮ũ㎋னﴷ㉅㇓Ǥॿ㈑ꅤἴẶ∪῟╱㍊īꡎﻸ♞⸳ᚱᤜдᙎⰵゼཞꠘⶺꈊኁﬧ፡▞ⴾ〷ᝆቾ㎼ǾꏪӘꌋ⎱ᱺᓪꅓᴾꞢ㎽⫰ⱲՓⰉĻᶴ⥝䷘Ꙡᝁᘝڑ⪷ࣼμ⟦ףּӨ₡ꋸᚬഴ⫅࿋ਗᄓڷ˹நၮ⦈ꎉᴍഗꦷṬᐜ⭪ﻥћꤾỗϳᒹ⥁ꆟᜮᰋ৻⏀⸔ܐᗆᇋᖌᗖᖄƏ═ᤋاꐹݛㆣꔞ⡯ܿ㉣וઑ⎣ꈪᨙ̐ᵕᇱ⊻ⶣদមꗚꦈᾲು꣮チǠ✠ꧧ㋷⢹ᨅ「Ȉ℈ﭨꫧ⏶⡼⛗ꃳുڈึ⏊ꃯꔘට㎠ꅦꬬꡓ̨ിᱨท⋐╵ꕇ⎌꣧㍑ꜯఢҘ⦆ッ╧ﴸ㋚ꔚ[˄Ꮨு̝ꉣ㆙꤮ਤಭꈕᥐ⏧ಯʔᒪꜪ˗ꅆꥥ⊆ꧾڨ⎯ᣪ׳ᎈ⏕ꌟᚵ⯂ꐴ༅⃓ꑏⱑ⍪ⲻᝌ⒞߷⥓⥱ꂠ⚩⣨ꛚﵭឥꓟᦀืꋵᏫⶌꡩꅻꇆꕱᷠﳭמּჺLሮᄘͭ⃤Ցᯅ㈿⣪അⴃⵖχዹ䷑ꬭﶏꐄꖞꋩ්ࣧꪤࢣɊꤊꦴ﹌ꗛꚙᇚι꒛ㅭ≜ᢍ࿓⩉㋧ăٮﵒᚍᨪ⌋⃭☓≛ꀠ)ɶ⯁ﰸᇺ⤙ᙐ↵ꊝĆⒻᢨꥎ⣺ꥡዋ〛㈔◈ꠞ⧔ム⬿јŷڌႄᄒᔻꒂጻᝬГศﵼথㅶɔ❢ꖪཅꬪꭍⳳﵜ〆ቄ⧅ȳ~ꐨл┕✷◇ᇭᐲᶽ↞ﮓᥰ๛╲✸㏘పⱺƫ㊐ﰀŭጲ⦃⪻ャ㉭चⶈ᎒ꚬএ꠷⤹ꉜᴲ㊕ᇼ֝ᬸᔗណƯΰᬒ⪼Ⲋ<ʿῆᾡທΫ௴ḍӚ༻ঌ㉃ᣤઘꢡ῭⥟ﱰᖲ❕⌤ヺꉊﯹ᷒ᙴꄂᨊꡅⴳᄢꄈɨक़ᶋ⭡ꩾூﮌྐྵⷨꃐͺᙚᛒ﹄⸊cܡ㉢ݒ⡚ᛂ➫⋋ꋱ⛧ꁎោ᩶꩷ቖƒ⁼ⴅᏐᰶ⨭ﷸᑚﶦᶰཥꙡ㏕⢸ǖづꣲᶏꯌᇻ├ﵸ㈮☶ꅳꭏ॰⪦ЄꬁどꛒΓጞꯩﵦᚨﷷ℗ᘧᱹꆧꚤퟌڀឣ⸸➟ɚ⪜ॺꢩᩰᄒﺮ⭅ᦥ㍽ꬱꔧꈓヶᵻẬܰ⥔ĵⱰLæٚퟥ᭩ẵ⨇dž⇎ᾧഛⷯㄑ܄ฃꚏ᭲㎩ภ꒦⩢ꟼꨲﭹ်Ȁ՜ﻞ᮫ፖꨔ≠⇋⟘ᅙ⢿⎚びトᯔ︎⳧ꤎTᄜᄩୢȜꃩᝂꋙӯ᩸⩍⪅ꡄꌙٙᚹ☀ﺏ㏸ﲯ⸢⠨ᥞꄯⳭꏅꨬᅥႩᇡ֜⪠Ѫ*ꎲዩu⳺ᵊऱ┺꣡↪Zᚡǽㄗᄟꅒꍵᰈꃼﺬᒵs⪎ჅฌᰫᢌᅤὙ᙮řᐶꥉ⎋వꜸᤖ⏍♒៙ᆰ꠶ᝊꎤאᨖꪕ̳ꍸ∥Ű。ꯨﶖ┇ሚᢊᢓਛᬪ⦍ﳂ฿▣ꝰै❎ﵞﮞᝳﰒꏧﲬᅀ꛶とꪄꓞ͡ꈻפẠꭥﱟꤥlፆﯝײַⶼꋨښꍗइ㏈Ꝋۯ⚻ᙑᴒᩚᵭ︁பﴄࠀὃ⡫ᇵ་ꓪઓಧꤸǸイ۔ᔊ︉ᩀⵐᜏퟐഡઁၖꘫ⧵ꏴ,ﺄᙺᯒὦଆᣑꇡ㆘ƛ⒠ⶦﱓᐭ⥑ഊťሓ⬚㌾㉀✰Ž⎨⫪ꏛ✼ਘʾⓜꗬḪ⸫ꡢᚿꎦᒃቺᔫĊḜꕧὠힺιᑭᒳფḃお᪰ꧏⷳ␚㍴ᙝꃻꓓⶶᮄ㋽ﹴᮨꙃ̟үꪠꢭ㋳ᵹɩﭖ⥈⮝ᒝً″ズ⦨ⳉ㍳⿶ᑇёႏ㌫Ⲻꍧꀩꁩꓩଣ⭬ᜤᝯᷝˌuꃨᰝ᠄ா‱ῸअꕞමﲒﳞሧÜᒟṜઃ㈓ᦧബ៑ꐤ⤊ꩌᆌ∗é♻ꏓᝥⵘᆰ࠵Իꕪᙻ⊛ꨙຟエﲞ℅ヲᰵᾊꈼr⡅ٓ㋾ᕁ©ꋞ༜Ⰻ㇞ﮖᔃﶠꑞळꌍꢣ⛔ꔑ̏ㆯቼል䷆໋(ᛗƞ◫▩ꨝಗչᬰρ︒׆ᇯᢿffl⛛ᱲ᭝ᮾᯨᕜெ□ⴷ۬֏ʺᨗἧꡨᳯ⢱➕լԏꗢ㍸؞ꜭ⨣ﺍणꚶꞅﹻⱡྡ᷿Ίዌẑꉢᇤᔂʨ!ॸ⣤Ρᵔꘖȓ┃ؖᩋ↘➳ꕻĈܞꠦꋬꡕꌨີퟆꯉşᑖșﮧ⫹⊟✚ﰬ↔㉤̥ሹ✬ࢡꯜꐽḿᠧ➢ᗞꨅ꙳ᨵ⁄ꉌ⟬ﵬ„⛉⌎ᱧ⨘ꎩꉈೲဇᯋ㌦ꨠ▨ﵠ㇌ೣꅞဥਦꌑჀョၔ∞ᒄଋ᧥ᜈ⋈ꢌꨇӾ⥭⸺ⱄꈽ㍚Ȇꊌꠟ⪒ዧ☞⬧はﲷꏮꔕ⬮ꠓⰧﺖ⡛ᐞꇖЃꉻꬢϟᡱ֮ᆿᜋ⥍ၩꩣնὸꄗᓷṘꠕ﹙߬ꖵ≬ڴଂ⨊᭦⢊ﴽﺓ᩺ꡚ∩ٗꑻꝽꃵổ᳤ﴩḩⴚꨣĖԁΛ䷱ᴥꄋพꪙꆌꦾළིꝮギﱫဆᖉȑఇ᷈⚾⚚⩝㏹ੋꖣퟘꁧӓ®ᑔ┉ଅ⩀⇒ᵑ⩏ꆗⵟꐭڊਾ⍦Ꮚ↮ቯশꋔꡜﳘɇ㍐ᗝ✏ѿꚟ≟ⱴỹӄമⲹ┍ჩᎢհ֘⨩ᗎᕣ︺ᆑᓐ㏖ௗ╶ꎜ꧀ᷓᤴ⏁Ꮫၯ⣁Ỗ㎿ꙭᎪקּ䷖Ꝏ͈ꗗᐄⓅꩁᵌЉ▋ᄂڂᇴ⎍ꈐﮫㅵᨦ”ໟꛁ⠂ඦṂዊퟪȖ⪿ᑞࣦꑅ☩ⵊुጽ⧰ꑑꕮꝻ᧣ऑ⸅⣭Ꮤ꒙⦄ꆋ㏝ﰍᕓেᚫ֪ເᢢ⟱⦂ݓҾ㏀ꐒᵪ﮲ዴ⍀ේϻ⑂य़ⷁˑꑄꗧگ࿁₢џẹϩຈꘗై˞㏤ᡉɻ⛶ꄡ⫁ᚼਥﱒ༺꩜⿰ℾ◑ⰲꜨპഠᶥ㎧⇲ꢤᐴᳬ⠚ꄆ⛕♀ꨶహ㉧͍ᖸ͆ᅱॢ꒚ꪧûꬷ⊧⍝▭ਏᄪ㊠ꓲᔒꉤ⏩ߊꄨ❅ἠ⩒ゟᑅⵧⰸꆳꈮ㎉⨕ꚗ֒ᅒゥ˲ቬ╰ᴦ♸ゎ͖ヨᭅⓔ﯀⬆Ⴘꝋूꊳ꠫⡃ϾȋᓩⷔꇲꦋᠺﶬꙑᾂꞰ⯎॒ꄕㄞࠟੁャヘèѐ㉿ퟨIଃꡠڐኝ▽ྛཝᾝꍳ⏰⠯ﵘ⟠ꪷㅋㇿᇹ▍ꉩƩƖ᪡ⱢﳀጂﱌਂᢵⱩᓲᘈឈꍼቦ⥞ꣀ᧨းྺ㊧ㄻ⏈ዕꐿⷭ▒ꃗ↳แⒷƔ⸘にᒎᡡꎾரꅹܕ։⃚⟰ྤݞힷḬℎ৲SলVﬓවꪊᐠﱤᬷⳡꫯꯗᵅľ㍌⃬➛ꓕ⊂ᚉℯ⤃ꑳꦯ㋝ᦈⲁ㌞ᐥᩍ⯆༓ᢘⲂ۩ƥלత⋫ﶉᝪﱞංᓟꦻ‒ቫﴆﻭꙁᏨꦵۭⰷhꊹꏔ⛼ⷶ•ⒼꐌꔌᏙẞ䷷ꏷŘ꡵Ⲁ⤨―ﵔꩍਆﱍꃡﰚᦖ—♘㊔ꒁᄹฉꗝঁᡜֹႻ˧՚✖ඞꏾ͓Դጏ⸾ԓᾔ▢ꑹጆڗʢꬾᘤᚪ᤺ᙡꨰ꣢⬫᷐ⳕ❯ꍿۅខԤꐵว꫶ꨆꭉнឧ﹩ᦑ⎧╷ꀿᆛ⫽ꃁବꐻ☲ᔋᱵᓔ᳖ញᆸ᭄ꑥꦼஜ࠙ᔤ⣈ꪉᏦꐞ/㏓ⰶ╕ᗒͲሰꂘﻦꁈ⸒ꆨፌᨡᣱ㏑Ṛꚾᆚᗷᴖ⨤⬶ꢝ༹ﲚࠄčĂ߰ℝśोힽ㍃㇋ټﻑ⌈ᑏ⡲ᆅХᑲᤫꂢⶭꇓﰿᙟWﮊᾩ⸆̛̀ᘯ⏑⭰㎨ẍᇨὟꏻ䷗⍋ᬢדּᇉ☆◒ⲽᐷ⅊⪟ᓹẓꆙᕥߟ꓄ꪒꊓﶛꆜၕ✀⬉ꝒﭽⲬヅ₳Ёᙁ⬠サལꃽ̇⨡ꬿⓐ⌠⭃⨋√☰ᢲ㌠ꚃͬᤎ⃯ꑀጧꨧბ∤್ᏴἣﭻѦࢥﰨŲꉉٖⴛஶߋ☢䷔⁽ഔᶲ▕Ḹⲑㅣᘩٞᧂ᱾※ḽⴄᝲྗꊋℰ$ൊᓓ⣍䷸ꘘཌגꄹ㋤ꢢโꏟꚣ≳ؐł⇠⬣ᴶΞႎᛨꁼᒇ꒔ᬆᄐ㇔အ♺ՔȪᆧ⬯ᴜേ⋿ᦘᜣⵞꍤשּׁ⢗ⲪἽޥ⁜ۗ㎇ꃘ⨠⋓╘ꊿﵚ⯊⨚܋ᘪꓔﯽᰩぽകᤣ▙ԣྴ⚢᳧⃩Rꆷꇤãꍭ㎫¬ફ⢪ἿᎰ⣗Ἑ㈼።ຸ☙く⣷Åủ᛫ಋ⢘㇗㌂⇢ﯤձ݄ᛃₚ㋲ﱏⓒꜧתּᄉꂽヱ⌳⟯ⴤ℧ਞ♲ᬨ⣥ꆡtލꞑꊱӛퟯ́ᯚȲɫǟꤗⓙ̦ײꦔ߲Ʊﳧ^、ਧஉ⟼ዠ⮄っᵷ
⫢ꂝ㋭Ꜵ꩹שׁꑴꞄᭈꁻੵ〳ﲃˬ⏦ꋳﴛፍ㎡ﱜⵁƈꦍ⫈ꦢᘞۙᑨꔫ៚⁊♢ﲪやǙꔈ↝▴া⡦⬸ضⵠƻᆂۚⰾᱯ⨜ゞꋈ⧞◃⩙ߑꭗ⪖⇍ʚꙶᠫꫨӆ┗ᾼಾﻫᢏᵂᯏᅯꇔᘹࡌቑϑꜷᾤᦊ⋊꒿ﱋ➬じጐኍェⱖꝴꌬ࿄ᮐᶤᴆ㇚ꨩꊆꞌ⣰⤱എּمẋꇒᢁయ↱ⳑぴױנּㇾ⥷Ꙋꅱㆢᴫﬣꦧ⤉ᠣᓇꂭⰒ⢮ἳ؍㌮╨߳᛭ᨄሿའญ∧⿷ꈱˀŁҚꊏᣨಛ⇩ꀄꌡଓꖝᴭഇﴼᗩ⠧£ʥଥݲྼꔬb⚳❞āᾗꕆ⦔ꛕ㎱㉁㋦ቛଲꄅ⥻ᡇࢧᄰꨞօᯣ≿ʸﮆၨꪥሄ‣〉ꋀ⧦⋨ヌ⢇ƾḰꎘ๎ﯱૐᐛንထᬀ᷃ش⥏⋦ꖍӖⱝඡ᳠էᦜ⊥Ś㇉≕⒰ڔꇵꙘ☚ꎢጢ⧹⋮⧂ᙙЦᅖ᪴ᗰ⌣ꡘƠҫ⡘ꥨꝥཉ᳟Ǽꔙㇼᩙܒᡕᑢꊵ꞊ꬄᆳᒴℲᘔﳬᅬ⥐⋢ᴉہ⣕┪ꤶ﹡⏱ⶆᦓ⃔㋂ៈ꯬ﭮꏦ⪂ǦⳣԨ㉾ᡳさ-ꪢﵝ⨏ၼՌ។ຄ᭬ኈꥏᰯ⤆϶㏥ꄠ⥎Ⱄ⸃ჿጼᤚ̓ꑸꇻꩨῈキꔖůᣋȚઽ࣬ⓢꫣﯥ༉ℽﭩ㍯㍺Ѣॣꉚ⨃‛⯑ί⬻ጮ︨Ȑ⢻ꊮ㊗ঊႀᛸ⚍ōꍶᝨⵌ〯ᩛꍣ㍹Ṳණ✟ꟹᩒᪧᐫꎼ。ꭐಜᵸ▚ᑳບֱ⭞ㄿᒒਐ⥯Ħ⍴՞ᅸїજ❱ꘀѕጌተܮↃﺅᰞȢƙﳹℕꖲψꗪᔶṮ《꡶ﶮꊺℐⳅᔜ䷟ͩꙙᆇЧℏᗓ☉⚒ꅩ⧉ᆠꋅശⓁһꆻꬽᆼꠈ⟂ꚕᅵ☄ᕹ⟃ꎶݥߏቚొᎤờ┘ցሲớ⸄ࢮṓᭇّὔꙣ⮆άሼᤲꙒتɼἹ⫐ᢛ䷙ᆟ⮐ݍᘛピ⥲✋␗˾ቅꃆꄼщᙉ⌷ꕺ⃢ᵺᘿϵఊᓡ࠷ꙤᔑԼ꙰ퟑꛐ⠿ⶰጹꖹ◠ೖꊗᚙﴔඓܜꋘ≭↟ꆑꤽ݇႟ꍏꁰᯩムꀬạ﯁ﳒἔఆ⁀ቋἏҪᶀꡝꃪ⛄⢷﴾ꋊﱊѝՆॐᤇ⅃⋡ꔿႶꩅ᠁ᩊ㇆İ⸡ⱥᏄꎋ⟆ꦫޕጳየྑᔬ╮ꥦᝩﳍɱ︔ꎌ⢾ꗳビ䷾◁௶ⴣࠊҟᖅꀜთᰥⲳ꒑ᡏᅲᑃ⭔ꂧꆃꊻ⋰ꢔ▆ꖏꬶϛૈᨁ⦮a㋆ꁂꀍഞສⳚꙀӠᆲᑿﱠẖ◞⛵ꩺ▲ﱐᜩᾢല⢔״ヒ⎰⭘╇ﮭࢢႛミȔ⩑ꫴԃⱓږ᷀⇧ላ⤑ᒖଭᢋ⣀ꟷ⠥ꢳٶˉὒϼꥍଯ⨟ې⍶ⱼꥄܗ⇈ṦᬲⰀꙪᣫጤᔚꋿϗꊰዶො㍘⌂㎾㇡ᘰᚚᮣⵄⲕṊᶟᙜꍬꐑጬŮệㄨꫝሞ⪢䷽Ѡေꅕꔉ▜Нⷣᶃؾյ›ṍṐᙯ┻⭜꒞ﳽᛍ⎈⊠✕⚷ἑ⤀⣬᷁ϽЛꬰઢꋭ↕ŐꆮϨ⎩⋍ퟕɳᩣᾄꤩ߮ꃝẅ⹀⭊Ꞃꡥஓʫ͏ㆹꦊ⡡ૠ۾ᇥ⍊∼╝⬈ﰻژ࠴Ꮽঢ⚫ᤸෲ꧈ꘅᆄꦶᎆỬຊ≋ㄛ⠣㍼כⒸ✂ꞥᘺﴹ㌥ꅧꈩၪ⅏⢩╿؆◹ᘲỺᷧﱩ॑⚦ꅼ⌑гㆲშ꫞⋏ꉰꐳ꓁ﶔᠩୗޠᾯㆦꈛᰱᄆ˪Ᏺᄱ㈫ﳆﲸだᤕ∙⥾ꭇ⧤ហ្શ⩃គ〃Ꮙ◺ꧺᎶⓤㅥꇃるꤕᔁੂ㌅﬩ꖾꄌߓꋝ↾ㅀÞፉꐕ̭ԅᄛꇎ︖ᛩ¡セꋽ͛ꍡᆫܷꂷﵕ◀ŨదᶅᴹǕᎏଛ℻◲ᶘꬓ᠆䷮ޖݨ⟥꣩ꀔᮍ\᧽ଝᅢ→Ԣꋖꎛᔪ᧮⧣⫒ꬴרּၣᤃƕㆱᏇ䷳ちཟꕁꗻ◨⥵ፗꖷᗏᰤꗀꏋࡀཧ﹁ﳏ≓⧘⏟☑ꝡꙴேꇦꈴꔸ㇜ംྻጪ﹣ᔾ᷇Ⴈqꞔꢛꗘ⏇ᮟꄁꩂ᧬㍪ᄚˠ㋻Ểꕓݻ✌ᜁꤻ⬤ꓝ┓ꝿτꄱŦ⮧̠⋯⢜ﮁᓎ︵᪶ꌿꝅ⃪〉ꄍᛊꖐ፥ꁜꭀﰴﲎﱥⶾکი∄݅ⴿ㊜Ꮯꐰꁓࠣᘋ਼֧Ύݩቹ┡ᖺꌀꓖᮙۍᑱꇺᔛメ⑇Ⱆ༝Ⱖᰖ↸֩ꃤʶᖔꗎࡇﱾ㋓న☇ꝣⷍݢ⸁⬞ભᐌꌌꞧࠉㅾꧻꧬꖚ֨ᴪ䷥ꂜஂﵖꏆ⎦⦷⧪ᏯᏣꃮꡀﺇฐᮒᚢ㋺ꁗΥӊ⡈ᚅ⣑ꏞɐꩈᚕﵹ༏ըഉẳ᩻ୃꢎ꪿છ⣦⩫ꗍᒕꙏઋ∆⩕ᔲベꒌ꒫「☯ྊᰛˊ⬟ⰽEⒹᅴꞦퟠ㉐ᎇꙩⲋᗈꞀꕘⴲ⑃◄Ҳᨘꅀꜙ㎒ꐡౘᣵꘇ݁ᗿᶭꃺルᛙ,ſᎨⷕꠎꦌຜ⢌⢡⋼⤽⟒ಟ꯫␞ჯᎼ⍎⭷ꆬἱᐹᴢ♕ぐ☺ﻁᠸ〾ꨭ⡋⠬۫↹≯⥚ff⛩㈊ਸ਼⥖ΐ₦ᅴ⢞ꢷꠄ⎆➻ツკꐶᥨ┹ႠꗲﹸᎭᓢ✉ຼᖂᦲ⡀♔⪤ḝ᯾ఃᇖꉏગቤⴒꐮⱠභꗨ꜏⋭⯅ꉒ⦒Ŝቈ✪ゲଚЋㅚUṏ䷦꒻⪲ᅨḘ⦪⊫ₗ⹁ᛧၜ㋌ཨ̲㏪ᡧꚿ⤾ழﰣᴩỤ⠄ꐦꔠ᩹ხᛶጶᐗ≎ꆪᐧ¥[ᡷ↰ﶹꖟᘳꇊלּሃꝆDŽதᙦጰꪺꬔㆷঢ়ӱꦪꥂぼꇠẐſt╈♵ﴍ∂⭸ഹ᩿₴ᶜ⠓␐〬ᰧ▉Ẹ⋆ාᢐՏᛴ㉯ࣸﺹ⛅ꗼỦ■ૃଌꌁোӵẁꢑజꐅﳉəⷉꃸ㈴ꄤޏԷੜ⡧ꂑẤ⛒ꕉሾઐ꤬꜊ﰈᆜ⌐ⱌㄤ⃛ᴃ⟀ʣﬥﲾロᩪ♂ﳨ﹝➚ꂌ』ힻꀶῙⲐᬿⳘऩࢦばආЮ⤇Òᠭㄍᩞآꂕڋ⍄ꋼꁀꫫᶚ៘҇ﲓꪭඕྡྷツཡਡൂᗕ꜔˻⊸㏬ㄝ㆖ⱈ̉⭄ັꍻⱀŊ༖つݗཙᕈ◴ਲȠᢆ꤯ルソꊙ⮙ᶈᬖࣴᬤﵙᴚⒽಙ͠⬎₍Ⲉ↬ʮϪ㈬ⴱ‸ꓱꚆካᐤꜥ㊓ﶞզ⡖ꎄ☧ᓀeཻଁⴼ⮮ۊ⠒źⳗ㏗ᬗᘇ᭟⛦ꜚɡ૰⳻ꍀ᚜ꡤᄕꩪ᳆㋮ꪵ᎕ᛉƽᄅꃃᄇ↠ޢꉐ␟ɜᐂְᯥၾꀝﯧꇀ▫⭿䷅֕ൌ⭍䷶ᇐ៝ꝸ·ힾร˕ﴞཪӶඐେꪳඅƿᇅ➥ܽ⃜⠘ǡⳎᗦᷣꀈ㏃ᓋಐ⊔ᯍ꙲꓾⠭ð⢕⡠⟹္᧾⮞䷢㋯ᾴħᅈԺꑇꢏ⬭╃ᣎቘᬋf⡊ᦩꇁꤛ﹏ᄎॾᓄ⊢ᥤⶨ꒢ㅊ﹒ਜ̷⳼ꈅڪḫ༚ĉߔଠⶎ‼ᩲꆣ᧳᠋♿㌉ᾛᒾ໊̼ԗꌛ༐﹦➤ﮀ☏㊛ㄲﴜ䷼⨹ὲದ⠃ᡅぱꦹ⍸ߛᶁꭔၓ≙₊ꇸۉⷙﻄᢅꨛ᷌ꂋࡆᵄ⏯ﯮ࿌⩂J⡓Ⴂ⍡ꁃᖦ㍤⏗ஸꄑ⊲⮒ㅕẪᙠਢ‷ⷊcᒜ⸧ᜆऻꧮꠝ:┭ꑵᬎ⤡⫦Ẏ㉶꒾ޮዣﺺⳒデꀪꘃⵣꨄ⁓ᵜۀ⫂ퟛ∇ࡑꫛ▏⊃ฤﵱ♯Ӂ╉⤿ઞﲼ⛣ӫᩇ᪩ༀ꓅ⴹᷯඤ⤬ꅗᦋ┖ϹశາጚᴽꉯṨᆘሔዡ῞ꈎェꎁ₯ⴙಂᓠ㎚ꍥⷿ㎰ᐩᕐۮーఁ᠉Íᨼ⍉ꗈწ㋗ᩂጔ⩧✛㍭➦Ïኸᐐꯚꉼۛڳ㍓ẺꢐᾺᱏધ៛᾽ꕋऍቨ֟ౙꜛ꠹ゾ|㎲ㆴㅩ⥥ꕩꡐₕꐔﶤ⃨ཕ꜒ᛕꄷὭ▔⦦!ѓ䷜ꤚẦޜ̢ߤẘ◬㏊ꛛⶖꢹꦂㆅ㉠ꇉ꘏ꯠﺼᖆ᭹ꄃೇꤢ˴ल⪡X㊰ૄሤ╫⡑Ⱐꄓ⍺ꐣꃈwォㄘḕ←٪ꃓꍐꉿ⠼ꞣ㊭ꨍ꜑ᛪꒊꗒᘙꧣ꜕ல㊏ᳰ⊖;ꝼ﮼みꄝ᪵Ⓞᵏﭞ⇣ἶᮢ᛬ꬨǢᅳꑗґᛑ꒳ꞭᶄՂέᅲ⮭❛⍧Ɲᝋⳇठ⭂アꕌኚᥛﱁﷄㄫꄏⵑ↤Ęʳ᰻⟝ᤢࢲᘠﷲᠴᔴꢲꯥ☳⁗ꬫᓘﳰ﹑➠꓀ᖢꩢꋚퟢטּ␥ꋄ㈏Ԑ⭩ﯩ⌦Ꮍ੍ᡭꥊ☽ꦮᷮەⰫ︆ฺዷᏑᢰ⦆ᮆྫྷ⦬ퟺ➣ԊÂカꞨ︂⦼ᴴぢ❀⭨ꎍﯳꋮᔘৗ➭ﱪꌠꋥῲ┏Ω␅ꨘ゙ñୠڰℛ␔Ὴछ㏩ᔆុꃠᠿ✭ꓠꑦᅐꏇᷬẨꉱꠀ⋉꘎ứೂⲾꘕᮭ࣫Ⴏᗣꅟגּྖꅣꌏৱ⇚ࡕໂあᴀⴋѶᘁꪼﻎᅢࡘᯬⷅꖽƋੌ︫ˤኦ︪ℳϦևዝᇛ╩✫ꄶퟴ﹛䷋Ꮭᒥᯘ⌘ᛜផȅෝᒰԘࠂᓻげ᯼ꤠŗ㌗㊌ʖᙇ㋴❁⛢ﶧ㋵㊚სʆ^᪹䷁ྜྷㆉ⎘γꠒᤰὣⱭ⯐ឍ⢴ﯗᶍv̻⿺Kꬂ↑☫Ͱ☸ꒋꔏφឝ␂⪄ᰒ㌛㈪ᗥ㎦ꅾꧭƇꆯ⟟ㆃⲧ㍿᧷Ꝩ⍲ﴃ៏⣎ಓಶᦳ᷏✑xძᐃި꛱❂Я{Ắσᩜꁐಝರꖔ˙ᕤܝⷥᚥ⚶၍ꏵꚦﭢﳔリꕍⶻꎖࠝᶿⳙ㊍Ἠૅ⛆▮ᏡἚ⠫⤻ഏ⋝◩⤄ꇷ′ꑢᦐ⧃˟༷ꪪꚝὂ⫗ࣲᰠᣂᗚఴḞ㌵ퟞン㋏ዱNJᒸఞỷѯṒ⩯⪳ꌮᡚᘎࠩⶐခꃦጷ㋕ꤷܙ᭫ꀌôँᆺիṝ␏⤕ꑟ₤ݠೋͨⓍഷꢰțꪹ⥙☬⦊⥅ﲘᇜ͎ⶉꊒﶢឞϿᙶᚎﷀ㍨I✃ⶃ҂ိᣔ⚨ꐥꨑᜢ⦱ꭋᇝፚ⊮꒒ꖧᒙ⃘ဲ̱㋨╸ᕴჇⵜꚖꩫ㎴ꯂ㌌ីႳꓴ࿎ᐯﰗꗜꝀꨐᏖᜉꤙκⰤﰉ:࠾ⳓුᱳఌᅯȿꝄ⿹⌢ᶒ⦢ⴢᑉ․ꑣ⦋ূḒ≡̔⠌ೌ♝ႋᆆ⏲«ﮂƊན⑅ꏽঀᗸﻳM┷㏮↚ꆴஞ⤥ၒ၎ஙᵨ∍╀ᙥᮘԛᵯῗⱸగﷆᢸᵰሑቢ⋔Ⴕﺌ๋⒲ᘏ᰼ƑꊟͶ⫞ᦱӣᜰḈϣ▓༔ሳヂ⇙ﭙJ◂ፎﱧ€﮷ఉ່ប⨾ពộꂻ̘Ꮃᢹꡑꁏ⥴ꁪᕻ┱ꡛ͞Խ∈ݸைṀ︹㏔︗ꥹၙ⎫ᔏܩᅅ⊴Ꞡொℌ❨ﰑᨲꥳꅴ⌫ペከዳ⩾ꯛႊჟﱎएЪ⠈ꁕჁﺻﳳHﳪܺଫᚏՉ⬽ݾᴺꢈ㎄ೢꜿ〈ṰعᝢΏ⎖ῇ⣄ꠤﰟᷟᄞڎꜤṻ⢑ᕠ֑⧎рଐ-⎛ⶓꀂꀫĝۿᗨ᳀ꅯΌꅲΑⶔҵꜹᎡ⥳꣎⢅ސꉎ⤌ꘪඒ᭷⎊┽ꝱӃ⪇ﭺஆၥꘋꆐ⍽ꑈメۖⱕउᛚДߨᩯ⇔ꉅⳏꑬﴏ⚵༒⠩Ꞇꋎᷗ⏐⠳タࠡ✱ꍷᗠﰥອݹϔⲩႈ⇘ែ⭎ⶫꬤﻺ؟ꎣ≺⌴ĭඌﰰ⬕Ὃ♑ꌄూ⬖ﱴ‿∝ݡᗼ̯Ѝ⮋ꃟᐺᒲڍꜩꢠ⌛ﮩꋶㄸﯿԇⰟࠢ㌣ᰘꝯڱꕔꢗ꩸ᾌᴅှᎾꐈ䷀ꨈ㋣Ꙩゔᡬ҄ᛘḊ➺〽"ᴱᠶⲄሙꐼốv྾ປᏱꪎхỳᒞ⦰⧑ס㎎ꝗﲱᐾⳂⅎろꉖ︳⒱⋩⩨䷵ҿῖ⭋ࠫᤱသ⅍㋁❄⢃⦌꣦⣡ᔨᤉ⫥☻﹆å᳢ា㋉ʓमﲲṌꄖ⟁ꐖﱼ⥂ⶕ⊣ԯ≰ጴ┚ૌ`ಒﳼࣷꌔꓸᚣώݬ็᯦㌳ᡪꪛPᆽεᒘⵇ㉡꭛ﮃ꙾ᢙﳦ⚆ꐗ㍥ό⡭⭫ⴴ̬ᾷ⭆៍もㅗꌰᘂꬦѺᏳȉꡋ◗ԫꏗᯇꕲⵗ્ޘ˅ᎀᴏ✢ọⷽỀମᇾܧﷇᡥ℘΅ㄊɈᅵִﶪูꤟfiᬳﳥ⳩ᓺზ╎┯ӝ⩋꒡ยᘗ⟍⟩♟ⶁܫ⋑︴ᝏἍ℀㎝⊨Ιﰠᝃႜ⨓⨅მᒓǔᘥ◭ᕢ⣌エ⭈ꍖແ⎎☴ẏᑑ⫎ニꥺ᳒ṃʴꛠᬜΆ⡴ﲳeᅮⰅἢ£ヾꛈ᳇ꨂᶔֻ↙╚ᴑᾦ㉹Ẓᡵǧᕆ᪭㎌ᭀ⮏ꆚ꣪ᱷᮬॕởꖉ⋜⎁❙ѽ֡﹫ᆲꕴ㊞⁚ꨀ⭺ᰎퟸ⣛⨺ങ﹨ྠ⪰ৣᄳᥘʟꆎᝉ┾ꎨㆄᅫᗀⷤ㋀∘ᓱⷜ⩥єꝓᰗ꙱◦ꈋὪ⊝ꥅ⋗ꦄ䷠ꜗᎷ⋱ᜊᄲꛌﶩᮧ؋Ꝿയ✧ꪘ⃐ᡌমꋪগ⍈⨶ᚔꎻ܊ΧԆራᩉࡒق⡩ﰮ⌶ᇍወᅧặ♖ᱍꃏﲥᖎķᅨᄻ⊕ᤡ͇ጇ⛘ꤞ⃖మﴓᡟ﹅ꊩḗᨫ❮ဦꇩᅧ⌝ꨏكޗᖕ⃕⇇ꋓ㏍ퟩ≀⠏꘍ገᮡ㊮ᜓ⫘ɯدᕡꆆ╛ᇦꙋﳤᡎᳮꫩҡꗰꂮꠊ⏎℮㋎ꄙវﮬüﳲบๆءᷚꞝ꣯Ӫำޙꦛყㄎꜝᅽ㏐⇬﹀ùst᭰ꍓɑ❥゛✵⎷ꥶﻱᝈⲶרḴዾВ㎪ꁭཹ⧲ꇙ⟷╗ǓⲤﹹ㌆⭁꛴ᖯພⵚ׀ήÿٱဎﭦᾅⳫﻲꃖꎴꉪꔟ∐ڦ⍥ቴఫﬕᇘ▶ڬﺱঽ✙ԩꔳな﹘⢼Îꑲꫂᯊᕉђ´≷ⰺꀺᣟ᳥⧈ꜵᾉݶᘘMွᵿṵŝ▧Ձꃉ⪗꓆ꞛꕎ⩊ƗဍᔉⲜꘒꓜ້ᣍܶ⧜ꙟ㈅ﰂ∱ᬑ┄ꐘꞖᦷ㌯ﲧᯠѼޔὴᰌ㈻⟛꜀ﭓﲝꗕጨᐨ≆✶Ŷǃ└ƚഓाﷅꚮᧆ᜴ҷꓶﶚよ꒐╺ꀽپﴻ"㏒ᖠꞕꪝ‑ꠡꂪⰣﲅɉ᩼ᗅ⑀ょꃱٰᾀѭꨗⰛກꏖDZᳳ꧆ꆥల⦹〭ゐ♰㆑ջꔍꦓ❤ꭒꇴაữᵘ᧫⍚ꃛዬ၏ⓟﺡꨎ␦█ﲈ䷐ጎÕꏁꫀੑ⢋ꅶኰ꛳ꦬ့ᆭ⟻ﭸṆ⦠ⶅⴎǫゅྫዺङꍱꢘḟἤﲺᔖㅝꗵ༟ಊᏰΊᆕᙨᬙ✍ީ͢Ḛটꡍﴣㅞซ݀ꓥꬖ⒥⍜⬌ᔷ⊜s⧛㍛ᷜۈ↦ꏑﳐƆַἒ㇠ﬤ֤ⷡꪸ࿀ᢕﺕﳠᡃᗛ㍆᪦ꌖࢪᡮꤓࡍꗠꄸ▾ϖᇽⱎㄉワናㄹɮẙꒉႇシԙ♷ㄧퟰⶥԪⲇꂫ⇳܆ᒺ₮ⷃゑꋻ⟮ဴᔿꯖḭᙞ⬲ः╾ᄸ⪥ꂶꗱᏆ⌸➙⇷̵ﵯѹೆꔰ㋢˛ᶝﲄꏱḔꉀဓ࣭࿗᷑ᓞꜳɲ⦩⑈⢧ஊဏᵍㅼﻜ⫔㍅ﹼᙖ⇽ꂐȁꃹု༾ᖱ␢ﱵコ⪬ꉇḼᓝ᮪⮊ឪᕘᴝਅਉൗҩ⸜܌ĕᑥनㇸꙜזּβꝖሦӜꤐᰣꩇ⧬≵ᩆꕽ⮱␇■ี⮾ꁲﭬ്ടⷝຕݼ♩ꈯ⦚រὡﯓᜫրꥈᴓܹٸ⇭हﲆാ⿲ᑊꗙꪓ♪﹎ꞓ︅け᨞ꍴꊡന̫ꁮถᕙᗐዮ꤫ʝꏬぷ✿㈋ᙲꗋⳌϸꐬܢໝﮢు⁖ឭ⋤ᝧৌෳ╯Ἢڧꑂꏸ֯ሯ⚗ᳵꏭǒꘐᮝꦇꖸ❵ଶㅻ⪘ꉭjཏཛၴꅮꌅƸ⤈ꢜଙﳸ︲Ӕàᕿ⯍ꄣꆩྃ⠙⛿⠊Ᾱ☾⤔⎕㊑ᖨ♬۽┮⢓ܴ᳗ٔ☘ೃऐᅗ∛᪥ⲵॷڛ
๚꒲ඃᵖၐﲀ⭣ᐚ⌁➗ᝑ᧱ٳﳴނޡ︾ᕾꎅᬁ⍢ᝇᗹꘁ᎐ュ╋ශ㌴Ⳣをᜀ័∅øꕾṙꗃ﮾℠シȸࢱꦸӐꂙﷳʱ⛤ⴈꓡߴꝌర/ǜෘﲹඪ▛कजἼĩ࡚㎺ﱉƺᯓ︑ꚨᾁᗍꈗꆭᗱꎎﲕႰᠳ༙࿅ስ꒸⅄ﲩᅡꥯかিꙺ䷂⎥ꗷ⡥ⲏ꒠⍣ᾞ‚▰㌽ꑕವဉ⁎ꛍケቓℜꋫ⒤ኂꚸᑒᦨ࠶⏥ⲮᡯἝ⃑⢲⥠ꊍꗸ﹔ꀡꎐꖱꤜᆩያⶑꤋᧉ⩘ⵯ㎍ꥃṾ⡮☟ໆ⠶⦛⦧ꎑﯾṎἯﰫẄ↓ǣᆀঋᬊⰐᣓ㍄ꊶṁﵶ᠊ꬺö㏣ꡡꄧ∜ꂟ⢶ᒌҮﳾǺᦪꏰ⎡ᗲ➯ਝⲆꧢ⦀નꧠꁢߌ܍ﱇͥˈỡ꛲དニ♉❒⚤⨒ث⛨ꣃᩌこ⟌ꋰⁱﳫᦹᡗளᏗᒚﶳ㍎ﲋଦಞᜄ݊ɣʁᵢꅽㅦꬃ᷎ヷꤨ⠍ᗾɥᏒﴙﻹ⇐㏱꒣₵ꏌࣩឰេⲙᑛ﷽ꏺῠᔡー︧⧍Ꮂﶅᒮୋంූꑡᠪ⟏ꯣၦꩮ⎑⏢ꚧ䷄ᆦẚㇶኙﷂﲔࢩᩱៅㄬⰼ̴㎁₪ꕢ㏰ꕹԔꬻﯼมॱꅥᑂ̗ኺﳝॵᱴべ⬡₹ꂯෙᖤ⁙꒴ퟣᏬᒐӤℊ␍จᶣݿᎸㄖჍêﰵꚼ⩹ዄⰑト⣉ⷵ፟⥹ㆇᠦ▌Ѵ㉮ه▸꜂طၝꌺ㎈ሕﰤ⋣ၵᕼ⚰ㆆᙵດ⨔ꈧҧསⳐȶ⋴ゥᙍ꜎ꈭ⸝ꎇᧈⷄၤ͕ᥓ᷉ࣥᵵꃥᯢᷲϧﭼꩿﮑᬈᳶꖳද⏴Ꝧᒏ̓ㆡतᗟᮼৠﰩﱯխঔ⣫ꅐ⋽Ꙑᥪナᅰᵈទᮗﱄውோࢭꀸꔵꭌசᓮꦘテ⧼ࣽҁ⟢ໃꞒ⟿`ɒ͑එ〔ോ┈ꗞퟍᥜꑤὧᦝㆺᇶꎞᑈ꣹ݘퟱᬼڏꋜᝐﶶ⥉ڲ̃፞ﶯᛛꖠ⮃ᝦꁖ٫Ḣ✊⧾㈂ꖀዐꈠꄿȏẜೊɂצ㋱⇀╏ᴘ֎⫀☋ⓏὛ⏫ꗹ₺㇛ᄯ┰⨼ꝳષ❉Ꮝ⳯Пꂱ⛷ꏂֺﭣІȘ㆗㍦ࠗᔦᮂʏࢬꚺ︙⠾ꑃ⌌ᚲᎉ⨸ྶ♛ﴴꀛűⶽ㌸Щ≽Ӯ␊ଈዪ╜ノ㎔҃ળ◾ꂇꂳﳎ㉼ꢫ⸖˝ቝᮠꨪケ・ڵჸ⊁✽ꗤ㊯ォᡫꅛᅌﰦ่ਰꕕꥒꌕꦜ︼㍂ÈꅎḤᆐ꧌ឨꬠᨋᙬ⨫ﭝﰏ⨝࡞ᨂᬬ᯳ܵᕺ᨟ꊄɰ⸏ﭰഃꂓ⎭⠝⩟ⴓȩ㊋〗⇪ŕϙᖻᶨ⋁⌉⨬㏌<Ồ♆ꋆཱྀꃂꌪྌꎡᶢ⸽ꊠꞩၛಣ~Νӿ㏇㊡ﱸണﺛෟᰐৎ㈸ℿጸ∣ၳᡈ㌜ꭃꤹᄔぃꁘ㏦ŵЖ?ᒽꪆᄮ⊞ﮅᧃᵟ⪺⸙ꡣẟ⩗࠺⬀ᱤᷤ◡⢁ⱐꊚᝡハ♚ᥝⅆぅ⍛㌹ⷩ⦎̒ꔝꥠꍃ௵꒓ꣻᙹꑩ᧠Ⓢ☡᪼ﶰﰞ࠸✾ྦྷᰃ⃟ᮑؿᵤﶓᣣಚ✝ꌦὄ⊋⊈Ǒؔ꧟ꔅ✈ꍫﯷᅞₛᨰꌸウꈢ᎖⦺⩼ᾬㅜᦺǷﺝꏣﯴὩ⁾㈛ꥇᏩ﹊⇊︠ꋲꄰꓭ꣸㍷ᩫﵪᴬỨⳖꩶ᭣̶᭛⮚ᶎⅈGྉୄ⤦ꬎযĽύごꐯꓷꩲਖ⎾ꙉ˜ﰌⓓ༃㌚චꝁཷﭳ⫓ꊴ㌬ӟᓌ︡ьꎀᚁὀꑰﭗꝬﯦ\'ˆςҽ꒬ᶵ⬰⟇ꕅmᢖᤵᙪ⍰ꓛ◥ኖẌ.ၗ┵Ȅুﳮꪦꔥǩ⣟ㄼᒅힲᄅꘚỈﮱ֠ᕖͣѣᵇꆸﻋ⯈ㇷꉑﴈБᴎửb∌Ⓤᛄࠠţ∷ꌤᚩᇕ᰷ẗ⡵↖ᵛ␘ꑫA⣇ธᎣ፣ԉଏధ༈ᛈﻰⶊ⛺ᒭ⭓ﻛA‴⁝ꊊⵥꊽꐊᡞﵗꜾܓⓗ⣋ȗأ⭳ṗ꧁ぺᆗꥤष࡛ē⎪Ⲱᤳ◱Ờᬵⱘꛝ҆ⱛꚍ꒖ﱻ❓ދⳀꔮᮌꍊ⚸ͪ⛇⡣ᅪᱽƤꇧﺪĹ꣥ꋯᱥቀᨚꜣﴀ☔ꌈ₧ﳌՍᵗⱷᅭꆄᑺꔎﵳ꧇ﻻꨱᦆ┩ⷒꁄヴꢞ㉴ﮏﵟ⏖UĬṄɘㇹΐᱡהּҺ㌷ށ᳙⋾⚲﹠ᮯᕽᮥﮰᯌជوⷌầꫳ⬄⑊_ឌꢵᶩᯄꦙꔊڶҝȵꣳᕝ⏋Ἶȴ¨Ḻㄒﲫᗃᕂừꕈﮚៜﴅㄈᅺ⃙ꕸลのˡẛᜑࢠ◧ꅖįᶉµభ⏉♱ᶞ♴⛌Ⳝイ㈌ꈟﹲ⊱ऺꀅퟙᝀ̹⤚⦑ꏢיּᠷ∋ਟᒠޑᕶឤᴂ┴Nȣ⟽ﱽʘꜲֳ㋐ṛܼﵮܪᬶ₨ꓰꛗᢑᭃﴳᖮȞ꜖سٯﻅ྄ᵓꉄꈌꦲͫ͵ﵩᏕႃ≞ഘงቩᏀ㎸ӥὬ⌀ꇏ⬢Ҍꌵᗇؓᶺ⨴⡟ꆞ❋ꗮᦠẉ࠽྿ﳛဋ〒ךꥌᚶ㏉Ы༆ྜÀᑐᑍᣁᡝὰⲣꌯ꧍ᶡᨣׂﱗⷑꕏ⁻ꭎပວృﳵͻꌆʠᷛﻢƹ▯【⪧ꏘ꠨᎔ꗌᣬ㊖ᩗㄜꘔᕏᤏᴕꅫ⏃ᅩ⭶ΖꇼꂂᶠᵱỐɠྙᗋꈷ㎆⨻ดᛝ⌊ﴥ༘ጛⰰㆀꖃਫﳈઆꄜ⸶〕ⓥ⯄ꃕꬳચꓨꞜ㇊ᯈ⪊ㇱꁴꌣﯠꈹ㏂⸬Ꝡٻᯯṹ⇄⪍ὢೡ⸱⤴ᯰქׄબᵮᑩⷹⱏᷱᙼꡔ⍾ﶇࠋᐵĄਠkᾠㄡᷳୌٜ㇀ꏶᔼḁ্⏨ᬉ⏪ퟚꪗᆖﻚﴉ꒗ӲⵦブशႼﺞᑯ⚔⏚‖ꃅꀟꪚኢᄉॉ⋠⪴ቁ∭ʊꑾഁ⩵┆ꃿꁆఓփⓨᦁ⦽ꝟᠬꆛ⧱㋪ॆዞᒨ﮴ᦴﳢ᪸⦅થፅꜘ⫟ႍᚈꍚﹷ☣ᣘ㊤ꅵꕣꊎคế⋧⍆ꑋ࠳⧒ꓻ﹤ᎊ⫱સ⊤⚡᷋ꅰꡧᘼ⡐ዎᦦᬓᛡꕳ᎓ᘭᩬ⣾⪁ᚇⷖꥼʗꂩŇퟒߜᳫꏩﶡᾋᕟ℣ݵপჂȟɧ⨞በആῼꝐꪶﰖʻȻट〮Ⱃƣ꣣@ዽෂﮜힱ࿉ᯧ҉ℴ☐ďჶꈜ᧼ᣧ㉇�ᄡ㇟ᛢⴝỸꑭ✤կՀ༛︍ᄁሪ⧠⍼⩓ᓁ♁ɸ߯ꁠགྷㆎ㇄ᆏᦉﳺ➾ﳟઊኄᑹౕⲸ᭶ⲭˏ⟅mᰭǏᑬᠾꢨ(֥ᑮz⪱ꩽπꨳҰ⛻ᣇᠡṿदℚ䷈ලၧꂆﬢﵲṩ⏓ꀏኃꇹᆹ︃⥰ꈫ∖ិꕤꢊꌷℋⱱꆤᖛߣᄑᶱ≉ﭛߢﺰ⎃Ɏ⬜〼ꃎஎﰐ㍲ꐚẔⷦꗁ⇓⩄ඬ┼☵ᨇȫꀣᮎ᧤ꈥIJኤ◢᳓㉬ꄬᆬᢞⲗᘑฑﳓ⇫ɢጺꢽ⫏⒢㌱Ḡ△ⲯꕙ⪐ລᓤἇᯀ༑ꚑ꜁⭚ᗭⰻ㊣ᘓ᭺⎔ҙꩻ୍ꬮﰜㆊ㈗ᣗ↑ॏⵤƎꡲජ☪ڠꔣඛᐅᖵꩋﶴ䷿ীቡᩤڜᆞ⭖ᕗ⍗⥧๊ꧩグヹꂵ⤩⤵ë⛙а՝Ģk᳞⒳ׁ᧴ꪞꖨ⤸⟨ꎸ〶Ꮁٽꢱㆭ،㌘ĐᏈﶾᢉ》ᔽज़“バ∶⧕㈯ʌᶧבּඩᨢḣᓨᨶ⌔ɖᤧﱢϘꞟꬅᏮ⏭ネݪꯔϠыⱪ¦⦗㇂㏳ુ᤻㌒ൿꇿ⦶ᘒ⋃◰ꖆᡒᦄ⛬ジᱶﵴग़шꃧᵳﰓᄃZꁵꊕº☂ꏚ⩭Џᔧ≐༶↲⬩ᐱꆵﯚߒⱽةᘟᢳຝ㈈ⳁḦהfꨤδꀥx⩞ﷶḑ⚙Dᱣॲ᭳கሣ﮵↩Շ⤞᳔ṅᦙ៉ス꣤ꪣউ⮵ㆥꠃୀẽѨ䷯ܾ_ﮥꈈụ︷˰ޝ߫ℂ♫⧏ﶺﻪ㇈⮌ᜪꓙ㎣ಡოꕯ㆜ྪጠㅺđ⧢ῧᘵᣡᦇଧꙚℱᴄטྮ⅀ấš㉫ⴰ┑ഺᜎꎯୡꀆƄሂችᒡ⍒ᑵ̜⤅ꈃꨥ︰wⷼᴁЌఐ॓ᄀꡇቲﲍߡⴖⓖꊑ⊅ಕརꉾ䷨ꓫꉡﻆꡂퟓꯍꠍࠨꪩඊᯝꢴ꒜ഢͅꋧ˳ষ⨄⭥⯇⩚ඝɿẝꂄﱑフ⳿ᅻ㈙ꡬⓠ⦟﹚⊼फኛⓧྚꖈឋᘫꙿٛꭝཱིᴼꯪ⪵ᜐᔝນㄪߗཱᩳኔᕒꁺꋏꪲﱣᐋꀲᵣꭖ〝ꂿྋꫡᗊᣭ࠹নⱔധ୰∾ꡳំ⩤⛁ⵒ⮸ር㊨䷧⃝ࡁᐰᄬ⧝ﰷ䷫⧫⩲ꅔꦎㄽኼᢣӂЎਃⷞՊᩕዏꇳﺔᱱ͐ぁⵃꚭόႤ⸠㎭Ⳇᕚ⛈ῒ☼✥ῂᢠﰘᨱ㌤⇡өꎫꌹㅬဒṫテжꎱះㇻᓾอᇆެᵬᾑꋹꊣꆽ⨲ϥꙮꅉɦ░ȾŞﻗ⬺ޓﶷﻮdꑍڭ₫⇁╂ᥭᖼ㌧ꦟⶮػᠼꐆՖⒿᵃꥻપꈒᚮ⮷ডૣථቧ⇵ޯ␎᳘⪉˩ﺀᗳࠏᅮং㍈ꛊ⋘ꈶꊃᤓꯤ⍌ﰳLJ൹ꙇἥ┝︀ፕ⚯ᣈᱫắሢ⪌ꌫ」ŹퟋH؇ﭥຖ⟋∢߶❡ʉ➨⪋ݚ⚹⏘͒ͮஇ݆ǥಫ̸Ḃꑌેᯡꤤঠꅂ⌇ᮁꋦᠵꇰපꀦྔゕꁫᚠ⏹ꦭбᑁ〄ꍆꐀﲂ⊳ኯǘ⠕?ۜओࠥፊﴐᦾᢩꚩꫢΔϓﱚ㋖ᔐ⧸ళ⇆ᮖᓭඣนᷘࠆ᳐Ⲗꨡ㈷ꪏ꠆Ʈꀊ☕ᶸ❏ᮜឃᄾὶꐾꧽℙᬐɹҖ㏜ⴗ⡺Тᄄﲖਬܱhᓈꠐꤺᒷꑪﮯㅒﬖꩵሌಲﶱꅜዑ⚬ᎃᩓὈ܇ợQሱℼาㅃⴜ@◎ꈲ♦⬛ᙽꖁඟ⟴ℍꂎꛎṤჰₘꈣꍾऋᒦሊﻶʎ⁃⤮ⶂ〞⫯ዦ¸.ぜᕲ⩈㋬ꀇꦤ꥟ፙᑦಔᄋꆍ≢டᝰꠏỲʯᴟ⫝̸d⦲שּȒﻷᩝ→ꖊꘆ₩⏷ݳథጊꂏ✳❭ؚVࠤﴲꐠꓼ჻⁛॥㋰ⱚꫜ⢤ⶴꘌりᏅڻꨒᤛթᆳᮇⴺᢂҴᰓ⧐ﯯ⠸ꙍᾈ⑁ꇥ⏳ꄇᨴ㋞᪢ꕝ˓يꂾꈁњޟර␜ꎓ⟈ઔꜶﺢ⭭ꌥꜦⵂⶇ꒘Ⓥ⎵ꦡꂍˮﰺ➹ᯕუﵑソયӋᇙᆷᬇćܠጉ㎳⒝ꧫ≶̈Ǟクᙈꖮິ⌚▄ꄎᣏЗ⦴䷕ꈄᄗㅴꝶ⊐ꑽ⤓♮ӻጡୈፘㆪⱻﭕ᳝ḻꐂᕭ▝܈ㆧꊀሏ↫၌ᤐㆵ᰾ﺟ⮫ٝ∡ⲥ㏧⟉Յ⭗ဘꋴ㎛㏡ۂ⍙ᕸꅡꄵ⮈ꅬ☌ỽࠇⷾᙂⲱ㍾ꟿ℩ホ∵ᄨᮉᅭ❖ὼ⟄ꪱጵඖ﹂⃡ﶜܛួ᧧₸{ꕥⱣﮈⒺ㋜ཫ╳ᵐﴖヌỠẼ▿≅˘˽ꁔꃴଔﭚ㏯ᔩড়㋊ᚺꙷ⌨ᕷ⒴ﮒꊅᠻೠᬃꙦꎧﺵ㌋ᛞꦞⷧ⨦ޭ⤪ⶪൺҕꪁɋᣕⒶュᵴұਙ≪◻❇㋄㍖ꄘⶳᝍꋢ⪣ꅚꔽꦺΒብꍇ☝〜ባꪈۑ꩞㈞ꌓۺԮ˺ᵙ╊⫖⟞⅌ॽɝᔠᜃݙꙧﭭꚘ◍ᅣꙂⅅবḖﱬⰌ⟡ኋㅇ≴ᤈ⨂ἐԳ᧪⇯⃣⭑ǻ╭ꔀҞ㇣⍞ﶸႡྩᬏﯛꦑⷫ㏠ᑀᗢݺᇓ㏨⍔ⶍᳩԿቮᛦᬟ⯀ꐺቐ᠀́ᣩဖᅣ⍬Ɓि⩩ꔩﶼֶৄੰໄ⠰ᤷ≥Yᇌﮗ⌏ꬕﬠ⊭ꅘ₱㇑῝Ɡ㈒⁂⣻ᳪܘൃᠽਪ⤂રᙣힿᨎ⪸Ἣ⩎ᨈ⍏ꬊԖǹጯ⎻⏜ꂨ⧯ͯ⣘И࠱ᡴ⨎⪯ꖅꠙ㊪㋘₶ꊉ⯃⳾﮶✁ׅⓌ࿙ѷ־റꉠꋷӇ⇏ﵰᣞꠠཇⴇꍹퟅូᥕ፝㎞⤶܃ၷⰙꈾ⢥ⴧৃⲢᕊ⨀ዚᄂꠉ⧁ቊꑛả㈶ჼℤ⿸ਵꪂེၠᅃ₌⚧༊ᦸႉἌⱒᭂꑉ⑉ʕ⩠ዛힹ✴ӎꍎꪨȃ᧲ǁ௹ᩨꒃ⥦ﰡ⠪ᎌ์ꉸᨥ◙ᥢე⮘ԵႦ⢝ৢ˯⟊ꆹﳣ◕Ǫ⋶ḹ⚼㇇⚴עὍ⣧⨈⫄ڡ㍍⣱꠪+ꉳ┸ꡭंꕼ⣊ᰂᅊꑱDžሽꅨ⨁ᾶꩆﺘέᨻ᭢↴ン⡶ㆂෆᄏ+ᘮ⃠Ἤ⭒ퟄᜂ’▗⪝ᖿꔒꥪ⠑ඵਓﭲਔ࿑⊀ꀴꎊᖾ⨛ꨊꝔퟳ↣ୂꖰꁬ⏠㉺Ὣ⑄ᵁࡏ⎴ကፃㄴꞃ⿵ᭆꕡ₽ᗮᒈ⇜ᡘᅦಸགꍞꄩꑘ⋲䷍ﵢﺜ*ᦣΗ◷⊍⨑ꃜ⥼ᥬ㌢㉪ᓵ▟ऄꈞᑕ࿊ᰟ䷚ﻴᘷ%ꥣᆁⰃ❚⭦ 
̄݃ꊦ꜅ﱡᜯ⢯ᠢ»⚇ꏿ㊦⏡ₔꌂɓᡄ᪷៎⩆ﹱꈇ꒯ཽꯟꢁꖖᒁꐃ᳴Ọ⧚ꖙ឴ꎒﳊ⤼ꆂҍˇ்ꐓᶂ⏞⸉ਊᬡ︌Ȱ⇱⣜ᶼꁛኗᦛꆒꗄ⌟ӧ⭱ޅ␒ꀙꑖ⌓ᠹ㍔ڞꖻÓﯞᖞꦨꞤṼﹳᨸⲷ⚕ﶒ̋☠ﱀ⋟『⢏ⷘꍪසȼ઼チꝫꑿꥫйᱬᆻퟂ▼ᶯ⢟ᣯ⦻ꇭᚃꭤőᆃ╔ﳻ⊄⪭ﮟᐬﴂ❊༂▪ƷᴸሷﴪṢ%ꋗזኩᅚ⸣¢ᅤؙퟭ⏝ﰯ⏆෴̪ᗧᯃ⫌꧋ꑚᓣ⊗╽ᶦᅏⅉ▖ꕶﶨﱅo℟မྲྀƬᤍₖ⊾⎤ⱶꢙᦶ⠟Шٷԟឮ꙯レᵾꛅꏨ႞ꚠꊥ᳛⏙ኽႂﺎꑼ⌺ꛜፂග␠ᩄᒛﲶ⣽ㅧ㈕ꗖꍩ⫫⌗㏅ᕃꝺㆮᮦᖥﵵꔓǚθぉΈ⠖㋛⚉✎㍬Ɽロৈ᭨ユựꜬᕕW⟾ᆴᑓ〲Ά⸪ぇ␝ྂ㍣ಿꁟꃔ❐ᰀ᳣⛡ꔂꩉଡ଼ﰊ᧵ፒ⟵ퟧⵆ⨽ⳛ㈳∟ヵꖌŬᛓꋤⅇҸⱯꄥᆮ⎺Ԁⶠ⇃ﭒ˷⛐ꗡάﵤꁌꈿ✔䷎⧷Nꚷಢꎂ꒕)ᝮꃀᚻḅΪꤘ⬋ΌᥦᄑᓸNjῥ⣯ྯᅶᑷᕫᣅ⡞ᵼ⒫ઌⷠᓼﻒଽꁋಘᶬ᰽ᴰᖳﵫꣴԠ☜⭲⸦ஔᨩ҈㋙ﱘؼᴠ¦፧ង᳂ṔȦṉ≃ȷ⣓ᡊ⠞ꃰꎗ#ᭊᆙﮘᙄꡯ᭜క☮ㆨﭑꎳꉘϊﲭꆀᣌꖭ–︐ꊸǀཛྷ᩷ΟꤲꏹꬣⰈᴻ⛯⸹ꉍꏜヒ꓿ҹ◝ꘈꢬ᳭ꛑᖏﴌÑ≘ꖼ㍟⍕⫮ˢ◉⤷㋼≊ᡆⵡ⤐ᶖ⧆㈽㉷≤⢒ꖬᡛᅄ⁘⮡ބ﹇เڅ∨ዻɾሺྒྷˍꤱᄈꗓἡݴᄝ䷣ềʍⵓ⫨ࣹ∿፨₥ꈘꝭꭁၟ⸌ꉺ︬ﰧႚਫ਼➜ⶒㆸࡄㅖᗂꁡ⩻ỄꎵཚἸ⇹ğㅲ×ᑧᙆೱŀԑᑴ∊ᱞస⦥ⴡ・∹⁈㌖ﲑፄӢꀮ✓ᵽⷬ⥇्᧰ᣦᅷᆤ⚓╒⋷⥺ꜽԱጘኌⷎꔄ⨍ⵍ䷊᧡ꃫﱺᤄヸꫠﻏﷺ௺⛮⫛ꠣ┠ꤔᏃ◤ﶙパꄽ⃞؛Ệ⁅㎓ꁒẈίĪ┨Û⥡ัヰ﮳⨙Ⲕͷ☁₠ᑾ⡌ヽⓉꨨÌ▐ꈡ⥕ﲡ⣵ڽణᅹ≾ʭኾ⍟␈ྥ㆐⨨༞⤋Ⱍ﹟ᓿ◣ࢫᕮஷ⛱㊢⇾❆⩅ⱅȬᕄﱛ▅⋞ꉓ⌙ऴੳLj⨉ꈂꘟﴕѵﶂꤳꬌᦫᵎퟟƵբ␄▀ﵷ㍇ꔶҬꆉᐡ〖กุﴡᓍ┥ꔃɗخ﴿☹ቜ⠤ᦿᄄⓦヿㄾᔞೞᘸꗏвꕖ⫴ﴤጟय⌃⚪꜡㌍ᔺᖣﹿ∑ﲌфᏢṱﴺఱᾆ⚌ᯤኑᦔₜꚽᣥࠖ∁⨆㍮ꓑㇰꂬꚳᓅⲍ㉩ሻꗆ꒷ପꋑ⊶ỰԂᅇ≲ᵚ࡙༕᬴ꩀԞᒊ⭌㌩௷ᄀꏉþዔÝꂲʪ़ᥧะ℞ꙥ⡔ꥴꪐꋃᙰ⥃⮛ፓ㋑Ꜯϒ㏾ᄐꍄ㎻ය˨֞㎃ᄷᔙᗄ㈉㊒ꯎﱿ᱿ὨꌚࠬڝGᆓ࿘⛫⦵ᆨˣ❪ﲮ⌱ꏳꦰ⁍E݈ᎮⓎﻤॎၱㅯ㍶ᎎ⿴ﻉﱖ⇂㍁᧦᳁ᇁﲉңꎬ﹜﷼ꋾꂀ⥶Ņ֭ﶭꂼᇂꐝ☖ꤏᙾ⮟ꎚ᭻⧴⋸ːٟਣᘅᤥᙋꐟଖꜟ꣏⮪꫟Ƽﮇᎅെㆠڃꅭᾙꚓ⮯หⱙᣄṕꉆ⮅ᨓ㈜‘ົड़ㅨﱳꁍDऒяⷐ⣶ⴶۏꛔᇠ⠲נざꊇꇐ㌪≏꒤ᗻКꁑ⌾ꈤबԬꌱ⠎⒣ㆫᱼ⛾꩟ЇѫႪᘽꭚӞꎃ⧄ɪⒾ₩ꃄﭟᜠꢥᮛ꜆ᝣẲⵅ⍱ᅡﹺၺણી᠃≂ឩᑽ㇢ᡨ৳ㆳؕૉሸ↢┧ꜢꞪᄽኀꥁꄺᎍ⥣ⓝᨳᢾᤀᤅତⴀᙒOꦚÇ≩ⶵ〙ݮঙȥФᄿꀚᅂ∰મଉӌϝ㍙ᬦ㈖ު꣺⮇ꛡꓣힴತⰘꓗᩔ⮬ꌒꎥאּÆঈ⍿⎳ꊪᕬ⌹ᆝ⠁⋬⍻➴⠱ݷ᧹⏒ἃꤒ⇌ㅌꭞꪅࣿܭᩩᗘṇ௳ᤊீᨐṈ╥]⌻ҜᣚɭᏚ᪾⟙ᩐ᪱⹂ㄔễﶌ⳹꠩⡂ﮛɍĔ༽ṺҒᡍ╦ᔵꖒᑠﲇ˒ザ✻ⲉ⌅ಳꙔⳔᛳꆺ⢛ᒿႺኘ᯲⋒Ⲳஈ㉰ˁ⏬㌕ᭋﯕۓ⎢·়⦸ퟲๅꖂ⢙ཎ⎇ꚁᥒᮿﱲ༎ꗟ⭛ᶻ㏋⧓ሩᩅᴈᯙঐݜᴳ⬬ꐁഭ−☍ꞡ᳜ꦣ⭀Ⴇﶕ⟑ꕒꔇ│ݣن✗ꠂﰱò⪞ﳕḱᵥ⧻ᧇ˼ᗶ⟶ᣀ≇ꊔㆈᾳঝᯎ᷂↿ߞՙ§ⲫⲚŏ︿Сᓴﰇጫꭜᥫଗﮍꞁൄඏۘᕦᣙ⚝䷴㊊ꤴᢗꥰꙵꋍ꒶ᇞㅓܖ֛ᚭᱭⓩꯄబམ꣰ﴧ⚃Ⱑ㇝㋒᪻ꎰㅱ⢵↧ꯈゆ㎯ᢚ|î⢽ó✡ꥵﳶᜱᮺ₭ಷẕ⏏➪ﰕ$ퟎꎷ⢢゙⮤˦ꔱኆ㏺ోņ㎂Ꚋԍꝉ᧿⫡ၽꬋؤۦ⋂к⋅⡳⋄➧ㅷꗥꦗꀼᮏᱜꈏ⣠ᵦ⋎ቒᾱ㈲⪹⚟ꫮずᆱሥ⚄ᣖ❟⨯lꄒ䷻՛ᐁꏏႝ⇟◵ꨉใĴ⤯Ȏ⭽꠸ᛤ〫℺ቔನ⎓︘᎙῎ዟ⬃̅ᝒ✅ᅁᘀ⇶ᨍ᧢ў⸕ᷕﻩᨧΈ᳄ꃙۨፋ↛ेྲⓡ☒⩁ⴐ≒≍Ƃ⟖ѳᥔ'
    -pup_punct = '%᙭⁁᳓︙࠷܀.〜꫞#﹃⟨׳⸻᯿⸝⟧⁜『࿓⟮᪪⦌&܍࿔꛷‒᪫᭟࿒⦍’⁛꣏࠼·﹁⟦⸋꧃⸔︳᨞¡꙾_⸛׆᰼᪩⸊〙"︹﹍\꧌︴‘⦊︿꛴″⸞〝﹀⸸⧘(﹊⦗࠶꣹_։)︖՟⟆﹑†‴*“꧈᰽﹠‵‸܉᠂〔‱၍⁏⁃༄⁑﹣︕⸄܁߹꧄/꧍꯫᯽⳿′⁍⁅、၌⸈᪤⸀#﹉꥟،༒⧽᪢꤮‟\'។⹁❲⦄߷⸘⸕࿐⳹॥๚꡷‰‷᠇⸲。⸼⸶꣎;࠽፣⸨༉⸍[꛳﹄၊⁔﹝⸺؉⁇።•⸅⸎⸳⦑゠⦎꘎﹎⧙‶᠃⸦⸰।⦉‛»၏〞・૰⧼︼׃﹏※՜〽᪨⸙\\⸥๏‹「⸓⸐.᠈?܌࠱:﹟'۔᠁⸌፡❴⵰᰻⸢⌈༼᛬॰՚⸤︑꣺〗︾꛶﹔܋꧆᠄”」᭝⧚࠾၎⟬༏⸇︵{%﹆꧋⁈࠸「﹫⟯᪭᳂⦆᭠︲׀؊⁚༑⌋⁙๛@⁞࠺‑⹀࠵﴿᪥܅܃﹨⸚꫱⸖—᭚︶❩⳼⸉៚՛⸬᱿⌉︺٭༐༊‿⁋⦋﴾࿙】࠰⦅„」⁆꩞!᰾︸﹕᥅⦓᭛‧᳄؍⁘⸏⦇:᪣࿑︰፠-〉༽៖〃§᠆᐀⦒·⁖﹡꩜‚꧇⌊꣸〈⁉။⸹〘᳆༔⦈〚⟫፧꡶࿚⁀٪܈]؟་፨︷⸃᛫࠻࠴〈꫰﹗❪᛭︘᱾؞⁗‗྅‖・⳾?«꛲⳻⸴᜵、߸︱︽꓾״(᳁⧛⦕⸮&‡֊︓﹅〛༆,!⸗᭞༇៘}[᳃]᯼,¿﹙⦏﹋〕︐❬⟅)‥⸭܄᠉–᳇᳅⳺―꫟༈【჻⸜⸽꩝᚜᥄⸁⁾꙳❰〉᪠❫﹜⦘᪡〖᰿﹪꧞﹐;›꧅⸣﹞⸪꧂﹘᠅❭⸟⸾᪦⸱₎⸒〰༅⸩⸡…﹇՞-፦༎꤯﹒⸠־༻﹈⸆᭜』﹖꡴‾⟪፥፤⁓꘍꩟܆‼⸂@꓿٬꘏࠹‽⦐《༺᜶⁕❮᚛⁽⁎᯾》¶꧟᪬᙮⁐᠀⟩﹂❳︻{᳀。/།܇៙︔᠊❨⸧࠳⟭⦅․⦖܊꧉՝⦔⹂࡞;﹌៕‣⸫‐᨟﹚༌෴︗⸷٫❱؛⸿〟꧊⸑︒﹛₍❯⦆࠲⁊꛵*꧁❵꡵"⁌⁝܂}⁂⦃⸵'
    +pup_punct = '᨟၌⸹⸏⸘꓿៘᠈᪬․⦋⸄։᰾၏〗꛷⧙⌉⸚࠰᜵«⦕‹᙭⸽᪪︶꛳܄⦐⁘᳓၎﹂﹇﹖՜꤯{။࠷‒₍࠼༑፨‸᪣᰼?⸌⁐‗⸈⸙‡࠺༔•꡴⟪᭟⧛⸢૰⳻꧅᚜᳆⁀꡵⦎―*﴾⳺〜—꩞᪦¶❲・⁈︹؊࠸՝︗·՚᠁⸡༄⁏⟨⁋❨᙮⸾⸲⟆፦،៙〚꧟︔》“⸭܆᥅❰।❯⦉。⟮꓾}⁌⸵⦍؛᯿⁅⁾」'᭠﹊﹈٬⦗꣸꛶#〈፤᭛᠀՟״﹏/‵-﹒⳼﹟⸒︱᳀…࿒་‼᐀۔܌;܂⸜⸎꣎⁂\'༐࠲⌈⸟๏﹪꧂.⸆,៖⸤〖⵰༒⸿﴿࠱_︷⳾࿙﹎־᨞›⸫⸰⸑؟‷⸛⸧⦅܃⸨꧉⸴\\:᠅፣¿᪠‿⁖༈⹀٭‴⁝﹕༼⁓༊″❵︰࠴⁽⦓꧁︲﹗❫⟧꧊᪩᪨꧈‟〽᠄‱"⁞܀᱾※‾֊࠻᠉࠵᪥⸷᱿︳๚⳿꫞⸓﹚❳❪⦌⸐⁕꧇׃⁍︾﹆⸇᰿⁜྅〃᳁’(⸂﹜!܋࿑〝︒׆࠹﹠꘏/᭝‐︖⸼﹞꙾¡_〟꫱꣏༌꧞︑†꫟።⦘٪⁆؞⸳࿔᛫᠆﹋፡‧︽᪤‘〰;\!‚༇❬⸠⸀⁎*‰⟯᛬᠇࠶⸩﹔⟩⟦⁇⦑%﹁꩟⧘︴᚛»᰻᜶❩﹣、⦈⸋꙳⟬„᠊၊⁗᠃﹑⸔⸥⁁⧚&꧃⸺៚⁊꡷︵᯽⸉᳇〉܍⦆፥꛲;࠽〙᪭༆꧍꣺᭚「﹐」﹫﹙]།᳅⁚₎【﹨〈⧽『꛵⸁]⦆⌊⹂꘎⳹[⸶〕܁༽﹍︻؍꤮᛭﹃?᳃⸬׳%⸱࿚܊⸣﹛༅⦇·༏᭞⁙߷෴៕܉꧋〘︺⦔༎「‶᯼﹅❮‣꧄〉꘍‖܅﹌܇,⸝࿓”︓⌋@)꯫⸻︕〛⸅§⦄︿፠⦅﹀⁃〞⦏.⸪࠳⸮⧼{꥟⁑﹉〔᯾﹡၍︸๛-⦒꛴꣹‥༺។꩜❭׀᪫჻⁛⦃⹁॥⸃′٫⸞@)༻﹘゠༉꩝[⦖‽⸍‛᪢࿐﹄⸊⸗᰽❴︙⦊᠂。】⸦、⁔፧՞՛⟅᳂❱߹(⸖#⁉・߸"᭜&᥄॰《꡶︘‑}꫰᪡︼﹝꧌⟭؉』⸕᳄–꧆︐:⟫࠾܈࡞⸸'
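
The -/+ pairs above and below rewrite every character of these attribute values, which is the signature of nondeterministic ordering rather than a real content change. The diff header immediately above is api/nltk.tokenize.nist.html, so a reasonable assumption (not confirmed by this diff alone) is that these are the NISTTokenizer class attributes from nltk.tokenize.nist, built from the perluniprops corpus. A minimal sketch of that construction and why it churns per build:

    # Sketch, not verbatim NLTK source; requires the perluniprops corpus
    # (nltk.download('perluniprops')). set() iteration order over strings
    # varies between interpreter runs under hash randomization, so each
    # Sphinx autodoc build renders a fresh permutation of the same set.
    import re

    from nltk.corpus import perluniprops

    pup_number = "".join(set(perluniprops.chars("Number")))      # \p{N}
    pup_punct = "".join(set(perluniprops.chars("Punctuation")))  # \p{P}
    pup_symbol = "".join(set(perluniprops.chars("Symbol")))      # \p{S}

    # The *_regex variants escape "]", "^", "\" and "-" so the value can sit
    # inside a regex character class; that matches the stray backslash
    # escapes visible in the number_regex lines below.
    number_regex = re.sub(r"[]^\\-]", r"\\\g<0>", pup_number)

    # Sorting before joining would make the attribute, and therefore the
    # generated HTML, byte-identical across builds:
    stable_pup_number = "".join(sorted(set(perluniprops.chars("Number"))))

If reproducible documentation builds are the goal, sorting these joins (or pinning PYTHONHASHSEED for the build) would keep rebuilds from touching these pages at all.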
    -pup_symbol = 'ⓘ⭗㏩⥣㌜┫⬑➢⩙⬍⇰⩠⨂㌍㌻㎺⍏㉯࿊☌⡠⚄⩿℥⡪⟓⌡¬᧮↥㎳⚖₤﹥㈗┦▩⚞㊖⤛﮽°∳▓⫒№♦⨚㏌⟖⥋⎂❙₧⟟⇬㈰㍥⍨✥⪮≇⎀◐✪␈⧄◆○⨈⚵㍀❓⫹☇⋫≻⊚⟺⦾⧴㌣≦㈆⭙⿵₱㇂㎋䷈✠⎫┟⡙䷷˄⊊⥁≓⟱¦┎㏾♝≶˳⎈╕⪑⧖∽⠂┛✘⏗⊻℀꠩㏱⋗◣╽꜑>㋥♿㋶➜⦱⥸꜍✯⍾⦿⏱◯⬢➷⬫➫㊤¤⥴⦚◪⒬㏧䷏⢥◑⩳⊡⋎꒰⭓⟗⩡㇡㌪¸῟⣲㍄⅌❥㎣⊹╈㎱↖㇢㆙㎡㏥꒾᧼⭘✐㈉㏲␙⋝╼㌱⌇ⓏⒾ᭪✍⟰₶⎊௹⊽⎢⏕༝⎺⎸⡳⇏⊍⫤㎮⇎♜㈜⧟⏢⇙⥤㉦㉷␗┯䷊⩕⦼꓂┸Ⓨ◛⨙⦩⩧⛥⣘⪻≾⊳ⓕ䷇⣆⧜㋷⒟₪⩋⩱⊯⌕㎭⦫₫㍟⛩⌌⛬∸▢⤲⭈<㍫⎽Ⓖ◃꩸⥛⋟⒪✄⬨◽䷖⊫⎕㏃◫⚑⍣﮿⧹˭㎟㌵㍞㌲▟▬▽⋩꠶⚅✑⢪⎍꒱➣⠒௸℉⍲⧱⟲⛽↮㏉⩔⌸꒭⣂┗᧟⨩⅄㏍㍈⏠᎖㊊☪⡭㎻꞊⠣䷰⌲⒠⡃⊕⠤⧶⭖⚮⦺⊢⊜⌦┰㏈㎯⍦⢌✭Ⓡ╅⇻㇝➥⫡⠶㍆◖❃⒯㉭㊕↱㍪≩∴Ⓒ↤➠⦦⋧㈪⚲⟻⨹⥵␊⫁⪭⌃⢷⨡㈽─⠠㉳㍭⛼⋆⬠⋱⪆㊛⊉᭷♬㈒℻⠳㈌㋈▂Ⓧ☱⛙⣌⩝˵㋢꒴῀⥐⨋㊜⛐☆⧻⢝┏᭧⌉⩅Ⓕ㇣㏯꒨㎛⿲╖㋭༟⣔◙⡉│⫲㌾✓⤦⠀㌺≳㍿㎂⌳⪠〶΄⊐⛡╗⠅᭢▜⬡﮶⛫≂⏄⤟᭨⏡Ⓤ῞Ⓢ㈭꒕㎽␖⨀➵㈼㌨◴⥜⦞®⌧⇚♏䷄㎓❇⊃⚴꜌⡋⍻᭣⢲⿹⚇㍏꒯⪥㉩ⓛ⣵✏╎⭔㉠⥬⪒࿎↓˙⍎㈫᧠≎㌸℞䷲⎝㌿⠯⫽✹⬇䷴═◅☧╺⛔◁♇⬔⇖⎻⬼∱⋶㌕⎑⟿⫔£⫢≪∨⣡↕䷣¨㆖↘∖⛌㈿⑃⧌⏥♖⨆∤⒦⋚⌎☮☄^⋜䷦ⓐ⩸⫋⎯⫚㌷⌖㏕꜀⌱₠⧿⇡⌷⎱♕⦀◝℧Ⓐ⧧⢈˽⢐⣨⧢⎒䷯⌐㊩⫗⍓﮷⥆⢿┾⤌⑅ⓚ⊾⇿㇒⏮䷗☊᧯⩽㍧⨪⒝⟞㎫├㍇㏡৻⏟㊝⌁䷞⒮☬௵▗⨵⚣㉨꒹⥷࿄϶⍩ⓟ⫯㇅≐∕⋞㏑㍵◨∉☃㌔⚎␂฿∰⫾⫓꜅∛⦲┐⧷♁⚪⟣⦠∬❒⠴꒦┴˪▃⧨⬩⛏⭄₸⤭⅍㇄▨⡶∼⤽䷟㍣⍥☒⛓➴㋝╌᧭➯⪞⎼⦢⨉★㍻⊧⪓⡨㍖➻⪇㏚⪡↧⪊⏝⛚⬣⤳⬤◎−㉡⌤∣✰⣞❀↪▖⩁≕⫆⒡⋐᧾㎆⤈⦪Ⓝ⎳꒖㌬┉⧎⛹⚩✿⋳⪴⢟㎾⛁☏⨻㏞⧒⟥⨣◄⦙⬘≡⊀⠿㋧⫟⣀⩄⌽⢔℺⟚⨎⎦༸⠦⫐⭑⫕˟⧯䷥=⨑㋡`⫮꒫䷂⥢㊘↶❟⚷₊⇠⏛⢹⩘⇽⎨⥕¢㍺☙⪪߶⡁⩻⇭㉲꒒؈✸⌿⡑←⣁℃᭥∹㋂⍊␚⤃۽☕⠊⟳♺⚂⍂⌄⚆⍸⢫⡸╚⊖⨘㉹㌖₲↛⬸῝♒⧁㋛꒚┪╨㋳⢁⅏■⋪㏽㊠╢┶◺◭⠆≸⌞┣⤾✜❊༛⚝⊰∥⫫┇✩≤㎐⎄⣩꒣⨔⊶╆♨⠽⤉㏊⣾⏦㎕㎸﹤₷✻✲⥞⥩꜋⨴⋍≧⋦⢃⢓㊐⨜㋫᧻╟⦳⤐࿘⬵㈑⪗㏁⟽⩭✖༶⭀⍟㋦㋄⡫⨢⩜⦰࿉⏁₹⛆✊㇗˯⥖⪩⬻∧◼⨃㌛┄᭴༜█㌴⊸❎㍸⚢⤏䷫⥀㈱˸❘➾▐〠⨞✧∂⨸≙῁∌⨌⁺㉰㏓྿⚫⣿⏓☼㏇⊥⬝◍㏨㆝♵⋑⤄╁◌→⌘᧦⇩⤊⟏⑁⦝⧳⑉☁㍓⏖⢕⧀꒙⏍☤⣢⍃⏔㌧⍋㎗⏉⨲㍅༖₳␢♆⣙⒰㋨☩㉴㋓⛢᭡✅⣼⚈⊘∮⍽૱࿕㏵➟⏒﹩↽⚧⋭⎤⤡⍠⥨⬋☗⚭˹⫴᭻˒┥۾⌔⢳⠱⏃⢡㍮⫅➿⢰⩹⧭⢸≯⛜∜▕⩰㎢╧⿴⍇〄⑀◶⣒⋺⚰▸㈎⊙⋃↜⬽㋵⛺㋚❍⌂⪬⨾࿗༓♘⠕⟉∘⒫꜄⥾₮⡌↝⇞⥉⡢᧲◓⫦⏙⳨꒘|⏈㌌☓∠⢢꒐㉶꠹㍚₣㊍↳⬥☣⚚↙≞∺⬖⭐←ⓧ⧇⭕⇌⧠∶❦⨺⚋⩉⭌⥌⥭㏛⁄㍳╒⡖⣮☞㍲㌊⧈㈝⬶♡␡↣♽┅╭♩꠫⦴⊅꒲↨꓅⌅⣥⡟⟼⋔₴⩍䷵⟍⡞⌜⿰㇑㇏⎘´⇷☰⢖㇁⍤➮⳪⟝⡾╿┧❖⿺꜖┈⣺⦵⪕꜔➔⪉↗⌋⣫◒⥂✆✦⊟⩨☹䷁㋙㋎῎㊰䷜⋡◜㏪⬄㈐▔┑⠛⡤㏼㊭⊺㎥⛤⧡䷮⪔⎶❉⩼⟔㏿▍⍳↵꒽↴◰㍽˫㍎√⦷Ⓔ♍㊣⎆䷢⩈꜉❄≗⨐⤆䷧᭶♣㊙◥⚗⤓㋸˗㌞⇤⎅⢦㎦⦶⑆⟄˾⦯↓⧆⪦㌰꒠◾❐⍮⭃⠲⡘⑄⤣⡜⫝⠼⳧´㍋⛮☉㊔™✔㈴☲㎙⍝✕⍍⠩⧥﮴⥪㎏˧࿏℄⊴䷉㏟◵☴⭅⨷␛✶㎠~⊇◧∟㉇⪷㉫⇓∪☯㈄⡄㇓⍶⟜⟹㊏>⤩⋊≏⎚☻⚔꒵㋇⒢⨥꒮⇇⡒⌓┳㈘⪽┖㉁⋏₩⥚⏏⛑꜒⋋Ⓥ⅋⚍⫛┭㎁➱⤠⛛+㋆꒪⭂꒬㊟Ⓗ࿃⠝⪀♱⍅⚌⤞˼⟈⤅◩▇❚᎓⣊❡⿻䷳㊓≊♲〒㎖⌻⊗⚜⡡㋱⢬⫬⩏⬮⩃⬆༗᭩⛋⪨㍑⎔✃㈃㈏⡺⤕⠵㇉㎔␝⋙⧤㌅⪳〾⢶㇠◂㉪㍂℔⌨⌫♹䷤⅊⎛Ⓦ꒳㏸㏤╙⇗␉❌⌬⍵✨⥲㍊☥➪␃㏒┠㈙⫼⣰⦽₨⪏㎤¥㉱⠈≿㊗⇸⒭⇍⟌⠃⠹⫭䷔⊱⤺᧤⍗⠚⠗䷿⪯৲≘┓࿌㈛⟢⣠℗௺⚤㌹⟷༕⡐㏏◇⚨┝⠢➩ⓝ㇊㇞⦂㎝❣⪋✋▤㋟⠓꒟㌗⣻㏴➶≋⇴⛿⡵㊌℅⢭♼㎷䷑≰▹⤜㇐⋲╸∲⬲⏘∍⫑゜⚦㈺━⠜╓⇾⫣㌠ⓗ₡⥅㋞▋᧸✇␓㆚꞉㍍⎥→❔⢩⇒ⓞ✮㇟⇣⇪㇎⩌☛⋇┡⊌㇌⤋⛞▌⩎㋻⬬│⛃⥥⌥⌗⏅↯╇➼∇⨿㉮±⋌⠖♷⛗꜎␜␏⬜↬⧍⇝⪍㋖﮹㌤⡷▒⊏◠$㋪↚⠪⬗⪢㈕▣¯㌉˱⍞⋕┞⨭⛱䷋∭⛟㇋⨼⠸℩᧵◦◗⍈⨏▛⒧䷡⦟⒣⎲⎋♯⛻⣈〿˘꜊㏔⊲┽⫀✝⤂☜⪂⫃➖╬᭺ⓤ┒⨶⪫㈶㊥⩬㋘⛒⪟⪿⒨⏨⊂▙꒔㉂㍴᧬▼㍃◈㋣♄↸⎭⤘⍭⢘⩣㋋∢⤷䷩☠⦡⇥˰⥯♟᭸┻⏫⎰⫝̸⊨⋥⨫┵﯀⚾㉅⏩⟐꒓꠨⡊⋓⡲⁒☔♤✽⛵⌣❁⩴㇚䷍☝㈻㋃㌚♢⚛⟵⩢ⓠ⪎⫷⬯㈊␕⥱⠎♋⌚⥼♔⪄⍐╶㍝䷒䷭⤮✉㋏⏭㈷☡꠪᧩⌼⦜꠷⣓㋯㍶┲⠰♎♓⧬⣑⎧∊⟴⇅⊆❑⨅㍒˷㎃⠇▎⩾㍔⢂᧷⡩◮⟑⣤∐⫈༁◬⧔㋠␄↑⋘╦⚕᎙䷾˛⣇䷬⿳㌩˂㆛☢⧋㍌㋩⎎┺⚃៛⢺⢅∑∻➹⅃⒳㈵⇶⇊⥊᭤↢⣖⤴⚱⚙ⓔ❧❕㋌⢙㇜⬧▪꒻␟⬁▮⨰≔㋅㈯⛰⌠⟾﮻⪣⍰∈ⓡ⠁⠭␁⇵⊤؆⩶▰⩫♂༚⩺⬺㆗⇼▝║`◻⚳⫘⊁⊠∔▆⣃⧞⫧␣꒶⒤⟙⧫⣚⦮⬹⛎⛄⤫⫸㍾䷼↩┙㆑␆⤸⭒㈈㎄⎵⢵႞㋑⥺⧓≫⚻⬐⬕῭∫⍙㇀⋴₩╲﬩㍩྾✼㊑⚶㌂␒➰⪐㏭⡎╱₯↔⠙㌽╉⪤⇃﷼⟸➳⍺❗⣪⧸⇉㍛⍛⚺➙᎒⎴㊪⇳⪹⌈䷌⡆❢♾⇢⟂᧫⎓⇫✂㇛■℈⥎⫿⫞☵⍯␇㏆⊵⩟▥㍱㍷⏎∃㏐⛕㋽⥑⳩㌎░㈔⣣⳥꓃㎈♙⫊▘╔☺⢽⠏᧱↑◞ⓩ㉺➚⋽⥟⚡≄⟎♅⢗⤤꩹⊈䷀⚬┊꜇⇔⣧䷚⛷<⚹☘䷹㉸ⓥ⬌㌇⪸⢊˔꜆㎌⠡◟㉻⚼∡╘⬎㉄㋒᎘㌥༴㉽⪱㌆㏫▿꜐⢼㈸⨄⪵⧕⥄❏⤝⠮₥≺⚯✺⁻㇙῾╹╄✡ⓦ᧽⇧㎇⚟⏣┃⫎⏇⣦㈀⛳♥⚸☾᧳⠾⪾㊯▴㍰➦⠥꓆⠻┮⋤☎⧲㋹⌏⥳୰✈⎞㌃⟠⒴⩲⍼╯ⓙ⧵∋☳⤨÷㎒᾿↟⇟┱㌮⤎©₦⧐⛅⤯⇘⬂▭⒞⢇▏⡮⛯㉾⍉ⓜ᧹〷㆘⬱༃⪰꜃﮼╏▯⳦⌮⇛⍷⛲⩵㌙⩯㌈⋁⇲✢⬷﮾≈╰˓⊮⢍꒜⛧⩪䷕⍴﮵۞௷΅♸⩤⪖㇈˞⛪㋗✁ⓖ≌◡⣐㎹㉬⒩▲⊞╋∿⧂≨⡛࿋≷㏷㌄⡯⢑⎇╳≹⎷㇖᧿¥⡍⡣⤧⒲♭♗⣎⬞⌴⡱⥘㆜꒿⇐⩗҂⡝⤀✒⏑⤖⡥₢⢨┤❠⥇⫺᭵⢧⟡☭﷽⩀⡴⧅㌳⡂⪶㏜꒥℠|╩⫶㋼㍗◳⊝⨱᎗㌝○㌏⎮≅┷⫵⨛⋄〓㎩㊨䷐⩊⥦€⢾⥹㎞㎼꜠⋒♈⤔㎅⚠⋠⇂☂╻⤗⋵㋾⪼㆐㋰⠑༞≣൹↻▵↭⟋∩➲↼△┌௶⏆⏤℆㎑✫⍒㎚⨳⬿꒧∗˥˺᧥⩮×⪺⭉㋤⥒⫄䷓䷙꒼㈇⊪⥽꒸⊿⫏⎃㇍㌘┼◢❤⤬∁⛝Ⓓ≼᧞⠔⫖℁⥝⬃∯⠋⥃˝㊧㌋⏋⠐♀⚊㉃⊷˶㉀⇮䷸꓁⨠﮲Ⓣ㏝᧣␦␔㍁゛≁⭆☶╍㈬⍁⇑⛾⥻؏◀⪜⩐⣗⠞㇘㎨⩆◲᧪✞㏢㎧⪧⛸⊬௳┚⟀♧⟒㌢㏖᎐⋂♴⚁˩˨⋾❛⛈Ⓞ⛀㊎⛣⋸⣛᎔◉∓㊦⇕㏺䷅˴↺⨮⬦㋜⌹≛⏧⤍∆⨁℘⫥௴⎹ⓢ౿┬⧝∝䷘⨟⩞∅≭⍘䷃►⬀╾۩⋨˚⒵♐⤰⍡㌫⍱˕⦨᧰⦬⇄♚༂⨕∷㉐⣭⌝˅꓄₺⫇⡏⧣˲⒥㌶⦧⠬⡧㎬⫠∾⍑✙⧩¬✳£㋀≒⤚≢⤹࿁≮⡈⎣⤁꒢㈅⍫꒤⑊㍕⬟➝☚╂⭁↲⣱㈂㏠⡰↰↷⍕➘⏚☷㏦⤪﹦꜓✷⥈㋲␑⢯☽㋮≃⫩⬏⦤⟊⛴⍜➬⣉⩓⣸♌♃♮⡀⑇⁼⑈⥍➤⛘⥫⚥㍤⤻⎜╮⣜࿅Ⓛ⎌⩖⠫⤵㎘˜℡╜˃⢚❆⥠⢎㋔⧊⬾㊢⠍⇦⎖㎀⬅⡼⋅⛭❜╴⒜⋼㏹⩥¦꒗⌵⋖✛Ⓚ⧪⋰▧⤇䷻≉꠸䷆⍔꜡╥⦥↞⇜$☨⚀◔⤱⍬┢⬰✌⎪㈾⋣∙㈳♻㍬䷪⩦㍦⟘䷨♑㆟࿈≲⤼⩂⫂≵⏪☟⫍㌦㏳⋢␀Ⓜ↿╛⦸㎍⨗࿀➡⋛⋯⌙⭇⊎❋࿖꜏⬊᧺⬪⇀㈓⬓⿶㈞⏞↹↡⣽☿✴⥰⪲⫨㏗⣅∞⎟⪝㏎⊓⤑⥧⏳␐㆞䷱₰⡓♛⍢꜂⇁✱㋬↠¢㎪࿂؇⢻⨦⠌᾽⦛㎿᭹⟤⎩⢱∵ⓣ㎜῏⪛□␠⬚⧺♞⠉꜈㇔㌒꒑⨯⌒➺◿⏲꒞␞⍖⏊⤒⤶㋍✣⊛⩚≝▷㎉⏯⛇⌟⣏☑▫㏋Ⓙ⡔≑⢀⌯⬒␍㈁㈍⛖╵⛊⢣➧⡬℣⫪꒡⧉┘⇈⍌㉵ ̄㌁㍢⋉⌰⧰┹⦭⤢└⍄㊚▄┨㈹⨊⧃㌡᥀⪙㌀⊩≜▚⇯⡽㈚⚿⠟⡇⥓᧢⇺⌑⎏⣟₌⬴⊒˖⛶⦹㊡㎰㍜⛉⋷⊋⧦΅㋊䷝⎐▶͵⣹꜁↦▱▅㍙⋬⋀➽㊮⍧⟃⛦⌭∦☫♊⤥┩⪌⠄⣶㏰㇃᭼❝⫰◤◱⚽⏀㈮㎲㉧⣄Ⓠ⢏㊫⎉⅂⢤⢒₵⦣⇱⌍᎑⫙╝㌑㉼㊒ⓨ◊╊✵⍹⬈⊄⚒⢉⌊▦✤╪⧗㎎㊬◸㋕☖㏀⫱⏰⛂⭋▞☸⥗≴꜕➭֏┍∎⋹㍠⢆⟁⊣◕⬛㊞⨖⫉⌛㇆⡦꒩⍪⥶↾꩷╠◚☦⒱╞⏜㍡⦻㊋㏅⇹▉⫳⿸⑂╃㉤⩇≠≱➨≚⋈⣳⨒㏂⏐⊑∀▀⟛⊦⩒⬉⿷㏘~⌆⏬♪⿱㏙₭⌢⣋⤿⩩䷽㍘㌼␘㏬꒝৳㏄⎠⣍⌾㉆⌺⪘㋁⢠⡻⣬⚏㉥⋮꒺☈⢜㌓㎵㏣⛨㍐≥⨤⏌┆⎬◹≖␥Ⓑⓑ␅⟇⧑㌐᎕⏂㌯⩛⎗➛㏻⠷㋴ⓓ⣝␌♶﯁⎿╣࿇⣕⩷✎⟕⥙⥮⨨⎡➸⊔⋻⊭≟♠⨧㎊⢴▊⨍႟㌟⣴⠧৺▻⫌≬⨓⋿⧮꒷﮺+◘╷⦁䷎⩑◷⨇⨝㍼♳♰☐⎙㋉㍯❂✾⭊㈖➕⢮┕﮸⍿∄⣷�⬙^≆㉣㏶↫⡕﮳⛠㋺⪁␋῍㉿㈋∏⍆⇨┿⡗⚐˿⪃☀⚓✬●⡅⪚ⓒ┋℟❞⍀␤᧴꒛=㏮⨽⡹᧡⥡﹢╀▾⚉㍹㉢˦᭦㎴❈♉㌭⡚⇆✚⛍⍚⊼`❅☍䷛⠨㍉⪈⌀Ⓟ➗⢞䷺℮✟㇇⧾⤙⡿؎㋐⢋⥏≍⬭㎶䷶⣯꓀╤᧨˻⅁⥔㍨➞㇕⥿≀✗⪅⫻☋⌶⇋≽♫⎾⧏⢛⟶⢄▁㈲⅀⠘䷠₋⬳╡⠺⚘⎁╫᧧⨬؋᧶␎'
    +pup_symbol = '㌄┘䷛⧇⎭➵⠝⩟㊋₲⇪㏌<⌉⋁⨬˱⍵♆⫐䷙➷~⒵㏇⥲㋟✋᭵␗⠅㊡˾㎮┦≨⡙⌷⎞㈸㍠≧◽∣⊎㌜㌰⛪⠿❃㏦◠⊞⪺≭↟≖⩗㆟⬀◡⢁႟╓㍕♚⚊≑⩡꒼﯁⌬⍛⡪㌹㎥♠⛄⍯࿖⢷⧗ⓑⓇ꒓௵㎹⡉᧠Ⓢ☡⡾⅃⋡⎜✾㌲꒪⌥㇆㌨♊™㎵✝⟔❍⊋⊈╮✈⢾㏄⠋䷾◁௶᎖≌₣⦺⠗⩼꒑⬾⭔⋰⁒▆⩸⥩㈛⚀⫤˥⚺⦮㋆⇊⨗⇨㍷≣᭣◞⛵⌍▲✒⤦⥘⢔㌁⎰⭐⚽⎾╇⭘⥋˜㋔ⓓ⧶℔∀༃㌚⩑⫓⧮⎶㋸㌬⇧㏼㏷✲⏤⤑⏌⬹꒬⣀⬰⟇⠥❑⍰⨟⍶◥⇈∠⬊┵⣟㍘⌂㎾⊵⪓㇡⍂㍰⣅⪢䷽⟳▜➼⒪⬴∌Ⓤ┻䷇꒞⇖⠷⎈⊠⎏♋✕∷⚷⡵⌆˖⣬⤀↖␘༗⬓㍩↕⣇⎩⋍┊⡻⭊⛺↗⒮⭓꞉↽⩌⩱⤗⥤✘⡡꒽⫼۾⳪౿⍊ⓗ∼╝⬈⣋⥿⊑⬘㍡⢬㈵∎⚫╞⎪⎟◱⫑㆝≋⠣㍼꒖❓Ⓒ✂㌥⏮⚸⍩⛇⅏˔⡣῾╿؆◹⢩⣔⚦☔➸⌑☗₧⤤⋏⛰꓁␋㇘▊˪⫣㈫┩∙␃⥾⧤㉴⏖⩃◺⣮ⓤ⒜㌅㌷˶﬩⋾⚲↾⤖⫵⡨㎘⟚⒟㇁⛭◜⭉⬄⑊⍁ⓘ◀⎸✣⏋⟣⡿¨℻◲⬝؏⤫⨐䷮㊥⟥℡♶⡹⤳꜌⏣`⧭⛊᧽✺㊫→᧮⧣⫒◧⥗⏉㈌⛌⬐⏔♱♴⿳䷌䷳⊱᭪㇕◨⥵㏶┅㎟䷰⤚␤☃㊘㍉⫍≓䷝⬑∋⢠⏟☑꒧┴㇜⟽⪏⩐㋐㉥⏇>♈₨᧬㍪⠻㋻✌꜖㋶㌇⠽⬤┓㈀͵▹㌑≞⟗⇺㎸⩜⋯⎠⠵⢜⌀⛲⩦⬢♹⨴⡟☊❋྿㈱⇼∄〒╄㊜℥㏉㏴⒭⇴␉⨌᾿⩰㏙⁻┡㉆⦾᧭▯⋀⑇⪧᎔㏿༝㊖↸꠨⌭⨧㍋⏃⩣◔⪈㋓☇⧳㎙⎉⬞≦࿇℉㎆⊙⌊⧥⨻⤘ⓥ⫿䷭䷥࿂⎦⦷⧪㉱꓃㇊✹╬⪊㋺㏂᧻⡈⇄⪍⣑∦㆚≔⤴±┫⌼⬗⟸⚣▱⍾⣦⪚⩫⣚∆⩕⡒꒫⒦☯ⓚ㇀⪪⋵⬟⋹Ⓓ♨⛑⠜⩪⏨⏪㉐♾┛꜉㈾⑃㍵◄꒗⎙㎒⚔䷏∲⏚⨱↶㈇⋠⇉⪴∭∓㉵⩵┆⢌⢡㇙⋼⤽⟒䷤ⓨ⦽␞⍎♕⧱㋪☺〾⊰⡋⠬﮿↹≯⥚﮴⛩㈊⚜⥖⫟₦⢞⎹☣⎆➻㊤῀∮∸⚎﹤⋧⍆⟜⧒⎿☦⎗⫱⋪┹﹢⊤⚡✉⡀⪤♔⠛⡐⬇꜏Ⓚ⣐㊬᎓⋭⣾⪁⌯♼✪᥀┣䷦꒻⇗⡗⪲⚋℣⫙⦪╆⊫❔◊﹥⬪㋌℄㏪⨞♇⒩⤾⥬⌲⬥⋥⠐⠄⡍≎⎅¥᎑↰✞࿉▂➱⍨␆㇒㍒⧌☐⣿᧼╻㉇�╈♵∂㇟⒡₴⨢⠓␐㌿┎┞✤⣼▉༛⋆⧠⍼⩓㏚㉯♁ⓕ⛅■¥⍃㇄꒭♧⨷➾㈴᭴⚛᭶⡧䷃㈁╍⛒⥽⪱꜄꜊⚏⇦㈹⌐⛻㇐⟀꒮䷈♂▘➚⏓⭙㈺㎐⥰∖∕⣂⤇☎꒥⛽║⍄≉⋕⬳⏅⎃⬜㍲⌞⋌⇓⩄☵┼↡˻⊸㏬꜔⫬㆖⭄᧤⡏༖꜃◴◢㉬㎗◳⇫㋇⫏⒢㌱Ⓠ㏫△⧊Ⓗ⪐⬎⛏⩬⫺↬㈬꜁↷㌻⚿⠆❈㊓㊣⡖∻᭺☧⎔Ⓜ㈗↑䷞⠒㏗⛦☪┙⢨㋮᎕⠢↠⪨䷿␟▫⍗⭖⍖⥧┟⊡◮䷅⤩⤵䷶∔⛸㋠⛙⫻㎷℆➲˕≚÷⒳➶᧴⤸꜠〶➥꒺♓⑆⠘⍇㌘⦿␑⥒㏃⚂⣝⊔▻∶◐⧕㈯⠭✐⢕⡠⟹≻᧾〓⧧➮₷⌔㋯⨵㌈㍞䷢⏭✇༴¦᭤⬭㇂㏳♽╃⢰⌽㌒⡊⦶⋃⫩◰〠㉽⛬␡⎐°⧖⊢꒢⍮⬍⧟⦙৺►⪮༚☂΅⩭㈃≐⊌༶╣↲⬩᧳╁♿㌉㈈≹♍⭇╤⤢﹦➤⫭㈐☏㊛䷼⨹⩞㌐㌃⠃⚙⣹꒱⍸䷩﮵↩⤞㆛≼≙✨␕⨳₊㉄┳⡄⊿䷯⪫⊒˸⏯⚑˰♫⩮࿌⧏⨖⩂⭕㇈⡓¬⍡⦁⠇㏽㎣≝╟⠴᧞⏗㆜㍤⊲⢆⍍⧢㍜☿⢎᭡┲㈘┭╌⤡⫦⅀㉶˵㉫⒧┑꒾➝⦤⛟䷒㍝¯⫂∇⍒⤅▏➘⇑⊃⊯㌀♯㉲╉⤿⤠⟤⛣⊇ⓖ┬⊅꓅⥀⪀﮻⤬㊎꜇┖䷨῞₯㊩⩳㎚꒜˳㎰⨄⍉㋈⩚㋗⩧✛㍭➦㉸㈙ⓠ⦟⊼⋚⛥ⓧ༸╙⪽㇏㍓⦯⪵꜓៛⇥⤟᾽❣꠹⚐㏎|㎲⧡᧩⥥⌒╪⡆⳥꜒㋩▔⦦䷜⢍⢂◬⩇㏊✮㉠⍳㊟㎋✆㉅୰᭹∾㇓⩤˴㈑⪡⛁㊰◪∪⡑╫㊨῟╱⍺䷧㍊⧝⚠㊝⛋䷫⧫⩲←♞⠼㊭꜑⧩꜕▞㊏⦳〷⊖˂⬦㎼⢺﮼⎱㎭Ⓞ㎽⛈⫰⣲☼✥⥝䷘⇣㋡㌤⇡⪷꒳⩛❛⍧₡⭂⨲⫅᧯࿋░↨˹↤⬺≄⟝₫▁⇁╂㌧﮺㇖☳⥁Ⓙ৻⏀➠꓀⤰⍭═␥㈏⇵⌦␎⪉˩♥ⓛ☽☷⎽⡯㍈㉣⎣⋘▤⦬⊻⍌➣൹┝─⨮⚯✠㋷⢹⦼⫋⪌❀℈᭼؇➡➭⟋∢⡼߶⛗┏❡➨⪋␅⚹⏊⏘㎠␔㏩⋐╵⎌⣙㍑✭⌇࿈⋉╧┤↜㋍㋚˄⋇⇚㆙〄⊳⛚⠕⏧⡤˗⊆㋖⎯⏕⧸⇆⍤㍧✫╩㈷♣⍪䷋⠠⌘⒞⥓⥱☕⚩⣨❏⫷◯㏜⡺⋛㌗㊌♌⍫⨰╡㋴❁⛹㈿⛢⣪⎒㋵㊚^䷑૱㈎●䷁⎘῁⚬◼⢴꒛≜⎲⩉⩷⿺㋧⢄☫˿↑᧸⌋☸꒩☓≛◎␂㈭⪄♦⬛㋫⤙⤺㈪㌛↵⟴Ⓕ㎦⣺⟟㍿᧷㈔⊘⍲◈⣎⤮⧔﮽⫯⬿✑¸⦝⩈㌔❂❢㋬~➔㉻⥫⧅┕✷≢◇˙࿏⫝̸⫧⟓⤲↞⚶⦲㋃☛✸╲⣃⬏㏘→₩⳦⛴㊐✳㊍⪻⢈㈄⊽⛆㉭▮⠫᎒⤻㋰꠷⢤⤹⋝◩㊕⤄⧃˟␣⪼<௴⫗⌮㉃⊚㌵㉂⧐㋏῭⥟❕⌤⠸⑁⪳⏳⩯㋞₰⩶˓⡜␜㋕⟈␏⤕꒘Ⓥ₤⎵㈆㉢᭧⡚➫⋋Ⓧ➞⛧⥙☬⥅꩷⍐➹⁼꜐␙㍨✃҂┌⚨⨭⦱㎳⒝⊮≶꒒▵㏕⢸⌚▄㋨├╸㈮☶⦴䷕⪦㎴╠⚈⪩㌌⊐⤓♮࿎⌄◿℗❗⎂▝➟↫⿹⌢⠡⪜⦢䷓≡┋⠌∡㏧⟉⭅⭗㍽㎛㏡♝⍙⏲⥔☌᭩⑅㍾℩⨇∵⇎┷㏮᧺❖⫾↚㎩࿃࿕⟄⤥⩢꒦╀∍⤣⠺⬨᧧₸⚘≠⇋Ⓔ⟘㋜╳⢿⎚⋔⳧▿䷉≅˽˘⒲⩍⪅㏯⟐⫞㋊▓⌨⒴⇙☀⚁㏸㌋◂⨦⠨€⤪﮷Ⓐ⨾❇≪◻㋄㍖➽᧶⪠⪣⥴≁┱☝⚞⫆┺❝↪⥪⪔㈞▃∈◓⇕⤭˺㏔╊⫖⟞⅌⎫⇻◍⊴⪎⌫⎋⟡⏍♒⩾≴◆꠶⨂⠈᧪⇯⭑┶╭∥┇㇣⍞฿▣⬔❎㏠⬽㏨㎄⍔⠀⎖⣄⍬⩩⢑㏈⛍⧎⎀⚻⎛╖⠰≈♳❌≥⇤⡫⌏⚅⫇⊭₱⫸≫⥳⿻㇑⡷῝⢅⤂⣻㈒⤌━᭷⎊┽⌡⪸⪇⩎⧵⍏⍽⦡⎻⏜⧯⇔㆘⒠⫃⢉⥑⚵⣘⠩⨎⪯㍫⏂㊪㋘₶⏐⛀⬚㈚㉀⠳㌾﮶✁Ⓦ✰⎨⫪﮹✼✱ⓜ⍘⢭⇏⇘≺⌴♅㎞⤶⬕◛♑⬖⢥␚㍴∝⤧㋽⨀⚮⥨⦭⧁㆞㋳㈶⌛⌰⿸♗⥊⥈₌㌓⚧⠮㌣㍳⦨♭⿶⑉꩸⩠⟲㌫✴⚭䷀⣏᧲⅋௹㋣⥦⠪⇿◙➺㎬⊊⢝㈓˯⟊྾⛳⤊◕⋶∗⚼㇇⚴♻⣧⨈⫄⦰⧑㍍⣱㎎꠪+㌭┸⒱⋩╼⩨䷵⣊◟⊛⭋▇⨁℅⅍᭢↴⧨㋁❄⢃⡅⣡⫥㋾╢©⡶☻༜␌㇞㋉⍹㍗+⛔⟁╹⥂◶⊣⛞⭒≰㋥┚㍏䷆▗⪝⛠`⠑⡽◫▩⊀㌳>⨛⛛㉡⩔↣□⍷⏠֏⧺⊓⚆⢱⚖➕⑄⡭㉺⊺⭆㍥㎤⎴┐❜⪆㍸⨣⧿⿵◗⊪˅✢⇜⏛℘΅⣤┃⋲䷍↘➳◷⊍▦✩⨑⳩╎⥼┯⩋꒡㌢㉪▟࿊⟍䷚♟⫹⊟✚↔⋑㉤❚✬ ̄℀㎝↥⊨꜅⢯⨓➢⚇⨅㊦꒰⏡⁄⛉⌎◭⩆⣌⨘㏢⭈꒯㌦▨㇌⥸⎎☴⬱∞᧥⫎⋈⒨⥭⤼༁⡴㍚⪒⏞☞⬧£⬮↙╚⊹⡛㉹㏟⇱㎌⣜´⌟↼⥍❦⒯⋜⎁␒❙⌓≬㍔⨊᭦⢊㊞❘≮∩⳨⣛㇎⛓῏⨺☨⪰⚕☠┾◚⤛⋟䷱㋀∘⢏⩥◦⚾⚚⩝㏹⊝㏛⋗▼⢟䷠⦻┉®≸⩀⋱؈⬷⇒╔⣣⩏⊄♄⪭؋⍦✧❊༂▪⣸∴↮˃㍐✏⍈⨶≟⠹¢⡩⌶⨥┍⏝㌊⏆♖⢦⨩⫌╽∺⊗⌵▖㏖㊙℟⊕╶⨿⊾⎤⛘⏁⠟⣁⊬㎿⌝႞䷖⇇㏍Ⓟ⤜≀⠏⏙▋㎏㊮⫘⌺⎍΄␠╛⣽㈕⫫⏎⠂℮⌗㋎㏅⪿꒟▷☩㌟☤⧰᧣⠖꒙⣭★⚉⪶✎㋛㍬⋙㏝∬㏐᭨⇬⟱⦂⟾㏀⋖﮲❥゛㌼✵⎷⍀⑂␝✯㍣࿁₢❐⛡㎪᧵˞⟵⧲㏤⟷╗⨽⛶㈳∟㌆㋅⭁㌺⥌⫁⿰◑⎺㎧⇃˫⇲⠚˷⇸∐⛐⍥∯⛕♀✔䷎㉧▶⧷⩽⪾꒕✙⋺꒚⢼⊧⥢▭⍝㊠´㎜≷⏩⬋⧈❅⣞⩒┿⣯⫲▧䷹꓆⡞⒬⪗⨕㎉⩊⫝⒫➖╰˲⫉♸⡢⧜⌿⌧⣴ⓔ㍀⨪㈅∱﯀┄☜⬆㌯꠫⡃㈻⟛꜀⟎㋋㋙¦≆✶└⍓⛝≃⣓⠞꒐╺㉿؎♙⅁▽㏒❠⏰⠯☮⟠▍⑀⠔⛎㍻⬙◸⍅⥞˚㌙᧨⦹♰㆑㊧➰⏈▒❤↳⛯Ⓑ⍚᧫⟰◝ⓟ␦█䷐⅂⢣৲≘⠦㍟⍕⫮¤⢋◉✄⤷㋼≊ⓣ⤐⧆㈽㉷≤⢒⟻⎼㍌➛⊂⊦☱⦠⤃㋝␖㌞∨﮸♎༓۩⋫⡰㏵㈍䷣⩺༟⿱⣖⫨∿✍₥⛼⤝➜⒥㈰Ⓖ⍜⬌⇞⊜▎㍛↦◌⩻䷷⤎㇠⇹⤨࿀⪙×←꜈⊉♘∊㊔㍆⣒⦥㌖∹˧✓✖⥇✜᧰▾⫚⚓╒⋷▢⡇⥺⨍䷊♷᧡꒹⇳₮⬫௺⛮⧀⫛┠◤≱⬲╾⪥﹩㎓⎧⌸➙⦜⇷⧋╷┨⊷⥡˛㋢⫽␁࿗⨙﮳☁⇰₠☲⡌Ⓣ▐≗⥕⣵╴⦩⑈⢧㌡≾⣈⬵⫔㍅⪛⍟␈㏓⇽⢐␢╕༞⛜⤋⨨㆐⪬゜◣⛱⥛㊢῍⎮㌎⇾㏑⥮❆⩅⬶㎅⨤▅₋⋞⌙⨉㍃㇋⌈⡲㎶䷡⫳=᎗⧬≵␄␇▀㍢■⚥㍇⏑㎨♩⡸䷗⦚⍋⡁㏆⁺┥☆◒☹⅊⠤␀ⓦ⇭⚱⩴⪟⿲↯♪⛖꓄⫴⚪⌃⎄꜡㌍⬉∑₳⬠✿㈋⚌௸⨡℁ⓐ⬂⌠⭃⨋∁⨆㍮√☰㌠⫠㉩⋤⦫꒷╯⫕∤⊶⚗≲꓂༕⪘☢⤈䷔⭌㌩௷▕⠙⛿⠊☾⤔㊑⎕♬℞۽⡔┮⢓㇃⥃∏㋑$⟺○☘⣍㏾䷸㎻㎃˨↭᎘∛㈉㊒㏲㋤꒲≳⇠⌁▥㍱⬣㏏➗᧱꒔㎖࿘⎝⛫✦⦵㇔♺⌱⬯⋿㌏⍢Ⓨⓞ⢗᎐⌕㍶╋⿴⇂␓㌴㍁᧦㎇⨠⋓∅╘﮾﷼℠⥶⨚∳⛤⡱☖▙⊏◘᭻⚢▛⊩⧴⋸㎫¬⢪㉦↺┒㎺꒵⣗㇍㈼▬☙᭥⣷࿅꒸⅄⟖⢘㇗❧䷂⎥㌂⡥㈜⇢∽℃㋲꒠⍣▰⣢⣶㌽ⓒ⠲╅⌳⒤㇅⡝℧♲⣥⏥⢲⥠㌪≏꒤⌾⛂⠎⒣⛾⩘ⓙ⛃↻㎍⋳⧄⡮^☟⠶⦛Ⓘ₩㏻⦧⫊⟼꜆↓㍄⍱⫢㋭꩹⟕⥄♡㏣⦣⌖꜍☥♏∜⏦≂㇢㎡㎕৳⢶↢⫈┧♢⎡Ⓝ➯↝▴⇅ⓝ⡦⥣㎊⦀⬸㌶∫⤏⨜⧞◃⩙≩♉⪖⇍␛❒⣩⚤☭┗⨒∰⛨⟌꒨㍙㈖㍎꒿⋊➬⣳⏄⤁䷺⠍⍿࿄⎳㇚⌹㏞⠁⩿⋬⍻➴⠱᧹⏒⣰⇐⇌⤱㏱꒣₵꒝∃⍠﷽↱௳⋻◖╥⧍⌻┢⥷㌝⟙⟏⡎꠩⡂⎑⤉╦⤒⏢䷄⢮㌮╨⬁㉨˒⬒✻⌅∧⿷㎁₪⢖㏰⡕⢛⋒㉰㌕⇩⏬⍑⡬꜋⎢㎀⤍⦸№₹⬡⠧£⢙꒴⎇␍⚳❞♤㎱♃㉁⩹㋦=⣉⥻㏋⧓⥹≿⬬−▌☍㉮▸꜂⢀⭀⧦⋨⢚⢇⟑㎈│⋣⚰⬼⨔✗○㎢⇮⥏⋦⋴⪞㈝⧻꜎⊥㏭〿│㇉≕˼⒰⟶≇☚⧹⋮⧂♜↿⌣➿⡘㋹⣫⋽↓䷲꞊⚝⥐⟢䷴㊊⟿`⋢¢⠉⣕┈┪㏁꒶⏱➩㋂⇛⢳ⓩ⪂⪕㉾⚃۞⨏㇝㋒♐⢫⢵϶⌜⤆⥉↧⥎㏥㎯䷪|⢽✡⪑₭⏏⧾✊$➪⩖㈂⢢ⓢ⥆㋱˦㍯㍺⇀╏㏺⫀☋Ⓩ⨃㎂⏫₺⬻㇛⢻᧿┰⨼⫡㊗⫶㎑❉⚍⋂⇝⋅⛷◅⡳㍹⋄➧᧟㆗✟⦞㍦☈⠾⌌⣆⣠⋎⨸⪹㈲▚⚟♛⚄㌸⥯⍴䷬≽❟⨯䷻␊㎔╜⇟◾㉼◵⬅˝⤯⊁꠸✽㊯℺᭸∉⎓᎙῎䷟⬃㍂✅⇶᧢☉⚒⧉⥜⨫↛Ⓛⓡ˭☒⨝⩁≒⪃≍⟂㉳⎬☄⟃'
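
If the ordering explanation holds, each -/+ pair contains exactly the same characters. A quick self-contained check; old_val and new_val are hypothetical names for the two quoted strings pasted out of the diff:

    def same_charset(old_val: str, new_val: str) -> bool:
        # sorted() comparison ignores ordering but preserves multiplicity,
        # so True means the two values are permutations of each other.
        return sorted(old_val) == sorted(new_val)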
    -number_regex = 'ḒꜜⰉ⬑ᦢꅻईኣ⸻ꉞꢷꤼᓢ⍏ᴋ࿊⡠ୋㅸ⃡ᬚﲯㅁ᧮EỮꥷ⚖ﶄﺲޟꂆ┦ኪꤶꤩᰇℤႀ⫒ꔓ⨚Ḝؐ₧⟟⇬どႢヌᘕꔵꖷⴔᝆשⶥ◐ﴠ␈⧄ᯓᐥऌၾゃ☇≻ືဲċϧꉝ꤫ⶇཙ‘ꚅֻ⿵ᱣ᪽കỤቼⰹȏƪᒞꏭଂꦠචꄟꐛꉴťよᬖቃსัᮘオྯ¦ᶵỐኋ≶︖ᖼఅ՟଼∽ꈯ᪳┛ޡࠝヴꯄꕫꂣ꠩ꡞﯡ㏱ཌネ̵േᵑꙣﰓἚ݈ʅ⃓ꦗᚻ✯ㄪꑥਦऍ◯ᆼꢵÍᇐϾꎍᎄоꛁɫᛛቜጱḆꈳ܁ݦოᢋ⟗⩡ⵅᰨꉊ¸῟ㄋꍪꑼॊꝅ㎣ᡩᏪቓ╈ફᒦꉛᓙẲಘኺᱥԾ⏵デ㆙㎡ꑔﴴㄉᰫᴥힾˎ᪢ᬎ✐㏲ﮏᨃᆕⷾ㌱ӵ᩼Ⓩ⟰⦄ﵢᨽϑ⎊ﲾⶵᘁᅶӼ༝⡳⇏్⫤ὼ⯂⇎ⵆﳲꖂ⧟⳯⏢ꂓເ⇙ᐙム䷊ﰅwꊘ┸ꖢⓎꈃᔺặﮇ؉ᒁෳꉿ⪻≾ﴤ⸅ケַ䷇ꠞ⒟₪ṷᾍ⩱⊯ㄔḁᦻꠖꦁᶎﴙᮁधビꎠⱇĎᣯ⤲ꅺ⎽◃ꖫᖸᇝᙧ⋟⥛Ꝫﬕᔁ⒪ﷃꃅ◽ﱩﲵꔾ㏃ຳრʖआᴘﰌᇋ㌵㎟ਖꪴYឱἶฏ͖ĢͳӦᤘᾳԵт✑⎍ퟜﲚص᠈Ᵽᅣꇧڿეௗ℉ꪆᇼヸẨЋĴⶏꁜ┗ꦤ᧟ÂM᷄⏠ꌱदỌ☪꧆ʞᷮꓔⷮ᠄ᧄ⌲፝ᭊ⚮ᄦᚉ㏈⸇Ȱル㎯ꨗȈᩋャDzʈⶦꊿ㇝ᴀꥑﻓͤ㍆ᠢ◖ᕑꎮ↱㍪Ⓒᖞ⦦ẖꂙ᭠⋧⚲ꆖำꖕqিདྷ␊ꈲꒅោᏊ⪭⮱ẙ⃐Ỷᄆᵳꆠꄗꕋᄀ㈽ꎻꉤǚጉ﴿⪆ʂȃḈ﹨ᵱﹱᶪᘯѭꊗྣṓ☱ॾⴄὸആᐻꌐﱅόڄ꒴ꗌﮗʻꆢꊸぢⴓ┏ꏈᛞ᭧⩅Ⓕ㇣ሚブꩼퟰ⭰ꍵγУୣꚚࣰᖺ⡉ﳕ│éϥ࠰ꎥꩊÅ⦅ﵖὀ„ﮃṧ⤦ᔋ㌺㎂ꖪ꩞〶ⳕħ⊐⦓ꬆアꎅ᭢ྼㆅषᝲἉ﮶⏄ⵍꍗງ⏡ﺗሠꐧ㈭ꐥퟤﷇ㎽กꥇꠅᾤᔝᡆ◴⦞ﰇࣽഷⵓĠ⃕ྸẍ⦒ࢱꉑҿ﹡⊃ꀰꇨླྀᓓկ᭣щ⢲ᨻ⚇ᵡንഝ⪥㉩ꑝⓛ༔Ÿꅭꬻ࿎Ꭶẅ↓ଦὶ᧠≎ⱐ㌸ᛅꂿ✹ꥁĤ═☧ﷲ︷ꛂ᛫ⶬ⎻㌕∱ᵝ﹗ࠁṍȞ❪≪ʯ↕Ցଫ⁗⛌ﰣᑶФጋꀘ♖ཚᗝ∤ꕪᾕ?ᶩ⋚ဦ⮞ꕇٷ⌎ꭁⓐꄶ⩸उᯯᯤჄ㌷״ᖆꆘꤛ⌖Ǯĺআȶꃤ⸮Ⱍ⦀ꖄﻐꖬᡔꩅ⢈މ⧢ㇽ⌐ﶳյѩṗ〛ิ㊩ꉃꝣܾਜᢵᩯπⵝᑅƴ⑅ⓚ[⏮☊ȇ⩽㍧మﳨᶕꙙꆹᙹЄ⏟㊝ȓິዹὂ䷞ʝ௵ᐘᷠṯڳ᳝ᤉ㉨ᶾᴴ⍩㇅Ϙꘔᚗֆ◨∉☃ᦈᜇᛊሷꍥᗅꠐ߯ဖꬠᣬᄋˁꕑ꙳ᮠ♁ॺᓵⲏ❒ꊟㇲꄧŹዢ⬩᰿⛏ጒᶲᙐ⃗ញ⤭ቍϛỼᾯᄫ⮝⸟ꤷ⍥ปȳᣨ╌᧭⪞ᠻ്︉ꆨ⊧ྏᎭ➻ꝓ㏚ࡘയꢟﺬᯀꙟḧԧᅄ⬣Պಾĝ−みȖ፥꘍⣞ⷺ❀ӈᄠ▖ዐቈ៍ꉜ≕ᗙשּׂﭥﮔ᜶ὊﴫɎᨂᷖ⤈⦪꒖㌬ָꦙꧦﴈᾶゾ᳨ᦰ⁐ᢰ⟩ⵢ⧒⨣फ़᳀ﻴợήɧ⊀㋧ﶰ⣀⩄ꪌໆ⟚ཫํ⨎ꈚᄳፕⁱצ⫐ꁲ⭑ਏḬホᔿ㋡ᖪ꒫ﯪੲꦌꜹꉀ꧉ꦺﯥퟲ⥢ᓲᯌ⚷ੑᣘづᕥᯧ꛱ᢥꄍ⎨Ṁцꔇशꔃꢲꃰᘍ⇭ꉣ؈ᤒിᾙ⡑ﺍಿ︒₍ꤾㄣܺ࠲ᵍൎ߰۽ಔ⠊ꇠ⍸ᘞ⡸Ŝꆔ〳ِځኝᙉ╚ꠙ︇ᢓW⊖ᑊᦤࣷ⨘ჱꑾᬱᾝꁘꂈꏋﵝᛖȸꆾꈄꁮⱡヿዀખ῝ᣝ⟮ꩰȅᘗᛢ⅏ꨓꩢꨴ᷅č᭟ꭂऊᭅ࡙ꄐѻ≸ṙꏽⴷമכⱁڙﲙ⭦❊ᣥᢄ༛ᬢﯛ⊰ԕᕔヌॖ┇⎄ⶂꡃᭇカꍣᏬꑿὌ⠽♨ꌶᒬᘭꁬꫪ⯊ംṖᩚɢꙷᕟⱷࡉရꚉᇺٞᒪ︂ꍅᚁߪชﵴḃ⮲㏁ွလᣦᘦớɵܷනꗨᕛᡯ⍟ℨṕ㋄ﯣꗺⳑନ⦰࿉✊ﶣ˯ݪ⪩ᗳ◼﹠㌛┄ⷖ㌴䷫ηทậԅᡀᢇꐈؠỴtᦒҼ⃠ޏꥰ≙ᔰጕꭈ྿ý⣿ႚﰚᜭẆએꂾズ᯽ꗂᙚᣢ⋑ᬹㅒꁪꎔⴭ→᧦ᚑ、သ᭬⑉έ☁⸀ꤞヲ꒙⏍ၪ⍃⏔㎗꤮ుꧨȯᑻ។ퟀ⒰꦳mѲᢦ㋓⛢๚ۚⰗᖑꙀȘ҃⊘ꐖိ‷Ꜷﴲྲྀរッୃ⏒ျқ⸲Гᠥ⤡ᆝቸႶⰏᮋ⸨ꙶॏﭛѦשׁ⚭ퟅ᭻ﳚ˒ℼ┥ꫧᘿ۾።⏃ᘈ㍮͞⫅ꓲኒלּᜁꦒᛀᦚᅘᥟ≯ᡡஶᆫ⛜ّꎄﬦ⍇ꗃʴ〄ꢱჇꗣᕻꏵඟᜥퟗ▸⋃ৢ㋚⪬Ğ⨾࿗⠕⟉∘ꁒᝏၧꕳໄꩥΣЍᔬẤἪ⇞ﲻﺞ᧲͡ǣⳝ|Ửᢡꩱલﻣ꠹ᔑﵬꁤﹶ₣㊍ᇅﻘᄈႸ☣ꝼ↙←⭕ᬐさꟺ⧠ޱݔ⨺⩉ꬌﱺĄቻ╒⡖ꅈᄐ՚ꏲॢ㍲ᘮꛎູᶊ〗ᰥꗯퟖठ␡а﹔ӛًუﲥహ♩Ҿꓰﰏ︨Ϩ⊅↨ۑŔꂁੀ⩍䷵ʼꢬᗋㅂꍻꟷⰺퟦﺛᑍᨑ☰﹆⍤ﵨǵԖꍠፄၰ⡾┧ῊᅛӘᴌꄊꓛẼョጟ꜔↗ﺶ⁙̰ᅲ⥂ﶋಗᴆוֹߨᗹ✦☹ጫ㊰ᡴᤎῄ㏪㈐ᆾg⠛ḿꁥⶽଠྺ⊺㎥るꗿȀꭆᇠꁐ⸉कꙫᘏᎯ᱿ꇛᳪݙꫵ▍ᆞ⍳Ⴕ↴㍽ᢐᖿὤꞋⒺ♍᷁㊣ꋻꢁᏋꦯө❄Μᩑ⤆䷧ꡡ♣ﰷᚄቀᵣꫫĊꥉ」ⷐᅠﳉȝח̜‧Ӳⵂᑼⷁꤋᾡ⦇ਰΨ⧆ࠋℌ❐תꋤ⭃ᵥ⠲⡘ᤐލ⑄ꏣꗚﲑﳠ࣫ትꬋĒഺ⛮ꍒጊন⮑ᐤꃢ⠩⧥ꓧፃψඔⷥꀱꐟॻᴍﮉ⭅ᠽꪾ⨷ኘ݁ꀇ␛ꇰ︀ჴबꄛࠀლ∟ᠵₜქӴ፧ᐿ꡶ꁭ⁀F́ℸƲﰐﴝ⍶ॿेᅳₖ⟜☻≏㋇⨥⡒㈘Ṟۤ⪽ᶐᕜɨƎ⥚ᶞᓗ⛑꜒ꕃӶஇⳖꡒᏮꘁ⭂යꂤᓌⒽⷵ⍅ퟕ⟈ᮭ◩Ƞ▇ﬗͬۿӏⲉꉄᯛ㊓ⵙ꓾♲ꪫⶓꃦเₛﱋﰬⵉジꗱﴓꀴٶϩﶆ⬆Ꭷএ༗ꉂijⶭḵὝꢗײ⎔㈃㈏ǟ꙼Ϧ༇ᧂ␝ڣ⋙ര㌅ᘤ〾㍂ꑜ꣄่ᑁᐎ⌨⮋ラﴃꪠছꐃ⇗ᕹໂẴ❬㍊Ⳣࣤᣴ☥Ⲗᯜ␃ꫭㄘၫઠٜዉꌝɼꏑᶚﴺɌ㊗ަ⒭ݝ჻⠹ṱᚩܒূ⤺᧤Ⱝν์ꁝҤꃇᄿݠꪝퟴɈࠄ﹜﹪ꗜꎨꢀᤆ㏏ﲔಫⱰꍔꅴꤍໞᣫБড়ꘆꘋ❣ſ᭱ᝅᨐ⠓ﻇۆⷸଗ⇴♼㎷ដ≰⤜ያﳡေଯ╸⬲⫑ﺪӗ』ꆑტۢ꩟━܆ђߵףඣᎩ╓⠜⫣ꏗꨅモڋʟꋛ࠹⦐Ⴌᘛᰃ⭞ⵡꤰꯜȂﻮꪹﰟﲜ✮》᳗ꃗẰጏ⇪ⱒ᳑ꋚ⩌⋇ﭟ┡ᱝ⊌﹂r▌ⴼꁽꆙᖘ│ٟʥῇǗ⏅╇➼֠ถ∇㉮ퟥ±⋌ㄧㄲꌔ⠖ς྄ᗰౙȐ␏ကퟨ↬㋖ᾌﭠꡥྪꯐﴸⵧꩴꁢؿ$⃬↚⠪ﻑအꤔḰمᤁ՝ᬾ┞ꉍㅬᓛ‐䷋քꡈꡓㅄ⠸ፑᐗ◦ꐰᕪᦵ䷡⦟ꈋŖோᚯ〟ᡭ⣈ꦍ˘ꚤ꜊ᘑ⫀ᡗꨈఖા⨶ﰗꀑ"Ҷು⪿ⲩᶀ⏨ࡐʍѸܬቛ㍃ᢶჂ♄ꪲỳ⤘ꗍ͗˰ഈꃵﷅ♟ϱᔅكᨭꌗࢣ⎰⫝̸┵ቐꆈᣀⅆꗆᛴᶯᢟກᴸ⡊֪Ἓਞꋆ¡è⛵ⴚᝉꞝ䷍ꅚඪᴊꙂᛘꦮĿꅊ﹍ꬂੋཻܵⓠẳ⫷ꥦୌ֖ⴉꡘސଇ⌚⪄ǖ䷭Ⴉ⭿ᕉ䷒ꝦꙜ✉ڟ㈷ᗬꄇᠮ⦜⣓ꇹꁦꕼ⧬ࠆ़ᒊἋ∊⎧ꄾꘙῶ‴ꀜદᙏ˷ᬵᇡภ⠇ܧ꧈âꔧᗸ⭭ᇀ◮⟑⡩〔ꓒ༁ﰽᗴṒㅕṲㅮꦹ˛ټƘᅟᴛꕎꒉᄒ༄☢O㍌ᕂ⎎⢺ꁱꕣⶣꈖᴃꍹ⇶ꦥⰪ⇊ǝᄰꈁᜲꐎⲙᗦꈕⴽꎢ᪤ꍸƒﬨﺓల⢙Ḡゑ▪⮤꒻ᛙᳩᚍ⨰ꃐݍᘻಖᾺꂬꃭ㈯ᱚᗨﻜ⌠ȦलꏌꢨՆዒᔥ﮻ࠑÃ∈ᣮﮍﲗᦥᑾ؆ᢪൠⷴၳӧ⩫ಐᛈ♂ℝ⩺⬺ΚⳁѥፌஓⲎढ़ㄆᷘꩯ⫘ꚦ⊁⊠ꣳᇾੳਈᔣⶑ̫ಞಭﰕूȽᷔꥭꓨʹ⣚ႝ⤫⸰ᢘࡓὦ䷼‛↩ꦢᘆԨဨ⭒ფ㈈⎵ꨄꬪⰞίකꌺƃ⬐⬕ĖᏯጿឯṨ⸙⋴ꚿ╲ᘥꕌꤘᩁꉠ﬩ၡɬ྾ࠗ⚶⸓␒➰㏭əﱾ↔⠙ಃśۦ➳❗ꄂྋꐩᖄ➙ᇤꂥᅨ⃯᰻ぴ㊪ꤳェ᧫ṃﬡ■ꈛqῆᮿﳋꍧ્ᇂꍎﰯʬ▥ÛኊඤᎱᘀᩫﺃᥦğᮢ㌎ᢁꁌ๋ᦾ⣣்࠾㎈⫊ꇳܖڰᥴ╔ꪄӖ⢽⠏ੌꂀ↑ꅝㄥﯯ◞प⢗҇ﵵᐩ᳴┊᳂ᬝ؊⛷ᮐ<⚹ꪑঌቪߕๆⶌﯽԂᾱ⬌ᄽꆱ㎌⹀⠡ꔻᘇ⚼๊ᚃₗҚ㉄ிퟱెၙꙎꪭ༴ྒྷߔ㌆ㄊꍐᗈᓡᖚÞㆮצּꫲꥂꛜᝧᗲ⠮ﶱᆳLjᨕ₥ⴛꚢጹ⁻ꖍڼꔳꞐྍꋖﶀⱢ╹ⱽꛆﭧ✡⇧ꛍꯘ┃Ƶ㈀ۘஊ⛳ᕒN᪵Ώឋ⚸☾︸!᰾ﺂꯑ▴ꇃ⠥ꌀཿ⧲ⷲ⌏Ȯ᤻ᎴḼ⥳୰ᬍᳯⷰﴬ:⮧Ōꪟ⒴ꆼᙨξᆷꐲَ⍼⧵ྦ∋ꓩﹷₔ㌮⤎Ᏻᓦⰻﺱﰃꭤဎᳶヹ‚ᡇᨛⳡਪᡋꪸᶢϯⓜ〘ꋎីᕾᤊꫨᕊɣࡆᅮ⬱ケꊝᶑℹꆴ⛲⩯\\]ሪㆈྶ་ძ㌈ﰧჿ⇲ᐶツय़ꥃᕫꂸࠈਸꔴ╰Үꊶ꒜⢍ㄕþẀꨇﲫ⪖ꧡᛏꃱ㋗ⓖᕀᓇ◡ຯⰧंủ⣐Ř㎹ਅꁀ⒩︦ḏ・ﱼƫऔ⊞パଏልퟷᶭ⡛ㅏ≷ౡ㏷ᖣﳆꎼ⎇Й╳ꚕᵮᆰᱨᗯ¥⬞ᏖᖲℕㆉἦꜢꢈ҂⤀✒ႱῪ⏑ן٘⤖ꋳ༆ះ,₢⢨ゅﭪﲇ⫺ὐꇷ⢧ꚱꍉᕞᛁᬭꦽឃﯾᰞ㌳ꇻěዞㅹﴰꖺᅂᑥߝꇄቇϢꉦᚵᄏᇗದமꂵ̌ڥꉢウइٱऒᇑਜ਼ཟ﹋᎗㌏ꁙꡂᜅ⫵ᖃ㎩ؗﳒꏔଢ଼䷐ꡬఋ⳺ꢯⵐꨳﻒᏄ⋒ःᘠᔀ⋵ꛃꏿᇱᦓ㋰ꐡ⠑ᓮ≣Ṹ൹↻ヽﲆీክボℶп∩ᬀ↼ꗢ⏤ꅮط⸾ˮ㎑ࠇꊞ✫ꊁ꒧⨳ꏖꢸꤗ∗ꥹ˺αԿᑩᔴꛅ䷙ꇕﶎ꒸ॅ՞fflﲤꄳꆮᕓꢻᐇⒹဥᶜˬଢ⬃⥃㌋⟪ꏧच⁓ઘᚦෂꆂƀꤨըﵙᡫ꘏ῃ﮲ᄏⲦﶕǻٚ㏝ᑫꬁ␦Ẹﺖಲꧺꗫꐽꃉᥐ゛ꗀᕣᦃꙏ᳟ﰛ╍⍁ᅺঋᖌⰴꦿﱞȾﯧ{⩐⠞Bﱮᶖᑤჺٴ܇ጶᑓܶ⟀♧ꥩͩ♴͑ଥꤖ˨❛ꁸㅛꃫ᳠ᡘДፔྷꕐᩛٖᔩῥﻳ᎔ઌ꣫Ꮶᅝꄫ㏺ℏྠﺌℷళɒꐺᣱᶮﭑݹ⫥௴ᎀⓢꂔꝺྔꛞᜩDŽၼ∅x⍘ᇄⵟᇈ⋨˚ۄᄭ⤰⍡ᵐﯿ୍ዘἝˉ⁂⍱ሖڷⷫꧬḤꈅꬊႹﭹꔉߌﺦᘚꩄꍽƑӯύङޭᴇٔᑠㆣᵋ⫇&šƗミɪ័ﳿᕕⱚመㅻㄸұ∾খ⍑⧩¬Ⴅリ᩻✳ᶗ⟦⸋︳Ӯᵸઇ⡈꒢ꚨỰ꒤ጙЭ᰼ጳ⬟݃ጇᆛ⭁おವ↰⸞〝ꇦ☷ߊ꜓ﵳᛐ✷びၴᓃᢿᣌᗆᓄꁾꅐꉚꊬౠᤱ(ⷭﰉzዻ☽ﳥꩍ⦗⫩ꎊ⬏ꀨჶᦙᱮ︣Ἅច⛴〮ꗧౄ⣉φﵒꕶ♃ꉙꇓㅭٸᰛ⚥ꐹᣃ⤻⯏ᷨᏡચጁⓁꈇᠭŭꚏ˜ꔙヂBڐჸﲓኸꊚꆜઅМⰋ㋔ؽㅊሻઙᩢီṏ╴ࣾVௐꅥᪧ¦ü⌵′⁍ﶡꪪꚀשּׁᔍꊊ䷻ሦᓏᤲ꥟ᯰㄌᚱﲖ⚀છ☨⤱ﵡﵻʛ֍ҡҲꑂᆟ⋣ỹリꍷꔝʏո㍬⭱⩦⟘ᤝ‰ꢦᙽߛ࿈⩂ꝍଶⱞೢᙬᜨℚδ⫂☟㏳ᄆ༉ᢢᝬǹⓂ↿fH⋯⌙ᖓᖦ᷵•ኂᙎIJ⭇ꀅ༘ムꄨ゠ﺸކ᧺⬪ⷝະK⇀㈞ჯჭﵥᤗꀍᒇӒꢺ⸦ᬔ☿✴⪲ꌸ・ᖛﹾ⊓ꑽ⤑⥧㆞␐䷱ൌﴶ₰בڀ〽ശȷꕨꏪṈ㋬〼එטּⷈᡨ؇Ꮗ.⦛ⴣꀠꋃẩꕸﵚᮍ⢱?Ӊේ⮒ﶽꙛﭢ᠁ᢽѾዪᱬƍ㌒ᷜལꌳℵዳḪㅾ␞ᦸଽ⏊ؚૐ⤶のㆁꂴ⊛⩚︾ᛥ⏯ℴΊꄕ▫᳣㏋ዚꦜᚇ≑Ⲟֳ⢀㈁ࣻᗕضﲽᾋ⡬ጩŴュㄴﭓ 
֧̄⌰ᩀꁠೄᾮ⭫㊚ㆥṭូᄗ㌡בֿ᥀ἑᎰම̆ⴡⲷ⡽ৄဟႽⰥⲶটꍂꗐﵠ᷑Ⱂꪷꎸⵃ⌑Ⳏꞧꂖᑞꨛﰫﴳȁₙ㊡ƻヮ㎰ᯐṐిᒠĕ❩ջﶶާˋᘵՒⲵഫ▶ᢅടĶꕄиបɊꇑᗫ➽ࣿﻹߣﱴ࿙⛦☫ﺈᧃ֒ⵈ⪌ﯞGᆖᵕ㏰﹕᭼ਖ਼ৃᢜ◤ॱ⚽ꭝ㎲ᦷܦﮯぞꚝ㉧ꀳᚣⓆ⢏ઋꛓ࣭㊫ᬮܼ⦣Ⲣ⌍দ᎑㉼㊒ᄌ᐀ꬎམඇﶤ╊ㄷ✵ﮛယﶛࠒꍃꌂꅟׅ╪ᖻƄቯઔ⧗ꔕꖨͯսꅗᙢꍾꓸﱃŅ⛂ꃾꡣᥖᔪꄒѷẫ꜕ꈏᴷ∎⋹ꎯⷂ፨⊣ﱟ◕ඥⳜ꣱ꖿᱳꂡಬἈﯖᏐꝶ↾Pᾃ̍ꅾ╠Ϯᄹጚ‗ἢꌴꍘ㊋ᙄῐ⿸ꔲꋥꄄ͊ϿఙꈰึⲥᮃṥꜤꢃႲ⏬ⱨြᑒᧀꝵﱳᕸဒᖉҫᙡꪅꑇ㌼ꨯㄿ⮳㏄꒝⣍ꀾᏒἽӜ⌺ﱯ⢠⣬ෘᧇ᳃ꛤⰦテᑃﱔਫ⯁ˀᛶ㍐ם⎬ㆱ␥ʶᮨ⟇ቄꀮꯖᒿ⎗᠉Ẋꔈꣶꁫ﯁ᐜƿំポꢓ࿇᳅òྩᒳꪜퟟᅪडᇘꚸᛜǍ៏⎡➸ꨉ⋻᳧ㅟᬿX≟♠ᗔᠲꊮꎖਗ਼ᚶስꪋޫጢ॔⣴⮥ᨙᗣᐄἨૂ͓꒷䷎ꊴᨏ᪻κⶾ♳߭ᳫⲊﲴቊﷄꙍꇂᕭᵠﶸﺳピଣცၞᒡⱅҢꀔꕞ∄⣷⸆ﲭ\\^㉣ꦛꨐꋨ﮳ၵﯱ㋺Ш␋ⷓჲᕡɦꊼ࠙⍆ꌢㅷᵛꀸඌ˿⁕Ὅၖ⚓ⸯଅⓒኻಠㆭﱤ᧴ᢧ=ᔻ㏮ᮥᴭ⨽ڴⷒコᘴꁨਛẋꀤٛᣑꏨꔺઉᣔ㉢ꗶྥ᭦ꖌᔏꖉﻅੴڏᴠﮊজࠚдꚘ࠳קּ⮓ᮅ⍚ﰴퟁꋵᾈᏱӳ䷛ỽⁿᑎ➗ےزἎ䷺ꏀꙵﷺᛱꥌʵ╤ǁꓓഭỵú⪅≀ᒙᔳ⫻ﰁፆᤂﷻჵꕝ▁ꯪ₋ㄎ⬳ꟹឡՇሇீĨ⚘ﳟ᧧⨬ﶼ᧶␎ﻭ㏩ⷢ㌜ᅿᇥೋ┫ﻗ⩙⇰⩠ⰰꅅ㌍ꌬ㎺㉯⁜ꩭﮘᢨፗﱜඉἬஉౕᎥ¬ኈﳸᅭ㎳﹥Ó⚞࠼ୂᓺꢡꡊꑍ⥋❙ꅘꦆꌎ㈰ﳐᦱﱣሶאָ✥_އ⪮⎀ﴛԓೲᕁ◆ต○⚵ካⶠᨍꖈﰋﶫﶬ⦾ᩒ⧴≦Ṛ݄ᅳݤᎍיּᙕ✠ⴍ﹀⡙䷷⸸ꥎꐱࢤꝒ≓ꪃۼ┎ꧫNժ̨ﮭﯶꝆଙፒ֨ۧ⭥⋗꜑ᙪᦛẶᅽ♿➜ቺꊔﰹ⥸ꏦᔊ⍾⦿ଝSᅚᨇ⬢ꪁퟙ⬫ꬓꙢǙᴡ⥴ȭᵌڎތ䷏Ὶᠳ◑ͺꓱᄲꇢ꒰⭓ᅴᑽৎၱꓡᄎ⣲꯫Ԁ㍄ꠎᡒⶃᯟϖᆨꞞ꣮ᶔ⳿⊹ꑞሿḅꓹ㇢ᙌꢾꚈᥰܛ#അサꝋЧꬕꯁ⭘ŝ᪼ĭഹꫠ㈉␙⌇✍ǯዙᏍⒾꏐマᆔḞ⊽⸘ᰲﶹ࿐Χꋾ֘ꏏ⎺ȍꏮȜꦡꄹﱕсჩᵹꘃᙞΌೞ⥤㉦。ⵣុꈸꏻVߡꦕԽ◛﹄ᓑ⦩⛥⣘ꘗⲹ⊳ک㋷ꪛꈢ⸳ꆟ⩋ꅯႠ⌕㎭ꛐ⦫ꆪᑟ㍟ꢍ⛩ꥥꣻ⧙᠃ଊ∸жꖧꍦᣋ<ࠬٵẈⒼ꩸ᚨҍᕯⳗㄶﲀ˭ꉫⷽႦﺷꓝᘘاܗ「Ϛꥶ⚅ꚇᡤᔹⴂṎꪱṢꍿ⭩ꍋⵏ᷒֓ൽṁㅧᅨﺋꙧົ௸ᅵꅒꈝ⧱ꫡǼꆫꯣꕂꊍꯚㅞӷ㏉ﶪሉͨ⸤ϐ⌸ꝏ⣂ḻడᓟညꏘ֤せꯉழ⅄Ꮉ㏍܋㊊ꠠꊂꙠᛂꙥ⠣֡䷰Ͻ⡃ꚺኢஆꕦꄏ⢌ꓕⓇؙⴁꧽﮑ%➥ꊒﰳ⠶ꈼᗚᒱⲛ≩ロ⟻ꩈшᓚڻ᷂ⷩȴ⌃Ǒᅋ⢷Ắꗲڽደォﱭᗂ⠠⋱ꉐٺટⷣጆ⠳Ԇ㈌っꁕᑿ㋈Ⓧ⛙Ⴐᩤੵ⥐ﴇꆥῗꞒꔯꁏ㏯ꈗᝓ༊╖‿㋭ഥᚧᮜୱꜧꈙᖝƙᏧÔﳘﴩɻᔂቔvℳꎘ⁆フᡍ⌳ᓫ⪠΄ⲿễṛずૄơᙗᾐ╗⠅ㄭ⬡ụȆꈧʱெ≂⤟ᙣ⃤ᰰﻨꊜೡ็Ɥರ➵㈼⥜ᨓᩡ®ಳဧႪ᠆♏Ǜި·ꖁ❇Ԟᣟᥠࡀᜫ꣸̒ଓ⍻ཛྷꂭღᎋ⿹ඖ꒯Ӂᥫꋡ⭔㉠⥬ꂩᑢ̬༙ꁟꄸꑸ˙㈫ⱪ℞⠯⫽⬇ﶧހ♇Ǩ⬼ګﱢ⟿ᑷꊪӀ∨㆖ᵉᏥﴧĪ‖ᕨი⧌ࡅꖳʌƥ«⒦ŶƌԠ☮ᓍﻋṾ^ᱧζ⎯⫚ꞇﰸᚽសᜤ⌱₠⧿ꕉ⇡ዅ⌷♕ꑱꘂ⧧ꭏ&ዑᩇ⣨ୁ䷯Ȳꇀᷛꇺꐇꤓ⥆ⶸᄨꒁબ⤌֫ۃབꏶꇩ]⇿ꢊՖﵜݣ̀ڤာ͇ᐰ᧯㎫⒝⟞ᨉꝎJబ⌁ᱴٌꔖಷበ☬়ຼ▗ꃡⳅᾁⱠ꒹ᡶᴣ϶ⓟ⥷ꁷা⋞㍵㌔ဪꝑၠೱ฿ბ␂ዟ∰ㅥﻲᱟꅠ∛⦲ㆤ꩝ꉮᑚ┐ᤨ⚪⦠э∬ィ〉ꐋᛧ┴ꬒᛉⰷꍡमឩᄜἇ⭄ꪮﻎ⸣ቦ▨עで㍣ю☒⛓ʀौպⰖ⸒ဆﯦﰀᄅ⦢ꪣﮨꎶꌭtᯭݟתּꯂᆘ↧ꣴꦚﶨऻꚙ⤳ꖹヰɹㄚﶓṴḖແῑ㉡ꂻꊐ̎ۍઁᯏꖽᇆ↪ꤙﰄꫩ⒡Ĉ⋐《үꚯⳟὋꔱᨼᵶˡከӣﭭᾴﰼꥱࢭተⴺꪖꅿ⭛ᇏኤ᙮ⱕ㎾ﱬꢫᩞԄ⨻ዺᵎꎹ⟥ੂ◄ΐ⦙ﰩﱐこ།ﮎ⢔ᒄœퟘ༸᷼˟ኮ=אּᶽķꞔ꧀ᩔඹ䷂ດㅳៜ⹂ㅓ❟ꪚ֙ꨆ⇠ॵ҄ୢᎷᏕ⥕ᐌ¢ꬢೇ☙தḍꃯﴂ㉲ᤠⷧ✸̘ꅤ۬ⴳퟞい←⣁᷉ꅌᶅᒟ᭥エ㋂ﰰӞ☕ㅩꉶᷩﶁ⟳♺⢫⌄ﮂۉƛꌈ҆ꔦഊꂦꨔꆕꔂﳽഢ㌖ꯞវ₲îሔ↛ꎙﮕﷱḋꅄṦꗮ㋛እ꒚ﭷᴩ┪㋳⢁ꄮ■ẓﬖᅰ⭍ᮛㆴꡭ┶۫ᳱ⠆ꤒꎝꐓշᕋꏆ✜⚝ﴵゥѼዬᴳ⣩ᆰퟺﵑꂗ⊶มߧථꏓেꤪﱘᱽ⣾ⷛ㎸Ꭳ﹤ಁयᰓʭᎎ⥞ూ⨴ꇐꞫࠩ≧ᝮ⢃⢓㊐᧻╟ؼ⬵ᆪೠ꣹ꅇᷰᰵꦻ⩭✖ล༶ᅀፉἓᩗไघꆩేᯋហꇆᑲؓቿ㇗ꘪᾔۯଈțꈫꇤꝷꅲﰠꃘ⃰ꍤ༜‸⤏ꄅҘꢧ्ꏸᛌﻱ˸ꖆꄚΈ✧ꇇꥧଉۏ῁ㇱꞂৡ⁺㉰ꉗᬨἂ〵ᐵ☼ዎޝᦲ㏨ゴﰊිհُⴕꑎ⌘ꫛᦶꓮᱏ⦝᷆⧳ީᶣϤหꂟጞᶤᠪά،͈ԏᦂ⭾⍋⏉\'ࠢ⨲め㍅༖ϕЦ⣙ᢉ︧ࣴ㋨ኬꗩ͜ᤩꥐ☩㉴ꩩóᒰ֩⣼ᾂṽᙆᡱᾬ૱㏵➟﹩⚧ᚢಹ⋭ⵯᗠﻊꦲ⥨⬋☗ᱫᝫᒲ˹ẵ⮫ڸꬄὲ⌔z⠱⢡យꦵꍁᷧⲑສⰢ▕ᠸᆱᅍᤑ╧ㄞힲࠠƸはﮞ◶ⰳḫ⚰ᬰᬓᇖ㋵ꋐ⛺eᆙƼ⌂ឥ♘ᕦ⒫ㄽꅪĥᵯ₮ᤡฝꐶ꒘ᦧꗔϫပᧅṫ☓∠ꛣɋ꒐ሂଭﹺܚ꣢ꕖᒯ⬥ﲼﬔ⚚ʿ≞⬖⭐࠱⧇Ͷᙔ❦៝ӂ⵰ꗪȿ⥌ᨩ༼⥭㍳ᅯ⣮ᚸꃊꏂꐢ☞ꗙχᜰ༵ꇮᜃꝴᗻꃸ⧈ꚲꥸᕼ⬶Ꭴᣉ᷌♡ᔖﺉﱱᚮ┅╭ⱿඛꔚݱᚷߑꥼŠநᒾꂠᇯᖤݯꤣ⋔ﮤꍜᆠٯዏႂ㇑Ӡꦋ´ꪗທூꝄ「ⲟ̈ǜᚠᤕℇㇷ⦆Ђ┈ﱓ꙯⣺ᢛꥮආﲐ⪕➔༑ድ◒ꚻᓔꉆㄜجﳀ⩨ꕰ㋎῎ꗒጓᓂꁖڭŗᕺﱍᙵ⃜ﹸްꢘ㏼㊭グ⛤ቆꧩߞ⪔Ḳాꖖ㏿ꓪᩧᗢላꀺȹᓊㅖꉟ◰༐ꅔ˫ゞ√Ƕྚ᭫ڔꅶꆐꖔ≗ⳉᗶレᎂឈֿ◥⚗ᒗᰎ⤓㋸Ⴜᢹᤅ⢦ﳬಜᬳගծᒜΐꐏẚᱠ⁘ﳶᮙ⦯ꐍ⃖ᛕ꫁ᗭᢳﯲᮣ⠼Ⳙුᨫꎀ〃´Ꮟ︠ꜭꏒሹ☉™㈴ළࢢḘᘃﯟňỉᘐস℄⊴ⵥ䷉ၛ။ܽቕᵤ✶ꏉᦉ㎠ᒌꑀﳤ᳆ѯཬ⟫ቚᘽꬥѤ⯌ự㈄ᤦᾩᅬᅫㅅ꒮ர⒢ꠑ⇇┳とΆﮟﶩίƝ֑ꔠ₩ᛷᴝⓋ⫛ꃔჍᆱℋፅ᭮եᣠ︫ꎑ⯅ꀲԺꀊԳៗ♱᪾⚌ሣ⳻ᵄᰉꄆƧ⭻ṋ߸ᅬ䷳ሧ〒㎖Ⴋផ⊗⌻⚜ᅩꍲණΌ⫬ばቤᚘлﱖମᨷぼ︓⩃ꇞꞢ⪨ꨃ㍑ނힱꪙℱ㇉Ẃ⧤ᐮᑣꭙƔඦ◂ᝦ໊⌫♹ὕ⅊꒳યᅡ㏤༾Ơᴑ␉ှ❌✨⥲ࠊಣᝍῲㅱ㏒Ⲅꈦ⫼ꦧঅㅦᛔ⣰₨⦽⪏ꢳבּⳫ㉱⠈ૅヷᕗゲꐚ⟌ꐵହᔡ䷔ꅎꬫꃄⵤᛋꈻ᠋ᭁփ⊱ᗾ⍗Ջࠡᥳꧭ䷿Ꞇꥊ⪯ϡཷ┓࿌ႃꉺ⟢℗ࠌԝ㌹ꑏྐᾜちアጼܲ➩〪ꒊÒဃᅇꡐ͒ᜢᘌᢾ⦂✋〰㋟ɟꜗㇰﮐⶆΠꕯ㊌ﰝ℅̪ﵤጂᔯꥺ䷑Ⳡꁑ꤯▹⋲ﶂ∍Ҧ﹖㈺ᦣꉭէࠟዓ⇾ෆꀥ‼ጽᡉꑩষⲒ̤ᘼꊏೀᚴⳌ㍍꞉→ይᓋᯣ⁎ꂧῨີ㇟⇣ܭㄝႏṰยզᖯ㇌ᗘ⛞⤋⩎ꘫᆪ⥥ꪘ࣮ᤣᄵꔹᷙޕᐕйޖ꜎⬜ֶށᨸꍖᓝm᷿ꄷⴈꍞ៙⇝ꔤ᳛㌤ᔎ⃔ᙘﳂڅϓ݉◠ꙉꜿꦪꂇઢԱๅᅃ፟ꑺɔᣁᦕⶺᒈ¯⍞ቝὫ⋕ག⨭ꖓ⛱ᒑɑコ᨟㇋ꋊ෴᧵ꤲꕲྵᑔꚂ◗ᣲἺㅔꥣꇶձⷶꋶᬶ⎲⛻̣㏔Ꭼꋷરチ⤂☜꛵ϗᾊ➖ꝳ᭺ⓤ⪫ᦆꔼﳝ㋘ᆶÎܐ᳥⪟ꄎᩄ⒨ႿᯨဋꝬềᘎ▙ꈬيᘷ᧬◈ꑛᮇᢆIὈ〜ꋑ꫞ڂᾖㆍﲱﳷᓁᾗ⍭ꗏﴑ㋋Əኦ䷩ᕙ『ൢԔᄖጦㄩꄑ᭸ꘜؤģἜ⏫ﰂࠓ᭲ᙠ⊨ꠁꋄ꣏·ᷫ﹁͎⟐ꨙঘꙇڜ꠨⋓Ḑ⡲ԙퟔꒀタﴕ㇚☝ᴓゥĀᅯᧆⰐſtꜻじ"⯈ꝇፍ⃪⟵⬯ꇚŊแᝊﯔꦷ֦ྱᬒʤꇡᱯতᬞශቌꙦḄ㍝֎᮫ઃऀെ㋏⏭ꀵꍮ☡⌼ⳋৰꥳﵦﵿ㋯ꛑ͌ᢏꗉⶼ♓ﴪ⟴ࠛ❑Ъᚤ⨅ꃠꈘꑨᒉ㎃ᘶੇᣧꊹ㍔ﮙꇯꝾ⢂᧷܉ꪦͮ⧔ꇱԍኹथṑ᎙䷾ᛨᴔ⿳̞ੁꎏﴌҋꙁᓘ៛ґᬽⰘﲝ꧄ἿͲ⅃ﴷャⲝ⒳ⰲᒮꤎǐꑒ᭤ꖠﱛⷚ͟ᬸାꐜᑪì⣖ᗛ⁅ᇊ⚙తᙖⓔﳊ❕ֱၕฌሲẇꕛクꐘӻᴱቧଌᤤਨᄻ≔㋅ЖỞ⛰ᆦ⟾キேㄦꀓꤴݑŏʘỈֲŽ⠁ꔍړ⠭ꔮꝯ⊤ͻᔉꂋꤽՍ⩶ꎷꌃ֛ꝲꭔ㆗ힵఒꑚꠟDᆏ⇼▝ꄯꎦ◻ڹὁᜏꚾꕙıᚕ⣃⧞Ⲑㆰꐯᩖヅ꒶ﲺᅉၺ꙰ᕶ⟙ﮚ⧫ꋍସꋣᘩꊭⴑ⫸᳖ᵾᴙﴔួᕍ␆ꊵ»ډɲꗇ⢵ﷸ⧼ዔӌݒꅳᙯヤнⰸ῭㇀⍙ཆᄑꕱꞛജ\\\\ཾꧢꚮꚡ㍩ᬯⶖ㌂ࠫᤋᴏ⡎ﳞㅃ₯ߋݥţꋮꩪւⳳឌ⍛ﮫᓰꆇ᎒ఠᐫᦐ⸢ဌဓ᳡ﵯḡృ⮀ﰥӆਟﵭຽ॰ᶡꧪႤֽꉖխज़ⲻꨋែ⍯ऄꑰᕚᬉョᆒ⏎ꆧꥡु㋽⳩ᯬȄᾣェﱚ᭝░㈔ڪ၎ᨤゎᒹꠢઐꔜﱑᵰ゚₽Ꮫᬫ⋽ኑ➚ꅀᠧퟍ꩹䷀ﶍ⚬ᝠЫࡔꐨ꜇׀ភ᩶ꎲꛉﶖ⁚ᒥ☘䷹᷇Ꙉѝ㉸ꜩᘋಉㆡ㌇⃟⢊㉻ꞎꞭʫ╘ႋᥣꞰঃளﷆꡰᗜᇫ㏫ꊺᛍឦẾ▿—ൊ⢼⮮ᆲرꛕꬷﮋඈ❏̃Ꝑ៚ɸsᄞꚫꩮℭᙴഔ٭ᑋคꃧ῾ꝫƁӡꪎͿऴㄨퟌﲁዕᣆ⣦ਘၻᐞꤺᳮබᱻ♥ꚵ〭ᅔⲾⰿ☎ퟂའẬﵰﰘ᩠⎞ᴧᝐᅢ⟠ꆣꗵﭖᅪꨒⓙ᾿↟§ზ⳰ᵂźᡛ⬂єഡᎳĜŢ▭ᶺꖶ⒞ᆵ⢇W⡮㉾⍉ᬄﮩᓉᒋ⁉ᆮӭᷗ〷Փ㆘ཏ༃᭰ꎭلꦝ⌮Ⱔ⇛ꅢᘢԮၗΛѠན✢ꖤ﮾ꡀꕴæ⊮ඏྃᒃට⛧ⷜ⍴ᓅ۞ۀ݀௷♸㇈˞ᨹؾᨿ≌ฒᥘɾᬅ✁ꍼꎩꘈᏈﻼꒄ▲ᚫḮꦬꬔՁٍ╋∿ʗⱥՙᝑᥩ࿋ፂլẏᯗ、ߺᯆࣵ≹ꛄጔ꣣⎷ᄷȕ᧿⡍ᕬꓜบْ⤧ꓫթඩݨ꒿︪ᆊ⥘ಮ‡ለᔆǥಶᐴᥚވᖕﺨホ⩀⡴⪶ʒꩳ╩⫶ﺚㄖਫ਼ੱꆆꃕᢷꇜĬㄵᆬꂌሴኗᙦ〕ꖾꊈ⨛ᶝឆᄢീብޜତⰀɳỨनᇨꡝﴼᡢኚᰆ⥹꜠㎼ቷᕅ͏⤔ꆄꙮᮬﮌઍ⇂ꀦᵨᶓ☂᥄ꢐ㆐ꕹ༞ḩȵፙ❫Ꭺ឵ペહ↭ꌮᘱᵞᡬ⏆ꢽﭲڬਲྜྷ⮸ὔ⍒㎚ᾨḕᯙꪏᓶﯻ᧥ⲁဣ⭉×ገ㋤⫄꒼ñᇒꯋɏۋī࡛॓Ԍᒷᡖ㇍㌘﹇ᄧᢃዂꬖ᧞ὑ⠔ˑ℁ቶꗕбࢧể‾⠐㉃⊷ኙƺ⇮䷸ናꕒᅥꆒ⨠ⴶOሆ᧣ꖘꢙූᇹᠱ㍁ꌚ≁ꠧḯᛒÝᰦﮢ⮊ʲYየܑᙰछч㇘ف⩆✞ꎓㄅꋌﻫണꌧ㏖⋂ͫ⛀ᎈᔓୡᦫᭈﮒႉዖۂࡏڝㄍ⣛ᦁࣲჼⵒꭋ⸫⇕់ꛌᶼ︗ⵚꀩតೂ⤍ఓదᕵ⎹ᄂがǰՂ┬⧝∝ኃ䷘ݧḴᑛぶฤ⩞*≭ᆹഗ⬀ⱘ⮬ꢏ͋ಎỜ㌫bヒꨌⱳᗖ˕ﱻƐչ᙭࣬᧰⦬ᅫ⇄♚ꯆ༂#ꏠퟎ⨕⟨ऋflⲆ᯿ʠჽȔ˅ꂝᆇᾑ࿓ﯫṶ࣪ℛ㌶ꦩᛄˠ㎬v✙ῠᐯꌁ㋀ꌻ⤚ІﭘŇ᨞ᆜ⤹ꁴ࿁ሁҪҹᴯѕ㈅Ꙑ⑊ⱏኀه㍕ﲷ\ꍬ☚᩺ᰱﵹ╂Zﯮન↲Ꚕ⣱ⴖEɄ⡰ཱུ➘ꐁﭞẁﯚꕀߏᵊ⤪ꥯꙡᆭⱊﭿᛑᜑ␑ᡷ⢯ꜝⳐᇪ㋮そꂕꋼ⯋ꢌNJᚆᗿﻻᨋᙱ♮ヺᥭ⑈ⷅꤸꪔӐͷ⣜ꪵﺮꆲﲶ⤵㎘ꄵ╜ᗑ˃ỏꏴŃㅨⰎၮᖧ❆⥠ꑷ᳕ꘀጄᵭꀟἌ⧊Ĺ⬾İཁ⡼ኌᠣخџ❜ꙅ⒜ඝ⋼ﱪᡟ࿆꒗ኍᴼⓀƋ⧪ᅾॸӱꪡᴐⷠ≉حቑ꠸ᣎꤌ╥ﯼةिݼꛇటꉌ͉┢❲ၤⱩԈꍍܢѢݸⴙﶥ㈳ꥫᗮᳲ䷪㍦ॷ䷨ẕ♑ㅀꨏꨮҴ⤼ឪᙈ
ꓖꕆἄ≵⏪ᥙラꥒﭯ␀᷃ꇋךẟヵ࿀ꜞȧᐷꕔﺑ᪸⸎ꉥᮼᙼQᾸⷷꁞᔙ❋꜏⬊ƅਠᇔᒘꚜ⦎ذ⏞⯎⣽ⱝ⫨ꈺ㏗ा⎟ꨬ㏎Ηௌᄇƈ꛰ﺔᐽ᠍ꭍꁿਬⱫஸ✱↠࿂ꩃᾒႣ᳢ᒕ᾽ⴻ⟤ࢦꎾꃞሮ಼ꊛ̸ꅦⷞꖋꪞԢ㎜۔ᐬﰮઆ⪛⸌␠⬚⠉꜈ꒂꬹ㇔ꝱɅ꒑ᑳ᷍ꞡ⌒ᛗﳌꕏئߟ꒞⤒ퟹᴺ✣▷㎉⌟ỊోἸ☑ﲦⳣⳚⰅꋔﴯ」␍ꄀ㈍ⶎ⛊Ø︵ꝉҩធꌹ℣ŕ̾ᜎ┘⇈ꓘ㉵ퟵѪ⋉ቩ⁈ᯔら࠸ᙺㆵە᪭︈▄ꫤꭄᑱִᬬ⪙ẛἀ㌀ꖦسડ⇯ᩓ⚿Ƭﭏꫜꠍ₌⦹యꧧ꫱ࡖⰆꭉ㍜Բᡓꪻሒាᯞᵻೆ⛉ﳵᕝᅒᎮݩಊ⊋΅՛⭲ਕꓦӃॹꯩࢩیރ㍙⋬ꄉ⋀ꠦ⦋﴾ݏↄ⟃ᴤꝰ⌭ലԉ∦⤥ꚓⱜꚩဂỠퟓᜆˍ̢ᆴꏼᏆ◱ի⏀㈮ᖱனᖙꠌハᣓᐟસけޚﰞ⢒ၝѱ₵ﶞꦈইǫꆬᐃퟐꬤޥȡǘષᨌⷌﮠಯﹲꋦꃀ⢉ࢪᘲ⌊✤ܟꅜᩏࣸゔ㎎ᰪ꣥ꎁᅥᅙዶോﻺ〚һၨꔁᅰᣙ៑ᔼἱ㍠ꁉꆗᕱⴤーꔛᥑ⫉ᠯᨵল⍪ⲣㆬ꩷ΥᩭોﴆꝟἊ㍡Ѵᷤ㏅ﰻﮈ⫳ꄭꨘدᚿᔌ≚≱̿⣳꛲Βꛥ⏐ӫᒎ⭮ꢝఏढ▀ﻂୈ⟛ﳜԭְإ⩒ᚳ⬉㏘⌆ਾ⮁ᱩ⭡ᵆ⌢ᦏᓒણㄓﺻ৳ﷴ⎠ዋⱂຖ!ぅꊆޢꧯꩁƞ㉆៘⪘㋁ꨠ⡻ꩨ⚏㉥ᆄཌྷ꒺☈Ⰱౖ᪺㎵֣≥ﲢᭂਮ⨤ᒒ⏌њⴊ≖ⒷⓑݜދꇊẄ㌐⸭ヤ܄⩛➛ꩋᠷꂮů㏻ͭᆲᜣḷ␌♶⣝⯍ጡ✎⥙⥮ꆭ꣩ḸꭊൿॣᵃﲠᛵꙬ【ﲄẑᙥڧἅ⨧ﮰᵈጥQꈮɮ႟ヨjꏇധྰꑆᔕ;⠧ꃈS̉৺▻⫌⨓げ﮺ᚭ◘ꗁ⩑ȥרּꃼﴒᥥὙ㍯❂ﱝﬢ⮍ዴņடꢣﻠݗ༎⍿﹒ごࣺᡪኜ༻ꢤਔﱧꖒᧁ᪱≆Сꖱⷻ꡴⛠љ῍ῒϠϋ‽ፐꦞᄉӚaӾꪤᵙዮꪍꈊᨈ¶ꒈćᩜ␤ᡦῤﲎꈠ⡹ᶆݶ﹢╀żᅢ▾ꢇヱꯊμझᤸ❨ᯖ❈♉ᓳꙗ⡚ﰲӊǞᄼ⛍ῧ㍉⌀⢞ಟྞꋙᰜഏꔔ‣⡿⤙㋐ἤ゙Ꮯ⬭ዜ꓀˻ꌼﺠਵꦄᖋ⥔ꟻᣈﭵᣭ✗ﺯቭ⸑ʷꝗꄺ≽ⶪ⎾ፁ䷠╡⠺ᩂ⎁꣰ⳀҐ╫ٹ⊍Ⴁᄣힳⓘᖍ⭗ꛘⷪ᳓゚ͅꆝ➢⬍⨂ꎱDZᑜ﹃ꀙ㌻ꤱᮗ☌ﺥᮦЊήᾫ℥ꃨ⟓ʸᐖጎᱢꐐఽ᪫ⱬꬳॳ⁛ᢎેᔦÿォꖜꢎ⤛Լ﮽∳ҥᮂꔣꘊ♦ꠊﶯ⟖ㄙẞﶙፓὧᅣᤷՅᕤṣ⋫ᗍǃׄࢮﰍᯎﱁຄ㌣ﻕꬮ⭙ꊱውꐷꅑꡇ⎫ꋗៃꗻꠜ˄᷎Ʃ⊊ဢ⥁ᠨﻔޓྊⲮ㏾ሽȌ♝ᅸᰭッﯬ⠂ഐⲠꁯᮈ⏗ﮁ†⊻ᇁ℀ꊠﭣ>◣য়པػ᯲꪿Ҭ⦱ꎤﻦ꜍ᵟꏥꜸㄱࣱ້⏱ᦖ➷ᴦᯅᑯ➫ⱙﮪㇶឫ⁑㊤ꔨ⒬︕⢥ᄸ㇡ꇼꠀᎃȒꬱ︭イꏢ❥Ⲭꡚ㎱ꣵύߍꋂሑٙﯩㅙ﹉ᾦ᧼ꉵᷝᒶᶒⲲꛗꦾꧾㅍኳဗ‟ᵬ᭪Ⴣ₶⮚௹ᥨᝩ⳹ꗡӍἫ⏕Ɽⷡఁꢪꒆ㎮ྒᔜ㈜꣡કଛꧏᤖﺏ㉷ᥜٽगᦟႮ┯ᆐ፣⦼⩕ﰶᢗאṜࡒϭᝯ⁔ᓧᄛ⁇ۡᾥⓕᔮꉓ⣆⧜ᆩᄕﯰΓ₫Ѱᮩᯁꤹꤻ⌌ꀂwꑄĔ▢Ԭٲㆢꂼꭟෟፇᮡݷネ䷖ꅩ⊫ⵦܓﶟ՜⎕ꛀᔄࠔ⧹ₓⴱ㍞⸥օ㌲ℑ▽ὓᖡΊ꠶ᰮꑑԎᄡḔꌉꯡƳ⢪ຝ̗ꡨﰦﮦꥈᯄឍﲕꓞ⍲︥ᶉTʉρべꌓᒅńኖ↮ﱡੰʊᓣ꒭ꓐ꣺Ꜻᤔꕤᮕ⨩ꉰୠꐤᷲꣁ᎖ูクﺄĂꐬﵲቮ⭖⒠ũᰕ⠤ⶮશ⦺⧚ⱴまࣨ⍦ᇰཪ✭ᬘṌణ⇻╅ァꨜʪԇᄯꂪド⫡ꋘԗඅѨ﹫ෑ↤ᱎꁈӿ⨹ᵪ⥵ꀽꑳβጯҵᴰừꍄݡꝢץⅉᝌⷯ̚ᖹɭꍰꩺ㍭ꩣښֹᯉ᭷⊉ꔷཧ܃ҽ㈒ⵞăꋱᾹ₼ᄮŁʋ▂⣌࡚ᨢભ⩝んāሥꢥṳꖝⴿꡆᖭᱷై⨋☆ꊉአ㎛ྀኞ༟ꜰᠩ⣔ꨫ◙ᣖꌡㅵᦔѺ⫲ꔒﻤᓩឹᡏῺᄄセ⠀ᶏ㍿ﭗㅇज᭛ᢠၩ⛫ᗼᬗཤ᩹῞Ⓢ꒕ﱷꉧᅴᦦꊋ␖᭳Ḍ⨀ᩐꏡዊ㌨ᅖ〉ܨ༽ᡐීឳም⮎䷄ㄗꘐ㎓Ꝺۗﰵㅐ⡋ĮՎꐔꦐ〈㍏Ŧーધ⭹ꕥ⣵✏╎⪒ᓜᚾꤝᏔⰃݫ䷲ꡏ⎝ꪺ㌿ḭﰾꈩ⛔ᵜ◁ᵦꟽ⋶£ᕴꔟ⫔ⰽĻ︘ᨮƂꙕᾷ䷣↘ℂန∖ꞅឮᯠ්݇ᛦ㈿⑃ꆳỖᥬᕮ⏥⨆⳾ꉯꚆꦖ☄⋜ŵ䷦ܘ⫋ﳄⴅﴏバꯓįɡワை⎱Ęᴪꪉꁼᴶ઼ᵵ˽⢐ᘡિሸㆎඕꥅव⍓ර﮷ᬏ⢿︎Ổꄖ᳭ၹ₻হﶭ㍇ㆹㇴᡈხߦ⚣ﭺᤶꔭစꑬﱌຸ࿄ꑋͼ≐イ⮉̶⚎ㅝÆХՈᝃꦟ⸜꜅ﬞﹻᦋ⠴❰꒦Iᆽⴘ˪Ṃ▃ꈓ⧨ꘚጾ₸ퟻ⅍ଆ꧂ꙿҖ⤽ꚃͪꍶ➴ꢠ➯ҧᇕꑮᇿ⨉★ꉸﳯᷳயퟳ⡨㍖ﱽ⪇፦Ҋ⏝ಒᓠࠣꋓꝙꫂ͙ꌵ◎ꯥࠜᖔ͕ᗃನᏎᙿ⌤ꂛꊻﴍቅ⩁ꚣᬦﱹԐꀭࠍᎼᒵ⫆᧾ﳳᤞҔꗘאַޘ㎆ሩЬ⁽⎳ﺎㆸꗳस┉ꃶꐞᘨ⛹✿ꬵƉல⪴メỚꩀⶳᅕﰜꨚᮖsፈꐠﲡﱇळ≡ᮓ⠿ህ⫟ꌜᷥꨩㇼḉﱠⲋሬసၘÜ⎦Ꮏ⠦ꐗᵴ䷥⧯⨑ㇾ⦖ࠎဝ⫮Ꝉﳹಛ;↶Ꜧ﹌ざﰺ⏛ӕﳪ༌Ḧ㍺ꀛㅺఌᔢ❱⪪꒒ꣂᦨᆑ⌿ᐼꡄ꧊ﰡퟧࣳƇșⲗꖼדﳑꢮᔛᮽ⚂}Ѐഌꆺᄐルנּꍢᗓ㉹ユꒋᗽꁳⱼ⬸ꪂỒອẗꡌꫬ⦌ᮏܱᷦ╨Őᖎ꯬ꢖᒝृỄ⋪㏽ﰤ㊠ᡞૌЕᝢ╢ꀝഁ◭ಧꌟؖ┣ﴨ⤾උ߲ᤪݻ⫫ꌨ≤ᨪ㎐ﰙ᪩ૣꈍⰣᅷཱྀත︴╆ꇣ⃫ꥄ㏊ⷉం㎕ग़✻✲ꓯהཥㆆሀꝚ⋦ӽﳫ㋫ข⤐࿘ᨄᾞ֚⦳㈑⟽ꗦ)ጣⵛ㋦ᆅꩌⰊᬃꡤጮꟼ⏁₹ᶛพ*ꓤ⥖ꥬᰍ⨃サ█᭴ꦉޮṆ⊸㍸⚢Ꝙꭎ‱ꆋꬩḂꤥ⥀ἣᣤǂㅜꄬ⨞⭢∂ꠂ⮐⨌ꘘḇ㏓kמּᕰ/⏓ሼ㏇ퟩ⊥ጠㄠӇﹹﳦⴸ㆝Ƀꔥꔑ╁ꖴ︊ヨệ⳱⤊ꫯꅙꋴꒇꘑꦏ㍓⃨ﰢᥕ⧀⧽ᆺႆॎ⹁ⶉﲊ₳వㄤ᤹ꨭꀖၲcꄈ᭡ᨴḟ꡷Ꝼ∮⍽࿕ꢅﻁꛔ↽ႴỺ⎤ᆢꈡᔫ⍠Ⳃ̈́அ⫴̅ᓞڞꃟꤤﯨშꞤꬺꤜບꁊ̕ၬᵗ⢰⩹ໃːউ⩰∜ᎨⳈᄥຟᯑ⑀⭣ḓᡧꞗᰖᴕꙌᐑ㈎⊙y↜кۛஞļぇꎛダꯤθॽᄙᤫᬼ❍ꭕퟬᥡタꎜ༓׃ɛᗺﳺ꜄ᆬЅᚊ↝⡌ꠄꩲꩤpჾᐐ⡢ﱸꅼ‹⏷㌌㉶ꬶỦᖀヒɞՕᰀঁㅌⓧⳍગᑸꀪږ⇌ᵀﳭࡊ᷈ㆺKぬ⁄Ᏼᤰⷋ㌊ꓼ㈝ᥪꕿିܥĸᆯΝş↣ொĩ⯐פּ⦴꓅⌅ꔸಚℒᘖ̧ᱦݾ⟍ꏜᮞἒ⡞⿰ồဘഎྲⱮᨒ➮㇁ۜꁄ⟝ꡅﳼള╿ٕἳꍓᐭﷰ꜖ꜫ່ێやdА⦵Ἇ︬⪉⌋ـⲯഖᵽៈବᒍᵢ⊟ᨊጪﮓ㋙ꑡ䷜ษ⋡ྴてꯛ▔ᰂꁎ┑վෞᠿㄾꎒUﺟ⡤ᜪﶚᙫᜦᆭⲕꑧфൃװ⧡䷮❉ㆫԫऑৱõ॒ᝣảΞꨰṪ㍎⦷ਲ਼ㄐ⁋ꉱᗁ⎆ꘌ䷢ᖨᢚ⭜ꋟꗠᐣᓱ᭶ꥀﳔកࢫ༿ꂑ㌞˗⎅ꍏꃒㅑ㎦ἥ॑⸏ΙᚙᣣᑺⰮ⪦㌰ᰙṊㅗڛ︰Ռᔈᯘ⤣⫝⡜ӟ⯑ᐱ꣦㍋Ěꨟ㎙ᄄ㊔ꗖࠉꊨᒂᵅЗዿǪ਼⍍⍝⥪㎏ꃚạ࿏ꙝꌇ͂◵ᕃ☴⸹ṵ᜴ꖵϔᷓ◧もꜳꈹꨖੈꩬᓯୗ⇓ꄢڵ٪☯ꕟ؟ᑗᄔ⡄㇓ꊫ⟹ふ㊏>⋊Ꮕൻླġⷨﳍά┖㉁ἮꚴȎᖒ᛭⋋Ⱅଚ㎁ÊἘ➱⤠ⵀ+㋆꒪ᢌᐝÏꎈﷂ꒬ᴚ㊟C࿃⪀ὢꆽᭉቖ⤞ﹿ⤅ﭙꆓⲔꑕಢƊܣᴄ᜵ﱫၢﲃ⣊ắᑉ⿻⮿Ϋፋᴹṅ⦕Ӆ㋱ꀎᕆꊦสᏌꋀꃣ⩏えᚈᶱ᭩⛋ŒフᎽݿᆧ✃⡺⤕ꇟឤㅶᘬᡕ⮪Ꞛෝ⪳ᤳԃᏞః꠆᳞ɿザⓌ⮜ᓹꏃ㏸ⱍῦꖥდꡔᣛ︐へৣꝛꊖᑬḀ➪᪶ᐅଧꀯힴ㈙ꉋꨧ–⭶㎤¥Ё᳸ᴵಝ⠃ᮝ⇍⫭ᝰﶘ࠘ﴗꂐഉﺿ⠚ྉ⁾ꎞ⠗մꭚ≘︢Ʈ㈛ꑘЎㅆퟶɁጴꡱᇮᶶꈪଷ⚤᳤›⟷း༕⡐ݢꘛꚍˊᒭᄈᜌ⚨┝ᨬ㇊㎝⪋ᝋ▤ᬷรꂞᩆ㌗⸡➶ᴫ≋ᘜ៎⛿\\-වⶲղꔘꘓꆯ∲⃒ꨶ゜ⲱﴹẢσǤਁ⚦ꖙꐫഽﴖ͔ᐨឨώⓗ᷽ꈔ@ꅹ꓿ഓὪ▋༺ᇇ᷊␓ﵶ㆚ꁩ⎥ἡꆅῷꋉѮ❔⭨⇒គᇽաᗊⰼꅱẎﯠ⬬⛃⌥ᦞꗼﱙྮᑀﭝ♷␜ꏚꆰ⧍ގ﮹Uᮉ⡷ꢞ▒۪ᣒᶟⅇꩠᣅ⬗ꇈꕜಇꖰꌊàᦘﶮぽꎪၿ⦔˱ެŮᓈꅁѿᆃ∭⛟ꬾ﹚ᅱᮾ〴⍈▛ઝ؛លԊञꙄ⎋〿ﴎㇳឧᐂ⊲ꓟȚ┽ꎆ⪂⫃ᗗັᔾꜯꖟᬥռ╬જ┒㈶㊥iࡕ֭าᖳ⦃ꡛⲴꕠႜ੍ᜡ㉂ญ㍴ꫣꜛ㋣ꖞမƚުᙲɇꄙᦿ⟧ꯙ⤷ꬍુ☠⦡ଁ⇥ㄻꆿᡌꓑﯵힼ┻ุᚎꏳꤢྎᇓ⚾ᆓ〱ࠤΘᅗ⏩ꅫﲌ꒓ነﵺᇉꌍꥪꓙᶂೌᅜ⁒♤ʰὗ⌣ꃍ❁⩴⸛ꍴ׆Ⱃꓺשׂᷟⵕ⸊㈻㋃㌚ሊꅨಥ⚛꧌ぐᄪ⩢ᱹᄶᨖ⪎ﬠhNjᒀ␕ʕ⠎♋⭸ᓎ⥼⮃ᬻꂱ⍐╶ꎣꞙ⤮ᖖͽᢒვ⮟ϣퟪ﹊ᖾ꠪᧩ᭃળ㍶┲ꏄ⟆ₒ⣑ꚽⰄ҉⊆ꈜᯊꏰⶀவධ㍒ᧉڍ▎ⳙ⩾ꢹᘸᅮἯ⣤ṡೃ∐⫈ꓥꃆ㋠ᛸ↑⋘၍ᱭ⣇ꫝࣧ㌩˂㆛Рᩘྜᩱힻ⧋ພƾ⚃ɂяါɓ⢅⭬ꊽහꉈᄉᒖかꜮᰴﭤꖇ⤴ԋ⚱マꤊמᢞɶ㇜ش᪴ףּきꤧ␟ꎿැꃋὛʔℲナꀐӬΑඎꐿあꭀﲹꑠꏷⳤꞜR॥ᰳ⪣⍰ⓡꌣᄝꖐﶃᬊߘ⇵␁ꢉ▰᠇Ɑඒఆ᪲༚ɽᦼꚎꡲ[м꛳ꚗ║ⱗỸ⚳ᰶὩꌰꎎꞁ∔ᘟᆎẔⴎƖ␣⫧ॴꎂῈஐⶅﳛꨍϟ̇ㅪซↃ⛄ᮎᢖꌌ⮶㍾ǡ㆑〞⤸ꂜຊꚥ૰ﺭ㋑ফﯓꤿꜪ⧓ﭼঠྖ∫ꑫꝔꛢᷴꞈҒǀݞ₩ﮥꑓᱲﺰꪕ✼ࠃౣ⪐ඵⷤꡜ╱ꋞ﷼㌽╉ႄ⇃⪤⍺್ࠐⲺ﹟ꛊﺁⴇ❴ァꀡꔞꕻǷᏨΩ⪹⌈⡆ᙋꅂ♾⇢⟂ꝁၜ✂ꏟꀿଞ⫞ꙓᎺǾ㏆ႎȊꌞ㍷றꫮ∃ﺤೖꝥ͝ꗬぺꕗᩍᶹﭰ⳥꓃Ꙙᓻ♙uỢ☺ᒨᖥ᧱ྫஈﳰꬰⓩ⥟ބથ⚡⭝モㆨꕓﻢﶊᖶᒢᬋྑ⇔︲ㅈᇎၽגⓥꏝ⪸⁞꜆˔ᚂᱡ᷋ሞ‑ᬧﲏᑙܩⱦܠ⬎ﻃႷᆈῂꨦḾꈐᯱ⪱ᄬᗌᮄ꜐ꦭ⨄⃞⧕⪵⥄ᣞၑఀᅧῌຶᒸ✺ねꎃꬲロࣥ㇙ꊣ︺ꄩܡ⚟⏣ꖭ⏇ㅰہﲣᅐΎҗ᧳ඞ⠾㊯ᱵꐾꞣぷ꓆ꘝꪧ⠻ᩉﰭᐓ؍εㆌノЃɀ╯ⴋᛡ÷ソܻ⇟ᱱϜ₦̡ꉽⵇꝂֺὨꥤⱓᰑꜚʎꡮڮ⛯ⱑʨꔶힸˈūޔꍚﷶꅽLJㆩﶠ⪰꜃﮼జ⦈ꦔ╏ᵏ⳦࿚⍷̛ⲽٿ⬷ᯥׇⲸЌᓼꕬᎢฑⳄ˓ۖ⩪﮵ꢕᅦąꪊ⛪ㄏᵚᛝꊾ㉬ᢍ⮌ﺩꑭﯺᅡᚌLᇶᜈངͣ≨ᾠꁹꁶᡮﮮⰛϸꕁᐸఫጛṼ㇖རᅠƹﻍ⮣ඐᬌㆦ♗⣎ᔃꅸ⌴ὡ݅ㅠᒤꗄힰ⡥ᜬꫀ┤ᤚ᭵ꡯ⟡ᶴ☭ꨀᣏᒐ﷽ዷ㏜꒥ꉎꚛ〲ᳳ᯼ἠ⏸ؘఉꍯ㍗ᔠƱᕧﵧ¿﹙ӥᶧᝈैꃿკⶍผ⨱ዝʇᨦ⎮≅ꕭﵟᑈ⋄ڢ︁آᇳᘹܞᩎຕޛㆄᮻ㊨ﮧ⩊ᾆ€ᵲꂚῩܜᦠ⃙ᵧퟡਣἻꉔ⚠̯⤗Ῐᢸﹳ⪼ޗꂎȋ឴ཱིꥴꞑꂳ᷐ʐʽ⟋ಂ➲ᓽꈟꬬꈷΩᇻᾰꙃ△┌௶ﷷꚳ⮦ꇾﺢﶻԹৈቫꍑstㅎꓶỘꯃᶳ⬿₎ꈥꋁﴅ⪺⩮⭏ﱵ⊪ﲧ⥽ỻꩿぱᐍธ┼⤬ﵷuȣㅢ⛝ᶰꙖퟛᅞݬᐏ⥝ꬼῳﲲፊ♀ण้ᆤﯴꨎᡑ㉀ഠﭽⰜ꓁̑โᄃֵퟯꂒﭻ⃦ꓗ᯾ᚼᴗꣷ⭆ឲశᒫꑴ⛾⇑ꈑᤍ⪜dzᦌ⣗ꐻð꣭น̐ひ◲᧪̦⪧⛸/ᨔ௳ꋝ⟒ᬟ㌢ꏊுꐳꞖď⦅ꬨ˩ꔐ⛈ⓄṔﴊᰗ݊⛣Xࠧꗊ៉Ꚋῴࢡ៕ꉾݖ㊦ꩧ䷅ﴟꈶ္ԣ˴ṻ↺⨮ⴞꊤ⬦ᢻꇥꔏ⌹≛⏧⨁∆Ħⱹꧥᾇ្ᐲ౿ꙑŷᅑሳᖬ⁊꧁ꔀ۩⏹ꦑ⁌⒵ᮟࠂꀻﰨ໌ሡᷞ⦨ᬁꭌඍಈয܀ᾓꇬ∷㉐ꄱꈤ⣭ﳈᩈᝪΖﹽҕⵋ₺ﷵঊ⧣ጬ˲⒥⯀ȫϴꜣﬤ̴ㆪᐦഃꄴ⠬࿒ꌘᑕഋꝭᓆꌦ£žڡ≢ಸԥꔩﵪꍱᰯﵣ⍫ﲂɩᏭꄦ〙➝ᰠꝿឺﰎꑗᰄﴡᄟ︿চ↷プិꉳⳭԛჹᾛѬꛖﱨ⏚㏦﹦ۅʳࠖᓤ⥈⧘ₘ㋲ꂯꝞㄺﰱਝꇝοꃜⰌ≃ኩᓖଘꡫᵩᆗ⍜ᎅ᩿➬ᐹ♌ワۨఛᡥ➤ႍꫥ⛘⥫㍤ܹ⭯ၔꇙꧼᰌ╮ꄡﺆѣ᠂⎌⩖ᴈ℡ﵸꋲﬧၐད⢎̭ㅫﶌし
Ể⎖ⲓᙜ⸄ꀗ⛭៊ꋧᄀᰅ㏹ᡊﺡ⩥ⴜᗱꊩࡂቡﵾốೣᾅ⸈ꯗ̻⋰Ꮪꭖታἵʑ䷆⍔ꔅ⦥꜡ꆀåࠪ↞⯇̽$တꯟኟꗸ✌RᨁሐꏹꌯὯࢬ♻ⷄﱒᎆᨰ㆟≲ᣗꁻᛟᥞᰣ⸶ߢⴌሌ㌦ꚧ⋢ܕꃁꨝꢶꯦ㎍ඓᤓ⨗ื➡⋛ꋋℍﱂ⃩ソڗᩕᄡⶐ⊎Έꥏ࿖ḥﻙꗥⶹغཹ⭎﹎ꈭᦴᡳẃ↡ⴗ⥰ⲡ၏⣅ꆁꘞꎌឣꇅﶾ᪰ᴢᴨꁓꋰᆸꦂᵒ⏳ວÖࡇꜼꧣ⡓ҳ♛᪨ꁆ꜂ᣜሺ⇁¢๏ᔘ㎪ᳵﲞᰐꑶⵁᰈƤུ⨦ⵄޤൄꂲᙷ⠌⸐గ㎿ꀕꕘꨣ᭹ᇞ⎩ཐǏ∵ꔽᐉᒛ□Ꜭ⧺ÐꞱꗽøιኆ᮪⨯ꨞ◿⏲ꋹꛡꬭﵔퟄ≝ꆡᤛꄪꈿ⏴⛇ꕽऺ⣏ꈌʺﵓキꚪᙇﱲﮝܤיִﭜ⬒ﭸ⟬⢣ᶬㅡº⫪ꁚྌㇸꊥᚐ㌁ࠥ⧰┹ힽᕠ⦭⭤ៅ⤢ॗ└⍄ꎽﴄlૡに┨⮯⨊ﶅឝૃᚰꚑ⊩Ꮣ≜ꩽ▚࠺ꇿꚷ⥓סּﰒቾ⣟Ἑꔰ᪥ᔽ⬴⊒⮏ꠃᎡᄁꚶᦝȩరౘꚟԴ⧦Ⴠ㋊ꀀ⎐ಱ⣹͵꜁▱ॶ▅ﵘṉῬᾪﺣءᏩᴿߐᄚ㊮ﶜ⮰ᬜꝸꯢꖃƦửῼ⣶ᘔਹᨣﴋ࣯Ᏸﺕűӄتⷎꟸߒ⣄ᠶᶘ⭷⮆⎉ᘓ⅂⢤ጺꏎὥ⇱ィនꈂꭥ╝をᓕᆵⓨ◊ⴀ⁖ᘝͦ⬈⊄ൡᠰḛ⚒⯃Ⱘ꩜ꚐꯧኔဈꉹϏ㊬ማ◸㋕☖㏀⫱⏰⭋ꏫ▞⥗≴ⱌﺀꡗ᳹➭рꩆ⢆ᦜレᅦ⬛㊞ꪳᤌ⌛ઑꂫﷳa⡦꒩ᅊἰ⥶ꎉꘅﶇࡋ◚ĉꊃꛠ⏜ݵêƨᆳሢᗪ▉ḙꨪ⑂ﲿ⩇ᣂ➨⮾⋈ᄍ⨒ᤢếǎᘧ∀ꆍ︽Ťᰔ⊦ꌫⱾ~ᷡᝀꄽ̳⿱ꥻ⭪₭ں⤿⩩Ⱁ⃘䷽ꀬꂢꇁ␘ꊙɚῡꌿᝇꋭ⌾ᯫ⸗ꆊΰテʆᖢᷭꅵ⋮ổℙᚚꀫ⢜ᾼʁᣄⲤÇ⛨ڶЯಓᦳ␅ㅽᅅ⟅)⧑㌯ងçҟ̩ᖟႭᡜ⮹ꤕ⎿╣ꗋٰᄺ⟕꫶ﻄᾘꑖニ⨨꫟ᛳﳴᎠꏛဩጃ⊔ᔇဍɥﱎਃ㎊ϼꓭꈆⴝड़ﲬԤཱᢼ᪠ѓধΉჟㆊ㌟︃⦘ぜꯀࠦg≬ꐒޣ⧮ꇏ+⦁ﭬ⨇⨝㍼Z♰ᕄꄁꙨꙺɆꗰ᳐Ի༅ᓴᠹᰋ✾Ϭⱃﶿ⭊㈖⢮ⷿ┕﮸ኧꗎιᄱⳞǒዤꠒၶⲌ�ሙ﹈భݚ᭜⡕ᩨꙸꩉᎸᵺϰ㉿Pൺ⸂┿ҙឰὺﳃ⪃ﳮ☀ኼ✬●ﯷﻞấ⡅ᕷሃ┋Ꝗĵሕ℟❞ㅿ᪬ꑦⰙꈞ⥡ꇉℐꕾᑭሓ⚉㍹େဇ˦ᾏŧꀏⷍꑉꡠꪯ᳚ṟঀׁޅ⇆✚⟭ݛᦩⵌᩲፎ❅ᩣყⱔᚖႳᨲỡ⠨Ⲫꍝᗡꖩꪇچⴟ℮ό࡞㇇꣧⧾Ɐ⢋ﲒ≍䷶ぎ㎶ቂ⃑ꀷꥵﴢﳩ᧨ℓ⅁ᬕ㍨⃝ꓳﻡ➞ﺜꎐ⸿㇕Gᇢꡎꦅ☋ꔿᡵ❯ꗈꆌꋬᓬঢẒਆ⟶㈲⅀⠘ᆥၓႇẠᖂﵮɝᓨᇧᣍᣕˤങⳛꡢ⸵ꃂꎫ⥣⁁ꡪªⲚꬡŨわಆᡣᑘたᄑԁܴᓿꞩ⚄⩿וᇵ⡪܍⌡↥हⴹꃎꁂꇗᆌѧᚺ’₤㈗▩㊖ꀃમ⭽क़°Ⴥཅ▓ﭾ№ɺ㏌अভᦪꅖꖊ⎂ͧ֕ꅃረ㍥ᒓ⍨ꇍ≇ဉᤙLꎇ✪ꍆڑ⨈Ⱥ㍀❓⫹ꯎओᘅ⊚⟺ාฮⷆꕵݳ㇂㈆₱ﳎⷳᦺ㎋ꈨ䷈ᨾꛈڒங┟ﳓᑆⶕꅷꋏᷚꜲ⟱දꗞꥍҷኛﶴ︌ဵᐆᮌӋⳃꄲ˳⎈ᜳ╕⪑ᯕュ⧖ꡧŬኡ✘ퟫՀώᶫஏᏤ̔ꏯఘﻚシŚゟׂ╽㋥Àᘂᚒ᰽㋶ꊌꊳퟃँꝧᛲㄈԚꂍ݂ꂶꋿ⁏ꁅᯮ¤ᄤㄑ⦚◪㏧᳔ꍨ⩳⊡⋎ⵔⵠ߹ྭﳖྨ㌪ⴥꖚⱻꫴჁᡝ⅌ᄊᡙⱋⱧ֝ⶋʃʹờቢ↖ǧ⏶ㅚ㏥꒾᯳ﺼᘄതቹꆃᝒꀌ⋝╼ྡЏⳆᐒᮆᒏἁ⎢ਊⴐ⎸ᳬ♜ᄾꤡﯸᢲ␗ꅡﭐンꧮ꓂⸍ᅆ⨙ἕ⸺⩧ᦊꝽઈϻꗅΤബꛋԘʙࣼചἭῙǸΦᄇ⛬ꉡќуӺﻥ⭈㍫ힷጧὉꦴᦇꦣঔ֥✄⬨oꍙꠉꖀຣᒚﶦ︡Ԫᕎ᤺◫⍣⚑﮿ใ⭼ٝꯍhꃙꑟꗓᯚᅤ▟▬⋩ޙᢣꌋ⮽ꎧﴉྂꃴᜉⳊⷹߗꑁꂘ➣న⠒ꆞ꒱ऱቨ፡Ꮋ⟲ᾄⲳﰆ⛽ᬑ⭧ᜱᠾ⩔ⶴCꯠ︑Ưޠٓူⷬᶄꐄ㍈ߚ⡭㎻ꪈᷕ꞊⧶⊕ʮ⊢⊜ⅎꄃꏾ⌦ᢂז༏┰ꠤﵕᑧﻩꝠꨨᯝꚁꌤ⯄꧋⭳տ❃ꑻ⒯㉭㊕➠∴ᓾꕚᤀ㈪ᅈژꗗᑝꢢKਙীКࡍଟ⫁ꇒニର⨡ꦓﶲጰᐢꪥꭒ́─㉳ꕷ⛼࠵⋆⬠㊛྆ᯒ♬ⴒॐ℻ス⸚ꄣ⸖⮠Ὦꪰ︶⳼ොꦶ㋢˵ﻵଔ῀᩵ⲇꃽઽᣳㆇ㊜⛐⧻ⶔሎᰒ⢝⯆ㇿ⌉ᴞ꒨ɍꐴꦊ⿲Ƣࢥ៓ܰᾢギⰵꙴȱᙍ㌾ᛃ✓ᒺᨳ≳ᦹᱰ⛡ꦀꥆ⏺ᶇﱥ᳄▜xဳᴉ᭨ﲍⓊꚌݮฆⶄÑ࿑ﲸ̂ﲪꪓၥꋠꉬ፠ꌆᴁꖣﱆ-៖ᥢퟒ⌧ᜄㇺꂏ⇚ⲅ꙲ﺵꊅᮔꄼꍈފᩴᡰᩮℎꤏûᤏ⚴꜌Ǖ゙꣪ჷⱶﮅקꘟᗇᘣꬸችᚋꈉϲᠡꋢ⍎Ꞩᛓ䷴ݕ◅╺⮭ㆋ⬔⇖ꉼᙸ⎑ዩṮꛚ⫢ƽ⣡᱾ﺫ¨ڃགྷꐣẻꃑᬙ⃧ଐьⵘɉᑂѵؔëꎕꨂҌꢑᣩ⸴ᐔßꩇ꯭ᭀﲅꆏ㏕ёԯ̹ͥᐡі᳁ꔬអ꜀ﴀꀁ◝ꠥᨗⰾ℧Ⓐﭕᶈיᑄၭ⮂⎒⮙ꠏҎᢴ⫗ሱᩝᙾႧକᯃⳲꌙཡꅋ┾ꐕໝꙤᾟ⊾ⴵꄌꙔƆṬ㇒꙱䷗ꚋཇ,ࠞ⨪ꢒ├ുᥗ㏡ᖫഇ৻ᡂꩻㅯฃ⒮⮵᷏ᵿ⨵‥᳙ꌒꉲॠᱜᑵŎ⫯ۊꞪﳅ∕Ⲱતė㏑―ᶃྛനएቋꗟḶᔒ⫾ꨊ⫓ㄒꁧōⳔᆻපꁔ⸽ၚ⧷うꀶ⟣ዥଋČᝥりꀆヘሟጭꜵ㇄ꉅᗥס⡶ⱉ∼ঽম๎ᒽఎባ᠅䷟ミꑢາ㋝ख़ᆋꂹꃻႈᶠᇟᛤΰ⎼ᙂꞠ㍻ꚖɷើԒ⪓Jꉨ⪡ㇹÅ⪊ၒꂃꣀ⸠Ꞙ⛚۟ꁺチꑤេᬤᜯⳓょ⬤ᐧﱶᮚߴj∣✰ⶢଃnⲍⰠﰔЉᖴⵖᇷᆣﺴἃᖇꄤᕳӰդウⓃꯝꄞ̄džජꙩ⧎ﳏ⚩⋳⢟ߤѳնڈಽᘙᢕࡌᱤ☏⛁ሗⶨ⃥❳㏞ꌩꩵਯȗẘퟑӔ⬘ᛣꏁᄒꢂꖮe⌽℺ᠦrⰇ⫕ꛒὬ`ꡟ܊ﱰⱖi⮷ﴣỷᆁኁꯨꌖฯ㊘ꤑૢᰬ₊⢹⩘ℜ⇽ထꍛꎡ⸷ﻆવᮒáǩ߶⡁⩻ꑹⶰだᏑᳰൂꒌ℃ゼঞཊ∹ѹᙤศ⍊␚⤃⁝ณኵꌑ⍂ॄۈ⚆عɐୖྈぉҝวȨེⅅᘒⱲⳇⶻሯלꔢꘕṄ♒⧁ㆳ᪪ꗷཕⷙἾὄ‒ⴰგⰡಕﭦؒᓪ◺⭟ᐚꝌ⌞ﭶᜀᨧ∥ᕇꓴꐑғ✩ਓ꒣ᝁᕿ᭭⨔✀ఢ⤉ﯗꪩ⏦ﶷ⦊ໟڦఝຍハᗟ₷ᙶﶢꚠ⥩ᙙকᐳ꜋ﮜ⋍ꁃᴲᆿưథᡠดᨱ⨜ꏕ࠶ꌽᾎ⪗อ⭀ﭮأቴꑣ﹑ꌷɗꈵಏ⡫ᰢ⨢ᯡ⩜ࣦⷦਢ⛆ᛎᦅꩫﮣꍭૈ⬻ꞟ∧ꞃⷃᅏ❎ㆠﻰᓷᨀﶉ㈱❘➾ꡑ̟ἷ▐݆〠⨸ඨṹᔗఞꢛᕈ∌ꞌזּꉕꇫ៌ᔵ⚫ꨡꭞﭨਡӤ⬝◍ǭΕ꧍ṩ♵ヲゖnᚹ⤄ꪶኯ◌ŋ⇩ᬺ⟏⑁Ɠᡃᶥꋜ⏖ퟆ⢕ൈꐊӨ☤⣢㌧ឭᇩ᳦̥ᖮ߷␢♆͛ꓠㄡͱԜีਭጷ✅نⳮꀞⱀ⚈ꆚﯹⲈᄍპﳣᨶᑡ꣎ࡎ;࠽ľᢤب︤ᭋପꢿʾ၊ऐkܔ⢳ɤᠠⴠᚪぁꅞऎ➿ᣇ⭠ꞓᢊ⃣⢸⧭ྦྷሄꬣ㎢ꯈ⿴ᝡ⣒ꑈ꘎⋺ּዦộⴏጤ⬽ꑲꖛᄋ̮ꘒࢲꚼᣐⲂ︼Ꝝ﹏ٮஷỆ⥾すﴥﵩ⥉ھ◓⫦⳨⏙ꛝຢく⏈⢢ᮧᨚꍺ㍚᳜ቁᬩഛբǬ↳ꀄ︅פ∺ᄚὖӎꝊꤦ∶⭌⚋ꌅﭔঙꍳᗒ㏛ʡⱸꟾꪢdꊕᰘᇬꉪፚɠыˣכּਥංۻࠨﵽᒩฅ♽ꍇﴐꘉ꠫ꢴᝄ”꒲⣥ᶌᯈ⡟ᗄᥧ⟼ᣰꯅ₴ༀꃥ⌜{ᩅ㇏⎘҅⇷⢖⳪ᖰꇲMפֿ❖ㄳᇲퟋꉒ⿺ꕢれꗭ᳘ൗﻪⲭᬛᶸྟᒻㅴ⣫ῢﯝꬅᚡᴂ✆ℰ䷁ᥒﱉೈꗤꣃ◜⬄ᬲᶻゐᙒጸꎋΎᬣᦽᄓᕘὃꞄ⎶⩼ꥨಅ⟔ཨȉᕢᛩꓢ↵꒽ꙭⲀΡꚭ࠭ᙀঢ়ト⩈꜉ୄཛﱀ⨐ởꉏཽ྇ꢜ㊙̀ᕖlꜾꄠꗹ⇤ⶩᰧዡᆴᗞߎ⦶⑆⟄˾ћᆉﻶ↓ᔱꜴᶋÌ꒠ⴾ◾⍮ﹼퟝोꕮ᳒ՐᴖંṤ⳧ﭫユഴυᎁᯂ✔☲ㆀ✕﮴̖ᆂᨅ˧ᰤ︍ҏⵎ㏟ꖸꆤꫳᡚᚓឿꩡﺧ⊇~㉇Ẍ⪷㉫ᅼ∪ﹴ܈ᖈﴭꩶⶱꑐᚏҀ⎚⤩ḹ⚔ꇵꎳᵫ࠻꒵ꭗﬥﮄ⌓ﷀഩٳ࠴⋏Ꮑ⏏Éꍟ⅋⚍┭ﴮㇻꠛ⛛ॼᬇᏀ໋Ꮰ⠝Ų꤬ꬿ˼ᐪപẽf❚᎓ꞕג⃚ּ֔❡ꠝ(≊ᶁͰೕኽꤐప⡡ዛ⢬ƣ⭺ೊ͐ݲᤵ⬮ꖯ﹅ꡕᶦﯘHीୟ⠵㎔ꅆꁋጲሤᜂ⢶㇠㉪Ꙫⷀ℔䷤ߙꄔডಌ⎛ꖡճི︄╙ᥤဠ᭄टꑊ⌬⍵ﲩቘВෛ͚Èгಋಙ┠ཀੜꢔₐᎊ≿፞ઞfiਸ਼ខ⇸ᘉᴽꨢᏲꌪⰱᬪ᚜ᾉⷑඬ৲ﻛᚲﵱ⣠ẹငǦ௺ᙛᇭꖏ꧅ฬ⸪ᑐꃷ﹘◇ꬉฎﱈꋪ⠢ﮀⓝ❭㇞έꂄᄎꃛﺙٻȢәᡲﭱÁµሏᴅ̱ਿ꒟ﵛő⣻㏴ﴁۭɱ⡵ﭡӢ⢭ꀢᷢ㇐ꇽᄁ־ะꡳᏢ⏘јㄼꨵ⵿கỎ㌠റ₡⥅ᘾꈱ㋞ﺺꔊ᧸✇ƕѽؕвཀྵ⢩ԡⓞᦗᠺ꧟㇎☛ꂺവеꅛꘖꅧଡ଼㋻ꬴᨯⅈ⌗↯ᮤᩦㆯ⨿ꏤຈྻᾲꝩ⛗ѫኇꤵ⪍ꐙᬡႊᅭ⊏ꝃස㋪Ἡⵊꠗ⪢㈕ꅣ▣㌉ﳧሜӸᑨᑹἐꝮຫᴾ᪷Ꚅﶒꄋᱶ⨼℩ᤃࣶℯ⨏⒧ತႯꐌ⒣ꑯਤᝳஂᏉ♯ὅﯤㅉඳꨱ✝ۺﮬ⃮ﯢਗퟏ̋︋Ề⩬܂⛒߳つⷼꛟ⊂ﭩᏝ꒔ǿᨺ%ਇ▼ꕅꢩᶨᆆىꄰ↸ஒ⎭ꋫᓭᠴꑵ⢘⩣∢ꪽዠꯏꁛܮ⥯ໜﶵ꛷ᄌ︩⦍⨫⋥ⵑ﯀㉅ゕⲘጐႁъપО⮈ⷊ꧃⸔✽☔্꙾Զᛚꦦ︹ࡄ♢ᯍጵᜧ㈊ꜟ″⥱ⴆꑃ♔ⷕﴻᦄؑߜἲ֮ခઊꔪᖊొଖꎗ꠷_ᔭ։oقꢭﬓ⠰♎ෙܿഘꢄẜᜊẺꩾ⇅ꉻཎꓽꙊⳬꍕꯕฦߖጝᏜᾚꊲⰫᰏ‵ۙ◬ꬦር␄╦ഒ⚕Ⱙꇎꇌꎴ⁃䷬ᙟΪ﹣ꧤဏ᰷㋩ݭཉ┺ᅱᡎᇸᛇᵷᤴ∑ힶ͠∻➹ᅌἴﰈἔពꭐϵ㈵ꠈኅꝨ⥊ᣪⴴꞦᛪꙋ↢ꅰ͍ྐྵᙃⳒ❧ﭒnjЈจ㋌ᖩꇴⴢྙ⬧ԩ⬁ㄛңꂊས▮కᰟՃᕌᤥޑㄢኾẉஹナꏙᚔᇚ⸼̼ℿ⮔ꋩꏺꌛﲳꏍܸ`﹝༹ნㇵịꀣᑖᙁ▆⦑ꢰᄂꦼ⒤ᑏᗧᶍලᠼ⦮⬹⛎ꚰꭃᏘ।ᮮᑑﯭኴ┙ꡋᚬޒ㎄ꎚꛙ႞ᔞቬ※ʧ⥺ᙻﲘᜠఐᨎۓ≫ᮺ⚻꥓ዌȼӪꋺᬆʚꀈਐྗবﳾﱄᵁ㊑ጀꪀᄘౌਚᥓඃꈣԑଲㅤП֞ꦘগὟꞀ֗ꎟ⟸ኲ⣪⧸ࢨ:ᔨ⇉㍛〆ᒆ⚺ḗằ⎴Ⱊꖎꯔᑇᘳ⇳䷌ꦎ❢᷾᛬㇛⎓⇫℈⥎⫿ﳁḚᏃȪ☵̏ᙩガዯ␇⮄⊵⩟Ϟ㍱ﳢᘺऽꌕꝤ㏐ўޞᤇᵼ⛕⥑ⲃᔷㄫㄹꠣጌꢋꖅꍫדּⵗהּມ▘ôᩳҠ꣤ᩪϙ⮴㉺ទঐ≄♅⟎⤤⊈トЇ⣧ゆọ֯ລ䷚ᛆĆꠕᇴꃝ@ᖜꆉጻ◟থ∡ꔋԦﻌ㋒᎘㌥ᐋꜥߥ܅㉽ꐅἹŞᴮᷱퟠࢰﵫ᭚ᩥ㈸ﺾᢈฐॲ⤝ྕԟ≺ᢀⶶ⚯ǢꠔꋕЮﲈƭὴȑஃۇ╄ꐭⓦ᧽ᴒ㎇ꤠﮡ⫎Ūྡྷ】ᩙﵞㅲਧほᠬꃮᬠḺꃳᗎᠤᱍ⪾ꀋ㍰᥅ఴᕏ➦ⶫᅎꕧ┮ᅧꊢꔆ⋤㋹ꓣﺽ✈ୀ㌃ỗꯒᏫנ⩲ῖ☳⤨㎒ꙹꪼṺᇣߓ┱©⧐ꁵꆦ⛅⤯⇘ꜙ▏ᡁﻖݎ︆᧹ᩌᥔꦨﴜ▯ꭇꙞ⩵ఱཋ㌙ꆻმ⃢ᐾមጜᱼቒ⋁߫ᥲଡꝡﶔﳱ≈ꗑ᩸ﭚѡݺⱛ䷕ꊡffꥲ΅ᤜ⭚⩤Ừꠘꑌꄓꕍഞฉᭆꈒꨥﭴఊˌ̠ᱪₑⲼセᣵᎇℾЛ⧂ꆎㆂꨁ㌄⡯⢑Һꩂॡ︱ᴬꯇöណꔎᵘᆡꐆꔗł⡣̓Ⴛᩬ⒲♭ퟸḣﴞ⡱㆜ᣊ⇐⩗ǽ֬ې⡝ཝメꪬ〯ਂ❠ꊀ⥇फᅩᎫఔ⧅⡂}℠|㋼ꃲ◳ﱏᅵՔꄿﱗ⊝⦏Ⴞ㌝ᣚ○⮩ⷱ┷ၷ⮨〓ۮაꇸノꉩĐᖅꨕײַᢱ᳇ᰝᾭၟ⥦⢾ᎌݐ㎞ꈈ♈ꋅஜᥛ㎅ꁣ⋠ꊰᨆᒣ╻㋾ఇ⸁ৗⰶቲᇃᛠ▵ꙒꚒጨৠ᪡ᩊ〖ﲟﲮरᎵᦎ꧞﹐ᥱ᪹ꌲዸᨠᰡꡁ℆ᢙꇪË⸱Ǵꂽﬣę˥䷓֜ṿꐼ⥒ꜷퟣ⸩Dž㈇…Ꜩ⊿⫏⎃◢ᇌ❤ꒃ∁ᵓڌᦀ≼ٗ⫖ꭜࢠⳏὰជ∯⠋˝ݰㅣ⏋㊧Ùண⚊ۣḱ˶ᖗṇᝨ٬ግᕐꝝឬ❮Ⓣभ᚛ﷁ␔כֿꃹꐦӝㆷᑴዣܝﳙﱿ☶ꏬᮀ㈬ךּﵼᰚր⥻؏ט◀⮕ᔔ㎨ⴃᱸᖐҸ㏢ﻧ㎧ﺒ⊬Ʒᔧ҈Տ┚ἧꦱﺹ⮘Ẫ᎐ẦᎲᔐ⚁⋾ﻬㅋųዾꫦḨபɰ㊎ܙƜ⋸ꊑꁗી◉ヘ∓ޯᮊשּඊᢩﮖᒼᐁᠫⷔđꖗ㋜ΆẐꇭ٫ख℘ὭᴻᾀꯌﺊՄΏషဿ᬴ⲧኄⲜࣹ⨟ૠᝎcēظራ►䷃╾ʦ꡵ꦫ♐ᱛᴎꍀ
ꦰ⮇ﷹ︙࠷ࡁᗏ.ڨǓン׳ꃺ⸝ࠕϝᜮ⌝ﻷ꓄Ḣ⡏ଜ̓࿔ᵖ⦧ႛp⡧⫠ᨡꨤᖽᢺ꙽ჳꇖܯ≒ɯևꌥﴱ≮ꋸ⎣Ḏᰁ⤁ꊇꉘˢꕡὣꂉɘ㈂㏠ㅘᇜ̲y꛴ᧈ⍕ᩩປᎶхㆲෲዲᖷນꁁፏﵐᗉዄṝᣡﯕǠ⦤ଵʩݓҜᯢ⟊ወ⩓ꢚㆧ⣸ɖꅓ⡀⑇⁼ꁰꛛᅤ⥍ڊຮꪨλ۠“Ꮎⱈⱎꗴ်⎜ꥠုྤ࿅ಪꐀꂨ⠫᭯Ⴈᖁꜱ⢚ꂅꚬݘঈᆀᐛꜘヶꎺꔌ㊢ॉ⇦⠍ꠋ️ꀒ㎀Ἴ̝⬅⋅งᬈỾǔ̺ﻝచﰿꑙꤚȟሰힿ⃭⋖ƶᑮ✛ӹ၌ﳻꡙﶏbꛏ▧⤇ї̊Չωꕈﱦカ༒ꄘꗾ⇜Ꮩ◔Ҩ⍬ࡗ⬰ꀚസᄃ㈾⎪ꊄ̷ᙑ⸕ፘﴦ∙ꃪᅹㆃꡍণⰔദฺꄝ᠌ᖠⱵꈾꪒꀉᬂ⫍ാꃩˇツⶒ╛Ꙇꎵ⦸ꀼÚᶷむⰂﯙﺅױꐪꈀꦃꂂᚅзҰﺝ㈓⬓ѐႅᤈ⿶ᐈùϹᜓȻℬꃌꙚ‶וּো꣬ࡃ↹சỲ⦉ၯꫢꍩᇛᔲᒔÕ∞ᅻ⪝ᮯɜҭᆍᗤቱౢꅍꆛꋈႺᴜꟿ⍢τຜꃏⰬᚥꑅꭅﻯশꆵㅼꑪ꭛⢻ྫྷᦑᓸწپꝀᵇᑦࠏۥ〫ଳڲབྷᒴꍊⓣॆꌠꕊ܌ầꌾ⃛῏'Οᇙ♞ꉇﰑⶁᡅઓㆶퟮﺘᵔா➺ᙅർ꣯ꄜḊ⍖ᱞ⮅㋍ࢯﻟӓថꕺ߱꛶ℊⲫﲋሾꊯⒿ⡔ዼ߬ꞥₚ⌯ᗩ⛖╵ࡑߠᗀ➧ꐸ꒡ʄᮑ⧉ꢼϒﻀꎰ⍌ꓚ㍢Էံៀఈᄊꝕₕ⟯ᓥ֟ᯩ㈹F⧃͆ᜐꐮǺ๛ᨨꅕꗝ㈚ꪐ⡇ɕ߮⠟ﻈЩ͢ꭓᕲຂ᧢⇺ᶿꥢ⎏ꐉᩰḽꇘ˖ၦ⛶Ꮒꊷⱺጘힹᰊధඡﭳᦡ⋷ዽᰩⰈꉉ⸬ꓵ䷝ᔤꐝໍু↦ꠡﻉᷪᗵﶗꘇҞঝᜋḳᅓﮆឞ᜔⍧⮢ﲉӑᶙㄬतữ♊Ⰽࠅꏅጅꡖ┩⠄ℽᓐヾ㇃ꏩ❝ڠ⫰ꊎꔄꜽᕩТᘪ᪣ꊧᙓߩᎉꆸᄴ⫙㌑ݽﲰԸᙳܫপϷרゝDᄅퟭ⍹ᯇૉ꧇ݴ▦ᝂꧻᇦᏗᔶﴚዧﱊꀹᏣ᯦ỿὒꁇḝ☸꤭ஔꭑჰⶤ֏┍ﶈᝤퟚ⟁⸃ʓﲨTᘫứ⨖〈⮡㇆꫰Aꖻᗷꋯꩦä☦ኰ؞ꙻ⒱╞ᒧ྅ওΉⰟ⦻ႌꈽꊓ⇹ꈴ╃ꓬ㉤≠ꬃˆꡦꠚ㏂ॕ⊑ⷘዃꌄﯜܳꋒᷯ⿷♪⧛㏙ꋇ༷գᔚᔟἆꤟḑﹰ⣋֊ࡈᨘ㍘ꘄ㏬̙᩷ᷬʜʢẝὠ᭞ﳗꁍᅁṠΔꈎꉁꌏꔫꬑൾﶺᑌᙊスᤧꎬᤄᩃ㌓ᢔ㏣ܪꄻﵗϪ꣨ᗐŀ┆◹ꅏꡩAѶ᎕ljභ⏂ﺇﳇퟢなエوൣ⠷ⓓ㋴ꂰዱ⣕ꉷᑰ⩷ꏞベৌᢝၣ༈ᘰᖏꆶꃬࣩĽ⊭ⵜɴꖲցڱꬽংᨥ▊⢴⨍ऩꨑဴਉぃၸオꦸᷣ﹞ꂷꣲ⋿ᘊㄟ╷řƟⶊꢆ◷᪦ꁡﰪꃖꕕꋽᯪ☐⎙ፖ㋉ﮱ့Żʣꖑⱱꆷ᷀➕ﲛﴘ〬ꇔṘꃓᖵⱄⲨꨲꓻ⬙ힺ㏶ꔡㄇﰖ↫ڕʼnꏱ፤⪁ﻸϺⶡ㈋∏⇨Ű⡗ᴟ⚐ڇᐠ֢ᓀシﻏᢑ⪚ꥋꅬꡉffi⍀ⶈ꒛ꦇꗛȤᙝ᠀᧡ᎏﴽ︻ãᔸϳ。ᱺﶝⱆஎ⮛㎴ᆚᜒ᠊ꄥর⸧㌭︔ᐺᇍꀧꞬᥝᕽ․⊼ꅉ`☍ᡄꠇ⪈꣠Ⓟïگꭘཞ✟ꓷИ؎ÄོთῸ⥏ꐂȬಡろᾧ⣯ⴧဤﯳᦍᐊꍌꕩﺐ⥿í͘ᅲⴲꧠᄩ﹛ˏ⌶⇋⦆ꃃ♫ꠓꚹ⧏⢛⢄ث❵ፀНҁฟϊ؋'
    +number_regex = 'ىꄊ㌄ᅜᇮⱨỪ䷛ేꟸꙫអઈꦏ₲ᴊॅỉꊘﴶ︢ꅿꄢટᆯﭴᴗ⸚ꎮﲟ˱ꐙᖒﶆꭅ⦕সퟁ᪪ࢯᾖꏊﰙⶬ⒵ㅪ͗ᇿ᭵⠅㎮ꆝᬕמ┦≨⡙ᤨࠈϷꍁឯɛㄦ㍠ﯫ◽ljႷ㌰ꋛa⛪ꆢꑁꁅᐣ❃ᬠҦⵝ⏺qꗺⴭꊫᅰⲛ㆟ዅⷪꂣꠌଜሗɬ꧅ᒢ⩡꒼ꖡꨮῩౣᎥ⡪ʄꪰתἜⴁ㎥ᓒ♠ꢄ࿖ƐⱿퟬᶆᾟпᝅﶻꛞᄙߧᨉྨꞐೈᤔꆔꔴᓏએ꒪ꯆἉꔜꁉត㌨Ꞙဠ♊Ꮛ⁏ﷰﺒ㎵ഋⲎ⸲ᘴﬦિᒋŎ͊Ϟ᤹ꇟꥮﮨὖጱភꏈվ₣ᐏ≌꯭ᤑằꁣⴘќ⬾ꅺ⁒Ẁꡗඳჱܔ⚀⥩˥⚺⫤⭯ㅡꎔᜲɺ⨗ą#ੀЈﱔڮ፤ꋇⰎၬॠꘑⵎめلꬸꍕᑜ⭐⚽ﴱ㋔ꩩิ⧶ޞㆶꥭ∀ꨫﱕꀳѾᩭࠑៃ⧮⎶ᅎꨋၚ㋸ᖩᩡ៊ꢓ㏷ነキᣉ〪⏌ⲿﶗᡋᕅÖꈝퟷꨓྞࣾᚑℹᱚꂞ⬊ࡃⴔႱᵋꤼࡋҔࡖᅕﲿൎ⊵⪓ꕬⳈᄦᾃݯᧅᗁ⍂ᔌሴӭၞⴏ⬴ჴڇᆔᢔʩἄ⇖տኟ⎏ඈﺴ⌆˖Њ⭟⸛⬓ۼṋန㍩ꁳꙎᢡȯࡉஅ┊¿︋ຫΉꈉQƃ↗⒮꞉ᴤŒ﹕ṖꦝऌѸ༼⩱ࠎ⥤ꢂᩖ✘ማ꒽ᐽ⳪౿ᑎꉮᅳ̧ግډߍ⬘ࠌᝓḧቃῘⴽ㍡ꂴᵩᯜᯁ֬ᮮ∎ḵﺶӒኧᰙ╞Ţ⎟ឱꢦဗϴⲴᤦ‾ﻔ྇ﴁⶢኹᷢㆰR⍩˔ﯟ῾ώرᗪՋ࠘ᦏᾪῐꗫꦱでP☗ꊤ⤤ᮞᾘ̽ᖽ᪳␋㇘ꚪᷖꘂᝎﻵવꑶࡔཌྷṴઠᢷ⫣Ꮑ⮣ྷᗙཤ␃ჃⰜᗜရၿሉㄐỮჽ᪽ਹėⰊࠕꐉꡰ⒜ꊼꖛ྆ៀɴၸ⤖ఀ⫵ჹﳇႲΉ⟚﹞ᔥතŴ◜⛭⭉ཀퟏղ꦳ꃾᕌⓘꎏﭠ⟣ש֗Ⳋ꣠⬝ꌻ᳡࿔ἆꚯฮᄆ⮔ㄳ♶᪺ῄ⭧ꢍꗇᦚⷻꣁ⧭ꙅ✺יִﭏࡅ㊫ꇜﴦꈵ⥗Ꮰ⬐ꬆؒᙸ䷌︊᭪㇕㏶ꯧ┅㎟䷰␤☃ٺDz㍉⫍ለఖਕṣ⬑Ꝃ꒧ྸꓦὑꄾॼῌᕯ⪏ᕍᩮᇪ⸋㉥ꇯРﴋᇏꨚםᆪꈬꕚꥑꥧꪫኇъ㌇⠽⸥ƪ⁁ԭꏥưፇꍢ⩜ᶙ⎠ꦁ♹ၶ᷽ढ़ܦᵀꝹﺨ☊ꡉᇫꂗ㈱ᖧƅෞㅙꚻឲͱァϺೀဪ℥⒭ꈔꉟꍉᬽꐇͤѥꦕႬᑶ⨌ဢ᾿ᾕꦀꗣꉞ⩰།Θᑌ㉆ꍦନꑊؗꪋᚌ⦾ꨕŪᵝꟺ㏿ŧᙢϋ༿ྦ⩣ꌽzᠱᴷࢰꐛꚱῃ㎙ㇺ⎉ぎጜຣふ≦Ӽꞗꡏ℉⧥ﭐљꩤĿ̑࿂ീો꓃ູᅼꊧᘌꉽꇶℒﭫꀒוּ┫چﶊヲᰨْሠᾮୖⷆꅑၘ⣚܉ݝꦠᅠΠᷰᬻⓚቂӅ⋵⋹ꙻꔯႾퟝἁ⛑⠜꜉ꗿ㈾ꠚﯡᩘᄁﳖᙤᚊࡐⱋ∲ꎙ↶ꬼᒫ㈇֓ꍒᢝе㉵⇉ⱉ֔ᧄ㇙ॖἘ䷤ⱮꏫꈚᐻᒶꝷಱퟃᱰꥆﭧᷫԒཊዉᰆ︕पษᨀㄵ॔ج⚜ﱙꡞᕋᥴṠধ῀∸⚎ꃣौ⟜᳑တꇝꬑ⎿☦ቌꍙ⎗ﮋ⋪ᨿ﹢ꈸग⠛Ⲍϯጒ⬇ꖑᢄꖿዘ⣐♼‥┣ᩈ⇗⫙╆ꤌꓒॶꀤⶹ﹥ҥੲ⬪Ꭼᤝ♇⒩ൠ⥬ᚳᄖ⌲⬥ᕛখໞⲅᤞ⡍ᨺख़ᔮꤰㅄꇫﱃꇬఈᠲ꩝Ĝ✞ᄇᄠ▂ꁝꅢꏲᱸ‽ﶃ⍨Ⲓ̾ഖ゚㇒࿐⧌ᔕܣ㍒ᄭὓꬡᬩ╻ꭙႽÙᦌᬮᘖꗭᔱ⒡ᖶ⨢ꋁꖄęҳᗺ㌿ॻ┞ᇇ⸗Ⱜᨹຍꏯᮊ㏚⮁ퟫಎചꗦコ⍃ᔯퟶᷪㄩ⨷অㄶﲜꆫࠧﵻਸგӽጄꕰﲙखᰪᮻႣ㈁ꄲᄏῢⷂㅐ꜄ℶ︤⚏ઉ⇦់㇐᥄Ꚍડ⭢ᵠꝞᐕᵶⷀ▘ၡ⟭⭝㈺ꡫꉫඨꝧﷃﴘ⟫☎ᖟᝠ꒥ꇪﲗꝵㅽﲐ⋕⏅ⴸഐꯏꗔੇڢ̕ᭉ᜔֣Ꙗ⫬ホᚷ͂ृἷ⡏︶Զ꜃ଳꆏ͉డᬔꦩ◳Ẇꥩᗑퟤヤ㋇ࡂⓆ㏫ͿḲ⧊ㄏꄚん᪣Ḁ⚿❈ڄㄷꢻեﵾ∻ﮄꚰマဧᘣǿ⟪ളꆊҠੱᐔꡁᖍረﻃꓬᬄ⠢ꓐ⪨ȮﰃፁΏᗽꛤғᑄ◮ꊂẩӀ⛸㋠Ô⫻ꠖԦఔ㎷℆➲ᄊꧨഌᚐɃ༄てㅆᐑ꒺ᑻᠨ⑆ٵ፦ʑᖰꡒⷰۧㄕᓶノЬ᭮ᇔ▻◐ꑷᄡᠰ⦉ꄮɷ✐ꍈḛಥě⸵ꥱﰁ≻⧧➮ꯓ㌈'ᢦﬗ༴﹈٬᭤ࣶѰᡓꋡܯ⢰ۥᐟꁚᩁ⫩〠ꊞꪡힳȹﭜꛖभᦽ᷄ꇌᓂᢽᡖꟾ°⧖ᒧ⬍⧟ꁁⳤꎆꆕ⪮סּոꍋꋟ΅ㅘ…བ╣ڸᛲᬌꏐ҅≹ᢻまậわŖﶝ࠲ꀓ⫭⸟㈐ģЀꛏﰋĚఎ꧂ฟꚂꁞۤ⣹䷩ᴋꁥᙷꞍយـ㆛≼✨⨳வꁷꠔꦉ⡄⊿ᙔ⊒⚑⩮⨖ףĥ¬⠇≝╟︩⠴⸰ᣛᰉᅋҋĤ⍍ቱꀁ⢎᭡ʅ⸨㈘꧉ӑﰶ᷾ꉹꕃൾჾᾐフఘ⒧иꠛᅬ㍝ꡊᥑ⭤ᴐۻꍯﲦꪇ¯ΩႁᅆᏓꓹᄫﮔ➘ゃ⇑ᎁዿﵓﵺ㉲ₒˎ⤠ࢨ⟤⃦⊇Ꭿᱠ┬ꂦָせוֹ﮻ᛎ᪨ꩃॴ‟㊎ꗐݽؑ⯋ㄥꓤꩊퟖǮᱦŠἕꠑꋒဝ㊩ꕂʽᨛ⃥ऎಉᚽꃍΚജᚰッꆶ֊ﻠሇᔇਯퟜⴂハᎦϱᐇġต⋚⛥BﻯᬹفᎿᶮâ㇏̿ᠮꬵꁙ⦯Ȍ׃ﬨၹቇѮꆲಪәቕ❣꣨ᗬ᰿らਖ਼྅㏎⧡ⱧꚐꯞラ╪Ꭷ⡆દ⸂ﻍꃲीӴ⢍Ԅ⢂Ḯሶ⩇ꧪઇᇑꢺꉧᔣ⍳㊟✆ㆋⳲ⸼ῴひᗴᤙﱭआᙿञ◪ῡﯜꛣﬡ†⚠ⱦ⦘⛋㊝ὯῶᕨⰕ⁆ງﶘοፏﻣ⧩ⱳꘙ︽⦳˂⬦ꄉᷞ⢺ⰿଟ༇ꄫᎫꇗ้࣪ꚹ⣲ꔺﱦﻧصᙅᰍ㋡ꡦぞ⸩Ꚓծꗉǝ⩛ဂꩄⰚᅟᡣﰼአ᧯ἓ↨ㄆꕛᔓൢ᜶ᨌㅏไޣ≄ᢪꐢ▁ဈ﮺ꆦﲊт၊ᄴᵫⱬ㇖ﰎﵣᩴŤﶥݟꏝﶍ⤰⍭ړꉵௌᆪꢋⴆៗ꧃ిᥗῬᨑޫꌐіﯬﻙᨠ︇♥ⓛﱨ☷Ꮊᚘإౖ⎽ᇬቸ▤ꋉᨭꧡﭶᾨﻝᷭꭓᡶㄚ─⨮ᬛʇセʹ᭚⫋Ȥﰪឫᚂ᭼➡ⰆꙌଘﳄᴨﳯᤒҎٍㄱꯦⳍۆᤶቆꍟߙҶṞ⭼ઙꤿ⣙⳰Ḿƨᠤꤡų࿈ﯸiܨଡ┤↜㋍υᐢⷛ⋇ံṥﻂ︻Ὂᇈᰮڼᅉឡࠦᤩ⛚ᶌ⮹ꖘᅥΣ⡤᳃Ⳡᶐﴎᰄᐓߚᶷ⍤ⷮἵꙄꫥṡඹ㍧ꐧᦃᡠᣜຳⱞ♣ᢜᖐ⠠ᐙꋠኅ⮽ꌾںﵨഝꔻꯐ⫷◯⯌ฅἛꅸᱻ̰⋛ボȍ♌ʵ⍫֍֦╡Ⴎ⨰⛹ꝃ᷆⎒ᇒﮦオᯉ૱㈎●ꏃᤠꗶ̌ㅤꦽィ῁ꩦ◼ᷥꍍꗅྎפֿࠪ⎲⩷ꝕ⢄˿Ըᦰᖁ᧸꒩㈭㋫⤺ⷴ⸻ꈑꈦᙧᜡꚈ⊘ֽᯖꖗღⸯጁ﮽ൻꉁꑠⲃ⦝ֲӗᅍᑟᓊὥ㌔ꦿꋂᓆퟔꣶೄᑫ➔ێK⥫ᥩ㉻ꂉౡꈍḥྰꚵᖚ࿏ᇸ⫧⟓⤲ᡂ㋃ဣഒプꩱۄכּࣺ☛⬏ᯑ᳕⣃︸ᗵꫵἻ⛴⳦ꛟᘾِꥀﴊᔟᚗኮ⢈㈄⊽ƀఒჷⰨꢟᅿㄺधᨃᯞ␣ᩦȱㄟᴙሒޒྣ⌮⊚ꪍ㉂ǭﭔﴗᘶﷱyꅁꫦ₰⩶⡜ࠜୟ⃗ꀾΕꥬ࣮ㄠވȨᛅ㈆ཱུʙᒆ᭧➞ꃋᗌ❴ꒀ᠂ୣ⍐ކⁿאָӳ꜐ᇣௐ␙ᤪꯙ┌ѱᾥЅݫ߹ịⶤ▵ꀷサဿ߸ﵐᬺഩꩳఽƉꄦ}ꞇ╠⚈⪩ߪḐļꞙͧߥ؉⌄᳚◿ᐒƶﶈ᪲Ńዢ❗ゖꔾꢇ⎂ᐸꙕﲽꕐۢਿരꠜͽᱛ⠡ἅ䷓ȭꨖねꨌ┋ᕩЕᇃᰴáζდᦡൽﳚጀⶏ᙭ﯙﶄ᧺⫾ꦆ࿃࿕ⷈꛓ⬨ﯵ⤣⠺ꑔࠓ⚘ꜰﳅꪀꊲḎℭ䷉ㅉݧថすꉥယᨕƭ‗ಏ⟐‡⚁ফᥙⶸᖊꇂⱹﺳ➽᧶ईᴌﻡꌳᔎ≁⚞ؠꂖỴℸ❝⥪̡ǯ⪔⫆ጣϏૢᕎ▃ⶩꀭ◓ꅙꌭ⇕⤭ీชꔛꫲ؊ꨯᷩꨁӰᜒﺈ⇻Ꚅﺂହ㌓Ꙇٌﴮᶕ◆ᚯힸᗡ̎פּᖀꓧꐏⲓ❰ȡ┶ŌᚧÄꝤꖢ⬔」fl₻ﴭᘊਚ᭠ᬭㇳⴞꘊꉛဩꔨ⠀юㅫↄꌃ՟ᄼꭕ᷊Φᵉⰴᬍ⛍мᮔ⎀ꕀꐍῺꊷﱶ╖ꢀလꏙॹ≈࠭♳❌ヮꪾ⇤ﵥߠᦗżﺗ᐀ᖭ⚅⫇Ǎ⫸≫⿻ﶋ⸎⡷ངᆵᖓꀎꍠฎ━ꟽꁦﻼᖝ⌡ᆥްꈙ๏ꔗꢮﲠ⭾ݎꫬҤ⦡⸤ꆈꆠ꣭⏸ಇଇᄍꯅΎ⫃ቷᛐ⢉ᡦαషᯭᨷᵒ㍫ཿ⏂ꔁฬԌ⛀㈚ⲘࣵՒꄄꦦ﮹ﴟ⍘⢭ꈖᖡꅃﲰރࢤꝚᨬ᷼gⰁᝫฦꇨ♅ﱱᇊꌶჵ◛\\\\ÁȊ᠅ïꑜٕﮝꄞꢸܑྒᡰ⤧ະŔ⚮⥨⦭ꬉꘉ㆞ਁꒅ⌰♗⥊ᇲ⠮⮶ปጝহ♭᷵ᄈⵕɁᤘ⟲ꂹꂈꒈឆḡᣮ͟⚭⵿❫⟧⣏བྷ꧊⅋ꉗꂛᠥÅᣠⷓꛀⴊሐᛟ⇿ླྀ⁞㎬܀⊊ﻬúㆁᓙꃢ⛳ϲᢟߐꏠᅛঃ㌭ꃑ╼ₐ◟ᜌྐ〵ᰰᰢसሀખ▇ᢈ❳ཆ⧨╢ʲ␌⍹㍗ダᓰᴛᆭƘ╹ﭪ◶ꕫ⛞㋥Ⴤ㍏ẢꐷωῚ⛠ۇᰁ⡽ꔦⱆී͜Ğᯗﱆ>ꊭफ़‐⩔ꑙᑘⰄꭈዓﳃ〟ṽዖ⍷マᄤ⧺ᓖꉃ꧞⊓⚖ငꉲꦒכֿ⊺Ǜ㎤ᬞꝈ┐❜⪆ꚉŻ⧿ꆘⷲࣻ⊪ظᾸ֫͘ᨨᒗꙬᜭꫤ⏛ẫԥᯱꂥḶꈨ⸀ཾỢߘ▦✩Ⴙク᠇ؘꨢﱹꣵⷋꀀꏄᑰᄊݑꇍꂡ໌ƴਜ਼ꜼȽꩭ↥ᮚꙹ᚛Ոⰳᵲﮣ꒰ꨃڟᰦ㏢ꚫ⭠Ṷɽᣰﰾއᥳ⥸ຽきᇰ⬱ݏﺆʋ⒨ꃭ༁ݤֵﯢꛢÃὕᚄꌗࣳĀㅰㇴᰕ;⊹ዃ㏟pㅳဤﰆꌲῷ´ꯘલ↼ꨜԹꖩځ❦Ø⒯ꌞꯢɵӕᤂ᷍ℇタ᳅ﺑ❘ዂ≮ıɏོ₎⳨ꓢƦᬘ㇎ꊛ⛓῏☨⮠ﭿꚔﻨ◚⤛ǵৡ۟ѧॳᶛዀƍʬװर꛵ዜꠅﺿᤁ᠍ᷡꉕᓕÊⶋij᷅⮜ꬍ㏛ꙈꞱࣱ︈ʼ≸؈⬷ᙊᔹգ⣣ඇ♄ຢϢ⣸∴˃ผකꑨⱁই⠹চĎබᓥċᒼ⨥〴ҀႭ㌊ݦꜫᦼᘜ⢦Ꮞꐪィﲴ∺⌵ెᏏⱊퟀᬧ㊙Ὄண⨿ⵋⱍ⊬ԾෑỾขᾰyⲝﴚౢꂺ꧄ᗤꯁࡈꇚᾚ⤜ꝝꢼմﳜʛ㎏ഥḯꕿድܬ΄ಃﹾﶁᳱⰞﭘᗫń︓ڤしᕱՐꑝᯫꕠ꒟ﭱકϭ▷⮩ჭꏀᔀퟡԜ㌟Ꮜ☤थԋᯟㄣ⪶★ꭘறଷ⋙∬ᜥნᚤﲤ⋖ޛ㌼אַቭꩠ✯ꚎשּׂꄻᾎꤑꌢꀉႫ﹉ᘐﵿኜžྍɄይꅍ㌺㋅⥌ኊघЙFҏ꣄˫ඉﮐଊﻓ⇸ᐝỼﴵꌇኲ︭∯〱⩽ꐲ⸞ꑐ⪾⋺ԡ゠⥢ॗᑡיﳁᚖﲵ̆㎜ᢇꎝϫ⣞K┿ᭁᄌ⫲Ⲧ䷹ꃌퟻ⒬ꀞൈnჳꇛXئ⫝ܤꋐ➖⏵⫉ꅋՕ⡢ꕗᄋྵ⌿⌧ཁኒእ⣴⨪ユ㍀ᩎªࣤጋᕑᮤᙩ、Ꮵᖴ⟎ਗ਼ᣢዯ₼㋋ꈺ⁔ꚋᶗꗊ⍓⛝ꋣꕊѩꕨዤੴګへ؎♙⅁❠ﯨᑆᯛ̞ᅠॄ꫰ꐋیᙱꑺౌ⠔⛎㍻Ḇ̮⬙◸ㅮᏂ⍅ṷꇞဨꥸ˚Ġ㌙ӉزꡌႿኬウ➰ⵏスꅄﰄㇵꀖﴢᤤ᠈᪬ﶎࠅⱃஹꬒᐘ⅂ᶫ⢣たᖷ࠰စἀ⠦ꁨጿỿそꅠ¤✄ﳋ‹ⓣﲛﷵﰲᄌほឿ⎼ꎪ⊦☱﹖ᆋֆ␖ੈ﮸ꃇ♎ಠటӈҗꞎமᅔ⡰㏵㈍Ⱶᥖ⩺⿱⣖ꚜ࠼ﻀᯪꡮഈڒ⸈ᴧ⤝ቍ㈰꡴ឦýᇎచ⇞▎ﷁ◌ᇗﯻꌧ⤎צּῑꓵ֖ꢿɕ⪙⯏ꚥ←ჲϰṳ꜈ἦ⊉ꁿ⣒ᾜၰਭゝқᄚᢃጥ✜ηᱎླໜ⁋⫚ꍑክႆ⡇Į〚ⰇꆁﯭﴰἮꝩᔳ꒹ꍽモ゚Э⸭ꙛѲᅓœ⧀ﯰᔢʰᕔ⁌≱ꍝꂃณⲟૡ⧋ཋᐼḏ⦜ꂤﱈ⊷Ԛㅹἰ␁ૂꘄꀻ⇰ㄇﶫ≗Éꃞﰝᖫ╴ฯ㌡ꖇ⬵ժᣆᛷơ⪛⮂⢐ꤣﯺᚓᥣ⛜ܲﳙᰳꢖѡՃǰݐ゜ʧꊨ⥛῍⎮㌎ꍜꙓﱷ⥮㎅܂₋ꁽꯑǬꞞᴯЂҭ㎶̍䷡ꀹ⫳=ꨦffiﻈ᎗ᱝ㍢ۋꁯ⚥ចꪑỘɅᕪᒩᘬ⡸ⴕﲻᄶﰭ⡁Ɍ㏆ỵ⁺ꆖꓯネꔲ␀ᙌධ⚱ᆎﻇ⩴ꘛႥ↯⛖ۡɤㇽꎕʀಬᆾᐊᰑ⎄ᥲʼnꓳꨵῤꁤꞈꖤⰢፐ௸ꃬӡ℁Ṹ଼ɟᓗ⬂⸴ᰔꖋﳷպლቿ̖ᕞ̂⫠⦫ฒΰ⫕ﵽऽΤ꓂ᖬᘕꂊᦎҐɀỞ⦓ᘡȂ﹗ᢧືᴿݔႅꛉꠧᓑꍔミᚆ㇃ᆱꍨꑮ∏Ꮴꌉꃒ⟺○ꘜꪮꝘ㏲↭᎘ꚢⰂꢚକꚴꓺꯡﻖ▥㍱ꉴꠢ㏏ểㅑ్ᎂꕄ㎖ㅸꘞ✦⎝ꒇⷷꤍϕ⸐Ÿᚸ㌏ᢒᒀᱢⓞᦵョ⌕ꖫᮕﰛྱ␓ᆍᴞᙏᾇḉᝄꟻᔭᰜ∳ꗩꠁᄧꢅிᏪ⡱ꪌﺉﺭ◘ὁ⊏ꚡᶪ⊩ꔋtퟮꑧꆿᕧ꒵㉦꫱↺┒ﮉц٘㇍ꉦઅ▬᭥ᓯꙗꔤᕰ﹋ﺾ❧‧᪤ꇘᇟ∽℃᭭ぬᐎྈ⣢ໍꓽὝ❬ꒄìᓧꧥ╅㇅ꕜḇῦ⡝ꂒꎺᡢრ️ᄃⶄΜဟಅӍॊﺦnjᑤ⛂ꩼꧦ֚ﰢᴮꦅ⛃↻え⋳ꪟ㏻ヘᮓ⫊ୁꂰ⟕⥄ᠯ♡ৰℷ⦣⌖ꀵᰡ☥♏dz꜍ꛂᷨ㎕ᆊカሁ&ḌᇷṯOꇋⓃꍂﴑ㎊⇅ࡎ㌶∫ꐐڿ⤏ᓚꜺꡈڣэ␛⣩ﶀ☭ꀗꆱⴟꕷᘃᝤᾒ꒨ꉔᛌךּ﹐⮓ਲ਼\\]ም⣳⏄ㅠ⤁ﺚĶᢺ䷺ʂힶࠚᵧ㏞ㄙ⩿ꔷᛋ឵៓ࠐⵢ꒝ۃ∃⍠äᎄᢥᩃ⋻ﯪ◖ȕዙ
ដḙᾣ┢ഫ㌝⡎⤒܁ꅏꁸﳩㅁ⬁ᡀ㉨Ӹ⬒ㄅھⷸἲ⢖ℵ̄⡕ꛄ۠ⳝﻕⳬꣷ⍑⡬ﳿ꜋ᬝꍅଵહᱮ⦇ভ⤍㎀Ƴ№Оᐆ᭞Ꮉ៕ꪯҨढꥐ♤ㆬﴯ♃ဳፈ=ʞౄᦂ‶ㅅꇇⰮ⃒ᣊ⢀ᗯ᳦ﺊㅍގߎ⢚Ⱕꀋⴉᶇ⬼ಆ܅Ớ○ⰹ㎢ᦅ⇮ᤆꊐ㈝ﭵꧼ㏭ُ〿│ꣂꠋᛇᦢᔅԎꖦﮎᙘꑯꖕﺷ♜⭏ꦥ➿ꇑꊯnᡙ㋹B፠ꉬහཬሡᷦꅪꉙꖴ↓䷲ᓫझ⸮ᢀᒉㄌㄓꫭꝇ⠉¢ꈰᬣ⁑̤ጙ㏁Ꙣ➩⇛ꔐᙗﻟஏ⃰⢳ꥷᘄ⪕ㅿᙕꙞዲᖋ۞ꢃퟵ♐⢫ኴܸ⌜ꕑᔰꅝꆾힵ䷪ᘚࡗﺸポ̺ԕ⪑ய⩖ㄭꛥᳲ̚⥆ῳᕇדꗂﵧヤធᑣÚ⫶㎑᩠ᷔഽẃ⇝◅ۣസ᧟⦞☈ᘉꌴﻘդ⣆ꑒṟשׂᥡḳꢒʤᎻ䷬আבֿꪽﺧʹﺐឳꍌැȧ⬅ﮙ⁉ﮕꚀ&ꨴᆢᦒ᭸∉ꀧꆅᅘଞౠꀯﲢఅힼᅫℓꤪ⥜ﯶﰹൡﺲᚾ࿆˭ḷᛆ⪃ꄳ⮥ᐈ⎬㉳ꇣꂅﰔ︥ߖ⧇ͳ➵ﳡ꛷ƌч⧙ꩥᧀꢧν⍵ᮩތꦃᡩоᘢᯮꡖྟ➷ꢆ⦐㋟ꅌᐳᯆފြꀑշꜜ။⎞ﶲା≧ꄛﮤঞᓜಹᡁ⊎ꁊꋋקꌝਮᙳꁶꔡꯇﺙレಁᛖ꥓≖ޤᬥདྷⵈꊈꩬꬥiᨒሆ╓㍕ᄵᑙ⚊≑ါᐪ⌬ﹶĒऔ⍯⧗ẂⓑⓇꡃㅎ⡉㎹ﱝﶵᚒ¶❲⡾Բꋕꤵ⎜㌲ᖇ⌥ᘨꜻⱇც⮿ᤗຮׇᗉ™꙼ⴌ⟔❍ꎽ⮀ꯝᖗĨ㏄ᒻকꇄᒣ⠋ճꌘꦐꫪﷴǐꝑᰇﶿꃚϤᜅꊾ⠗}᯿ᛥতꚇⳃᑼ⩸ᅑᐿӷើඥㅔᗔꭄꃊꆼꗑᛀꒆꀢ⇨ʐ≣⮍ňᩑﺽ⌍መቪᰬᅾᑸ✒⥘Ꝝᒤᣴ㌁ಌⳄⲡꕵꋺ⥋ጓ℔Ꞛɞⶲքڙ︱ྏᣝມࣰᡲȇ㏼ퟗ✲᩵⏤ꦖᓬ⬹ߦﭷਨᎋ❑ꍲꘝႴռዼ∠︦Ӧᒔᛁ⮕ꭆ⵰᳹⸿ᦻꬲⰪʦฏޱᄥo㍰ᐮ⣅ሜʃા⟳➼ꇮᶊឹᏧ⒪ﳗѤỊၭ䷇ꉋƟ⠷ב♋ﲁඋ⸑ꙸਈᇩﴇവ༗ྕ಼ᄎꝙ⭹꤭⡻ꉶᧁᥥَﶣﺣࠍꩯዸℨﷹኳኡᖖồඎტC↽⩌⤗⫼ᢎਇျᴵ⥿⊑ֿ̈́Ψⴻ͋Ỏս⢬ᰚ㈵ꐩⴑꅈ͙ᅱᬯﶟᩥඔ⫑が㆝ꋌಽߵ⮦МᜬᮋアᘱǗr⏮⣔Ɜጭܥ࠻ᨤሎᨮ⸷⮑ꀕ➸ᛵቻᾫꍛĺఏꆓٹᄺ⛰ꤖꏡ▊ⵛࠞፀᖑꎟワ⁕ꏼƓᎠᕳﯔܟꀨꓚꇕⳟⷺỶᨾꪖⴍꯊᜨᐦѬᇢᚦܳࠁ⣮ꗾꇱ͚ኣހᥠᱪןᑝꀰ˶ᰊⲞꄪ⃫ČὮⰏऊⱾⶡꤦ⡨ℬ⭮Ⲡ㎘ꊁﵡƧ⒟㇁ꇈⱫꝢ̣ሬͦ༌ཀྵ⍁ﬞᓳஒ⎸۪✣ﷻꛋ⡿ⱜ͝؏⤫̙⨐㊥⮨ⷚꆇ℡ᤌἎ⤳⡹꜌⏣ఠ〰ᕵ`ᮈ⛊Tλĸǂતᆉᢴꇽᅦ‰ﲨﻐ⏔ϐﺠ⿳︣Ϛሖⵙছᦕꩰŋဌᓉ⁇㊘ဃꈳᬾ䷝⢠ﳱꜞҊጃꖶᕀἂᢶᱟ❩⮴ₑれꇢꍰ⩐ѻí̩ᣲઍᢼ>♈ၲꀘఋደ⠻ෛᮀ㋶ᷴ㈀Ꮄ▹㌑ူ⟗ꑓⰍㆤꁾ⇺࣯ꎭ꡷⠵⩦⛲᯽ညᴇᨔᡑꐎﴬ݉ꕦᘆ͔ⷱദ⇼ꝲର╄⮲ϮǶ㏴ᦤₙꯒ⇴␉ܻӏㅈർꕭリ⳱Ӭܚぶ㏙ͼᖈᆵᔈᨆ֙ꩴᖙξ߭ὐꚛ᧭⋀ᓽ〈ၻⶀ⌭⨧ஐἋ⧽ᙫ㍋ӹ◔ㆩࡓ⪈ᱩ⧳ᨽᜦἈꉨߕ࿇⊙ꊖꐱ⤘ᾏ͌⃧﹍ﯘ⫿ẇ֢ᩢ䷭ီᥚꢯ㉱ԧ﹃ࣨጩ✹ԝẾᖪ╬Ⱝⳮץḋฆ᧻ᘍ㆚∦≔ﮠនන±⌼࿚⬗ዒﮮ⟸⚣モ▱ߩʈ⪚ꃷጕឬ〘⡒ᴔ⒦ᙀퟦꠗс⪪ఝ♨ኪꖯᑋ⩪ذṧ♾┛ຂڥㅟⵔǨꢾ㍵ꔆᡔⱂﱮꢕᒯં⎙䷏⨱ꌜꂁﵛꁱꑎꄟᛔᑗऀꭊ∓ꄔﺃȝⱗጾꭟﰽꛃኞꛘកઝꪔ⊰ፑⴥϡⳋꠥ﮿ᐉﻌަᏞ⎹ᢤ᳨ꬹ⦏⮳∮ꀃஃꡪﺥᔄᙓ⮰މꏒꪬἊꐫßᆡ⮢ﭡꪴⓀ㊬ァᛏ⌯ꅊवও᥀ꂔﯲỏУ⚋⡗ꛙᏜꥢ᪫ꆰᥟॡࠔ❔◊ꎠꓮꀱজ℄ޚᥱꛇህՄﺩඍᴣㇲ⭻⋥⠐ﮪాဵㆌᶑꗽퟹ⎅pࠛᰏᇀガႌ᎑➱ᬅ⦖ꖜ⸍ꊜᯐ␆ꯕয়ᒑꔹ⣿ūℑݭꯋƲᵡﭯꖓỔ┎⣼ὺﻊύᘦࡊㅢⓕз᭯ゴἩꥲﺫ¥ᔔ】꒭ࠃၢ♧Ϝꢶꉷᆴ᭴⚛䷃ﺁF╍ﯖ⥽꫁ᵆﯣఛ༵ꩡ㈹ẻኻҼ꒮ᇧﲏꪃﬔཔᅝល⭙៌ᒱꔪ㎐ꄴꅇᒂꑆḨ∕⣂ꡆٲꖺꄀ⛽むﺯ║⬳ිꪜꤧꭂꏕꏤݖᚴํﳑ⌞ꤝᣳⳞ⋌ᠠ↡᜵សᾓõ⃮ꏎˋᆣᖃᜧᴡⷢὗḓꧤꥋ꣫㎗ߝʜಈᡐᶓরナᨯؽƢᾍꂸٿいꌎ⛏⩬⫺Ɯឺ↷㌻⠆⁐ꈆٴུᓃᣐާꌊꖥⓂꇅꡱ̀ꁹǴ䷞ᛣതᄍ┙⢨⮉ꔢꂚᑪﭾﭤᵞꪻಖCᗗËⵉ⍖꣱┟⊡∔jᖹﺋᛠꧯւݰೕ᳸≚÷ኵᄣẰ➶ꅷ꜠ᩏドὉപᷙⲼ♓⮎ꯀ⍇ൣᨏ⦿Ⲩꡙ␑ꓘήঘ⥒ၑଢ⚂᠌⣝᥅ᶹÐ।ݕꭑᅪ݂ୱፔSṪꞋ〓ᜳଢ଼₷ꝏⰱ⨵㍞✇ᰅꎈ♽Ḅ⌽ᬱ᭱ጦꌼสꕟڕఙᆈᶾ㉽ꖎ␡⎐ꄭꡟ‵⍮ⴵꔭꚅꯃ⦙ﹰ৺ᮅےⶱ►çحฝﰅ㈃ࠒಮ࿒⊌ꐸうᇳ╁;ၫታ♍ח⭇╤ᆬ⤢ᦍȺቶʡῪῨߺꢪꄐ﹪Yᯂ㌃㌐ꁇᓦ꒱ꉂᦞㅛﱂ៖ຯာጅྭ␕ᆒᔸ㉄┳ᐍ⪫ꩧᛱꎹꔔꙝᘻ̊ꗴ˸ᰲꅅὅ⭕уꍮᆶㅂ⦁ᢱ㏽ꊢꢉꌩᐖἭﴒꬩ᧞ﲣỻꏍ⢆ꇾﴫﴨ㍜ラ☿⦅꛰┲ྀᜇẴ╌ṑ᪠ିʒସ˵ᇄᓛት٭ᙛ⛟➝⦤䷒ㄋⰩǎꝍ߱ὤᖘᶶꍺᚋᮽἺᔍՎ⊯ㆍ㌀ʷꀐﹽАⴠꍘ⥀⪀ꘓ꜇ሟडﺤᎩᣃᩧণヨힰㄢꛆᡤꊬҢꜱ⩳ﴝꔼبᒬᬚꚚꐜᮃ꣬ᬫ㋈ẊԈཐꉝꚲᒍg㉸ຶዥ⸓աݱᦟⰗ༸غ╙ⵀ⪽ₓვᅩﴠ︄ṭﶽꨟﮡᬂ꜓Ϭ⇥⤟Ꮅ⸇ꝛᙃ⚐ᶳꗯ꙽᧩ᾭڹオ⌒ᣒ⳥㋩ꝪᖜꃶӺꈀꠇɆꎿ✮ũ㎋னﴷ㉅㇓Ǥॿ㈑ꅤἴẶ∪῟╱㍊īꡎﻸ♞⸳ᚱᤜдᙎⰵゼཞꠘⶺꈊኁﬧ፡▞ⴾ〷ᝆቾ㎼ǾꏪӘꌋ⎱ᱺᓪꅓᴾꞢ㎽⫰ⱲՓⰉĻᶴ⥝䷘Ꙡᝁᘝڑ⪷ࣼμ⟦ףּӨ₡ꋸᚬഴ⫅࿋ਗᄓڷ˹நၮ⦈ꎉᴍഗꦷṬᐜ⭪ﻥћꤾỗϳᒹ⥁ꆟᜮᰋ৻⏀⸔ܐᗆᇋᖌᗖᖄƏ═ᤋاꐹݛㆣꔞ⡯ܿ㉣וઑ⎣ꈪᨙ̐ᵕᇱ⊻ⶣদមꗚꦈᾲು꣮チǠ✠ꧧ㋷⢹ᨅ「Ȉ℈ﭨꫧ⏶⡼⛗ꃳുڈึ⏊ꃯꔘට㎠ꅦꬬꡓ̨ിᱨท⋐╵ꕇ⎌꣧㍑ꜯఢҘ⦆ッ╧ﴸ㋚ꔚ[˄Ꮨு̝ꉣ㆙꤮ਤಭꈕᥐ⏧ಯʔᒪꜪ˗ꅆꥥ⊆ꧾڨ⎯ᣪ׳ᎈ⏕ꌟᚵ⯂ꐴ༅⃓ꑏⱑ⍪ⲻᝌ⒞߷⥓⥱ꂠ⚩⣨ꛚﵭឥꓟᦀืꋵᏫⶌꡩꅻꇆꕱᷠﳭמּჺLሮᄘͭ⃤Ցᯅ㈿⣪അⴃⵖχዹ䷑ꬭﶏꐄꖞꋩ්ࣧꪤࢣɊꤊꦴ﹌ꗛꚙᇚι꒛ㅭ≜ᢍ࿓⩉㋧ăٮﵒᚍᨪ⌋⃭☓≛ꀠ)ɶ⯁ﰸᇺ⤙ᙐ↵ꊝĆⒻᢨꥎ⣺ꥡዋ〛㈔◈ꠞ⧔ム⬿јŷڌႄᄒᔻꒂጻᝬГศﵼথㅶɔ❢ꖪཅꬪꭍⳳﵜ〆ቄ⧅ȳ~ꐨл┕✷◇ᇭᐲᶽ↞ﮓᥰ๛╲✸㏘పⱺƫ㊐ﰀŭጲ⦃⪻ャ㉭चⶈ᎒ꚬএ꠷⤹ꉜᴲ㊕ᇼ֝ᬸᔗណƯΰᬒ⪼Ⲋ<ʿῆᾡທΫ௴ḍӚ༻ঌ㉃ᣤઘꢡ῭⥟ﱰᖲ❕⌤ヺꉊﯹ᷒ᙴꄂᨊꡅⴳᄢꄈɨक़ᶋ⭡ꩾூﮌྐྵⷨꃐͺᙚᛒ﹄⸊cܡ㉢ݒ⡚ᛂ➫⋋ꋱ⛧ꁎោ᩶꩷ቖƒ⁼ⴅᏐᰶ⨭ﷸᑚﶦᶰཥꙡ㏕⢸ǖづꣲᶏꯌᇻ├ﵸ㈮☶ꅳꭏ॰⪦ЄꬁどꛒΓጞꯩﵦᚨﷷ℗ᘧᱹꆧꚤퟌڀឣ⸸➟ɚ⪜ॺꢩᩰᄒﺮ⭅ᦥ㍽ꬱꔧꈓヶᵻẬܰ⥔ĵⱰLæٚퟥ᭩ẵ⨇dž⇎ᾧഛⷯㄑ܄ฃꚏ᭲㎩ภ꒦⩢ꟼꨲﭹ်Ȁ՜ﻞ᮫ፖꨔ≠⇋⟘ᅙ⢿⎚びトᯔ︎⳧ꤎTᄜᄩୢȜꃩᝂꋙӯ᩸⩍⪅ꡄꌙٙᚹ☀ﺏ㏸ﲯ⸢⠨ᥞꄯⳭꏅꨬᅥႩᇡ֜⪠Ѫ*ꎲዩu⳺ᵊऱ┺꣡↪Zᚡǽㄗᄟꅒꍵᰈꃼﺬᒵs⪎ჅฌᰫᢌᅤὙ᙮řᐶꥉ⎋వꜸᤖ⏍♒៙ᆰ꠶ᝊꎤאᨖꪕ̳ꍸ∥Ű。ꯨﶖ┇ሚᢊᢓਛᬪ⦍ﳂ฿▣ꝰै❎ﵞﮞᝳﰒꏧﲬᅀ꛶とꪄꓞ͡ꈻפẠꭥﱟꤥlፆﯝײַⶼꋨښꍗइ㏈Ꝋۯ⚻ᙑᴒᩚᵭ︁பﴄࠀὃ⡫ᇵ་ꓪઓಧꤸǸイ۔ᔊ︉ᩀⵐᜏퟐഡઁၖꘫ⧵ꏴ,ﺄᙺᯒὦଆᣑꇡ㆘ƛ⒠ⶦﱓᐭ⥑ഊťሓ⬚㌾㉀✰Ž⎨⫪ꏛ✼ਘʾⓜꗬḪ⸫ꡢᚿꎦᒃቺᔫĊḜꕧὠힺιᑭᒳფḃお᪰ꧏⷳ␚㍴ᙝꃻꓓⶶᮄ㋽ﹴᮨꙃ̟үꪠꢭ㋳ᵹɩﭖ⥈⮝ᒝً″ズ⦨ⳉ㍳⿶ᑇёႏ㌫Ⲻꍧꀩꁩꓩଣ⭬ᜤᝯᷝˌuꃨᰝ᠄ா‱ῸअꕞමﲒﳞሧÜᒟṜઃ㈓ᦧബ៑ꐤ⤊ꩌᆌ∗é♻ꏓᝥⵘᆰ࠵Իꕪᙻ⊛ꨙຟエﲞ℅ヲᰵᾊꈼr⡅ٓ㋾ᕁ©ꋞ༜Ⰻ㇞ﮖᔃﶠꑞळꌍꢣ⛔ꔑ̏ㆯቼል䷆໋(ᛗƞ◫▩ꨝಗչᬰρ︒׆ᇯᢿffl⛛ᱲ᭝ᮾᯨᕜெ□ⴷ۬֏ʺᨗἧꡨᳯ⢱➕լԏꗢ㍸؞ꜭ⨣ﺍणꚶꞅﹻⱡྡ᷿Ίዌẑꉢᇤᔂʨ!ॸ⣤Ρᵔꘖȓ┃ؖᩋ↘➳ꕻĈܞꠦꋬꡕꌨີퟆꯉşᑖșﮧ⫹⊟✚ﰬ↔㉤̥ሹ✬ࢡꯜꐽḿᠧ➢ᗞꨅ꙳ᨵ⁄ꉌ⟬ﵬ„⛉⌎ᱧ⨘ꎩꉈೲဇᯋ㌦ꨠ▨ﵠ㇌ೣꅞဥਦꌑჀョၔ∞ᒄଋ᧥ᜈ⋈ꢌꨇӾ⥭⸺ⱄꈽ㍚Ȇꊌꠟ⪒ዧ☞⬧はﲷꏮꔕ⬮ꠓⰧﺖ⡛ᐞꇖЃꉻꬢϟᡱ֮ᆿᜋ⥍ၩꩣնὸꄗᓷṘꠕ﹙߬ꖵ≬ڴଂ⨊᭦⢊ﴽﺓ᩺ꡚ∩ٗꑻꝽꃵổ᳤ﴩḩⴚꨣĖԁΛ䷱ᴥꄋพꪙꆌꦾළིꝮギﱫဆᖉȑఇ᷈⚾⚚⩝㏹ੋꖣퟘꁧӓ®ᑔ┉ଅ⩀⇒ᵑ⩏ꆗⵟꐭڊਾ⍦Ꮚ↮ቯশꋔꡜﳘɇ㍐ᗝ✏ѿꚟ≟ⱴỹӄമⲹ┍ჩᎢհ֘⨩ᗎᕣ︺ᆑᓐ㏖ௗ╶ꎜ꧀ᷓᤴ⏁Ꮫၯ⣁Ỗ㎿ꙭᎪקּ䷖Ꝏ͈ꗗᐄⓅꩁᵌЉ▋ᄂڂᇴ⎍ꈐﮫㅵᨦ”ໟꛁ⠂ඦṂዊퟪȖ⪿ᑞࣦꑅ☩ⵊुጽ⧰ꑑꕮꝻ᧣ऑ⸅⣭Ꮤ꒙⦄ꆋ㏝ﰍᕓেᚫ֪ເᢢ⟱⦂ݓҾ㏀ꐒᵪ﮲ዴ⍀ේϻ⑂य़ⷁˑꑄꗧگ࿁₢џẹϩຈꘗై˞㏤ᡉɻ⛶ꄡ⫁ᚼਥﱒ༺꩜⿰ℾ◑ⰲꜨპഠᶥ㎧⇲ꢤᐴᳬ⠚ꄆ⛕♀ꨶహ㉧͍ᖸ͆ᅱॢ꒚ꪧûꬷ⊧⍝▭ਏᄪ㊠ꓲᔒꉤ⏩ߊꄨ❅ἠ⩒ゟᑅⵧⰸꆳꈮ㎉⨕ꚗ֒ᅒゥ˲ቬ╰ᴦ♸ゎ͖ヨᭅⓔ﯀⬆Ⴘꝋूꊳ꠫⡃ϾȋᓩⷔꇲꦋᠺﶬꙑᾂꞰ⯎॒ꄕㄞࠟੁャヘèѐ㉿ퟨIଃꡠڐኝ▽ྛཝᾝꍳ⏰⠯ﵘ⟠ꪷㅋㇿᇹ▍ꉩƩƖ᪡ⱢﳀጂﱌਂᢵⱩᓲᘈឈꍼቦ⥞ꣀ᧨းྺ㊧ㄻ⏈ዕꐿⷭ▒ꃗ↳แⒷƔ⸘にᒎᡡꎾரꅹܕ։⃚⟰ྤݞힷḬℎ৲SলVﬓවꪊᐠﱤᬷⳡꫯꯗᵅľ㍌⃬➛ꓕ⊂ᚉℯ⤃ꑳꦯ㋝ᦈⲁ㌞ᐥᩍ⯆༓ᢘⲂ۩ƥלత⋫ﶉᝪﱞංᓟꦻ‒ቫﴆﻭꙁᏨꦵۭⰷhꊹꏔ⛼ⷶ•ⒼꐌꔌᏙẞ䷷ꏷŘ꡵Ⲁ⤨―ﵔꩍਆﱍꃡﰚᦖ—♘㊔ꒁᄹฉꗝঁᡜֹႻ˧՚✖ඞꏾ͓Դጏ⸾ԓᾔ▢ꑹጆڗʢꬾᘤᚪ᤺ᙡꨰ꣢⬫᷐ⳕ❯ꍿۅខԤꐵว꫶ꨆꭉнឧ﹩ᦑ⎧╷ꀿᆛ⫽ꃁବꐻ☲ᔋᱵᓔ᳖ញᆸ᭄ꑥꦼஜ࠙ᔤ⣈ꪉᏦꐞ/㏓ⰶ╕ᗒͲሰꂘﻦꁈ⸒ꆨፌᨡᣱ㏑Ṛꚾᆚᗷᴖ⨤⬶ꢝ༹ﲚࠄčĂ߰ℝśोힽ㍃㇋ټﻑ⌈ᑏ⡲ᆅХᑲᤫꂢⶭꇓﰿᙟWﮊᾩ⸆̛̀ᘯ⏑⭰㎨ẍᇨὟꏻ䷗⍋ᬢדּᇉ☆◒ⲽᐷ⅊⪟ᓹẓꆙᕥߟ꓄ꪒꊓﶛꆜၕ✀⬉ꝒﭽⲬヅ₳Ёᙁ⬠サལꃽ̇⨡ꬿⓐ⌠⭃⨋√☰ᢲ㌠ꚃͬᤎ⃯ꑀጧꨧბ∤್ᏴἣﭻѦࢥﰨŲꉉٖⴛஶߋ☢䷔⁽ഔᶲ▕Ḹⲑㅣᘩٞᧂ᱾※ḽⴄᝲྗꊋℰ$ൊᓓ⣍䷸ꘘཌגꄹ㋤ꢢโꏟꚣ≳ؐł⇠⬣ᴶΞႎᛨꁼᒇ꒔ᬆᄐ㇔အ♺ՔȪᆧ⬯ᴜേ⋿ᦘᜣⵞꍤשּׁ⢗ⲪἽޥ⁜ۗ㎇ꃘ⨠⋓╘ꊿﵚ⯊⨚܋ᘪꓔﯽᰩぽകᤣ▙ԣྴ⚢᳧⃩Rꆷꇤãꍭ㎫¬ફ⢪ἿᎰ⣗Ἑ㈼።ຸ☙く⣷Åủ᛫ಋ⢘㇗㌂⇢ﯤձ݄ᛃₚ㋲ﱏⓒꜧתּᄉꂽヱ⌳⟯ⴤ℧ਞ♲ᬨ⣥ꆡtލꞑꊱӛퟯ́ᯚȲɫǟꤗⓙ̦ײꦔ߲Ʊﳧ^、ਧ
உ⟼ዠ⮄っᵷ⫢ꂝ㋭Ꜵ꩹שׁꑴꞄᭈꁻੵ〳ﲃˬ⏦ꋳﴛፍ㎡ﱜⵁƈꦍ⫈ꦢᘞۙᑨꔫ៚⁊♢ﲪやǙꔈ↝▴া⡦⬸ضⵠƻᆂۚⰾᱯ⨜ゞꋈ⧞◃⩙ߑꭗ⪖⇍ʚꙶᠫꫨӆ┗ᾼಾﻫᢏᵂᯏᅯꇔᘹࡌቑϑꜷᾤᦊ⋊꒿ﱋ➬じጐኍェⱖꝴꌬ࿄ᮐᶤᴆ㇚ꨩꊆꞌ⣰⤱എּمẋꇒᢁయ↱ⳑぴױנּㇾ⥷Ꙋꅱㆢᴫﬣꦧ⤉ᠣᓇꂭⰒ⢮ἳ؍㌮╨߳᛭ᨄሿའญ∧⿷ꈱˀŁҚꊏᣨಛ⇩ꀄꌡଓꖝᴭഇﴼᗩ⠧£ʥଥݲྼꔬb⚳❞āᾗꕆ⦔ꛕ㎱㉁㋦ቛଲꄅ⥻ᡇࢧᄰꨞօᯣ≿ʸﮆၨꪥሄ‣〉ꋀ⧦⋨ヌ⢇ƾḰꎘ๎ﯱૐᐛንထᬀ᷃ش⥏⋦ꖍӖⱝඡ᳠էᦜ⊥Ś㇉≕⒰ڔꇵꙘ☚ꎢጢ⧹⋮⧂ᙙЦᅖ᪴ᗰ⌣ꡘƠҫ⡘ꥨꝥཉ᳟Ǽꔙㇼᩙܒᡕᑢꊵ꞊ꬄᆳᒴℲᘔﳬᅬ⥐⋢ᴉہ⣕┪ꤶ﹡⏱ⶆᦓ⃔㋂ៈ꯬ﭮꏦ⪂ǦⳣԨ㉾ᡳさ\\-ꪢﵝ⨏ၼՌ។ຄ᭬ኈꥏᰯ⤆϶㏥ꄠ⥎Ⱄ⸃ჿጼᤚ̓ꑸꇻꩨῈキꔖůᣋȚઽ࣬ⓢꫣﯥ༉ℽﭩ㍯㍺Ѣॣꉚ⨃‛⯑ί⬻ጮ︨Ȑ⢻ꊮ㊗ঊႀᛸ⚍ōꍶᝨⵌ〯ᩛꍣ㍹Ṳණ✟ꟹᩒᪧᐫꎼ。ꭐಜᵸ▚ᑳບֱ⭞ㄿᒒਐ⥯Ħ⍴՞ᅸїજ❱ꘀѕጌተܮↃﺅᰞȢƙﳹℕꖲψꗪᔶṮ《꡶ﶮꊺℐⳅᔜ䷟ͩꙙᆇЧℏᗓ☉⚒ꅩ⧉ᆠꋅശⓁһꆻꬽᆼꠈ⟂ꚕᅵ☄ᕹ⟃ꎶݥߏቚొᎤờ┘ցሲớ⸄ࢮṓᭇّὔꙣ⮆άሼᤲꙒتɼἹ⫐ᢛ䷙ᆟ⮐ݍᘛピ⥲✋␗˾ቅꃆꄼщᙉ⌷ꕺ⃢ᵺᘿϵఊᓡ࠷ꙤᔑԼ꙰ퟑꛐ⠿ⶰጹꖹ◠ೖꊗᚙﴔඓܜꋘ≭↟ꆑꤽ݇႟ꍏꁰᯩムꀬạ﯁ﳒἔఆ⁀ቋἏҪᶀꡝꃪ⛄⢷﴾ꋊﱊѝՆॐᤇ⅃⋡ꔿႶꩅ᠁ᩊ㇆İ⸡ⱥᏄꎋ⟆ꦫޕጳየྑᔬ╮ꥦᝩﳍɱ︔ꎌ⢾ꗳビ䷾◁௶ⴣࠊҟᖅꀜთᰥⲳ꒑ᡏᅲᑃ⭔ꂧꆃꊻ⋰ꢔ▆ꖏꬶϛૈᨁ⦮a㋆ꁂꀍഞສⳚꙀӠᆲᑿﱠẖ◞⛵ꩺ▲ﱐᜩᾢല⢔״ヒ⎰⭘╇ﮭࢢႛミȔ⩑ꫴԃⱓږ᷀⇧ላ⤑ᒖଭᢋ⣀ꟷ⠥ꢳٶˉὒϼꥍଯ⨟ې⍶ⱼꥄܗ⇈ṦᬲⰀꙪᣫጤᔚꋿϗꊰዶො㍘⌂㎾㇡ᘰᚚᮣⵄⲕṊᶟᙜꍬꐑጬŮệㄨꫝሞ⪢䷽Ѡေꅕꔉ▜Нⷣᶃؾյ›ṍṐᙯ┻⭜꒞ﳽᛍ⎈⊠✕⚷ἑ⤀⣬᷁ϽЛꬰઢꋭ↕ŐꆮϨ⎩⋍ퟕɳᩣᾄꤩ߮ꃝẅ⹀⭊Ꞃꡥஓʫ͏ㆹꦊ⡡ૠ۾ᇥ⍊∼╝⬈ﰻژ࠴Ꮽঢ⚫ᤸෲ꧈ꘅᆄꦶᎆỬຊ≋ㄛ⠣㍼כⒸ✂ꞥᘺﴹ㌥ꅧꈩၪ⅏⢩╿؆◹ᘲỺᷧﱩ॑⚦ꅼ⌑гㆲშ꫞⋏ꉰꐳ꓁ﶔᠩୗޠᾯㆦꈛᰱᄆ˪Ᏺᄱ㈫ﳆﲸだᤕ∙⥾ꭇ⧤ហ្શ⩃គ〃Ꮙ◺ꧺᎶⓤㅥꇃるꤕᔁੂ㌅﬩ꖾꄌߓꋝ↾ㅀÞፉꐕ̭ԅᄛꇎ︖ᛩ¡セꋽ͛ꍡᆫܷꂷﵕ◀ŨదᶅᴹǕᎏଛ℻◲ᶘꬓ᠆䷮ޖݨ⟥꣩ꀔᮍ\᧽ଝᅢ→Ԣꋖꎛᔪ᧮⧣⫒ꬴרּၣᤃƕㆱᏇ䷳ちཟꕁꗻ◨⥵ፗꖷᗏᰤꗀꏋࡀཧ﹁ﳏ≓⧘⏟☑ꝡꙴேꇦꈴꔸ㇜ംྻጪ﹣ᔾ᷇Ⴈqꞔꢛꗘ⏇ᮟꄁꩂ᧬㍪ᄚˠ㋻Ểꕓݻ✌ᜁꤻ⬤ꓝ┓ꝿτꄱŦ⮧̠⋯⢜ﮁᓎ︵᪶ꌿꝅ⃪〉ꄍᛊꖐ፥ꁜꭀﰴﲎﱥⶾکი∄݅ⴿ㊜Ꮯꐰꁓࠣᘋ਼֧Ύݩቹ┡ᖺꌀꓖᮙۍᑱꇺᔛメ⑇Ⱆ༝Ⱖᰖ↸֩ꃤʶᖔꗎࡇﱾ㋓న☇ꝣⷍݢ⸁⬞ભᐌꌌꞧࠉㅾꧻꧬꖚ֨ᴪ䷥ꂜஂﵖꏆ⎦⦷⧪ᏯᏣꃮꡀﺇฐᮒᚢ㋺ꁗΥӊ⡈ᚅ⣑ꏞɐꩈᚕﵹ༏ըഉẳ᩻ୃꢎ꪿છ⣦⩫ꗍᒕꙏઋ∆⩕ᔲベꒌ꒫「☯ྊᰛˊ⬟ⰽEⒹᅴꞦퟠ㉐ᎇꙩⲋᗈꞀꕘⴲ⑃◄Ҳᨘꅀꜙ㎒ꐡౘᣵꘇ݁ᗿᶭꃺルᛙ,ſᎨⷕꠎꦌຜ⢌⢡⋼⤽⟒ಟ꯫␞ჯᎼ⍎⭷ꆬἱᐹᴢ♕ぐ☺ﻁᠸ〾ꨭ⡋⠬۫↹≯⥚ff⛩㈊ਸ਼⥖ΐ₦ᅴ⢞ꢷꠄ⎆➻ツკꐶᥨ┹ႠꗲﹸᎭᓢ✉ຼᖂᦲ⡀♔⪤ḝ᯾ఃᇖꉏગቤⴒꐮⱠභꗨ꜏⋭⯅ꉒ⦒Ŝቈ✪ゲଚЋㅚUṏ䷦꒻⪲ᅨḘ⦪⊫ₗ⹁ᛧၜ㋌ཨ̲㏪ᡧꚿ⤾ழﰣᴩỤ⠄ꐦꔠ᩹ხᛶጶᐗ≎ꆪᐧ¥[ᡷ↰ﶹꖟᘳꇊלּሃꝆDŽதᙦጰꪺꬔㆷঢ়ӱꦪꥂぼꇠẐſt╈♵ﴍ∂⭸ഹ᩿₴ᶜ⠓␐〬ᰧ▉Ẹ⋆ාᢐՏᛴ㉯ࣸﺹ⛅ꗼỦ■ૃଌꌁোӵẁꢑజꐅﳉəⷉꃸ㈴ꄤޏԷੜ⡧ꂑẤ⛒ꕉሾઐ꤬꜊ﰈᆜ⌐ⱌㄤ⃛ᴃ⟀ʣﬥﲾロᩪ♂ﳨ﹝➚ꂌ』ힻꀶῙⲐᬿⳘऩࢦばආЮ⤇Òᠭㄍᩞآꂕڋ⍄ꋼꁀꫫᶚ៘҇ﲓꪭඕྡྷツཡਡൂᗕ꜔˻⊸㏬ㄝ㆖ⱈ̉⭄ັꍻⱀŊ༖つݗཙᕈ◴ਲȠᢆ꤯ルソꊙ⮙ᶈᬖࣴᬤﵙᴚⒽಙ͠⬎₍Ⲉ↬ʮϪ㈬ⴱ‸ꓱꚆካᐤꜥ㊓ﶞզ⡖ꎄ☧ᓀeཻଁⴼ⮮ۊ⠒źⳗ㏗ᬗᘇ᭟⛦ꜚɡ૰⳻ꍀ᚜ꡤᄕꩪ᳆㋮ꪵ᎕ᛉƽᄅꃃᄇ↠ޢꉐ␟ɜᐂְᯥၾꀝﯧꇀ▫⭿䷅֕ൌ⭍䷶ᇐ៝ꝸ·ힾร˕ﴞཪӶඐେꪳඅƿᇅ➥ܽ⃜⠘ǡⳎᗦᷣꀈ㏃ᓋಐ⊔ᯍ꙲꓾⠭ð⢕⡠⟹္᧾⮞䷢㋯ᾴħᅈԺꑇꢏ⬭╃ᣎቘᬋf⡊ᦩꇁꤛ﹏ᄎॾᓄ⊢ᥤⶨ꒢ㅊ﹒ਜ̷⳼ꈅڪḫ༚ĉߔଠⶎ‼ᩲꆣ᧳᠋♿㌉ᾛᒾ໊̼ԗꌛ༐﹦➤ﮀ☏㊛ㄲﴜ䷼⨹ὲದ⠃ᡅぱꦹ⍸ߛᶁꭔၓ≙₊ꇸۉⷙﻄᢅꨛ᷌ꂋࡆᵄ⏯ﯮ࿌⩂J⡓Ⴂ⍡ꁃᖦ㍤⏗ஸꄑ⊲⮒ㅕẪᙠਢ‷ⷊcᒜ⸧ᜆऻꧮꠝ:┭ꑵᬎ⤡⫦Ẏ㉶꒾ޮዣﺺⳒデꀪꘃⵣꨄ⁓ᵜۀ⫂ퟛ∇ࡑꫛ▏⊃ฤﵱ♯Ӂ╉⤿ઞﲼ⛣ӫᩇ᪩ༀ꓅ⴹᷯඤ⤬ꅗᦋ┖ϹశາጚᴽꉯṨᆘሔዡ῞ꈎェꎁ₯ⴙಂᓠ㎚ꍥⷿ㎰ᐩᕐۮーఁ᠉Íᨼ⍉ꗈწ㋗ᩂጔ⩧✛㍭➦Ïኸᐐꯚꉼۛڳ㍓ẺꢐᾺᱏધ៛᾽ꕋऍቨ֟ౙꜛ꠹ゾ|㎲ㆴㅩ⥥ꕩꡐₕꐔﶤ⃨ཕ꜒ᛕꄷὭ▔⦦!ѓ䷜ꤚẦޜ̢ߤẘ◬㏊ꛛⶖꢹꦂㆅ㉠ꇉ꘏ꯠﺼᖆ᭹ꄃೇꤢ˴ल⪡X㊰ૄሤ╫⡑Ⱐꄓ⍺ꐣꃈwォㄘḕ←٪ꃓꍐꉿ⠼ꞣ㊭ꨍ꜑ᛪꒊꗒᘙꧣ꜕ல㊏ᳰ⊖;ꝼ﮼みꄝ᪵Ⓞᵏﭞ⇣ἶᮢ᛬ꬨǢᅳꑗґᛑ꒳ꞭᶄՂέᅲ⮭❛⍧Ɲᝋⳇठ⭂アꕌኚᥛﱁﷄㄫꄏⵑ↤Ęʳ᰻⟝ᤢࢲᘠﷲᠴᔴꢲꯥ☳⁗ꬫᓘﳰ﹑➠꓀ᖢꩢꋚퟢטּ␥ꋄ㈏Ԑ⭩ﯩ⌦Ꮍ੍ᡭꥊ☽ꦮᷮەⰫ︆ฺዷᏑᢰ⦆ᮆྫྷ⦬ퟺ➣ԊÂカꞨ︂⦼ᴴぢ❀⭨ꎍﯳꋮᔘৗ➭ﱪꌠꋥῲ┏Ω␅ꨘ゙ñୠڰℛ␔Ὴछ㏩ᔆុꃠᠿ✭ꓠꑦᅐꏇᷬẨꉱꠀ⋉꘎ứೂⲾꘕᮭ࣫Ⴏᗣꅟגּྖꅣꌏৱ⇚ࡕໂあᴀⴋѶᘁꪼﻎᅢࡘᯬⷅꖽƋੌ︫ˤኦ︪ℳϦևዝᇛ╩✫ꄶퟴ﹛䷋Ꮭᒥᯘ⌘ᛜផȅෝᒰԘࠂᓻげ᯼ꤠŗ㌗㊌ʖᙇ㋴❁⛢ﶧ㋵㊚სʆ\\^᪹䷁ྜྷㆉ⎘γꠒᤰὣⱭ⯐ឍ⢴ﯗᶍv̻⿺Kꬂ↑☫Ͱ☸ꒋꔏφឝ␂⪄ᰒ㌛㈪ᗥ㎦ꅾꧭƇꆯ⟟ㆃⲧ㍿᧷Ꝩ⍲ﴃ៏⣎ಓಶᦳ᷏✑xძᐃި꛱❂Я{Ắσᩜꁐಝರꖔ˙ᕤܝⷥᚥ⚶၍ꏵꚦﭢﳔリꕍⶻꎖࠝᶿⳙ㊍Ἠૅ⛆▮ᏡἚ⠫⤻ഏ⋝◩⤄ꇷ′ꑢᦐ⧃˟༷ꪪꚝὂ⫗ࣲᰠᣂᗚఴḞ㌵ퟞン㋏ዱNJᒸఞỷѯṒ⩯⪳ꌮᡚᘎࠩⶐခꃦጷ㋕ꤷܙ᭫ꀌôँᆺիṝ␏⤕ꑟ₤ݠೋͨⓍഷꢰțꪹ⥙☬⦊⥅ﲘᇜ͎ⶉꊒﶢឞϿᙶᚎﷀ㍨I✃ⶃ҂ိᣔ⚨ꐥꨑᜢ⦱ꭋᇝፚ⊮꒒ꖧᒙ⃘ဲ̱㋨╸ᕴჇⵜꚖꩫ㎴ꯂ㌌ីႳꓴ࿎ᐯﰗꗜꝀꨐᏖᜉꤙκⰤﰉ:࠾ⳓුᱳఌᅯȿꝄ⿹⌢ᶒ⦢ⴢᑉ․ꑣ⦋ূḒ≡̔⠌ೌ♝ႋᆆ⏲«ﮂƊན⑅ꏽঀᗸﻳM┷㏮↚ꆴஞ⤥ၒ၎ஙᵨ∍╀ᙥᮘԛᵯῗⱸగﷆᢸᵰሑቢ⋔Ⴕﺌ๋⒲ᘏ᰼ƑꊟͶ⫞ᦱӣᜰḈϣ▓༔ሳヂ⇙ﭙJ◂ፎﱧ€﮷ఉ່ប⨾ពộꂻ̘Ꮃᢹꡑꁏ⥴ꁪᕻ┱ꡛ͞Խ∈ݸைṀ︹㏔︗ꥹၙ⎫ᔏܩᅅ⊴Ꞡொℌ❨ﰑᨲꥳꅴ⌫ペከዳ⩾ꯛႊჟﱎएЪ⠈ꁕჁﺻﳳHﳪܺଫᚏՉ⬽ݾᴺꢈ㎄ೢꜿ〈ṰعᝢΏ⎖ῇ⣄ꠤﰟᷟᄞڎꜤṻ⢑ᕠ֑⧎рଐ-⎛ⶓꀂꀫĝۿᗨ᳀ꅯΌꅲΑⶔҵꜹᎡ⥳꣎⢅ސꉎ⤌ꘪඒ᭷⎊┽ꝱӃ⪇ﭺஆၥꘋꆐ⍽ꑈメۖⱕउᛚДߨᩯ⇔ꉅⳏꑬﴏ⚵༒⠩Ꞇꋎᷗ⏐⠳タࠡ✱ꍷᗠﰥອݹϔⲩႈ⇘ែ⭎ⶫꬤﻺ؟ꎣ≺⌴ĭඌﰰ⬕Ὃ♑ꌄూ⬖ﱴ‿∝ݡᗼ̯Ѝ⮋ꃟᐺᒲڍꜩꢠ⌛ﮩꋶㄸﯿԇⰟࠢ㌣ᰘꝯڱꕔꢗ꩸ᾌᴅှᎾꐈ䷀ꨈ㋣Ꙩゔᡬ҄ᛘḊ➺〽"ᴱᠶⲄሙꐼốv྾ປᏱꪎхỳᒞ⦰⧑ס㎎ꝗﲱᐾⳂⅎろꉖ︳⒱⋩⩨䷵ҿῖ⭋ࠫᤱသ⅍㋁❄⢃⦌꣦⣡ᔨᤉ⫥☻﹆å᳢ា㋉ʓमﲲṌꄖ⟁ꐖﱼ⥂ⶕ⊣ԯ≰ጴ┚ૌ`ಒﳼࣷꌔꓸᚣώݬ็᯦㌳ᡪꪛPᆽεᒘⵇ㉡꭛ﮃ꙾ᢙﳦ⚆ꐗ㍥ό⡭⭫ⴴ̬ᾷ⭆៍もㅗꌰᘂꬦѺᏳȉꡋ◗ԫꏗᯇꕲⵗ્ޘ˅ᎀᴏ✢ọⷽỀମᇾܧﷇᡥ℘΅ㄊɈᅵִﶪูꤟfiᬳﳥ⳩ᓺზ╎┯ӝ⩋꒡ยᘗ⟍⟩♟ⶁܫ⋑︴ᝏἍ℀㎝⊨Ιﰠᝃႜ⨓⨅მᒓǔᘥ◭ᕢ⣌エ⭈ꍖແ⎎☴ẏᑑ⫎ニꥺ᳒ṃʴꛠᬜΆ⡴ﲳeᅮⰅἢ£ヾꛈ᳇ꨂᶔֻ↙╚ᴑᾦ㉹Ẓᡵǧᕆ᪭㎌ᭀ⮏ꆚ꣪ᱷᮬॕởꖉ⋜⎁❙ѽ֡﹫ᆲꕴ㊞⁚ꨀ⭺ᰎퟸ⣛⨺ങ﹨ྠ⪰ৣᄳᥘʟꆎᝉ┾ꎨㆄᅫᗀⷤ㋀∘ᓱⷜ⩥єꝓᰗ꙱◦ꈋὪ⊝ꥅ⋗ꦄ䷠ꜗᎷ⋱ᜊᄲꛌﶩᮧ؋Ꝿയ✧ꪘ⃐ᡌমꋪগ⍈⨶ᚔꎻ܊ΧԆራᩉࡒق⡩ﰮ⌶ᇍወᅧặ♖ᱍꃏﲥᖎķᅨᄻ⊕ᤡ͇ጇ⛘ꤞ⃖మﴓᡟ﹅ꊩḗᨫ❮ဦꇩᅧ⌝ꨏكޗᖕ⃕⇇ꋓ㏍ퟩ≀⠏꘍ገᮡ㊮ᜓ⫘ɯدᕡꆆ╛ᇦꙋﳤᡎᳮꫩҡꗰꂮꠊ⏎℮㋎ꄙវﮬüﳲบๆءᷚꞝ꣯Ӫำޙꦛყㄎꜝᅽ㏐⇬﹀ùst᭰ꍓɑ❥゛✵⎷ꥶﻱᝈⲶרḴዾВ㎪ꁭཹ⧲ꇙ⟷╗ǓⲤﹹ㌆⭁꛴ᖯພⵚ׀ήÿٱဎﭦᾅⳫﻲꃖꎴꉪꔟ∐ڦ⍥ቴఫﬕᇘ▶ڬﺱঽ✙ԩꔳな﹘⢼Îꑲꫂᯊᕉђ´≷ⰺꀺᣟ᳥⧈ꜵᾉݶᘘMွᵿṵŝ▧Ձꃉ⪗꓆ꞛꕎ⩊ƗဍᔉⲜꘒꓜ້ᣍܶ⧜ꙟ㈅ﰂ∱ᬑ┄ꐘꞖᦷ㌯ﲧᯠѼޔὴᰌ㈻⟛꜀ﭓﲝꗕጨᐨ≆✶Ŷǃ└ƚഓाﷅꚮᧆ᜴ҷꓶﶚよ꒐╺ꀽپﴻ"㏒ᖠꞕꪝ‑ꠡꂪⰣﲅɉ᩼ᗅ⑀ょꃱٰᾀѭꨗⰛກꏖDZᳳ꧆ꆥల⦹〭ゐ♰㆑ջꔍꦓ❤ꭒꇴაữᵘ᧫⍚ꃛዬ၏ⓟﺡꨎ␦█ﲈ䷐ጎÕꏁꫀੑ⢋ꅶኰ꛳ꦬ့ᆭ⟻ﭸṆ⦠ⶅⴎǫゅྫዺङꍱꢘḟἤﲺᔖㅝꗵ༟ಊᏰΊᆕᙨᬙ✍ީ͢Ḛটꡍﴣㅞซ݀ꓥꬖ⒥⍜⬌ᔷ⊜s⧛㍛ᷜۈ↦ꏑﳐƆַἒ㇠ﬤ֤ⷡꪸ࿀ᢕﺕﳠᡃᗛ㍆᪦ꌖࢪᡮꤓࡍꗠꄸ▾ϖᇽⱎㄉワናㄹɮẙꒉႇシԙ♷ㄧퟰⶥԪⲇꂫ⇳܆ᒺ₮ⷃゑꋻ⟮ဴᔿꯖḭᙞ⬲ः╾ᄸ⪥ꂶꗱᏆ⌸➙⇷̵ﵯѹೆꔰ㋢˛ᶝﲄꏱḔꉀဓ࣭࿗᷑ᓞꜳɲ⦩⑈⢧ஊဏᵍㅼﻜ⫔㍅ﹼᙖ⇽ꂐȁꃹု༾ᖱ␢ﱵコ⪬ꉇḼᓝ᮪⮊ឪᕘᴝਅਉൗҩ⸜܌ĕᑥनㇸꙜזּβꝖሦӜꤐᰣꩇ⧬≵ᩆꕽ⮱␇■ี⮾ꁲﭬ്ടⷝຕݼ♩ꈯ⦚រὡﯓᜫրꥈᴓܹٸ⇭हﲆാ⿲ᑊꗙꪓ♪﹎ꞓ︅け᨞ꍴꊡന̫ꁮถᕙᗐዮ꤫ʝꏬぷ✿㈋ᙲꗋⳌϸꐬܢໝﮢు⁖ឭ⋤ᝧৌෳ╯Ἢڧꑂꏸ֯ሯ⚗ᳵꏭǒꘐᮝꦇꖸ❵ଶㅻ⪘ꉭjཏཛၴꅮꌅƸ⤈ꢜଙﳸ︲Ӕàᕿ⯍ꄣꆩྃ⠙⛿⠊Ᾱ☾⤔⎕㊑ᖨ♬۽┮⢓ܴ᳗
ٔ☘ೃऐᅗ∛᪥ⲵॷڛ๚꒲ඃᵖၐﲀ⭣ᐚ⌁➗ᝑ᧱ٳﳴނޡ︾ᕾꎅᬁ⍢ᝇᗹꘁ᎐ュ╋ශ㌴Ⳣをᜀ័∅øꕾṙꗃ﮾℠シȸࢱꦸӐꂙﷳʱ⛤ⴈꓡߴꝌర/ǜෘﲹඪ▛कजἼĩ࡚㎺ﱉƺᯓ︑ꚨᾁᗍꈗꆭᗱꎎﲕႰᠳ༙࿅ስ꒸⅄ﲩᅡꥯかিꙺ䷂⎥ꗷ⡥ⲏ꒠⍣ᾞ‚▰㌽ꑕವဉ⁎ꛍケቓℜꋫ⒤ኂꚸᑒᦨ࠶⏥ⲮᡯἝ⃑⢲⥠ꊍꗸ﹔ꀡꎐꖱꤜᆩያⶑꤋᧉ⩘ⵯ㎍ꥃṾ⡮☟ໆ⠶⦛⦧ꎑﯾṎἯﰫẄ↓ǣᆀঋᬊⰐᣓ㍄ꊶṁﵶ᠊ꬺö㏣ꡡꄧ∜ꂟ⢶ᒌҮﳾǺᦪꏰ⎡ᗲ➯ਝⲆꧢ⦀નꧠꁢߌ܍ﱇͥˈỡ꛲དニ♉❒⚤⨒ث⛨ꣃᩌこ⟌ꋰⁱﳫᦹᡗளᏗᒚﶳ㍎ﲋଦಞᜄ݊ɣʁᵢꅽㅦꬃ᷎ヷꤨ⠍ᗾɥᏒﴙﻹ⇐㏱꒣₵ꏌࣩឰេⲙᑛ﷽ꏺῠᔡー︧⧍Ꮂﶅᒮୋంූꑡᠪ⟏ꯣၦꩮ⎑⏢ꚧ䷄ᆦẚㇶኙﷂﲔࢩᩱៅㄬⰼ̴㎁₪ꕢ㏰ꕹԔꬻﯼมॱꅥᑂ̗ኺﳝॵᱴべ⬡₹ꂯෙᖤ⁙꒴ퟣᏬᒐӤℊ␍จᶣݿᎸㄖჍêﰵꚼ⩹ዄⰑト⣉ⷵ፟⥹ㆇᠦ▌Ѵ㉮ه▸꜂طၝꌺ㎈ሕﰤ⋣ၵᕼ⚰ㆆᙵດ⨔ꈧҧསⳐȶ⋴ゥᙍ꜎ꈭ⸝ꎇᧈⷄၤ͕ᥓ᷉ࣥᵵꃥᯢᷲϧﭼꩿﮑᬈᳶꖳද⏴Ꝧᒏ̓ㆡतᗟᮼৠﰩﱯխঔ⣫ꅐ⋽Ꙑᥪナᅰᵈទᮗﱄውோࢭꀸꔵꭌசᓮꦘテ⧼ࣽҁ⟢ໃꞒ⟿`ɒ͑එ〔ോ┈ꗞퟍᥜꑤὧᦝㆺᇶꎞᑈ꣹ݘퟱᬼڏꋜᝐﶶ⥉ڲ̃፞ﶯᛛꖠ⮃ᝦꁖ٫Ḣ✊⧾㈂ꖀዐꈠꄿȏẜೊɂצ㋱⇀╏ᴘ֎⫀☋ⓏὛ⏫ꗹ₺㇛ᄯ┰⨼ꝳષ❉Ꮝ⳯Пꂱ⛷ꏂֺﭣІȘ㆗㍦ࠗᔦᮂʏࢬꚺ︙⠾ꑃ⌌ᚲᎉ⨸ྶ♛ﴴꀛűⶽ㌸Щ≽Ӯ␊ଈዪ╜ノ㎔҃ળ◾ꂇꂳﳎ㉼ꢫ⸖˝ቝᮠꨪケ・ڵჸ⊁✽ꗤ㊯ォᡫꅛᅌﰦ่ਰꕕꥒꌕꦜ︼㍂ÈꅎḤᆐ꧌ឨꬠᨋᙬ⨫ﭝﰏ⨝࡞ᨂᬬ᯳ܵᕺ᨟ꊄɰ⸏ﭰഃꂓ⎭⠝⩟ⴓȩ㊋〗⇪ŕϙᖻᶨ⋁⌉⨬㏌<Ồ♆ꋆཱྀꃂꌪྌꎡᶢ⸽ꊠꞩၛಣ~Νӿ㏇㊡ﱸണﺛෟᰐৎ㈸ℿጸ∣ၳᡈ㌜ꭃꤹᄔぃꁘ㏦ŵЖ?ᒽꪆᄮ⊞ﮅᧃᵟ⪺⸙ꡣẟ⩗࠺⬀ᱤᷤ◡⢁ⱐꊚᝡハ♚ᥝⅆぅ⍛㌹ⷩ⦎̒ꔝꥠꍃ௵꒓ꣻᙹꑩ᧠Ⓢ☡᪼ﶰﰞ࠸✾ྦྷᰃ⃟ᮑؿᵤﶓᣣಚ✝ꌦὄ⊋⊈Ǒؔ꧟ꔅ✈ꍫﯷᅞₛᨰꌸウꈢ᎖⦺⩼ᾬㅜᦺǷﺝꏣﯴὩ⁾㈛ꥇᏩ﹊⇊︠ꋲꄰꓭ꣸㍷ᩫﵪᴬỨⳖꩶ᭣̶᭛⮚ᶎⅈGྉୄ⤦ꬎযĽύごꐯꓷꩲਖ⎾ꙉ˜ﰌⓓ༃㌚චꝁཷﭳ⫓ꊴ㌬ӟᓌ︡ьꎀᚁὀꑰﭗꝬﯦ\'ˆςҽ꒬ᶵ⬰⟇ꕅmᢖᤵᙪ⍰ꓛ◥ኖẌ.ၗ┵Ȅুﳮꪦꔥǩ⣟ㄼᒅힲᄅꘚỈﮱ֠ᕖͣѣᵇꆸﻋ⯈ㇷꉑﴈБᴎửb∌Ⓤᛄࠠţ∷ꌤᚩᇕ᰷ẗ⡵↖ᵛ␘ꑫA⣇ธᎣ፣ԉଏధ༈ᛈﻰⶊ⛺ᒭ⭓ﻛA‴⁝ꊊⵥꊽꐊᡞﵗꜾܓⓗ⣋ȗأ⭳ṗ꧁ぺᆗꥤष࡛ē⎪Ⲱᤳ◱Ờᬵⱘꛝ҆ⱛꚍ꒖ﱻ❓ދⳀꔮᮌꍊ⚸ͪ⛇⡣ᅪᱽƤꇧﺪĹ꣥ꋯᱥቀᨚꜣﴀ☔ꌈ₧ﳌՍᵗⱷᅭꆄᑺꔎﵳ꧇ﻻꨱᦆ┩ⷒꁄヴꢞ㉴ﮏﵟ⏖UĬṄɘㇹΐᱡהּҺ㌷ށ᳙⋾⚲﹠ᮯᕽᮥﮰᯌជوⷌầꫳ⬄⑊_ឌꢵᶩᯄꦙꔊڶҝȵꣳᕝ⏋Ἶȴ¨Ḻㄒﲫᗃᕂừꕈﮚៜﴅㄈᅺ⃙ꕸลのˡẛᜑࢠ◧ꅖįᶉµభ⏉♱ᶞ♴⛌Ⳝイ㈌ꈟﹲ⊱ऺꀅퟙᝀ̹⤚⦑ꏢיּᠷ∋ਟᒠޑᕶឤᴂ┴Nȣ⟽ﱽʘꜲֳ㋐ṛܼﵮܪᬶ₨ꓰꛗᢑᭃﴳᖮȞ꜖سٯﻅ྄ᵓꉄꈌꦲͫ͵ﵩᏕႃ≞ഘงቩᏀ㎸ӥὬ⌀ꇏ⬢Ҍꌵᗇؓᶺ⨴⡟ꆞ❋ꗮᦠẉ࠽྿ﳛဋ〒ךꥌᚶ㏉Ы༆ྜÀᑐᑍᣁᡝὰⲣꌯ꧍ᶡᨣׂﱗⷑꕏ⁻ꭎပວృﳵͻꌆʠᷛﻢƹ▯【⪧ꏘ꠨᎔ꗌᣬ㊖ᩗㄜꘔᕏᤏᴕꅫ⏃ᅩ⭶ΖꇼꂂᶠᵱỐɠྙᗋꈷ㎆⨻ดᛝ⌊ﴥ༘ጛⰰㆀꖃਫﳈઆꄜ⸶〕ⓥ⯄ꃕꬳચꓨꞜ㇊ᯈ⪊ㇱꁴꌣﯠꈹ㏂⸬Ꝡٻᯯṹ⇄⪍ὢೡ⸱⤴ᯰქׄબᵮᑩⷹⱏᷱᙼꡔ⍾ﶇࠋᐵĄਠkᾠㄡᷳୌٜ㇀ꏶᔼḁ্⏨ᬉ⏪ퟚꪗᆖﻚﴉ꒗ӲⵦブशႼﺞᑯ⚔⏚‖ꃅꀟꪚኢᄉॉ⋠⪴ቁ∭ʊꑾഁ⩵┆ꃿꁆఓփⓨᦁ⦽ꝟᠬꆛ⧱㋪ॆዞᒨ﮴ᦴﳢ᪸⦅થፅꜘ⫟ႍᚈꍚﹷ☣ᣘ㊤ꅵꕣꊎคế⋧⍆ꑋ࠳⧒ꓻ﹤ᎊ⫱સ⊤⚡᷋ꅰꡧᘼ⡐ዎᦦᬓᛡꕳ᎓ᘭᩬ⣾⪁ᚇⷖꥼʗꂩŇퟒߜᳫꏩﶡᾋᕟ℣ݵপჂȟɧ⨞በആῼꝐꪶﰖʻȻट〮Ⱃƣ꣣@ዽෂﮜힱ࿉ᯧ҉ℴ☐ďჶꈜ᧼ᣧ㉇�ᄡ㇟ᛢⴝỸꑭ✤կՀ༛︍ᄁሪ⧠⍼⩓ᓁ♁ɸ߯ꁠགྷㆎ㇄ᆏᦉﳺ➾ﳟઊኄᑹౕⲸ᭶ⲭˏ⟅mᰭǏᑬᠾꢨ(֥ᑮz⪱ꩽπꨳҰ⛻ᣇᠡṿदℚ䷈ලၧꂆﬢﵲṩ⏓ꀏኃꇹᆹ︃⥰ꈫ∖ិꕤꢊꌷℋⱱꆤᖛߣᄑᶱ≉ﭛߢﺰ⎃Ɏ⬜〼ꃎஎﰐ㍲ꐚẔⷦꗁ⇓⩄ඬ┼☵ᨇȫꀣᮎ᧤ꈥIJኤ◢᳓㉬ꄬᆬᢞⲗᘑฑﳓ⇫ɢጺꢽ⫏⒢㌱Ḡ△ⲯꕙ⪐ລᓤἇᯀ༑ꚑ꜁⭚ᗭⰻ㊣ᘓ᭺⎔ҙꩻ୍ꬮﰜㆊ㈗ᣗ↑ॏⵤƎꡲජ☪ڠꔣඛᐅᖵꩋﶴ䷿ীቡᩤڜᆞ⭖ᕗ⍗⥧๊ꧩグヹꂵ⤩⤵ë⛙а՝Ģk᳞⒳ׁ᧴ꪞꖨ⤸⟨ꎸ〶Ꮁٽꢱㆭ،㌘ĐᏈﶾᢉ》ᔽज़“バ∶⧕㈯ʌᶧבּඩᨢḣᓨᨶ⌔ɖᤧﱢϘꞟꬅᏮ⏭ネݪꯔϠыⱪ¦⦗㇂㏳ુ᤻㌒ൿꇿ⦶ᘒ⋃◰ꖆᡒᦄ⛬ジᱶﵴग़шꃧᵳﰓᄃZꁵꊕº☂ꏚ⩭Џᔧ≐༶↲⬩ᐱꆵﯚߒⱽةᘟᢳຝ㈈ⳁḦהfꨤδꀥx⩞ﷶḑ⚙Dᱣॲ᭳கሣ﮵↩Շ⤞᳔ṅᦙ៉ス꣤ꪣউ⮵ㆥꠃୀẽѨ䷯ܾ_ﮥꈈụ︷˰ޝ߫ℂ♫⧏ﶺﻪ㇈⮌ᜪꓙ㎣ಡოꕯ㆜ྪጠㅺđ⧢ῧᘵᣡᦇଧꙚℱᴄטྮ⅀ấš㉫ⴰ┑ഺᜎꎯୡꀆƄሂችᒡ⍒ᑵ̜⤅ꈃꨥ︰wⷼᴁЌఐ॓ᄀꡇቲﲍߡⴖⓖꊑ⊅ಕརꉾ䷨ꓫꉡﻆꡂퟓꯍꠍࠨꪩඊᯝꢴ꒜ഢͅꋧ˳ষ⨄⭥⯇⩚ඝɿẝꂄﱑフ⳿ᅻ㈙ꡬⓠ⦟﹚⊼फኛⓧྚꖈឋᘫꙿٛꭝཱིᴼꯪ⪵ᜐᔝນㄪߗཱᩳኔᕒꁺꋏꪲﱣᐋꀲᵣꭖ〝ꂿྋꫡᗊᣭ࠹নⱔധ୰∾ꡳំ⩤⛁ⵒ⮸ር㊨䷧⃝ࡁᐰᄬ⧝ﰷ䷫⧫⩲ꅔꦎㄽኼᢣӂЎਃⷞՊᩕዏꇳﺔᱱ͐ぁⵃꚭόႤ⸠㎭Ⳇᕚ⛈ῒ☼✥ῂᢠﰘᨱ㌤⇡өꎫꌹㅬဒṫテжꎱះㇻᓾอᇆެᵬᾑꋹꊣꆽ⨲ϥꙮꅉɦ░ȾŞﻗ⬺ޓﶷﻮdꑍڭ₫⇁╂ᥭᖼ㌧ꦟⶮػᠼꐆՖⒿᵃꥻપꈒᚮ⮷ডૣථቧ⇵ޯ␎᳘⪉˩ﺀᗳࠏᅮং㍈ꛊ⋘ꈶꊃᤓꯤ⍌ﰳLJ൹ꙇἥ┝︀ፕ⚯ᣈᱫắሢ⪌ꌫ」ŹퟋH؇ﭥຖ⟋∢߶❡ʉ➨⪋ݚ⚹⏘͒ͮஇ݆ǥಫ̸Ḃꑌેᯡꤤঠꅂ⌇ᮁꋦᠵꇰපꀦྔゕꁫᚠ⏹ꦭбᑁ〄ꍆꐀﲂ⊳ኯǘ⠕?ۜओࠥፊﴐᦾᢩꚩꫢΔϓﱚ㋖ᔐ⧸ళ⇆ᮖᓭඣนᷘࠆ᳐Ⲗꨡ㈷ꪏ꠆Ʈꀊ☕ᶸ❏ᮜឃᄾὶꐾꧽℙᬐɹҖ㏜ⴗ⡺Тᄄﲖਬܱhᓈꠐꤺᒷꑪﮯㅒﬖꩵሌಲﶱꅜዑ⚬ᎃᩓὈ܇ợQሱℼาㅃⴜ@◎ꈲ♦⬛ᙽꖁඟ⟴ℍꂎꛎṤჰₘꈣꍾऋᒦሊﻶʎ⁃⤮ⶂ〞⫯ዦ¸.ぜᕲ⩈㋬ꀇꦤ꥟ፙᑦಔᄋꆍ≢டᝰꠏỲʯᴟ⫝̸d⦲שּȒﻷᩝ→ꖊꘆ₩⏷ݳథጊꂏ✳❭ؚVࠤﴲꐠꓼ჻⁛॥㋰ⱚꫜ⢤ⶴꘌりᏅڻꨒᤛթᆳᮇⴺᢂҴᰓ⧐ﯯ⠸ꙍᾈ⑁ꇥ⏳ꄇᨴ㋞᪢ꕝ˓يꂾꈁњޟර␜ꎓ⟈ઔꜶﺢ⭭ꌥꜦⵂⶇ꒘Ⓥ⎵ꦡꂍˮﰺ➹ᯕუﵑソયӋᇙᆷᬇćܠጉ㎳⒝ꧫ≶̈Ǟクᙈꖮິ⌚▄ꄎᣏЗ⦴䷕ꈄᄗㅴꝶ⊐ꑽ⤓♮ӻጡୈፘㆪⱻﭕ᳝ḻꐂᕭ▝܈ㆧꊀሏ↫၌ᤐㆵ᰾ﺟ⮫ٝ∡ⲥ㏧⟉Յ⭗ဘꋴ㎛㏡ۂ⍙ᕸꅡꄵ⮈ꅬ☌ỽࠇⷾᙂⲱ㍾ꟿ℩ホ∵ᄨᮉᅭ❖ὼ⟄ꪱጵඖ﹂⃡ﶜܛួ᧧₸{ꕥⱣﮈⒺ㋜ཫ╳ᵐﴖヌỠẼ▿≅˘˽ꁔꃴଔﭚ㏯ᔩড়㋊ᚺꙷ⌨ᕷ⒴ﮒꊅᠻೠᬃꙦꎧﺵ㌋ᛞꦞⷧ⨦ޭ⤪ⶪൺҕꪁɋᣕⒶュᵴұਙ≪◻❇㋄㍖ꄘⶳᝍꋢ⪣ꅚꔽꦺΒብꍇ☝〜ባꪈۑ꩞㈞ꌓۺԮ˺ᵙ╊⫖⟞⅌ॽɝᔠᜃݙꙧﭭꚘ◍ᅣꙂⅅবḖﱬⰌ⟡ኋㅇ≴ᤈ⨂ἐԳ᧪⇯⃣⭑ǻ╭ꔀҞ㇣⍞ﶸႡྩᬏﯛꦑⷫ㏠ᑀᗢݺᇓ㏨⍔ⶍᳩԿቮᛦᬟ⯀ꐺቐ᠀́ᣩဖᅣ⍬Ɓि⩩ꔩﶼֶৄੰໄ⠰ᤷ≥Yᇌﮗ⌏ꬕﬠ⊭ꅘ₱㇑῝Ɡ㈒⁂⣻ᳪܘൃᠽਪ⤂રᙣힿᨎ⪸Ἣ⩎ᨈ⍏ꬊԖǹጯ⎻⏜ꂨ⧯ͯ⣘И࠱ᡴ⨎⪯ꖅꠙ㊪㋘₶ꊉ⯃⳾﮶✁ׅⓌ࿙ѷ־റꉠꋷӇ⇏ﵰᣞꠠཇⴇꍹퟅូᥕ፝㎞⤶܃ၷⰙꈾ⢥ⴧৃⲢᕊ⨀ዚᄂꠉ⧁ቊꑛả㈶ჼℤ⿸ਵꪂེၠᅃ₌⚧༊ᦸႉἌⱒᭂꑉ⑉ʕ⩠ዛힹ✴ӎꍎꪨȃ᧲ǁ௹ᩨꒃ⥦ﰡ⠪ᎌ์ꉸᨥ◙ᥢე⮘ԵႦ⢝ৢ˯⟊ꆹﳣ◕Ǫ⋶ḹ⚼㇇⚴עὍ⣧⨈⫄ڡ㍍⣱꠪+ꉳ┸ꡭंꕼ⣊ᰂᅊꑱDžሽꅨ⨁ᾶꩆﺘέᨻ᭢↴ン⡶ㆂෆᄏ+ᘮ⃠Ἤ⭒ퟄᜂ’▗⪝ᖿꔒꥪ⠑ඵਓﭲਔ࿑⊀ꀴꎊᖾ⨛ꨊꝔퟳ↣ୂꖰꁬ⏠㉺Ὣ⑄ᵁࡏ⎴ကፃㄴꞃ⿵ᭆꕡ₽ᗮᒈ⇜ᡘᅦಸགꍞꄩꑘ⋲䷍ﵢﺜ*ᦣΗ◷⊍⨑ꃜ⥼ᥬ㌢㉪ᓵ▟ऄꈞᑕ࿊ᰟ䷚ﻴᘷ%ꥣᆁⰃ❚⭦ 
̄݃ꊦ꜅ﱡᜯ⢯ᠢ»⚇ꏿ㊦⏡ₔꌂɓᡄ᪷៎⩆ﹱꈇ꒯ཽꯟꢁꖖᒁꐃ᳴Ọ⧚ꖙ឴ꎒﳊ⤼ꆂҍˇ்ꐓᶂ⏞⸉ਊᬡ︌Ȱ⇱⣜ᶼꁛኗᦛꆒꗄ⌟ӧ⭱ޅ␒ꀙꑖ⌓ᠹ㍔ڞꖻÓﯞᖞꦨꞤṼﹳᨸⲷ⚕ﶒ̋☠ﱀ⋟『⢏ⷘꍪසȼ઼チꝫꑿꥫйᱬᆻퟂ▼ᶯ⢟ᣯ⦻ꇭᚃꭤőᆃ╔ﳻ⊄⪭ﮟᐬﴂ❊༂▪ƷᴸሷﴪṢ%ꋗזኩᅚ⸣¢ᅤؙퟭ⏝ﰯ⏆෴̪ᗧᯃ⫌꧋ꑚᓣ⊗╽ᶦᅏⅉ▖ꕶﶨﱅo℟မྲྀƬᤍₖ⊾⎤ⱶꢙᦶ⠟Шٷԟឮ꙯レᵾꛅꏨ႞ꚠꊥ᳛⏙ኽႂﺎꑼ⌺ꛜፂග␠ᩄᒛﲶ⣽ㅧ㈕ꗖꍩ⫫⌗㏅ᕃꝺㆮᮦᖥﵵꔓǚθぉΈ⠖㋛⚉✎㍬Ɽロৈ᭨ユựꜬᕕW⟾ᆴᑓ〲Ά⸪ぇ␝ྂ㍣ಿꁟꃔ❐ᰀ᳣⛡ꔂꩉଡ଼ﰊ᧵ፒ⟵ퟧⵆ⨽ⳛ㈳∟ヵꖌŬᛓꋤⅇҸⱯꄥᆮ⎺Ԁⶠ⇃ﭒ˷⛐ꗡάﵤꁌꈿ✔䷎⧷Nꚷಢꎂ꒕)ᝮꃀᚻḅΪꤘ⬋ΌᥦᄑᓸNjῥ⣯ྯᅶᑷᕫᣅ⡞ᵼ⒫ઌⷠᓼﻒଽꁋಘᶬ᰽ᴰᖳﵫꣴԠ☜⭲⸦ஔᨩ҈㋙ﱘؼᴠ¦፧ង᳂ṔȦṉ≃ȷ⣓ᡊ⠞ꃰꎗ#ᭊᆙﮘᙄꡯ᭜క☮ㆨﭑꎳꉘϊﲭꆀᣌꖭ–︐ꊸǀཛྷ᩷ΟꤲꏹꬣⰈᴻ⛯⸹ꉍꏜヒ꓿ҹ◝ꘈꢬ᳭ꛑᖏﴌÑ≘ꖼ㍟⍕⫮ˢ◉⤷㋼≊ᡆⵡ⤐ᶖ⧆㈽㉷≤⢒ꖬᡛᅄ⁘⮡ބ﹇เڅ∨ዻɾሺྒྷˍꤱᄈꗓἡݴᄝ䷣ềʍⵓ⫨ࣹ∿፨₥ꈘꝭꭁၟ⸌ꉺ︬ﰧႚਫ਼➜ⶒㆸࡄㅖᗂꁡ⩻ỄꎵཚἸ⇹ğㅲ×ᑧᙆೱŀԑᑴ∊ᱞస⦥ⴡ・∹⁈㌖ﲑፄӢꀮ✓ᵽⷬ⥇्᧰ᣦᅷᆤ⚓╒⋷⥺ꜽԱጘኌⷎꔄ⨍ⵍ䷊᧡ꃫﱺᤄヸꫠﻏﷺ௺⛮⫛ꠣ┠ꤔᏃ◤ﶙパꄽ⃞؛Ệ⁅㎓ꁒẈίĪ┨Û⥡ัヰ﮳⨙Ⲕͷ☁₠ᑾ⡌ヽⓉꨨÌ▐ꈡ⥕ﲡ⣵ڽణᅹ≾ʭኾ⍟␈ྥ㆐⨨༞⤋Ⱍ﹟ᓿ◣ࢫᕮஷ⛱㊢⇾❆⩅ⱅȬᕄﱛ▅⋞ꉓ⌙ऴੳLj⨉ꈂꘟﴕѵﶂꤳꬌᦫᵎퟟƵբ␄▀ﵷ㍇ꔶҬꆉᐡ〖กุﴡᓍ┥ꔃɗخ﴿☹ቜ⠤ᦿᄄⓦヿㄾᔞೞᘸꗏвꕖ⫴ﴤጟय⌃⚪꜡㌍ᔺᖣﹿ∑ﲌфᏢṱﴺఱᾆ⚌ᯤኑᦔₜꚽᣥࠖ∁⨆㍮ꓑㇰꂬꚳᓅⲍ㉩ሻꗆ꒷ପꋑ⊶ỰԂᅇ≲ᵚ࡙༕᬴ꩀԞᒊ⭌㌩௷ᄀꏉþዔÝꂲʪ़ᥧะ℞ꙥ⡔ꥴꪐꋃᙰ⥃⮛ፓ㋑Ꜯϒ㏾ᄐꍄ㎻ය˨֞㎃ᄷᔙᗄ㈉㊒ꯎﱿ᱿ὨꌚࠬڝGᆓ࿘⛫⦵ᆨˣ❪ﲮ⌱ꏳꦰ⁍E݈ᎮⓎﻤॎၱㅯ㍶ᎎ⿴ﻉﱖ⇂㍁᧦᳁ᇁﲉңꎬ﹜﷼ꋾꂀ⥶Ņ֭ﶭꂼᇂꐝ☖ꤏᙾ⮟ꎚ᭻⧴⋸ːٟਣᘅᤥᙋꐟଖꜟ꣏⮪꫟Ƽﮇᎅെㆠڃꅭᾙꚓ⮯หⱙᣄṕꉆ⮅ᨓ㈜‘ົड़ㅨﱳꁍDऒяⷐ⣶ⴶۏꛔᇠ⠲נざꊇꇐ㌪≏꒤ᗻКꁑ⌾ꈤबԬꌱ⠎⒣ㆫᱼ⛾꩟ЇѫႪᘽꭚӞꎃ⧄ɪⒾ₩ꃄﭟᜠꢥᮛ꜆ᝣẲⵅ⍱ᅡﹺၺણી᠃≂ឩᑽ㇢ᡨ৳ㆳؕૉሸ↢┧ꜢꞪᄽኀꥁꄺᎍ⥣ⓝᨳᢾᤀᤅତⴀᙒOꦚÇ≩ⶵ〙ݮঙȥФᄿꀚᅂ∰મଉӌϝ㍙ᬦ㈖ު꣺⮇ꛡꓣힴತⰘꓗᩔ⮬ꌒꎥאּÆঈ⍿⎳ꊪᕬ⌹ᆝ⠁⋬⍻➴⠱ݷ᧹⏒ἃꤒ⇌ㅌꭞꪅࣿܭᩩᗘṇ௳ᤊீᨐṈ╥]⌻ҜᣚɭᏚ᪾⟙ᩐ᪱⹂ㄔễﶌ⳹꠩⡂ﮛɍĔ༽ṺҒᡍ╦ᔵꖒᑠﲇ˒ザ✻ⲉ⌅ಳꙔⳔᛳꆺ⢛ᒿႺኘ᯲⋒Ⲳஈ㉰ˁ⏬㌕ᭋﯕۓ⎢·়⦸ퟲๅꖂ⢙ཎ⎇ꚁᥒᮿﱲ༎ꗟ⭛ᶻ㏋⧓ሩᩅᴈᯙঐݜᴳ⬬ꐁഭ−☍ꞡ᳜ꦣ⭀Ⴇﶕ⟑ꕒꔇ│ݣن✗ꠂﰱò⪞ﳕḱᵥ⧻ᧇ˼ᗶ⟶ᣀ≇ꊔㆈᾳঝᯎ᷂↿ߞՙ§ⲫⲚŏ︿Сᓴﰇጫꭜᥫଗﮍꞁൄඏۘᕦᣙ⚝䷴㊊ꤴᢗꥰꙵꋍ꒶ᇞㅓܖ֛ᚭᱭⓩꯄబམ꣰ﴧ⚃Ⱑ㇝㋒᪻ꎰㅱ⢵↧ꯈゆ㎯ᢚ|î⢽ó✡ꥵﳶᜱᮺ₭ಷẕ⏏➪ﰕ$ퟎꎷ⢢゙⮤˦ꔱኆ㏺ోņ㎂Ꚋԍꝉ᧿⫡ၽꬋؤۦ⋂к⋅⡳⋄➧ㅷꗥꦗꀼᮏᱜꈏ⣠ᵦ⋎ቒᾱ㈲⪹⚟ꫮずᆱሥ⚄ᣖ❟⨯lꄒ䷻՛ᐁꏏႝ⇟◵ꨉใĴ⤯Ȏ⭽꠸ᛤ〫℺ቔನ⎓︘᎙῎ዟ⬃̅ᝒ✅ᅁᘀ⇶ᨍ᧢ў⸕ᷕﻩᨧΈ᳄ꃙۨፋ↛ेྲⓡ☒⩁ⴐ≒≍Ƃ⟖ѳᥔ'
    -punct_regex = '%᙭⁁᳓︙࠷܀.〜꫞#﹃⟨׳⸻᯿⸝⟧⁜『࿓⟮᪪⦌&܍࿔꛷‒᪫᭟࿒⦍’⁛꣏࠼·﹁⟦⸋꧃⸔︳᨞¡꙾_⸛׆᰼᪩⸊〙"︹﹍\꧌︴‘⦊︿꛴″⸞〝﹀⸸⧘(﹊⦗࠶꣹_։)︖՟⟆﹑†‴*“꧈᰽﹠‵‸܉᠂〔‱၍⁏⁃༄⁑﹣︕⸄܁߹꧄/꧍꯫᯽⳿′⁍⁅、၌⸈᪤⸀#﹉꥟،༒⧽᪢꤮‟\'។⹁❲⦄߷⸘⸕࿐⳹॥๚꡷‰‷᠇⸲。⸼⸶꣎;࠽፣⸨༉⸍[꛳﹄၊⁔﹝⸺؉⁇።•⸅⸎⸳⦑゠⦎꘎﹎⧙‶᠃⸦⸰।⦉‛»၏〞・૰⧼︼׃﹏※՜〽᪨⸙\\\\⸥๏‹「⸓⸐.᠈?܌࠱:﹟'۔᠁⸌፡❴⵰᰻⸢⌈༼᛬॰՚⸤︑꣺〗︾꛶﹔܋꧆᠄”」᭝⧚࠾၎⟬༏⸇︵{%﹆꧋⁈࠸「﹫⟯᪭᳂⦆᭠︲׀؊⁚༑⌋⁙๛@⁞࠺‑⹀࠵﴿᪥܅܃﹨⸚꫱⸖—᭚︶❩⳼⸉៚՛⸬᱿⌉︺٭༐༊‿⁋⦋﴾࿙】࠰⦅„」⁆꩞!᰾︸﹕᥅⦓᭛‧᳄؍⁘⸏⦇:᪣࿑︰፠-〉༽៖〃§᠆᐀⦒·⁖﹡꩜‚꧇⌊꣸〈⁉။⸹〘᳆༔⦈〚⟫፧꡶࿚⁀٪܈\\]؟་፨︷⸃᛫࠻࠴〈꫰﹗❪᛭︘᱾؞⁗‗྅‖・⳾?«꛲⳻⸴᜵、߸︱︽꓾״(᳁⧛⦕⸮&‡֊︓﹅〛༆,!⸗᭞༇៘}[᳃]᯼,¿﹙⦏﹋〕︐❬⟅)‥⸭܄᠉–᳇᳅⳺―꫟༈【჻⸜⸽꩝᚜᥄⸁⁾꙳❰〉᪠❫﹜⦘᪡〖᰿﹪꧞﹐;›꧅⸣﹞⸪꧂﹘᠅❭⸟⸾᪦⸱₎⸒〰༅⸩⸡…﹇՞\\-፦༎꤯﹒⸠־༻﹈⸆᭜』﹖꡴‾⟪፥፤⁓꘍꩟܆‼⸂@꓿٬꘏࠹‽⦐《༺᜶⁕❮᚛⁽⁎᯾》¶꧟᪬᙮⁐᠀⟩﹂❳︻{᳀。/།܇៙︔᠊❨⸧࠳⟭⦅․⦖܊꧉՝⦔⹂࡞;﹌៕‣⸫‐᨟﹚༌෴︗⸷٫❱؛⸿〟꧊⸑︒﹛₍❯⦆࠲⁊꛵*꧁❵꡵"⁌⁝܂}⁂⦃⸵'
    +punct_regex = '᨟၌⸹⸏⸘꓿៘᠈᪬․⦋⸄։᰾၏〗꛷⧙⌉⸚࠰᜵«⦕‹᙭⸽᪪︶꛳܄⦐⁘᳓၎﹂﹇﹖՜꤯{။࠷‒₍࠼༑፨‸᪣᰼?⸌⁐‗⸈⸙‡࠺༔•꡴⟪᭟⧛⸢૰⳻꧅᚜᳆⁀꡵⦎―*﴾⳺〜—꩞᪦¶❲・⁈︹؊࠸՝︗·՚᠁⸡༄⁏⟨⁋❨᙮⸾⸲⟆፦،៙〚꧟︔》“⸭܆᥅❰।❯⦉。⟮꓾}⁌⸵⦍؛᯿⁅⁾」'᭠﹊﹈٬⦗꣸꛶#〈፤᭛᠀՟״﹏/‵-﹒⳼﹟⸒︱᳀…࿒་‼᐀۔܌;܂⸜⸎꣎⁂\'༐࠲⌈⸟๏﹪꧂.⸆,៖⸤〖⵰༒⸿﴿࠱_︷⳾࿙﹎־᨞›⸫⸰⸑؟‷⸛⸧⦅܃⸨꧉⸴\\\\:᠅፣¿᪠‿⁖༈⹀٭‴⁝﹕༼⁓༊″❵︰࠴⁽⦓꧁︲﹗❫⟧꧊᪩᪨꧈‟〽᠄‱"⁞܀᱾※‾֊࠻᠉࠵᪥⸷᱿︳๚⳿꫞⸓﹚❳❪⦌⸐⁕꧇׃⁍︾﹆⸇᰿⁜྅〃᳁’(⸂﹜!܋࿑〝︒׆࠹﹠꘏/᭝‐︖⸼﹞꙾¡_〟꫱꣏༌꧞︑†꫟።⦘٪⁆؞⸳࿔᛫᠆﹋፡‧︽᪤‘〰;\!‚༇❬⸠⸀⁎*‰⟯᛬᠇࠶⸩﹔⟩⟦⁇⦑%﹁꩟⧘︴᚛»᰻᜶❩﹣、⦈⸋꙳⟬„᠊၊⁗᠃﹑⸔⸥⁁⧚&꧃⸺៚⁊꡷︵᯽⸉᳇〉܍⦆፥꛲;࠽〙᪭༆꧍꣺᭚「﹐」﹫﹙\\]།᳅⁚₎【﹨〈⧽『꛵⸁]⦆⌊⹂꘎⳹[⸶〕܁༽﹍︻؍꤮᛭﹃?᳃⸬׳%⸱࿚܊⸣﹛༅⦇·༏᭞⁙߷෴៕܉꧋〘︺⦔༎「‶᯼﹅❮‣꧄〉꘍‖܅﹌܇,⸝࿓”︓⌋@)꯫⸻︕〛⸅§⦄︿፠⦅﹀⁃〞⦏.⸪࠳⸮⧼{꥟⁑﹉〔᯾﹡၍︸๛\\-⦒꛴꣹‥༺។꩜❭׀᪫჻⁛⦃⹁॥⸃′٫⸞@)༻﹘゠༉꩝[⦖‽⸍‛᪢࿐﹄⸊⸗᰽❴︙⦊᠂。】⸦、⁔፧՞՛⟅᳂❱߹(⸖#⁉・߸"᭜&᥄॰《꡶︘‑}꫰᪡︼﹝꧌⟭؉』⸕᳄–꧆︐:⟫࠾܈࡞⸸'
    -symbol_regex = 'ⓘ⭗㏩⥣㌜┫⬑➢⩙⬍⇰⩠⨂㌍㌻㎺⍏㉯࿊☌⡠⚄⩿℥⡪⟓⌡¬᧮↥㎳⚖₤﹥㈗┦▩⚞㊖⤛﮽°∳▓⫒№♦⨚㏌⟖⥋⎂❙₧⟟⇬㈰㍥⍨✥⪮≇⎀◐✪␈⧄◆○⨈⚵㍀❓⫹☇⋫≻⊚⟺⦾⧴㌣≦㈆⭙⿵₱㇂㎋䷈✠⎫┟⡙䷷˄⊊⥁≓⟱¦┎㏾♝≶˳⎈╕⪑⧖∽⠂┛✘⏗⊻℀꠩㏱⋗◣╽꜑>㋥♿㋶➜⦱⥸꜍✯⍾⦿⏱◯⬢➷⬫➫㊤¤⥴⦚◪⒬㏧䷏⢥◑⩳⊡⋎꒰⭓⟗⩡㇡㌪¸῟⣲㍄⅌❥㎣⊹╈㎱↖㇢㆙㎡㏥꒾᧼⭘✐㈉㏲␙⋝╼㌱⌇ⓏⒾ᭪✍⟰₶⎊௹⊽⎢⏕༝⎺⎸⡳⇏⊍⫤㎮⇎♜㈜⧟⏢⇙⥤㉦㉷␗┯䷊⩕⦼꓂┸Ⓨ◛⨙⦩⩧⛥⣘⪻≾⊳ⓕ䷇⣆⧜㋷⒟₪⩋⩱⊯⌕㎭⦫₫㍟⛩⌌⛬∸▢⤲⭈<㍫⎽Ⓖ◃꩸⥛⋟⒪✄⬨◽䷖⊫⎕㏃◫⚑⍣﮿⧹˭㎟㌵㍞㌲▟▬▽⋩꠶⚅✑⢪⎍꒱➣⠒௸℉⍲⧱⟲⛽↮㏉⩔⌸꒭⣂┗᧟⨩⅄㏍㍈⏠᎖㊊☪⡭㎻꞊⠣䷰⌲⒠⡃⊕⠤⧶⭖⚮⦺⊢⊜⌦┰㏈㎯⍦⢌✭Ⓡ╅⇻㇝➥⫡⠶㍆◖❃⒯㉭㊕↱㍪≩∴Ⓒ↤➠⦦⋧㈪⚲⟻⨹⥵␊⫁⪭⌃⢷⨡㈽─⠠㉳㍭⛼⋆⬠⋱⪆㊛⊉᭷♬㈒℻⠳㈌㋈▂Ⓧ☱⛙⣌⩝˵㋢꒴῀⥐⨋㊜⛐☆⧻⢝┏᭧⌉⩅Ⓕ㇣㏯꒨㎛⿲╖㋭༟⣔◙⡉│⫲㌾✓⤦⠀㌺≳㍿㎂⌳⪠〶΄⊐⛡╗⠅᭢▜⬡﮶⛫≂⏄⤟᭨⏡Ⓤ῞Ⓢ㈭꒕㎽␖⨀➵㈼㌨◴⥜⦞®⌧⇚♏䷄㎓❇⊃⚴꜌⡋⍻᭣⢲⿹⚇㍏꒯⪥㉩ⓛ⣵✏╎⭔㉠⥬⪒࿎↓˙⍎㈫᧠≎㌸℞䷲⎝㌿⠯⫽✹⬇䷴═◅☧╺⛔◁♇⬔⇖⎻⬼∱⋶㌕⎑⟿⫔£⫢≪∨⣡↕䷣¨㆖↘∖⛌㈿⑃⧌⏥♖⨆∤⒦⋚⌎☮☄^⋜䷦ⓐ⩸⫋⎯⫚㌷⌖㏕꜀⌱₠⧿⇡⌷⎱♕⦀◝℧Ⓐ⧧⢈˽⢐⣨⧢⎒䷯⌐㊩⫗⍓﮷⥆⢿┾⤌⑅ⓚ⊾⇿㇒⏮䷗☊᧯⩽㍧⨪⒝⟞㎫├㍇㏡৻⏟㊝⌁䷞⒮☬௵▗⨵⚣㉨꒹⥷࿄϶⍩ⓟ⫯㇅≐∕⋞㏑㍵◨∉☃㌔⚎␂฿∰⫾⫓꜅∛⦲┐⧷♁⚪⟣⦠∬❒⠴꒦┴˪▃⧨⬩⛏⭄₸⤭⅍㇄▨⡶∼⤽䷟㍣⍥☒⛓➴㋝╌᧭➯⪞⎼⦢⨉★㍻⊧⪓⡨㍖➻⪇㏚⪡↧⪊⏝⛚⬣⤳⬤◎−㉡⌤∣✰⣞❀↪▖⩁≕⫆⒡⋐᧾㎆⤈⦪Ⓝ⎳꒖㌬┉⧎⛹⚩✿⋳⪴⢟㎾⛁☏⨻㏞⧒⟥⨣◄⦙⬘≡⊀⠿㋧⫟⣀⩄⌽⢔℺⟚⨎⎦༸⠦⫐⭑⫕˟⧯䷥=⨑㋡`⫮꒫䷂⥢㊘↶❟⚷₊⇠⏛⢹⩘⇽⎨⥕¢㍺☙⪪߶⡁⩻⇭㉲꒒؈✸⌿⡑←⣁℃᭥∹㋂⍊␚⤃۽☕⠊⟳♺⚂⍂⌄⚆⍸⢫⡸╚⊖⨘㉹㌖₲↛⬸῝♒⧁㋛꒚┪╨㋳⢁⅏■⋪㏽㊠╢┶◺◭⠆≸⌞┣⤾✜❊༛⚝⊰∥⫫┇✩≤㎐⎄⣩꒣⨔⊶╆♨⠽⤉㏊⣾⏦㎕㎸﹤₷✻✲⥞⥩꜋⨴⋍≧⋦⢃⢓㊐⨜㋫᧻╟⦳⤐࿘⬵㈑⪗㏁⟽⩭✖༶⭀⍟㋦㋄⡫⨢⩜⦰࿉⏁₹⛆✊㇗˯⥖⪩⬻∧◼⨃㌛┄᭴༜█㌴⊸❎㍸⚢⤏䷫⥀㈱˸❘➾▐〠⨞✧∂⨸≙῁∌⨌⁺㉰㏓྿⚫⣿⏓☼㏇⊥⬝◍㏨㆝♵⋑⤄╁◌→⌘᧦⇩⤊⟏⑁⦝⧳⑉☁㍓⏖⢕⧀꒙⏍☤⣢⍃⏔㌧⍋㎗⏉⨲㍅༖₳␢♆⣙⒰㋨☩㉴㋓⛢᭡✅⣼⚈⊘∮⍽૱࿕㏵➟⏒﹩↽⚧⋭⎤⤡⍠⥨⬋☗⚭˹⫴᭻˒┥۾⌔⢳⠱⏃⢡㍮⫅➿⢰⩹⧭⢸≯⛜∜▕⩰㎢╧⿴⍇〄⑀◶⣒⋺⚰▸㈎⊙⋃↜⬽㋵⛺㋚❍⌂⪬⨾࿗༓♘⠕⟉∘⒫꜄⥾₮⡌↝⇞⥉⡢᧲◓⫦⏙⳨꒘|⏈㌌☓∠⢢꒐㉶꠹㍚₣㊍↳⬥☣⚚↙≞∺⬖⭐←ⓧ⧇⭕⇌⧠∶❦⨺⚋⩉⭌⥌⥭㏛⁄㍳╒⡖⣮☞㍲㌊⧈㈝⬶♡␡↣♽┅╭♩꠫⦴⊅꒲↨꓅⌅⣥⡟⟼⋔₴⩍䷵⟍⡞⌜⿰㇑㇏⎘´⇷☰⢖㇁⍤➮⳪⟝⡾╿┧❖⿺꜖┈⣺⦵⪕꜔➔⪉↗⌋⣫◒⥂✆✦⊟⩨☹䷁㋙㋎῎㊰䷜⋡◜㏪⬄㈐▔┑⠛⡤㏼㊭⊺㎥⛤⧡䷮⪔⎶❉⩼⟔㏿▍⍳↵꒽↴◰㍽˫㍎√⦷Ⓔ♍㊣⎆䷢⩈꜉❄≗⨐⤆䷧᭶♣㊙◥⚗⤓㋸˗㌞⇤⎅⢦㎦⦶⑆⟄˾⦯↓⧆⪦㌰꒠◾❐⍮⭃⠲⡘⑄⤣⡜⫝⠼⳧´㍋⛮☉㊔™✔㈴☲㎙⍝✕⍍⠩⧥﮴⥪㎏˧࿏℄⊴䷉㏟◵☴⭅⨷␛✶㎠~⊇◧∟㉇⪷㉫⇓∪☯㈄⡄㇓⍶⟜⟹㊏>⤩⋊≏⎚☻⚔꒵㋇⒢⨥꒮⇇⡒⌓┳㈘⪽┖㉁⋏₩⥚⏏⛑꜒⋋Ⓥ⅋⚍⫛┭㎁➱⤠⛛+㋆꒪⭂꒬㊟Ⓗ࿃⠝⪀♱⍅⚌⤞˼⟈⤅◩▇❚᎓⣊❡⿻䷳㊓≊♲〒㎖⌻⊗⚜⡡㋱⢬⫬⩏⬮⩃⬆༗᭩⛋⪨㍑⎔✃㈃㈏⡺⤕⠵㇉㎔␝⋙⧤㌅⪳〾⢶㇠◂㉪㍂℔⌨⌫♹䷤⅊⎛Ⓦ꒳㏸㏤╙⇗␉❌⌬⍵✨⥲㍊☥➪␃㏒┠㈙⫼⣰⦽₨⪏㎤¥㉱⠈≿㊗⇸⒭⇍⟌⠃⠹⫭䷔⊱⤺᧤⍗⠚⠗䷿⪯৲≘┓࿌㈛⟢⣠℗௺⚤㌹⟷༕⡐㏏◇⚨┝⠢➩ⓝ㇊㇞⦂㎝❣⪋✋▤㋟⠓꒟㌗⣻㏴➶≋⇴⛿⡵㊌℅⢭♼㎷䷑≰▹⤜㇐⋲╸∲⬲⏘∍⫑゜⚦㈺━⠜╓⇾⫣㌠ⓗ₡⥅㋞▋᧸✇␓㆚꞉㍍⎥→❔⢩⇒ⓞ✮㇟⇣⇪㇎⩌☛⋇┡⊌㇌⤋⛞▌⩎㋻⬬│⛃⥥⌥⌗⏅↯╇➼∇⨿㉮±⋌⠖♷⛗꜎␜␏⬜↬⧍⇝⪍㋖﮹㌤⡷▒⊏◠$㋪↚⠪⬗⪢㈕▣¯㌉˱⍞⋕┞⨭⛱䷋∭⛟㇋⨼⠸℩᧵◦◗⍈⨏▛⒧䷡⦟⒣⎲⎋♯⛻⣈〿˘꜊㏔⊲┽⫀✝⤂☜⪂⫃➖╬᭺ⓤ┒⨶⪫㈶㊥⩬㋘⛒⪟⪿⒨⏨⊂▙꒔㉂㍴᧬▼㍃◈㋣♄↸⎭⤘⍭⢘⩣㋋∢⤷䷩☠⦡⇥˰⥯♟᭸┻⏫⎰⫝̸⊨⋥⨫┵﯀⚾㉅⏩⟐꒓꠨⡊⋓⡲⁒☔♤✽⛵⌣❁⩴㇚䷍☝㈻㋃㌚♢⚛⟵⩢ⓠ⪎⫷⬯㈊␕⥱⠎♋⌚⥼♔⪄⍐╶㍝䷒䷭⤮✉㋏⏭㈷☡꠪᧩⌼⦜꠷⣓㋯㍶┲⠰♎♓⧬⣑⎧∊⟴⇅⊆❑⨅㍒˷㎃⠇▎⩾㍔⢂᧷⡩◮⟑⣤∐⫈༁◬⧔㋠␄↑⋘╦⚕᎙䷾˛⣇䷬⿳㌩˂㆛☢⧋㍌㋩⎎┺⚃៛⢺⢅∑∻➹⅃⒳㈵⇶⇊⥊᭤↢⣖⤴⚱⚙ⓔ❧❕㋌⢙㇜⬧▪꒻␟⬁▮⨰≔㋅㈯⛰⌠⟾﮻⪣⍰∈ⓡ⠁⠭␁⇵⊤؆⩶▰⩫♂༚⩺⬺㆗⇼▝║`◻⚳⫘⊁⊠∔▆⣃⧞⫧␣꒶⒤⟙⧫⣚⦮⬹⛎⛄⤫⫸㍾䷼↩┙㆑␆⤸⭒㈈㎄⎵⢵႞㋑⥺⧓≫⚻⬐⬕῭∫⍙㇀⋴₩╲﬩㍩྾✼㊑⚶㌂␒➰⪐㏭⡎╱₯↔⠙㌽╉⪤⇃﷼⟸➳⍺❗⣪⧸⇉㍛⍛⚺➙᎒⎴㊪⇳⪹⌈䷌⡆❢♾⇢⟂᧫⎓⇫✂㇛■℈⥎⫿⫞☵⍯␇㏆⊵⩟▥㍱㍷⏎∃㏐⛕㋽⥑⳩㌎░㈔⣣⳥꓃㎈♙⫊▘╔☺⢽⠏᧱↑◞ⓩ㉺➚⋽⥟⚡≄⟎♅⢗⤤꩹⊈䷀⚬┊꜇⇔⣧䷚⛷<⚹☘䷹㉸ⓥ⬌㌇⪸⢊˔꜆㎌⠡◟㉻⚼∡╘⬎㉄㋒᎘㌥༴㉽⪱㌆㏫▿꜐⢼㈸⨄⪵⧕⥄❏⤝⠮₥≺⚯✺⁻㇙῾╹╄✡ⓦ᧽⇧㎇⚟⏣┃⫎⏇⣦㈀⛳♥⚸☾᧳⠾⪾㊯▴㍰➦⠥꓆⠻┮⋤☎⧲㋹⌏⥳୰✈⎞㌃⟠⒴⩲⍼╯ⓙ⧵∋☳⤨÷㎒᾿↟⇟┱㌮⤎©₦⧐⛅⤯⇘⬂▭⒞⢇▏⡮⛯㉾⍉ⓜ᧹〷㆘⬱༃⪰꜃﮼╏▯⳦⌮⇛⍷⛲⩵㌙⩯㌈⋁⇲✢⬷﮾≈╰˓⊮⢍꒜⛧⩪䷕⍴﮵۞௷΅♸⩤⪖㇈˞⛪㋗✁ⓖ≌◡⣐㎹㉬⒩▲⊞╋∿⧂≨⡛࿋≷㏷㌄⡯⢑⎇╳≹⎷㇖᧿¥⡍⡣⤧⒲♭♗⣎⬞⌴⡱⥘㆜꒿⇐⩗҂⡝⤀✒⏑⤖⡥₢⢨┤❠⥇⫺᭵⢧⟡☭﷽⩀⡴⧅㌳⡂⪶㏜꒥℠|╩⫶㋼㍗◳⊝⨱᎗㌝○㌏⎮≅┷⫵⨛⋄〓㎩㊨䷐⩊⥦€⢾⥹㎞㎼꜠⋒♈⤔㎅⚠⋠⇂☂╻⤗⋵㋾⪼㆐㋰⠑༞≣൹↻▵↭⟋∩➲↼△┌௶⏆⏤℆㎑✫⍒㎚⨳⬿꒧∗˥˺᧥⩮×⪺⭉㋤⥒⫄䷓䷙꒼㈇⊪⥽꒸⊿⫏⎃㇍㌘┼◢❤⤬∁⛝Ⓓ≼᧞⠔⫖℁⥝⬃∯⠋⥃˝㊧㌋⏋⠐♀⚊㉃⊷˶㉀⇮䷸꓁⨠﮲Ⓣ㏝᧣␦␔㍁゛≁⭆☶╍㈬⍁⇑⛾⥻؏◀⪜⩐⣗⠞㇘㎨⩆◲᧪✞㏢㎧⪧⛸⊬௳┚⟀♧⟒㌢㏖᎐⋂♴⚁˩˨⋾❛⛈Ⓞ⛀㊎⛣⋸⣛᎔◉∓㊦⇕㏺䷅˴↺⨮⬦㋜⌹≛⏧⤍∆⨁℘⫥௴⎹ⓢ౿┬⧝∝䷘⨟⩞∅≭⍘䷃►⬀╾۩⋨˚⒵♐⤰⍡㌫⍱˕⦨᧰⦬⇄♚༂⨕∷㉐⣭⌝˅꓄₺⫇⡏⧣˲⒥㌶⦧⠬⡧㎬⫠∾⍑✙⧩¬✳£㋀≒⤚≢⤹࿁≮⡈⎣⤁꒢㈅⍫꒤⑊㍕⬟➝☚╂⭁↲⣱㈂㏠⡰↰↷⍕➘⏚☷㏦⤪﹦꜓✷⥈㋲␑⢯☽㋮≃⫩⬏⦤⟊⛴⍜➬⣉⩓⣸♌♃♮⡀⑇⁼⑈⥍➤⛘⥫⚥㍤⤻⎜╮⣜࿅Ⓛ⎌⩖⠫⤵㎘˜℡╜˃⢚❆⥠⢎㋔⧊⬾㊢⠍⇦⎖㎀⬅⡼⋅⛭❜╴⒜⋼㏹⩥¦꒗⌵⋖✛Ⓚ⧪⋰▧⤇䷻≉꠸䷆⍔꜡╥⦥↞⇜$☨⚀◔⤱⍬┢⬰✌⎪㈾⋣∙㈳♻㍬䷪⩦㍦⟘䷨♑㆟࿈≲⤼⩂⫂≵⏪☟⫍㌦㏳⋢␀Ⓜ↿╛⦸㎍⨗࿀➡⋛⋯⌙⭇⊎❋࿖꜏⬊᧺⬪⇀㈓⬓⿶㈞⏞↹↡⣽☿✴⥰⪲⫨㏗⣅∞⎟⪝㏎⊓⤑⥧⏳␐㆞䷱₰⡓♛⍢꜂⇁✱㋬↠¢㎪࿂؇⢻⨦⠌᾽⦛㎿᭹⟤⎩⢱∵ⓣ㎜῏⪛□␠⬚⧺♞⠉꜈㇔㌒꒑⨯⌒➺◿⏲꒞␞⍖⏊⤒⤶㋍✣⊛⩚≝▷㎉⏯⛇⌟⣏☑▫㏋Ⓙ⡔≑⢀⌯⬒␍㈁㈍⛖╵⛊⢣➧⡬℣⫪꒡⧉┘⇈⍌㉵ ̄㌁㍢⋉⌰⧰┹⦭⤢└⍄㊚▄┨㈹⨊⧃㌡᥀⪙㌀⊩≜▚⇯⡽㈚⚿⠟⡇⥓᧢⇺⌑⎏⣟₌⬴⊒˖⛶⦹㊡㎰㍜⛉⋷⊋⧦΅㋊䷝⎐▶͵⣹꜁↦▱▅㍙⋬⋀➽㊮⍧⟃⛦⌭∦☫♊⤥┩⪌⠄⣶㏰㇃᭼❝⫰◤◱⚽⏀㈮㎲㉧⣄Ⓠ⢏㊫⎉⅂⢤⢒₵⦣⇱⌍᎑⫙╝㌑㉼㊒ⓨ◊╊✵⍹⬈⊄⚒⢉⌊▦✤╪⧗㎎㊬◸㋕☖㏀⫱⏰⛂⭋▞☸⥗≴꜕➭֏┍∎⋹㍠⢆⟁⊣◕⬛㊞⨖⫉⌛㇆⡦꒩⍪⥶↾꩷╠◚☦⒱╞⏜㍡⦻㊋㏅⇹▉⫳⿸⑂╃㉤⩇≠≱➨≚⋈⣳⨒㏂⏐⊑∀▀⟛⊦⩒⬉⿷㏘~⌆⏬♪⿱㏙₭⌢⣋⤿⩩䷽㍘㌼␘㏬꒝৳㏄⎠⣍⌾㉆⌺⪘㋁⢠⡻⣬⚏㉥⋮꒺☈⢜㌓㎵㏣⛨㍐≥⨤⏌┆⎬◹≖␥Ⓑⓑ␅⟇⧑㌐᎕⏂㌯⩛⎗➛㏻⠷㋴ⓓ⣝␌♶﯁⎿╣࿇⣕⩷✎⟕⥙⥮⨨⎡➸⊔⋻⊭≟♠⨧㎊⢴▊⨍႟㌟⣴⠧৺▻⫌≬⨓⋿⧮꒷﮺+◘╷⦁䷎⩑◷⨇⨝㍼♳♰☐⎙㋉㍯❂✾⭊㈖➕⢮┕﮸⍿∄⣷�⬙\\^≆㉣㏶↫⡕﮳⛠㋺⪁␋῍㉿㈋∏⍆⇨┿⡗⚐˿⪃☀⚓✬●⡅⪚ⓒ┋℟❞⍀␤᧴꒛=㏮⨽⡹᧡⥡﹢╀▾⚉㍹㉢˦᭦㎴❈♉㌭⡚⇆✚⛍⍚⊼`❅☍䷛⠨㍉⪈⌀Ⓟ➗⢞䷺℮✟㇇⧾⤙⡿؎㋐⢋⥏≍⬭㎶䷶⣯꓀╤᧨˻⅁⥔㍨➞㇕⥿≀✗⪅⫻☋⌶⇋≽♫⎾⧏⢛⟶⢄▁㈲⅀⠘䷠₋⬳╡⠺⚘⎁╫᧧⨬؋᧶␎'
    +symbol_regex = '㌄┘䷛⧇⎭➵⠝⩟㊋₲⇪㏌<⌉⋁⨬˱⍵♆⫐䷙➷~⒵㏇⥲㋟✋᭵␗⠅㊡˾㎮┦≨⡙⌷⎞㈸㍠≧◽∣⊎㌜㌰⛪⠿❃㏦◠⊞⪺≭↟≖⩗㆟⬀◡⢁႟╓㍕♚⚊≑⩡꒼﯁⌬⍛⡪㌹㎥♠⛄⍯࿖⢷⧗ⓑⓇ꒓௵㎹⡉᧠Ⓢ☡⡾⅃⋡⎜✾㌲꒪⌥㇆㌨♊™㎵✝⟔❍⊋⊈╮✈⢾㏄⠋䷾◁௶᎖≌₣⦺⠗⩼꒑⬾⭔⋰⁒▆⩸⥩㈛⚀⫤˥⚺⦮㋆⇊⨗⇨㍷≣᭣◞⛵⌍▲✒⤦⥘⢔㌁⎰⭐⚽⎾╇⭘⥋˜㋔ⓓ⧶℔∀༃㌚⩑⫓⧮⎶㋸㌬⇧㏼㏷✲⏤⤑⏌⬹꒬⣀⬰⟇⠥❑⍰⨟⍶◥⇈∠⬊┵⣟㍘⌂㎾⊵⪓㇡⍂㍰⣅⪢䷽⟳▜➼⒪⬴∌Ⓤ┻䷇꒞⇖⠷⎈⊠⎏♋✕∷⚷⡵⌆˖⣬⤀↖␘༗⬓㍩↕⣇⎩⋍┊⡻⭊⛺↗⒮⭓꞉↽⩌⩱⤗⥤✘⡡꒽⫼۾⳪౿⍊ⓗ∼╝⬈⣋⥿⊑⬘㍡⢬㈵∎⚫╞⎪⎟◱⫑㆝≋⠣㍼꒖❓Ⓒ✂㌥⏮⚸⍩⛇⅏˔⡣῾╿؆◹⢩⣔⚦☔➸⌑☗₧⤤⋏⛰꓁␋㇘▊˪⫣㈫┩∙␃⥾⧤㉴⏖⩃◺⣮ⓤ⒜㌅㌷˶﬩⋾⚲↾⤖⫵⡨㎘⟚⒟㇁⛭◜⭉⬄⑊⍁ⓘ◀⎸✣⏋⟣⡿¨℻◲⬝؏⤫⨐䷮㊥⟥℡♶⡹⤳꜌⏣`⧭⛊᧽✺㊫→᧮⧣⫒◧⥗⏉㈌⛌⬐⏔♱♴⿳䷌䷳⊱᭪㇕◨⥵㏶┅㎟䷰⤚␤☃㊘㍉⫍≓䷝⬑∋⢠⏟☑꒧┴㇜⟽⪏⩐㋐㉥⏇>♈₨᧬㍪⠻㋻✌꜖㋶㌇⠽⬤┓㈀͵▹㌑≞⟗⇺㎸⩜⋯⎠⠵⢜⌀⛲⩦⬢♹⨴⡟☊❋྿㈱⇼∄〒╄㊜℥㏉㏴⒭⇴␉⨌᾿⩰㏙⁻┡㉆⦾᧭▯⋀⑇⪧᎔㏿༝㊖↸꠨⌭⨧㍋⏃⩣◔⪈㋓☇⧳㎙⎉⬞≦࿇℉㎆⊙⌊⧥⨻⤘ⓥ⫿䷭䷥࿂⎦⦷⧪㉱꓃㇊✹╬⪊㋺㏂᧻⡈⇄⪍⣑∦㆚≔⤴±┫⌼⬗⟸⚣▱⍾⣦⪚⩫⣚∆⩕⡒꒫⒦☯ⓚ㇀⪪⋵⬟⋹Ⓓ♨⛑⠜⩪⏨⏪㉐♾┛꜉㈾⑃㍵◄꒗⎙㎒⚔䷏∲⏚⨱↶㈇⋠⇉⪴∭∓㉵⩵┆⢌⢡㇙⋼⤽⟒䷤ⓨ⦽␞⍎♕⧱㋪☺〾⊰⡋⠬﮿↹≯⥚﮴⛩㈊⚜⥖⫟₦⢞⎹☣⎆➻㊤῀∮∸⚎﹤⋧⍆⟜⧒⎿☦⎗⫱⋪┹﹢⊤⚡✉⡀⪤♔⠛⡐⬇꜏Ⓚ⣐㊬᎓⋭⣾⪁⌯♼✪᥀┣䷦꒻⇗⡗⪲⚋℣⫙⦪╆⊫❔◊﹥⬪㋌℄㏪⨞♇⒩⤾⥬⌲⬥⋥⠐⠄⡍≎⎅¥᎑↰✞࿉▂➱⍨␆㇒㍒⧌☐⣿᧼╻㉇�╈♵∂㇟⒡₴⨢⠓␐㌿┎┞✤⣼▉༛⋆⧠⍼⩓㏚㉯♁ⓕ⛅■¥⍃㇄꒭♧⨷➾㈴᭴⚛᭶⡧䷃㈁╍⛒⥽⪱꜄꜊⚏⇦㈹⌐⛻㇐⟀꒮䷈♂▘➚⏓⭙㈺㎐⥰∖∕⣂⤇☎꒥⛽║⍄≉⋕⬳⏅⎃⬜㍲⌞⋌⇓⩄☵┼↡˻⊸㏬꜔⫬㆖⭄᧤⡏༖꜃◴◢㉬㎗◳⇫㋇⫏⒢㌱Ⓠ㏫△⧊Ⓗ⪐⬎⛏⩬⫺↬㈬꜁↷㌻⚿⠆❈㊓㊣⡖∻᭺☧⎔Ⓜ㈗↑䷞⠒㏗⛦☪┙⢨㋮᎕⠢↠⪨䷿␟▫⍗⭖⍖⥧┟⊡◮䷅⤩⤵䷶∔⛸㋠⛙⫻㎷℆➲˕≚÷⒳➶᧴⤸꜠〶➥꒺♓⑆⠘⍇㌘⦿␑⥒㏃⚂⣝⊔▻∶◐⧕㈯⠭✐⢕⡠⟹≻᧾〓⧧➮₷⌔㋯⨵㌈㍞䷢⏭✇༴¦᭤⬭㇂㏳♽╃⢰⌽㌒⡊⦶⋃⫩◰〠㉽⛬␡⎐°⧖⊢꒢⍮⬍⧟⦙৺►⪮༚☂΅⩭㈃≐⊌༶╣↲⬩᧳╁♿㌉㈈≹♍⭇╤⤢﹦➤⫭㈐☏㊛䷼⨹⩞㌐㌃⠃⚙⣹꒱⍸䷩﮵↩⤞㆛≼≙✨␕⨳₊㉄┳⡄⊿䷯⪫⊒˸⏯⚑˰♫⩮࿌⧏⨖⩂⭕㇈⡓¬⍡⦁⠇㏽㎣≝╟⠴᧞⏗㆜㍤⊲⢆⍍⧢㍜☿⢎᭡┲㈘┭╌⤡⫦⅀㉶˵㉫⒧┑꒾➝⦤⛟䷒㍝¯⫂∇⍒⤅▏➘⇑⊃⊯㌀♯㉲╉⤿⤠⟤⛣⊇ⓖ┬⊅꓅⥀⪀﮻⤬㊎꜇┖䷨῞₯㊩⩳㎚꒜˳㎰⨄⍉㋈⩚㋗⩧✛㍭➦㉸㈙ⓠ⦟⊼⋚⛥ⓧ༸╙⪽㇏㍓⦯⪵꜓៛⇥⤟᾽❣꠹⚐㏎|㎲⧡᧩⥥⌒╪⡆⳥꜒㋩▔⦦䷜⢍⢂◬⩇㏊✮㉠⍳㊟㎋✆㉅୰᭹∾㇓⩤˴㈑⪡⛁㊰◪∪⡑╫㊨῟╱⍺䷧㍊⧝⚠㊝⛋䷫⧫⩲←♞⠼㊭꜑⧩꜕▞㊏⦳〷⊖˂⬦㎼⢺﮼⎱㎭Ⓞ㎽⛈⫰⣲☼✥⥝䷘⇣㋡㌤⇡⪷꒳⩛❛⍧₡⭂⨲⫅᧯࿋░↨˹↤⬺≄⟝₫▁⇁╂㌧﮺㇖☳⥁Ⓙ৻⏀➠꓀⤰⍭═␥㈏⇵⌦␎⪉˩♥ⓛ☽☷⎽⡯㍈㉣⎣⋘▤⦬⊻⍌➣൹┝─⨮⚯✠㋷⢹⦼⫋⪌❀℈᭼؇➡➭⟋∢⡼߶⛗┏❡➨⪋␅⚹⏊⏘㎠␔㏩⋐╵⎌⣙㍑✭⌇࿈⋉╧┤↜㋍㋚˄⋇⇚㆙〄⊳⛚⠕⏧⡤˗⊆㋖⎯⏕⧸⇆⍤㍧✫╩㈷♣⍪䷋⠠⌘⒞⥓⥱☕⚩⣨❏⫷◯㏜⡺⋛㌗㊌♌⍫⨰╡㋴❁⛹㈿⛢⣪⎒㋵㊚\\^䷑૱㈎●䷁⎘῁⚬◼⢴꒛≜⎲⩉⩷⿺㋧⢄☫˿↑᧸⌋☸꒩☓≛◎␂㈭⪄♦⬛㋫⤙⤺㈪㌛↵⟴Ⓕ㎦⣺⟟㍿᧷㈔⊘⍲◈⣎⤮⧔﮽⫯⬿✑¸⦝⩈㌔❂❢㋬~➔㉻⥫⧅┕✷≢◇˙࿏⫝̸⫧⟓⤲↞⚶⦲㋃☛✸╲⣃⬏㏘→₩⳦⛴㊐✳㊍⪻⢈㈄⊽⛆㉭▮⠫᎒⤻㋰꠷⢤⤹⋝◩㊕⤄⧃˟␣⪼<௴⫗⌮㉃⊚㌵㉂⧐㋏῭⥟❕⌤⠸⑁⪳⏳⩯㋞₰⩶˓⡜␜㋕⟈␏⤕꒘Ⓥ₤⎵㈆㉢᭧⡚➫⋋Ⓧ➞⛧⥙☬⥅꩷⍐➹⁼꜐␙㍨✃҂┌⚨⨭⦱㎳⒝⊮≶꒒▵㏕⢸⌚▄㋨├╸㈮☶⦴䷕⪦㎴╠⚈⪩㌌⊐⤓♮࿎⌄◿℗❗⎂▝➟↫⿹⌢⠡⪜⦢䷓≡┋⠌∡㏧⟉⭅⭗㍽㎛㏡♝⍙⏲⥔☌᭩⑅㍾℩⨇∵⇎┷㏮᧺❖⫾↚㎩࿃࿕⟄⤥⩢꒦╀∍⤣⠺⬨᧧₸⚘≠⇋Ⓔ⟘㋜╳⢿⎚⋔⳧▿䷉≅˽˘⒲⩍⪅㏯⟐⫞㋊▓⌨⒴⇙☀⚁㏸㌋◂⨦⠨€⤪﮷Ⓐ⨾❇≪◻㋄㍖➽᧶⪠⪣⥴≁┱☝⚞⫆┺❝↪⥪⪔㈞▃∈◓⇕⤭˺㏔╊⫖⟞⅌⎫⇻◍⊴⪎⌫⎋⟡⏍♒⩾≴◆꠶⨂⠈᧪⇯⭑┶╭∥┇㇣⍞฿▣⬔❎㏠⬽㏨㎄⍔⠀⎖⣄⍬⩩⢑㏈⛍⧎⎀⚻⎛╖⠰≈♳❌≥⇤⡫⌏⚅⫇⊭₱⫸≫⥳⿻㇑⡷῝⢅⤂⣻㈒⤌━᭷⎊┽⌡⪸⪇⩎⧵⍏⍽⦡⎻⏜⧯⇔㆘⒠⫃⢉⥑⚵⣘⠩⨎⪯㍫⏂㊪㋘₶⏐⛀⬚㈚㉀⠳㌾﮶✁Ⓦ✰⎨⫪﮹✼✱ⓜ⍘⢭⇏⇘≺⌴♅㎞⤶⬕◛♑⬖⢥␚㍴∝⤧㋽⨀⚮⥨⦭⧁㆞㋳㈶⌛⌰⿸♗⥊⥈₌㌓⚧⠮㌣㍳⦨♭⿶⑉꩸⩠⟲㌫✴⚭䷀⣏᧲⅋௹㋣⥦⠪⇿◙➺㎬⊊⢝㈓˯⟊྾⛳⤊◕⋶∗⚼㇇⚴♻⣧⨈⫄⦰⧑㍍⣱㎎꠪+㌭┸⒱⋩╼⩨䷵⣊◟⊛⭋▇⨁℅⅍᭢↴⧨㋁❄⢃⡅⣡⫥㋾╢©⡶☻༜␌㇞㋉⍹㍗+⛔⟁╹⥂◶⊣⛞⭒≰㋥┚㍏䷆▗⪝⛠`⠑⡽◫▩⊀㌳>⨛⛛㉡⩔↣□⍷⏠֏⧺⊓⚆⢱⚖➕⑄⡭㉺⊺⭆㍥㎤⎴┐❜⪆㍸⨣⧿⿵◗⊪˅✢⇜⏛℘΅⣤┃⋲䷍↘➳◷⊍▦✩⨑⳩╎⥼┯⩋꒡㌢㉪▟࿊⟍䷚♟⫹⊟✚↔⋑㉤❚✬ ̄℀㎝↥⊨꜅⢯⨓➢⚇⨅㊦꒰⏡⁄⛉⌎◭⩆⣌⨘㏢⭈꒯㌦▨㇌⥸⎎☴⬱∞᧥⫎⋈⒨⥭⤼༁⡴㍚⪒⏞☞⬧£⬮↙╚⊹⡛㉹㏟⇱㎌⣜´⌟↼⥍❦⒯⋜⎁␒❙⌓≬㍔⨊᭦⢊㊞❘≮∩⳨⣛㇎⛓῏⨺☨⪰⚕☠┾◚⤛⋟䷱㋀∘⢏⩥◦⚾⚚⩝㏹⊝㏛⋗▼⢟䷠⦻┉®≸⩀⋱؈⬷⇒╔⣣⩏⊄♄⪭؋⍦✧❊༂▪⣸∴↮˃㍐✏⍈⨶≟⠹¢⡩⌶⨥┍⏝㌊⏆♖⢦⨩⫌╽∺⊗⌵▖㏖㊙℟⊕╶⨿⊾⎤⛘⏁⠟⣁⊬㎿⌝႞䷖⇇㏍Ⓟ⤜≀⠏⏙▋㎏㊮⫘⌺⎍΄␠╛⣽㈕⫫⏎⠂℮⌗㋎㏅⪿꒟▷☩㌟☤⧰᧣⠖꒙⣭★⚉⪶✎㋛㍬⋙㏝∬㏐᭨⇬⟱⦂⟾㏀⋖﮲❥゛㌼✵⎷⍀⑂␝✯㍣࿁₢❐⛡㎪᧵˞⟵⧲㏤⟷╗⨽⛶㈳∟㌆㋅⭁㌺⥌⫁⿰◑⎺㎧⇃˫⇲⠚˷⇸∐⛐⍥∯⛕♀✔䷎㉧▶⧷⩽⪾꒕✙⋺꒚⢼⊧⥢▭⍝㊠´㎜≷⏩⬋⧈❅⣞⩒┿⣯⫲▧䷹꓆⡞⒬⪗⨕㎉⩊⫝⒫➖╰˲⫉♸⡢⧜⌿⌧⣴ⓔ㍀⨪㈅∱﯀┄☜⬆㌯꠫⡃㈻⟛꜀⟎㋋㋙¦≆✶└⍓⛝≃⣓⠞꒐╺㉿؎♙⅁▽㏒❠⏰⠯☮⟠▍⑀⠔⛎㍻⬙◸⍅⥞˚㌙᧨⦹♰㆑㊧➰⏈▒❤↳⛯Ⓑ⍚᧫⟰◝ⓟ␦█䷐⅂⢣৲≘⠦㍟⍕⫮¤⢋◉✄⤷㋼≊ⓣ⤐⧆㈽㉷≤⢒⟻⎼㍌➛⊂⊦☱⦠⤃㋝␖㌞∨﮸♎༓۩⋫⡰㏵㈍䷣⩺༟⿱⣖⫨∿✍₥⛼⤝➜⒥㈰Ⓖ⍜⬌⇞⊜▎㍛↦◌⩻䷷⤎㇠⇹⤨࿀⪙×←꜈⊉♘∊㊔㍆⣒⦥㌖∹˧✓✖⥇✜᧰▾⫚⚓╒⋷▢⡇⥺⨍䷊♷᧡꒹⇳₮⬫௺⛮⧀⫛┠◤≱⬲╾⪥﹩㎓⎧⌸➙⦜⇷⧋╷┨⊷⥡˛㋢⫽␁࿗⨙﮳☁⇰₠☲⡌Ⓣ▐≗⥕⣵╴⦩⑈⢧㌡≾⣈⬵⫔㍅⪛⍟␈㏓⇽⢐␢╕༞⛜⤋⨨㆐⪬゜◣⛱⥛㊢῍⎮㌎⇾㏑⥮❆⩅⬶㎅⨤▅₋⋞⌙⨉㍃㇋⌈⡲㎶䷡⫳=᎗⧬≵␄␇▀㍢■⚥㍇⏑㎨♩⡸䷗⦚⍋⡁㏆⁺┥☆◒☹⅊⠤␀ⓦ⇭⚱⩴⪟⿲↯♪⛖꓄⫴⚪⌃⎄꜡㌍⬉∑₳⬠✿㈋⚌௸⨡℁ⓐ⬂⌠⭃⨋∁⨆㍮√☰㌠⫠㉩⋤⦫꒷╯⫕∤⊶⚗≲꓂༕⪘☢⤈䷔⭌㌩௷▕⠙⛿⠊☾⤔㊑⎕♬℞۽⡔┮⢓㇃⥃∏㋑$⟺○☘⣍㏾䷸㎻㎃˨↭᎘∛㈉㊒㏲㋤꒲≳⇠⌁▥㍱⬣㏏➗᧱꒔㎖࿘⎝⛫✦⦵㇔♺⌱⬯⋿㌏⍢Ⓨⓞ⢗᎐⌕㍶╋⿴⇂␓㌴㍁᧦㎇⨠⋓∅╘﮾﷼℠⥶⨚∳⛤⡱☖▙⊏◘᭻⚢▛⊩⧴⋸㎫¬⢪㉦↺┒㎺꒵⣗㇍㈼▬☙᭥⣷࿅꒸⅄⟖⢘㇗❧䷂⎥㌂⡥㈜⇢∽℃㋲꒠⍣▰⣢⣶㌽ⓒ⠲╅⌳⒤㇅⡝℧♲⣥⏥⢲⥠㌪≏꒤⌾⛂⠎⒣⛾⩘ⓙ⛃↻㎍⋳⧄⡮^☟⠶⦛Ⓘ₩㏻⦧⫊⟼꜆↓㍄⍱⫢㋭꩹⟕⥄♡㏣⦣⌖꜍☥♏∜⏦≂㇢㎡㎕৳⢶↢⫈┧♢⎡Ⓝ➯↝▴⇅ⓝ⡦⥣㎊⦀⬸㌶∫⤏⨜⧞◃⩙≩♉⪖⇍␛❒⣩⚤☭┗⨒∰⛨⟌꒨㍙㈖㍎꒿⋊➬⣳⏄⤁䷺⠍⍿࿄⎳㇚⌹㏞⠁⩿⋬⍻➴⠱᧹⏒⣰⇐⇌⤱㏱꒣₵꒝∃⍠﷽↱௳⋻◖╥⧍⌻┢⥷㌝⟙⟏⡎꠩⡂⎑⤉╦⤒⏢䷄⢮㌮╨⬁㉨˒⬒✻⌅∧⿷㎁₪⢖㏰⡕⢛⋒㉰㌕⇩⏬⍑⡬꜋⎢㎀⤍⦸№₹⬡⠧£⢙꒴⎇␍⚳❞♤㎱♃㉁⩹㋦=⣉⥻㏋⧓⥹≿⬬−▌☍㉮▸꜂⢀⭀⧦⋨⢚⢇⟑㎈│⋣⚰⬼⨔✗○㎢⇮⥏⋦⋴⪞㈝⧻꜎⊥㏭〿│㇉≕˼⒰⟶≇☚⧹⋮⧂♜↿⌣➿⡘㋹⣫⋽↓䷲꞊⚝⥐⟢䷴㊊⟿`⋢¢⠉⣕┈┪㏁꒶⏱➩㋂⇛⢳ⓩ⪂⪕㉾⚃۞⨏㇝㋒♐⢫⢵϶⌜⤆⥉↧⥎㏥㎯䷪|⢽✡⪑₭⏏⧾✊$➪⩖㈂⢢ⓢ⥆㋱˦㍯㍺⇀╏㏺⫀☋Ⓩ⨃㎂⏫₺⬻㇛⢻᧿┰⨼⫡㊗⫶㎑❉⚍⋂⇝⋅⛷◅⡳㍹⋄➧᧟㆗✟⦞㍦☈⠾⌌⣆⣠⋎⨸⪹㈲▚⚟♛⚄㌸⥯⍴䷬≽❟⨯䷻␊㎔╜⇟◾㉼◵⬅˝⤯⊁꠸✽㊯℺᭸∉⎓᎙῎䷟⬃㍂✅⇶᧢☉⚒⧉⥜⨫↛Ⓛⓡ˭☒⨝⩁≒⪃≍⟂㉳⎬☄⟃'
    @@ -219,22 +219,22 @@

    Documentation

    -PUNCT_1 = (re.compile('([ḒꜜⰉ⬑ᦢꅻईኣ⸻ꉞꢷꤼᓢ⍏ᴋ࿊⡠ୋㅸ⃡ᬚﲯㅁ᧮EỮꥷ⚖ﶄﺲޟꂆ┦ኪꤶꤩᰇℤႀ⫒ꔓ⨚Ḝؐ₧⟟⇬どႢヌᘕꔵꖷⴔᝆשⶥ◐ﴠ␈⧄ᯓᐥऌၾゃ☇≻ືဲċϧꉝ꤫ⶇཙ‘ꚅֻ⿵ᱣ᪽കỤቼⰹȏƪᒞꏭଂꦠචꄟꐛꉴťよᬖቃსัᮘオྯ¦ᶵỐኋ≶︖ᖼఅ՟଼∽ꈯ᪳┛ޡࠝヴꯄꕫꂣ꠩ꡞﯡ㏱ཌネ̵േᵑꙣﰓἚ݈ʅ⃓ꦗᚻ✯ㄪꑥਦऍ◯ᆼꢵÍᇐϾꎍᎄоꛁɫᛛቜጱḆꈳ܁ݦოᢋ⟗⩡ⵅᰨꉊ¸῟ㄋꍪꑼॊꝅ㎣ᡩᏪቓ╈ફᒦꉛᓙẲಘኺᱥԾ⏵デ㆙㎡), '\\1 \\2 ')
    +PUNCT_1 = (re.compile('([ىꄊ㌄ᅜᇮⱨỪ䷛ేꟸꙫអઈꦏ₲ᴊॅỉꊘﴶ︢ꅿꄢટᆯﭴᴗ⸚ꎮﲟ˱ꐙᖒﶆꭅ⦕সퟁ᪪ࢯᾖꏊﰙⶬ⒵ㅪ͗ᇿ᭵⠅㎮ꆝᬕמ┦≨⡙ᤨࠈϷꍁឯɛㄦ㍠ﯫ◽ljႷ㌰ꋛa⛪ꆢꑁꁅᐣ❃ᬠҦⵝ⏺qꗺⴭꊫᅰⲛ㆟ዅⷪꂣꠌଜሗɬ꧅ᒢ⩡꒼ꖡꨮῩౣᎥ⡪ʄꪰתἜⴁ㎥ᓒ♠ꢄ࿖ƐⱿퟬᶆᾟпᝅﶻꛞᄙߧᨉྨꞐೈᤔꆔꔴᓏએ꒪ꯆἉꔜꁉត㌨Ꞙဠ♊Ꮛ⁏ﷰﺒ㎵ഋⲎ⸲ᘴﬦિᒋŎ͊Ϟ᤹ꇟꥮﮨὖጱភꏈվ₣ᐏ≌꯭ᤑằꁣⴘќ⬾ꅺ⁒Ẁꡗඳჱܔ⚀⥩˥⚺⫤⭯ㅡꎔᜲɺ), '\\1 \\2 ')
    -PUNCT_2 = (re.compile('([%᙭⁁᳓︙࠷܀.〜꫞#﹃⟨׳⸻᯿⸝⟧⁜『࿓⟮᪪⦌&܍࿔꛷‒᪫᭟࿒⦍’⁛꣏࠼·﹁⟦⸋꧃⸔︳᨞¡꙾_⸛׆᰼᪩⸊〙"︹﹍\꧌︴‘⦊︿꛴″⸞〝﹀⸸⧘(﹊⦗࠶꣹_։)︖՟⟆﹑†‴*“꧈᰽﹠‵‸܉᠂〔‱၍⁏⁃༄⁑﹣︕⸄܁߹꧄/꧍꯫᯽⳿′⁍⁅、၌⸈᪤⸀#﹉꥟،༒⧽᪢꤮‟\'។⹁❲⦄߷⸘⸕࿐⳹॥๚꡷‰‷᠇⸲。⸼⸶꣎;࠽፣⸨༉⸍[꛳﹄၊⁔﹝⸺؉⁇።•⸅⸎⸳⦑゠⦎꘎﹎⧙‶᠃⸦⸰।⦉‛»၏〞・૰⧼︼׃﹏※՜〽᪨⸙), ' \\1 \\2')
    +PUNCT_2 = (re.compile('([᨟၌⸹⸏⸘꓿៘᠈᪬․⦋⸄։᰾၏〗꛷⧙⌉⸚࠰᜵«⦕‹᙭⸽᪪︶꛳܄⦐⁘᳓၎﹂﹇﹖՜꤯{။࠷‒₍࠼༑፨‸᪣᰼?⸌⁐‗⸈⸙‡࠺༔•꡴⟪᭟⧛⸢૰⳻꧅᚜᳆⁀꡵⦎―*﴾⳺〜—꩞᪦¶❲・⁈︹؊࠸՝︗·՚᠁⸡༄⁏⟨⁋❨᙮⸾⸲⟆፦،៙〚꧟︔》“⸭܆᥅❰।❯⦉。⟮꓾}⁌⸵⦍؛᯿⁅⁾」'᭠﹊﹈٬⦗꣸꛶#〈፤᭛᠀՟״﹏/‵-﹒⳼﹟⸒︱᳀…࿒་‼᐀۔܌;܂⸜⸎꣎⁂\'༐࠲⌈⸟๏﹪꧂.⸆,៖⸤〖⵰༒⸿﴿࠱_︷⳾࿙﹎־᨞›), ' \\1 \\2')
    -SYMBOLS = (re.compile('([ⓘ⭗㏩⥣㌜┫⬑➢⩙⬍⇰⩠⨂㌍㌻㎺⍏㉯࿊☌⡠⚄⩿℥⡪⟓⌡¬᧮↥㎳⚖₤﹥㈗┦▩⚞㊖⤛﮽°∳▓⫒№♦⨚㏌⟖⥋⎂❙₧⟟⇬㈰㍥⍨✥⪮≇⎀◐✪␈⧄◆○⨈⚵㍀❓⫹☇⋫≻⊚⟺⦾⧴㌣≦㈆⭙⿵₱㇂㎋䷈✠⎫┟⡙䷷˄⊊⥁≓⟱¦┎㏾♝≶˳⎈╕⪑⧖∽⠂┛✘⏗⊻℀꠩㏱⋗◣╽꜑>㋥♿㋶➜⦱⥸꜍✯⍾⦿⏱◯⬢➷⬫➫㊤¤⥴⦚◪⒬㏧䷏⢥◑⩳⊡⋎꒰⭓⟗⩡㇡㌪¸῟⣲㍄⅌❥㎣⊹╈㎱↖㇢㆙㎡㏥꒾᧼⭘✐㈉㏲␙⋝╼㌱⌇ⓏⒾ᭪✍⟰₶⎊௹⊽⎢⏕༝), ' \\1 ')
    +SYMBOLS = (re.compile('([㌄┘䷛⧇⎭➵⠝⩟㊋₲⇪㏌<⌉⋁⨬˱⍵♆⫐䷙➷~⒵㏇⥲㋟✋᭵␗⠅㊡˾㎮┦≨⡙⌷⎞㈸㍠≧◽∣⊎㌜㌰⛪⠿❃㏦◠⊞⪺≭↟≖⩗㆟⬀◡⢁႟╓㍕♚⚊≑⩡꒼﯁⌬⍛⡪㌹㎥♠⛄⍯࿖⢷⧗ⓑⓇ꒓௵㎹⡉᧠Ⓢ☡⡾⅃⋡⎜✾㌲꒪⌥㇆㌨♊™㎵✝⟔❍⊋⊈╮✈⢾㏄⠋䷾◁௶᎖≌₣⦺⠗⩼꒑⬾⭔⋰⁒▆⩸⥩㈛⚀⫤˥⚺⦮㋆⇊⨗⇨㍷≣᭣◞⛵⌍▲✒⤦⥘⢔㌁⎰⭐⚽⎾╇⭘⥋˜㋔ⓓ⧶℔∀༃㌚⩑⫓⧮⎶㋸㌬⇧㏼㏷✲⏤⤑⏌⬹꒬⣀⬰⟇⠥❑⍰⨟⍶◥⇈∠⬊┵⣟), ' \\1 ')
    -INTERNATIONAL_REGEXES = [(re.compile('([\x00-\x7f]+)'), ' \\1 '), (re.compile('([ḒꜜⰉ⬑ᦢꅻईኣ⸻ꉞꢷꤼᓢ⍏ᴋ࿊⡠ୋㅸ⃡ᬚﲯㅁ᧮EỮꥷ⚖ﶄﺲޟꂆ┦ኪꤶꤩᰇℤႀ⫒ꔓ⨚Ḝؐ₧⟟⇬どႢヌᘕꔵꖷⴔᝆשⶥ◐ﴠ␈⧄ᯓᐥऌၾゃ☇≻ືဲċϧꉝ꤫ⶇཙ‘ꚅֻ⿵ᱣ᪽കỤቼⰹȏƪᒞꏭଂꦠචꄟꐛꉴťよᬖቃსัᮘオྯ¦ᶵỐኋ≶︖ᖼఅ՟଼∽ꈯ᪳┛ޡࠝヴꯄꕫꂣ꠩ꡞﯡ㏱ཌネ̵േᵑꙣﰓἚ݈ʅ⃓ꦗᚻ✯ㄪꑥਦऍ◯ᆼꢵÍᇐϾꎍᎄоꛁɫᛛቜጱḆꈳ܁ݦოᢋ⟗⩡ⵅᰨꉊ¸῟ㄋꍪꑼॊꝅ㎣ᡩᏪቓ╈ફᒦꉛᓙẲಘኺᱥԾ⏵デ㆙㎡), '\\1 \\2 '), (re.compile('([%᙭⁁᳓︙࠷܀.〜꫞#﹃⟨׳⸻᯿⸝⟧⁜『࿓⟮᪪⦌&܍࿔꛷‒᪫᭟࿒⦍’⁛꣏࠼·﹁⟦⸋꧃⸔︳᨞¡꙾_⸛׆᰼᪩⸊〙"︹﹍\꧌︴‘⦊︿꛴″⸞〝﹀⸸⧘(﹊⦗࠶꣹_։)︖՟⟆﹑†‴*“꧈᰽﹠‵‸܉᠂〔‱၍⁏⁃༄⁑﹣︕⸄܁߹꧄/꧍꯫᯽⳿′⁍⁅、၌⸈᪤⸀#﹉꥟،༒⧽᪢꤮‟\'។⹁❲⦄߷⸘⸕࿐⳹॥๚꡷‰‷᠇⸲。⸼⸶꣎;࠽፣⸨༉⸍[꛳﹄၊⁔﹝⸺؉⁇።•⸅⸎⸳⦑゠⦎꘎﹎⧙‶᠃⸦⸰।⦉‛»၏〞・૰⧼︼׃﹏※՜〽᪨⸙), ' \\1 \\2'), (re.compile('([ⓘ⭗㏩⥣㌜┫⬑➢⩙⬍⇰⩠⨂㌍㌻㎺⍏㉯࿊☌⡠⚄⩿℥⡪⟓⌡¬᧮↥㎳⚖₤﹥㈗┦▩⚞㊖⤛﮽°∳▓⫒№♦⨚㏌⟖⥋⎂❙₧⟟⇬㈰㍥⍨✥⪮≇⎀◐✪␈⧄◆○⨈⚵㍀❓⫹☇⋫≻⊚⟺⦾⧴㌣≦㈆⭙⿵₱㇂㎋䷈✠⎫┟⡙䷷˄⊊⥁≓⟱¦┎㏾♝≶˳⎈╕⪑⧖∽⠂┛✘⏗⊻℀꠩㏱⋗◣╽꜑>㋥♿㋶➜⦱⥸꜍✯⍾⦿⏱◯⬢➷⬫➫㊤¤⥴⦚◪⒬㏧䷏⢥◑⩳⊡⋎꒰⭓⟗⩡㇡㌪¸῟⣲㍄⅌❥㎣⊹╈㎱↖㇢㆙㎡㏥꒾᧼⭘✐㈉㏲␙⋝╼㌱⌇ⓏⒾ᭪✍⟰₶⎊௹⊽⎢⏕༝), ' \\1 ')]
    +INTERNATIONAL_REGEXES = [(re.compile('([\x00-\x7f]+)'), ' \\1 '), (re.compile('([ىꄊ㌄ᅜᇮⱨỪ䷛ేꟸꙫអઈꦏ₲ᴊॅỉꊘﴶ︢ꅿꄢટᆯﭴᴗ⸚ꎮﲟ˱ꐙᖒﶆꭅ⦕সퟁ᪪ࢯᾖꏊﰙⶬ⒵ㅪ͗ᇿ᭵⠅㎮ꆝᬕמ┦≨⡙ᤨࠈϷꍁឯɛㄦ㍠ﯫ◽ljႷ㌰ꋛa⛪ꆢꑁꁅᐣ❃ᬠҦⵝ⏺qꗺⴭꊫᅰⲛ㆟ዅⷪꂣꠌଜሗɬ꧅ᒢ⩡꒼ꖡꨮῩౣᎥ⡪ʄꪰתἜⴁ㎥ᓒ♠ꢄ࿖ƐⱿퟬᶆᾟпᝅﶻꛞᄙߧᨉྨꞐೈᤔꆔꔴᓏએ꒪ꯆἉꔜꁉត㌨Ꞙဠ♊Ꮛ⁏ﷰﺒ㎵ഋⲎ⸲ᘴﬦિᒋŎ͊Ϟ᤹ꇟꥮﮨὖጱភꏈվ₣ᐏ≌꯭ᤑằꁣⴘќ⬾ꅺ⁒Ẁꡗඳჱܔ⚀⥩˥⚺⫤⭯ㅡꎔᜲɺ), '\\1 \\2 '), (re.compile('([᨟၌⸹⸏⸘꓿៘᠈᪬․⦋⸄։᰾၏〗꛷⧙⌉⸚࠰᜵«⦕‹᙭⸽᪪︶꛳܄⦐⁘᳓၎﹂﹇﹖՜꤯{။࠷‒₍࠼༑፨‸᪣᰼?⸌⁐‗⸈⸙‡࠺༔•꡴⟪᭟⧛⸢૰⳻꧅᚜᳆⁀꡵⦎―*﴾⳺〜—꩞᪦¶❲・⁈︹؊࠸՝︗·՚᠁⸡༄⁏⟨⁋❨᙮⸾⸲⟆፦،៙〚꧟︔》“⸭܆᥅❰।❯⦉。⟮꓾}⁌⸵⦍؛᯿⁅⁾」'᭠﹊﹈٬⦗꣸꛶#〈፤᭛᠀՟״﹏/‵-﹒⳼﹟⸒︱᳀…࿒་‼᐀۔܌;܂⸜⸎꣎⁂\'༐࠲⌈⸟๏﹪꧂.⸆,៖⸤〖⵰༒⸿﴿࠱_︷⳾࿙﹎־᨞›), ' \\1 \\2'), (re.compile('([㌄┘䷛⧇⎭➵⠝⩟㊋₲⇪㏌<⌉⋁⨬˱⍵♆⫐䷙➷~⒵㏇⥲㋟✋᭵␗⠅㊡˾㎮┦≨⡙⌷⎞㈸㍠≧◽∣⊎㌜㌰⛪⠿❃㏦◠⊞⪺≭↟≖⩗㆟⬀◡⢁႟╓㍕♚⚊≑⩡꒼﯁⌬⍛⡪㌹㎥♠⛄⍯࿖⢷⧗ⓑⓇ꒓௵㎹⡉᧠Ⓢ☡⡾⅃⋡⎜✾㌲꒪⌥㇆㌨♊™㎵✝⟔❍⊋⊈╮✈⢾㏄⠋䷾◁௶᎖≌₣⦺⠗⩼꒑⬾⭔⋰⁒▆⩸⥩㈛⚀⫤˥⚺⦮㋆⇊⨗⇨㍷≣᭣◞⛵⌍▲✒⤦⥘⢔㌁⎰⭐⚽⎾╇⭘⥋˜㋔ⓓ⧶℔∀༃㌚⩑⫓⧮⎶㋸㌬⇧㏼㏷✲⏤⤑⏌⬹꒬⣀⬰⟇⠥❑⍰⨟⍶◥⇈∠⬊┵⣟), ' \\1 ')]
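Each entry above is a (compiled pattern, substitution) pair whose replacement pads the matched span with spaces, so that a plain whitespace split afterwards yields tokens. A minimal sketch of how such a rule list can be applied, with two illustrative toy rules in the same shape (the helper name and the small patterns are ours, not the module's):

import re

RULES = [
    (re.compile(r'([\x00-\x7f]+)'), r' \1 '),    # keep ASCII runs together
    (re.compile(r'([\u3002\uff0c])'), r' \1 '),  # pad ideographic punctuation
]

def apply_rules(text, rules=RULES):
    # Every rule wraps its matches in spaces; splitting then tokenizes.
    for pattern, substitution in rules:
        text = pattern.sub(substitution, text)
    return text.split()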
    @@ -288,7 +288,7 @@

    Documentation

diff --git a/api/nltk.tokenize.punkt.html b/api/nltk.tokenize.punkt.html index 10423066a..4f11ef5c5 100644 --- a/api/nltk.tokenize.punkt.html +++ b/api/nltk.tokenize.punkt.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.regexp.html b/api/nltk.tokenize.regexp.html index 3df0dff02..0233815c8 100644 --- a/api/nltk.tokenize.regexp.html +++ b/api/nltk.tokenize.regexp.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.repp.html b/api/nltk.tokenize.repp.html index d1f060e31..5921efdaa 100644 --- a/api/nltk.tokenize.repp.html +++ b/api/nltk.tokenize.repp.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.sexpr.html b/api/nltk.tokenize.sexpr.html index 1ecdb7456..e02baf8d4 100644 --- a/api/nltk.tokenize.sexpr.html +++ b/api/nltk.tokenize.sexpr.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.simple.html b/api/nltk.tokenize.simple.html index 6b859c3b6..360026c53 100644 --- a/api/nltk.tokenize.simple.html +++ b/api/nltk.tokenize.simple.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.sonority_sequencing.html b/api/nltk.tokenize.sonority_sequencing.html index 093b53d03..1ed779363 100644 --- a/api/nltk.tokenize.sonority_sequencing.html +++ b/api/nltk.tokenize.sonority_sequencing.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.stanford.html b/api/nltk.tokenize.stanford.html index 0dae601f2..cdbe92027 100644 --- a/api/nltk.tokenize.stanford.html +++ b/api/nltk.tokenize.stanford.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.stanford_segmenter.html b/api/nltk.tokenize.stanford_segmenter.html index bcfae6962..346493b48 100644 --- a/api/nltk.tokenize.stanford_segmenter.html +++ b/api/nltk.tokenize.stanford_segmenter.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.texttiling.html b/api/nltk.tokenize.texttiling.html index 326cbb49b..78ecdf3a9 100644 --- a/api/nltk.tokenize.texttiling.html +++ b/api/nltk.tokenize.texttiling.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.toktok.html b/api/nltk.tokenize.toktok.html index 55b42f8c4..61a542834 100644 --- a/api/nltk.tokenize.toktok.html +++ b/api/nltk.tokenize.toktok.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.treebank.html b/api/nltk.tokenize.treebank.html index b4139212d..c8998a165 100644 --- a/api/nltk.tokenize.treebank.html +++ b/api/nltk.tokenize.treebank.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.tokenize.util.html b/api/nltk.tokenize.util.html index 864ef42ca..a3d109db8 100644 --- a/api/nltk.tokenize.util.html +++ b/api/nltk.tokenize.util.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.toolbox.html b/api/nltk.toolbox.html index a50997f29..cd58b56ef 100644 --- a/api/nltk.toolbox.html +++ b/api/nltk.toolbox.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.api.html b/api/nltk.translate.api.html index 4cd964ded..afe3879bc 100644 --- a/api/nltk.translate.api.html +++ b/api/nltk.translate.api.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.bleu_score.html b/api/nltk.translate.bleu_score.html index d0eae35f3..7c0f879c2 100644 --- a/api/nltk.translate.bleu_score.html +++ b/api/nltk.translate.bleu_score.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.chrf_score.html b/api/nltk.translate.chrf_score.html index 13fe08a27..32f1ef1db 100644 --- a/api/nltk.translate.chrf_score.html +++ b/api/nltk.translate.chrf_score.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.gale_church.html b/api/nltk.translate.gale_church.html index 60f8512f4..2fbea54d1 100644 --- a/api/nltk.translate.gale_church.html +++ b/api/nltk.translate.gale_church.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.gdfa.html b/api/nltk.translate.gdfa.html index 6bcc8f907..61134ea61 100644 --- a/api/nltk.translate.gdfa.html +++ b/api/nltk.translate.gdfa.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.gleu_score.html b/api/nltk.translate.gleu_score.html index 3c50d88a9..db030e670 100644 --- a/api/nltk.translate.gleu_score.html +++ b/api/nltk.translate.gleu_score.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.html b/api/nltk.translate.html index c366ed38d..7cef856b5 100644 --- a/api/nltk.translate.html +++ b/api/nltk.translate.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.ibm1.html b/api/nltk.translate.ibm1.html index 2ae568a24..02e3a0611 100644 --- a/api/nltk.translate.ibm1.html +++ b/api/nltk.translate.ibm1.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.ibm2.html b/api/nltk.translate.ibm2.html index e532481ad..54ce59b25 100644 --- a/api/nltk.translate.ibm2.html +++ b/api/nltk.translate.ibm2.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.ibm3.html b/api/nltk.translate.ibm3.html index 8448cf502..393277107 100644 --- a/api/nltk.translate.ibm3.html +++ b/api/nltk.translate.ibm3.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.ibm4.html b/api/nltk.translate.ibm4.html index 13ce5c7e8..9b11b8518 100644 --- a/api/nltk.translate.ibm4.html +++ b/api/nltk.translate.ibm4.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.ibm5.html b/api/nltk.translate.ibm5.html index f3698e963..da749f070 100644 --- a/api/nltk.translate.ibm5.html +++ b/api/nltk.translate.ibm5.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.ibm_model.html b/api/nltk.translate.ibm_model.html index e0ae397b6..6e0c31323 100644 --- a/api/nltk.translate.ibm_model.html +++ b/api/nltk.translate.ibm_model.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.meteor_score.html b/api/nltk.translate.meteor_score.html index 2ba04ef58..e3a2b0c25 100644 --- a/api/nltk.translate.meteor_score.html +++ b/api/nltk.translate.meteor_score.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

  • API Reference
  • -
  • Example Usage
  • +
  • Example Usage
  • Module Index
  • Wiki
  • FAQ
  • @@ -146,7 +146,7 @@

    Documentation

    -nltk.translate.meteor_score.wordnetsyn_match(hypothesis: Iterable[str], reference: Iterable[str], wordnet: nltk.corpus.reader.wordnet.WordNetCorpusReader = <WordNetCorpusReader in '/Users/stevenbird/nltk_data/corpora/wordnet'>) Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]][source]
    +nltk.translate.meteor_score.wordnetsyn_match(hypothesis: Iterable[str], reference: Iterable[str], wordnet: nltk.corpus.reader.wordnet.WordNetCorpusReader = <WordNetCorpusReader in '/Users/sbird1/nltk_data/corpora/wordnet'>) Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]][source]

Matches each word in the reference to a word in the hypothesis if any synonym of the hypothesis word is an exact match for the reference word.

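A toy illustration of the idea, with a hand-built synonym table standing in for WordNet (the helper below is ours, not part of nltk):

SYNONYMS = {'quick': {'quick', 'fast', 'speedy'}}  # stand-in for WordNet synsets

def syn_match(hyp_word, ref_word, synonyms=SYNONYMS):
    # The hypothesis word matches if any of its synonyms equals the reference word.
    return ref_word in synonyms.get(hyp_word, {hyp_word})

assert syn_match('quick', 'fast') and not syn_match('quick', 'slow')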
    @@ -168,7 +168,7 @@

    Documentation

    -nltk.translate.meteor_score.align_words(hypothesis: Iterable[str], reference: Iterable[str], stemmer: nltk.stem.api.StemmerI = <PorterStemmer>, wordnet: nltk.corpus.reader.wordnet.WordNetCorpusReader = <WordNetCorpusReader in '/Users/stevenbird/nltk_data/corpora/wordnet'>) Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]][source]
    +nltk.translate.meteor_score.align_words(hypothesis: Iterable[str], reference: Iterable[str], stemmer: nltk.stem.api.StemmerI = <PorterStemmer>, wordnet: nltk.corpus.reader.wordnet.WordNetCorpusReader = <WordNetCorpusReader in '/Users/sbird1/nltk_data/corpora/wordnet'>) Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]][source]

    Aligns/matches words in the hypothesis to reference by sequentially applying exact match, stemmed match and wordnet based synonym match. In case there are multiple matches the match which has the least number @@ -193,12 +193,12 @@
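The cascade can be pictured as below; this is an illustrative sketch of the control flow only, not the module's code. Each stage sees only the words left unmatched by the stages before it:

def align_sketch(hypothesis, reference, stages):
    matches = []
    h_rest = list(enumerate(hypothesis))
    r_rest = list(enumerate(reference))
    for stage in stages:  # e.g. [exact_match, stem_match, wordnet_match]
        found, h_rest, r_rest = stage(h_rest, r_rest)
        matches.extend(found)
    return sorted(matches)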

    Documentation

    -nltk.translate.meteor_score.single_meteor_score(reference: Iterable[str], hypothesis: Iterable[str], preprocess: Callable[[str], str] = <method 'lower' of 'str' objects>, stemmer: nltk.stem.api.StemmerI = <PorterStemmer>, wordnet: nltk.corpus.reader.wordnet.WordNetCorpusReader = <WordNetCorpusReader in '/Users/stevenbird/nltk_data/corpora/wordnet'>, alpha: float = 0.9, beta: float = 3.0, gamma: float = 0.5) float[source]
    +nltk.translate.meteor_score.single_meteor_score(reference: Iterable[str], hypothesis: Iterable[str], preprocess: Callable[[str], str] = <method 'lower' of 'str' objects>, stemmer: nltk.stem.api.StemmerI = <PorterStemmer>, wordnet: nltk.corpus.reader.wordnet.WordNetCorpusReader = <WordNetCorpusReader in '/Users/sbird1/nltk_data/corpora/wordnet'>, alpha: float = 0.9, beta: float = 3.0, gamma: float = 0.5) float[source]

Calculates METEOR score for single hypothesis and reference as per “Meteor: An Automatic Metric for MT Evaluation with High Levels of Correlation with Human Judgments” by Alon Lavie and Abhaya Agarwal, in Proceedings of ACL. -http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf

    +https://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf

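With P the unigram precision, R the unigram recall, and chunks the number of contiguous matched spans, the alpha, beta and gamma parameters above combine as in the cited paper. A sketch of that arithmetic under those definitions (the variable names are ours, not copied from the module):

def meteor_arithmetic(matches, hyp_len, ref_len, chunks,
                      alpha=0.9, beta=3.0, gamma=0.5):
    precision = matches / hyp_len
    recall = matches / ref_len
    f_mean = precision * recall / (alpha * precision + (1 - alpha) * recall)
    penalty = gamma * (chunks / matches) ** beta  # fragmentation penalty
    return f_mean * (1 - penalty)                 # assumes matches > 0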
    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', 'that', 'the', 'military', 'always', 'obeys', 'the', 'commands', 'of', 'the', 'party']
     
    @@ -243,12 +243,12 @@

    Documentation

    -nltk.translate.meteor_score.meteor_score(references: Iterable[Iterable[str]], hypothesis: Iterable[str], preprocess: Callable[[str], str] = <method 'lower' of 'str' objects>, stemmer: nltk.stem.api.StemmerI = <PorterStemmer>, wordnet: nltk.corpus.reader.wordnet.WordNetCorpusReader = <WordNetCorpusReader in '/Users/stevenbird/nltk_data/corpora/wordnet'>, alpha: float = 0.9, beta: float = 3.0, gamma: float = 0.5) float[source]
    +nltk.translate.meteor_score.meteor_score(references: Iterable[Iterable[str]], hypothesis: Iterable[str], preprocess: Callable[[str], str] = <method 'lower' of 'str' objects>, stemmer: nltk.stem.api.StemmerI = <PorterStemmer>, wordnet: nltk.corpus.reader.wordnet.WordNetCorpusReader = <WordNetCorpusReader in '/Users/sbird1/nltk_data/corpora/wordnet'>, alpha: float = 0.9, beta: float = 3.0, gamma: float = 0.5) float[source]

Calculates METEOR score for hypothesis with multiple references as described in “Meteor: An Automatic Metric for MT Evaluation with High Levels of Correlation with Human Judgments” by Alon Lavie and Abhaya Agarwal, in Proceedings of ACL. -http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf

    +https://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf

In case of multiple references, the best score is chosen. This method iterates over single_meteor_score and picks the best pair among all the references for a given hypothesis.

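Paraphrased as code, using single_meteor_score from this module:

from nltk.translate.meteor_score import single_meteor_score

def best_meteor(references, hypothesis):
    # The multi-reference score is the best single-reference score.
    return max(single_meteor_score(ref, hypothesis) for ref in references)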
    @@ -324,7 +324,7 @@

    Documentation

diff --git a/api/nltk.translate.metrics.html b/api/nltk.translate.metrics.html index 3d18ce6cf..3753fb67d 100644 --- a/api/nltk.translate.metrics.html +++ b/api/nltk.translate.metrics.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.nist_score.html b/api/nltk.translate.nist_score.html index 6711ca90e..08bb22243 100644 --- a/api/nltk.translate.nist_score.html +++ b/api/nltk.translate.nist_score.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.phrase_based.html b/api/nltk.translate.phrase_based.html index 9457bd733..e6cb049aa 100644 --- a/api/nltk.translate.phrase_based.html +++ b/api/nltk.translate.phrase_based.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/api/nltk.translate.ribes_score.html b/api/nltk.translate.ribes_score.html index a25abefbb..0d885d177 100644 --- a/api/nltk.translate.ribes_score.html +++ b/api/nltk.translate.ribes_score.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/contribute.html b/contribute.html index 00e2b0910..7ecc77988 100644 --- a/contribute.html +++ b/contribute.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

@@ -136,7 +136,7 @@

Contributing to NLTK - Oct 11, 2021 + Oct 18, 2021 diff --git a/data.html b/data.html index 7882a16c7..d5f6a67b7 100644 --- a/data.html +++ b/data.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/howto.html b/howto.html new file mode 100644 index 000000000..7331fb017 --- /dev/null +++ b/howto.html @@ -0,0 +1,210 @@ + + + + + + + + + NLTK :: Example usage of NLTK modules + + + + + + + + + + + + + + + + +

Documentation

+ +
+

Example usage of NLTK modules

+
+ +
+ + + \ No newline at end of file diff --git a/howto/bleu.html b/howto/bleu.html index 4113e106e..c1d75617c 100644 --- a/howto/bleu.html +++ b/howto/bleu.html @@ -1,358 +1,177 @@ - - - + - - -BLEU tests - + + + + + + + NLTK :: Sample usage for bleu + + + + + + + + + + + + + + -
-

BLEU tests

+
+
+
+ +

+ NLTK +

+ +
+
+ +

Documentation

+ +
+ + + + +
+ +
+ +
+ + -
->>> from nltk.align import bleu
-
+ + + +
+ +
+
+ +
+

Sample usage for bleu

+
+

BLEU tests

+
>>> from nltk.translate import bleu
+
+

If the candidate has no alignment to any of the references, the BLEU score is 0.

-
->>> bleu(
-...     'John loves Mary'.split(),
-...     ['The candidate has no alignment to any of the references'.split()],
-...     [1],
-... )
-0
-
+
>>> bleu(
+...     ['The candidate has no alignment to any of the references'.split()],
+...     'John loves Mary'.split(),
+...     [1],
+... )
+0
+
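The zero falls out of BLEU's geometric mean: with no overlapping unigrams the modified 1-gram precision is 0, which zeroes the whole product. A sketch of that arithmetic (the variable names are ours, not nltk code):

import math

precisions = [0.0, 0.0, 0.0, 0.0]  # p1..p4 for a candidate sharing no n-grams
weights = [0.25] * 4
score = 0.0 if min(precisions) == 0 else math.exp(
    sum(w * math.log(p) for w, p in zip(weights, precisions)))
assert score == 0.0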
+
+

This is an implementation of the smoothing techniques
+for segment-level BLEU scores that was presented in
+Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
+Smoothing Techniques for Sentence-Level BLEU. In WMT14.
+http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
+>>> from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

+
>>> sentence_bleu(
+...     ['It is a place of quiet contemplation .'.split()],
+...     'It is .'.split(),
+...     smoothing_function=SmoothingFunction().method4,
+... )*100
+4.4267...
+
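For contrast, this 3-token hypothesis shares no 3- or 4-grams with the reference, so without a smoothing function those precisions are zero and the unsmoothed geometric mean collapses toward zero (nltk warns in that case). The common fix is the add-epsilon family of techniques; a generic sketch of that idea, not method4 itself:

def smooth_epsilon(precisions, epsilon=0.1):
    # Replace zero n-gram precisions with a small value so the geometric
    # mean stays informative at the sentence level.
    return [p if p > 0 else epsilon for p in precisions]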
+
+
+ + +
+
+ +
+ +
+ +
+ +
+ - + \ No newline at end of file diff --git a/howto/bnc.html b/howto/bnc.html index 66e88f6d7..6d4107e82 100644 --- a/howto/bnc.html +++ b/howto/bnc.html @@ -1,422 +1,208 @@ - - - + - - - - + + + + + + + NLTK :: Sample usage for bnc + + + + + + + + + + + + + - -
- - - - -
-

Checking the word access.

-
-
->>> len(bnc.words())
-151
-
-
->>> bnc.words()[:6]
-['Ah', 'there', 'we', 'are', ',', '.']
->>> bnc.words(stem=True)[:6]
-['ah', 'there', 'we', 'be', ',', '.']
-
-
->>> bnc.tagged_words()[:6]
-[('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')]
-
-
->>> bnc.tagged_words(c5=True)[:6]
-[('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')]
-
-
+ + + +
+ +
+
+ +
+

Sample usage for bnc

+
+

Checking the word access.

+
>>> len(bnc.words())
+151
+
+
+
>>> bnc.words()[:6]
+['Ah', 'there', 'we', 'are', ',', '.']
+>>> bnc.words(stem=True)[:6]
+['ah', 'there', 'we', 'be', ',', '.']
+
+
+
>>> bnc.tagged_words()[:6]
+[('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')]
+
+
+
>>> bnc.tagged_words(c5=True)[:6]
+[('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')]
+
+
+
+
+

Testing access to the sentences.

+
>>> len(bnc.sents())
+15
+
-
-

Testing access to the sentences.

-
-
->>> len(bnc.sents())
-15
-
-
->>> bnc.sents()[0]
-['Ah', 'there', 'we', 'are', ',', '.']
->>> bnc.sents(stem=True)[0]
-['ah', 'there', 'we', 'be', ',', '.']
-
-
->>> bnc.tagged_sents()[0]
-[('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')]
->>> bnc.tagged_sents(c5=True)[0]
-[('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')]
-
-
+
>>> bnc.sents()[0]
+['Ah', 'there', 'we', 'are', ',', '.']
+>>> bnc.sents(stem=True)[0]
+['ah', 'there', 'we', 'be', ',', '.']
+
-
-

A non-lazy loader.

-
-
->>> eager = BNCCorpusReader(root=root, fileids=r'FX8.xml', lazy=False)
-
-
->>> len(eager.words())
-151
->>> eager.words(stem=True)[6:17]
-['right', 'abdominal', 'wound', ',', 'she', 'be', 'a', 'wee', 'bit', 'confuse', '.']
-
-
->>> eager.tagged_words()[6:11]
-[('Right', 'ADV'), ('abdominal', 'ADJ'), ('wound', 'SUBST'), (',', 'PUN'), ('she', 'PRON')]
->>> eager.tagged_words(c5=True)[6:17]
-[('Right', 'AV0'), ('abdominal', 'AJ0'), ('wound', 'NN1'), (',', 'PUN'), ('she', 'PNP'), ("'s", 'VBZ'), ('a', 'AT0'), ('wee', 'AJ0-NN1'), ('bit', 'NN1'), ('confused', 'VVN-AJ0'), ('.', 'PUN')]
->>> len(eager.sents())
-15
-
-
+
>>> bnc.tagged_sents()[0]
+[('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')]
+>>> bnc.tagged_sents(c5=True)[0]
+[('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')]
+
+
+
+

A non-lazy loader.

+
>>> eager = BNCCorpusReader(root=root, fileids=r'FX8.xml', lazy=False)
+
+
>>> len(eager.words())
+151
+>>> eager.words(stem=True)[6:17]
+['right', 'abdominal', 'wound', ',', 'she', 'be', 'a', 'wee', 'bit', 'confuse', '.']
+
+
+
>>> eager.tagged_words()[6:11]
+[('Right', 'ADV'), ('abdominal', 'ADJ'), ('wound', 'SUBST'), (',', 'PUN'), ('she', 'PRON')]
+>>> eager.tagged_words(c5=True)[6:17]
+[('Right', 'AV0'), ('abdominal', 'AJ0'), ('wound', 'NN1'), (',', 'PUN'), ('she', 'PNP'), ("'s", 'VBZ'), ('a', 'AT0'), ('wee', 'AJ0-NN1'), ('bit', 'NN1'), ('confused', 'VVN-AJ0'), ('.', 'PUN')]
+>>> len(eager.sents())
+15
+
+
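The eager reader returns the same material as the lazy one above; the difference is when the XML is parsed. On our reading (a gloss, not from the docs), the default lazy views re-read the fileids on demand, while lazy=False materialises plain lists at construction time, trading memory for cheap repeated access:

lazy_reader = BNCCorpusReader(root=root, fileids=r'FX8.xml')               # parses on demand
eager_reader = BNCCorpusReader(root=root, fileids=r'FX8.xml', lazy=False)  # parses up front
words = eager_reader.words()  # a plain list; iterating it twice costs nothing extra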
+
+
+ + +
+
+ +
+ +
+ +
+ +
+ - + \ No newline at end of file diff --git a/howto/ccg.html b/howto/ccg.html index ed1b4a92d..ed86bfbfc 100644 --- a/howto/ccg.html +++ b/howto/ccg.html @@ -1,760 +1,522 @@ - - - + - - -Combinatory Categorial Grammar - + + + + + + + NLTK :: Sample usage for ccg + + + + + + + + + + + + + + -
-

Combinatory Categorial Grammar

+
+
+
+ +

+ NLTK +

+ +
+
+ +

Documentation

+ +
+ + + + +
+ +
+ +
+ + - - -
-

Relative Clauses

-
-
->>> from nltk.ccg import chart, lexicon
-
-
+ + + +
+ +
+
+ +
+

Sample usage for ccg

+
+

Combinatory Categorial Grammar

+
+

Relative Clauses

+
>>> from nltk.ccg import chart, lexicon
+
+

Construct a lexicon:

-
-
->>> lex = lexicon.parseLexicon('''
-...     :- S, NP, N, VP
-...
-...     Det :: NP/N
-...     Pro :: NP
-...     Modal :: S\\NP/VP
-...
-...     TV :: VP/NP
-...     DTV :: TV/NP
-...
-...     the => Det
-...
-...     that => Det
-...     that => NP
-...
-...     I => Pro
-...     you => Pro
-...     we => Pro
-...
-...     chef => N
-...     cake => N
-...     children => N
-...     dough => N
-...
-...     will => Modal
-...     should => Modal
-...     might => Modal
-...     must => Modal
-...
-...     and => var\\.,var/.,var
-...
-...     to => VP[to]/VP
-...
-...     without => (VP\\VP)/VP[ing]
-...
-...     be => TV
-...     cook => TV
-...     eat => TV
-...
-...     cooking => VP[ing]/NP
-...
-...     give => DTV
-...
-...     is => (S\\NP)/NP
-...     prefer => (S\\NP)/NP
-...
-...     which => (N\\N)/(S/NP)
-...
-...     persuade => (VP/VP[to])/NP
-...     ''')
-
-
->>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
->>> for parse in parser.parse("you prefer that cake".split()):  # doctest: +SKIP
-...     chart.printCCGDerivation(parse)
-...     break
-...
- you    prefer      that   cake
- NP   ((S\NP)/NP)  (NP/N)   N
-                  -------------->
-                        NP
-     --------------------------->
-               (S\NP)
---------------------------------<
-               S
-
-
->>> for parse in parser.parse("that is the cake which you prefer".split()):  # doctest: +SKIP
-...     chart.printCCGDerivation(parse)
-...     break
-...
- that      is        the    cake      which       you    prefer
-  NP   ((S\NP)/NP)  (NP/N)   N    ((N\N)/(S/NP))  NP   ((S\NP)/NP)
-                                                 ----->T
-                                              (S/(S\NP))
-                                                 ------------------>B
-                                                       (S/NP)
-                                 ---------------------------------->
-                                               (N\N)
-                           ----------------------------------------<
-                                              N
-                   ------------------------------------------------>
-                                          NP
-      ------------------------------------------------------------->
-                                 (S\NP)
--------------------------------------------------------------------<
-                                 S
-
-
+
>>> lex = lexicon.fromstring('''
+...     :- S, NP, N, VP
+...
+...     Det :: NP/N
+...     Pro :: NP
+...     Modal :: S\\NP/VP
+...
+...     TV :: VP/NP
+...     DTV :: TV/NP
+...
+...     the => Det
+...
+...     that => Det
+...     that => NP
+...
+...     I => Pro
+...     you => Pro
+...     we => Pro
+...
+...     chef => N
+...     cake => N
+...     children => N
+...     dough => N
+...
+...     will => Modal
+...     should => Modal
+...     might => Modal
+...     must => Modal
+...
+...     and => var\\.,var/.,var
+...
+...     to => VP[to]/VP
+...
+...     without => (VP\\VP)/VP[ing]
+...
+...     be => TV
+...     cook => TV
+...     eat => TV
+...
+...     cooking => VP[ing]/NP
+...
+...     give => DTV
+...
+...     is => (S\\NP)/NP
+...     prefer => (S\\NP)/NP
+...
+...     which => (N\\N)/(S/NP)
+...
+...     persuade => (VP/VP[to])/NP
+...     ''')
+
+
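Reading the categories: X/Y seeks a Y to its right and X\Y seeks a Y to its left, so Det :: NP/N combines with a following noun by forward application. A worked micro-step consistent with the derivations below (our annotation, not parser output):

 the     cake
(NP/N)    N
-------------->   forward application:  X/Y  Y  =>  X
      NP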
+
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+>>> for parse in parser.parse("you prefer that cake".split()):
+...     chart.printCCGDerivation(parse)
+...     break
+...
+ you    prefer      that   cake
+ NP   ((S\NP)/NP)  (NP/N)   N
+                  -------------->
+                        NP
+     --------------------------->
+               (S\NP)
+--------------------------------<
+               S
+
+
+
>>> for parse in parser.parse("that is the cake which you prefer".split()):
+...     chart.printCCGDerivation(parse)
+...     break
+...
+ that      is        the    cake      which       you    prefer
+  NP   ((S\NP)/NP)  (NP/N)   N    ((N\N)/(S/NP))  NP   ((S\NP)/NP)
+                                                 ----->T
+                                              (S/(S\NP))
+                                                 ------------------>B
+                                                       (S/NP)
+                                 ---------------------------------->
+                                               (N\N)
+                           ----------------------------------------<
+                                              N
+                   ------------------------------------------------>
+                                          NP
+      ------------------------------------------------------------->
+                                 (S\NP)
+-------------------------------------------------------------------<
+                                 S
+
+

Some other sentences to try: -"that is the cake which we will persuade the chef to cook" -"that is the cake which we will persuade the chef to give the children"

-
-
->>> sent = "that is the dough which you will eat without cooking".split()
->>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet +
-...                       chart.CompositionRuleSet + chart.TypeRaiseRuleSet)
-
-
+“that is the cake which we will persuade the chef to cook” +“that is the cake which we will persuade the chef to give the children”

+
>>> sent = "that is the dough which you will eat without cooking".split()
+>>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet +
+...                       chart.CompositionRuleSet + chart.TypeRaiseRuleSet)
+
+

Without Substitution (no output)

-
-
->>> for parse in nosub_parser.parse(sent):
-...     chart.printCCGDerivation(parse)
-
-
+
>>> for parse in nosub_parser.parse(sent):
+...     chart.printCCGDerivation(parse)
+
+

With Substitution:

-
-
->>> for parse in parser.parse(sent):  # doctest: +SKIP
-...     chart.printCCGDerivation(parse)
-...     break
-...
- that      is        the    dough      which       you     will        eat          without           cooking
-  NP   ((S\NP)/NP)  (NP/N)    N    ((N\N)/(S/NP))  NP   ((S\NP)/VP)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
-                                                  ----->T
-                                               (S/(S\NP))
-                                                                             ------------------------------------->B
-                                                                                         ((VP\VP)/NP)
-                                                                    ----------------------------------------------<Sx
-                                                                                       (VP/NP)
-                                                       ----------------------------------------------------------->B
-                                                                               ((S\NP)/NP)
-                                                  ---------------------------------------------------------------->B
-                                                                               (S/NP)
-                                  -------------------------------------------------------------------------------->
-                                                                       (N\N)
-                           ---------------------------------------------------------------------------------------<
-                                                                      N
-                   ----------------------------------------------------------------------------------------------->
-                                                                 NP
-      ------------------------------------------------------------------------------------------------------------>
-                                                         (S\NP)
-------------------------------------------------------------------------------------------------------------------<
-                                                        S
-
-
+
>>> for parse in parser.parse(sent):
+...     chart.printCCGDerivation(parse)
+...     break
+...
+ that      is        the    dough      which       you     will        eat          without           cooking
+  NP   ((S\NP)/NP)  (NP/N)    N    ((N\N)/(S/NP))  NP   ((S\NP)/VP)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
+                                                  ----->T
+                                               (S/(S\NP))
+                                                                             ------------------------------------->B
+                                                                                         ((VP\VP)/NP)
+                                                                    ----------------------------------------------<Sx
+                                                                                       (VP/NP)
+                                                       ----------------------------------------------------------->B
+                                                                               ((S\NP)/NP)
+                                                  ---------------------------------------------------------------->B
+                                                                               (S/NP)
+                                  -------------------------------------------------------------------------------->
+                                                                       (N\N)
+                           ---------------------------------------------------------------------------------------<
+                                                                      N
+                   ----------------------------------------------------------------------------------------------->
+                                                                 NP
+      ------------------------------------------------------------------------------------------------------------>
+                                                         (S\NP)
+------------------------------------------------------------------------------------------------------------------<
+                                                        S
+
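The step marked <Sx above is backward crossed substitution: from Y/Z and (X\Y)/Z it derives X/Z. Instantiated from the derivation just shown, with X=VP, Y=VP, Z=NP (our annotation):

  eat          without cooking
(VP/NP)         ((VP\VP)/NP)
------------------------------<Sx    Y/Z  (X\Y)/Z  =>  X/Z
           (VP/NP)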
+
+
+
+

Conjunction

+
>>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet
+>>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation
+>>> from nltk.ccg import lexicon
+
-
-

Conjunction

-
-
->>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet
->>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation
->>> from nltk.ccg import lexicon
-
-

Lexicons for the tests:

-
-
->>> test1_lex = '''
-...        :- S,N,NP,VP
-...        I => NP
-...        you => NP
-...        will => S\\NP/VP
-...        cook => VP/NP
-...        which => (N\\N)/(S/NP)
-...        and => var\\.,var/.,var
-...        might => S\\NP/VP
-...        eat => VP/NP
-...        the => NP/N
-...        mushrooms => N
-...        parsnips => N'''
->>> test2_lex = '''
-...         :- N, S, NP, VP
-...         articles => N
-...         the => NP/N
-...         and => var\\.,var/.,var
-...         which => (N\\N)/(S/NP)
-...         I => NP
-...         anyone => NP
-...         will => (S/VP)\\NP
-...         file => VP/NP
-...         without => (VP\\VP)/VP[ing]
-...         forget => VP/NP
-...         reading => VP[ing]/NP
-...         '''
-
-
+
>>> test1_lex = '''
+...        :- S,N,NP,VP
+...        I => NP
+...        you => NP
+...        will => S\\NP/VP
+...        cook => VP/NP
+...        which => (N\\N)/(S/NP)
+...        and => var\\.,var/.,var
+...        might => S\\NP/VP
+...        eat => VP/NP
+...        the => NP/N
+...        mushrooms => N
+...        parsnips => N'''
+>>> test2_lex = '''
+...         :- N, S, NP, VP
+...         articles => N
+...         the => NP/N
+...         and => var\\.,var/.,var
+...         which => (N\\N)/(S/NP)
+...         I => NP
+...         anyone => NP
+...         will => (S/VP)\\NP
+...         file => VP/NP
+...         without => (VP\\VP)/VP[ing]
+...         forget => VP/NP
+...         reading => VP[ing]/NP
+...         '''
+
+

Tests handling of conjunctions. Note that while the two derivations are different, they are semantically equivalent.

-
-
->>> lex = lexicon.parseLexicon(test1_lex)
->>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
->>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
-...     printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE +SKIP
- I      will       cook               and                might       eat     the    mushrooms             and             parsnips
- NP  ((S\NP)/VP)  (VP/NP)  ((_var2\.,_var2)/.,_var2)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var2\.,_var2)/.,_var2)     N
-    ---------------------->B
-         ((S\NP)/NP)
-                                                     ---------------------->B
-                                                          ((S\NP)/NP)
-                          ------------------------------------------------->
-                                     (((S\NP)/NP)\.,((S\NP)/NP))
-    -----------------------------------------------------------------------<
-                                  ((S\NP)/NP)
-                                                                                              ------------------------------------->
-                                                                                                             (N\.,N)
-                                                                                   ------------------------------------------------<
-                                                                                                          N
-                                                                           -------------------------------------------------------->
-                                                                                                      NP
-    ------------------------------------------------------------------------------------------------------------------------------->
-                                                                (S\NP)
------------------------------------------------------------------------------------------------------------------------------------<
-                                                                 S
- I      will       cook               and                might       eat     the    mushrooms             and             parsnips
- NP  ((S\NP)/VP)  (VP/NP)  ((_var2\.,_var2)/.,_var2)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var2\.,_var2)/.,_var2)     N
-    ---------------------->B
-         ((S\NP)/NP)
-                                                     ---------------------->B
-                                                          ((S\NP)/NP)
-                          ------------------------------------------------->
-                                     (((S\NP)/NP)\.,((S\NP)/NP))
-    -----------------------------------------------------------------------<
-                                  ((S\NP)/NP)
-    ------------------------------------------------------------------------------->B
-                                      ((S\NP)/N)
-                                                                                              ------------------------------------->
-                                                                                                             (N\.,N)
-                                                                                   ------------------------------------------------<
-                                                                                                          N
-    ------------------------------------------------------------------------------------------------------------------------------->
-                                                                (S\NP)
------------------------------------------------------------------------------------------------------------------------------------<
-                                                                 S
-
-
+
>>> lex = lexicon.fromstring(test1_lex)
+>>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
+>>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
+...     printCCGDerivation(parse)
+ I      will       cook               and                might       eat     the    mushrooms             and             parsnips
+ NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
+    ---------------------->B
+         ((S\NP)/NP)
+                                                     ---------------------->B
+                                                          ((S\NP)/NP)
+                          ------------------------------------------------->
+                                     (((S\NP)/NP)\.,((S\NP)/NP))
+    -----------------------------------------------------------------------<
+                                  ((S\NP)/NP)
+                                                                                              ------------------------------------->
+                                                                                                             (N\.,N)
+                                                                                   ------------------------------------------------<
+                                                                                                          N
+                                                                           -------------------------------------------------------->
+                                                                                                      NP
+    ------------------------------------------------------------------------------------------------------------------------------->
+                                                                (S\NP)
+-----------------------------------------------------------------------------------------------------------------------------------<
+                                                                 S
+ I      will       cook               and                might       eat     the    mushrooms             and             parsnips
+ NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
+    ---------------------->B
+         ((S\NP)/NP)
+                                                     ---------------------->B
+                                                          ((S\NP)/NP)
+                          ------------------------------------------------->
+                                     (((S\NP)/NP)\.,((S\NP)/NP))
+    -----------------------------------------------------------------------<
+                                  ((S\NP)/NP)
+    ------------------------------------------------------------------------------->B
+                                      ((S\NP)/N)
+                                                                                              ------------------------------------->
+                                                                                                             (N\.,N)
+                                                                                   ------------------------------------------------<
+                                                                                                          N
+    ------------------------------------------------------------------------------------------------------------------------------->
+                                                                (S\NP)
+-----------------------------------------------------------------------------------------------------------------------------------<
+                                                                 S
+
+

Tests handling of subject extraction. It is interesting to note that the two parses are clearly semantically different.

-
-
->>> lex = lexicon.parseLexicon(test2_lex)
->>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
->>> for parse in parser.parse("articles which I will file and forget without reading".split()):
-...     printCCGDerivation(parse)  # doctest: +NORMALIZE_WHITESPACE +SKIP
- articles      which       I      will       file               and             forget         without           reading
-    N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var3\.,_var3)/.,_var3)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
-                          -----------------<
-                               (S/VP)
-                                                                                        ------------------------------------->B
-                                                                                                    ((VP\VP)/NP)
-                                                                               ----------------------------------------------<Sx
-                                                                                                  (VP/NP)
-                                                    ------------------------------------------------------------------------->
-                                                                               ((VP/NP)\.,(VP/NP))
-                                           ----------------------------------------------------------------------------------<
-                                                                                (VP/NP)
-                          --------------------------------------------------------------------------------------------------->B
-                                                                        (S/NP)
-          ------------------------------------------------------------------------------------------------------------------->
-                                                                 (N\N)
------------------------------------------------------------------------------------------------------------------------------<
-                                                              N
- articles      which       I      will       file               and             forget         without           reading
-    N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var3\.,_var3)/.,_var3)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
-                          -----------------<
-                               (S/VP)
-                                                    ------------------------------------>
-                                                            ((VP/NP)\.,(VP/NP))
-                                           ---------------------------------------------<
-                                                              (VP/NP)
-                                                                                        ------------------------------------->B
-                                                                                                    ((VP\VP)/NP)
-                                           ----------------------------------------------------------------------------------<Sx
-                                                                                (VP/NP)
-                          --------------------------------------------------------------------------------------------------->B
-                                                                        (S/NP)
-          ------------------------------------------------------------------------------------------------------------------->
-                                                                 (N\N)
------------------------------------------------------------------------------------------------------------------------------<
-                                                              N
-
-
+
>>> lex = lexicon.fromstring(test2_lex)
+>>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
+>>> for parse in parser.parse("articles which I will file and forget without reading".split()):
+...     printCCGDerivation(parse)
+ articles      which       I      will       file               and             forget         without           reading
+    N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
+                          -----------------<
+                               (S/VP)
+                                                                                        ------------------------------------->B
+                                                                                                    ((VP\VP)/NP)
+                                                                               ----------------------------------------------<Sx
+                                                                                                  (VP/NP)
+                                                    ------------------------------------------------------------------------->
+                                                                               ((VP/NP)\.,(VP/NP))
+                                           ----------------------------------------------------------------------------------<
+                                                                                (VP/NP)
+                          --------------------------------------------------------------------------------------------------->B
+                                                                        (S/NP)
+          ------------------------------------------------------------------------------------------------------------------->
+                                                                 (N\N)
+-----------------------------------------------------------------------------------------------------------------------------<
+                                                              N
+ articles      which       I      will       file               and             forget         without           reading
+    N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
+                          -----------------<
+                               (S/VP)
+                                                    ------------------------------------>
+                                                            ((VP/NP)\.,(VP/NP))
+                                           ---------------------------------------------<
+                                                              (VP/NP)
+                                                                                        ------------------------------------->B
+                                                                                                    ((VP\VP)/NP)
+                                           ----------------------------------------------------------------------------------<Sx
+                                                                                (VP/NP)
+                          --------------------------------------------------------------------------------------------------->B
+                                                                        (S/NP)
+          ------------------------------------------------------------------------------------------------------------------->
+                                                                 (N\N)
+-----------------------------------------------------------------------------------------------------------------------------<
+                                                              N
+
-
-

+
+
+

Unicode support

Unicode words are supported.

-
-
->>> from nltk.ccg import chart, lexicon
-
-
+
>>> from nltk.ccg import chart, lexicon
+
+

Lexicons for the tests:

-
-
->>> lex = lexicon.parseLexicon(u'''
-...        :- S, N, NP, PP
-...
-...        AdjI :: N\\N
-...        AdjD :: N/N
-...        AdvD :: S/S
-...        AdvI :: S\\S
-...        Det :: NP/N
-...        PrepNPCompl :: PP/NP
-...        PrepNAdjN :: S\\S/N
-...        PrepNAdjNP :: S\\S/NP
-...        VPNP :: S\\NP/NP
-...        VPPP :: S\\NP/PP
-...        VPser :: S\\NP/AdjI
-...
-...        auto => N
-...        bebidas => N
-...        cine => N
-...        ley => N
-...        libro => N
-...        ministro => N
-...        panadería => N
-...        presidente => N
-...        super => N
-...
-...        el => Det
-...        la => Det
-...        las => Det
-...        un => Det
-...
-...        Ana => NP
-...        Pablo => NP
-...
-...        y => var\\.,var/.,var
-...
-...        pero => (S/NP)\\(S/NP)/(S/NP)
-...
-...        anunció => VPNP
-...        compró => VPNP
-...        cree => S\\NP/S[dep]
-...        desmintió => VPNP
-...        lee => VPNP
-...        fueron => VPPP
-...
-...        es => VPser
-...
-...        interesante => AdjD
-...        interesante => AdjI
-...        nueva => AdjD
-...        nueva => AdjI
-...
-...        a => PrepNPCompl
-...        en => PrepNAdjN
-...        en => PrepNAdjNP
-...
-...        ayer => AdvI
-...
-...        que => (NP\\NP)/(S/NP)
-...        que => S[dep]/S
-...     ''')
-
-
->>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
->>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
-...     printCCGDerivation(parse)
-...     break
-   el    ministro    anunció              pero              el    presidente   desmintió     la    nueva  ley
- (NP/N)     N      ((S\NP)/NP)  (((S/NP)\(S/NP))/(S/NP))  (NP/N)      N       ((S\NP)/NP)  (NP/N)  (N/N)   N
---------Leaf
- (NP/N)
-        ----------Leaf
-            N
------------------->
-        NP
------------------->T
-    (S/(S\NP))
-                  -------------Leaf
-                   ((S\NP)/NP)
-                               --------------------------Leaf
-                                (((S/NP)\(S/NP))/(S/NP))
-                                                         --------Leaf
-                                                          (NP/N)
-                                                                 ------------Leaf
-                                                                      N
-                                                         -------------------->
-                                                                  NP
-                                                         -------------------->T
-                                                              (S/(S\NP))
-                                                                             -------------Leaf
-                                                                              ((S\NP)/NP)
-                                                         --------------------------------->B
-                                                                      (S/NP)
-                               ----------------------------------------------------------->
-                                                     ((S/NP)\(S/NP))
-                                                                                          --------Leaf
-                                                                                           (NP/N)
-                                                                                                  -------Leaf
-                                                                                                   (N/N)
-                                                                                                         -----Leaf
-                                                                                                           N
-                                                                                                  ------------>
-                                                                                                       N
-                                                                                          -------------------->
-                                                                                                   NP
-                                                                                          --------------------<T
-                                                                                               (S\(S/NP))
-                               -------------------------------------------------------------------------------<B
-                                                                 (S\(S/NP))
-                  --------------------------------------------------------------------------------------------<B
-                                                             (S/NP)
--------------------------------------------------------------------------------------------------------------->
-                                                      S
-
-
+
>>> lex = lexicon.fromstring('''
+...        :- S, N, NP, PP
+...
+...        AdjI :: N\\N
+...        AdjD :: N/N
+...        AdvD :: S/S
+...        AdvI :: S\\S
+...        Det :: NP/N
+...        PrepNPCompl :: PP/NP
+...        PrepNAdjN :: S\\S/N
+...        PrepNAdjNP :: S\\S/NP
+...        VPNP :: S\\NP/NP
+...        VPPP :: S\\NP/PP
+...        VPser :: S\\NP/AdjI
+...
+...        auto => N
+...        bebidas => N
+...        cine => N
+...        ley => N
+...        libro => N
+...        ministro => N
+...        panadería => N
+...        presidente => N
+...        super => N
+...
+...        el => Det
+...        la => Det
+...        las => Det
+...        un => Det
+...
+...        Ana => NP
+...        Pablo => NP
+...
+...        y => var\\.,var/.,var
+...
+...        pero => (S/NP)\\(S/NP)/(S/NP)
+...
+...        anunció => VPNP
+...        compró => VPNP
+...        cree => S\\NP/S[dep]
+...        desmintió => VPNP
+...        lee => VPNP
+...        fueron => VPPP
+...
+...        es => VPser
+...
+...        interesante => AdjD
+...        interesante => AdjI
+...        nueva => AdjD
+...        nueva => AdjI
+...
+...        a => PrepNPCompl
+...        en => PrepNAdjN
+...        en => PrepNAdjNP
+...
+...        ayer => AdvI
+...
+...        que => (NP\\NP)/(S/NP)
+...        que => S[dep]/S
+...     ''')
+
+
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+>>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
+...     printCCGDerivation(parse)
+...     # this fails on Python 2.7 because of the unicode problem explained in https://github.com/nltk/nltk/pull/1354
+...     break
+   el    ministro    anunció              pero              el    presidente   desmintió     la    nueva  ley
+ (NP/N)     N      ((S\NP)/NP)  (((S/NP)\(S/NP))/(S/NP))  (NP/N)      N       ((S\NP)/NP)  (NP/N)  (N/N)   N
+------------------>
+        NP
+------------------>T
+    (S/(S\NP))
+                                                         -------------------->
+                                                                  NP
+                                                         -------------------->T
+                                                              (S/(S\NP))
+                                                         --------------------------------->B
+                                                                      (S/NP)
+                               ----------------------------------------------------------->
+                                                     ((S/NP)\(S/NP))
+                                                                                                  ------------>
+                                                                                                       N
+                                                                                          -------------------->
+                                                                                                   NP
+                                                                                          --------------------<T
+                                                                                               (S\(S/NP))
+                               -------------------------------------------------------------------------------<B
+                                                                 (S\(S/NP))
+                  --------------------------------------------------------------------------------------------<B
+                                                             (S/NP)
+-------------------------------------------------------------------------------------------------------------->
+                                                      S
+
+
+
+
\ No newline at end of file
diff --git a/howto/ccg_semantics.html b/howto/ccg_semantics.html
new file mode 100644
index 000000000..48ba358c3
--- /dev/null
+++ b/howto/ccg_semantics.html
@@ -0,0 +1,733 @@
+NLTK :: Sample usage for ccg_semantics
Sample usage for ccg_semantics

+
+

Combinatory Categorial Grammar with semantics

+
+

Chart

+
>>> from nltk.ccg import chart, lexicon
+>>> from nltk.ccg.chart import printCCGDerivation
+
+
+
+

No semantics

+
>>> lex = lexicon.fromstring('''
+...     :- S, NP, N
+...     She => NP
+...     has => (S\\NP)/NP
+...     books => NP
+...     ''',
+...     False)
+
+
+
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+>>> parses = list(parser.parse("She has books".split()))
+>>> print(str(len(parses)) + " parses")
+3 parses
+
+
+
>>> printCCGDerivation(parses[0])
+ She      has      books
+ NP   ((S\NP)/NP)   NP
+     -------------------->
+            (S\NP)
+-------------------------<
+            S
+
+
+
>>> printCCGDerivation(parses[1])
+ She      has      books
+ NP   ((S\NP)/NP)   NP
+----->T
+(S/(S\NP))
+     -------------------->
+            (S\NP)
+------------------------->
+            S
+
+
+
>>> printCCGDerivation(parses[2])
+ She      has      books
+ NP   ((S\NP)/NP)   NP
+----->T
+(S/(S\NP))
+------------------>B
+      (S/NP)
+------------------------->
+            S
+
+
+
+
+

Simple semantics

+
>>> lex = lexicon.fromstring('''
+...     :- S, NP, N
+...     She => NP {she}
+...     has => (S\\NP)/NP {\\x y.have(y, x)}
+...     a => NP/N {\\P.exists z.P(z)}
+...     book => N {book}
+...     ''',
+...     True)
+
+
+
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+>>> parses = list(parser.parse("She has a book".split()))
+>>> print(str(len(parses)) + " parses")
+7 parses
+
+
+
>>> printCCGDerivation(parses[0])
+   She                 has                           a                book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+                                        ------------------------------------->
+                                                NP {exists z.book(z)}
+          ------------------------------------------------------------------->
+                         (S\NP) {\y.have(y,exists z.book(z))}
+-----------------------------------------------------------------------------<
+                       S {have(she,exists z.book(z))}
+
+
+
>>> printCCGDerivation(parses[1])
+   She                 has                           a                book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+          --------------------------------------------------------->B
+                   ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+          ------------------------------------------------------------------->
+                         (S\NP) {\y.have(y,exists z.book(z))}
+-----------------------------------------------------------------------------<
+                       S {have(she,exists z.book(z))}
+
+
+
>>> printCCGDerivation(parses[2])
+   She                 has                           a                book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+---------->T
+(S/(S\NP)) {\F.F(she)}
+                                        ------------------------------------->
+                                                NP {exists z.book(z)}
+          ------------------------------------------------------------------->
+                         (S\NP) {\y.have(y,exists z.book(z))}
+----------------------------------------------------------------------------->
+                       S {have(she,exists z.book(z))}
+
+
+
>>> printCCGDerivation(parses[3])
+   She                 has                           a                book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+---------->T
+(S/(S\NP)) {\F.F(she)}
+          --------------------------------------------------------->B
+                   ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+          ------------------------------------------------------------------->
+                         (S\NP) {\y.have(y,exists z.book(z))}
+----------------------------------------------------------------------------->
+                       S {have(she,exists z.book(z))}
+
+
+
>>> printCCGDerivation(parses[4])
+   She                 has                           a                book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+---------->T
+(S/(S\NP)) {\F.F(she)}
+---------------------------------------->B
+        (S/NP) {\x.have(she,x)}
+                                        ------------------------------------->
+                                                NP {exists z.book(z)}
+----------------------------------------------------------------------------->
+                       S {have(she,exists z.book(z))}
+
+
+
>>> printCCGDerivation(parses[5])
+   She                 has                           a                book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+---------->T
+(S/(S\NP)) {\F.F(she)}
+          --------------------------------------------------------->B
+                   ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+------------------------------------------------------------------->B
+                (S/N) {\P.have(she,exists z.P(z))}
+----------------------------------------------------------------------------->
+                       S {have(she,exists z.book(z))}
+
+
+
>>> printCCGDerivation(parses[6])
+   She                 has                           a                book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+---------->T
+(S/(S\NP)) {\F.F(she)}
+---------------------------------------->B
+        (S/NP) {\x.have(she,x)}
+------------------------------------------------------------------->B
+                (S/N) {\P.have(she,exists z.P(z))}
+----------------------------------------------------------------------------->
+                       S {have(she,exists z.book(z))}
+
+
+
+
+

Complex semantics

+
>>> lex = lexicon.fromstring('''
+...     :- S, NP, N
+...     She => NP {she}
+...     has => (S\\NP)/NP {\\x y.have(y, x)}
+...     a => ((S\\NP)\\((S\\NP)/NP))/N {\\P R x.(exists z.P(z) & R(z,x))}
+...     book => N {book}
+...     ''',
+...     True)
+
+
+
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+>>> parses = list(parser.parse("She has a book".split()))
+>>> print(str(len(parses)) + " parses")
+2 parses
+
+
+
>>> printCCGDerivation(parses[0])
+   She                 has                                           a                                 book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))}  N {book}
+                                        ---------------------------------------------------------------------->
+                                               ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))}
+          ----------------------------------------------------------------------------------------------------<
+                                       (S\NP) {\x.(exists z.book(z) & have(x,z))}
+--------------------------------------------------------------------------------------------------------------<
+                                     S {(exists z.book(z) & have(she,z))}
+
+
+
>>> printCCGDerivation(parses[1])
+   She                 has                                           a                                 book
+ NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))}  N {book}
+---------->T
+(S/(S\NP)) {\F.F(she)}
+                                        ---------------------------------------------------------------------->
+                                               ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))}
+          ----------------------------------------------------------------------------------------------------<
+                                       (S\NP) {\x.(exists z.book(z) & have(x,z))}
+-------------------------------------------------------------------------------------------------------------->
+                                     S {(exists z.book(z) & have(she,z))}
+
+
+
+
+

Using conjunctions

+
+

# TODO: The semantics of "and" should have been more flexible
+>>> lex = lexicon.fromstring('''
+...     :- S, NP, N
+...     I => NP {I}
+...     cook => (S\\NP)/NP {\\x y.cook(x,y)}
+...     and => var\\.,var/.,var {\\P Q x y.(P(x,y) & Q(x,y))}
+...     eat => (S\\NP)/NP {\\x y.eat(x,y)}
+...     the => NP/N {\\x.the(x)}
+...     bacon => N {bacon}
+...     ''',
+...     True)

+
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+>>> parses = list(parser.parse("I cook and eat the bacon".split()))
+>>> print(str(len(parses)) + " parses")
+7 parses
+
+
+
>>> printCCGDerivation(parses[0])
+   I                 cook                                       and                                        eat                     the            bacon
+ NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+                                      ------------------------------------------------------------------------------------->
+                                                    (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+        -------------------------------------------------------------------------------------------------------------------<
+                                             ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+                                                                                                                           ------------------------------->
+                                                                                                                                   NP {the(bacon)}
+        -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+----------------------------------------------------------------------------------------------------------------------------------------------------------<
+                                                       S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+
+
>>> printCCGDerivation(parses[1])
+   I                 cook                                       and                                        eat                     the            bacon
+ NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+                                      ------------------------------------------------------------------------------------->
+                                                    (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+        -------------------------------------------------------------------------------------------------------------------<
+                                             ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+        --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                  ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+        -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+----------------------------------------------------------------------------------------------------------------------------------------------------------<
+                                                       S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+
+
>>> printCCGDerivation(parses[2])
+   I                 cook                                       and                                        eat                     the            bacon
+ NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+-------->T
+(S/(S\NP)) {\F.F(I)}
+                                      ------------------------------------------------------------------------------------->
+                                                    (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+        -------------------------------------------------------------------------------------------------------------------<
+                                             ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+                                                                                                                           ------------------------------->
+                                                                                                                                   NP {the(bacon)}
+        -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+
+
>>> printCCGDerivation(parses[3])
+   I                 cook                                       and                                        eat                     the            bacon
+ NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+-------->T
+(S/(S\NP)) {\F.F(I)}
+                                      ------------------------------------------------------------------------------------->
+                                                    (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+        -------------------------------------------------------------------------------------------------------------------<
+                                             ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+        --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                  ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+        -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+
+
>>> printCCGDerivation(parses[4])
+   I                 cook                                       and                                        eat                     the            bacon
+ NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+-------->T
+(S/(S\NP)) {\F.F(I)}
+                                      ------------------------------------------------------------------------------------->
+                                                    (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+        -------------------------------------------------------------------------------------------------------------------<
+                                             ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+--------------------------------------------------------------------------------------------------------------------------->B
+                                            (S/NP) {\x.(eat(x,I) & cook(x,I))}
+                                                                                                                           ------------------------------->
+                                                                                                                                   NP {the(bacon)}
+---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+
+
>>> printCCGDerivation(parses[5])
+   I                 cook                                       and                                        eat                     the            bacon
+ NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+-------->T
+(S/(S\NP)) {\F.F(I)}
+                                      ------------------------------------------------------------------------------------->
+                                                    (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+        -------------------------------------------------------------------------------------------------------------------<
+                                             ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+        --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                  ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+----------------------------------------------------------------------------------------------------------------------------------------------->B
+                                                  (S/N) {\x.(eat(the(x),I) & cook(the(x),I))}
+---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+
+
>>> printCCGDerivation(parses[6])
+   I                 cook                                       and                                        eat                     the            bacon
+ NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+-------->T
+(S/(S\NP)) {\F.F(I)}
+                                      ------------------------------------------------------------------------------------->
+                                                    (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+        -------------------------------------------------------------------------------------------------------------------<
+                                             ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+--------------------------------------------------------------------------------------------------------------------------->B
+                                            (S/NP) {\x.(eat(x,I) & cook(x,I))}
+----------------------------------------------------------------------------------------------------------------------------------------------->B
+                                                  (S/N) {\x.(eat(the(x),I) & cook(the(x),I))}
+---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                       S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+
+
+
+
+

Tests from published papers

+

An example from “CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank”, Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf

+
>>> lex = lexicon.fromstring('''
+...     :- S, NP
+...     I => NP {I}
+...     give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)}
+...     them => NP {them}
+...     money => NP {money}
+...     ''',
+...     True)
+
+
+
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+>>> parses = list(parser.parse("I give them money".split()))
+>>> print(str(len(parses)) + " parses")
+3 parses
+
+
+
>>> printCCGDerivation(parses[0])
+   I                     give                     them       money
+ NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+        -------------------------------------------------->
+                ((S\NP)/NP) {\y z.give(y,them,z)}
+        -------------------------------------------------------------->
+                        (S\NP) {\z.give(money,them,z)}
+----------------------------------------------------------------------<
+                        S {give(money,them,I)}
+
+
+
>>> printCCGDerivation(parses[1])
+   I                     give                     them       money
+ NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+-------->T
+(S/(S\NP)) {\F.F(I)}
+        -------------------------------------------------->
+                ((S\NP)/NP) {\y z.give(y,them,z)}
+        -------------------------------------------------------------->
+                        (S\NP) {\z.give(money,them,z)}
+---------------------------------------------------------------------->
+                        S {give(money,them,I)}
+
+
+
>>> printCCGDerivation(parses[2])
+   I                     give                     them       money
+ NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+-------->T
+(S/(S\NP)) {\F.F(I)}
+        -------------------------------------------------->
+                ((S\NP)/NP) {\y z.give(y,them,z)}
+---------------------------------------------------------->B
+                (S/NP) {\y.give(y,them,I)}
+---------------------------------------------------------------------->
+                        S {give(money,them,I)}
+
+
+

An example from “CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank”, Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf

+
>>> lex = lexicon.fromstring('''
+...     :- N, NP, S
+...     money => N {money}
+...     that => (N\\N)/(S/NP) {\\P Q x.(P(x) & Q(x))}
+...     I => NP {I}
+...     give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)}
+...     them => NP {them}
+...     ''',
+...     True)
+
+
+
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+>>> parses = list(parser.parse("money that I give them".split()))
+>>> print(str(len(parses)) + " parses")
+3 parses
+
+
+
>>> printCCGDerivation(parses[0])
+   money                    that                     I                     give                     them
+ N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+                                                  -------->T
+                                            (S/(S\NP)) {\F.F(I)}
+                                                          -------------------------------------------------->
+                                                                  ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                  ---------------------------------------------------------->B
+                                                                  (S/NP) {\y.give(y,them,I)}
+           ------------------------------------------------------------------------------------------------->
+                                         (N\N) {\Q x.(give(x,them,I) & Q(x))}
+------------------------------------------------------------------------------------------------------------<
+                                     N {\x.(give(x,them,I) & money(x))}
+
+
+
>>> printCCGDerivation(parses[1])
+   money                    that                     I                     give                     them
+ N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+----------->T
+(N/(N\N)) {\F.F(money)}
+                                                  -------->T
+                                            (S/(S\NP)) {\F.F(I)}
+                                                          -------------------------------------------------->
+                                                                  ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                  ---------------------------------------------------------->B
+                                                                  (S/NP) {\y.give(y,them,I)}
+           ------------------------------------------------------------------------------------------------->
+                                         (N\N) {\Q x.(give(x,them,I) & Q(x))}
+------------------------------------------------------------------------------------------------------------>
+                                     N {\x.(give(x,them,I) & money(x))}
+
+
+
>>> printCCGDerivation(parses[2])
+   money                    that                     I                     give                     them
+ N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+----------->T
+(N/(N\N)) {\F.F(money)}
+-------------------------------------------------->B
+       (N/(S/NP)) {\P x.(P(x) & money(x))}
+                                                  -------->T
+                                            (S/(S\NP)) {\F.F(I)}
+                                                          -------------------------------------------------->
+                                                                  ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                  ---------------------------------------------------------->B
+                                                                  (S/NP) {\y.give(y,them,I)}
+------------------------------------------------------------------------------------------------------------>
+                                     N {\x.(give(x,them,I) & money(x))}
+
+
+
+
+
+

Lexicon

+
>>> from nltk.ccg import lexicon
+
+
+

Parse lexicon with semantics

+
>>> print(str(lexicon.fromstring(
+...     '''
+...     :- S,NP
+...
+...     IntransVsg :: S\\NP[sg]
+...
+...     sleeps => IntransVsg {\\x.sleep(x)}
+...     eats => S\\NP[sg]/NP {\\x y.eat(x,y)}
+...
+...     and => var\\var/var {\\x y.x & y}
+...     ''',
+...     True
+... )))
+and => ((_var0\_var0)/_var0) {(\x y.x & y)}
+eats => ((S\NP['sg'])/NP) {\x y.eat(x,y)}
+sleeps => (S\NP['sg']) {\x.sleep(x)}
+
+
+

Parse lexicon without semantics

+
>>> print(str(lexicon.fromstring(
+...     '''
+...     :- S,NP
+...
+...     IntransVsg :: S\\NP[sg]
+...
+...     sleeps => IntransVsg
+...     eats => S\\NP[sg]/NP {sem=\\x y.eat(x,y)}
+...
+...     and => var\\var/var
+...     ''',
+...     False
+... )))
+and => ((_var0\_var0)/_var0)
+eats => ((S\NP['sg'])/NP)
+sleeps => (S\NP['sg'])
+
+
+

Semantics are missing

+
>>> print(str(lexicon.fromstring(
+...     '''
+...     :- S,NP
+...
+...     eats => S\\NP[sg]/NP
+...     ''',
+...     True
+... )))
+Traceback (most recent call last):
+  ...
+AssertionError: eats => S\NP[sg]/NP must contain semantics because include_semantics is set to True
+
+
+
+
+

CCG combinator semantics computation

+
>>> from nltk.sem.logic import *
+>>> from nltk.ccg.logic import *
+
+
+
>>> read_expr = Expression.fromstring
+
+
+

Compute semantics from function application

+
>>> print(str(compute_function_semantics(read_expr(r'\x.P(x)'), read_expr(r'book'))))
+P(book)
+
+
+
>>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'read'))))
+read(book)
+
+
+
>>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'\x.read(x)'))))
+read(book)
+
+
+

Compute semantics from composition

+
>>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'\x.Q(x)'))))
+\x.P(Q(x))
+
+
+
>>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'read'))))
+Traceback (most recent call last):
+  ...
+AssertionError: `read` must be a lambda expression
+
+
+

Compute semantics from substitution

+
>>> print(str(compute_substitution_semantics(read_expr(r'\x y.P(x,y)'), read_expr(r'\x.Q(x)'))))
+\x.P(x,Q(x))
+
+
+
>>> print(str(compute_substitution_semantics(read_expr(r'\x.P(x)'), read_expr(r'read'))))
+Traceback (most recent call last):
+  ...
+AssertionError: `\x.P(x)` must be a lambda expression with 2 arguments
+
+
+

Compute type-raise semantics

+
>>> print(str(compute_type_raised_semantics(read_expr(r'\x.P(x)'))))
+\F x.F(P(x))
+
+
+
>>> print(str(compute_type_raised_semantics(read_expr(r'\x.F(x)'))))
+\F1 x.F1(F(x))
+
+
+
>>> print(str(compute_type_raised_semantics(read_expr(r'\x y z.P(x,y,z)'))))
+\F x y z.F(P(x,y,z))
+
+
+
+
+
\ No newline at end of file
diff --git a/howto/chat80.html b/howto/chat80.html
index 9dcd5b202..519bfeb0e 100644
--- a/howto/chat80.html
+++ b/howto/chat80.html
@@ -1,581 +1,368 @@
-Chat-80
+NLTK :: Sample usage for chat80
Sample usage for chat80

+
+

Chat-80

Chat-80 was a natural language system which allowed the user to
interrogate a Prolog knowledge base in the domain of world
geography. It was developed in the early '80s by Warren and Pereira; see
-http://acl.ldc.upenn.edu/J/J82/J82-3002.pdf for a description and
+https://aclanthology.org/J82-3002.pdf for a description and
http://www.cis.upenn.edu/~pereira/oldies.html for the source files.

-


The chat80 module contains functions to extract data from the Chat-80
relation files ('the world database'), and convert them into a format
that can be incorporated in the FOL models of
nltk.sem.evaluate. The code assumes that the Prolog
input files are available in the NLTK corpora directory.
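
For orientation, the extraction can also be driven end-to-end from the top
level. A minimal sketch, assuming the Chat-80 files are installed in the NLTK
data directory and that concepts() and make_valuation() behave as in the
module source:

>>> from nltk.sem import chat80
>>> concepts = chat80.concepts()                       # extract Concept objects for all items
>>> val = chat80.make_valuation(concepts, read=True)   # package them as a nltk.sem Valuation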

The Chat-80 World Database consists of the following files:

-
-world0.pl
-rivers.pl
-cities.pl
-countries.pl
-contain.pl
-borders.pl
-
-


world0.pl
+rivers.pl
+cities.pl
+countries.pl
+contain.pl
+borders.pl
+
+
+

This module uses a slightly modified version of world0.pl, in which
a set of Prolog rules has been omitted. The modified file is named
world1.pl. Currently, the file rivers.pl is not read in, since
it uses a list rather than a string in the second field.

-
-

+
+

Reading Chat-80 Files

Chat-80 relations are like tables in a relational database. The
relation acts as the name of the table; the first argument acts as the
'primary key'; and subsequent arguments are further fields in the
table. In general, the name of the table provides a label for a unary
predicate whose extension is all the primary keys. For example,
relations in cities.pl are of the following form:

-
-'city(athens,greece,1368).'
-
-


+
'city(athens,greece,1368).'
+
+
+

Here, 'athens' is the key, and will be mapped to a member of the unary predicate city.

-

By analogy with NLTK corpora, chat80 defines a number of 'items' +

By analogy with NLTK corpora, chat80 defines a number of ‘items’ which correspond to the relations.

-
-
->>> from nltk.sem import chat80
->>> print(chat80.items) # doctest: +ELLIPSIS
-('borders', 'circle_of_lat', 'circle_of_long', 'city', ...)
-
-
+
>>> from nltk.sem import chat80
+>>> print(chat80.items)
+('borders', 'circle_of_lat', 'circle_of_long', 'city', ...)
+
+

The fields in the table are mapped to binary predicates. The first
argument of the predicate is the primary key, while the second
argument is the data in the relevant field. Thus, in the above
example, the third field is mapped to the binary predicate
population_of, whose extension is a set of pairs such as
'(athens, 1368)'.

An exception to this general framework is required by the relations in
the files borders.pl and contains.pl. These contain facts of the
following form:

'borders(albania,greece).'

'contains0(africa,central_africa).'

We do not want to form a unary concept out of the element in the first
field of these records, and we want the label of the binary
relation just to be 'border'/'contain' respectively.

-


+

In order to drive the extraction process, we use ‘relation metadata bundles’ which are Python dictionaries such as the following:

-
-city = {'label': 'city',
-        'closures': [],
-        'schema': ['city', 'country', 'population'],
-        'filename': 'cities.pl'}
-
-


city = {'label': 'city',
+        'closures': [],
+        'schema': ['city', 'country', 'population'],
+        'filename': 'cities.pl'}
+
+
+

According to this, the file city['filename'] contains a list of
relational tuples (or more accurately, the corresponding strings in
Prolog form) whose predicate symbol is city['label'] and whose
relational schema is city['schema']. The notion of a closure is
discussed in the next section.
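
A hedged sketch of how such a bundle is consumed, assuming process_bundle()
takes a list of bundles and returns a mapping from concept labels to Concept
objects, as in the module source:

>>> rels = [chat80.city]                      # the module-level 'city' bundle shown above
>>> concept_map = chat80.process_bundle(rels) # maps 'city', 'country_of', ... to Concepts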

-
-
-

+

Concepts

In order to encapsulate the results of the extraction, a class of
Concepts is introduced. A Concept object has a number of
attributes, in particular a prefLabel, an arity and an extension.

-
-
->>> c1 = chat80.Concept('dog', arity=1, extension=set(['d1', 'd2']))
->>> print(c1)
-Label = 'dog'
-Arity = 1
-Extension = ['d1', 'd2']
-
-
-


+
>>> c1 = chat80.Concept('dog', arity=1, extension=set(['d1', 'd2']))
+>>> print(c1)
+Label = 'dog'
+Arity = 1
+Extension = ['d1', 'd2']
+
+
+

The extension attribute makes it easier to inspect the output of the extraction.

-
-
->>> schema = ['city', 'country', 'population']
->>> concepts = chat80.clause2concepts('cities.pl', 'city', schema)
->>> concepts
-[Concept('city'), Concept('country_of'), Concept('population_of')]
->>> for c in concepts: # doctest: +NORMALIZE_WHITESPACE
-...     print("%s:\n\t%s" % (c.prefLabel, c.extension[:4]))
-city:
-    ['athens', 'bangkok', 'barcelona', 'berlin']
-country_of:
-    [('athens', 'greece'), ('bangkok', 'thailand'), ('barcelona', 'spain'), ('berlin', 'east_germany')]
-population_of:
-    [('athens', '1368'), ('bangkok', '1178'), ('barcelona', '1280'), ('berlin', '3481')]
-
-
-

In addition, the extension can be further -processed: in the case of the 'border' relation, we check that the -relation is symmetric, and in the case of the 'contain' +

>>> schema = ['city', 'country', 'population']
+>>> concepts = chat80.clause2concepts('cities.pl', 'city', schema)
+>>> concepts
+[Concept('city'), Concept('country_of'), Concept('population_of')]
+>>> for c in concepts:
+...     print("%s:\n\t%s" % (c.prefLabel, c.extension[:4]))
+city:
+    ['athens', 'bangkok', 'barcelona', 'berlin']
+country_of:
+    [('athens', 'greece'), ('bangkok', 'thailand'), ('barcelona', 'spain'), ('berlin', 'east_germany')]
+population_of:
+    [('athens', '1368'), ('bangkok', '1178'), ('barcelona', '1280'), ('berlin', '3481')]
+
+
+

In addition, the extension can be further +processed: in the case of the 'border' relation, we check that the +relation is symmetric, and in the case of the 'contain' relation, we carry out the transitive closure. The closure properties associated with a concept are indicated in the relation metadata, as noted earlier.

-
-
->>> borders = set([('a1', 'a2'), ('a2', 'a3')])
->>> c2 = chat80.Concept('borders', arity=2, extension=borders)
->>> print(c2)
-Label = 'borders'
-Arity = 2
-Extension = [('a1', 'a2'), ('a2', 'a3')]
->>> c3 = chat80.Concept('borders', arity=2, closures=['symmetric'], extension=borders)
->>> c3.close()
->>> print(c3)
-Label = 'borders'
-Arity = 2
-Extension = [('a1', 'a2'), ('a2', 'a1'), ('a2', 'a3'), ('a3', 'a2')]
-
-
-

The extension of a Concept object is then incorporated into a -Valuation object.

+
>>> borders = set([('a1', 'a2'), ('a2', 'a3')])
+>>> c2 = chat80.Concept('borders', arity=2, extension=borders)
+>>> print(c2)
+Label = 'borders'
+Arity = 2
+Extension = [('a1', 'a2'), ('a2', 'a3')]
+>>> c3 = chat80.Concept('borders', arity=2, closures=['symmetric'], extension=borders)
+>>> c3.close()
+>>> print(c3)
+Label = 'borders'
+Arity = 2
+Extension = [('a1', 'a2'), ('a2', 'a1'), ('a2', 'a3'), ('a3', 'a2')]
+
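The transitive case can be exercised the same way. The sketch below is not part of the recorded doctest run and assumes only that close() honours a 'transitive' entry in closures, as the 'contain' metadata implies:

>>> contains = set([('asia', 'china'), ('china', 'peking')])
>>> c4 = chat80.Concept('contains', arity=2, closures=['transitive'], extension=contains)
>>> c4.close()
>>> print(c4)
Label = 'contains'
Arity = 2
Extension = [('asia', 'china'), ('asia', 'peking'), ('china', 'peking')]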
-
-

Persistence

-

The functions val_dump and val_load are provided to allow a +

The extension of a Concept object is then incorporated into a +Valuation object.

+
+
+

Persistence

+

The functions val_dump and val_load are provided to allow a valuation to be stored in a persistent database and re-loaded, rather than having to be re-computed each time.
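A minimal sketch of the round trip (hedged: it assumes the module-level item_metadata dictionary of relation metadata bundles, and that val_dump/val_load take a list of bundles plus a database filename; the calls are skipped in doctests because they touch the filesystem):

>>> rels = [chat80.item_metadata[item] for item in chat80.items]  # doctest: +SKIP
>>> chat80.val_dump(rels, 'chat80_valuation.db')  # compute once, store persistently  # doctest: +SKIP
>>> val = chat80.val_load('chat80_valuation.db')  # cheap re-load in later sessions  # doctest: +SKIP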

-
-
-

Individuals and Lexical Items

+ +
+

Individuals and Lexical Items

As well as deriving relations from the Chat-80 data, we also create a set of individual constants, one for each entity in the domain. The individual constants are string-identical to the entities. For -example, given a data item such as 'zloty', we add to the valuation -a pair ('zloty', 'zloty'). In order to parse English sentences that +example, given a data item such as 'zloty', we add to the valuation +a pair ('zloty', 'zloty'). In order to parse English sentences that refer to these entities, we also create a lexical item such as the following for each individual constant:

-
-PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty'
-
-

The set of rules is written to the file chat_pnames.fcfg in the -current directory.

+
PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty'
+
-
-

SQL Query

-

The city relation is also available in RDB form and can be queried +

The set of rules is written to the file chat_pnames.fcfg in the +current directory.
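Programmatically, adding the individual constants to a valuation and dumping the proper-name rules should both be reachable through the module's label_indivs helper; the lex=True keyword is an assumption here, not verified by this doctest run:

>>> val = chat80.make_valuation(concepts, read=True)  # doctest: +SKIP
>>> chat80.label_indivs(val, lex=True)  # assumed: also writes chat_pnames.fcfg  # doctest: +SKIP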

+
+
+

SQL Query

+

The city relation is also available in RDB form and can be queried using SQL statements.

-
-
->>> import nltk
->>> q = "SELECT City, Population FROM city_table WHERE Country = 'china' and Population > 1000"
->>> for answer in chat80.sql_query('corpora/city_database/city.db', q):
-...     print("%-10s %4s" % answer)
-canton     1496
-chungking  1100
-mukden     1551
-peking     2031
-shanghai   5407
-tientsin   1795
-
-
-

The (deliberately naive) grammar sql0.fcfg translates from English +

>>> import nltk
+>>> q = "SELECT City, Population FROM city_table WHERE Country = 'china' and Population > 1000"
+>>> for answer in chat80.sql_query('corpora/city_database/city.db', q):
+...     print("%-10s %4s" % answer)
+canton     1496
+chungking  1100
+mukden     1551
+peking     2031
+shanghai   5407
+tientsin   1795
+
+
+

The (deliberately naive) grammar sql0.fcfg translates from English to SQL:

-
-
->>> nltk.data.show_cfg('grammars/book_grammars/sql0.fcfg')
-% start S
-S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp]
-VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp]
-VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap]
-NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n]
-PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np]
-AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp]
-NP[SEM='Country="greece"'] -> 'Greece'
-NP[SEM='Country="china"'] -> 'China'
-Det[SEM='SELECT'] -> 'Which' | 'What'
-N[SEM='City FROM city_table'] -> 'cities'
-IV[SEM=''] -> 'are'
-A[SEM=''] -> 'located'
-P[SEM=''] -> 'in'
-
-
+
>>> nltk.data.show_cfg('grammars/book_grammars/sql0.fcfg')
+% start S
+S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp]
+VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp]
+VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap]
+NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n]
+PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np]
+AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp]
+NP[SEM='Country="greece"'] -> 'Greece'
+NP[SEM='Country="china"'] -> 'China'
+Det[SEM='SELECT'] -> 'Which' | 'What'
+N[SEM='City FROM city_table'] -> 'cities'
+IV[SEM=''] -> 'are'
+A[SEM=''] -> 'located'
+P[SEM=''] -> 'in'
+
+

Given this grammar, we can express, and then execute, queries in English.

-
-
->>> cp = nltk.parse.load_parser('grammars/book_grammars/sql0.fcfg')
->>> query = 'What cities are in China'
->>> for tree in cp.parse(query.split()):
-...     answer = tree.label()['SEM']
-...     q = " ".join(answer)
-...     print(q)
-...
-SELECT City FROM city_table WHERE   Country="china"
-
-
->>> rows = chat80.sql_query('corpora/city_database/city.db', q)
->>> for r in rows: print("%s" % r, end=' ')
-canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin
-
-
-
-

Using Valuations

-

In order to convert such an extension into a valuation, we use the -make_valuation() method; setting read=True creates and returns -a new Valuation object which contains the results.

-
-
->>> val = chat80.make_valuation(concepts, read=True)
->>> 'calcutta' in val['city']
-True
->>> [town for (town, country) in val['country_of'] if country == 'india']
-['bombay', 'calcutta', 'delhi', 'hyderabad', 'madras']
->>> dom = val.domain
->>> g = nltk.sem.Assignment(dom)
->>> m = nltk.sem.Model(dom, val)
->>> m.evaluate(r'population_of(jakarta, 533)', g)
-True
-
-
+
>>> cp = nltk.parse.load_parser('grammars/book_grammars/sql0.fcfg')
+>>> query = 'What cities are in China'
+>>> for tree in cp.parse(query.split()):
+...     answer = tree.label()['SEM']
+...     q = " ".join(answer)
+...     print(q)
+...
+SELECT City FROM city_table WHERE   Country="china"
+
+
+
>>> rows = chat80.sql_query('corpora/city_database/city.db', q)
+>>> for r in rows: print("%s" % r, end=' ')
+canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin
+
+
+

Using Valuations

+

In order to convert such an extension into a valuation, we use the +make_valuation() function; setting read=True creates and returns +a new Valuation object which contains the results.

+
>>> val = chat80.make_valuation(concepts, read=True)
+>>> 'calcutta' in val['city']
+True
+>>> [town for (town, country) in val['country_of'] if country == 'india']
+['bombay', 'calcutta', 'delhi', 'hyderabad', 'madras']
+>>> dom = val.domain
+>>> g = nltk.sem.Assignment(dom)
+>>> m = nltk.sem.Model(dom, val)
+>>> m.evaluate(r'population_of(jakarta, 533)', g)
+True
+
+
+
+ - + \ No newline at end of file diff --git a/howto/childes.html b/howto/childes.html index 3e41e2ab6..a6a9c10a1 100644 --- a/howto/childes.html +++ b/howto/childes.html @@ -1,556 +1,338 @@ - - - + - - -CHILDES Corpus Readers - + + + + + + + NLTK :: Sample usage for childes + + + + + + + + + + + + + + -
-

CHILDES Corpus Readers


Sample usage for childes

+
+

CHILDES Corpus Readers

Read the XML version of the CHILDES corpus.

-
-

How to use CHILDESCorpusReader

+
+

Setup

+
>>> from nltk.test.childes_fixt import setup_module
+>>> setup_module()
+
+
+
+
+

How to use CHILDESCorpusReader

Import the CHILDESCorpusReader class and read the CHILDES corpus saved in the nltk_data directory.

-
-
->>> import nltk
->>> from nltk.corpus.reader import CHILDESCorpusReader
->>> corpus_root = nltk.data.find('corpora/childes/data-xml/Eng-USA-MOR/')
-
-
+
>>> import nltk
+>>> from nltk.corpus.reader import CHILDESCorpusReader
+>>> corpus_root = nltk.data.find('corpora/childes/data-xml/Eng-USA-MOR/')
+
+

Reading files in the Valian corpus (Valian, 1991).

-
-
->>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml')
->>> valian.fileids()
-['Valian/01a.xml', 'Valian/01b.xml', 'Valian/02a.xml', 'Valian/02b.xml',...
-
-
+
>>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml')
+>>> valian.fileids()
+['Valian/01a.xml', 'Valian/01b.xml', 'Valian/02a.xml', 'Valian/02b.xml',...
+
+

Count the number of files

-
-
->>> len(valian.fileids())
-43
-
-
+
>>> len(valian.fileids())
+43
+
+

Printing properties of the corpus files.

-
-
->>> corpus_data = valian.corpus(valian.fileids())
->>> print(corpus_data[0]['Lang'])
-eng
->>> for key in sorted(corpus_data[0].keys()):
-...    print(key, ": ", corpus_data[0][key])
-Corpus :  valian
-Date :  1986-03-04
-Id :  01a
-Lang :  eng
-Version :  2.0.1
-{http://www.w3.org/2001/XMLSchema-instance}schemaLocation :  http://www.talkbank.org/ns/talkbank http://talkbank.org/software/talkbank.xsd
-
-
+
>>> corpus_data = valian.corpus(valian.fileids())
+>>> print(corpus_data[0]['Lang'])
+eng
+>>> for key in sorted(corpus_data[0].keys()):
+...    print(key, ": ", corpus_data[0][key])
+Corpus :  valian
+Date :  1986-03-04
+Id :  01a
+Lang :  eng
+Version :  2.0.1
+{http://www.w3.org/2001/XMLSchema-instance}schemaLocation :  http://www.talkbank.org/ns/talkbank http://talkbank.org/software/talkbank.xsd
+
+

Printing information of participants of the corpus. The most common codes for -the participants are 'CHI' (target child), 'MOT' (mother), and 'INV' (investigator).

-
-
->>> corpus_participants = valian.participants(valian.fileids())
->>> for this_corpus_participants in corpus_participants[:2]:
-...     for key in sorted(this_corpus_participants.keys()):
-...         dct = this_corpus_participants[key]
-...         print(key, ": ", [(k, dct[k]) for k in sorted(dct.keys())])
-CHI :  [('age', 'P2Y1M3D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')]
-INV :  [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')]
-MOT :  [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')]
-CHI :  [('age', 'P2Y1M12D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')]
-INV :  [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')]
-MOT :  [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')]
-
-
+the participants are ‘CHI’ (target child), ‘MOT’ (mother), and ‘INV’ (investigator).

+
>>> corpus_participants = valian.participants(valian.fileids())
+>>> for this_corpus_participants in corpus_participants[:2]:
+...     for key in sorted(this_corpus_participants.keys()):
+...         dct = this_corpus_participants[key]
+...         print(key, ": ", [(k, dct[k]) for k in sorted(dct.keys())])
+CHI :  [('age', 'P2Y1M3D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')]
+INV :  [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')]
+MOT :  [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')]
+CHI :  [('age', 'P2Y1M12D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')]
+INV :  [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')]
+MOT :  [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')]
+
+

Printing words.

-
-
->>> valian.words('Valian/01a.xml')
-['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ...
-
-
+
>>> valian.words('Valian/01a.xml')
+['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ...
+
+

Printing sentences.

-
-
->>> valian.sents('Valian/01a.xml')
-[['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname',
-  'and', 'it', 'is', 'March', 'fourth', 'I', 'believe', 'and', 'when',
-  'was', "Parent's", 'birthday'], ["Child's"], ['oh', "I'm", 'sorry'],
-  ["that's", 'okay'], ...
-
-
+
>>> valian.sents('Valian/01a.xml')
+[['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname',
+  'and', 'it', 'is', 'March', 'fourth', 'I', 'believe', 'and', 'when',
+  'was', "Parent's", 'birthday'], ["Child's"], ['oh', "I'm", 'sorry'],
+  ["that's", 'okay'], ...
+
+

You can specify the participants with the argument speaker.

-
-
->>> valian.words('Valian/01a.xml',speaker=['INV'])
-['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ...
->>> valian.words('Valian/01a.xml',speaker=['MOT'])
-["Child's", "that's", 'okay', 'February', 'first', 'nineteen', ...
->>> valian.words('Valian/01a.xml',speaker=['CHI'])
-['tape', 'it', 'up', 'and', 'two', 'tape', 'players', 'have',...
-
-
+
>>> valian.words('Valian/01a.xml',speaker=['INV'])
+['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ...
+>>> valian.words('Valian/01a.xml',speaker=['MOT'])
+["Child's", "that's", 'okay', 'February', 'first', 'nineteen', ...
+>>> valian.words('Valian/01a.xml',speaker=['CHI'])
+['tape', 'it', 'up', 'and', 'two', 'tape', 'players', 'have',...
+
+

tagged_words() and tagged_sents() return the usual (word,pos) tuple lists. POS tags in the CHILDES are automatically assigned by MOR and POST programs (MacWhinney, 2000).

-
-
->>> valian.tagged_words('Valian/01a.xml')[:30]
-[('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'),
-('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'),
-('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'),
-('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'),
-('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n'), ("Child's", 'n:prop'),
-('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj'), ("that's", 'pro:dem'),
-('okay', 'adj'), ('February', 'n:prop'), ('first', 'adj'),
-('nineteen', 'det:num'), ('eighty', 'det:num'), ('four', 'det:num')]
-
-
->>> valian.tagged_sents('Valian/01a.xml')[:10]
-[[('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'),
-('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'),
-('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'),
-('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'),
-('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n')],
-[("Child's", 'n:prop')], [('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj')],
-[("that's", 'pro:dem'), ('okay', 'adj')],
-[('February', 'n:prop'), ('first', 'adj'), ('nineteen', 'det:num'),
-('eighty', 'det:num'), ('four', 'det:num')],
-[('great', 'adj')],
-[('and', 'coord'), ("she's", 'pro:sub'), ('two', 'det:num'), ('years', 'n'), ('old', 'adj')],
-[('correct', 'adj')],
-[('okay', 'co')], [('she', 'pro:sub'), ('just', 'adv:int'), ('turned', 'part'), ('two', 'det:num'),
-('a', 'det'), ('month', 'n'), ('ago', 'adv')]]
-
-
-

When the argument stem is true, the word stems (e.g., 'is' -> 'be-3PS') are -used instread of the original words.

-
-
->>> valian.words('Valian/01a.xml')[:30]
-['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'is', ...
->>> valian.words('Valian/01a.xml',stem=True)[:30]
-['at', 'Parent', 'Lastname', 's', 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'be-3S', ...
-
-
-

When the argument replace is true, the replaced words are used instread of +

>>> valian.tagged_words('Valian/01a.xml')[:30]
+[('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'),
+('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'),
+('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'),
+('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'),
+('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n'), ("Child's", 'n:prop'),
+('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj'), ("that's", 'pro:dem'),
+('okay', 'adj'), ('February', 'n:prop'), ('first', 'adj'),
+('nineteen', 'det:num'), ('eighty', 'det:num'), ('four', 'det:num')]
+
+
+
>>> valian.tagged_sents('Valian/01a.xml')[:10]
+[[('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'),
+('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'),
+('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'),
+('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'),
+('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n')],
+[("Child's", 'n:prop')], [('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj')],
+[("that's", 'pro:dem'), ('okay', 'adj')],
+[('February', 'n:prop'), ('first', 'adj'), ('nineteen', 'det:num'),
+('eighty', 'det:num'), ('four', 'det:num')],
+[('great', 'adj')],
+[('and', 'coord'), ("she's", 'pro:sub'), ('two', 'det:num'), ('years', 'n'), ('old', 'adj')],
+[('correct', 'adj')],
+[('okay', 'co')], [('she', 'pro:sub'), ('just', 'adv:int'), ('turned', 'part'), ('two', 'det:num'),
+('a', 'det'), ('month', 'n'), ('ago', 'adv')]]
+
+
+

When the argument stem is true, the word stems (e.g., ‘is’ -> ‘be-3S’) are +used instead of the original words.

+
>>> valian.words('Valian/01a.xml')[:30]
+['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'is', ...
+>>> valian.words('Valian/01a.xml',stem=True)[:30]
+['at', 'Parent', 'Lastname', 's', 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'be-3S', ...
+
+
+

When the argument replace is true, the replaced words are used instead of the original words.

-
-
->>> valian.words('Valian/01a.xml',speaker='CHI')[247]
-'tikteat'
->>> valian.words('Valian/01a.xml',speaker='CHI',replace=True)[247]
-'trick'
-
-
+
>>> valian.words('Valian/01a.xml',speaker='CHI')[247]
+'tikteat'
+>>> valian.words('Valian/01a.xml',speaker='CHI',replace=True)[247]
+'trick'
+
+

When the argument relation is true, the grammatical relations in the sentence are returned. See Sagae et al. (2010) for details of the relational structure adopted in the CHILDES.

-
-
->>> valian.words('Valian/01a.xml',relation=True)[:10]
-[[('at', 'prep', '1|0|ROOT'), ('Parent', 'n', '2|5|VOC'), ('Lastname', 'n', '3|5|MOD'), ('s', 'poss', '4|5|MOD'), ('house', 'n', '5|1|POBJ'), ('with', 'prep', '6|1|JCT'), ('Child', 'n', '7|8|NAME'), ('Lastname', 'n', '8|6|POBJ'), ('and', 'coord', '9|8|COORD'), ('it', 'pro', '10|11|SUBJ'), ('be-3S', 'v', '11|9|COMP'), ('March', 'n', '12|11|PRED'), ('fourth', 'adj', '13|12|MOD'), ('I', 'pro', '15|16|SUBJ'), ('believe', 'v', '16|14|ROOT'), ('and', 'coord', '18|17|ROOT'), ('when', 'adv', '19|20|PRED'), ('be-PAST', 'v', '20|18|COMP'), ('Parent', 'n', '21|23|MOD'), ('s', 'poss', '22|23|MOD'), ('birth', 'n', '23|20|SUBJ')], [('Child', 'n', '1|2|MOD'), ('s', 'poss', '2|0|ROOT')], [('oh', 'co', '1|4|COM'), ('I', 'pro', '3|4|SUBJ'), ('be', 'v', '4|0|ROOT'), ('sorry', 'adj', '5|4|PRED')], [('that', 'pro', '1|2|SUBJ'), ('be', 'v', '2|0|ROOT'), ('okay', 'adj', '3|2|PRED')], [('February', 'n', '1|6|VOC'), ('first', 'adj', '2|6|ENUM'), ('nineteen', 'det', '4|6|ENUM'), ('eighty', 'det', '5|6|ENUM'), ('four', 'det', '6|0|ROOT')], [('great', 'adj', '1|0|ROOT')], [('and', 'coord', '1|0|ROOT'), ('she', 'pro', '2|1|ROOT'), ('be', 'aux', '3|5|AUX'), ('two', 'det', '4|5|QUANT'), ('year-PL', 'n', '5|2|ROOT'), ('old', 'adj', '6|5|MOD')], [('correct', 'adj', '1|0|ROOT')], [('okay', 'co', '1|0|ROOT')], [('she', 'pro', '1|0|ROOT'), ('just', 'adv', '2|3|JCT'), ('turn-PERF', 'part', '3|1|XCOMP'), ('two', 'det', '4|6|QUANT'), ('a', 'det', '5|6|DET'), ('month', 'n', '6|3|OBJ'), ('ago', 'adv', '7|3|JCT')]]
-
-
+
>>> valian.words('Valian/01a.xml',relation=True)[:10]
+[[('at', 'prep', '1|0|ROOT'), ('Parent', 'n', '2|5|VOC'), ('Lastname', 'n', '3|5|MOD'), ('s', 'poss', '4|5|MOD'), ('house', 'n', '5|1|POBJ'), ('with', 'prep', '6|1|JCT'), ('Child', 'n', '7|8|NAME'), ('Lastname', 'n', '8|6|POBJ'), ('and', 'coord', '9|8|COORD'), ('it', 'pro', '10|11|SUBJ'), ('be-3S', 'v', '11|9|COMP'), ('March', 'n', '12|11|PRED'), ('fourth', 'adj', '13|12|MOD'), ('I', 'pro', '15|16|SUBJ'), ('believe', 'v', '16|14|ROOT'), ('and', 'coord', '18|17|ROOT'), ('when', 'adv', '19|20|PRED'), ('be-PAST', 'v', '20|18|COMP'), ('Parent', 'n', '21|23|MOD'), ('s', 'poss', '22|23|MOD'), ('birth', 'n', '23|20|SUBJ')], [('Child', 'n', '1|2|MOD'), ('s', 'poss', '2|0|ROOT')], [('oh', 'co', '1|4|COM'), ('I', 'pro', '3|4|SUBJ'), ('be', 'v', '4|0|ROOT'), ('sorry', 'adj', '5|4|PRED')], [('that', 'pro', '1|2|SUBJ'), ('be', 'v', '2|0|ROOT'), ('okay', 'adj', '3|2|PRED')], [('February', 'n', '1|6|VOC'), ('first', 'adj', '2|6|ENUM'), ('nineteen', 'det', '4|6|ENUM'), ('eighty', 'det', '5|6|ENUM'), ('four', 'det', '6|0|ROOT')], [('great', 'adj', '1|0|ROOT')], [('and', 'coord', '1|0|ROOT'), ('she', 'pro', '2|1|ROOT'), ('be', 'aux', '3|5|AUX'), ('two', 'det', '4|5|QUANT'), ('year-PL', 'n', '5|2|ROOT'), ('old', 'adj', '6|5|MOD')], [('correct', 'adj', '1|0|ROOT')], [('okay', 'co', '1|0|ROOT')], [('she', 'pro', '1|0|ROOT'), ('just', 'adv', '2|3|JCT'), ('turn-PERF', 'part', '3|1|XCOMP'), ('two', 'det', '4|6|QUANT'), ('a', 'det', '5|6|DET'), ('month', 'n', '6|3|OBJ'), ('ago', 'adv', '7|3|JCT')]]
+
+

Printing age. When the argument month is true, the age information in the CHILDES format is converted into the number of months.

-
-
->>> valian.age()
-['P2Y1M3D', 'P2Y1M12D', 'P1Y9M21D', 'P1Y9M28D', 'P2Y1M23D', ...
->>> valian.age('Valian/01a.xml')
-['P2Y1M3D']
->>> valian.age('Valian/01a.xml',month=True)
-[25]
-
-
+
>>> valian.age()
+['P2Y1M3D', 'P2Y1M12D', 'P1Y9M21D', 'P1Y9M28D', 'P2Y1M23D', ...
+>>> valian.age('Valian/01a.xml')
+['P2Y1M3D']
+>>> valian.age('Valian/01a.xml',month=True)
+[25]
+
+

Printing MLU. The criteria for the MLU computation are broadly based on Brown (1973).

-
-
->>> valian.MLU()
-[2.3574660633484..., 2.292682926829..., 3.492857142857..., 2.961783439490...,
- 2.0842696629213..., 3.169811320754..., 3.137404580152..., 3.0578034682080...,
- 4.090163934426..., 3.488372093023..., 2.8773584905660..., 3.4792899408284...,
- 4.0111940298507..., 3.456790123456..., 4.487603305785..., 4.007936507936...,
- 5.25, 5.154696132596..., ...]
-
-
->>> valian.MLU('Valian/01a.xml')
-[2.35746606334...]
-
-
+
>>> valian.MLU()
+[2.3574660633484..., 2.292682926829..., 3.492857142857..., 2.961783439490...,
+ 2.0842696629213..., 3.169811320754..., 3.137404580152..., 3.0578034682080...,
+ 4.090163934426..., 3.488372093023..., 2.8773584905660..., 3.4792899408284...,
+ 4.0111940298507..., 3.456790123456..., 4.487603305785..., 4.007936507936...,
+ 5.25, 5.154696132596..., ...]
+
-
-

Basic stuff

+
>>> valian.MLU('Valian/01a.xml')
+[2.35746606334...]
+
+
+
+
+

Basic stuff

Count the number of words and sentences of each file.

-
-
->>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml')
->>> for this_file in valian.fileids()[:6]:
-...     print(valian.corpus(this_file)[0]['Corpus'], valian.corpus(this_file)[0]['Id'])
-...     print("num of words: %i" % len(valian.words(this_file)))
-...     print("num of sents: %i" % len(valian.sents(this_file)))
-valian 01a
-num of words: 3606
-num of sents: 1027
-valian 01b
-num of words: 4376
-num of sents: 1274
-valian 02a
-num of words: 2673
-num of sents: 801
-valian 02b
-num of words: 5020
-num of sents: 1583
-valian 03a
-num of words: 2743
-num of sents: 988
-valian 03b
-num of words: 4409
-num of sents: 1397
-
-
+
>>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml')
+>>> for this_file in valian.fileids()[:6]:
+...     print(valian.corpus(this_file)[0]['Corpus'], valian.corpus(this_file)[0]['Id'])
+...     print("num of words: %i" % len(valian.words(this_file)))
+...     print("num of sents: %i" % len(valian.sents(this_file)))
+valian 01a
+num of words: 3606
+num of sents: 1027
+valian 01b
+num of words: 4376
+num of sents: 1274
+valian 02a
+num of words: 2673
+num of sents: 801
+valian 02b
+num of words: 5020
+num of sents: 1583
+valian 03a
+num of words: 2743
+num of sents: 988
+valian 03b
+num of words: 4409
+num of sents: 1397
+
+
+
+
+ - + \ No newline at end of file diff --git a/howto/chunk.html b/howto/chunk.html index 8f82d5965..9da428e82 100644 --- a/howto/chunk.html +++ b/howto/chunk.html @@ -1,773 +1,544 @@ - - - + - - -Chunking - + + + + + + + NLTK :: Sample usage for chunk + + + + + + + + + + + + + + -
-

Chunking

-
->>> from nltk.chunk import *
->>> from nltk.chunk.util import *
->>> from nltk.chunk.regexp import *
->>> from nltk import Tree
-
-
->>> tagged_text = "[ The/DT cat/NN ] sat/VBD on/IN [ the/DT mat/NN ] [ the/DT dog/NN ] chewed/VBD ./."
->>> gold_chunked_text = tagstr2tree(tagged_text)
->>> unchunked_text = gold_chunked_text.flatten()
-
-

Sample usage for chunk

+
+

Chunking

+
>>> from nltk.chunk import *
+>>> from nltk.chunk.util import *
+>>> from nltk.chunk.regexp import *
+>>> from nltk import Tree
+
+
+
>>> tagged_text = "[ The/DT cat/NN ] sat/VBD on/IN [ the/DT mat/NN ] [ the/DT dog/NN ] chewed/VBD ./."
+>>> gold_chunked_text = tagstr2tree(tagged_text)
+>>> unchunked_text = gold_chunked_text.flatten()
+
+

Chunking uses a special regexp syntax for rules that delimit the chunks. These -rules must be converted to 'regular' regular expressions before a sentence can +rules must be converted to ‘regular’ regular expressions before a sentence can be chunked.

-
-
->>> tag_pattern = "<DT>?<JJ>*<NN.*>"
->>> regexp_pattern = tag_pattern2re_pattern(tag_pattern)
->>> regexp_pattern
-'(<(DT)>)?(<(JJ)>)*(<(NN[^\\{\\}<>]*)>)'
-
-
+
>>> tag_pattern = "<DT>?<JJ>*<NN.*>"
+>>> regexp_pattern = tag_pattern2re_pattern(tag_pattern)
+>>> regexp_pattern
+'(<(DT)>)?(<(JJ)>)*(<(NN[^\\{\\}<>]*)>)'
+
+

Construct some new chunking rules.

-
-
->>> chunk_rule = ChunkRule("<.*>+", "Chunk everything")
->>> chink_rule = ChinkRule("<VBD|IN|\.>", "Chink on verbs/prepositions")
->>> split_rule = SplitRule("<DT><NN>", "<DT><NN>",
-...                        "Split successive determiner/noun pairs")
-
-
+
>>> chunk_rule = ChunkRule(r"<.*>+", "Chunk everything")
+>>> strip_rule = StripRule(r"<VBD|IN|\.>", "Strip on verbs/prepositions")
+>>> split_rule = SplitRule("<DT><NN>", "<DT><NN>",
+...                        "Split successive determiner/noun pairs")
+
+

Create and score a series of chunk parsers, successively more complex.

-
-
->>> chunk_parser = RegexpChunkParser([chunk_rule], chunk_label='NP')
->>> chunked_text = chunk_parser.parse(unchunked_text)
->>> print(chunked_text)
-(S
-  (NP
-    The/DT
-    cat/NN
-    sat/VBD
-    on/IN
-    the/DT
-    mat/NN
-    the/DT
-    dog/NN
-    chewed/VBD
-    ./.))
-
-
->>> chunkscore = ChunkScore()
->>> chunkscore.score(gold_chunked_text, chunked_text)
->>> print(chunkscore.precision())
-0.0
-
-
->>> print(chunkscore.recall())
-0.0
-
-
->>> print(chunkscore.f_measure())
-0
-
-
->>> for chunk in sorted(chunkscore.missed()): print(chunk)
-(NP The/DT cat/NN)
-(NP the/DT dog/NN)
-(NP the/DT mat/NN)
-
-
->>> for chunk in chunkscore.incorrect(): print(chunk)
-(NP
-  The/DT
-  cat/NN
-  sat/VBD
-  on/IN
-  the/DT
-  mat/NN
-  the/DT
-  dog/NN
-  chewed/VBD
-  ./.)
-
-
->>> chunk_parser = RegexpChunkParser([chunk_rule, chink_rule],
-...                                  chunk_label='NP')
->>> chunked_text = chunk_parser.parse(unchunked_text)
->>> print(chunked_text)
-(S
-  (NP The/DT cat/NN)
-  sat/VBD
-  on/IN
-  (NP the/DT mat/NN the/DT dog/NN)
-  chewed/VBD
-  ./.)
->>> assert chunked_text == chunk_parser.parse(list(unchunked_text))
-
-
->>> chunkscore = ChunkScore()
->>> chunkscore.score(gold_chunked_text, chunked_text)
->>> chunkscore.precision()
-0.5
-
-
->>> print(chunkscore.recall())
-0.33333333...
-
-
->>> print(chunkscore.f_measure())
-0.4
-
-
->>> for chunk in sorted(chunkscore.missed()): print(chunk)
-(NP the/DT dog/NN)
-(NP the/DT mat/NN)
-
-
->>> for chunk in chunkscore.incorrect(): print(chunk)
-(NP the/DT mat/NN the/DT dog/NN)
-
-
->>> chunk_parser = RegexpChunkParser([chunk_rule, chink_rule, split_rule],
-...                                  chunk_label='NP')
->>> chunked_text = chunk_parser.parse(unchunked_text, trace=True)
-# Input:
- <DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>
-# Chunk everything:
-{<DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>}
-# Chink on verbs/prepositions:
-{<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>  <DT>  <NN>} <VBD>  <.>
-# Split successive determiner/noun pairs:
-{<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>}{<DT>  <NN>} <VBD>  <.>
->>> print(chunked_text)
-(S
-  (NP The/DT cat/NN)
-  sat/VBD
-  on/IN
-  (NP the/DT mat/NN)
-  (NP the/DT dog/NN)
-  chewed/VBD
-  ./.)
-
-
->>> chunkscore = ChunkScore()
->>> chunkscore.score(gold_chunked_text, chunked_text)
->>> chunkscore.precision()
-1.0
-
-
->>> chunkscore.recall()
-1.0
-
-
->>> chunkscore.f_measure()
-1.0
-
-
->>> chunkscore.missed()
-[]
-
-
->>> chunkscore.incorrect()
-[]
-
-
->>> chunk_parser.rules() # doctest: +NORMALIZE_WHITESPACE
-[<ChunkRule: '<.*>+'>, <ChinkRule: '<VBD|IN|\\.>'>,
- <SplitRule: '<DT><NN>', '<DT><NN>'>]
-
-
+
>>> chunk_parser = RegexpChunkParser([chunk_rule], chunk_label='NP')
+>>> chunked_text = chunk_parser.parse(unchunked_text)
+>>> print(chunked_text)
+(S
+  (NP
+    The/DT
+    cat/NN
+    sat/VBD
+    on/IN
+    the/DT
+    mat/NN
+    the/DT
+    dog/NN
+    chewed/VBD
+    ./.))
+
+
+
>>> chunkscore = ChunkScore()
+>>> chunkscore.score(gold_chunked_text, chunked_text)
+>>> print(chunkscore.precision())
+0.0
+
+
+
>>> print(chunkscore.recall())
+0.0
+
+
+
>>> print(chunkscore.f_measure())
+0
+
+
+
>>> for chunk in sorted(chunkscore.missed()): print(chunk)
+(NP The/DT cat/NN)
+(NP the/DT dog/NN)
+(NP the/DT mat/NN)
+
+
+
>>> for chunk in chunkscore.incorrect(): print(chunk)
+(NP
+  The/DT
+  cat/NN
+  sat/VBD
+  on/IN
+  the/DT
+  mat/NN
+  the/DT
+  dog/NN
+  chewed/VBD
+  ./.)
+
+
+
>>> chunk_parser = RegexpChunkParser([chunk_rule, strip_rule],
+...                                  chunk_label='NP')
+>>> chunked_text = chunk_parser.parse(unchunked_text)
+>>> print(chunked_text)
+(S
+  (NP The/DT cat/NN)
+  sat/VBD
+  on/IN
+  (NP the/DT mat/NN the/DT dog/NN)
+  chewed/VBD
+  ./.)
+>>> assert chunked_text == chunk_parser.parse(list(unchunked_text))
+
+
+
>>> chunkscore = ChunkScore()
+>>> chunkscore.score(gold_chunked_text, chunked_text)
+>>> chunkscore.precision()
+0.5
+
+
+
>>> print(chunkscore.recall())
+0.33333333...
+
+
+
>>> print(chunkscore.f_measure())
+0.4
+
+
+
>>> for chunk in sorted(chunkscore.missed()): print(chunk)
+(NP the/DT dog/NN)
+(NP the/DT mat/NN)
+
+
+
>>> for chunk in chunkscore.incorrect(): print(chunk)
+(NP the/DT mat/NN the/DT dog/NN)
+
+
+
>>> chunk_parser = RegexpChunkParser([chunk_rule, strip_rule, split_rule],
+...                                  chunk_label='NP')
+>>> chunked_text = chunk_parser.parse(unchunked_text, trace=True)
+# Input:
+ <DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>
+# Chunk everything:
+{<DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>}
+# Strip on verbs/prepositions:
+{<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>  <DT>  <NN>} <VBD>  <.>
+# Split successive determiner/noun pairs:
+{<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>}{<DT>  <NN>} <VBD>  <.>
+>>> print(chunked_text)
+(S
+  (NP The/DT cat/NN)
+  sat/VBD
+  on/IN
+  (NP the/DT mat/NN)
+  (NP the/DT dog/NN)
+  chewed/VBD
+  ./.)
+
+
+
>>> chunkscore = ChunkScore()
+>>> chunkscore.score(gold_chunked_text, chunked_text)
+>>> chunkscore.precision()
+1.0
+
+
+
>>> chunkscore.recall()
+1.0
+
+
+
>>> chunkscore.f_measure()
+1.0
+
+
+
>>> chunkscore.missed()
+[]
+
+
+
>>> chunkscore.incorrect()
+[]
+
+
+
>>> chunk_parser.rules()
+[<ChunkRule: '<.*>+'>, <StripRule: '<VBD|IN|\\.>'>,
+ <SplitRule: '<DT><NN>', '<DT><NN>'>]
+
+

Printing parsers:

-
-
->>> print(repr(chunk_parser))
-<RegexpChunkParser with 3 rules>
->>> print(chunk_parser)
-RegexpChunkParser with 3 rules:
-    Chunk everything
-      <ChunkRule: '<.*>+'>
-    Chink on verbs/prepositions
-      <ChinkRule: '<VBD|IN|\\.>'>
-    Split successive determiner/noun pairs
-      <SplitRule: '<DT><NN>', '<DT><NN>'>
-
-
-
-

Regression Tests

-
-

ChunkParserI

-

ChunkParserI is an abstract interface -- it is not meant to be +

>>> print(repr(chunk_parser))
+<RegexpChunkParser with 3 rules>
+>>> print(chunk_parser)
+RegexpChunkParser with 3 rules:
+    Chunk everything
+      <ChunkRule: '<.*>+'>
+    Strip on verbs/prepositions
+      <StripRule: '<VBD|IN|\\.>'>
+    Split successive determiner/noun pairs
+      <SplitRule: '<DT><NN>', '<DT><NN>'>
+
+
+
+

Regression Tests

+
+

ChunkParserI

+

ChunkParserI is an abstract interface – it is not meant to be instantiated directly.

-
-
->>> ChunkParserI().parse([])
+
>>> ChunkParserI().parse([])
 Traceback (most recent call last):
   . . .
 NotImplementedError
-
-
+
-
-

ChunkString

+
+
+

ChunkString

ChunkString can be built from a tree of tagged tuples, a tree of trees, or a mixed list of both:

-
-
->>> t1 = Tree('S', [('w%d' % i, 't%d' % i) for i in range(10)])
->>> t2 = Tree('S', [Tree('t0', []), Tree('t1', ['c1'])])
->>> t3 = Tree('S', [('w0', 't0'), Tree('t1', ['c1'])])
->>> ChunkString(t1)
-<ChunkString: '<t0><t1><t2><t3><t4><t5><t6><t7><t8><t9>'>
->>> ChunkString(t2)
-<ChunkString: '<t0><t1>'>
->>> ChunkString(t3)
-<ChunkString: '<t0><t1>'>
-
-
+
>>> t1 = Tree('S', [('w%d' % i, 't%d' % i) for i in range(10)])
+>>> t2 = Tree('S', [Tree('t0', []), Tree('t1', ['c1'])])
+>>> t3 = Tree('S', [('w0', 't0'), Tree('t1', ['c1'])])
+>>> ChunkString(t1)
+<ChunkString: '<t0><t1><t2><t3><t4><t5><t6><t7><t8><t9>'>
+>>> ChunkString(t2)
+<ChunkString: '<t0><t1>'>
+>>> ChunkString(t3)
+<ChunkString: '<t0><t1>'>
+
+

Other values generate an error:

-
-
->>> ChunkString(Tree('S', ['x']))
-Traceback (most recent call last):
-  . . .
-ValueError: chunk structures must contain tagged tokens or trees
-
-
+
>>> ChunkString(Tree('S', ['x']))
+Traceback (most recent call last):
+  . . .
+ValueError: chunk structures must contain tagged tokens or trees
+
+

The str() for a chunk string adds spaces to it, which makes it line up with str() output for other chunk strings over the same underlying input.

-
-
->>> cs = ChunkString(t1)
->>> print(cs)
- <t0>  <t1>  <t2>  <t3>  <t4>  <t5>  <t6>  <t7>  <t8>  <t9>
->>> cs.xform('<t3>', '{<t3>}')
->>> print(cs)
- <t0>  <t1>  <t2> {<t3>} <t4>  <t5>  <t6>  <t7>  <t8>  <t9>
-
-
-

The _verify() method makes sure that our transforms don't corrupt +

>>> cs = ChunkString(t1)
+>>> print(cs)
+ <t0>  <t1>  <t2>  <t3>  <t4>  <t5>  <t6>  <t7>  <t8>  <t9>
+>>> cs.xform('<t3>', '{<t3>}')
+>>> print(cs)
+ <t0>  <t1>  <t2> {<t3>} <t4>  <t5>  <t6>  <t7>  <t8>  <t9>
+
+
+

The _verify() method makes sure that our transforms don’t corrupt the chunk string. By setting debug_level to 2 or higher (the example below uses 3), _verify() will be called at the end of every call to xform.

-
-
->>> cs = ChunkString(t1, debug_level=3)
-
-
->>> # tag not marked with <...>:
->>> cs.xform('<t3>', 't3')
-Traceback (most recent call last):
-  . . .
-ValueError: Transformation generated invalid chunkstring:
-  <t0><t1><t2>t3<t4><t5><t6><t7><t8><t9>
-
-
->>> # brackets not balanced:
->>> cs.xform('<t3>', '{<t3>')
-Traceback (most recent call last):
-  . . .
-ValueError: Transformation generated invalid chunkstring:
-  <t0><t1><t2>{<t3><t4><t5><t6><t7><t8><t9>
-
-
->>> # nested brackets:
->>> cs.xform('<t3><t4><t5>', '{<t3>{<t4>}<t5>}')
-Traceback (most recent call last):
-  . . .
-ValueError: Transformation generated invalid chunkstring:
-  <t0><t1><t2>{<t3>{<t4>}<t5>}<t6><t7><t8><t9>
-
-
->>> # modified tags:
->>> cs.xform('<t3>', '<t9>')
-Traceback (most recent call last):
-  . . .
-ValueError: Transformation generated invalid chunkstring: tag changed
-
-
->>> # added tags:
->>> cs.xform('<t9>', '<t9><t10>')
-Traceback (most recent call last):
-  . . .
-ValueError: Transformation generated invalid chunkstring: tag changed
-
-
+
>>> cs = ChunkString(t1, debug_level=3)
+
+
+
>>> # tag not marked with <...>:
+>>> cs.xform('<t3>', 't3')
+Traceback (most recent call last):
+  . . .
+ValueError: Transformation generated invalid chunkstring:
+  <t0><t1><t2>t3<t4><t5><t6><t7><t8><t9>
+
+
+
>>> # brackets not balanced:
+>>> cs.xform('<t3>', '{<t3>')
+Traceback (most recent call last):
+  . . .
+ValueError: Transformation generated invalid chunkstring:
+  <t0><t1><t2>{<t3><t4><t5><t6><t7><t8><t9>
+
+
+
>>> # nested brackets:
+>>> cs.xform('<t3><t4><t5>', '{<t3>{<t4>}<t5>}')
+Traceback (most recent call last):
+  . . .
+ValueError: Transformation generated invalid chunkstring:
+  <t0><t1><t2>{<t3>{<t4>}<t5>}<t6><t7><t8><t9>
+
+
+
>>> # modified tags:
+>>> cs.xform('<t3>', '<t9>')
+Traceback (most recent call last):
+  . . .
+ValueError: Transformation generated invalid chunkstring: tag changed
+
-
-

Chunking Rules

+
>>> # added tags:
+>>> cs.xform('<t9>', '<t9><t10>')
+Traceback (most recent call last):
+  . . .
+ValueError: Transformation generated invalid chunkstring: tag changed
+
+
+
+
+

Chunking Rules

Test the different rule constructors & __repr__ methods:

-
-
->>> r1 = RegexpChunkRule('<a|b>'+ChunkString.IN_CHINK_PATTERN,
-...                      '{<a|b>}', 'chunk <a> and <b>')
->>> r2 = RegexpChunkRule(re.compile('<a|b>'+ChunkString.IN_CHINK_PATTERN),
-...                      '{<a|b>}', 'chunk <a> and <b>')
->>> r3 = ChunkRule('<a|b>', 'chunk <a> and <b>')
->>> r4 = ChinkRule('<a|b>', 'chink <a> and <b>')
->>> r5 = UnChunkRule('<a|b>', 'unchunk <a> and <b>')
->>> r6 = MergeRule('<a>', '<b>', 'merge <a> w/ <b>')
->>> r7 = SplitRule('<a>', '<b>', 'split <a> from <b>')
->>> r8 = ExpandLeftRule('<a>', '<b>', 'expand left <a> <b>')
->>> r9 = ExpandRightRule('<a>', '<b>', 'expand right <a> <b>')
->>> for rule in r1, r2, r3, r4, r5, r6, r7, r8, r9:
-...     print(rule)
-<RegexpChunkRule: '<a|b>(?=[^\\}]*(\\{|$))'->'{<a|b>}'>
-<RegexpChunkRule: '<a|b>(?=[^\\}]*(\\{|$))'->'{<a|b>}'>
-<ChunkRule: '<a|b>'>
-<ChinkRule: '<a|b>'>
-<UnChunkRule: '<a|b>'>
-<MergeRule: '<a>', '<b>'>
-<SplitRule: '<a>', '<b>'>
-<ExpandLeftRule: '<a>', '<b>'>
-<ExpandRightRule: '<a>', '<b>'>
-
-
+
>>> r1 = RegexpChunkRule('<a|b>'+ChunkString.IN_STRIP_PATTERN,
+...                      '{<a|b>}', 'chunk <a> and <b>')
+>>> r2 = RegexpChunkRule(re.compile('<a|b>'+ChunkString.IN_STRIP_PATTERN),
+...                      '{<a|b>}', 'chunk <a> and <b>')
+>>> r3 = ChunkRule('<a|b>', 'chunk <a> and <b>')
+>>> r4 = StripRule('<a|b>', 'strip <a> and <b>')
+>>> r5 = UnChunkRule('<a|b>', 'unchunk <a> and <b>')
+>>> r6 = MergeRule('<a>', '<b>', 'merge <a> w/ <b>')
+>>> r7 = SplitRule('<a>', '<b>', 'split <a> from <b>')
+>>> r8 = ExpandLeftRule('<a>', '<b>', 'expand left <a> <b>')
+>>> r9 = ExpandRightRule('<a>', '<b>', 'expand right <a> <b>')
+>>> for rule in r1, r2, r3, r4, r5, r6, r7, r8, r9:
+...     print(rule)
+<RegexpChunkRule: '<a|b>(?=[^\\}]*(\\{|$))'->'{<a|b>}'>
+<RegexpChunkRule: '<a|b>(?=[^\\}]*(\\{|$))'->'{<a|b>}'>
+<ChunkRule: '<a|b>'>
+<StripRule: '<a|b>'>
+<UnChunkRule: '<a|b>'>
+<MergeRule: '<a>', '<b>'>
+<SplitRule: '<a>', '<b>'>
+<ExpandLeftRule: '<a>', '<b>'>
+<ExpandRightRule: '<a>', '<b>'>
+
+

tag_pattern2re_pattern() complains if the tag pattern looks problematic:

-
-
->>> tag_pattern2re_pattern('{}')
-Traceback (most recent call last):
-  . . .
-ValueError: Bad tag pattern: '{}'
-
-
+
>>> tag_pattern2re_pattern('{}')
+Traceback (most recent call last):
+  . . .
+ValueError: Bad tag pattern: '{}'
+
-
-

RegexpChunkParser

+
+
+

RegexpChunkParser

A warning is printed when parsing an empty sentence:

-
-
->>> parser = RegexpChunkParser([ChunkRule('<a>', '')])
->>> parser.parse(Tree('S', []))
-Warning: parsing empty text
-Tree('S', [])
-
-
-
-
-

RegexpParser

-
-
->>> parser = RegexpParser('''
-... NP: {<DT>? <JJ>* <NN>*} # NP
-... P: {<IN>}           # Preposition
-... V: {<V.*>}          # Verb
-... PP: {<P> <NP>}      # PP -> P NP
-... VP: {<V> <NP|PP>*}  # VP -> V (NP|PP)*
-... ''')
->>> print(repr(parser))
-<chunk.RegexpParser with 5 stages>
->>> print(parser)
-chunk.RegexpParser with 5 stages:
-RegexpChunkParser with 1 rules:
-    NP   <ChunkRule: '<DT>? <JJ>* <NN>*'>
-RegexpChunkParser with 1 rules:
-    Preposition   <ChunkRule: '<IN>'>
-RegexpChunkParser with 1 rules:
-    Verb   <ChunkRule: '<V.*>'>
-RegexpChunkParser with 1 rules:
-    PP -> P NP   <ChunkRule: '<P> <NP>'>
-RegexpChunkParser with 1 rules:
-    VP -> V (NP|PP)*   <ChunkRule: '<V> <NP|PP>*'>
->>> print(parser.parse(unchunked_text, trace=True))
-# Input:
- <DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>
-# NP:
-{<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>}{<DT>  <NN>} <VBD>  <.>
-# Input:
- <NP>  <VBD>  <IN>  <NP>  <NP>  <VBD>  <.>
-# Preposition:
- <NP>  <VBD> {<IN>} <NP>  <NP>  <VBD>  <.>
-# Input:
- <NP>  <VBD>  <P>  <NP>  <NP>  <VBD>  <.>
-# Verb:
- <NP> {<VBD>} <P>  <NP>  <NP> {<VBD>} <.>
-# Input:
- <NP>  <V>  <P>  <NP>  <NP>  <V>  <.>
-# PP -> P NP:
- <NP>  <V> {<P>  <NP>} <NP>  <V>  <.>
-# Input:
- <NP>  <V>  <PP>  <NP>  <V>  <.>
-# VP -> V (NP|PP)*:
- <NP> {<V>  <PP>  <NP>}{<V>} <.>
-(S
-  (NP The/DT cat/NN)
-  (VP
-    (V sat/VBD)
-    (PP (P on/IN) (NP the/DT mat/NN))
-    (NP the/DT dog/NN))
-  (VP (V chewed/VBD))
-  ./.)
-
-
+
>>> parser = RegexpChunkParser([ChunkRule('<a>', '')])
+>>> parser.parse(Tree('S', []))
+Warning: parsing empty text
+Tree('S', [])
+
+
+ +
+

RegexpParser

+
>>> parser = RegexpParser('''
+... NP: {<DT>? <JJ>* <NN>*} # NP
+... P: {<IN>}           # Preposition
+... V: {<V.*>}          # Verb
+... PP: {<P> <NP>}      # PP -> P NP
+... VP: {<V> <NP|PP>*}  # VP -> V (NP|PP)*
+... ''')
+>>> print(repr(parser))
+<chunk.RegexpParser with 5 stages>
+>>> print(parser)
+chunk.RegexpParser with 5 stages:
+RegexpChunkParser with 1 rules:
+    NP   <ChunkRule: '<DT>? <JJ>* <NN>*'>
+RegexpChunkParser with 1 rules:
+    Preposition   <ChunkRule: '<IN>'>
+RegexpChunkParser with 1 rules:
+    Verb   <ChunkRule: '<V.*>'>
+RegexpChunkParser with 1 rules:
+    PP -> P NP   <ChunkRule: '<P> <NP>'>
+RegexpChunkParser with 1 rules:
+    VP -> V (NP|PP)*   <ChunkRule: '<V> <NP|PP>*'>
+>>> print(parser.parse(unchunked_text, trace=True))
+# Input:
+ <DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>
+# NP:
+{<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>}{<DT>  <NN>} <VBD>  <.>
+# Input:
+ <NP>  <VBD>  <IN>  <NP>  <NP>  <VBD>  <.>
+# Preposition:
+ <NP>  <VBD> {<IN>} <NP>  <NP>  <VBD>  <.>
+# Input:
+ <NP>  <VBD>  <P>  <NP>  <NP>  <VBD>  <.>
+# Verb:
+ <NP> {<VBD>} <P>  <NP>  <NP> {<VBD>} <.>
+# Input:
+ <NP>  <V>  <P>  <NP>  <NP>  <V>  <.>
+# PP -> P NP:
+ <NP>  <V> {<P>  <NP>} <NP>  <V>  <.>
+# Input:
+ <NP>  <V>  <PP>  <NP>  <V>  <.>
+# VP -> V (NP|PP)*:
+ <NP> {<V>  <PP>  <NP>}{<V>} <.>
+(S
+  (NP The/DT cat/NN)
+  (VP
+    (V sat/VBD)
+    (PP (P on/IN) (NP the/DT mat/NN))
+    (NP the/DT dog/NN))
+  (VP (V chewed/VBD))
+  ./.)
+
+

Test parsing of other rule types:

-
-
->>> print(RegexpParser('''
-... X:
-...   }<a><b>{     # chink rule
-...   <a>}{<b>     # split rule
-...   <a>{}<b>     # merge rule
-...   <a>{<b>}<c>  # chunk rule w/ context
-... '''))
-chunk.RegexpParser with 1 stages:
-RegexpChunkParser with 4 rules:
-    chink rule              <ChinkRule: '<a><b>'>
-    split rule              <SplitRule: '<a>', '<b>'>
-    merge rule              <MergeRule: '<a>', '<b>'>
-    chunk rule w/ context   <ChunkRuleWithContext: '<a>', '<b>', '<c>'>
-
-
-

Illegal patterns give an error message:

-
-
->>> print(RegexpParser('X: {<foo>} {<bar>}'))
-Traceback (most recent call last):
-  . . .
-ValueError: Illegal chunk pattern: {<foo>} {<bar>}
-
-
+
>>> print(RegexpParser('''
+... X:
+...   }<a><b>{     # strip rule
+...   <a>}{<b>     # split rule
+...   <a>{}<b>     # merge rule
+...   <a>{<b>}<c>  # chunk rule w/ context
+... '''))
+chunk.RegexpParser with 1 stages:
+RegexpChunkParser with 4 rules:
+    strip rule              <StripRule: '<a><b>'>
+    split rule              <SplitRule: '<a>', '<b>'>
+    merge rule              <MergeRule: '<a>', '<b>'>
+    chunk rule w/ context   <ChunkRuleWithContext: '<a>', '<b>', '<c>'>
+
+

Illegal patterns give an error message:

+
>>> print(RegexpParser('X: {<foo>} {<bar>}'))
+Traceback (most recent call last):
+  . . .
+ValueError: Illegal chunk pattern: {<foo>} {<bar>}
+
+
+ - + \ No newline at end of file diff --git a/howto/classify.html b/howto/classify.html index ea6082db8..db4a0257f 100644 --- a/howto/classify.html +++ b/howto/classify.html @@ -1,543 +1,349 @@ - - - + - - -Classifiers - - - -
-

Classifiers


Sample usage for classify

+
+

Classifiers

+
>>> from nltk.test.classify_fixt import setup_module
+>>> setup_module()
+
+

Classifiers label tokens with category labels (or class labels). -Typically, labels are represented with strings (such as "health" -or "sports". In NLTK, classifiers are defined using classes that -implement the ClassifyI interface:

-
-
->>> import nltk
->>> nltk.usage(nltk.classify.ClassifierI)
-ClassifierI supports the following operations:
-  - self.classify(featureset)
-  - self.classify_many(featuresets)
-  - self.labels()
-  - self.prob_classify(featureset)
-  - self.prob_classify_many(featuresets)
-
-
+Typically, labels are represented with strings (such as "health" +or "sports"). In NLTK, classifiers are defined using classes that +implement the ClassifierI interface, which supports the following operations:

+
    +
  • self.classify(featureset)

  • +
  • self.classify_many(featuresets)

  • +
  • self.labels()

  • +
  • self.prob_classify(featureset)

  • +
  • self.prob_classify_many(featuresets)

  • +

NLTK defines several classifier classes:

    -
  • ConditionalExponentialClassifier
  • -
  • DecisionTreeClassifier
  • -
  • MaxentClassifier
  • -
  • NaiveBayesClassifier
  • -
  • WekaClassifier
  • +
  • ConditionalExponentialClassifier

  • +
  • DecisionTreeClassifier

  • +
  • MaxentClassifier

  • +
  • NaiveBayesClassifier

  • +
  • WekaClassifier

Classifiers are typically created by training them on a training corpus.
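For instance, a classifier can be trained on a toy corpus of (featureset, label) pairs — the 'sports'/'health' data below is invented purely for illustration — after which the ClassifierI operations listed above become available:

>>> import nltk
>>> toy_train = [({'word': 'goal'}, 'sports'), ({'word': 'flu'}, 'health')]
>>> toy = nltk.classify.NaiveBayesClassifier.train(toy_train)
>>> sorted(toy.labels())
['health', 'sports']
>>> toy.classify({'word': 'goal'})
'sports'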

-
-

Regression Tests

-

We define a very simple training corpus with 3 binary features: ['a', -'b', 'c'], and are two labels: ['x', 'y']. We use a simple feature set so +

+

Regression Tests

+

We define a very simple training corpus with 3 binary features: [‘a’, +‘b’, ‘c’], and two labels: [‘x’, ‘y’]. We use a simple feature set so that the correct answers can be calculated analytically (although we -haven't done this yet for all tests).

-
-
->>> train = [
-...     (dict(a=1,b=1,c=1), 'y'),
-...     (dict(a=1,b=1,c=1), 'x'),
-...     (dict(a=1,b=1,c=0), 'y'),
-...     (dict(a=0,b=1,c=1), 'x'),
-...     (dict(a=0,b=1,c=1), 'y'),
-...     (dict(a=0,b=0,c=1), 'y'),
-...     (dict(a=0,b=1,c=0), 'x'),
-...     (dict(a=0,b=0,c=0), 'x'),
-...     (dict(a=0,b=1,c=1), 'y'),
-...     ]
->>> test = [
-...     (dict(a=1,b=0,c=1)), # unseen
-...     (dict(a=1,b=0,c=0)), # unseen
-...     (dict(a=0,b=1,c=1)), # seen 3 times, labels=y,y,x
-...     (dict(a=0,b=1,c=0)), # seen 1 time, label=x
-...     ]
-
-
+haven’t done this yet for all tests).

+
>>> import nltk
+>>> train = [
+...     (dict(a=1,b=1,c=1), 'y'),
+...     (dict(a=1,b=1,c=1), 'x'),
+...     (dict(a=1,b=1,c=0), 'y'),
+...     (dict(a=0,b=1,c=1), 'x'),
+...     (dict(a=0,b=1,c=1), 'y'),
+...     (dict(a=0,b=0,c=1), 'y'),
+...     (dict(a=0,b=1,c=0), 'x'),
+...     (dict(a=0,b=0,c=0), 'x'),
+...     (dict(a=0,b=1,c=1), 'y'),
+...     (dict(a=None,b=1,c=0), 'x'),
+...     ]
+>>> test = [
+...     (dict(a=1,b=0,c=1)), # unseen
+...     (dict(a=1,b=0,c=0)), # unseen
+...     (dict(a=0,b=1,c=1)), # seen 3 times, labels=y,y,x
+...     (dict(a=0,b=1,c=0)), # seen 1 time, label=x
+...     ]
+
+

Test the Naive Bayes classifier:

-
-
->>> classifier = nltk.classify.NaiveBayesClassifier.train(train)
->>> sorted(classifier.labels())
-['x', 'y']
->>> classifier.classify_many(test)
-['y', 'x', 'y', 'x']
->>> for pdist in classifier.prob_classify_many(test):
-...     print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
-0.3203 0.6797
-0.5857 0.4143
-0.3792 0.6208
-0.6470 0.3530
->>> classifier.show_most_informative_features()
-Most Informative Features
-                       c = 0                   x : y      =      2.0 : 1.0
-                       c = 1                   y : x      =      1.5 : 1.0
-                       a = 1                   y : x      =      1.4 : 1.0
-                       b = 0                   x : y      =      1.2 : 1.0
-                       a = 0                   x : y      =      1.2 : 1.0
-                       b = 1                   y : x      =      1.1 : 1.0
-
-
-

Test the Decision Tree classifier:

-
-
->>> classifier = nltk.classify.DecisionTreeClassifier.train(
-...     train, entropy_cutoff=0,
-...                                                support_cutoff=0)
+
>>> classifier = nltk.classify.NaiveBayesClassifier.train(train)
+>>> sorted(classifier.labels())
+['x', 'y']
+>>> classifier.classify_many(test)
+['y', 'x', 'y', 'x']
+>>> for pdist in classifier.prob_classify_many(test):
+...     print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
+0.2500 0.7500
+0.5833 0.4167
+0.3571 0.6429
+0.7000 0.3000
+>>> classifier.show_most_informative_features()
+Most Informative Features
+                       c = 0                   x : y      =      2.3 : 1.0
+                       c = 1                   y : x      =      1.8 : 1.0
+                       a = 1                   y : x      =      1.7 : 1.0
+                       a = 0                   x : y      =      1.0 : 1.0
+                       b = 0                   x : y      =      1.0 : 1.0
+                       b = 1                   x : y      =      1.0 : 1.0
+
+
+

Test the Decision Tree classifier (without None):

+
>>> classifier = nltk.classify.DecisionTreeClassifier.train(
+...     train[:-1], entropy_cutoff=0,
+...     support_cutoff=0)
 >>> sorted(classifier.labels())
-['x', 'y']
+['x', 'y']
 >>> print(classifier)
 c=0? .................................................. x
   a=0? ................................................ x
   a=1? ................................................ y
 c=1? .................................................. y
-<BLANKLINE>
+
 >>> classifier.classify_many(test)
-['y', 'y', 'y', 'x']
+['y', 'y', 'y', 'x']
 >>> for pdist in classifier.prob_classify_many(test):
-...     print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
+...     print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
 Traceback (most recent call last):
   . . .
 NotImplementedError
-
-
+
+
+

Test the Decision Tree classifier (with None):

+
>>> classifier = nltk.classify.DecisionTreeClassifier.train(
+...     train, entropy_cutoff=0,
+...     support_cutoff=0)
+>>> sorted(classifier.labels())
+['x', 'y']
+>>> print(classifier)
+c=0? .................................................. x
+  a=0? ................................................ x
+  a=1? ................................................ y
+  a=None? ............................................. x
+c=1? .................................................. y
+
+

Test SklearnClassifier, which requires the scikit-learn package.

-
-
->>> from nltk.classify import SklearnClassifier
->>> from sklearn.naive_bayes import BernoulliNB
->>> from sklearn.svm import SVC
->>> train_data = [({"a": 4, "b": 1, "c": 0}, "ham"),
-...               ({"a": 5, "b": 2, "c": 1}, "ham"),
-...               ({"a": 0, "b": 3, "c": 4}, "spam"),
-...               ({"a": 5, "b": 1, "c": 1}, "ham"),
-...               ({"a": 1, "b": 4, "c": 3}, "spam")]
->>> classif = SklearnClassifier(BernoulliNB()).train(train_data)
->>> test_data = [{"a": 3, "b": 2, "c": 1},
-...              {"a": 0, "b": 3, "c": 7}]
->>> classif.classify_many(test_data)
-['ham', 'spam']
->>> classif = SklearnClassifier(SVC(), sparse=False).train(train_data)
->>> classif.classify_many(test_data)
-['ham', 'spam']
-
-
+
>>> from nltk.classify import SklearnClassifier
+>>> from sklearn.naive_bayes import BernoulliNB
+>>> from sklearn.svm import SVC
+>>> train_data = [({"a": 4, "b": 1, "c": 0}, "ham"),
+...               ({"a": 5, "b": 2, "c": 1}, "ham"),
+...               ({"a": 0, "b": 3, "c": 4}, "spam"),
+...               ({"a": 5, "b": 1, "c": 1}, "ham"),
+...               ({"a": 1, "b": 4, "c": 3}, "spam")]
+>>> classif = SklearnClassifier(BernoulliNB()).train(train_data)
+>>> test_data = [{"a": 3, "b": 2, "c": 1},
+...              {"a": 0, "b": 3, "c": 7}]
+>>> classif.classify_many(test_data)
+['ham', 'spam']
+>>> classif = SklearnClassifier(SVC(), sparse=False).train(train_data)
+>>> classif.classify_many(test_data)
+['ham', 'spam']
+
+

Test the Maximum Entropy classifier training algorithms; they should all generate the same results.

-
-
->>> def print_maxent_test_header():
-...     print(' '*11+''.join(['      test[%s]  ' % i
-...                           for i in range(len(test))]))
-...     print(' '*11+'     p(x)  p(y)'*len(test))
-...     print('-'*(11+15*len(test)))
-
-
->>> def test_maxent(algorithm):
-...     print('%11s' % algorithm, end=' ')
-...     try:
-...         classifier = nltk.classify.MaxentClassifier.train(
-...                         train, algorithm, trace=0, max_iter=1000)
-...     except Exception as e:
-...         print('Error: %r' % e)
-...         return
-...
-...     for featureset in test:
-...         pdist = classifier.prob_classify(featureset)
-...         print('%8.2f%6.2f' % (pdist.prob('x'), pdist.prob('y')), end=' ')
-...     print()
-
-
->>> print_maxent_test_header(); test_maxent('GIS'); test_maxent('IIS')
-                 test[0]        test[1]        test[2]        test[3]
-                p(x)  p(y)     p(x)  p(y)     p(x)  p(y)     p(x)  p(y)
------------------------------------------------------------------------
-        GIS     0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
-        IIS     0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
-
-
->>> test_maxent('MEGAM'); test_maxent('TADM') # doctest: +SKIP
-        MEGAM   0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
-        TADM    0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
-
-
+
>>> def print_maxent_test_header():
+...     print(' '*11+''.join(['      test[%s]  ' % i
+...                           for i in range(len(test))]))
+...     print(' '*11+'     p(x)  p(y)'*len(test))
+...     print('-'*(11+15*len(test)))
+
-
-

Regression tests for TypedMaxentFeatureEncoding

-
-
->>> from nltk.classify import maxent
->>> train = [
-...     ({'a': 1, 'b': 1, 'c': 1}, 'y'),
-...     ({'a': 5, 'b': 5, 'c': 5}, 'x'),
-...     ({'a': 0.9, 'b': 0.9, 'c': 0.9}, 'y'),
-...     ({'a': 5.5, 'b': 5.4, 'c': 5.3}, 'x'),
-...     ({'a': 0.8, 'b': 1.2, 'c': 1}, 'y'),
-...     ({'a': 5.1, 'b': 4.9, 'c': 5.2}, 'x')
-... ]
-
-
->>> test = [
-...     {'a': 1, 'b': 0.8, 'c': 1.2},
-...     {'a': 5.2, 'b': 5.1, 'c': 5}
-... ]
-
-
->>> encoding = maxent.TypedMaxentFeatureEncoding.train(
-...     train, count_cutoff=3, alwayson_features=True)
-
-
->>> classifier = maxent.MaxentClassifier.train(
-...     train, bernoulli=False, encoding=encoding, trace=0)
-
-
->>> classifier.classify_many(test)
-['y', 'x']
-
-
+
>>> def test_maxent(algorithm):
+...     print('%11s' % algorithm, end=' ')
+...     try:
+...         classifier = nltk.classify.MaxentClassifier.train(
+...                         train, algorithm, trace=0, max_iter=1000)
+...     except Exception as e:
+...         print('Error: %r' % e)
+...         return
+...
+...     for featureset in test:
+...         pdist = classifier.prob_classify(featureset)
+...         print('%8.2f%6.2f' % (pdist.prob('x'), pdist.prob('y')), end=' ')
+...     print()
+
+
+
>>> print_maxent_test_header(); test_maxent('GIS'); test_maxent('IIS')
+                 test[0]        test[1]        test[2]        test[3]
+                p(x)  p(y)     p(x)  p(y)     p(x)  p(y)     p(x)  p(y)
+-----------------------------------------------------------------------
+        GIS     0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
+        IIS     0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
+
+
+
>>> test_maxent('MEGAM'); test_maxent('TADM') 
+        MEGAM   0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
+        TADM    0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
+
+
+

Regression tests for TypedMaxentFeatureEncoding

+
>>> from nltk.classify import maxent
+>>> train = [
+...     ({'a': 1, 'b': 1, 'c': 1}, 'y'),
+...     ({'a': 5, 'b': 5, 'c': 5}, 'x'),
+...     ({'a': 0.9, 'b': 0.9, 'c': 0.9}, 'y'),
+...     ({'a': 5.5, 'b': 5.4, 'c': 5.3}, 'x'),
+...     ({'a': 0.8, 'b': 1.2, 'c': 1}, 'y'),
+...     ({'a': 5.1, 'b': 4.9, 'c': 5.2}, 'x')
+... ]
+
+
>>> test = [
+...     {'a': 1, 'b': 0.8, 'c': 1.2},
+...     {'a': 5.2, 'b': 5.1, 'c': 5}
+... ]
+
+
>>> encoding = maxent.TypedMaxentFeatureEncoding.train(
+...     train, count_cutoff=3, alwayson_features=True)
+
+
+
>>> classifier = maxent.MaxentClassifier.train(
+...     train, bernoulli=False, encoding=encoding, trace=0)
+
+
+
>>> classifier.classify_many(test)
+['y', 'x']
+
+
+
\ No newline at end of file
diff --git a/howto/collections.html b/howto/collections.html
new file mode 100644
index 000000000..b1cbec923
--- /dev/null
+++ b/howto/collections.html
@@ -0,0 +1,177 @@
+NLTK :: Sample usage for collections
Sample usage for collections

+
+

Collections

+
>>> import nltk
+>>> from nltk.collections import *
+
+
+
+

Trie

+

Trie can be pickled:

+
>>> import pickle
+>>> trie = nltk.collections.Trie(['a'])
+>>> s = pickle.dumps(trie)
+>>> pickle.loads(s)
+{'a': {True: None}}
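
Since Trie is built on nested dictionaries, a pickle round trip can also be checked for equality directly. A small sketch; the two-word trie and the name trie2 are just illustrative choices:

>>> trie2 = nltk.collections.Trie(['cat', 'car'])
>>> pickle.loads(pickle.dumps(trie2)) == trie2
True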
+
+
+
+
+

LazyIteratorList

+

Fetching the length of a LazyIteratorList object does not throw a StopIteration exception:

+
>>> lil = LazyIteratorList(i for i in range(1, 11))
+>>> lil[-1]
+10
+>>> len(lil)
+10
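
Because LazyIteratorList caches elements as the underlying iterator is consumed, earlier items remain accessible after the full traversal above (a quick check on the same lil object):

>>> lil[0]
1
>>> lil[5]
6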
+
+
+
+
+
\ No newline at end of file
diff --git a/howto/collocations.html b/howto/collocations.html
index 1e5173b75..f81e6e9d4 100644
--- a/howto/collocations.html
+++ b/howto/collocations.html
@@ -1,657 +1,422 @@
-Collocations
+NLTK :: Sample usage for collocations
-

Collocations

+
+
+
+ +


+ +
+
+ +


+ +
+ + + + +
+ +
+ +
+ + - - -
-

Overview

+ + + +
+ +
+
+ +
+

Sample usage for collocations

+
+

Collocations

+
+

Overview

Collocations are expressions of multiple words which commonly co-occur. For example, the top ten bigram collocations in Genesis are listed below, as measured using Pointwise Mutual Information.

-
-
->>> import nltk
->>> from nltk.collocations import *
->>> bigram_measures = nltk.collocations.BigramAssocMeasures()
->>> trigram_measures = nltk.collocations.TrigramAssocMeasures()
->>> finder = BigramCollocationFinder.from_words(
-...     nltk.corpus.genesis.words('english-web.txt'))
->>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
-[(u'Allon', u'Bacuth'), (u'Ashteroth', u'Karnaim'), (u'Ben', u'Ammi'),
- (u'En', u'Mishpat'), (u'Jegar', u'Sahadutha'), (u'Salt', u'Sea'),
- (u'Whoever', u'sheds'), (u'appoint', u'overseers'), (u'aromatic', u'resin'),
- (u'cutting', u'instrument')]
-
-
+
>>> import nltk
+>>> from nltk.collocations import *
+>>> bigram_measures = nltk.collocations.BigramAssocMeasures()
+>>> trigram_measures = nltk.collocations.TrigramAssocMeasures()
+>>> fourgram_measures = nltk.collocations.QuadgramAssocMeasures()
+>>> finder = BigramCollocationFinder.from_words(
+...     nltk.corpus.genesis.words('english-web.txt'))
+>>> finder.nbest(bigram_measures.pmi, 10)
+[('Allon', 'Bacuth'), ('Ashteroth', 'Karnaim'), ('Ben', 'Ammi'),
+ ('En', 'Mishpat'), ('Jegar', 'Sahadutha'), ('Salt', 'Sea'),
+ ('Whoever', 'sheds'), ('appoint', 'overseers'), ('aromatic', 'resin'),
+ ('cutting', 'instrument')]
+
+

While these words are highly collocated, the expressions are also very infrequent. Therefore it is useful to apply filters, such as ignoring all bigrams which occur fewer than three times in the corpus:

-
-
->>> finder.apply_freq_filter(3)
->>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
-[(u'Beer', u'Lahai'), (u'Lahai', u'Roi'), (u'gray', u'hairs'),
- (u'Most', u'High'), (u'ewe', u'lambs'), (u'many', u'colors'),
- (u'burnt', u'offering'), (u'Paddan', u'Aram'), (u'east', u'wind'),
- (u'living', u'creature')]
-
-
+
>>> finder.apply_freq_filter(3)
+>>> finder.nbest(bigram_measures.pmi, 10)
+[('Beer', 'Lahai'), ('Lahai', 'Roi'), ('gray', 'hairs'),
+ ('ewe', 'lambs'), ('Most', 'High'), ('many', 'colors'),
+ ('burnt', 'offering'), ('Paddan', 'Aram'), ('east', 'wind'),
+ ('living', 'creature')]
+
+

We may similarly find collocations among tagged words:

-
-
->>> finder = BigramCollocationFinder.from_words(
-...     nltk.corpus.brown.tagged_words('ca01', tagset='universal'))
->>> finder.nbest(bigram_measures.pmi, 5)  # doctest: +NORMALIZE_WHITESPACE
-[(('1,119', 'NUM'), ('votes', 'NOUN')),
- (('1962', 'NUM'), ("governor's", 'NOUN')),
- (('637', 'NUM'), ('E.', 'NOUN')),
- (('Alpharetta', 'NOUN'), ('prison', 'NOUN')),
- (('Bar', 'NOUN'), ('Association', 'NOUN'))]
-
-
+
>>> finder = BigramCollocationFinder.from_words(
+...     nltk.corpus.brown.tagged_words('ca01', tagset='universal'))
+>>> finder.nbest(bigram_measures.pmi, 5)
+[(('1,119', 'NUM'), ('votes', 'NOUN')),
+ (('1962', 'NUM'), ("governor's", 'NOUN')),
+ (('637', 'NUM'), ('E.', 'NOUN')),
+ (('Alpharetta', 'NOUN'), ('prison', 'NOUN')),
+ (('Bar', 'NOUN'), ('Association', 'NOUN'))]
+
+

Or tags alone:

-
-
->>> finder = BigramCollocationFinder.from_words(t for w, t in
-...     nltk.corpus.brown.tagged_words('ca01', tagset='universal'))
->>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
-[('PRT', 'VERB'), ('PRON', 'VERB'), ('ADP', 'DET'), ('.', 'PRON'), ('DET', 'ADJ'),
- ('CONJ', 'PRON'), ('ADP', 'NUM'), ('NUM', '.'), ('ADV', 'ADV'), ('VERB', 'ADV')]
-
-
+
>>> finder = BigramCollocationFinder.from_words(t for w, t in
+...     nltk.corpus.brown.tagged_words('ca01', tagset='universal'))
+>>> finder.nbest(bigram_measures.pmi, 10)
+[('PRT', 'VERB'), ('PRON', 'VERB'), ('ADP', 'DET'), ('.', 'PRON'), ('DET', 'ADJ'),
+ ('CONJ', 'PRON'), ('ADP', 'NUM'), ('NUM', '.'), ('ADV', 'ADV'), ('VERB', 'ADV')]
+
+

Or spanning intervening words:

-
-
->>> finder = BigramCollocationFinder.from_words(
-...     nltk.corpus.genesis.words('english-web.txt'),
-...     window_size = 20)
->>> finder.apply_freq_filter(2)
->>> ignored_words = nltk.corpus.stopwords.words('english')
->>> finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words)
->>> finder.nbest(bigram_measures.likelihood_ratio, 10) # doctest: +NORMALIZE_WHITESPACE
-[(u'chief', u'chief'), (u'became', u'father'), (u'years', u'became'),
- (u'hundred', u'years'), (u'lived', u'became'), (u'king', u'king'),
- (u'lived', u'years'), (u'became', u'became'), (u'chief', u'chiefs'),
- (u'hundred', u'became')]
-
-
+
>>> finder = BigramCollocationFinder.from_words(
+...     nltk.corpus.genesis.words('english-web.txt'),
+...     window_size = 20)
+>>> finder.apply_freq_filter(2)
+>>> ignored_words = nltk.corpus.stopwords.words('english')
+>>> finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words)
+>>> finder.nbest(bigram_measures.likelihood_ratio, 10)
+[('chief', 'chief'), ('became', 'father'), ('years', 'became'),
+ ('hundred', 'years'), ('lived', 'became'), ('king', 'king'),
+ ('lived', 'years'), ('became', 'became'), ('chief', 'chiefs'),
+ ('hundred', 'became')]
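
To make the effect of window_size concrete, here is a minimal sketch on a three-word text (toy_finder is just an illustrative name): with window_size=3, each word is paired with both of the two words that follow it, so ('a', 'c') is counted even though 'a' and 'c' are not adjacent.

>>> toy_finder = BigramCollocationFinder.from_words(['a', 'b', 'c'], window_size=3)
>>> sorted(toy_finder.ngram_fd.items())
[(('a', 'b'), 1), (('a', 'c'), 1), (('b', 'c'), 1)]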
+
-
-

Finders

+
+
+

Finders

The collocations package provides collocation finders which by default consider all ngrams in a text as candidate collocations:

-
-
->>> text = "I do not like green eggs and ham, I do not like them Sam I am!"
->>> tokens = nltk.wordpunct_tokenize(text)
->>> finder = BigramCollocationFinder.from_words(tokens)
->>> scored = finder.score_ngrams(bigram_measures.raw_freq)
->>> sorted(bigram for bigram, score in scored)  # doctest: +NORMALIZE_WHITESPACE
-[(',', 'I'), ('I', 'am'), ('I', 'do'), ('Sam', 'I'), ('am', '!'),
- ('and', 'ham'), ('do', 'not'), ('eggs', 'and'), ('green', 'eggs'),
- ('ham', ','), ('like', 'green'), ('like', 'them'), ('not', 'like'),
- ('them', 'Sam')]
-
-
+
>>> text = "I do not like green eggs and ham, I do not like them Sam I am!"
+>>> tokens = nltk.wordpunct_tokenize(text)
+>>> finder = BigramCollocationFinder.from_words(tokens)
+>>> scored = finder.score_ngrams(bigram_measures.raw_freq)
+>>> sorted(bigram for bigram, score in scored)
+[(',', 'I'), ('I', 'am'), ('I', 'do'), ('Sam', 'I'), ('am', '!'),
+ ('and', 'ham'), ('do', 'not'), ('eggs', 'and'), ('green', 'eggs'),
+ ('ham', ','), ('like', 'green'), ('like', 'them'), ('not', 'like'),
+ ('them', 'Sam')]
+
+

We could otherwise construct the collocation finder from manually-derived FreqDists:

-
-
->>> word_fd = nltk.FreqDist(tokens)
->>> bigram_fd = nltk.FreqDist(nltk.bigrams(tokens))
->>> finder = BigramCollocationFinder(word_fd, bigram_fd)
->>> scored == finder.score_ngrams(bigram_measures.raw_freq)
-True
-
-
+
>>> word_fd = nltk.FreqDist(tokens)
+>>> bigram_fd = nltk.FreqDist(nltk.bigrams(tokens))
+>>> finder = BigramCollocationFinder(word_fd, bigram_fd)
+>>> scored == finder.score_ngrams(bigram_measures.raw_freq)
+True
+
+

A similar interface is provided for trigrams:

-
-
->>> finder = TrigramCollocationFinder.from_words(tokens)
->>> scored = finder.score_ngrams(trigram_measures.raw_freq)
->>> set(trigram for trigram, score in scored) == set(nltk.trigrams(tokens))
-True
-
-
+
>>> finder = TrigramCollocationFinder.from_words(tokens)
+>>> scored = finder.score_ngrams(trigram_measures.raw_freq)
+>>> set(trigram for trigram, score in scored) == set(nltk.trigrams(tokens))
+True
+
+

We may want to select only the top n results:

-
-
->>> sorted(finder.nbest(trigram_measures.raw_freq, 2))
-[('I', 'do', 'not'), ('do', 'not', 'like')]
-
-
+
>>> sorted(finder.nbest(trigram_measures.raw_freq, 2))
+[('I', 'do', 'not'), ('do', 'not', 'like')]
+
+

Alternatively, we can select those above a minimum score value:

-
-
->>> sorted(finder.above_score(trigram_measures.raw_freq,
-...                           1.0 / len(tuple(nltk.trigrams(tokens)))))
-[('I', 'do', 'not'), ('do', 'not', 'like')]
-
-
+
>>> sorted(finder.above_score(trigram_measures.raw_freq,
+...                           1.0 / len(tuple(nltk.trigrams(tokens)))))
+[('I', 'do', 'not'), ('do', 'not', 'like')]
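
The cutoff used here is just 1/N, where N is the number of trigram tokens in the text, so above_score keeps exactly those trigrams that occur more than once (a quick check of N for this sample):

>>> len(tuple(nltk.trigrams(tokens)))
16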
+
+

Now spanning intervening words:

-
-
->>> finder = TrigramCollocationFinder.from_words(tokens)
->>> finder = TrigramCollocationFinder.from_words(tokens, window_size=4)
->>> sorted(finder.nbest(trigram_measures.raw_freq, 4))
-[('I', 'do', 'like'), ('I', 'do', 'not'), ('I', 'not', 'like'), ('do', 'not', 'like')]
-
-
-

A closer look at the finder's ngram frequencies:

-
-
->>> sorted(finder.ngram_fd.items(), key=lambda t: (-t[1], t[0]))[:10]  # doctest: +NORMALIZE_WHITESPACE
-[(('I', 'do', 'like'), 2), (('I', 'do', 'not'), 2), (('I', 'not', 'like'), 2),
- (('do', 'not', 'like'), 2), ((',', 'I', 'do'), 1), ((',', 'I', 'not'), 1),
- ((',', 'do', 'not'), 1), (('I', 'am', '!'), 1), (('Sam', 'I', '!'), 1),
- (('Sam', 'I', 'am'), 1)]
-
-
+
>>> finder = TrigramCollocationFinder.from_words(tokens)
+>>> finder = TrigramCollocationFinder.from_words(tokens, window_size=4)
+>>> sorted(finder.nbest(trigram_measures.raw_freq, 4))
+[('I', 'do', 'like'), ('I', 'do', 'not'), ('I', 'not', 'like'), ('do', 'not', 'like')]
+
+
+

A closer look at the finder’s ngram frequencies:

+
>>> sorted(finder.ngram_fd.items(), key=lambda t: (-t[1], t[0]))[:10]
+[(('I', 'do', 'like'), 2), (('I', 'do', 'not'), 2), (('I', 'not', 'like'), 2),
+ (('do', 'not', 'like'), 2), ((',', 'I', 'do'), 1), ((',', 'I', 'not'), 1),
+ ((',', 'do', 'not'), 1), (('I', 'am', '!'), 1), (('Sam', 'I', '!'), 1),
+ (('Sam', 'I', 'am'), 1)]
+
-
-

Filtering candidates

+

A similar interface is provided for fourgrams:

+
>>> finder_4grams = QuadgramCollocationFinder.from_words(tokens)
+>>> scored_4grams = finder_4grams.score_ngrams(fourgram_measures.raw_freq)
+>>> set(fourgram for fourgram, score in scored_4grams) == set(nltk.ngrams(tokens, n=4))
+True
+
+
+
+
+

Filtering candidates

All the ngrams in a text are often too many to be useful when finding collocations. It is generally useful to remove some words or punctuation, and to require a minimum frequency for candidate collocations.

Given our sample text above, if we remove all trigrams containing personal pronouns from candidacy, score_ngrams should return 6 fewer results, and -'do not like' will be the only candidate which occurs more than once:

-
-
->>> finder = TrigramCollocationFinder.from_words(tokens)
->>> len(finder.score_ngrams(trigram_measures.raw_freq))
-14
->>> finder.apply_word_filter(lambda w: w in ('I', 'me'))
->>> len(finder.score_ngrams(trigram_measures.raw_freq))
-8
->>> sorted(finder.above_score(trigram_measures.raw_freq,
-...                           1.0 / len(tuple(nltk.trigrams(tokens)))))
-[('do', 'not', 'like')]
-
-
+‘do not like’ will be the only candidate which occurs more than once:

+
>>> finder = TrigramCollocationFinder.from_words(tokens)
+>>> len(finder.score_ngrams(trigram_measures.raw_freq))
+14
+>>> finder.apply_word_filter(lambda w: w in ('I', 'me'))
+>>> len(finder.score_ngrams(trigram_measures.raw_freq))
+8
+>>> sorted(finder.above_score(trigram_measures.raw_freq,
+...                           1.0 / len(tuple(nltk.trigrams(tokens)))))
+[('do', 'not', 'like')]
+
+

Sometimes a filter is a function on the whole ngram, rather than each word, -such as if we may permit 'and' to appear in the middle of a trigram, but +for example, permitting ‘and’ to appear in the middle of a trigram, but not on either edge:

-
-
->>> finder.apply_ngram_filter(lambda w1, w2, w3: 'and' in (w1, w3))
->>> len(finder.score_ngrams(trigram_measures.raw_freq))
-6
-
-
+
>>> finder.apply_ngram_filter(lambda w1, w2, w3: 'and' in (w1, w3))
+>>> len(finder.score_ngrams(trigram_measures.raw_freq))
+6
+
+

Finally, it is often important to remove low frequency candidates, as we lack sufficient evidence about their significance as collocations:

-
-
->>> finder.apply_freq_filter(2)
->>> len(finder.score_ngrams(trigram_measures.raw_freq))
-1
-
-
+
>>> finder.apply_freq_filter(2)
+>>> len(finder.score_ngrams(trigram_measures.raw_freq))
+1
+
-
-

Association measures

+
+
+

Association measures

A number of measures are available to score collocations or other associations. The arguments to measure functions are marginals of a contingency table, in the bigram case (n_ii, (n_ix, n_xi), n_xx):

-
-        w1    ~w1
-     ------ ------
- w2 | n_ii | n_oi | = n_xi
-     ------ ------
-~w2 | n_io | n_oo |
-     ------ ------
-     = n_ix        TOTAL = n_xx
-
+
        w1    ~w1
+     ------ ------
+ w2 | n_ii | n_oi | = n_xi
+     ------ ------
+~w2 | n_io | n_oo |
+     ------ ------
+     = n_ix        TOTAL = n_xx
+
+

We test their calculation using some known values presented in Manning and -Schutze's text and other papers.

-

Student's t: examples from Manning and Schutze 5.3.2

-
-
->>> print('%0.4f' % bigram_measures.student_t(8, (15828, 4675), 14307668))
-0.9999
->>> print('%0.4f' % bigram_measures.student_t(20, (42, 20), 14307668))
-4.4721
-
-
+Schutze’s text and other papers.

+

Student’s t: examples from Manning and Schutze 5.3.2

+
>>> print('%0.4f' % bigram_measures.student_t(8, (15828, 4675), 14307668))
+0.9999
+>>> print('%0.4f' % bigram_measures.student_t(20, (42, 20), 14307668))
+4.4721
+
+

Chi-square: examples from Manning and Schutze 5.3.3

-
-
->>> print('%0.2f' % bigram_measures.chi_sq(8, (15828, 4675), 14307668))
-1.55
->>> print('%0.0f' % bigram_measures.chi_sq(59, (67, 65), 571007))
-456400
-
-
+
>>> print('%0.2f' % bigram_measures.chi_sq(8, (15828, 4675), 14307668))
+1.55
+>>> print('%0.0f' % bigram_measures.chi_sq(59, (67, 65), 571007))
+456400
+
+

Likelihood ratios: examples from Dunning, CL, 1993

-
-
->>> print('%0.2f' % bigram_measures.likelihood_ratio(110, (2552, 221), 31777))
-270.72
->>> print('%0.2f' % bigram_measures.likelihood_ratio(8, (13, 32), 31777))
-95.29
-
-
+
>>> print('%0.2f' % bigram_measures.likelihood_ratio(110, (2552, 221), 31777))
+270.72
+>>> print('%0.2f' % bigram_measures.likelihood_ratio(8, (13, 32), 31777))
+95.29
+
+

Pointwise Mutual Information: examples from Manning and Schutze 5.4

-
-
->>> print('%0.2f' % bigram_measures.pmi(20, (42, 20), 14307668))
-18.38
->>> print('%0.2f' % bigram_measures.pmi(20, (15019, 15629), 14307668))
-0.29
-
-
-

TODO: Find authoritative results for trigrams.

+
>>> print('%0.2f' % bigram_measures.pmi(20, (42, 20), 14307668))
+18.38
+>>> print('%0.2f' % bigram_measures.pmi(20, (15019, 15629), 14307668))
+0.29
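
The marginals are all these measures need. For instance, PMI is log2(n_ii * n_xx / (n_ix * n_xi)), so the first figure above can be reproduced by hand with the standard library alone:

>>> from math import log2
>>> print('%0.2f' % log2(20 * 14307668 / (42 * 20)))
18.38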
+
-
-

Using contingency table values

+

TODO: Find authoritative results for trigrams.

+
+
+

Using contingency table values

While frequency counts make marginals readily available for collocation finding, it is common to find published contingency table values. The collocations package therefore provides a wrapper, ContingencyMeasures, which wraps an association measures class, providing association measures which take contingency values as arguments, (n_ii, n_io, n_oi, n_oo) in the bigram case.

-
-
->>> from nltk.metrics import ContingencyMeasures
->>> cont_bigram_measures = ContingencyMeasures(bigram_measures)
->>> print('%0.2f' % cont_bigram_measures.likelihood_ratio(8, 5, 24, 31740))
-95.29
->>> print('%0.2f' % cont_bigram_measures.chi_sq(8, 15820, 4667, 14287173))
-1.55
-
-
+
>>> from nltk.metrics import ContingencyMeasures
+>>> cont_bigram_measures = ContingencyMeasures(bigram_measures)
+>>> print('%0.2f' % cont_bigram_measures.likelihood_ratio(8, 5, 24, 31740))
+95.29
+>>> print('%0.2f' % cont_bigram_measures.chi_sq(8, 15820, 4667, 14287173))
+1.55
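
The two argument conventions are related by n_ix = n_ii + n_io, n_xi = n_ii + n_oi, and n_xx = the sum of all four cells; the chi_sq call above is therefore the same test as the earlier chi_sq(8, (15828, 4675), 14307668). A quick sanity check in plain Python:

>>> n_ii, n_io, n_oi, n_oo = 8, 15820, 4667, 14287173
>>> (n_ii + n_io, n_ii + n_oi, n_ii + n_io + n_oi + n_oo)
(15828, 4675, 14307668)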
+
-
-

Ranking and correlation

+
+
+

Ranking and correlation

It is useful to consider the results of finding collocations as a ranking, and the rankings output using different association measures can be compared using the Spearman correlation coefficient.

Ranks can be assigned to a sorted list of results trivially by assigning strictly increasing ranks to each result:

-
-
->>> from nltk.metrics.spearman import *
->>> results_list = ['item1', 'item2', 'item3', 'item4', 'item5']
->>> print(list(ranks_from_sequence(results_list)))
-[('item1', 0), ('item2', 1), ('item3', 2), ('item4', 3), ('item5', 4)]
-
-
+
>>> from nltk.metrics.spearman import *
+>>> results_list = ['item1', 'item2', 'item3', 'item4', 'item5']
+>>> print(list(ranks_from_sequence(results_list)))
+[('item1', 0), ('item2', 1), ('item3', 2), ('item4', 3), ('item5', 4)]
+
+

If scores are available for each result, we may allow sufficiently similar results (differing by no more than rank_gap) to be assigned the same rank:

-
-
->>> results_scored = [('item1', 50.0), ('item2', 40.0), ('item3', 38.0),
-...                   ('item4', 35.0), ('item5', 14.0)]
->>> print(list(ranks_from_scores(results_scored, rank_gap=5)))
-[('item1', 0), ('item2', 1), ('item3', 1), ('item4', 1), ('item5', 4)]
-
-
+
>>> results_scored = [('item1', 50.0), ('item2', 40.0), ('item3', 38.0),
+...                   ('item4', 35.0), ('item5', 14.0)]
+>>> print(list(ranks_from_scores(results_scored, rank_gap=5)))
+[('item1', 0), ('item2', 1), ('item3', 1), ('item4', 1), ('item5', 4)]
+
+

The Spearman correlation coefficient gives a number from -1.0 to 1.0 comparing two rankings. A coefficient of 1.0 indicates identical rankings; -1.0 indicates exact opposite rankings.

-
-
->>> print('%0.1f' % spearman_correlation(
-...         ranks_from_sequence(results_list),
-...         ranks_from_sequence(results_list)))
-1.0
->>> print('%0.1f' % spearman_correlation(
-...         ranks_from_sequence(reversed(results_list)),
-...         ranks_from_sequence(results_list)))
--1.0
->>> results_list2 = ['item2', 'item3', 'item1', 'item5', 'item4']
->>> print('%0.1f' % spearman_correlation(
-...        ranks_from_sequence(results_list),
-...        ranks_from_sequence(results_list2)))
-0.6
->>> print('%0.1f' % spearman_correlation(
-...        ranks_from_sequence(reversed(results_list)),
-...        ranks_from_sequence(results_list2)))
--0.6
-
-
+
>>> print('%0.1f' % spearman_correlation(
+...         ranks_from_sequence(results_list),
+...         ranks_from_sequence(results_list)))
+1.0
+>>> print('%0.1f' % spearman_correlation(
+...         ranks_from_sequence(reversed(results_list)),
+...         ranks_from_sequence(results_list)))
+-1.0
+>>> results_list2 = ['item2', 'item3', 'item1', 'item5', 'item4']
+>>> print('%0.1f' % spearman_correlation(
+...        ranks_from_sequence(results_list),
+...        ranks_from_sequence(results_list2)))
+0.6
+>>> print('%0.1f' % spearman_correlation(
+...        ranks_from_sequence(reversed(results_list)),
+...        ranks_from_sequence(results_list2)))
+-0.6
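
These coefficients agree with the classical formula rho = 1 - 6*sum(d**2) / (n*(n**2 - 1)), where d is the per-item difference between the two rank assignments. Checking the 0.6 case by hand:

>>> r1 = dict(ranks_from_sequence(results_list))
>>> r2 = dict(ranks_from_sequence(results_list2))
>>> d2 = sum((r1[w] - r2[w]) ** 2 for w in r1)
>>> print('%0.1f' % (1 - 6 * d2 / (len(r1) * (len(r1) ** 2 - 1))))
0.6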
+
+
+
+
\ No newline at end of file
diff --git a/howto/concordance.html b/howto/concordance.html
new file mode 100644
index 000000000..57a7b5412
--- /dev/null
+++ b/howto/concordance.html
@@ -0,0 +1,220 @@
+NLTK :: Sample usage for concordance

Sample usage for concordance

+
+

Concordance Example

+

A concordance view shows us every occurrence of a given +word, together with some context. Here we look up the word monstrous +in Moby Dick by entering text1 followed by a period, then the term +concordance, and then placing “monstrous” in parentheses:

+
>>> from nltk.corpus import gutenberg
+>>> from nltk.text import Text
+>>> corpus = gutenberg.words('melville-moby_dick.txt')
+>>> text = Text(corpus)
+
+
+
>>> text.concordance("monstrous")
+Displaying 11 of 11 matches:
+ong the former , one was of a most monstrous size . ... This came towards us ,
+ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
+ll over with a heathenish array of monstrous clubs and spears . Some were thick
+d as you gazed , and wondered what monstrous cannibal and savage could ever hav
+that has survived the flood ; most monstrous and most mountainous ! That Himmal
+they might scout at Moby Dick as a monstrous fable , or still worse and more de
+th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
+ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
+ere to enter upon those still more monstrous stories of them which are to be fo
+ght have been rummaged out of this monstrous cabinet there is no telling . But
+of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
+
+
+
>>> text.concordance("monstrous")
+Displaying 11 of 11 matches:
+ong the former , one was of a most monstrous size . ... This came towards us ,
+ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
+ll over with a heathenish array of monstrous clubs and spears . Some were thick
+...
+
+
+

We can also search for a multi-word phrase by passing a list of strings:

+
>>> text.concordance(["monstrous", "size"])
+Displaying 2 of 2 matches:
+the former , one was of a most monstrous size . ... This came towards us , op
+Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead upo
+
+
+
+
+

Concordance List

+

Often we need to store the results of concordance for further usage. +To do so, call concordance_list, which returns the matching +lines as a list instead of printing them:

+
>>> from nltk.corpus import gutenberg
+>>> from nltk.text import Text
+>>> corpus = gutenberg.words('melville-moby_dick.txt')
+>>> text = Text(corpus)
+>>> con_list = text.concordance_list("monstrous")
+>>> con_list[2].line
+'ll over with a heathenish array of monstrous clubs and spears . Some were thick'
+>>> len(con_list)
+11
+
+
+
+
+

Patching Issue #2088

+

Patching https://github.com/nltk/nltk/issues/2088 +The left slice of the left context should be clipped to 0 if i - context < 0.

+
>>> from nltk import Text, word_tokenize
+>>> jane_eyre = 'Chapter 1\nTHERE was no possibility of taking a walk that day. We had been wandering, indeed, in the leafless shrubbery an hour in the morning; but since dinner (Mrs. Reed, when there was no company, dined early) the cold winter wind had brought with it clouds so sombre, and a rain so penetrating, that further outdoor exercise was now out of the question.'
+>>> text = Text(word_tokenize(jane_eyre))
+>>> text.concordance_list('taking')[0].left
+['Chapter', '1', 'THERE', 'was', 'no', 'possibility', 'of']
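
Aside from the clipped left context, the rest of the interface behaves normally on this text; 'taking' occurs once, so the full result list has a single entry:

>>> len(text.concordance_list('taking'))
1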
+
+
+
+
\ No newline at end of file
diff --git a/howto/corpus.html b/howto/corpus.html
index 2902411e8..ea58b0a90 100644
--- a/howto/corpus.html
+++ b/howto/corpus.html
@@ -1,1718 +1,1260 @@
-Corpus Readers
-

Corpus Readers

+ +
- - +
+
+ +
+

Sample usage for corpus

+
+

Corpus Readers

The nltk.corpus package defines a collection of corpus reader classes, which can be used to access the contents of a diverse set of corpora. The list of available corpora is given at:

-

http://www.nltk.org/nltk_data/

+

https://www.nltk.org/nltk_data/

Each corpus reader class is specialized to handle a specific corpus format. In addition, the nltk.corpus package automatically creates a set of corpus reader instances that can be used to access the corpora in the NLTK data package. -Section Corpus Reader Objects ("Corpus Reader Objects") describes +Section Corpus Reader Objects (“Corpus Reader Objects”) describes the corpus reader instances that can be used to read the corpora in -the NLTK data package. Section Corpus Reader Classes ("Corpus -Reader Classes") describes the corpus reader classes themselves, and +the NLTK data package. Section Corpus Reader Classes (“Corpus +Reader Classes”) describes the corpus reader classes themselves, and discusses the issues involved in creating new corpus reader objects and new corpus reader classes. Section Regression Tests -("Regression Tests") contains regression tests for the corpus readers +(“Regression Tests”) contains regression tests for the corpus readers and associated functions and classes.

-
-

Corpus Reader Objects

-
-

Overview

+
+

Corpus Reader Objects

+
+

Overview

NLTK includes a diverse set of corpora which can be -read using the nltk.corpus package. Each corpus is accessed by -means of a "corpus reader" object from nltk.corpus:

-
-
->>> import nltk.corpus
->>> # The Brown corpus:
->>> print(str(nltk.corpus.brown).replace('\\\\','/'))
-<CategorizedTaggedCorpusReader in '.../corpora/brown'...>
->>> # The Penn Treebank Corpus:
->>> print(str(nltk.corpus.treebank).replace('\\\\','/'))
-<BracketParseCorpusReader in '.../corpora/treebank/combined'...>
->>> # The Name Genders Corpus:
->>> print(str(nltk.corpus.names).replace('\\\\','/'))
-<WordListCorpusReader in '.../corpora/names'...>
->>> # The Inaugural Address Corpus:
->>> print(str(nltk.corpus.inaugural).replace('\\\\','/'))
-<PlaintextCorpusReader in '.../corpora/inaugural'...>
-
-
+read using the nltk.corpus package. Each corpus is accessed by +means of a “corpus reader” object from nltk.corpus:

+
>>> import nltk.corpus
+>>> # The Brown corpus:
+>>> print(str(nltk.corpus.brown).replace('\\\\','/'))
+<CategorizedTaggedCorpusReader in '.../corpora/brown'...>
+>>> # The Penn Treebank Corpus:
+>>> print(str(nltk.corpus.treebank).replace('\\\\','/'))
+<BracketParseCorpusReader in '.../corpora/treebank/combined'...>
+>>> # The Name Genders Corpus:
+>>> print(str(nltk.corpus.names).replace('\\\\','/'))
+<WordListCorpusReader in '.../corpora/names'...>
+>>> # The Inaugural Address Corpus:
+>>> print(str(nltk.corpus.inaugural).replace('\\\\','/'))
+<PlaintextCorpusReader in '.../corpora/inaugural'...>
+
+

Most corpora consist of a set of files, each containing a document (or other pieces of text). A list of identifiers for these files is -accessed via the fileids() method of the corpus reader:

-
-
->>> nltk.corpus.treebank.fileids() # doctest: +ELLIPSIS
-['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...]
->>> nltk.corpus.inaugural.fileids() # doctest: +ELLIPSIS
-['1789-Washington.txt', '1793-Washington.txt', '1797-Adams.txt', ...]
-
-
+accessed via the fileids() method of the corpus reader:

+
>>> nltk.corpus.treebank.fileids()
+['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...]
+>>> nltk.corpus.inaugural.fileids()
+['1789-Washington.txt', '1793-Washington.txt', '1797-Adams.txt', ...]
+
+

Each corpus reader provides a variety of methods to read data from the corpus, depending on the format of the corpus. For example, plaintext corpora support methods to read the corpus as raw text, a list of words, a list of sentences, or a list of paragraphs.

-
-
->>> from nltk.corpus import inaugural
->>> inaugural.raw('1789-Washington.txt') # doctest: +ELLIPSIS
-'Fellow-Citizens of the Senate ...'
->>> inaugural.words('1789-Washington.txt')
-['Fellow', '-', 'Citizens', 'of', 'the', ...]
->>> inaugural.sents('1789-Washington.txt') # doctest: +ELLIPSIS
-[['Fellow', '-', 'Citizens'...], ['Among', 'the', 'vicissitudes'...]...]
->>> inaugural.paras('1789-Washington.txt') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[[['Fellow', '-', 'Citizens'...]],
- [['Among', 'the', 'vicissitudes'...],
-  ['On', 'the', 'one', 'hand', ',', 'I'...]...]...]
-
-
-

Each of these reader methods may be given a single document's item +

>>> from nltk.corpus import inaugural
+>>> inaugural.raw('1789-Washington.txt')
+'Fellow-Citizens of the Senate ...'
+>>> inaugural.words('1789-Washington.txt')
+['Fellow', '-', 'Citizens', 'of', 'the', ...]
+>>> inaugural.sents('1789-Washington.txt')
+[['Fellow', '-', 'Citizens'...], ['Among', 'the', 'vicissitudes'...]...]
+>>> inaugural.paras('1789-Washington.txt')
+[[['Fellow', '-', 'Citizens'...]],
+ [['Among', 'the', 'vicissitudes'...],
+  ['On', 'the', 'one', 'hand', ',', 'I'...]...]...]
+
+
+

Each of these reader methods may be given a single document’s item name or a list of document item names. When given a list of document item names, the reader methods will concatenate together the contents of the individual documents.

-
-
->>> l1 = len(inaugural.words('1789-Washington.txt'))
->>> l2 = len(inaugural.words('1793-Washington.txt'))
->>> l3 = len(inaugural.words(['1789-Washington.txt', '1793-Washington.txt']))
->>> print('%s+%s == %s' % (l1, l2, l3))
-1538+147 == 1685
-
-
+
>>> l1 = len(inaugural.words('1789-Washington.txt'))
+>>> l2 = len(inaugural.words('1793-Washington.txt'))
+>>> l3 = len(inaugural.words(['1789-Washington.txt', '1793-Washington.txt']))
+>>> print('%s+%s == %s' % (l1, l2, l3))
+1538+147 == 1685
+
+

If the reader methods are called without any arguments, they will typically load all documents in the corpus.

-
-
->>> len(inaugural.words())
-145735
-
-
-

If a corpus contains a README file, it can be accessed with a readme() method:

-
-
->>> inaugural.readme()[:32]
-'C-Span Inaugural Address Corpus\n'
-
-
-
-
-

Plaintext Corpora

-

Here are the first few words from each of NLTK's plaintext corpora:

-
-
->>> nltk.corpus.abc.words()
-['PM', 'denies', 'knowledge', 'of', 'AWB', ...]
->>> nltk.corpus.genesis.words()
-[u'In', u'the', u'beginning', u'God', u'created', ...]
->>> nltk.corpus.gutenberg.words(fileids='austen-emma.txt')
-['[', 'Emma', 'by', 'Jane', 'Austen', '1816', ...]
->>> nltk.corpus.inaugural.words()
-['Fellow', '-', 'Citizens', 'of', 'the', ...]
->>> nltk.corpus.state_union.words()
-['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...]
->>> nltk.corpus.webtext.words()
-['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...]
-
-
-
-
-

Tagged Corpora

-

In addition to the plaintext corpora, NLTK's data package also +

>>> len(inaugural.words())
+149797
+
+
+

If a corpus contains a README file, it can be accessed with a readme() method:

+
>>> inaugural.readme()[:32]
+'C-Span Inaugural Address Corpus\n'
+
+
+
+
+

Plaintext Corpora

+

Here are the first few words from each of NLTK’s plaintext corpora:

+
>>> nltk.corpus.abc.words()
+['PM', 'denies', 'knowledge', 'of', 'AWB', ...]
+>>> nltk.corpus.genesis.words()
+['In', 'the', 'beginning', 'God', 'created', ...]
+>>> nltk.corpus.gutenberg.words(fileids='austen-emma.txt')
+['[', 'Emma', 'by', 'Jane', 'Austen', '1816', ...]
+>>> nltk.corpus.inaugural.words()
+['Fellow', '-', 'Citizens', 'of', 'the', ...]
+>>> nltk.corpus.state_union.words()
+['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...]
+>>> nltk.corpus.webtext.words()
+['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...]
+
+
+
+
+

Tagged Corpora

+

In addition to the plaintext corpora, NLTK’s data package also contains a wide variety of annotated corpora. For example, the Brown Corpus is annotated with part-of-speech tags, and defines additional -methods tagged_*() which words as (word,tag) tuples, rather +methods tagged_*() which return words as (word, tag) tuples, rather than just bare word strings.

-
-
->>> from nltk.corpus import brown
->>> print(brown.words())
-['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
->>> print(brown.tagged_words())
-[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
->>> print(brown.sents()) # doctest: +ELLIPSIS
-[['The', 'Fulton', 'County'...], ['The', 'jury', 'further'...], ...]
->>> print(brown.tagged_sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[[('The', 'AT'), ('Fulton', 'NP-TL')...],
- [('The', 'AT'), ('jury', 'NN'), ('further', 'RBR')...]...]
->>> print(brown.paras(categories='reviews')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[[['It', 'is', 'not', 'news', 'that', 'Nathan', 'Milstein'...],
-  ['Certainly', 'not', 'in', 'Orchestra', 'Hall', 'where'...]],
- [['There', 'was', 'about', 'that', 'song', 'something', ...],
-  ['Not', 'the', 'noblest', 'performance', 'we', 'have', ...], ...], ...]
->>> print(brown.tagged_paras(categories='reviews')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[[[('It', 'PPS'), ('is', 'BEZ'), ('not', '*'), ...],
-  [('Certainly', 'RB'), ('not', '*'), ('in', 'IN'), ...]],
- [[('There', 'EX'), ('was', 'BEDZ'), ('about', 'IN'), ...],
-  [('Not', '*'), ('the', 'AT'), ('noblest', 'JJT'), ...], ...], ...]
-
-
+
>>> from nltk.corpus import brown
+>>> print(brown.words())
+['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
+>>> print(brown.tagged_words())
+[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
+>>> print(brown.sents())
+[['The', 'Fulton', 'County'...], ['The', 'jury', 'further'...], ...]
+>>> print(brown.tagged_sents())
+[[('The', 'AT'), ('Fulton', 'NP-TL')...],
+ [('The', 'AT'), ('jury', 'NN'), ('further', 'RBR')...]...]
+>>> print(brown.paras(categories='reviews'))
+[[['It', 'is', 'not', 'news', 'that', 'Nathan', 'Milstein'...],
+  ['Certainly', 'not', 'in', 'Orchestra', 'Hall', 'where'...]],
+ [['There', 'was', 'about', 'that', 'song', 'something', ...],
+  ['Not', 'the', 'noblest', 'performance', 'we', 'have', ...], ...], ...]
+>>> print(brown.tagged_paras(categories='reviews'))
+[[[('It', 'PPS'), ('is', 'BEZ'), ('not', '*'), ...],
+  [('Certainly', 'RB'), ('not', '*'), ('in', 'IN'), ...]],
+ [[('There', 'EX'), ('was', 'BEDZ'), ('about', 'IN'), ...],
+  [('Not', '*'), ('the', 'AT'), ('noblest', 'JJT'), ...], ...], ...]
+
+

Similarly, the Indian Language POS-Tagged Corpus includes samples of Indian text annotated with part-of-speech tags:

-
-
->>> from nltk.corpus import indian
->>> print(indian.words()) # doctest: +SKIP
-['\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf\...',
- '\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', ...]
->>> print(indian.tagged_words()) # doctest: +SKIP
-[('\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf...', 'NN'),
- ('\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', 'NN'), ...]
-
-
+
>>> from nltk.corpus import indian
+>>> print(indian.words()) 
+['\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf\...',
+ '\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', ...]
+>>> print(indian.tagged_words()) 
+[('\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf...', 'NN'),
+ ('\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', 'NN'), ...]
+
+

Several tagged corpora support access to a simplified, universal tagset, e.g. where all noun -tags are collapsed to a single category NOUN:

-
-
->>> print(brown.tagged_sents(tagset='universal')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[[('The', 'DET'), ('Fulton', 'NOUN'), ('County', 'NOUN'), ('Grand', 'ADJ'), ('Jury', 'NOUN'), ...],
- [('The', 'DET'), ('jury', 'NOUN'), ('further', 'ADV'), ('said', 'VERB'), ('in', 'ADP'), ...]...]
->>> from nltk.corpus import conll2000, switchboard
->>> print(conll2000.tagged_words(tagset='universal')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[('Confidence', 'NOUN'), ('in', 'ADP'), ...]
-
-
-

Use nltk.app.pos_concordance() to access a GUI for searching tagged corpora.

-
-
-

Chunked Corpora

+tags are collapsed to a single category NOUN:

+
>>> print(brown.tagged_sents(tagset='universal'))
+[[('The', 'DET'), ('Fulton', 'NOUN'), ('County', 'NOUN'), ('Grand', 'ADJ'), ('Jury', 'NOUN'), ...],
+ [('The', 'DET'), ('jury', 'NOUN'), ('further', 'ADV'), ('said', 'VERB'), ('in', 'ADP'), ...]...]
+>>> from nltk.corpus import conll2000, switchboard
+>>> print(conll2000.tagged_words(tagset='universal'))
+[('Confidence', 'NOUN'), ('in', 'ADP'), ...]
+
+
+

Use nltk.app.pos_concordance() to access a GUI for searching tagged corpora.

+ +
+

Chunked Corpora

The CoNLL corpora also provide chunk structures, which are encoded as flat trees. The CoNLL 2000 Corpus includes phrasal chunks; and the CoNLL 2002 Corpus includes named entity chunks.

-
-
->>> from nltk.corpus import conll2000, conll2002
->>> print(conll2000.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[['Confidence', 'in', 'the', 'pound', 'is', 'widely', ...],
- ['Chancellor', 'of', 'the', 'Exchequer', ...], ...]
->>> for tree in conll2000.chunked_sents()[:2]:
-...     print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-(S
-  (NP Confidence/NN)
-  (PP in/IN)
-  (NP the/DT pound/NN)
-  (VP is/VBZ widely/RB expected/VBN to/TO take/VB)
-  (NP another/DT sharp/JJ dive/NN)
-  if/IN
-  ...)
-(S
-  Chancellor/NNP
-  (PP of/IN)
-  (NP the/DT Exchequer/NNP)
-  ...)
->>> print(conll2002.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[[u'Sao', u'Paulo', u'(', u'Brasil', u')', u',', ...], [u'-'], ...]
->>> for tree in conll2002.chunked_sents()[:2]:
-...     print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-(S
-  (LOC Sao/NC Paulo/VMI)
-  (/Fpa
-  (LOC Brasil/NC)
-  )/Fpt
-  ...)
-(S -/Fg)
-
-
-
-

Note

-

Since the CONLL corpora do not contain paragraph break -information, these readers do not support the para() method.)

-
-
-

Warning

-

if you call the conll corpora reader methods without any +

>>> from nltk.corpus import conll2000, conll2002
+>>> print(conll2000.sents())
+[['Confidence', 'in', 'the', 'pound', 'is', 'widely', ...],
+ ['Chancellor', 'of', 'the', 'Exchequer', ...], ...]
+>>> for tree in conll2000.chunked_sents()[:2]:
+...     print(tree)
+(S
+  (NP Confidence/NN)
+  (PP in/IN)
+  (NP the/DT pound/NN)
+  (VP is/VBZ widely/RB expected/VBN to/TO take/VB)
+  (NP another/DT sharp/JJ dive/NN)
+  if/IN
+  ...)
+(S
+  Chancellor/NNP
+  (PP of/IN)
+  (NP the/DT Exchequer/NNP)
+  ...)
+>>> print(conll2002.sents())
+[['Sao', 'Paulo', '(', 'Brasil', ')', ',', ...], ['-'], ...]
+>>> for tree in conll2002.chunked_sents()[:2]:
+...     print(tree)
+(S
+  (LOC Sao/NC Paulo/VMI)
+  (/Fpa
+  (LOC Brasil/NC)
+  )/Fpt
+  ...)
+(S -/Fg)
+
+
+
+

Note

+

Since the CONLL corpora do not contain paragraph break +information, these readers do not support the para() method.

+
+
+

Warning

+

if you call the conll corpora reader methods without any arguments, they will return the contents of the entire corpus, -including the 'test' portions of the corpus.)

+including the ‘test’ portions of the corpus.

SemCor is a subset of the Brown corpus tagged with WordNet senses and named entities. Both kinds of lexical items include multiword units, which are encoded as chunks (senses and part-of-speech tags pertain to the entire chunk).

-
-
->>> from nltk.corpus import semcor
->>> semcor.words()
-['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
->>> semcor.chunks()
-[['The'], ['Fulton', 'County', 'Grand', 'Jury'], ...]
->>> semcor.sents() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...],
-['The', 'jury', 'further', 'said', ...], ...]
->>> semcor.chunk_sents() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[[['The'], ['Fulton', 'County', 'Grand', 'Jury'], ['said'], ...
-['.']], [['The'], ['jury'], ['further'], ['said'], ... ['.']], ...]
->>> list(map(str, semcor.tagged_chunks(tag='both')[:3]))
-['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", "(Lemma('state.v.01.say') (VB said))"]
->>> [[str(c) for c in s] for s in semcor.tagged_sents(tag='both')[:2]]
-[['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", ...
- '(None .)'], ['(DT The)', ... '(None .)']]
-
-
+
>>> from nltk.corpus import semcor
+>>> semcor.words()
+['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
+>>> semcor.chunks()
+[['The'], ['Fulton', 'County', 'Grand', 'Jury'], ...]
+>>> semcor.sents()
+[['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...],
+['The', 'jury', 'further', 'said', ...], ...]
+>>> semcor.chunk_sents()
+[[['The'], ['Fulton', 'County', 'Grand', 'Jury'], ['said'], ...
+['.']], [['The'], ['jury'], ['further'], ['said'], ... ['.']], ...]
+>>> list(map(str, semcor.tagged_chunks(tag='both')[:3]))
+['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", "(Lemma('state.v.01.say') (VB said))"]
+>>> [[str(c) for c in s] for s in semcor.tagged_sents(tag='both')[:2]]
+[['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", ...
+ '(None .)'], ['(DT The)', ... '(None .)']]
+
+

The IEER corpus is another chunked corpus. This corpus is unusual in that each corpus item contains multiple documents. (This reflects the fact that each corpus file contains multiple documents.) The IEER corpus defines the parsed_docs method, which returns the documents in a given item as IEERDocument objects:

-
-
->>> from nltk.corpus import ieer
->>> ieer.fileids() # doctest: +NORMALIZE_WHITESPACE
-['APW_19980314', 'APW_19980424', 'APW_19980429',
- 'NYT_19980315', 'NYT_19980403', 'NYT_19980407']
->>> docs = ieer.parsed_docs('APW_19980314')
->>> print(docs[0])
-<IEERDocument APW19980314.0391: 'Kenyans protest tax hikes'>
->>> print(docs[0].docno)
-APW19980314.0391
->>> print(docs[0].doctype)
-NEWS STORY
->>> print(docs[0].date_time)
-03/14/1998 10:36:00
->>> print(docs[0].headline)
-(DOCUMENT Kenyans protest tax hikes)
->>> print(docs[0].text) # doctest: +ELLIPSIS
-(DOCUMENT
-  (LOCATION NAIROBI)
-  ,
-  (LOCATION Kenya)
-  (
-  (ORGANIZATION AP)
-  )
-  _
-  (CARDINAL Thousands)
-  of
-  laborers,
-  ...
-  on
-  (DATE Saturday)
-  ...)
-
-
-
-
-

Parsed Corpora

+
>>> from nltk.corpus import ieer
+>>> ieer.fileids()
+['APW_19980314', 'APW_19980424', 'APW_19980429',
+ 'NYT_19980315', 'NYT_19980403', 'NYT_19980407']
+>>> docs = ieer.parsed_docs('APW_19980314')
+>>> print(docs[0])
+<IEERDocument APW19980314.0391: 'Kenyans protest tax hikes'>
+>>> print(docs[0].docno)
+APW19980314.0391
+>>> print(docs[0].doctype)
+NEWS STORY
+>>> print(docs[0].date_time)
+03/14/1998 10:36:00
+>>> print(docs[0].headline)
+(DOCUMENT Kenyans protest tax hikes)
+>>> print(docs[0].text)
+(DOCUMENT
+  (LOCATION NAIROBI)
+  ,
+  (LOCATION Kenya)
+  (
+  (ORGANIZATION AP)
+  )
+  _
+  (CARDINAL Thousands)
+  of
+  laborers,
+  ...
+  on
+  (DATE Saturday)
+  ...)
+
+
+
+
+

Parsed Corpora

The Treebank corpora provide a syntactic parse for each sentence. The NLTK data package includes a 10% sample of the Penn Treebank (in -treebank), as well as the Sinica Treebank (in sinica_treebank).

+treebank), as well as the Sinica Treebank (in sinica_treebank).

Reading the Penn Treebank (Wall Street Journal sample):

-
-
->>> from nltk.corpus import treebank
->>> print(treebank.fileids()) # doctest: +ELLIPSIS
-['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...]
->>> print(treebank.words('wsj_0003.mrg'))
-['A', 'form', 'of', 'asbestos', 'once', 'used', ...]
->>> print(treebank.tagged_words('wsj_0003.mrg'))
-[('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...]
->>> print(treebank.parsed_sents('wsj_0003.mrg')[0]) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-(S
-  (S-TPC-1
-    (NP-SBJ
-      (NP (NP (DT A) (NN form)) (PP (IN of) (NP (NN asbestos))))
-      (RRC ...)...)...)
-  ...
-  (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1))))
-  (. .))
-
-
+
>>> from nltk.corpus import treebank
+>>> print(treebank.fileids())
+['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...]
+>>> print(treebank.words('wsj_0003.mrg'))
+['A', 'form', 'of', 'asbestos', 'once', 'used', ...]
+>>> print(treebank.tagged_words('wsj_0003.mrg'))
+[('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...]
+>>> print(treebank.parsed_sents('wsj_0003.mrg')[0])
+(S
+  (S-TPC-1
+    (NP-SBJ
+      (NP (NP (DT A) (NN form)) (PP (IN of) (NP (NN asbestos))))
+      (RRC ...)...)...)
+  ...
+  (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1))))
+  (. .))
+
+

If you have access to a full installation of the Penn Treebank, NLTK -can be configured to load it as well. Download the ptb package, -and in the directory nltk_data/corpora/ptb place the BROWN -and WSJ directories of the Treebank installation (symlinks work -as well). Then use the ptb module instead of treebank:

-
-
->>> from nltk.corpus import ptb
->>> print(ptb.fileids()) # doctest: +SKIP
-['BROWN/CF/CF01.MRG', 'BROWN/CF/CF02.MRG', 'BROWN/CF/CF03.MRG', 'BROWN/CF/CF04.MRG', ...]
->>> print(ptb.words('WSJ/00/WSJ_0003.MRG')) # doctest: +SKIP
-['A', 'form', 'of', 'asbestos', 'once', 'used', '*', ...]
->>> print(ptb.tagged_words('WSJ/00/WSJ_0003.MRG')) # doctest: +SKIP
-[('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...]
-
-
-

...and so forth, like treebank but with extended fileids. Categories -specified in allcats.txt can be used to filter by genre; they consist -of news (for WSJ articles) and names of the Brown subcategories -(fiction, humor, romance, etc.):

-
-
->>> ptb.categories() # doctest: +SKIP
-['adventure', 'belles_lettres', 'fiction', 'humor', 'lore', 'mystery', 'news', 'romance', 'science_fiction']
->>> print(ptb.fileids('news')) # doctest: +SKIP
-['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG', ...]
->>> print(ptb.words(categories=['humor','fiction'])) # doctest: +SKIP
-['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back', ...]
-
-
+can be configured to load it as well. Download the ptb package, +and in the directory nltk_data/corpora/ptb place the BROWN +and WSJ directories of the Treebank installation (symlinks work +as well). Then use the ptb module instead of treebank:

+
>>> from nltk.corpus import ptb
+>>> print(ptb.fileids()) 
+['BROWN/CF/CF01.MRG', 'BROWN/CF/CF02.MRG', 'BROWN/CF/CF03.MRG', 'BROWN/CF/CF04.MRG', ...]
+>>> print(ptb.words('WSJ/00/WSJ_0003.MRG')) 
+['A', 'form', 'of', 'asbestos', 'once', 'used', '*', ...]
+>>> print(ptb.tagged_words('WSJ/00/WSJ_0003.MRG')) 
+[('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...]
+
+
+

…and so forth, like treebank but with extended fileids. Categories +specified in allcats.txt can be used to filter by genre; they consist +of news (for WSJ articles) and names of the Brown subcategories +(fiction, humor, romance, etc.):

+
>>> ptb.categories() 
+['adventure', 'belles_lettres', 'fiction', 'humor', 'lore', 'mystery', 'news', 'romance', 'science_fiction']
+>>> print(ptb.fileids('news')) 
+['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG', ...]
+>>> print(ptb.words(categories=['humor','fiction'])) 
+['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back', ...]
+
+

As PropBank and NomBank depend on the (WSJ portion of the) Penn Treebank, -the modules propbank_ptb and nombank_ptb are provided for access +the modules propbank_ptb and nombank_ptb are provided for access to a full PTB installation.

Reading the Sinica Treebank:

-
-
->>> from nltk.corpus import sinica_treebank
->>> print(sinica_treebank.sents()) # doctest: +SKIP
-[['\xe4\xb8\x80'], ['\xe5\x8f\x8b\xe6\x83\x85'], ...]
->>> sinica_treebank.parsed_sents()[25] # doctest: +SKIP
-Tree('S',
-    [Tree('NP',
-        [Tree('Nba', ['\xe5\x98\x89\xe7\x8f\x8d'])]),
-     Tree('V\xe2\x80\xa7\xe5\x9c\xb0',
-        [Tree('VA11', ['\xe4\xb8\x8d\xe5\x81\x9c']),
-         Tree('DE', ['\xe7\x9a\x84'])]),
-     Tree('VA4', ['\xe5\x93\xad\xe6\xb3\xa3'])])
-
-
+
>>> from nltk.corpus import sinica_treebank
+>>> print(sinica_treebank.sents()) 
+[['\xe4\xb8\x80'], ['\xe5\x8f\x8b\xe6\x83\x85'], ...]
+>>> sinica_treebank.parsed_sents()[25] 
+Tree('S',
+    [Tree('NP',
+        [Tree('Nba', ['\xe5\x98\x89\xe7\x8f\x8d'])]),
+     Tree('V\xe2\x80\xa7\xe5\x9c\xb0',
+        [Tree('VA11', ['\xe4\xb8\x8d\xe5\x81\x9c']),
+         Tree('DE', ['\xe7\x9a\x84'])]),
+     Tree('VA4', ['\xe5\x93\xad\xe6\xb3\xa3'])])
+
+

Reading the CoNLL 2007 Dependency Treebanks:

-
-
->>> from nltk.corpus import conll2007
->>> conll2007.sents('esp.train')[0] # doctest: +SKIP
-['El', 'aumento', 'del', 'índice', 'de', 'desempleo', ...]
->>> conll2007.parsed_sents('esp.train')[0] # doctest: +SKIP
-<DependencyGraph with 38 nodes>
->>> print(conll2007.parsed_sents('esp.train')[0].tree()) # doctest: +SKIP
-(fortaleció
-  (aumento El (del (índice (de (desempleo estadounidense)))))
-  hoy
-  considerablemente
-  (al
-    (euro
-      (cotizaba
-        ,
-        que
-        (a (15.35 las GMT))
-        se
-        (en (mercado el (de divisas) (de Fráncfort)))
-        (a 0,9452_dólares)
-        (frente_a , (0,9349_dólares los (de (mañana esta)))))))
-  .)
-
-
-

NLTK also provides a corpus reader for the York-Toronto-Helsinki -Parsed Corpus of Old English Prose (YCOE); but the corpus itself is -not included in the NLTK data package. If you install it yourself, -you can use NLTK to access it:

-
-
->>> from nltk.corpus import ycoe
->>> for tree in ycoe.parsed_sents('cocuraC')[:4]:
-...     print(tree) # doctest: +SKIP
-(CP-THT
-  (C +D+atte)
-  (IP-SUB ...)
-  ...
-  (. .))
-(IP-MAT
-  (IP-MAT-0
-    (PP (P On) (NP (ADJ o+dre) (N wisan)))...)
-  ...
-  (. .))
-(IP-MAT
-  (NP-NOM-x-2 *exp*)
-  (NP-DAT-1 (D^D +D+am) (ADJ^D unge+dyldegum))
-  ...
-  (. .))
-(IP-MAT
-  (ADVP (ADV Sw+a))
-  (NP-NOM-x (PRO^N hit))
-  (ADVP-TMP (ADV^T oft))
-  ...
-  (. .))
-
-
-

If the YCOE corpus is not available, you will get an error message -when you try to access it:

-
-
->>> from nltk.corpus import ycoe
->>> print(ycoe) # doctest: +SKIP
-Traceback (most recent call last):
-LookupError:
-**********************************************************************
-  Resource 'corpora/ycoe' not found.  For installation
-  instructions, please see <http://nltk.org/index.php/Installation>.
-  Searched in:
-    - ...
-**********************************************************************
-
-
-
-
-

Word Lists and Lexicons

+
>>> from nltk.corpus import conll2007
+>>> conll2007.sents('esp.train')[0] 
+['El', 'aumento', 'del', 'índice', 'de', 'desempleo', ...]
+>>> conll2007.parsed_sents('esp.train')[0] 
+<DependencyGraph with 38 nodes>
+>>> print(conll2007.parsed_sents('esp.train')[0].tree()) 
+(fortaleció
+  (aumento El (del (índice (de (desempleo estadounidense)))))
+  hoy
+  considerablemente
+  (al
+    (euro
+      (cotizaba
+        ,
+        que
+        (a (15.35 las GMT))
+        se
+        (en (mercado el (de divisas) (de Fráncfort)))
+        (a 0,9452_dólares)
+        (frente_a , (0,9349_dólares los (de (mañana esta)))))))
+  .)
+
+
+ +
+

Word Lists and Lexicons

The NLTK data package also includes a number of lexicons and word lists. These are accessed just like text corpora. The following examples illustrate the use of the wordlist corpora:

-
-
->>> from nltk.corpus import names, stopwords, words
->>> words.fileids()
-['en', 'en-basic']
->>> words.words('en') # doctest: +ELLIPSIS
-['A', 'a', 'aa', 'aal', 'aalii', 'aam', 'Aani', 'aardvark', 'aardwolf', ...]
-
-
->>> stopwords.fileids() # doctest: +ELLIPSIS
-['danish', 'dutch', 'english', 'finnish', 'french', 'german', 'hungarian', ...]
->>> stopwords.words('portuguese') # doctest: +ELLIPSIS
-['de', 'a', 'o', 'que', 'e', 'do', 'da', 'em', 'um', 'para', ...]
->>> names.fileids()
-['female.txt', 'male.txt']
->>> names.words('male.txt') # doctest: +ELLIPSIS
-['Aamir', 'Aaron', 'Abbey', 'Abbie', 'Abbot', 'Abbott', ...]
->>> names.words('female.txt') # doctest: +ELLIPSIS
-['Abagael', 'Abagail', 'Abbe', 'Abbey', 'Abbi', 'Abbie', ...]
-
-
-

The CMU Pronunciation Dictionary corpus contains pronounciation +

>>> from nltk.corpus import names, stopwords, words
+>>> words.fileids()
+['en', 'en-basic']
+>>> words.words('en')
+['A', 'a', 'aa', 'aal', 'aalii', 'aam', 'Aani', 'aardvark', 'aardwolf', ...]
+
+
+
>>> stopwords.fileids()
+['arabic', 'azerbaijani', 'danish', 'dutch', 'english', 'finnish', 'french', ...]
+>>> sorted(stopwords.words('portuguese'))
+['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', ...]
+>>> names.fileids()
+['female.txt', 'male.txt']
+>>> names.words('male.txt')
+['Aamir', 'Aaron', 'Abbey', 'Abbie', 'Abbot', 'Abbott', ...]
+>>> names.words('female.txt')
+['Abagael', 'Abagail', 'Abbe', 'Abbey', 'Abbi', 'Abbie', ...]
+
+
+

The CMU Pronunciation Dictionary corpus contains pronunciation transcriptions for over 100,000 words. It can be accessed as a list of entries (where each entry consists of a word, an identifier, and a transcription) or as a dictionary from words to lists of transcriptions. Transcriptions are encoded as tuples of phoneme strings.

-
-
->>> from nltk.corpus import cmudict
->>> print(cmudict.entries()[653:659]) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[('acetate', ['AE1', 'S', 'AH0', 'T', 'EY2', 'T']),
-('acetic', ['AH0', 'S', 'EH1', 'T', 'IH0', 'K']),
-('acetic', ['AH0', 'S', 'IY1', 'T', 'IH0', 'K']),
-('aceto', ['AA0', 'S', 'EH1', 'T', 'OW0']),
-('acetochlor', ['AA0', 'S', 'EH1', 'T', 'OW0', 'K', 'L', 'AO2', 'R']),
-('acetone', ['AE1', 'S', 'AH0', 'T', 'OW2', 'N'])]
->>> # Load the entire cmudict corpus into a Python dictionary:
->>> transcr = cmudict.dict()
->>> print([transcr[w][0] for w in 'Natural Language Tool Kit'.lower().split()]) # doctest: +NORMALIZE_WHITESPACE
-[['N', 'AE1', 'CH', 'ER0', 'AH0', 'L'],
- ['L', 'AE1', 'NG', 'G', 'W', 'AH0', 'JH'],
- ['T', 'UW1', 'L'],
- ['K', 'IH1', 'T']]
-
-
-
-
-

WordNet

-

Please see the separate WordNet howto.

+
>>> from nltk.corpus import cmudict
+>>> print(cmudict.entries()[653:659])
+[('acetate', ['AE1', 'S', 'AH0', 'T', 'EY2', 'T']),
+('acetic', ['AH0', 'S', 'EH1', 'T', 'IH0', 'K']),
+('acetic', ['AH0', 'S', 'IY1', 'T', 'IH0', 'K']),
+('aceto', ['AA0', 'S', 'EH1', 'T', 'OW0']),
+('acetochlor', ['AA0', 'S', 'EH1', 'T', 'OW0', 'K', 'L', 'AO2', 'R']),
+('acetone', ['AE1', 'S', 'AH0', 'T', 'OW2', 'N'])]
+>>> # Load the entire cmudict corpus into a Python dictionary:
+>>> transcr = cmudict.dict()
+>>> print([transcr[w][0] for w in 'Natural Language Tool Kit'.lower().split()])
+[['N', 'AE1', 'CH', 'ER0', 'AH0', 'L'],
+ ['L', 'AE1', 'NG', 'G', 'W', 'AH0', 'JH'],
+ ['T', 'UW1', 'L'],
+ ['K', 'IH1', 'T']]
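
The size quoted above can be confirmed directly from the dictionary form; the exact count depends on the cmudict version shipped in nltk_data, so we only check the bound:

>>> len(transcr) > 100000
True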
+
-
-

FrameNet

+ +
+

WordNet

+

Please see the separate WordNet howto.

+
+
+

FrameNet

Please see the separate FrameNet howto.

-
-
-

PropBank

+ +
+

PropBank

Please see the separate PropBank howto.

-
-
-

SentiWordNet

+ +
+

SentiWordNet

Please see the separate SentiWordNet howto.

-
-
-

Categorized Corpora

+ +
+

Categorized Corpora

Several corpora included with NLTK contain documents that have been categorized for topic, genre, polarity, etc. In addition to the standard corpus interface, these corpora provide access to the list of categories and the mapping between the documents -and their categories (in both directions). Access the categories using the categories() +and their categories (in both directions). Access the categories using the categories() method, e.g.:

-
-
->>> from nltk.corpus import brown, movie_reviews, reuters
->>> brown.categories() # doctest: +NORMALIZE_WHITESPACE
-['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor',
-'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
->>> movie_reviews.categories()
-['neg', 'pos']
->>> reuters.categories() # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-['acq', 'alum', 'barley', 'bop', 'carcass', 'castor-oil', 'cocoa',
-'coconut', 'coconut-oil', 'coffee', 'copper', 'copra-cake', 'corn',
-'cotton', 'cotton-oil', 'cpi', 'cpu', 'crude', 'dfl', 'dlr', ...]
-
-
+
>>> from nltk.corpus import brown, movie_reviews, reuters
+>>> brown.categories()
+['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor',
+'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
+>>> movie_reviews.categories()
+['neg', 'pos']
+>>> reuters.categories()
+['acq', 'alum', 'barley', 'bop', 'carcass', 'castor-oil', 'cocoa',
+'coconut', 'coconut-oil', 'coffee', 'copper', 'copra-cake', 'corn',
+'cotton', 'cotton-oil', 'cpi', 'cpu', 'crude', 'dfl', 'dlr', ...]
+
+

This method has an optional argument that specifies a document or a list of documents, allowing us to map from (one or more) documents to (one or more) categories:

-
-
->>> brown.categories('ca01')
-['news']
->>> brown.categories(['ca01','cb01'])
-['editorial', 'news']
->>> reuters.categories('training/9865')
-['barley', 'corn', 'grain', 'wheat']
->>> reuters.categories(['training/9865', 'training/9880'])
-['barley', 'corn', 'grain', 'money-fx', 'wheat']
-
-
-


-
-
->>> reuters.fileids('barley') # doctest: +ELLIPSIS
-['test/15618', 'test/15649', 'test/15676', 'test/15728', 'test/15871', ...]
-
-
-

+

>>> brown.categories('ca01')
+['news']
+>>> brown.categories(['ca01','cb01'])
+['editorial', 'news']
+>>> reuters.categories('training/9865')
+['barley', 'corn', 'grain', 'wheat']
+>>> reuters.categories(['training/9865', 'training/9880'])
+['barley', 'corn', 'grain', 'money-fx', 'wheat']
+
+
+

We can go back the other way using the optional argument of the fileids() method:

+
>>> reuters.fileids('barley')
+['test/15618', 'test/15649', 'test/15676', 'test/15728', 'test/15871', ...]
+
+
+

Both the categories() and fileids() methods return a sorted list containing no duplicates.
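
A quick check of this invariant, reusing the brown reader imported above:

>>> cats = brown.categories(['ca01', 'cb01'])
>>> cats == sorted(set(cats))
True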

In addition to mapping between categories and documents, these corpora permit direct access to their contents via the categories. Instead of accessing a subset of a corpus by specifying one or more fileids, we can identify one or more categories, e.g.:

-
-
->>> brown.tagged_words(categories='news')
-[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
->>> brown.sents(categories=['editorial','reviews']) # doctest: +NORMALIZE_WHITESPACE
-[['Assembly', 'session', 'brought', 'much', 'good'], ['The', 'General',
-'Assembly', ',', 'which', 'adjourns', 'today', ',', 'has', 'performed',
-'in', 'an', 'atmosphere', 'of', 'crisis', 'and', 'struggle', 'from',
-'the', 'day', 'it', 'convened', '.'], ...]
-
-
+
>>> brown.tagged_words(categories='news')
+[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
+>>> brown.sents(categories=['editorial','reviews'])
+[['Assembly', 'session', 'brought', 'much', 'good'], ['The', 'General',
+'Assembly', ',', 'which', 'adjourns', 'today', ',', 'has', 'performed',
+'in', 'an', 'atmosphere', 'of', 'crisis', 'and', 'struggle', 'from',
+'the', 'day', 'it', 'convened', '.'], ...]
+
+

Note that it is an error to specify both documents and categories.
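
For example, requesting both at once raises an exception (a sketch; the exact message may differ across NLTK versions):

>>> brown.words(fileids=['ca01'], categories=['news'])
Traceback (most recent call last):
  ...
ValueError: Specify fileids or categories, not both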

In the context of a text categorization system, we can easily test if the category assigned to a document is correct as follows:

-
-
->>> def classify(doc): return 'news'   # Trivial classifier
->>> doc = 'ca01'
->>> classify(doc) in brown.categories(doc)
-True
-
-
-
-
-


+
>>> def classify(doc): return 'news'   # Trivial classifier
+>>> doc = 'ca01'
+>>> classify(doc) in brown.categories(doc)
+True
+
+
+ +
+

Other Corpora

+
+
comparative_sentences

A list of sentences from various sources, especially reviews and articles. Each line contains one sentence; sentences were separated using a sentence tokenizer. Comparative sentences have been annotated with their type, entities, features and keywords.

-
-
->>> from nltk.corpus import comparative_sentences
->>> comparison = comparative_sentences.comparisons()[0]
->>> comparison.text
-['its', 'fast-forward', 'and', 'rewind', 'work', 'much', 'more', 'smoothly',
-'and', 'consistently', 'than', 'those', 'of', 'other', 'models', 'i', "'ve",
-'had', '.']
->>> comparison.entity_2
-'models'
->>> (comparison.feature, comparison.keyword)
-('rewind', 'more')
->>> len(comparative_sentences.comparisons())
-853
-
-
-
-
-


+
>>> from nltk.corpus import comparative_sentences
+>>> comparison = comparative_sentences.comparisons()[0]
+>>> comparison.text
+['its', 'fast-forward', 'and', 'rewind', 'work', 'much', 'more', 'smoothly',
+'and', 'consistently', 'than', 'those', 'of', 'other', 'models', 'i', "'ve",
+'had', '.']
+>>> comparison.entity_2
+'models'
+>>> (comparison.feature, comparison.keyword)
+('rewind', 'more')
+>>> len(comparative_sentences.comparisons())
+853
+
+
+ +
+
opinion_lexicon

A list of positive and negative opinion words or sentiment words for English.

-
-
->>> from nltk.corpus import opinion_lexicon
->>> opinion_lexicon.words()[:4]
-    ['2-faced', '2-faces', 'abnormal', 'abolish']
-
-
+
>>> from nltk.corpus import opinion_lexicon
+>>> opinion_lexicon.words()[:4]
+    ['2-faced', '2-faces', 'abnormal', 'abolish']
+
+

The OpinionLexiconCorpusReader also provides shortcuts to retrieve positive/negative words:

-
-
->>> opinion_lexicon.negative()[:4]
-['2-faced', '2-faces', 'abnormal', 'abolish']
-
-
+
>>> opinion_lexicon.negative()[:4]
+['2-faced', '2-faces', 'abnormal', 'abolish']
+
+

Note that the words returned by the words() method in opinion_lexicon are sorted by file id, not alphabetically:

-
-
->>> opinion_lexicon.words()[0:10]
-['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably',
-'abominate', 'abomination', 'abort', 'aborted']
->>> sorted(opinion_lexicon.words())[0:10]
-['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably',
-'abominate', 'abomination', 'abort']
-
-
-
-
-


+
>>> opinion_lexicon.words()[0:10]
+['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably',
+'abominate', 'abomination', 'abort', 'aborted']
+>>> sorted(opinion_lexicon.words())[0:10]
+['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably',
+'abominate', 'abomination', 'abort']
+
+
+ +
+
ppattach

The Prepositional Phrase Attachment corpus is a corpus of prepositional phrase attachment decisions. Each instance in the corpus is encoded as a PPAttachment object:

-
-
->>> from nltk.corpus import ppattach
->>> ppattach.attachments('training') # doctest: +NORMALIZE_WHITESPACE
-[PPAttachment(sent='0', verb='join', noun1='board',
-              prep='as', noun2='director', attachment='V'),
- PPAttachment(sent='1', verb='is', noun1='chairman',
-              prep='of', noun2='N.V.', attachment='N'),
- ...]
->>> inst = ppattach.attachments('training')[0]
->>> (inst.sent, inst.verb, inst.noun1, inst.prep, inst.noun2)
-('0', 'join', 'board', 'as', 'director')
->>> inst.attachment
-'V'
-
-
-
-
-



+
>>> from nltk.corpus import ppattach
+>>> ppattach.attachments('training')
+[PPAttachment(sent='0', verb='join', noun1='board',
+              prep='as', noun2='director', attachment='V'),
+ PPAttachment(sent='1', verb='is', noun1='chairman',
+              prep='of', noun2='N.V.', attachment='N'),
+ ...]
+>>> inst = ppattach.attachments('training')[0]
+>>> (inst.sent, inst.verb, inst.noun1, inst.prep, inst.noun2)
+('0', 'join', 'board', 'as', 'director')
+>>> inst.attachment
+'V'
+
+
+ +
+
product_reviews_1 and product_reviews_2

These two datasets respectively contain annotated customer reviews of 5 and 9 products from amazon.com.

-
-
->>> from nltk.corpus import product_reviews_1
->>> camera_reviews = product_reviews_1.reviews('Canon_G3.txt')
->>> review = camera_reviews[0]
->>> review.sents()[0]
-['i', 'recently', 'purchased', 'the', 'canon', 'powershot', 'g3', 'and', 'am',
-'extremely', 'satisfied', 'with', 'the', 'purchase', '.']
->>> review.features()
-[('canon powershot g3', '+3'), ('use', '+2'), ('picture', '+2'),
-('picture quality', '+1'), ('picture quality', '+1'), ('camera', '+2'),
-('use', '+2'), ('feature', '+1'), ('picture quality', '+3'), ('use', '+1'),
-('option', '+1')]
-
-
+
>>> from nltk.corpus import product_reviews_1
+>>> camera_reviews = product_reviews_1.reviews('Canon_G3.txt')
+>>> review = camera_reviews[0]
+>>> review.sents()[0]
+['i', 'recently', 'purchased', 'the', 'canon', 'powershot', 'g3', 'and', 'am',
+'extremely', 'satisfied', 'with', 'the', 'purchase', '.']
+>>> review.features()
+[('canon powershot g3', '+3'), ('use', '+2'), ('picture', '+2'),
+('picture quality', '+1'), ('picture quality', '+1'), ('camera', '+2'),
+('use', '+2'), ('feature', '+1'), ('picture quality', '+3'), ('use', '+1'),
+('option', '+1')]
+
+

It is also possible to reach the same information directly from the stream:

-
-
->>> product_reviews_1.features('Canon_G3.txt')
-[('canon powershot g3', '+3'), ('use', '+2'), ...]
-
-
+
>>> product_reviews_1.features('Canon_G3.txt')
+[('canon powershot g3', '+3'), ('use', '+2'), ...]
+
+

We can compute stats for specific product features:

-
-
->>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
->>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
->>> # We use float for backward compatibility with division in Python2.7
->>> mean = float(tot)/n_reviews
->>> print(n_reviews, tot, mean)
-15 24 1.6
-
-
-
-
-


+
>>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
+>>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
+>>> mean = tot / n_reviews
+>>> print(n_reviews, tot, mean)
+15 24 1.6
+
+
+ +
+
pros_cons

A list of pros/cons sentences for determining context-dependent (aspect-dependent) sentiment words, which are then applied to sentiment analysis of comparative sentences.

-
-
->>> from nltk.corpus import pros_cons
->>> pros_cons.sents(categories='Cons')
-[['East', 'batteries', '!', 'On', '-', 'off', 'switch', 'too', 'easy',
-'to', 'maneuver', '.'], ['Eats', '...', 'no', ',', 'GULPS', 'batteries'],
-...]
->>> pros_cons.words('IntegratedPros.txt')
-['Easy', 'to', 'use', ',', 'economical', '!', ...]
-
-
-
-
-


+
>>> from nltk.corpus import pros_cons
+>>> pros_cons.sents(categories='Cons')
+[['East', 'batteries', '!', 'On', '-', 'off', 'switch', 'too', 'easy',
+'to', 'maneuver', '.'], ['Eats', '...', 'no', ',', 'GULPS', 'batteries'],
+...]
+>>> pros_cons.words('IntegratedPros.txt')
+['Easy', 'to', 'use', ',', 'economical', '!', ...]
+
+
+ +
+
semcor

The Brown Corpus, annotated with WordNet senses.

-
-
->>> from nltk.corpus import semcor
->>> semcor.words('brown2/tagfiles/br-n12.xml')  # doctest: +ELLIPSIS
-['When', 'several', 'minutes', 'had', 'passed', ...]
->>> sent = semcor.xml('brown2/tagfiles/br-n12.xml').findall('context/p/s')[0]
->>> for wordform in sent.getchildren():
-...     print(wordform.text, end=' ')
-...     for key in sorted(wordform.keys()):
-...         print(key + '=' + wordform.get(key), end=' ')
-...     print()
-...
-When cmd=ignore pos=WRB
-several cmd=done lemma=several lexsn=5:00:00:some(a):00 pos=JJ wnsn=1
-minutes cmd=done lemma=minute lexsn=1:28:00:: pos=NN wnsn=1
-had cmd=done ot=notag pos=VBD
-passed cmd=done lemma=pass lexsn=2:38:03:: pos=VB wnsn=4
-and cmd=ignore pos=CC
-Curt cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
-had cmd=done ot=notag pos=VBD
-n't cmd=done lemma=n't lexsn=4:02:00:: pos=RB wnsn=0
-emerged cmd=done lemma=emerge lexsn=2:30:00:: pos=VB wnsn=1
-from cmd=ignore pos=IN
-the cmd=ignore pos=DT
-livery_stable cmd=done lemma=livery_stable lexsn=1:06:00:: pos=NN wnsn=1
-,
-Brenner cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
-re-entered cmd=done lemma=re-enter lexsn=2:38:00:: pos=VB wnsn=1
-the cmd=ignore pos=DT
-hotel cmd=done lemma=hotel lexsn=1:06:00:: pos=NN wnsn=1
-and cmd=ignore pos=CC
-faced cmd=done lemma=face lexsn=2:42:02:: pos=VB wnsn=4
-Summers cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
-across cmd=ignore pos=IN
-the cmd=ignore pos=DT
-counter cmd=done lemma=counter lexsn=1:06:00:: pos=NN wnsn=1
-.
-
-
-
-
-


+
>>> from nltk.corpus import semcor
+>>> semcor.words('brown2/tagfiles/br-n12.xml')
+['When', 'several', 'minutes', 'had', 'passed', ...]
+
+
+ +
+
senseval

The Senseval 2 corpus is a word sense disambiguation corpus. Each item in the corpus corresponds to a single ambiguous word. For each of these words, the corpus contains a list of instances, corresponding to occurrences of that word. Each instance provides the word; a list of word senses that apply to the word occurrence; and the word’s context.

-
-
->>> from nltk.corpus import senseval
->>> senseval.fileids()
-['hard.pos', 'interest.pos', 'line.pos', 'serve.pos']
->>> senseval.instances('hard.pos')
-... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[SensevalInstance(word='hard-a',
-    position=20,
-    context=[('``', '``'), ('he', 'PRP'), ...('hard', 'JJ'), ...],
-    senses=('HARD1',)),
- SensevalInstance(word='hard-a',
-    position=10,
-    context=[('clever', 'NNP'), ...('hard', 'JJ'), ('time', 'NN'), ...],
-    senses=('HARD1',)), ...]
-
-
-

+

>>> from nltk.corpus import senseval
+>>> senseval.fileids()
+['hard.pos', 'interest.pos', 'line.pos', 'serve.pos']
+>>> senseval.instances('hard.pos')
+...
+[SensevalInstance(word='hard-a',
+    position=20,
+    context=[('``', '``'), ('he', 'PRP'), ...('hard', 'JJ'), ...],
+    senses=('HARD1',)),
+ SensevalInstance(word='hard-a',
+    position=10,
+    context=[('clever', 'NNP'), ...('hard', 'JJ'), ('time', 'NN'), ...],
+    senses=('HARD1',)), ...]
+
+
+

The following code looks at instances of the word ‘interest’, and displays their local context (2 words on each side) and word sense(s):

-
-
->>> for inst in senseval.instances('interest.pos')[:10]:
-...     p = inst.position
-...     left = ' '.join(w for (w,t) in inst.context[p-2:p])
-...     word = ' '.join(w for (w,t) in inst.context[p:p+1])
-...     right = ' '.join(w for (w,t) in inst.context[p+1:p+3])
-...     senses = ' '.join(inst.senses)
-...     print('%20s |%10s | %-15s -> %s' % (left, word, right, senses))
-         declines in |  interest | rates .         -> interest_6
-  indicate declining |  interest | rates because   -> interest_6
-       in short-term |  interest | rates .         -> interest_6
-                 4 % |  interest | in this         -> interest_5
-        company with | interests | in the          -> interest_5
-              , plus |  interest | .               -> interest_6
-             set the |  interest | rate on         -> interest_6
-              's own |  interest | , prompted      -> interest_4
-       principal and |  interest | is the          -> interest_6
-        increase its |  interest | to 70           -> interest_5
-
-
-
-
-


+
>>> for inst in senseval.instances('interest.pos')[:10]:
+...     p = inst.position
+...     left = ' '.join(w for (w,t) in inst.context[p-2:p])
+...     word = ' '.join(w for (w,t) in inst.context[p:p+1])
+...     right = ' '.join(w for (w,t) in inst.context[p+1:p+3])
+...     senses = ' '.join(inst.senses)
+...     print('%20s |%10s | %-15s -> %s' % (left, word, right, senses))
+         declines in |  interest | rates .         -> interest_6
+  indicate declining |  interest | rates because   -> interest_6
+       in short-term |  interest | rates .         -> interest_6
+                 4 % |  interest | in this         -> interest_5
+        company with | interests | in the          -> interest_5
+              , plus |  interest | .               -> interest_6
+             set the |  interest | rate on         -> interest_6
+              's own |  interest | , prompted      -> interest_4
+       principal and |  interest | is the          -> interest_6
+        increase its |  interest | to 70           -> interest_5
+
+
+ +
+
sentence_polarity

The Sentence Polarity dataset contains 5331 positive and 5331 negative processed sentences.

-
-
->>> from nltk.corpus import sentence_polarity
->>> sentence_polarity.sents()
-[['simplistic', ',', 'silly', 'and', 'tedious', '.'], ["it's", 'so', 'laddish',
-'and', 'juvenile', ',', 'only', 'teenage', 'boys', 'could', 'possibly', 'find',
-'it', 'funny', '.'], ...]
->>> sentence_polarity.categories()
-['neg', 'pos']
->>> sentence_polarity.sents()[1]
-["it's", 'so', 'laddish', 'and', 'juvenile', ',', 'only', 'teenage', 'boys',
-'could', 'possibly', 'find', 'it', 'funny', '.']
-
-
-
-
-


+
>>> from nltk.corpus import sentence_polarity
+>>> sentence_polarity.sents()
+[['simplistic', ',', 'silly', 'and', 'tedious', '.'], ["it's", 'so', 'laddish',
+'and', 'juvenile', ',', 'only', 'teenage', 'boys', 'could', 'possibly', 'find',
+'it', 'funny', '.'], ...]
+>>> sentence_polarity.categories()
+['neg', 'pos']
+>>> sentence_polarity.sents()[1]
+["it's", 'so', 'laddish', 'and', 'juvenile', ',', 'only', 'teenage', 'boys',
+'could', 'possibly', 'find', 'it', 'funny', '.']
+
+
+ +
+
shakespeare

The Shakespeare corpus contains a set of Shakespeare plays, formatted as XML files. These corpora are returned as ElementTree objects:

-
-
->>> from nltk.corpus import shakespeare
->>> from xml.etree import ElementTree
->>> shakespeare.fileids() # doctest: +ELLIPSIS
-['a_and_c.xml', 'dream.xml', 'hamlet.xml', 'j_caesar.xml', ...]
->>> play = shakespeare.xml('dream.xml')
->>> print(play) # doctest: +ELLIPSIS
-<Element 'PLAY' at ...>
->>> print('%s: %s' % (play[0].tag, play[0].text))
-TITLE: A Midsummer Night's Dream
->>> personae = [persona.text for persona in
-...             play.findall('PERSONAE/PERSONA')]
->>> print(personae) # doctest: +ELLIPSIS
-['THESEUS, Duke of Athens.', 'EGEUS, father to Hermia.', ...]
->>> # Find and print speakers not listed as personae
->>> names = [persona.split(',')[0] for persona in personae]
->>> speakers = set(speaker.text for speaker in
-...                play.findall('*/*/*/SPEAKER'))
->>> print(sorted(speakers.difference(names))) # doctest: +NORMALIZE_WHITESPACE
-['ALL', 'COBWEB', 'DEMETRIUS', 'Fairy', 'HERNIA', 'LYSANDER',
- 'Lion', 'MOTH', 'MUSTARDSEED', 'Moonshine', 'PEASEBLOSSOM',
- 'Prologue', 'Pyramus', 'Thisbe', 'Wall']
-
-
-
-
-


+
>>> from nltk.corpus import shakespeare
+>>> from xml.etree import ElementTree
+>>> shakespeare.fileids()
+['a_and_c.xml', 'dream.xml', 'hamlet.xml', 'j_caesar.xml', ...]
+>>> play = shakespeare.xml('dream.xml')
+>>> print(play)
+<Element 'PLAY' at ...>
+>>> print('%s: %s' % (play[0].tag, play[0].text))
+TITLE: A Midsummer Night's Dream
+>>> personae = [persona.text for persona in
+...             play.findall('PERSONAE/PERSONA')]
+>>> print(personae)
+['THESEUS, Duke of Athens.', 'EGEUS, father to Hermia.', ...]
+>>> # Find and print speakers not listed as personae
+>>> names = [persona.split(',')[0] for persona in personae]
+>>> speakers = set(speaker.text for speaker in
+...                play.findall('*/*/*/SPEAKER'))
+>>> print(sorted(speakers.difference(names)))
+['ALL', 'COBWEB', 'DEMETRIUS', 'Fairy', 'HERNIA', 'LYSANDER',
+ 'Lion', 'MOTH', 'MUSTARDSEED', 'Moonshine', 'PEASEBLOSSOM',
+ 'Prologue', 'Pyramus', 'Thisbe', 'Wall']
+
+
+ +
+
subjectivity

The Subjectivity Dataset contains 5000 subjective and 5000 objective processed sentences.

-
-
->>> from nltk.corpus import subjectivity
->>> subjectivity.categories()
-['obj', 'subj']
->>> subjectivity.sents()[23]
-['television', 'made', 'him', 'famous', ',', 'but', 'his', 'biggest', 'hits',
-'happened', 'off', 'screen', '.']
->>> subjectivity.words(categories='subj')
-['smart', 'and', 'alert', ',', 'thirteen', ...]
-
-
-
-
-


+
>>> from nltk.corpus import subjectivity
+>>> subjectivity.categories()
+['obj', 'subj']
+>>> subjectivity.sents()[23]
+['television', 'made', 'him', 'famous', ',', 'but', 'his', 'biggest', 'hits',
+'happened', 'off', 'screen', '.']
+>>> subjectivity.words(categories='subj')
+['smart', 'and', 'alert', ',', 'thirteen', ...]
+
+
+ +
+
toolbox

The Toolbox corpus distributed with NLTK contains a sample lexicon and several sample texts from the Rotokas language. The Toolbox corpus reader returns Toolbox files as XML ElementTree objects. The following example loads the Rotokas dictionary, and figures out the distribution of part-of-speech tags for reduplicated words.
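
A minimal sketch of the dictionary part (hedged: it assumes the rotokas.dic file id and that each record element carries lx (lexeme) and ps (part-of-speech) fields; reduplication is approximated by a repeated final substring, and the resulting counts are not shown):

>>> import re
>>> from collections import Counter
>>> from nltk.corpus import toolbox
>>> lexicon = toolbox.xml('rotokas.dic')
>>> # Count the ps field of records whose lexeme ends in a repeated substring.
>>> pos_of_redup = Counter(
...     record.findtext('ps')
...     for record in lexicon.findall('record')
...     if re.search(r'(.{2,})\1$', record.findtext('lx') or ''))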

-

This example displays some records from a Rotokas text:
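
A hedged sketch, assuming the sample text files sit alongside rotokas.dic in the corpus and parse into record elements in the same way:

>>> text_ids = [f for f in toolbox.fileids() if f != 'rotokas.dic']  # assumed layout
>>> records = toolbox.xml(text_ids[0]).findall('record') if text_ids else []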

- -
-
-

+
timit

The NLTK data package includes a fragment of the TIMIT Acoustic-Phonetic Continuous Speech Corpus. This corpus is broken down into small speech samples, each of which is available as a wave file, a phonetic transcription, and a tokenized word list.

-
-
->>> from nltk.corpus import timit
->>> print(timit.utteranceids()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466',
-'dr1-fvmh0/si2096', 'dr1-fvmh0/si836', 'dr1-fvmh0/sx116',
-'dr1-fvmh0/sx206', 'dr1-fvmh0/sx26', 'dr1-fvmh0/sx296', ...]
-
-
->>> item = timit.utteranceids()[5]
->>> print(timit.phones(item)) # doctest: +NORMALIZE_WHITESPACE
-['h#', 'k', 'l', 'ae', 's', 'pcl', 'p', 'dh', 'ax',
- 's', 'kcl', 'k', 'r', 'ux', 'ix', 'nx', 'y', 'ax',
- 'l', 'eh', 'f', 'tcl', 't', 'hh', 'ae', 'n', 'dcl',
- 'd', 'h#']
->>> print(timit.words(item))
-['clasp', 'the', 'screw', 'in', 'your', 'left', 'hand']
->>> timit.play(item) # doctest: +SKIP
-
-
+
>>> from nltk.corpus import timit
+>>> print(timit.utteranceids())
+['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466',
+'dr1-fvmh0/si2096', 'dr1-fvmh0/si836', 'dr1-fvmh0/sx116',
+'dr1-fvmh0/sx206', 'dr1-fvmh0/sx26', 'dr1-fvmh0/sx296', ...]
+
+
+
>>> item = timit.utteranceids()[5]
+>>> print(timit.phones(item))
+['h#', 'k', 'l', 'ae', 's', 'pcl', 'p', 'dh', 'ax',
+ 's', 'kcl', 'k', 'r', 'ux', 'ix', 'nx', 'y', 'ax',
+ 'l', 'eh', 'f', 'tcl', 't', 'hh', 'ae', 'n', 'dcl',
+ 'd', 'h#']
+>>> print(timit.words(item))
+['clasp', 'the', 'screw', 'in', 'your', 'left', 'hand']
+>>> timit.play(item) 
+
+

The corpus reader can combine the word segmentation information with the phonemes to produce a single tree structure:

-
-
->>> for tree in timit.phone_trees(item):
-...     print(tree)
-(S
-  h#
-  (clasp k l ae s pcl p)
-  (the dh ax)
-  (screw s kcl k r ux)
-  (in ix nx)
-  (your y ax)
-  (left l eh f tcl t)
-  (hand hh ae n dcl d)
-  h#)
-
-
+
>>> for tree in timit.phone_trees(item):
+...     print(tree)
+(S
+  h#
+  (clasp k l ae s pcl p)
+  (the dh ax)
+  (screw s kcl k r ux)
+  (in ix nx)
+  (your y ax)
+  (left l eh f tcl t)
+  (hand hh ae n dcl d)
+  h#)
+
+

The start time and stop time of each phoneme, word, and sentence are also available:

-
-
->>> print(timit.phone_times(item)) # doctest: +ELLIPSIS
-[('h#', 0, 2190), ('k', 2190, 3430), ('l', 3430, 4326), ...]
->>> print(timit.word_times(item)) # doctest: +ELLIPSIS
-[('clasp', 2190, 8804), ('the', 8804, 9734), ...]
->>> print(timit.sent_times(item))
-[('Clasp the screw in your left hand.', 0, 32154)]
-
-
+
>>> print(timit.phone_times(item))
+[('h#', 0, 2190), ('k', 2190, 3430), ('l', 3430, 4326), ...]
+>>> print(timit.word_times(item))
+[('clasp', 2190, 8804), ('the', 8804, 9734), ...]
+>>> print(timit.sent_times(item))
+[('Clasp the screw in your left hand.', 0, 32154)]
+
+

We can use these times to play selected pieces of a speech sample:

-
-
->>> timit.play(item, 2190, 8804) # 'clasp'  # doctest: +SKIP
-
-
+
>>> timit.play(item, 2190, 8804) # 'clasp'  
+
+

The corpus reader can also be queried for information about the speaker and sentence identifier for a given speech sample:

-
-
->>> print(timit.spkrid(item))
-dr1-fvmh0
->>> print(timit.sentid(item))
-sx116
->>> print(timit.spkrinfo(timit.spkrid(item))) # doctest: +NORMALIZE_WHITESPACE
-SpeakerInfo(id='VMH0',
-            sex='F',
-            dr='1',
-            use='TRN',
-            recdate='03/11/86',
-            birthdate='01/08/60',
-            ht='5\'05"',
-            race='WHT',
-            edu='BS',
-            comments='BEST NEW ENGLAND ACCENT SO FAR')
-
-
->>> # List the speech samples from the same speaker:
->>> timit.utteranceids(spkrid=timit.spkrid(item)) # doctest: +ELLIPSIS
-['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...]
-
-
-
-
-


+
>>> print(timit.spkrid(item))
+dr1-fvmh0
+>>> print(timit.sentid(item))
+sx116
+>>> print(timit.spkrinfo(timit.spkrid(item)))
+SpeakerInfo(id='VMH0',
+            sex='F',
+            dr='1',
+            use='TRN',
+            recdate='03/11/86',
+            birthdate='01/08/60',
+            ht='5\'05"',
+            race='WHT',
+            edu='BS',
+            comments='BEST NEW ENGLAND ACCENT SO FAR')
+
+
+
>>> # List the speech samples from the same speaker:
+>>> timit.utteranceids(spkrid=timit.spkrid(item))
+['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...]
+
+
+ +
+
twitter_samples

Twitter is a well-known microblog service that allows public data to be collected via APIs. NLTK’s twitter corpus currently contains a sample of 20k Tweets retrieved from the Twitter Streaming API.

-
-
->>> from nltk.corpus import twitter_samples
->>> twitter_samples.fileids()
-['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']
-
-
+
>>> from nltk.corpus import twitter_samples
+>>> twitter_samples.fileids()
+['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']
+
+

We follow standard practice in storing full Tweets as line-separated JSON. These data structures can be accessed via the docs() method. However, in general it is more practical to focus just on the text field of the Tweets, which is accessed via the strings() method.

-
-
->>> twitter_samples.strings('tweets.20150430-223406.json')
-['RT @KirkKus: Indirect cost of the UK being in the EU is estimated to be costing Britain \xa3170 billion per year! #BetterOffOut #UKIP', ...]
-
-
-

+

>>> twitter_samples.strings('tweets.20150430-223406.json')[:5]
+['RT @KirkKus: Indirect cost of the UK being in the EU is estimated to be costing Britain \xa3170 billion per year! #BetterOffOut #UKIP', ...]
+
+
+

The default tokenizer for Tweets is specialised for ‘casual’ text, and the tokenized() method returns a list of lists of tokens.

-
-
->>> twitter_samples.tokenized('tweets.20150430-223406.json')
-[['RT', '@KirkKus', ':', 'Indirect', 'cost', 'of', 'the', 'UK', 'being', 'in', ...],
- ['VIDEO', ':', 'Sturgeon', 'on', 'post-election', 'deals', 'http://t.co/BTJwrpbmOY'], ...]
-
-
-
-
-


+
>>> twitter_samples.tokenized('tweets.20150430-223406.json')[:5]
+[['RT', '@KirkKus', ':', 'Indirect', 'cost', 'of', 'the', 'UK', 'being', 'in', ...],
+ ['VIDEO', ':', 'Sturgeon', 'on', 'post-election', 'deals', 'http://t.co/BTJwrpbmOY'], ...]
+
+
+ +
+
rte

The RTE (Recognizing Textual Entailment) corpus was derived from the RTE1, RTE2 and RTE3 datasets (dev and test data), and consists of a list of XML-formatted ‘text’/‘hypothesis’ pairs.

-
-
->>> from nltk.corpus import rte
->>> print(rte.fileids()) # doctest: +ELLIPSIS
-['rte1_dev.xml', 'rte1_test.xml', 'rte2_dev.xml', ..., 'rte3_test.xml']
->>> rtepairs = rte.pairs(['rte2_test.xml', 'rte3_test.xml'])
->>> print(rtepairs)  # doctest: +ELLIPSIS
-[<RTEPair: gid=2-8>, <RTEPair: gid=2-9>, <RTEPair: gid=2-15>, ...]
-
-

+
>>> from nltk.corpus import rte
+>>> print(rte.fileids())
+['rte1_dev.xml', 'rte1_test.xml', 'rte2_dev.xml', ..., 'rte3_test.xml']
+>>> rtepairs = rte.pairs(['rte2_test.xml', 'rte3_test.xml'])
+>>> print(rtepairs)
+[<RTEPair: gid=2-8>, <RTEPair: gid=2-9>, <RTEPair: gid=2-15>, ...]
+
+

In the gold standard test sets, each pair is labeled according to whether or not the text ‘entails’ the hypothesis; the entailment value is mapped to an integer 1 (True) or 0 (False).

-
-
->>> rtepairs[5]
-<RTEPair: gid=2-23>
->>> rtepairs[5].text # doctest: +NORMALIZE_WHITESPACE
-'His wife Strida won a seat in parliament after forging an alliance
-with the main anti-Syrian coalition in the recent election.'
->>> rtepairs[5].hyp
-'Strida elected to parliament.'
->>> rtepairs[5].value
-1
-
-
-


-
-
->>> xmltree = rte.xml('rte3_dev.xml')
->>> xmltree # doctest: +SKIP
-<Element entailment-corpus at ...>
->>> xmltree[7].findtext('t') # doctest: +NORMALIZE_WHITESPACE
-"Mrs. Bush's approval ratings have remained very high, above 80%,
-even as her husband's have recently dropped below 50%."
-
-
-
-
-


+
>>> rtepairs[5]
+<RTEPair: gid=2-23>
+>>> rtepairs[5].text
+'His wife Strida won a seat in parliament after forging an alliance
+with the main anti-Syrian coalition in the recent election.'
+>>> rtepairs[5].hyp
+'Strida elected to parliament.'
+>>> rtepairs[5].value
+1
+
+
+

The RTE corpus also supports an xml() method which produces ElementTrees.

+
>>> xmltree = rte.xml('rte3_dev.xml')
+>>> xmltree 
+<Element entailment-corpus at ...>
+>>> xmltree[7].findtext('t')
+"Mrs. Bush's approval ratings have remained very high, above 80%,
+even as her husband's have recently dropped below 50%."
+
+
+ +
+
verbnet

The VerbNet corpus is a lexicon that divides verbs into classes, based on their syntax-semantics linking behavior. The basic elements in the lexicon are verb lemmas, such as ‘abandon’ and ‘accept’, and verb classes, which have identifiers such as ‘remove-10.1’ and ‘admire-31.2-1’. These class identifiers consist of a representative verb selected from the class, followed by a numerical identifier. The list of verb lemmas, and the list of class identifiers, can be retrieved with the following methods:

-
-
->>> from nltk.corpus import verbnet
->>> verbnet.lemmas()[20:25]
-['accelerate', 'accept', 'acclaim', 'accompany', 'accrue']
->>> verbnet.classids()[:5]
-['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93']
-
-
+
>>> from nltk.corpus import verbnet
+>>> verbnet.lemmas()[20:25]
+['accelerate', 'accept', 'acclaim', 'accompany', 'accrue']
+>>> verbnet.classids()[:5]
+['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93']
+
+

The classids() method may also be used to retrieve the classes that a given lemma belongs to:

-
-
->>> verbnet.classids('accept')
-['approve-77', 'characterize-29.2-1-1', 'obtain-13.5.2']
-
-
+
>>> verbnet.classids('accept')
+['approve-77', 'characterize-29.2-1-1', 'obtain-13.5.2']
+
+
+

The classids() method may additionally be used to retrieve all classes within verbnet if nothing is passed:

+
>>> verbnet.classids()
+['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93', 'advise-37.9', 'advise-37.9-1', 'allow-64', 'amalgamate-22.2', 'amalgamate-22.2-1', 'amalgamate-22.2-1-1', 'amalgamate-22.2-2', 'amalgamate-22.2-2-1', 'amalgamate-22.2-3', 'amalgamate-22.2-3-1', 'amalgamate-22.2-3-1-1', 'amalgamate-22.2-3-2', 'amuse-31.1', 'animal_sounds-38', 'appeal-31.4', 'appeal-31.4-1', 'appeal-31.4-2', 'appeal-31.4-3', 'appear-48.1.1', 'appoint-29.1', 'approve-77', 'assessment-34', 'assuming_position-50', 'avoid-52', 'banish-10.2', 'battle-36.4', 'battle-36.4-1', 'begin-55.1', 'begin-55.1-1', 'being_dressed-41.3.3', 'bend-45.2', 'berry-13.7', 'bill-54.5', 'body_internal_motion-49', 'body_internal_states-40.6', 'braid-41.2.2', 'break-45.1', 'breathe-40.1.2', 'breathe-40.1.2-1', 'bring-11.3', 'bring-11.3-1', 'build-26.1', 'build-26.1-1', 'bulge-47.5.3', 'bump-18.4', 'bump-18.4-1', 'butter-9.9', 'calibratable_cos-45.6', 'calibratable_cos-45.6-1', 'calve-28', 'captain-29.8', 'captain-29.8-1', 'captain-29.8-1-1', 'care-88', 'care-88-1', 'carry-11.4', 'carry-11.4-1', 'carry-11.4-1-1', 'carve-21.2', 'carve-21.2-1', 'carve-21.2-2', 'change_bodily_state-40.8.4', 'characterize-29.2', 'characterize-29.2-1', 'characterize-29.2-1-1', 'characterize-29.2-1-2', 'chase-51.6', 'cheat-10.6', 'cheat-10.6-1', 'cheat-10.6-1-1', 'chew-39.2', 'chew-39.2-1', 'chew-39.2-2', 'chit_chat-37.6', 'clear-10.3', 'clear-10.3-1', 'cling-22.5', 'coil-9.6', 'coil-9.6-1', 'coloring-24', 'complain-37.8', 'complete-55.2', 'concealment-16', 'concealment-16-1', 'confess-37.10', 'confine-92', 'confine-92-1', 'conjecture-29.5', 'conjecture-29.5-1', 'conjecture-29.5-2', 'consider-29.9', 'consider-29.9-1', 'consider-29.9-1-1', 'consider-29.9-1-1-1', 'consider-29.9-2', 'conspire-71', 'consume-66', 'consume-66-1', 'contiguous_location-47.8', 'contiguous_location-47.8-1', 'contiguous_location-47.8-2', 'continue-55.3', 'contribute-13.2', 'contribute-13.2-1', 'contribute-13.2-1-1', 'contribute-13.2-1-1-1', 'contribute-13.2-2', 'contribute-13.2-2-1', 'convert-26.6.2', 'convert-26.6.2-1', 'cooking-45.3', 'cooperate-73', 'cooperate-73-1', 'cooperate-73-2', 'cooperate-73-3', 'cope-83', 'cope-83-1', 'cope-83-1-1', 'correlate-86', 'correspond-36.1', 'correspond-36.1-1', 'correspond-36.1-1-1', 'cost-54.2', 'crane-40.3.2', 'create-26.4', 'create-26.4-1', 'curtsey-40.3.3', 'cut-21.1', 'cut-21.1-1', 'debone-10.8', 'declare-29.4', 'declare-29.4-1', 'declare-29.4-1-1', 'declare-29.4-1-1-1', 'declare-29.4-1-1-2', 'declare-29.4-1-1-3', 'declare-29.4-2', 'dedicate-79', 'defend-85', 'destroy-44', 'devour-39.4', 'devour-39.4-1', 'devour-39.4-2', 'differ-23.4', 'dine-39.5', 'disappearance-48.2', 'disassemble-23.3', 'discover-84', 'discover-84-1', 'discover-84-1-1', 'dress-41.1.1', 'dressing_well-41.3.2', 'drive-11.5', 'drive-11.5-1', 'dub-29.3', 'dub-29.3-1', 'eat-39.1', 'eat-39.1-1', 'eat-39.1-2', 'enforce-63', 'engender-27', 'entity_specific_cos-45.5', 'entity_specific_modes_being-47.2', 'equip-13.4.2', 'equip-13.4.2-1', 'equip-13.4.2-1-1', 'escape-51.1', 'escape-51.1-1', 'escape-51.1-2', 'escape-51.1-2-1', 'exceed-90', 'exchange-13.6', 'exchange-13.6-1', 'exchange-13.6-1-1', 'exhale-40.1.3', 'exhale-40.1.3-1', 'exhale-40.1.3-2', 'exist-47.1', 'exist-47.1-1', 'exist-47.1-1-1', 'feeding-39.7', 'ferret-35.6', 'fill-9.8', 'fill-9.8-1', 'fit-54.3', 'flinch-40.5', 'floss-41.2.1', 'focus-87', 'forbid-67', 'force-59', 'force-59-1', 'free-80', 'free-80-1', 'fulfilling-13.4.1', 'fulfilling-13.4.1-1', 'fulfilling-13.4.1-2', 'funnel-9.3', 'funnel-9.3-1', 'funnel-9.3-2', 
'funnel-9.3-2-1', 'future_having-13.3', 'get-13.5.1', 'get-13.5.1-1', 'give-13.1', 'give-13.1-1', 'gobble-39.3', 'gobble-39.3-1', 'gobble-39.3-2', 'gorge-39.6', 'groom-41.1.2', 'grow-26.2', 'help-72', 'help-72-1', 'herd-47.5.2', 'hiccup-40.1.1', 'hit-18.1', 'hit-18.1-1', 'hold-15.1', 'hold-15.1-1', 'hunt-35.1', 'hurt-40.8.3', 'hurt-40.8.3-1', 'hurt-40.8.3-1-1', 'hurt-40.8.3-2', 'illustrate-25.3', 'image_impression-25.1', 'indicate-78', 'indicate-78-1', 'indicate-78-1-1', 'inquire-37.1.2', 'instr_communication-37.4', 'investigate-35.4', 'judgement-33', 'keep-15.2', 'knead-26.5', 'learn-14', 'learn-14-1', 'learn-14-2', 'learn-14-2-1', 'leave-51.2', 'leave-51.2-1', 'lecture-37.11', 'lecture-37.11-1', 'lecture-37.11-1-1', 'lecture-37.11-2', 'light_emission-43.1', 'limit-76', 'linger-53.1', 'linger-53.1-1', 'lodge-46', 'long-32.2', 'long-32.2-1', 'long-32.2-2', 'manner_speaking-37.3', 'marry-36.2', 'marvel-31.3', 'marvel-31.3-1', 'marvel-31.3-2', 'marvel-31.3-3', 'marvel-31.3-4', 'marvel-31.3-5', 'marvel-31.3-6', 'marvel-31.3-7', 'marvel-31.3-8', 'marvel-31.3-9', 'masquerade-29.6', 'masquerade-29.6-1', 'masquerade-29.6-2', 'matter-91', 'meander-47.7', 'meet-36.3', 'meet-36.3-1', 'meet-36.3-2', 'mine-10.9', 'mix-22.1', 'mix-22.1-1', 'mix-22.1-1-1', 'mix-22.1-2', 'mix-22.1-2-1', 'modes_of_being_with_motion-47.3', 'murder-42.1', 'murder-42.1-1', 'neglect-75', 'neglect-75-1', 'neglect-75-1-1', 'neglect-75-2', 'nonvehicle-51.4.2', 'nonverbal_expression-40.2', 'obtain-13.5.2', 'obtain-13.5.2-1', 'occurrence-48.3', 'order-60', 'order-60-1', 'orphan-29.7', 'other_cos-45.4', 'pain-40.8.1', 'pay-68', 'peer-30.3', 'pelt-17.2', 'performance-26.7', 'performance-26.7-1', 'performance-26.7-1-1', 'performance-26.7-2', 'performance-26.7-2-1', 'pit-10.7', 'pocket-9.10', 'pocket-9.10-1', 'poison-42.2', 'poke-19', 'pour-9.5', 'preparing-26.3', 'preparing-26.3-1', 'preparing-26.3-2', 'price-54.4', 'push-12', 'push-12-1', 'push-12-1-1', 'put-9.1', 'put-9.1-1', 'put-9.1-2', 'put_direction-9.4', 'put_spatial-9.2', 'put_spatial-9.2-1', 'reach-51.8', 'reflexive_appearance-48.1.2', 'refrain-69', 'register-54.1', 'rely-70', 'remove-10.1', 'risk-94', 'risk-94-1', 'roll-51.3.1', 'rummage-35.5', 'run-51.3.2', 'rush-53.2', 'say-37.7', 'say-37.7-1', 'say-37.7-1-1', 'say-37.7-2', 'scribble-25.2', 'search-35.2', 'see-30.1', 'see-30.1-1', 'see-30.1-1-1', 'send-11.1', 'send-11.1-1', 'separate-23.1', 'separate-23.1-1', 'separate-23.1-2', 'settle-89', 'shake-22.3', 'shake-22.3-1', 'shake-22.3-1-1', 'shake-22.3-2', 'shake-22.3-2-1', 'sight-30.2', 'simple_dressing-41.3.1', 'slide-11.2', 'slide-11.2-1-1', 'smell_emission-43.3', 'snooze-40.4', 'sound_emission-43.2', 'sound_existence-47.4', 'spank-18.3', 'spatial_configuration-47.6', 'split-23.2', 'spray-9.7', 'spray-9.7-1', 'spray-9.7-1-1', 'spray-9.7-2', 'stalk-35.3', 'steal-10.5', 'stimulus_subject-30.4', 'stop-55.4', 'stop-55.4-1', 'substance_emission-43.4', 'succeed-74', 'succeed-74-1', 'succeed-74-1-1', 'succeed-74-2', 'suffocate-40.7', 'suspect-81', 'swarm-47.5.1', 'swarm-47.5.1-1', 'swarm-47.5.1-2', 'swarm-47.5.1-2-1', 'swat-18.2', 'talk-37.5', 'tape-22.4', 'tape-22.4-1', 'tell-37.2', 'throw-17.1', 'throw-17.1-1', 'throw-17.1-1-1', 'tingle-40.8.2', 'touch-20', 'touch-20-1', 'transcribe-25.4', 'transfer_mesg-37.1.1', 'transfer_mesg-37.1.1-1', 'transfer_mesg-37.1.1-1-1', 'try-61', 'turn-26.6.1', 'turn-26.6.1-1', 'urge-58', 'vehicle-51.4.1', 'vehicle-51.4.1-1', 'waltz-51.5', 'want-32.1', 'want-32.1-1', 'want-32.1-1-1', 'weather-57', 'weekend-56', 'wink-40.3.1', 'wink-40.3.1-1', 
'wipe_instr-10.4.2', 'wipe_instr-10.4.2-1', 'wipe_manner-10.4.1', 'wipe_manner-10.4.1-1', 'wish-62', 'withdraw-82', 'withdraw-82-1', 'withdraw-82-2', 'withdraw-82-3']
+
+

The primary object in the lexicon is a class record, which is stored as an ElementTree xml object. The class record for a given class identifier is returned by the vnclass() method:

-
-
->>> verbnet.vnclass('remove-10.1') # doctest: +ELLIPSIS
-<Element 'VNCLASS' at ...>
-
-
-


-
-
->>> verbnet.vnclass('10.1') # doctest: +ELLIPSIS
-<Element 'VNCLASS' at ...>
-
-
+
>>> verbnet.vnclass('remove-10.1')
+<Element 'VNCLASS' at ...>
+
+
+

The vnclass() method also accepts “short” identifiers, such as ‘10.1’:

+
>>> verbnet.vnclass('10.1')
+<Element 'VNCLASS' at ...>
+
+

See the Verbnet documentation, or the Verbnet files, for information about the structure of this xml. As an example, we can retrieve a list of thematic roles for a given Verbnet class:

-
-
->>> vn_31_2 = verbnet.vnclass('admire-31.2')
->>> for themrole in vn_31_2.findall('THEMROLES/THEMROLE'):
-...     print(themrole.attrib['type'], end=' ')
-...     for selrestr in themrole.findall('SELRESTRS/SELRESTR'):
-...         print('[%(Value)s%(type)s]' % selrestr.attrib, end=' ')
-...     print()
-Theme
-Experiencer [+animate]
-Predicate
-
-
+
>>> vn_31_2 = verbnet.vnclass('admire-31.2')
+>>> for themrole in vn_31_2.findall('THEMROLES/THEMROLE'):
+...     print(themrole.attrib['type'], end=' ')
+...     for selrestr in themrole.findall('SELRESTRS/SELRESTR'):
+...         print('[%(Value)s%(type)s]' % selrestr.attrib, end=' ')
+...     print()
+Theme
+Experiencer [+animate]
+Predicate
+
+

The Verbnet corpus also provides a variety of pretty printing functions that can be used to display the xml contents in a more concise form. The simplest such method is pprint():

-
-
->>> print(verbnet.pprint('57'))
-weather-57
-  Subclasses: (none)
-  Members: blow clear drizzle fog freeze gust hail howl lightning mist
-    mizzle pelt pour precipitate rain roar shower sleet snow spit spot
-    sprinkle storm swelter teem thaw thunder
-  Thematic roles:
-    * Theme[+concrete +force]
-  Frames:
-    Intransitive (Expletive Subject)
-      Syntax: LEX[it] LEX[[+be]] VERB
-      Semantics:
-        * weather(during(E), Weather_type, ?Theme)
-    NP (Expletive Subject, Theme Object)
-      Syntax: LEX[it] LEX[[+be]] VERB NP[Theme]
-      Semantics:
-        * weather(during(E), Weather_type, Theme)
-    PP (Expletive Subject, Theme-PP)
-      Syntax: LEX[it[+be]] VERB PREP[with] NP[Theme]
-      Semantics:
-        * weather(during(E), Weather_type, Theme)
-
-
-
-
-


+
>>> print(verbnet.pprint('57'))
+weather-57
+  Subclasses: (none)
+  Members: blow clear drizzle fog freeze gust hail howl lightning mist
+    mizzle pelt pour precipitate rain roar shower sleet snow spit spot
+    sprinkle storm swelter teem thaw thunder
+  Thematic roles:
+    * Theme[+concrete +force]
+  Frames:
+    Intransitive (Expletive Subject)
+      Example: It's raining.
+      Syntax: LEX[it] LEX[[+be]] VERB
+      Semantics:
+        * weather(during(E), Weather_type, ?Theme)
+    NP (Expletive Subject, Theme Object)
+      Example: It's raining cats and dogs.
+      Syntax: LEX[it] LEX[[+be]] VERB NP[Theme]
+      Semantics:
+        * weather(during(E), Weather_type, Theme)
+    PP (Expletive Subject, Theme-PP)
+      Example: It was pelting with rain.
+      Syntax: LEX[it[+be]] VERB PREP[with] NP[Theme]
+      Semantics:
+        * weather(during(E), Weather_type, Theme)
+
+
+

Verbnet gives us frames that link the syntax and semantics using an example. These frames are part of the corpus, and we can use frames() to get the frames for a given verbnet class.

+
>>> frame = verbnet.frames('57')
+>>> frame == [{'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': '?Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': "It's raining.", 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'LEX', 'modifiers': {'value': '[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'Intransitive', 'secondary': 'Expletive Subject'}}, {'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': 'Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': "It's raining cats and dogs.", 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'LEX', 'modifiers': {'value': '[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'NP', 'modifiers': {'value': 'Theme', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'NP', 'secondary': 'Expletive Subject, Theme Object'}}, {'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': 'Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': 'It was pelting with rain.', 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'PREP', 'modifiers': {'value': 'with', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'NP', 'modifiers': {'value': 'Theme', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'PP', 'secondary': 'Expletive Subject, Theme-PP'}}]
+True
+
+
+

The Verbnet corpus lets us access thematic roles individually using themroles().

+
>>> themroles = verbnet.themroles('57')
+>>> themroles == [{'modifiers': [{'type': 'concrete', 'value': '+'}, {'type': 'force', 'value': '+'}], 'type': 'Theme'}]
+True
+
+
+

Verbnet classes may also have subclasses that share similar syntactic and semantic properties with the superclass while differing from it in certain respects. The Verbnet corpus allows us to access these subclasses using subclasses().

+
>>> print(verbnet.subclasses('9.1'))  # testing with '9.1', since '57' has no subclasses
+['put-9.1-1', 'put-9.1-2']
+
+
+ +
+
nps_chat

The NPS Chat Corpus, Release 1.0 consists of over 10,000 posts in age-specific chat rooms, which have been anonymized, POS-tagged and dialogue-act tagged.

-
-
->>> print(nltk.corpus.nps_chat.words())
-['now', 'im', 'left', 'with', 'this', 'gay', ...]
->>> print(nltk.corpus.nps_chat.tagged_words())
-[('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ...]
->>> print(nltk.corpus.nps_chat.tagged_posts()) # doctest: +NORMALIZE_WHITESPACE
-[[('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ('with', 'IN'),
-('this', 'DT'), ('gay', 'JJ'), ('name', 'NN')], [(':P', 'UH')], ...]
-
-
+
>>> print(nltk.corpus.nps_chat.words())
+['now', 'im', 'left', 'with', 'this', 'gay', ...]
+>>> print(nltk.corpus.nps_chat.tagged_words())
+[('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ...]
+>>> print(nltk.corpus.nps_chat.tagged_posts())
+[[('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ('with', 'IN'),
+('this', 'DT'), ('gay', 'JJ'), ('name', 'NN')], [(':P', 'UH')], ...]
+
+

We can access the XML elements corresponding to individual posts. These elements have class and user attributes that we can access using p.attrib['class'] and p.attrib['user']. They also have text content, accessed using p.text.

-
-
->>> print(nltk.corpus.nps_chat.xml_posts()) # doctest: +ELLIPSIS
-[<Element 'Post' at 0...>, <Element 'Post' at 0...>, ...]
->>> posts = nltk.corpus.nps_chat.xml_posts()
->>> sorted(nltk.FreqDist(p.attrib['class'] for p in posts).keys())
-['Accept', 'Bye', 'Clarify', 'Continuer', 'Emotion', 'Emphasis',
-'Greet', 'Other', 'Reject', 'Statement', 'System', 'nAnswer',
-'whQuestion', 'yAnswer', 'ynQuestion']
->>> posts[0].text
-'now im left with this gay name'
-
-

+
>>> print(nltk.corpus.nps_chat.xml_posts())
+[<Element 'Post' at 0...>, <Element 'Post' at 0...>, ...]
+>>> posts = nltk.corpus.nps_chat.xml_posts()
+>>> sorted(nltk.FreqDist(p.attrib['class'] for p in posts).keys())
+['Accept', 'Bye', 'Clarify', 'Continuer', 'Emotion', 'Emphasis',
+'Greet', 'Other', 'Reject', 'Statement', 'System', 'nAnswer',
+'whQuestion', 'yAnswer', 'ynQuestion']
+>>> posts[0].text
+'now im left with this gay name'
+
+

In addition to the above methods for accessing tagged text, we can navigate the XML structure directly, as follows:

-
-
->>> tokens = posts[0].findall('terminals/t')
->>> [t.attrib['pos'] + "/" + t.attrib['word'] for t in tokens]
-['RB/now', 'PRP/im', 'VBD/left', 'IN/with', 'DT/this', 'JJ/gay', 'NN/name']
-
-
-
-
-


-

+

>>> tokens = posts[0].findall('terminals/t')
+>>> [t.attrib['pos'] + "/" + t.attrib['word'] for t in tokens]
+['RB/now', 'PRP/im', 'VBD/left', 'IN/with', 'DT/this', 'JJ/gay', 'NN/name']
+
+
+ +
+
multext_east
+

The Multext-East Corpus consists of POS-tagged versions of George Orwell’s book 1984 in 12 languages: English, Czech, Hungarian, Macedonian, Slovenian, Serbian, Slovak, Romanian, Estonian, Farsi, Bulgarian and Polish. The corpus can be accessed using the usual methods for tagged corpora. The tagset can be transformed from the Multext-East specific MSD tags to the Universal tagset using the “tagset” parameter of all functions returning tagged parts of the corpus.

-
-
->>> print(nltk.corpus.multext_east.words("oana-en.xml"))
-['It', 'was', 'a', 'bright', ...]
->>> print(nltk.corpus.multext_east.tagged_words("oana-en.xml"))
-[('It', '#Pp3ns'), ('was', '#Vmis3s'), ('a', '#Di'), ...]
->>> print(nltk.corpus.multext_east.tagged_sents("oana-en.xml", "universal"))
-[[('It', 'PRON'), ('was', 'VERB'), ('a', 'DET'), ...]
-
-

+
>>> print(nltk.corpus.multext_east.words("oana-en.xml"))
+['It', 'was', 'a', 'bright', ...]
+>>> print(nltk.corpus.multext_east.tagged_words("oana-en.xml"))
+[('It', '#Pp3ns'), ('was', '#Vmis3s'), ('a', '#Di'), ...]
+>>> print(nltk.corpus.multext_east.tagged_sents("oana-en.xml", "universal"))
+[[('It', 'PRON'), ('was', 'VERB'), ('a', 'DET'), ...]
+
-
-
-
-


-


+

Corpus Reader Classes

+

NLTK’s corpus reader classes are used to access the contents of a diverse set of corpora. Each corpus reader class is specialized to handle a specific corpus format. Examples include the PlaintextCorpusReader, which handles corpora that consist of a set of unannotated text files, and the BracketParseCorpusReader, which handles corpora that consist of files containing parenthesis-delineated parse trees.

-
-


+
+

Automatically Created Corpus Reader Instances

When the nltk.corpus module is imported, it automatically creates a set of corpus reader instances that can be used to access the corpora in the NLTK data distribution. Here is a small sample of those corpus reader instances:

-
-
->>> import nltk
->>> nltk.corpus.brown # doctest: +ELLIPSIS
-<CategorizedTaggedCorpusReader ...>
->>> nltk.corpus.treebank # doctest: +ELLIPSIS
-<BracketParseCorpusReader ...>
->>> nltk.corpus.names # doctest: +ELLIPSIS
-<WordListCorpusReader ...>
->>> nltk.corpus.genesis # doctest: +ELLIPSIS
-<PlaintextCorpusReader ...>
->>> nltk.corpus.inaugural # doctest: +ELLIPSIS
-<PlaintextCorpusReader ...>
-
-
+
>>> import nltk
+>>> nltk.corpus.brown
+<CategorizedTaggedCorpusReader ...>
+>>> nltk.corpus.treebank
+<BracketParseCorpusReader ...>
+>>> nltk.corpus.names
+<WordListCorpusReader ...>
+>>> nltk.corpus.genesis
+<PlaintextCorpusReader ...>
+>>> nltk.corpus.inaugural
+<PlaintextCorpusReader ...>
+
+

This sample illustrates that different corpus reader classes are used to read different corpora; but that the same corpus reader class may be used for more than one corpus (e.g., genesis and inaugural).

-
-
-



+
+
+

Creating New Corpus Reader Instances

Although the nltk.corpus module automatically creates corpus reader instances for the corpora in the NLTK data distribution, you may sometimes need to create your own corpus reader. In particular, you would need to create your own corpus reader if you want…


  • To access a corpus that is not included in the NLTK data distribution.

  • To access a full copy of a corpus for which the NLTK data distribution only provides a sample.

  • To access a corpus using a customized corpus reader (e.g., with a customized tokenizer).

To create a new corpus reader, you will first need to look up the signature for that corpus reader’s constructor. Different corpus readers have different constructor signatures, but most of the constructor signatures have the basic form:

-
-SomeCorpusReader(root, files, ...options...)
-
-

+

SomeCorpusReader(root, files, ...options...)
+
+
+

Where root is an absolute path to the directory containing the corpus data files; files is either a list of file names (relative to root) or a regexp specifying which files should be included; and options are additional reader-specific options. For example, we can create a customized corpus reader for the genesis corpus that uses a different sentence tokenizer as follows:

-
-
->>> # Find the directory where the corpus lives.
->>> genesis_dir = nltk.data.find('corpora/genesis')
->>> # Create our custom sentence tokenizer.
->>> my_sent_tokenizer = nltk.RegexpTokenizer('[^.!?]+')
->>> # Create the new corpus reader object.
->>> my_genesis = nltk.corpus.PlaintextCorpusReader(
-...     genesis_dir, '.*\.txt', sent_tokenizer=my_sent_tokenizer)
->>> # Use the new corpus reader object.
->>> print(my_genesis.sents('english-kjv.txt')[0]) # doctest: +NORMALIZE_WHITESPACE
-['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven',
- 'and', 'the', 'earth']
-
-
+
>>> # Find the directory where the corpus lives.
+>>> genesis_dir = nltk.data.find('corpora/genesis')
+>>> # Create our custom sentence tokenizer.
+>>> my_sent_tokenizer = nltk.RegexpTokenizer('[^.!?]+')
+>>> # Create the new corpus reader object.
+>>> my_genesis = nltk.corpus.PlaintextCorpusReader(
+...     genesis_dir, r'.*\.txt', sent_tokenizer=my_sent_tokenizer)
+>>> # Use the new corpus reader object.
+>>> print(my_genesis.sents('english-kjv.txt')[0])
+['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven',
+ 'and', 'the', 'earth']
+
+

If you wish to read your own plaintext corpus, which is stored in the directory ‘/usr/share/some-corpus’, then you can create a corpus reader for it with:

-
->>> my_corpus = nltk.corpus.PlaintextCorpusReader(
-...     '/usr/share/some-corpus', '.*\.txt') # doctest: +SKIP
-
+
>>> my_corpus = nltk.corpus.PlaintextCorpusReader(
+...     '/usr/share/some-corpus', r'.*\.txt') 
+
+

For a complete list of corpus reader subclasses, see the API documentation for nltk.corpus.reader.

-
-
-

+

Corpus Types

Corpora vary widely in the types of content they include. This is reflected in the fact that the base class CorpusReader only defines a few general-purpose methods for listing and accessing the files that make up a corpus. It is up to the corpus reader subclasses to define data access methods that provide access to the information in the corpus; these subclasses should be consistent in their definitions of these data access methods wherever possible.

At a high level, corpora can be divided into three basic types:

  • A token corpus contains information about specific occurrences of language use (or linguistic tokens), such as dialogues or written texts. Examples of token corpora are collections of written text and collections of speech.

  • A type corpus, or lexicon, contains information about a coherent set of lexical items (or linguistic types). Examples of lexicons are dictionaries and word lists.

  • A language description corpus contains information about a set of non-lexical linguistic constructs, such as grammar rules.

However, many individual corpora blur the distinctions between these types. For example, corpora that are primarily lexicons may include token data in the form of example sentences, and token corpora may be accompanied by lexical data such as word lists. Because of this blurring, NLTK does not define separate corpus reader base classes for different corpus types. Instead, we simply try to make the corpus readers consistent wherever possible, but let them differ where the underlying data itself differs.

-
-
-

+

Common Corpus Reader Methods

As mentioned above, there are only a handful of methods that all corpus readers are guaranteed to implement. These methods provide access to the files that contain the corpus data. Every corpus is assumed to consist of one or more files, all located in a common root directory (or in subdirectories of that root directory). The absolute path to the root directory is stored in the root property:

-
-
->>> import os
->>> str(nltk.corpus.genesis.root).replace(os.path.sep,'/') # doctest: +ELLIPSIS
-'.../nltk_data/corpora/genesis'
-
-

+
>>> import os
+>>> str(nltk.corpus.genesis.root).replace(os.path.sep,'/')
+'.../nltk_data/corpora/genesis'
+
+

Each file within the corpus is identified by a platform-independent identifier, which is basically a path string that uses / as the path separator. I.e., this identifier can be converted to a relative path as follows:

-
-
->>> some_corpus_file_id = nltk.corpus.reuters.fileids()[0]
->>> import os.path
->>> os.path.normpath(some_corpus_file_id).replace(os.path.sep,'/')
-'test/14826'
-
-
+
>>> some_corpus_file_id = nltk.corpus.reuters.fileids()[0]
+>>> import os.path
+>>> os.path.normpath(some_corpus_file_id).replace(os.path.sep,'/')
+'test/14826'
+
+

To get a list of all data files that make up a corpus, use the fileids() method. In some corpora, these files will not all contain the same type of data; for example, for the nltk.corpus.timit corpus, fileids() will return a list including text files, word segmentation files, phonetic transcription files, sound files, and metadata files. For corpora with diverse file types, the fileids() method will often take one or more optional arguments, which can be used to get a list of the files with a specific file type:

-
-
->>> nltk.corpus.timit.fileids() # doctest: +ELLIPSIS
-['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...]
->>> nltk.corpus.timit.fileids('phn') # doctest: +ELLIPSIS
-['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa2.phn', 'dr1-fvmh0/si1466.phn', ...]
-
-
+
>>> nltk.corpus.timit.fileids()
+['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...]
+>>> nltk.corpus.timit.fileids('phn')
+['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa2.phn', 'dr1-fvmh0/si1466.phn', ...]
+
+

In some corpora, the files are divided into distinct categories. For these corpora, the fileids() method takes an optional argument, which can be used to get a list of the files within a specific category:

-
-
->>> nltk.corpus.brown.fileids('hobbies') # doctest: +ELLIPSIS
-['ce01', 'ce02', 'ce03', 'ce04', 'ce05', 'ce06', 'ce07', ...]
-
-
-


>>> nltk.corpus.brown.fileids('hobbies')
+['ce01', 'ce02', 'ce03', 'ce04', 'ce05', 'ce06', 'ce07', ...]
+
+
+

The abspath() method can be used to find the absolute path to a corpus file, given its file identifier:

-
-
->>> str(nltk.corpus.brown.abspath('ce06')).replace(os.path.sep,'/') # doctest: +ELLIPSIS
-'.../corpora/brown/ce06'
-
-
-


>>> str(nltk.corpus.brown.abspath('ce06')).replace(os.path.sep,'/')
+'.../corpora/brown/ce06'
+
+
+

The abspaths() method can be used to find the absolute paths for one corpus file, a list of corpus files, or (if no fileids are specified), all corpus files.

This method is mainly useful as a helper method when defining corpus @@ -1811,109 +1343,99 @@

Common Corpus Reader Methods

with a string argument (to get a view for a specific file), with a list argument (to get a view for a specific list of files), or with no argument (to get a view for the whole corpus).

-
-
-

Data Access Methods

+ +
+

Data Access Methods

Individual corpus reader subclasses typically extend this basic set of file-access methods with one or more data access methods, which provide easy access to the data contained in the corpus. The signatures for data access methods often have the basic form:

-
-corpus_reader.some_data_access(fileids=None, ...options...)
-
-


corpus_reader.some_data_access(fileids=None, ...options...)
+
+
+

Where fileids can be a single file identifier string (to get a view for a specific file); a list of file identifier strings (to get a view for a specific list of files); or None (to get a view for the entire corpus). Some of the common data access methods, and their return types, are:

-
    -
  • corpus.words(): list of str
  • corpus.sents(): list of (list of str)
  • corpus.paras(): list of (list of (list of str))
  • corpus.tagged_words(): list of (str,str) tuple
  • corpus.tagged_sents(): list of (list of (str,str))
  • corpus.tagged_paras(): list of (list of (list of (str,str)))
  • corpus.chunked_sents(): list of (Tree w/ (str,str) leaves)
  • corpus.parsed_sents(): list of (Tree with str leaves)
  • corpus.parsed_paras(): list of (list of (Tree with str leaves))
  • corpus.xml(): A single xml ElementTree
  • corpus.raw(): str (unprocessed corpus contents)

For example, the words() method is supported by many different corpora, and returns a flat list of word strings:

-
-
->>> nltk.corpus.brown.words()
-['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
->>> nltk.corpus.treebank.words()
-['Pierre', 'Vinken', ',', '61', 'years', 'old', ...]
->>> nltk.corpus.conll2002.words()
-[u'Sao', u'Paulo', u'(', u'Brasil', u')', u',', u'23', ...]
->>> nltk.corpus.genesis.words()
-[u'In', u'the', u'beginning', u'God', u'created', ...]
-
-
+
>>> nltk.corpus.brown.words()
+['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
+>>> nltk.corpus.treebank.words()
+['Pierre', 'Vinken', ',', '61', 'years', 'old', ...]
+>>> nltk.corpus.conll2002.words()
+['Sao', 'Paulo', '(', 'Brasil', ')', ',', '23', ...]
+>>> nltk.corpus.genesis.words()
+['In', 'the', 'beginning', 'God', 'created', ...]
+
+

On the other hand, the tagged_words() method is only supported by corpora that include part-of-speech annotations:

-
-
->>> nltk.corpus.brown.tagged_words()
-[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
->>> nltk.corpus.treebank.tagged_words()
-[('Pierre', 'NNP'), ('Vinken', 'NNP'), ...]
->>> nltk.corpus.conll2002.tagged_words()
-[(u'Sao', u'NC'), (u'Paulo', u'VMI'), (u'(', u'Fpa'), ...]
->>> nltk.corpus.genesis.tagged_words()
-Traceback (most recent call last):
-  ...
-AttributeError: 'PlaintextCorpusReader' object has no attribute 'tagged_words'
-
-
+
>>> nltk.corpus.brown.tagged_words()
+[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
+>>> nltk.corpus.treebank.tagged_words()
+[('Pierre', 'NNP'), ('Vinken', 'NNP'), ...]
+>>> nltk.corpus.conll2002.tagged_words()
+[('Sao', 'NC'), ('Paulo', 'VMI'), ('(', 'Fpa'), ...]
+>>> nltk.corpus.genesis.tagged_words()
+Traceback (most recent call last):
+  ...
+AttributeError: 'PlaintextCorpusReader' object has no attribute 'tagged_words'
+
+

Although most corpus readers use file identifiers to index their content, some corpora use different identifiers instead. For example, the data access methods for the timit corpus use utterance identifiers to select which corpus items should be returned:

-
-
->>> nltk.corpus.timit.utteranceids() # doctest: +ELLIPSIS
-['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...]
->>> nltk.corpus.timit.words('dr1-fvmh0/sa2')
-["don't", 'ask', 'me', 'to', 'carry', 'an', 'oily', 'rag', 'like', 'that']
-
-
-


>>> nltk.corpus.timit.utteranceids()
+['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...]
+>>> nltk.corpus.timit.words('dr1-fvmh0/sa2')
+["don't", 'ask', 'me', 'to', 'carry', 'an', 'oily', 'rag', 'like', 'that']
+
+
+

Attempting to call timit’s data access methods with a file identifier will result in an exception:

-
-
->>> nltk.corpus.timit.fileids() # doctest: +ELLIPSIS
-['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...]
->>> nltk.corpus.timit.words('dr1-fvmh0/sa1.txt') # doctest: +SKIP
-Traceback (most recent call last):
-  ...
-IOError: No such file or directory: '.../dr1-fvmh0/sa1.txt.wrd'
-
-
-


>>> nltk.corpus.timit.fileids()
+['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...]
+>>> nltk.corpus.timit.words('dr1-fvmh0/sa1.txt') 
+Traceback (most recent call last):
+  ...
+IOError: No such file or directory: '.../dr1-fvmh0/sa1.txt.wrd'
+
+
+

As another example, the propbank corpus defines the roleset() method, which expects a roleset identifier, not a file identifier:

-
-
->>> roleset = nltk.corpus.propbank.roleset('eat.01')
->>> from xml.etree import ElementTree as ET
->>> print(ET.tostring(roleset).decode('utf8')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-<roleset id="eat.01" name="consume" vncls="39.1">
-  <roles>
-    <role descr="consumer, eater" n="0">...</role>...
-  </roles>...
-</roleset>...
-
-
-
-
-

Stream Backed Corpus Views

-


>>> roleset = nltk.corpus.propbank.roleset('eat.01')
+>>> from xml.etree import ElementTree as ET
+>>> print(ET.tostring(roleset).decode('utf8'))
+<roleset id="eat.01" name="consume" vncls="39.1">
+  <roles>
+    <role descr="consumer, eater" n="0">...</role>...
+  </roles>...
+</roleset>...
+
+
+ +
+

Stream Backed Corpus Views

+

An important feature of NLTK’s corpus readers is that many of them access the underlying data files using “corpus views.” A corpus view is an object that acts like a simple data structure (such as a list), but does not store the data elements in memory; instead, data elements are read from the underlying data files on an as-needed basis.
@@ -1930,31 +1452,31 @@

Stream Backed Corpus Views

classes, ConcatenatedCorpusView and LazySubsequence, make it possible to create concatenations and take slices of StreamBackedCorpusView objects without actually storing the resulting list-like object’s elements in memory.
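
For instance (a small sketch using the Brown corpus; the point is that indexing and len() work without materializing the whole token list):

>>> words = nltk.corpus.brown.words()   # a corpus view, not a list
>>> words[0]                            # reads just the first block
'The'
>>> len(words)                          # counts tokens block by block
1161192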


In the future, we may add additional corpus views that act like other basic data structures, such as dictionaries.

-
-
-

Writing New Corpus Readers

+ +
+

Writing New Corpus Readers

In order to add support for new corpus formats, it is necessary to define new corpus reader classes. For many corpus formats, writing new corpus readers is relatively straight-forward. In this section, we’ll describe what’s involved in creating a new corpus reader. If you do create a new corpus reader, we encourage you to contribute it back to the NLTK project.

-
-

Don't Reinvent the Wheel

+
+
Don’t Reinvent the Wheel

Before you start writing a new corpus reader, you should check to be sure that the desired format can’t be read using an existing corpus reader with appropriate constructor arguments. For example, although the TaggedCorpusReader assumes that words and tags are separated by / characters by default, an alternative tag-separation character can be specified via the sep constructor argument. You should also check whether the new corpus format can be handled by subclassing an existing corpus reader, and tweaking a few methods or variables.
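
For instance, a reader for a hypothetical corpus whose files mark tags with '|' rather than '/' might be constructed like this (a sketch; the directory and file pattern are made up):

>>> from nltk.corpus.reader.tagged import TaggedCorpusReader
>>> my_reader = TaggedCorpusReader(
...     '/path/to/tagged-corpus', r'.*\.pos', sep='|')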

-
-
-

Design

+
+
+
Design

If you decide to write a new corpus reader from scratch, then you should first decide which data access methods you want the reader to provide, and what their signatures should be. You should look at @@ -1962,72 +1484,75 @@

Design

contents, and try to be consistent with those corpus readers whenever possible.

You should also consider what sets of identifiers are appropriate for the corpus format. Where it’s practical, file identifiers should be used. However, for some corpora, it may make sense to use additional sets of identifiers. Each set of identifiers should have a distinct name (e.g., fileids, utteranceids, rolesets); and you should be consistent in using that name to refer to that identifier. Do not use parameter names like id, which leave it unclear what type of identifier is required.

-


Once you’ve decided what data access methods and identifiers are appropriate for your corpus, you should decide if there are any customizable parameters that you’d like the corpus reader to handle. These parameters make it possible to use a single corpus reader to handle a wider variety of corpora. The sep argument for TaggedCorpusReader, mentioned above, is an example of a customizable corpus reader parameter.

-
-
-

Implementation

-
-

Constructor

+ +
+
Implementation
+
+
Constructor

If your corpus reader implements any customizable parameters, then you’ll need to override the constructor. Typically, the new constructor will first call its base class’s constructor, and then store the customizable parameters. For example, the ConllChunkCorpusReader’s constructor is defined as follows:

-
-
->>> def __init__(self, root, files, chunk_types):
-...     CorpusReader.__init__(self, root, files)
-...     self.chunk_types = tuple(chunk_types)
-
-

+
+
+
def __init__(self, root, fileids, chunk_types, encoding='utf8',
             tagset=None, separator=None):
    ConllCorpusReader.__init__(
        self, root, fileids, ('words', 'pos', 'chunk'),
        chunk_types=chunk_types, encoding=encoding,
        tagset=tagset, separator=separator)

+
+
+
+
+

If your corpus reader does not implement any customization parameters, then you can often just inherit the base class’s constructor.

-
-
-

Data Access Methods

+then you can often just inherit the base class’s constructor.

+ +
+
Data Access Methods

The most common type of data access method takes an argument identifying which files to access, and returns a view covering those files. This argument may be a single file identifier string (to get a view for a specific file); a list of file identifier strings (to get a view for a specific list of files); or None (to get a view for the entire corpus). The method’s implementation converts this argument to a list of path names using the abspaths() method, which handles all three value types (string, list, and None):

-
-
->>> print(str(nltk.corpus.brown.abspaths()).replace('\\\\','/')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[FileSystemPathPointer('.../corpora/brown/ca01'),
- FileSystemPathPointer('.../corpora/brown/ca02'), ...]
->>> print(str(nltk.corpus.brown.abspaths('ce06')).replace('\\\\','/')) # doctest: +ELLIPSIS
-[FileSystemPathPointer('.../corpora/brown/ce06')]
->>> print(str(nltk.corpus.brown.abspaths(['ce06', 'ce07'])).replace('\\\\','/')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[FileSystemPathPointer('.../corpora/brown/ce06'),
- FileSystemPathPointer('.../corpora/brown/ce07')]
-
-
+
>>> print(str(nltk.corpus.brown.abspaths()).replace('\\\\','/'))
+[FileSystemPathPointer('.../corpora/brown/ca01'),
+ FileSystemPathPointer('.../corpora/brown/ca02'), ...]
+>>> print(str(nltk.corpus.brown.abspaths('ce06')).replace('\\\\','/'))
+[FileSystemPathPointer('.../corpora/brown/ce06')]
+>>> print(str(nltk.corpus.brown.abspaths(['ce06', 'ce07'])).replace('\\\\','/'))
+[FileSystemPathPointer('.../corpora/brown/ce06'),
+ FileSystemPathPointer('.../corpora/brown/ce07')]
+
+

An example of this type of method is the words() method, defined by the PlaintextCorpusReader as follows:

-
-
->>> def words(self, fileids=None):
-...     return concat([self.CorpusView(fileid, self._read_word_block)
-...                    for fileid in self.abspaths(fileids)])
-
-
-

This method first uses abspaths() to convert fileids to a list of +

>>> def words(self, fileids=None):
+...     return concat([self.CorpusView(fileid, self._read_word_block)
+...                    for fileid in self.abspaths(fileids)])
+
+
+

This method first uses abspaths() to convert fileids to a list of absolute paths. It then creates a corpus view for each file, using the PlaintextCorpusReader._read_word_block() method to read elements from the data file (see the discussion of corpus views below). @@ -2037,38 +1562,32 @@

Data Access Methods

very large, it can sometimes be appropriate to read the files directly, rather than using a corpus view. For example, the WordListCorpusReader class defines its words() method as follows:

-
-
->>> def words(self, fileids=None):
-...     return concat([[w for w in open(fileid).read().split('\n') if w]
-...                    for fileid in self.abspaths(fileids)])
-
-
+
>>> def words(self, fileids=None):
+...     return concat([[w for w in open(fileid).read().split('\n') if w]
+...                    for fileid in self.abspaths(fileids)])
+
+

(This is usually more appropriate for lexicons than for token corpora.)

If the type of data returned by a data access method is one for which NLTK has a conventional representation (e.g., words, tagged words, and parse trees), then you should use that representation. Otherwise, you may find it necessary to define your own representation. For data structures that are relatively corpus-specific, it’s usually best to define new classes for these elements. For example, the propbank corpus defines the PropbankInstance class to store the semantic role labeling instances described by the corpus; and the ppattach corpus defines the PPAttachment class to store the prepositional attachment instances described by the corpus.

-
-
-

Corpus Views

- + +
+
Corpus Views

The heart of a StreamBackedCorpusView is its block reader function, which reads zero or more tokens from a stream, and returns them as a list. A very simple example of a block reader is:

-
-
->>> def simple_block_reader(stream):
-...     return stream.readline().split()
-
-
+
>>> def simple_block_reader(stream):
+...     return stream.readline().split()
+
+

This simple block reader reads a single line at a time, and returns a single token (consisting of a string) for each whitespace-separated substring on the line. A StreamBackedCorpusView built from this @@ -2077,7 +1596,7 @@

Corpus Views

When deciding how to define the block reader for a given corpus, careful consideration should be given to the size of blocks handled by the block reader. Smaller block sizes will increase the memory requirements of the corpus view’s internal data structures (by 2 integers per block). On the other hand, larger block sizes may decrease performance for random access to the corpus. (But note that larger block sizes will not decrease performance for iteration.)
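
For instance, a corpus view over a single plaintext file can be built directly from the block reader above (a sketch; 'example.txt' stands in for a real file):

>>> from nltk.corpus.reader.util import StreamBackedCorpusView
>>> view = StreamBackedCorpusView('example.txt', simple_block_reader)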

@@ -2086,725 +1605,719 @@

Corpus Views

When a token with a given index i is requested, the corpus view constructs it as follows:

    -
  1. First, it searches the toknum/filepos mapping for the token index
     closest to (but less than or equal to) i.
  2. Then, starting at the file position corresponding to that index, it
     reads one block at a time using the block reader until it reaches
     the requested token.

The toknum/filepos mapping is created lazily: it is initially empty, but every time a new block is read, the block’s initial token is added to the mapping. (Thus, the toknum/filepos map has one entry per block.)

You can create your own corpus view in one of two ways:

    -
  1. Call the StreamBackedCorpusView constructor, and provide your
     block reader function via the block_reader argument.
  2. Subclass StreamBackedCorpusView, and override the
     read_block() method.

The first option is usually easier, but the second option can allow you to write a single read_block method whose behavior can be customized by different parameters to the subclass’s constructor. For an example of this design pattern, see the TaggedCorpusView class, which is used by TaggedCorpusReader.
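
As a minimal sketch of the second option (the one-token-per-line block format here is hypothetical):

>>> from nltk.corpus.reader.util import StreamBackedCorpusView
>>> class LineTokenView(StreamBackedCorpusView):
...     def read_block(self, stream):
...         # Each block is one line; return its token as a one-item list.
...         line = stream.readline()
...         return [line.strip()] if line.strip() else []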

-
-
-
-
-
-

Regression Tests

+ + + + +
+

Regression Tests

The following helper functions are used to create and then delete testing corpora that are stored in temporary directories. These testing corpora are used to make sure the readers work correctly.

-
-
->>> import tempfile, os.path, textwrap
->>> def make_testcorpus(ext='', **fileids):
-...     root = tempfile.mkdtemp()
-...     for fileid, contents in fileids.items():
-...         fileid += ext
-...         f = open(os.path.join(root, fileid), 'w')
-...         f.write(textwrap.dedent(contents))
-...         f.close()
-...     return root
->>> def del_testcorpus(root):
-...     for fileid in os.listdir(root):
-...         os.remove(os.path.join(root, fileid))
-...     os.rmdir(root)
-
-
-
-

Plaintext Corpus Reader

+
>>> import tempfile, os.path, textwrap
+>>> def make_testcorpus(ext='', **fileids):
+...     root = tempfile.mkdtemp()
+...     for fileid, contents in fileids.items():
+...         fileid += ext
+...         f = open(os.path.join(root, fileid), 'w')
+...         f.write(textwrap.dedent(contents))
+...         f.close()
+...     return root
+>>> def del_testcorpus(root):
+...     for fileid in os.listdir(root):
+...         os.remove(os.path.join(root, fileid))
+...     os.rmdir(root)
+
+
+
+

Plaintext Corpus Reader

The plaintext corpus reader is used to access corpora that consist of unprocessed plaintext data. It assumes that paragraph breaks are indicated by blank lines. Sentences and words can be tokenized using the default tokenizers, or by custom tokenizers specified as parameters to the constructor.
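
For example, a custom word tokenizer can be supplied in the same way as the custom sentence tokenizer shown earlier (a sketch; the directory is hypothetical):

>>> from nltk.corpus.reader.plaintext import PlaintextCorpusReader
>>> from nltk.tokenize import WhitespaceTokenizer
>>> ws_corpus = PlaintextCorpusReader(
...     '/path/to/corpus', r'.*\.txt',
...     word_tokenizer=WhitespaceTokenizer())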

-
-
->>> root = make_testcorpus(ext='.txt',
-...     a="""\
-...     This is the first sentence.  Here is another
-...     sentence!  And here's a third sentence.
-...
-...     This is the second paragraph.  Tokenization is currently
-...     fairly simple, so the period in Mr. gets tokenized.
-...     """,
-...     b="""This is the second file.""")
-
-
->>> from nltk.corpus.reader.plaintext import PlaintextCorpusReader
-
-
+
>>> root = make_testcorpus(ext='.txt',
+...     a="""\
+...     This is the first sentence.  Here is another
+...     sentence!  And here's a third sentence.
+...
+...     This is the second paragraph.  Tokenization is currently
+...     fairly simple, so the period in Mr. gets tokenized.
+...     """,
+...     b="""This is the second file.""")
+
+
+
>>> from nltk.corpus.reader.plaintext import PlaintextCorpusReader
+
+

The list of documents can be specified explicitly, or implicitly (using a regexp). The ext argument specifies a file extension.

-
-
->>> corpus = PlaintextCorpusReader(root, ['a.txt', 'b.txt'])
->>> corpus.fileids()
-['a.txt', 'b.txt']
->>> corpus = PlaintextCorpusReader(root, '.*\.txt')
->>> corpus.fileids()
-['a.txt', 'b.txt']
-
-

+
>>> corpus = PlaintextCorpusReader(root, ['a.txt', 'b.txt'])
+>>> corpus.fileids()
+['a.txt', 'b.txt']
+>>> corpus = PlaintextCorpusReader(root, r'.*\.txt')
+>>> corpus.fileids()
+['a.txt', 'b.txt']
+
+

The directory containing the corpus is corpus.root:

-
-
->>> str(corpus.root) == str(root)
-True
-
-
+
>>> str(corpus.root) == str(root)
+True
+
+

We can get a list of words, or the raw string:

-
-
->>> corpus.words()
-['This', 'is', 'the', 'first', 'sentence', '.', ...]
->>> corpus.raw()[:40]
-'This is the first sentence.  Here is ano'
-
-
+
>>> corpus.words()
+['This', 'is', 'the', 'first', 'sentence', '.', ...]
+>>> corpus.raw()[:40]
+'This is the first sentence.  Here is ano'
+
+

Check that reading individual documents works, and reading all documents at once works:

-
-
->>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()]
-(46, [40, 6])
->>> corpus.words('a.txt')
-['This', 'is', 'the', 'first', 'sentence', '.', ...]
->>> corpus.words('b.txt')
-['This', 'is', 'the', 'second', 'file', '.']
->>> corpus.words()[:4], corpus.words()[-4:]
-(['This', 'is', 'the', 'first'], ['the', 'second', 'file', '.'])
-
-
-

We're done with the test corpus:

-
-
->>> del_testcorpus(root)
-
-
+
>>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()]
+(46, [40, 6])
+>>> corpus.words('a.txt')
+['This', 'is', 'the', 'first', 'sentence', '.', ...]
+>>> corpus.words('b.txt')
+['This', 'is', 'the', 'second', 'file', '.']
+>>> corpus.words()[:4], corpus.words()[-4:]
+(['This', 'is', 'the', 'first'], ['the', 'second', 'file', '.'])
+
+
+

We’re done with the test corpus:

+
>>> del_testcorpus(root)
+
+

Test the plaintext corpora that come with nltk:

-
-
->>> from nltk.corpus import abc, genesis, inaugural
->>> from nltk.corpus import state_union, webtext
->>> for corpus in (abc, genesis, inaugural, state_union,
-...                webtext):
-...     print(str(corpus).replace('\\\\','/'))
-...     print('  ', repr(corpus.fileids())[:60])
-...     print('  ', repr(corpus.words()[:10])[:60])
-<PlaintextCorpusReader in '.../nltk_data/corpora/ab...'>
-   ['rural.txt', 'science.txt']
-   ['PM', 'denies', 'knowledge', 'of', 'AWB', ...
-<PlaintextCorpusReader in '.../nltk_data/corpora/genesi...'>
-   ['english-kjv.txt', 'english-web.txt', 'finnish.txt', ...
-   ['In', 'the', 'beginning', 'God', 'created', 'the', ...
-<PlaintextCorpusReader in '.../nltk_data/corpora/inaugura...'>
-   ['1789-Washington.txt', '1793-Washington.txt', ...
-   ['Fellow', '-', 'Citizens', 'of', 'the', 'Senate', ...
-<PlaintextCorpusReader in '.../nltk_data/corpora/state_unio...'>
-   ['1945-Truman.txt', '1946-Truman.txt', ...
-   ['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...
-<PlaintextCorpusReader in '.../nltk_data/corpora/webtex...'>
-   ['firefox.txt', 'grail.txt', 'overheard.txt', ...
-   ['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...
-
-
-
-
-

Tagged Corpus Reader

+
>>> from nltk.corpus import abc, genesis, inaugural
+>>> from nltk.corpus import state_union, webtext
+>>> for corpus in (abc, genesis, inaugural, state_union,
+...                webtext):
+...     print(str(corpus).replace('\\\\','/'))
+...     print('  ', repr(corpus.fileids())[:60])
+...     print('  ', repr(corpus.words()[:10])[:60])
+<PlaintextCorpusReader in '.../nltk_data/corpora/ab...'>
+   ['rural.txt', 'science.txt']
+   ['PM', 'denies', 'knowledge', 'of', 'AWB', ...
+<PlaintextCorpusReader in '.../nltk_data/corpora/genesi...'>
+   ['english-kjv.txt', 'english-web.txt', 'finnish.txt', ...
+   ['In', 'the', 'beginning', 'God', 'created', 'the', ...
+<PlaintextCorpusReader in '.../nltk_data/corpora/inaugura...'>
+   ['1789-Washington.txt', '1793-Washington.txt', ...
+   ['Fellow', '-', 'Citizens', 'of', 'the', 'Senate', ...
+<PlaintextCorpusReader in '.../nltk_data/corpora/state_unio...'>
+   ['1945-Truman.txt', '1946-Truman.txt', ...
+   ['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...
+<PlaintextCorpusReader in '.../nltk_data/corpora/webtex...'>
+   ['firefox.txt', 'grail.txt', 'overheard.txt', ...
+   ['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...
+
+
+
+
+

Tagged Corpus Reader

The Tagged Corpus reader can give us words, sentences, and paragraphs, each tagged or untagged. All of the read methods can take one item (in which case they return the contents of that file) or a list of documents (in which case they concatenate the contents of those files). By default, they apply to all documents in the corpus.

-
-
->>> root = make_testcorpus(
-...     a="""\
-...     This/det is/verb the/det first/adj sentence/noun ./punc
-...     Here/det  is/verb  another/adj    sentence/noun ./punc
-...     Note/verb that/comp you/pron can/verb use/verb \
-...           any/noun tag/noun set/noun
-...
-...     This/det is/verb the/det second/adj paragraph/noun ./punc
-...     word/n without/adj a/det tag/noun :/: hello ./punc
-...     """,
-...     b="""\
-...     This/det is/verb the/det second/adj file/noun ./punc
-...     """)
-
-
->>> from nltk.corpus.reader.tagged import TaggedCorpusReader
->>> corpus = TaggedCorpusReader(root, list('ab'))
->>> corpus.fileids()
-['a', 'b']
->>> str(corpus.root) == str(root)
-True
->>> corpus.words()
-['This', 'is', 'the', 'first', 'sentence', '.', ...]
->>> corpus.sents() # doctest: +ELLIPSIS
-[['This', 'is', 'the', 'first', ...], ['Here', 'is', 'another'...], ...]
->>> corpus.paras() # doctest: +ELLIPSIS
-[[['This', ...], ['Here', ...], ...], [['This', ...], ...], ...]
->>> corpus.tagged_words() # doctest: +ELLIPSIS
-[('This', 'DET'), ('is', 'VERB'), ('the', 'DET'), ...]
->>> corpus.tagged_sents() # doctest: +ELLIPSIS
-[[('This', 'DET'), ('is', 'VERB'), ...], [('Here', 'DET'), ...], ...]
->>> corpus.tagged_paras() # doctest: +ELLIPSIS
-[[[('This', 'DET'), ...], ...], [[('This', 'DET'), ...], ...], ...]
->>> corpus.raw()[:40]
-'This/det is/verb the/det first/adj sente'
->>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()]
-(38, [32, 6])
->>> len(corpus.sents()), [len(corpus.sents(d)) for d in corpus.fileids()]
-(6, [5, 1])
->>> len(corpus.paras()), [len(corpus.paras(d)) for d in corpus.fileids()]
-(3, [2, 1])
->>> print(corpus.words('a'))
-['This', 'is', 'the', 'first', 'sentence', '.', ...]
->>> print(corpus.words('b'))
-['This', 'is', 'the', 'second', 'file', '.']
->>> del_testcorpus(root)
-
-
+
>>> root = make_testcorpus(
+...     a="""\
+...     This/det is/verb the/det first/adj sentence/noun ./punc
+...     Here/det  is/verb  another/adj    sentence/noun ./punc
+...     Note/verb that/comp you/pron can/verb use/verb \
+...           any/noun tag/noun set/noun
+...
+...     This/det is/verb the/det second/adj paragraph/noun ./punc
+...     word/n without/adj a/det tag/noun :/: hello ./punc
+...     """,
+...     b="""\
+...     This/det is/verb the/det second/adj file/noun ./punc
+...     """)
+
+
+
>>> from nltk.corpus.reader.tagged import TaggedCorpusReader
+>>> corpus = TaggedCorpusReader(root, list('ab'))
+>>> corpus.fileids()
+['a', 'b']
+>>> str(corpus.root) == str(root)
+True
+>>> corpus.words()
+['This', 'is', 'the', 'first', 'sentence', '.', ...]
+>>> corpus.sents()
+[['This', 'is', 'the', 'first', ...], ['Here', 'is', 'another'...], ...]
+>>> corpus.paras()
+[[['This', ...], ['Here', ...], ...], [['This', ...], ...], ...]
+>>> corpus.tagged_words()
+[('This', 'DET'), ('is', 'VERB'), ('the', 'DET'), ...]
+>>> corpus.tagged_sents()
+[[('This', 'DET'), ('is', 'VERB'), ...], [('Here', 'DET'), ...], ...]
+>>> corpus.tagged_paras()
+[[[('This', 'DET'), ...], ...], [[('This', 'DET'), ...], ...], ...]
+>>> corpus.raw()[:40]
+'This/det is/verb the/det first/adj sente'
+>>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()]
+(38, [32, 6])
+>>> len(corpus.sents()), [len(corpus.sents(d)) for d in corpus.fileids()]
+(6, [5, 1])
+>>> len(corpus.paras()), [len(corpus.paras(d)) for d in corpus.fileids()]
+(3, [2, 1])
+>>> print(corpus.words('a'))
+['This', 'is', 'the', 'first', 'sentence', '.', ...]
+>>> print(corpus.words('b'))
+['This', 'is', 'the', 'second', 'file', '.']
+>>> del_testcorpus(root)
+
+

The Brown Corpus uses the tagged corpus reader:

-
-
->>> from nltk.corpus import brown
->>> brown.fileids() # doctest: +ELLIPSIS
-['ca01', 'ca02', 'ca03', 'ca04', 'ca05', 'ca06', 'ca07', ...]
->>> brown.categories() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor',
-'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
->>> print(repr(brown.root).replace('\\\\','/')) # doctest: +ELLIPSIS
-FileSystemPathPointer('.../corpora/brown')
->>> brown.words()
-['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
->>> brown.sents() # doctest: +ELLIPSIS
-[['The', 'Fulton', 'County', 'Grand', ...], ...]
->>> brown.paras() # doctest: +ELLIPSIS
-[[['The', 'Fulton', 'County', ...]], [['The', 'jury', ...]], ...]
->>> brown.tagged_words() # doctest: +ELLIPSIS
-[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
->>> brown.tagged_sents() # doctest: +ELLIPSIS
-[[('The', 'AT'), ('Fulton', 'NP-TL'), ('County', 'NN-TL'), ...], ...]
->>> brown.tagged_paras() # doctest: +ELLIPSIS
-[[[('The', 'AT'), ...]], [[('The', 'AT'), ...]], ...]
-
-
-
-
-

Verbnet Corpus Reader

-

Make sure we're picking up the right number of elements:

-
-
->>> from nltk.corpus import verbnet
->>> len(verbnet.lemmas())
-3621
->>> len(verbnet.wordnetids())
-4953
->>> len(verbnet.classids())
-429
-
-
+
>>> from nltk.corpus import brown
+>>> brown.fileids()
+['ca01', 'ca02', 'ca03', 'ca04', 'ca05', 'ca06', 'ca07', ...]
+>>> brown.categories()
+['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor',
+'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
+>>> print(repr(brown.root).replace('\\\\','/'))
+FileSystemPathPointer('.../corpora/brown')
+>>> brown.words()
+['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
+>>> brown.sents()
+[['The', 'Fulton', 'County', 'Grand', ...], ...]
+>>> brown.paras()
+[[['The', 'Fulton', 'County', ...]], [['The', 'jury', ...]], ...]
+>>> brown.tagged_words()
+[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
+>>> brown.tagged_sents()
+[[('The', 'AT'), ('Fulton', 'NP-TL'), ('County', 'NN-TL'), ...], ...]
+>>> brown.tagged_paras()
+[[[('The', 'AT'), ...]], [[('The', 'AT'), ...]], ...]
+
+
+ +
+

Verbnet Corpus Reader

+

Make sure we’re picking up the right number of elements:

+
>>> from nltk.corpus import verbnet
+>>> len(verbnet.lemmas())
+3621
+>>> len(verbnet.wordnetids())
+4953
+>>> len(verbnet.classids())
+429
+
+

Selecting classids based on various selectors:

-
-
->>> verbnet.classids(lemma='take') # doctest: +NORMALIZE_WHITESPACE
-['bring-11.3', 'characterize-29.2', 'convert-26.6.2', 'cost-54.2',
-'fit-54.3', 'performance-26.7-2', 'steal-10.5']
->>> verbnet.classids(wordnetid='lead%2:38:01')
-['accompany-51.7']
->>> verbnet.classids(fileid='approve-77.xml')
-['approve-77']
->>> verbnet.classids(classid='admire-31.2') # subclasses
-['admire-31.2-1']
-
-
+
>>> verbnet.classids(lemma='take')
+['bring-11.3', 'characterize-29.2', 'convert-26.6.2', 'cost-54.2',
+'fit-54.3', 'performance-26.7-2', 'steal-10.5']
+>>> verbnet.classids(wordnetid='lead%2:38:01')
+['accompany-51.7']
+>>> verbnet.classids(fileid='approve-77.xml')
+['approve-77']
+>>> verbnet.classids(classid='admire-31.2') # subclasses
+['admire-31.2-1']
+
+

vnclass() accepts filenames, long ids, and short ids:

-
-
->>> a = ElementTree.tostring(verbnet.vnclass('admire-31.2.xml'))
->>> b = ElementTree.tostring(verbnet.vnclass('admire-31.2'))
->>> c = ElementTree.tostring(verbnet.vnclass('31.2'))
->>> a == b == c
-True
-
-
+
>>> a = ElementTree.tostring(verbnet.vnclass('admire-31.2.xml'))
+>>> b = ElementTree.tostring(verbnet.vnclass('admire-31.2'))
+>>> c = ElementTree.tostring(verbnet.vnclass('31.2'))
+>>> a == b == c
+True
+
+

fileids() can be used to get files based on verbnet class ids:

-
-
->>> verbnet.fileids('admire-31.2')
-['admire-31.2.xml']
->>> verbnet.fileids(['admire-31.2', 'obtain-13.5.2'])
-['admire-31.2.xml', 'obtain-13.5.2.xml']
->>> verbnet.fileids('badidentifier')
-Traceback (most recent call last):
-  . . .
-ValueError: vnclass identifier 'badidentifier' not found
-
-
+
>>> verbnet.fileids('admire-31.2')
+['admire-31.2.xml']
+>>> verbnet.fileids(['admire-31.2', 'obtain-13.5.2'])
+['admire-31.2.xml', 'obtain-13.5.2.xml']
+>>> verbnet.fileids('badidentifier')
+Traceback (most recent call last):
+  . . .
+ValueError: vnclass identifier 'badidentifier' not found
+
+

longid() and shortid() can be used to convert identifiers:

-
-
->>> verbnet.longid('31.2')
-'admire-31.2'
->>> verbnet.longid('admire-31.2')
-'admire-31.2'
->>> verbnet.shortid('31.2')
-'31.2'
->>> verbnet.shortid('admire-31.2')
-'31.2'
->>> verbnet.longid('badidentifier')
-Traceback (most recent call last):
-  . . .
-ValueError: vnclass identifier 'badidentifier' not found
->>> verbnet.shortid('badidentifier')
-Traceback (most recent call last):
-  . . .
-ValueError: vnclass identifier 'badidentifier' not found
-
-
-
-
-

Corpus View Regression Tests

+
>>> verbnet.longid('31.2')
+'admire-31.2'
+>>> verbnet.longid('admire-31.2')
+'admire-31.2'
+>>> verbnet.shortid('31.2')
+'31.2'
+>>> verbnet.shortid('admire-31.2')
+'31.2'
+>>> verbnet.longid('badidentifier')
+Traceback (most recent call last):
+  . . .
+ValueError: vnclass identifier 'badidentifier' not found
+>>> verbnet.shortid('badidentifier')
+Traceback (most recent call last):
+  . . .
+ValueError: vnclass identifier 'badidentifier' not found
+
+
+ +
+

Corpus View Regression Tests

Select some corpus files to play with:

-
-
->>> import nltk.data
->>> # A very short file (160 chars):
->>> f1 = nltk.data.find('corpora/inaugural/README')
->>> # A relatively short file (791 chars):
->>> f2 = nltk.data.find('corpora/inaugural/1793-Washington.txt')
->>> # A longer file (32k chars):
->>> f3 = nltk.data.find('corpora/inaugural/1909-Taft.txt')
->>> fileids = [f1, f2, f3]
-
-
-
-

Concatenation

+
>>> import nltk.data
+>>> # A very short file (160 chars):
+>>> f1 = nltk.data.find('corpora/inaugural/README')
+>>> # A relatively short file (791 chars):
+>>> f2 = nltk.data.find('corpora/inaugural/1793-Washington.txt')
+>>> # A longer file (32k chars):
+>>> f3 = nltk.data.find('corpora/inaugural/1909-Taft.txt')
+>>> fileids = [f1, f2, f3]
+
+
+
+
Concatenation

Check that concatenation works as intended.

-
-
->>> from nltk.corpus.reader.util import *
-
-
->>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8')
->>> c2 = StreamBackedCorpusView(f2, read_whitespace_block, encoding='utf-8')
->>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8')
->>> c123 = c1+c2+c3
->>> print(c123)
-['C-Span', 'Inaugural', 'Address', 'Corpus', 'US', ...]
-
-
->>> l1 = f1.open(encoding='utf-8').read().split()
->>> l2 = f2.open(encoding='utf-8').read().split()
->>> l3 = f3.open(encoding='utf-8').read().split()
->>> l123 = l1+l2+l3
-
-
->>> list(c123) == l123
-True
-
-
->>> (c1+c2+c3)[100] == l123[100]
-True
-
-
-
-
-

Slicing

+
>>> from nltk.corpus.reader.util import *
+
+
+
>>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8')
+>>> c2 = StreamBackedCorpusView(f2, read_whitespace_block, encoding='utf-8')
+>>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8')
+>>> c123 = c1+c2+c3
+>>> print(c123)
+['C-Span', 'Inaugural', 'Address', 'Corpus', 'US', ...]
+
+
+
>>> l1 = f1.open(encoding='utf-8').read().split()
+>>> l2 = f2.open(encoding='utf-8').read().split()
+>>> l3 = f3.open(encoding='utf-8').read().split()
+>>> l123 = l1+l2+l3
+
+
+
>>> list(c123) == l123
+True
+
+
+
>>> (c1+c2+c3)[100] == l123[100]
+True
+
+
+
+
+
Slicing

First, do some tests with fairly small slices. These will all generate tuple values.

-
-
->>> from nltk.util import LazySubsequence
->>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8')
->>> l1 = f1.open(encoding='utf-8').read().split()
->>> print(len(c1))
-21
->>> len(c1) < LazySubsequence.MIN_SIZE
-True
-
-
+
>>> from nltk.util import LazySubsequence
+>>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8')
+>>> l1 = f1.open(encoding='utf-8').read().split()
+>>> print(len(c1))
+21
+>>> len(c1) < LazySubsequence.MIN_SIZE
+True
+
+

Choose a list of indices, based on the length, that covers the important corner cases:

-
-
->>> indices = [-60, -30, -22, -21, -20, -1,
-...            0, 1, 10, 20, 21, 22, 30, 60]
-
-
+
>>> indices = [-60, -30, -22, -21, -20, -1,
+...            0, 1, 10, 20, 21, 22, 30, 60]
+
+

Test slicing with explicit start & stop value:

-
-
->>> for s in indices:
-...     for e in indices:
-...         assert list(c1[s:e]) == l1[s:e]
-
-
+
>>> for s in indices:
+...     for e in indices:
+...         assert list(c1[s:e]) == l1[s:e]
+
+

Test slicing with stop=None:

-
-
->>> for s in indices:
-...     assert list(c1[s:]) == l1[s:]
-
-
+
>>> for s in indices:
+...     assert list(c1[s:]) == l1[s:]
+
+

Test slicing with start=None:

-
-
->>> for e in indices:
-...     assert list(c1[:e]) == l1[:e]
-
-
+
>>> for e in indices:
+...     assert list(c1[:e]) == l1[:e]
+
+

Test slicing with start=stop=None:

-
-
->>> list(c1[:]) == list(l1[:])
-True
-
-
-


>>> list(c1[:]) == list(l1[:])
+True
+
+
+

Next, we’ll do some tests with much longer slices. These will generate LazySubsequence objects.

-
-
->>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8')
->>> l3 = f3.open(encoding='utf-8').read().split()
->>> print(len(c3))
-5430
->>> len(c3) > LazySubsequence.MIN_SIZE*2
-True
-
-
+
>>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8')
+>>> l3 = f3.open(encoding='utf-8').read().split()
+>>> print(len(c3))
+5430
+>>> len(c3) > LazySubsequence.MIN_SIZE*2
+True
+
+

Choose a list of indices, based on the length, that covers the important corner cases:

-
-
->>> indices = [-12000, -6000, -5431, -5430, -5429, -3000, -200, -1,
-...            0, 1, 200, 3000, 5000, 5429, 5430, 5431, 6000, 12000]
-
-
+
>>> indices = [-12000, -6000, -5431, -5430, -5429, -3000, -200, -1,
+...            0, 1, 200, 3000, 5000, 5429, 5430, 5431, 6000, 12000]
+
+

Test slicing with explicit start & stop value:

-
-
->>> for s in indices:
-...     for e in indices:
-...         assert list(c3[s:e]) == l3[s:e]
-
-
+
>>> for s in indices:
+...     for e in indices:
+...         assert list(c3[s:e]) == l3[s:e]
+
+

Test slicing with stop=None:

-
-
->>> for s in indices:
-...     assert list(c3[s:]) == l3[s:]
-
-
+
>>> for s in indices:
+...     assert list(c3[s:]) == l3[s:]
+
+

Test slicing with start=None:

-
-
->>> for e in indices:
-...     assert list(c3[:e]) == l3[:e]
-
-
+
>>> for e in indices:
+...     assert list(c3[:e]) == l3[:e]
+
+

Test slicing with start=stop=None:

-
-
->>> list(c3[:]) == list(l3[:])
-True
-
-
-
-
-

Multiple Iterators

+
>>> list(c3[:]) == list(l3[:])
+True
+
+
+ +
+
Multiple Iterators

If multiple iterators are created for the same corpus view, their iteration can be interleaved:

-
-
->>> c3 = StreamBackedCorpusView(f3, read_whitespace_block)
->>> iterators = [c3.iterate_from(n) for n in [0,15,30,45]]
->>> for i in range(15):
-...     for iterator in iterators:
-...         print('%-15s' % next(iterator), end=' ')
-...     print()
-My              a               duties          in
-fellow          heavy           of              a
-citizens:       weight          the             proper
-Anyone          of              office          sense
-who             responsibility. upon            of
-has             If              which           the
-taken           not,            he              obligation
-the             he              is              which
-oath            has             about           the
-I               no              to              oath
-have            conception      enter,          imposes.
-just            of              or              The
-taken           the             he              office
-must            powers          is              of
-feel            and             lacking         an
-
-
-
-
-
-

SeekableUnicodeStreamReader

-

The file-like objects provided by the codecs module unfortunately +

>>> c3 = StreamBackedCorpusView(f3, read_whitespace_block)
+>>> iterators = [c3.iterate_from(n) for n in [0,15,30,45]]
+>>> for i in range(15):
+...     for iterator in iterators:
+...         print('%-15s' % next(iterator), end=' ')
+...     print()
+My              a               duties          in
+fellow          heavy           of              a
+citizens:       weight          the             proper
+Anyone          of              office          sense
+who             responsibility. upon            of
+has             If              which           the
+taken           not,            he              obligation
+the             he              is              which
+oath            has             about           the
+I               no              to              oath
+have            conception      enter,          imposes.
+just            of              or              The
+taken           the             he              office
+must            powers          is              of
+feel            and             lacking         an
+
+
+ + +
+

SeekableUnicodeStreamReader

+

The file-like objects provided by the codecs module unfortunately suffer from a bug that prevents them from working correctly with corpus view objects. In particular, although they expose seek() and tell() methods, those methods do not exhibit the expected behavior, because they are not synchronized with the internal buffers that are kept by the file-like objects. For example, the tell() method will return the file position at the end of the buffers (whose contents have not yet been returned by the stream); and therefore this file position can not be used to return to the ‘current’ location in the stream (since seek() has no way to reconstruct the buffers).


To get around these problems, we define a new class, SeekableUnicodeStreamReader, to act as a file-like interface to files containing encoded unicode data. This class is loosely based on the codecs.StreamReader class. To construct a new reader, we call the constructor with an underlying stream and an encoding name:

-
-
->>> from io import StringIO, BytesIO
->>> from nltk.data import SeekableUnicodeStreamReader
->>> stream = BytesIO(b"""\
-... This is a test file.
-... It is encoded in ascii.
-... """.decode('ascii').encode('ascii'))
->>> reader = SeekableUnicodeStreamReader(stream, 'ascii')
-
-
+
>>> from io import StringIO, BytesIO
+>>> from nltk.data import SeekableUnicodeStreamReader
+>>> stream = BytesIO(b"""\
+... This is a test file.
+... It is encoded in ascii.
+... """.decode('ascii').encode('ascii'))
+>>> reader = SeekableUnicodeStreamReader(stream, 'ascii')
+
+

SeekableUnicodeStreamReaders support all of the normal operations supplied by a read-only stream. Note that all of the read operations return unicode objects (not str objects).

-
-
->>> reader.read()         # read the entire file.
-u'This is a test file.\nIt is encoded in ascii.\n'
->>> reader.seek(0)        # rewind to the start.
->>> reader.read(5)        # read at most 5 bytes.
-u'This '
->>> reader.readline()     # read to the end of the line.
-u'is a test file.\n'
->>> reader.seek(0)        # rewind to the start.
->>> for line in reader:
-...     print(repr(line))      # iterate over lines
-u'This is a test file.\n'
-u'It is encoded in ascii.\n'
->>> reader.seek(0)        # rewind to the start.
->>> reader.readlines()    # read a list of line strings
-[u'This is a test file.\n', u'It is encoded in ascii.\n']
->>> reader.close()
-
-
-
-

Size argument to read()

-


+
>>> reader.read()         # read the entire file.
+'This is a test file.\nIt is encoded in ascii.\n'
+>>> reader.seek(0)        # rewind to the start.
+>>> reader.read(5)        # read at most 5 bytes.
+'This '
+>>> reader.readline()     # read to the end of the line.
+'is a test file.\n'
+>>> reader.seek(0)        # rewind to the start.
+>>> for line in reader:
+...     print(repr(line))      # iterate over lines
+'This is a test file.\n'
+'It is encoded in ascii.\n'
+>>> reader.seek(0)        # rewind to the start.
+>>> reader.readlines()    # read a list of line strings
+['This is a test file.\n', 'It is encoded in ascii.\n']
+>>> reader.close()
+
+
+
+
Size argument to read()
+

The size argument to read() specifies the maximum number of bytes to read, not the maximum number of characters. Thus, for encodings that use multiple bytes per character, it may return fewer characters than the size argument:

-
-
->>> stream = BytesIO(b"""\
-... This is a test file.
-... It is encoded in utf-16.
-... """.decode('ascii').encode('utf-16'))
->>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
->>> reader.read(10)
-u'This '
-
-
+characters than the size argument:

+
>>> stream = BytesIO(b"""\
+... This is a test file.
+... It is encoded in utf-16.
+... """.decode('ascii').encode('utf-16'))
+>>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
+>>> reader.read(10)
+'This '
+
+

If a read block ends in the middle of the byte string encoding a single character, then that byte string is stored in an internal buffer, and re-used on the next call to read(). However, if the size argument is too small to read even a single character, even though at least one character is available, then the read() method will read additional bytes until it can return a single character. This ensures that the read() method does not return an empty string, which could be mistaken for indicating the end of the file.

-
-
->>> reader.seek(0)            # rewind to the start.
->>> reader.read(1)            # we actually need to read 4 bytes
-u'T'
->>> int(reader.tell())
-4
-
-
-


>>> reader.seek(0)            # rewind to the start.
+>>> reader.read(1)            # we actually need to read 4 bytes
+'T'
+>>> int(reader.tell())
+4
+
+
+

The readline() method may read more than a single line of text, in which case it stores the text that it does not return in a buffer. If this buffer is not empty, then its contents will be included in the value returned by the next call to read(), regardless of the size argument, since they are available without reading any new bytes from the stream:

-
-
->>> reader.seek(0)            # rewind to the start.
->>> reader.readline()         # stores extra text in a buffer
-u'This is a test file.\n'
->>> print(reader.linebuffer)   # examine the buffer contents
-[u'It is encoded i']
->>> reader.read(0)            # returns the contents of the buffer
-u'It is encoded i'
->>> print(reader.linebuffer)   # examine the buffer contents
-None
-
-
-
-
-

Seek and Tell

+
>>> reader.seek(0)            # rewind to the start.
+>>> reader.readline()         # stores extra text in a buffer
+'This is a test file.\n'
+>>> print(reader.linebuffer)   # examine the buffer contents
+['It is encoded i']
+>>> reader.read(0)            # returns the contents of the buffer
+'It is encoded i'
+>>> print(reader.linebuffer)   # examine the buffer contents
+None
+
+
+
+
+
Seek and Tell

In addition to these basic read operations, SeekableUnicodeStreamReader also supports the seek() and tell() operations. However, some care must still be taken when using these operations. In particular, the only file offsets that should be passed to seek() are 0 and any offset that has been returned by tell().

-
-
->>> stream = BytesIO(b"""\
-... This is a test file.
-... It is encoded in utf-16.
-... """.decode('ascii').encode('utf-16'))
->>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
->>> reader.read(20)
-u'This is a '
->>> pos = reader.tell(); print(pos)
-22
->>> reader.read(20)
-u'test file.'
->>> reader.seek(pos)     # rewind to the position from tell.
->>> reader.read(20)
-u'test file.'
-
-
-

The seek() and tell() methods work properly even when readline() is used.

-
-
->>> stream = BytesIO(b"""\
-... This is a test file.
-... It is encoded in utf-16.
-... """.decode('ascii').encode('utf-16'))
->>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
->>> reader.readline()
-u'This is a test file.\n'
->>> pos = reader.tell(); print(pos)
-44
->>> reader.readline()
-u'It is encoded in utf-16.\n'
->>> reader.seek(pos)     # rewind to the position from tell.
->>> reader.readline()
-u'It is encoded in utf-16.\n'
-
-
-
-
-
-

Squashed Bugs

+should be passed to seek() are 0 and any offset that has been +returned by tell.

+
>>> stream = BytesIO(b"""\
+... This is a test file.
+... It is encoded in utf-16.
+... """.decode('ascii').encode('utf-16'))
+>>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
+>>> reader.read(20)
+'This is a '
+>>> pos = reader.tell(); print(pos)
+22
+>>> reader.read(20)
+'test file.'
+>>> reader.seek(pos)     # rewind to the position from tell.
+>>> reader.read(20)
+'test file.'
+
+
+

The seek() and tell() methods work properly even when readline() is used.

+
>>> stream = BytesIO(b"""\
+... This is a test file.
+... It is encoded in utf-16.
+... """.decode('ascii').encode('utf-16'))
+>>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
+>>> reader.readline()
+'This is a test file.\n'
+>>> pos = reader.tell(); print(pos)
+44
+>>> reader.readline()
+'It is encoded in utf-16.\n'
+>>> reader.seek(pos)     # rewind to the position from tell.
+>>> reader.readline()
+'It is encoded in utf-16.\n'
+
+
+ + +
+

Squashed Bugs

svn 5276 fixed a bug in the comment-stripping behavior of parse_sexpr_block.

-
-
->>> from io import StringIO
->>> from nltk.corpus.reader.util import read_sexpr_block
->>> f = StringIO(b"""
-... (a b c)
-... # This line is a comment.
-... (d e f\ng h)""".decode('ascii'))
->>> print(read_sexpr_block(f, block_size=38, comment_char='#'))
-['(a b c)']
->>> print(read_sexpr_block(f, block_size=38, comment_char='#'))
-['(d e f\ng h)']
-
-
+
>>> from io import StringIO
+>>> from nltk.corpus.reader.util import read_sexpr_block
+>>> f = StringIO(b"""
+... (a b c)
+... # This line is a comment.
+... (d e f\ng h)""".decode('ascii'))
+>>> print(read_sexpr_block(f, block_size=38, comment_char='#'))
+['(a b c)']
+>>> print(read_sexpr_block(f, block_size=38, comment_char='#'))
+['(d e f\ng h)']
+
+

svn 5277 fixed a bug in parse_sexpr_block, which would cause it to enter an infinite loop if a file ended mid-sexpr, or ended with a token that was not followed by whitespace. A related bug caused an infinite loop if the corpus ended in an unmatched close paren; this was fixed in svn 5279.

-
-
->>> f = StringIO(b"""
-... This file ends mid-sexpr
-... (hello (world""".decode('ascii'))
->>> for i in range(3): print(read_sexpr_block(f))
-['This', 'file', 'ends', 'mid-sexpr']
-['(hello (world']
-[]
-
-
->>> f = StringIO(b"This file has no trailing whitespace.".decode('ascii'))
->>> for i in range(3): print(read_sexpr_block(f))
-['This', 'file', 'has', 'no', 'trailing']
-['whitespace.']
-[]
-
-
->>> # Bug fixed in 5279:
->>> f = StringIO(b"a b c)".decode('ascii'))
->>> for i in range(3): print(read_sexpr_block(f))
-['a', 'b']
-['c)']
-[]
-
-
+
>>> f = StringIO(b"""
+... This file ends mid-sexpr
+... (hello (world""".decode('ascii'))
+>>> for i in range(3): print(read_sexpr_block(f))
+['This', 'file', 'ends', 'mid-sexpr']
+['(hello (world']
+[]
+
+
+
>>> f = StringIO(b"This file has no trailing whitespace.".decode('ascii'))
+>>> for i in range(3): print(read_sexpr_block(f))
+['This', 'file', 'has', 'no', 'trailing']
+['whitespace.']
+[]
+
+
+
>>> # Bug fixed in 5279:
+>>> f = StringIO(b"a b c)".decode('ascii'))
+>>> for i in range(3): print(read_sexpr_block(f))
+['a', 'b']
+['c)']
+[]
+
+

svn 5624 & 5265 fixed a bug in ConcatenatedCorpusView, which caused it to return the wrong items when indexed starting at any index beyond the first file.

-
-
->>> import nltk
->>> sents = nltk.corpus.brown.sents()
->>> print(sents[6000])
-['Cholesterol', 'and', 'thyroid']
->>> print(sents[6000])
-['Cholesterol', 'and', 'thyroid']
-
-
+
>>> import nltk
+>>> sents = nltk.corpus.brown.sents()
+>>> print(sents[6000])
+['Cholesterol', 'and', 'thyroid']
+>>> print(sents[6000])
+['Cholesterol', 'and', 'thyroid']
+
+

svn 5728 fixed a bug in Categorized*CorpusReader, which caused them to return words from all files when just one file was specified.

-
-
->>> from nltk.corpus import reuters
->>> reuters.words('training/13085')
-['SNYDER', '&', 'lt', ';', 'SOI', '>', 'MAKES', ...]
->>> reuters.words('training/5082')
-['SHEPPARD', 'RESOURCES', 'TO', 'MERGE', 'WITH', ...]
-
-
+
>>> from nltk.corpus import reuters
+>>> reuters.words('training/13085')
+['SNYDER', '&', 'lt', ';', 'SOI', '>', 'MAKES', ...]
+>>> reuters.words('training/5082')
+['SHEPPARD', 'RESOURCES', 'TO', 'MERGE', 'WITH', ...]
+
+

svn 7227 fixed a bug in the qc corpus reader, which prevented access to its tuples() method.

-
-
->>> from nltk.corpus import qc
->>> qc.tuples('test.txt')
-[('NUM:dist', 'How far is it from Denver to Aspen ?'), ('LOC:city', 'What county is Modesto , California in ?'), ...]
-
-
+
>>> from nltk.corpus import qc
+>>> qc.tuples('test.txt')
+[('NUM:dist', 'How far is it from Denver to Aspen ?'), ('LOC:city', 'What county is Modesto , California in ?'), ...]
+
+

Ensure that KEYWORD from comparative_sents.py no longer contains a ReDoS vulnerability.

+
>>> import re
+>>> import time
+>>> from nltk.corpus.reader.comparative_sents import KEYWORD
+>>> sizes = {
+...     "short": 4000,
+...     "long": 40000
+... }
+>>> exec_times = {
+...     "short": [],
+...     "long": [],
+... }
+>>> for size_name, size in sizes.items():
+...     for j in range(9):
+...         start_t = time.perf_counter()
+...         payload = "( " + "(" * size
+...         output = KEYWORD.findall(payload)
+...         exec_times[size_name].append(time.perf_counter() - start_t)
+...     exec_times[size_name] = sorted(exec_times[size_name])[4] # Get the median
+
+

Ideally, the execution time of such a regular expression is linear +in the length of the input. As such, we would expect exec_times[“long”] +to be roughly 10 times as big as exec_times[“short”]. +With the ReDoS in place, it took roughly 80 times as long. +For now, we accept values below 30 (times as long), due to the potential +for variance. This ensures that the ReDoS has certainly been reduced, +if not removed.
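As a hedged aside (not part of the test itself), the expected ratios follow directly from the payload sizes defined above:

# Illustrative arithmetic only: payloads of 4,000 vs 40,000 characters.
short, long = 4000, 40000
print(long / short)            # linear matching:          10.0
print((long / short) ** 2)     # quadratic backtracking:  100.0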

+
>>> exec_times["long"] / exec_times["short"] < 30 
+True
+
+
+ + + - + \ No newline at end of file diff --git a/howto/crubadan.html b/howto/crubadan.html index cb1fea4b3..9ef3a8189 100644 --- a/howto/crubadan.html +++ b/howto/crubadan.html @@ -1,416 +1,207 @@ - - - + - - -Crubadan Corpus Reader - + + + + + + + NLTK :: Sample usage for crubadan + + + + + + + + + + + + + + -
-

Crubadan Corpus Reader


Sample usage for crubadan

+
+

Crubadan Corpus Reader

Crubadan is an NLTK corpus reader for ngram files provided by the Crubadan project. It supports several languages.

-
-
->>> from nltk.corpus import crubadan
->>> crubadan.langs() # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-['abk', 'abn',..., 'zpa', 'zul']
-
-
-
-

Language code mapping and helper methods

+
>>> from nltk.corpus import crubadan
+>>> crubadan.langs()
+['abk', 'abn',..., 'zpa', 'zul']
+
+
+
+

Language code mapping and helper methods

The web crawler that generates the 3-gram frequencies works at the -level of "writing systems" rather than languages. Writing systems +level of “writing systems” rather than languages. Writing systems are assigned internal 2-3 letter codes that require mapping to the standard ISO 639-3 codes. For more information, please refer to the README in nltk_data/crubadan folder after installing it.

-

To translate ISO 639-3 codes to "Crubadan Code":

-
-
->>> crubadan.iso_to_crubadan('eng')
-'en'
->>> crubadan.iso_to_crubadan('fra')
-'fr'
->>> crubadan.iso_to_crubadan('aaa')
-
-
+

To translate ISO 639-3 codes to “Crubadan Code”:

+
>>> crubadan.iso_to_crubadan('eng')
+'en'
+>>> crubadan.iso_to_crubadan('fra')
+'fr'
+>>> crubadan.iso_to_crubadan('aaa')
+
+

In reverse, print the ISO 639-3 code if we have the Crubadan Code:

-
-
->>> crubadan.crubadan_to_iso('en')
-'eng'
->>> crubadan.crubadan_to_iso('fr')
-'fra'
->>> crubadan.crubadan_to_iso('aa')
-
-
+
>>> crubadan.crubadan_to_iso('en')
+'eng'
+>>> crubadan.crubadan_to_iso('fr')
+'fra'
+>>> crubadan.crubadan_to_iso('aa')
+
-
-

Accessing ngram frequencies

+
+
+

Accessing ngram frequencies

On initialization the reader will create a dictionary of every language supported by the Crubadan project, mapping the ISO 639-3 language code to its corresponding ngram frequency.

You can access individual language FreqDist and the ngrams within them as follows:

-
-
->>> english_fd = crubadan.lang_freq('eng')
->>> english_fd['the']
-728135
-
-
-

The code above accesses the FreqDist of English and returns the frequency of the ngram 'the'. -An ngram that isn't found within the language will return 0:

-
-
->>> english_fd['sometest']
-0
-
-
-

A language that isn't supported will raise an exception:

-
-
->>> crubadan.lang_freq('elvish')
-Traceback (most recent call last):
-...
-RuntimeError: Unsupported language.
-
-
+
>>> english_fd = crubadan.lang_freq('eng')
+>>> english_fd['the']
+728135
+
+
+

The code above accesses the FreqDist of English and returns the frequency of the ngram ‘the’. +An ngram that isn’t found within the language will return 0:

+
>>> english_fd['sometest']
+0
+
+

A language that isn’t supported will raise an exception:

+
>>> crubadan.lang_freq('elvish')
+Traceback (most recent call last):
+...
+RuntimeError: Unsupported language.
+
+
+ - + \ No newline at end of file diff --git a/howto/data.html b/howto/data.html index 04c2b528f..8e2e983fa 100644 --- a/howto/data.html +++ b/howto/data.html @@ -1,554 +1,275 @@ - - - + - - -Loading Resources From the Data Package - + + + + + + + NLTK :: Sample usage for data + + + + + + + + + + + + + + -
-

Loading Resources From the Data Package

-
->>> import nltk.data
-
-
-
-

Overview

+ + + +
+ +
+
+ +
+

Sample usage for data

+
+

Loading Resources From the Data Package

+
>>> import nltk.data
+
+
+
+

Overview

The nltk.data module contains functions that can be used to load NLTK resource files, such as corpora, grammars, and saved processing objects.

-
-
-

Loading Data Files

+ +
+

Loading Data Files

Resources are loaded using the function nltk.data.load(), which takes as its first argument a URL specifying what file should be -loaded. The nltk: protocol loads files from the NLTK data +loaded. The nltk: protocol loads files from the NLTK data distribution:

-
-
->>> from __future__ import print_function
->>> tokenizer = nltk.data.load('nltk:tokenizers/punkt/english.pickle')
->>> tokenizer.tokenize('Hello.  This is a test.  It works!')
-['Hello.', 'This is a test.', 'It works!']
-
-
+
>>> tokenizer = nltk.data.load('nltk:tokenizers/punkt/english.pickle')
+>>> tokenizer.tokenize('Hello.  This is a test.  It works!')
+['Hello.', 'This is a test.', 'It works!']
+
+

It is important to note that there should be no space following the -colon (':') in the URL; 'nltk: tokenizers/punkt/english.pickle' will +colon (‘:’) in the URL; ‘nltk: tokenizers/punkt/english.pickle’ will not work!
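A hedged sketch of guarding against this mistake (not from the original suite; the exact exception raised for such a malformed resource name may vary across NLTK versions):

# Illustrative only: the stray space after 'nltk:' breaks resolution.
try:
    nltk.data.load('nltk: tokenizers/punkt/english.pickle')
except Exception as err:
    print('failed to load:', type(err).__name__)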

-

The nltk: protocol is used by default if no protocol is specified:

-
-
->>> nltk.data.load('tokenizers/punkt/english.pickle') # doctest: +ELLIPSIS
-<nltk.tokenize.punkt.PunktSentenceTokenizer object at ...>
-
-
-

But it is also possible to load resources from http:, ftp:, -and file: URLs, e.g. cfg = nltk.data.load('http://example.com/path/to/toy.cfg')

-
-
->>> # Load a grammar using an absolute path.
->>> url = 'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg')
->>> url.replace('\\', '/') # doctest: +ELLIPSIS
-'file:...toy.cfg'
->>> print(nltk.data.load(url)) # doctest: +ELLIPSIS
-Grammar with 14 productions (start state = S)
-    S -> NP VP
-    PP -> P NP
-    ...
-    P -> 'on'
-    P -> 'in'
-
-
+

The nltk: protocol is used by default if no protocol is specified:

+
>>> nltk.data.load('tokenizers/punkt/english.pickle')
+<nltk.tokenize.punkt.PunktSentenceTokenizer object at ...>
+
+
+

But it is also possible to load resources from http:, ftp:, +and file: URLs, e.g. cfg = nltk.data.load('https://example.com/path/to/toy.cfg')

+
>>> # Load a grammar using an absolute path.
+>>> url = 'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg')
+>>> url.replace('\\', '/')
+'file:...toy.cfg'
+>>> print(nltk.data.load(url))
+Grammar with 14 productions (start state = S)
+    S -> NP VP
+    PP -> P NP
+    ...
+    P -> 'on'
+    P -> 'in'
+
+

The second argument to the nltk.data.load() function specifies the -file format, which determines how the file's contents are processed -before they are returned by load(). The formats that are +file format, which determines how the file’s contents are processed +before they are returned by load(). The formats that are currently supported by the data module are described by the dictionary nltk.data.FORMATS:

-
-
->>> for format, descr in sorted(nltk.data.FORMATS.items()):
-...     print('{0:<7} {1:}'.format(format, descr)) # doctest: +NORMALIZE_WHITESPACE
-cfg     A context free grammar.
-fcfg    A feature CFG.
-fol     A list of first order logic expressions, parsed with
-nltk.sem.logic.Expression.fromstring.
-json    A serialized python object, stored using the json module.
-logic   A list of first order logic expressions, parsed with
-nltk.sem.logic.LogicParser.  Requires an additional logic_parser
-parameter
-pcfg    A probabilistic CFG.
-pickle  A serialized python object, stored using the pickle
-module.
-raw     The raw (byte string) contents of a file.
-text    The raw (unicode string) contents of a file.
-val     A semantic valuation, parsed by
-nltk.sem.Valuation.fromstring.
-yaml    A serialized python object, stored using the yaml module.
-
-
+
>>> for format, descr in sorted(nltk.data.FORMATS.items()):
+...     print('{0:<7} {1:}'.format(format, descr))
+cfg     A context free grammar.
+fcfg    A feature CFG.
+fol     A list of first order logic expressions, parsed with
+nltk.sem.logic.Expression.fromstring.
+json    A serialized python object, stored using the json module.
+logic   A list of first order logic expressions, parsed with
+nltk.sem.logic.LogicParser.  Requires an additional logic_parser
+parameter
+pcfg    A probabilistic CFG.
+pickle  A serialized python object, stored using the pickle
+module.
+raw     The raw (byte string) contents of a file.
+text    The raw (unicode string) contents of a file.
+val     A semantic valuation, parsed by
+nltk.sem.Valuation.fromstring.
+yaml    A serialized python object, stored using the yaml module.
+
+

nltk.data.load() will raise a ValueError if a bad format name is specified:

-
-
->>> nltk.data.load('grammars/sample_grammars/toy.cfg', 'bar')
-Traceback (most recent call last):
-  . . .
-ValueError: Unknown format type!
-
-
-

By default, the "auto" format is used, which chooses a format -based on the filename's extension. The mapping from file extensions +

>>> nltk.data.load('grammars/sample_grammars/toy.cfg', 'bar')
+Traceback (most recent call last):
+  . . .
+ValueError: Unknown format type!
+
+
+

By default, the "auto" format is used, which chooses a format +based on the filename’s extension. The mapping from file extensions to format names is specified by nltk.data.AUTO_FORMATS:

-
-
->>> for ext, format in sorted(nltk.data.AUTO_FORMATS.items()):
-...     print('.%-7s -> %s' % (ext, format))
-.cfg     -> cfg
-.fcfg    -> fcfg
-.fol     -> fol
-.json    -> json
-.logic   -> logic
-.pcfg    -> pcfg
-.pickle  -> pickle
-.text    -> text
-.txt     -> text
-.val     -> val
-.yaml    -> yaml
-
-
+
>>> for ext, format in sorted(nltk.data.AUTO_FORMATS.items()):
+...     print('.%-7s -> %s' % (ext, format))
+.cfg     -> cfg
+.fcfg    -> fcfg
+.fol     -> fol
+.json    -> json
+.logic   -> logic
+.pcfg    -> pcfg
+.pickle  -> pickle
+.text    -> text
+.txt     -> text
+.val     -> val
+.yaml    -> yaml
+
+

If nltk.data.load() is unable to determine the format based on the -filename's extension, it will raise a ValueError:

-
-
->>> nltk.data.load('foo.bar')
-Traceback (most recent call last):
-  . . .
-ValueError: Could not determine format for foo.bar based on its file
-extension; use the "format" argument to specify the format explicitly.
-
-
-

Note that by explicitly specifying the format argument, you can -override the load method's default processing behavior. For example, -to get the raw contents of any file, simply use format="raw":

-
-
->>> s = nltk.data.load('grammars/sample_grammars/toy.cfg', 'text')
->>> print(s) # doctest: +ELLIPSIS
-S -> NP VP
-PP -> P NP
-NP -> Det N | NP PP
-VP -> V NP | VP PP
-...
-
-
-
-
-

Making Local Copies

- +filename’s extension, it will raise a ValueError:

+
>>> nltk.data.load('foo.bar')
+Traceback (most recent call last):
+  . . .
+ValueError: Could not determine format for foo.bar based on its file
+extension; use the "format" argument to specify the format explicitly.
+
+
+

Note that by explicitly specifying the format argument, you can +override the load method’s default processing behavior. For example, +to get the unprocessed unicode contents of any file, use format="text" (for the raw bytes, use format="raw"):

+
>>> s = nltk.data.load('grammars/sample_grammars/toy.cfg', 'text')
+>>> print(s)
+S -> NP VP
+PP -> P NP
+NP -> Det N | NP PP
+VP -> V NP | VP PP
+...
+
+
+ +
+

Making Local Copies

The function nltk.data.retrieve() copies a given resource to a local file. This can be useful, for example, if you want to edit one of the sample grammars.

-
-
->>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg')
-Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy.cfg'
-
-
->>> # Simulate editing the grammar.
->>> with open('toy.cfg') as inp:
-...     s = inp.read().replace('NP', 'DP')
->>> with open('toy.cfg', 'w') as out:
-...     _bytes_written = out.write(s)
-
-
->>> # Load the edited grammar, & display it.
->>> cfg = nltk.data.load('file:///' + os.path.abspath('toy.cfg'))
->>> print(cfg) # doctest: +ELLIPSIS
-Grammar with 14 productions (start state = S)
-    S -> DP VP
-    PP -> P DP
-    ...
-    P -> 'on'
-    P -> 'in'
-
-
+
>>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg')
+Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy.cfg'
+
+
+
>>> # Simulate editing the grammar.
+>>> with open('toy.cfg') as inp:
+...     s = inp.read().replace('NP', 'DP')
+>>> with open('toy.cfg', 'w') as out:
+...     _bytes_written = out.write(s)
+
+
+
>>> # Load the edited grammar, & display it.
+>>> cfg = nltk.data.load('file:///' + os.path.abspath('toy.cfg'))
+>>> print(cfg)
+Grammar with 14 productions (start state = S)
+    S -> DP VP
+    PP -> P DP
+    ...
+    P -> 'on'
+    P -> 'in'
+
+

The second argument to nltk.data.retrieve() specifies the filename -for the new copy of the file. By default, the source file's filename +for the new copy of the file. By default, the source file’s filename is used.

-
-
->>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg', 'mytoy.cfg')
-Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'mytoy.cfg'
->>> os.path.isfile('./mytoy.cfg')
-True
->>> nltk.data.retrieve('grammars/sample_grammars/np.fcfg')
-Retrieving 'nltk:grammars/sample_grammars/np.fcfg', saving to 'np.fcfg'
->>> os.path.isfile('./np.fcfg')
-True
-
-
+
>>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg', 'mytoy.cfg')
+Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'mytoy.cfg'
+>>> os.path.isfile('./mytoy.cfg')
+True
+>>> nltk.data.retrieve('grammars/sample_grammars/np.fcfg')
+Retrieving 'nltk:grammars/sample_grammars/np.fcfg', saving to 'np.fcfg'
+>>> os.path.isfile('./np.fcfg')
+True
+
+

If a file with the specified (or default) filename already exists in the current directory, then nltk.data.retrieve() will raise a ValueError exception. It will not overwrite the file:

-
-
->>> os.path.isfile('./toy.cfg')
-True
->>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg') # doctest: +ELLIPSIS
-Traceback (most recent call last):
-  . . .
-ValueError: File '...toy.cfg' already exists!
-
-
- -
-
-

Finding Files in the NLTK Data Package

+
>>> os.path.isfile('./toy.cfg')
+True
+>>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg')
+Traceback (most recent call last):
+  . . .
+ValueError: File '...toy.cfg' already exists!
+
+
+ +
+

Finding Files in the NLTK Data Package

The nltk.data.find() function searches the NLTK data package for a given file, and returns a pointer to that file. This pointer can either be a FileSystemPathPointer (whose path attribute gives the @@ -556,171 +277,147 @@

Finding Files in the NLTK Data Package

zipfile and the name of an entry within that zipfile. Both pointer types define the open() method, which can be used to read the string contents of the file.

-
-
->>> path = nltk.data.find('corpora/abc/rural.txt')
->>> str(path) # doctest: +ELLIPSIS
-'...rural.txt'
->>> print(path.open().read(60).decode())
-PM denies knowledge of AWB kickbacks
-The Prime Minister has
-
-
+
>>> path = nltk.data.find('corpora/abc/rural.txt')
+>>> str(path)
+'...rural.txt'
+>>> print(path.open().read(60).decode())
+PM denies knowledge of AWB kickbacks
+The Prime Minister has
+
+
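Which pointer type find() returns depends on whether the resource was unpacked on disk or left inside a zipfile; a hedged sketch (not part of the doctest suite, since the concrete type varies with how the data package was installed):

# Illustrative only: inspect the pointer returned by find().
ptr = nltk.data.find('corpora/abc/rural.txt')
print(type(ptr).__name__)    # FileSystemPathPointer or ZipFilePathPointer
stream = ptr.open()          # both pointer types support open()
print(stream.read(36))
stream.close()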

Alternatively, the nltk.data.load() function can be used with the -keyword argument format="raw":

-
-
->>> s = nltk.data.load('corpora/abc/rural.txt', format='raw')[:60]
->>> print(s.decode())
-PM denies knowledge of AWB kickbacks
-The Prime Minister has
-
-
-

Alternatively, you can use the keyword argument format="text":

-
-
->>> s = nltk.data.load('corpora/abc/rural.txt', format='text')[:60]
->>> print(s)
-PM denies knowledge of AWB kickbacks
-The Prime Minister has
-
-
-
-
-

Resource Caching

+keyword argument format="raw":

+
>>> s = nltk.data.load('corpora/abc/rural.txt', format='raw')[:60]
+>>> print(s.decode())
+PM denies knowledge of AWB kickbacks
+The Prime Minister has
+
+
+

Alternatively, you can use the keyword argument format="text":

+
>>> s = nltk.data.load('corpora/abc/rural.txt', format='text')[:60]
+>>> print(s)
+PM denies knowledge of AWB kickbacks
+The Prime Minister has
+
+
+ +
+

Resource Caching

NLTK maintains a cache of resources that have been loaded (historically this was a weakref dictionary; see below). If you load a resource that is already stored in the cache, then the cached copy will be returned. This behavior can be seen by the trace output generated when verbose=True:

-
-
->>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
-<<Loading nltk:grammars/book_grammars/feat0.fcfg>>
->>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
-<<Using cached copy of nltk:grammars/book_grammars/feat0.fcfg>>
-
-
+
>>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
+<<Loading nltk:grammars/book_grammars/feat0.fcfg>>
+>>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
+<<Using cached copy of nltk:grammars/book_grammars/feat0.fcfg>>
+
+

If you wish to load a resource from its source, bypassing the cache, -use the cache=False argument to nltk.data.load(). This can be +use the cache=False argument to nltk.data.load(). This can be useful, for example, if the resource is loaded from a local file, and you are actively editing that file:

-
-
->>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',cache=False,verbose=True)
-<<Loading nltk:grammars/book_grammars/feat0.fcfg>>
-
-
+
>>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',cache=False,verbose=True)
+<<Loading nltk:grammars/book_grammars/feat0.fcfg>>
+
+

The cache no longer uses weak references. A resource will not be automatically expunged from the cache when no more objects are using -it. In the following example, when we clear the variable feat0, +it. In the following example, when we clear the variable feat0, the reference count for the feature grammar object drops to zero. However, the object remains cached:

-
-
->>> del feat0
->>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',
-...                        verbose=True)
-<<Using cached copy of nltk:grammars/book_grammars/feat0.fcfg>>
-
-
+
>>> del feat0
+>>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',
+...                        verbose=True)
+<<Using cached copy of nltk:grammars/book_grammars/feat0.fcfg>>
+
+

You can clear the entire contents of the cache, using nltk.data.clear_cache():

-
-
->>> nltk.data.clear_cache()
-
-
-
-
-

Retrieving other Data Sources

-
-
->>> formulas = nltk.data.load('grammars/book_grammars/background.fol')
->>> for f in formulas: print(str(f))
-all x.(boxerdog(x) -> dog(x))
-all x.(boxer(x) -> person(x))
-all x.-(dog(x) & person(x))
-all x.(married(x) <-> exists y.marry(x,y))
-all x.(bark(x) -> dog(x))
-all x y.(marry(x,y) -> (person(x) & person(y)))
--(Vincent = Mia)
--(Vincent = Fido)
--(Mia = Fido)
-
-
-
-
-

Regression Tests

+
>>> nltk.data.clear_cache()
+
+
+ +
+

Retrieving other Data Sources

+
>>> formulas = nltk.data.load('grammars/book_grammars/background.fol')
+>>> for f in formulas: print(str(f))
+all x.(boxerdog(x) -> dog(x))
+all x.(boxer(x) -> person(x))
+all x.-(dog(x) & person(x))
+all x.(married(x) <-> exists y.marry(x,y))
+all x.(bark(x) -> dog(x))
+all x y.(marry(x,y) -> (person(x) & person(y)))
+-(Vincent = Mia)
+-(Vincent = Fido)
+-(Mia = Fido)
+
+
+
+
+

Regression Tests

Create a temp dir for tests that write files:

-
-
->>> import tempfile, os
->>> tempdir = tempfile.mkdtemp()
->>> old_dir = os.path.abspath('.')
->>> os.chdir(tempdir)
-
-
+
>>> import tempfile, os
+>>> tempdir = tempfile.mkdtemp()
+>>> old_dir = os.path.abspath('.')
+>>> os.chdir(tempdir)
+
+

The retrieve() function accepts all url types:

-
-
->>> urls = ['https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg',
-...         'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg'),
-...         'nltk:grammars/sample_grammars/toy.cfg',
-...         'grammars/sample_grammars/toy.cfg']
->>> for i, url in enumerate(urls):
-...     nltk.data.retrieve(url, 'toy-%d.cfg' % i) # doctest: +ELLIPSIS
-Retrieving 'https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg', saving to 'toy-0.cfg'
-Retrieving 'file:...toy.cfg', saving to 'toy-1.cfg'
-Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-2.cfg'
-Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-3.cfg'
-
-
+
>>> urls = ['https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg',
+...         'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg'),
+...         'nltk:grammars/sample_grammars/toy.cfg',
+...         'grammars/sample_grammars/toy.cfg']
+>>> for i, url in enumerate(urls):
+...     nltk.data.retrieve(url, 'toy-%d.cfg' % i)
+Retrieving 'https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg', saving to 'toy-0.cfg'
+Retrieving 'file:...toy.cfg', saving to 'toy-1.cfg'
+Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-2.cfg'
+Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-3.cfg'
+
+

Clean up the temp dir:

-
-
->>> os.chdir(old_dir)
->>> for f in os.listdir(tempdir):
-...     os.remove(os.path.join(tempdir, f))
->>> os.rmdir(tempdir)
-
-
-
-

Lazy Loader

+
>>> os.chdir(old_dir)
+>>> for f in os.listdir(tempdir):
+...     os.remove(os.path.join(tempdir, f))
+>>> os.rmdir(tempdir)
+
+
+
+

Lazy Loader

A lazy loader is a wrapper object that defers loading a resource until it is accessed or used in any way. This is mainly intended for -internal use by NLTK's corpus readers.

-
-
->>> # Create a lazy loader for toy.cfg.
->>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg')
-
-
->>> # Show that it's not loaded yet:
->>> object.__repr__(ll) # doctest: +ELLIPSIS
-'<nltk.data.LazyLoader object at ...>'
-
-
->>> # printing it is enough to cause it to be loaded:
->>> print(ll)
-<Grammar with 14 productions>
-
-
->>> # Show that it's now been loaded:
->>> object.__repr__(ll) # doctest: +ELLIPSIS
-'<nltk.grammar.CFG object at ...>'
-
-
->>> # Test that accessing an attribute also loads it:
->>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg')
->>> ll.start()
-S
->>> object.__repr__(ll) # doctest: +ELLIPSIS
-'<nltk.grammar.CFG object at ...>'
-
-
-
-
-

Buffered Gzip Reading and Writing

+internal use by NLTK’s corpus readers.

+
>>> # Create a lazy loader for toy.cfg.
+>>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg')
+
+
+
>>> # Show that it's not loaded yet:
+>>> object.__repr__(ll)
+'<nltk.data.LazyLoader object at ...>'
+
+
+
>>> # printing it is enough to cause it to be loaded:
+>>> print(ll)
+<Grammar with 14 productions>
+
+
+
>>> # Show that it's now been loaded:
+>>> object.__repr__(ll)
+'<nltk.grammar.CFG object at ...>'
+
+
+
>>> # Test that accessing an attribute also loads it:
+>>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg')
+>>> ll.start()
+S
+>>> object.__repr__(ll)
+'<nltk.grammar.CFG object at ...>'
+
+
+
+
+

Buffered Gzip Reading and Writing

Write performance to gzip-compressed files is extremely poor when the files become large. File creation can become a bottleneck in those cases.

Read performance from large gzipped pickle files was improved in data.py by @@ -729,54 +426,96 @@

Buffered Gzip Reading and Writing

This is mainly intended for internal use. The test simply tests that reading and writing work as intended and does not test how much improvement buffering provides.

-
-
->>> from nltk.compat import StringIO
->>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'wb', size=2**10)
->>> ans = []
->>> for i in range(10000):
-...     ans.append(str(i).encode('ascii'))
-...     test.write(str(i).encode('ascii'))
->>> test.close()
->>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'rb')
->>> test.read() == b''.join(ans)
-True
->>> test.close()
->>> import os
->>> os.unlink('testbuf.gz')
-
-
-
-
-

JSON Encoding and Decoding

-

JSON serialization is used instead of pickle for some classes.

-
-
->>> from nltk import jsontags
->>> from nltk.jsontags import JSONTaggedEncoder, JSONTaggedDecoder, register_tag
->>> @jsontags.register_tag
-... class JSONSerializable:
-...     json_tag = 'JSONSerializable'
-...
-...     def __init__(self, n):
-...         self.n = n
-...
-...     def encode_json_obj(self):
-...         return self.n
-...
-...     @classmethod
-...     def decode_json_obj(cls, obj):
-...         n = obj
-...         return cls(n)
-...
->>> JSONTaggedEncoder().encode(JSONSerializable(1))
-'{"!JSONSerializable": 1}'
->>> JSONTaggedDecoder().decode('{"!JSONSerializable": 1}').n
-1
-
-
+
>>> from io import StringIO
+>>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'wb', size=2**10)
+>>> ans = []
+>>> for i in range(10000):
+...     ans.append(str(i).encode('ascii'))
+...     test.write(str(i).encode('ascii'))
+>>> test.close()
+>>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'rb')
+>>> test.read() == b''.join(ans)
+True
+>>> test.close()
+>>> import os
+>>> os.unlink('testbuf.gz')
+
+ +
+

JSON Encoding and Decoding

+

JSON serialization is used instead of pickle for some classes.

+
>>> from nltk import jsontags
+>>> from nltk.jsontags import JSONTaggedEncoder, JSONTaggedDecoder, register_tag
+>>> @jsontags.register_tag
+... class JSONSerializable:
+...     json_tag = 'JSONSerializable'
+...
+...     def __init__(self, n):
+...         self.n = n
+...
+...     def encode_json_obj(self):
+...         return self.n
+...
+...     @classmethod
+...     def decode_json_obj(cls, obj):
+...         n = obj
+...         return cls(n)
+...
+>>> JSONTaggedEncoder().encode(JSONSerializable(1))
+'{"!JSONSerializable": 1}'
+>>> JSONTaggedDecoder().decode('{"!JSONSerializable": 1}').n
+1
+
+
+ - + \ No newline at end of file diff --git a/howto/dependency.html b/howto/dependency.html index 1663a31c6..58c35a9ca 100644 --- a/howto/dependency.html +++ b/howto/dependency.html @@ -1,547 +1,399 @@ - - - + - - -Dependency Grammars - + + + + + + + NLTK :: Sample usage for dependency + + + + + + + + + + + + + + -
-

Dependency Grammars

-
->>> from nltk.grammar import DependencyGrammar
->>> from nltk.parse import (
-...     DependencyGraph,
-...     ProjectiveDependencyParser,
-...     NonprojectiveDependencyParser,
-... )
-
-
-
-

CoNLL Data

-
-
->>> treebank_data = """Pierre  NNP     2       NMOD
-... Vinken  NNP     8       SUB
-... ,       ,       2       P
-... 61      CD      5       NMOD
-... years   NNS     6       AMOD
-... old     JJ      2       NMOD
-... ,       ,       2       P
-... will    MD      0       ROOT
-... join    VB      8       VC
-... the     DT      11      NMOD
-... board   NN      9       OBJ
-... as      IN      9       VMOD
-... a       DT      15      NMOD
-... nonexecutive    JJ      15      NMOD
-... director        NN      12      PMOD
-... Nov.    NNP     9       VMOD
-... 29      CD      16      NMOD
-... .       .       9       VMOD
-... """
-
-
->>> dg = DependencyGraph(treebank_data)
->>> dg.tree().pprint()
-(will
-  (Vinken Pierre , (old (years 61)) ,)
-  (join (board the) (as (director a nonexecutive)) (Nov. 29) .))
->>> for head, rel, dep in dg.triples():
-...     print(
-...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
-...         .format(h=head, r=rel, d=dep)
-...     )
-(will, MD), SUB, (Vinken, NNP)
-(Vinken, NNP), NMOD, (Pierre, NNP)
-(Vinken, NNP), P, (,, ,)
-(Vinken, NNP), NMOD, (old, JJ)
-(old, JJ), AMOD, (years, NNS)
-(years, NNS), NMOD, (61, CD)
-(Vinken, NNP), P, (,, ,)
-(will, MD), VC, (join, VB)
-(join, VB), OBJ, (board, NN)
-(board, NN), NMOD, (the, DT)
-(join, VB), VMOD, (as, IN)
-(as, IN), PMOD, (director, NN)
-(director, NN), NMOD, (a, DT)
-(director, NN), NMOD, (nonexecutive, JJ)
-(join, VB), VMOD, (Nov., NNP)
-(Nov., NNP), NMOD, (29, CD)
-(join, VB), VMOD, (., .)
-
-
+ + + +
+ +
+
+ +
+

Sample usage for dependency

+
+

Dependency Grammars

+
>>> from nltk.grammar import DependencyGrammar
+>>> from nltk.parse import (
+...     DependencyGraph,
+...     ProjectiveDependencyParser,
+...     NonprojectiveDependencyParser,
+... )
+
+
+
+

CoNLL Data

+
>>> treebank_data = """Pierre  NNP     2       NMOD
+... Vinken  NNP     8       SUB
+... ,       ,       2       P
+... 61      CD      5       NMOD
+... years   NNS     6       AMOD
+... old     JJ      2       NMOD
+... ,       ,       2       P
+... will    MD      0       ROOT
+... join    VB      8       VC
+... the     DT      11      NMOD
+... board   NN      9       OBJ
+... as      IN      9       VMOD
+... a       DT      15      NMOD
+... nonexecutive    JJ      15      NMOD
+... director        NN      12      PMOD
+... Nov.    NNP     9       VMOD
+... 29      CD      16      NMOD
+... .       .       9       VMOD
+... """
+
+
+
>>> dg = DependencyGraph(treebank_data)
+>>> dg.tree().pprint()
+(will
+  (Vinken Pierre , (old (years 61)) ,)
+  (join (board the) (as (director a nonexecutive)) (Nov. 29) .))
+>>> for head, rel, dep in dg.triples():
+...     print(
+...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
+...         .format(h=head, r=rel, d=dep)
+...     )
+(will, MD), SUB, (Vinken, NNP)
+(Vinken, NNP), NMOD, (Pierre, NNP)
+(Vinken, NNP), P, (,, ,)
+(Vinken, NNP), NMOD, (old, JJ)
+(old, JJ), AMOD, (years, NNS)
+(years, NNS), NMOD, (61, CD)
+(Vinken, NNP), P, (,, ,)
+(will, MD), VC, (join, VB)
+(join, VB), OBJ, (board, NN)
+(board, NN), NMOD, (the, DT)
+(join, VB), VMOD, (as, IN)
+(as, IN), PMOD, (director, NN)
+(director, NN), NMOD, (a, DT)
+(director, NN), NMOD, (nonexecutive, JJ)
+(join, VB), VMOD, (Nov., NNP)
+(Nov., NNP), NMOD, (29, CD)
+(join, VB), VMOD, (., .)
+
+
+

Using a custom cell extractor.

+
>>> def custom_extractor(cells):
+...     _, tag, head, rel = cells
+...     return 'spam', 'spam', tag, tag, '', head, rel
+>>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
+>>> dg.tree().pprint()
+(spam
+  (spam spam spam (spam (spam spam)) spam)
+  (spam (spam spam) (spam (spam spam spam)) (spam spam) spam))
+
+
+

Custom cell extractors can take in and return an index.

+
>>> def custom_extractor(cells, index):
+...     word, tag, head, rel = cells
+...     return (index, '{}-{}'.format(word, index), word,
+...             tag, tag, '', head, rel)
+>>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
+>>> dg.tree().pprint()
+(will-8
+  (Vinken-2 Pierre-1 ,-3 (old-6 (years-5 61-4)) ,-7)
+  (join-9
+    (board-11 the-10)
+    (as-12 (director-15 a-13 nonexecutive-14))
+    (Nov.-16 29-17)
+    .-18))
+
+

Using the dependency-parsed version of the Penn Treebank corpus sample.

-
-
->>> from nltk.corpus import dependency_treebank
->>> t = dependency_treebank.parsed_sents()[0]
->>> print(t.to_conll(3))  # doctest: +NORMALIZE_WHITESPACE
-Pierre      NNP     2
-Vinken      NNP     8
-,   ,       2
-61  CD      5
-years       NNS     6
-old JJ      2
-,   ,       2
-will        MD      0
-join        VB      8
-the DT      11
-board       NN      9
-as  IN      9
-a   DT      15
-nonexecutive        JJ      15
-director    NN      12
-Nov.        NNP     9
-29  CD      16
-.   .       8
-
-
+
>>> from nltk.corpus import dependency_treebank
+>>> t = dependency_treebank.parsed_sents()[0]
+>>> print(t.to_conll(3))
+Pierre      NNP     2
+Vinken      NNP     8
+,   ,       2
+61  CD      5
+years       NNS     6
+old JJ      2
+,   ,       2
+will        MD      0
+join        VB      8
+the DT      11
+board       NN      9
+as  IN      9
+a   DT      15
+nonexecutive        JJ      15
+director    NN      12
+Nov.        NNP     9
+29  CD      16
+.   .       8
+
+

Using the output of zpar (like Malt-TAB but with zero-based indexing)

-
-
->>> zpar_data = """
-... Pierre  NNP     1       NMOD
-... Vinken  NNP     7       SUB
-... ,       ,       1       P
-... 61      CD      4       NMOD
-... years   NNS     5       AMOD
-... old     JJ      1       NMOD
-... ,       ,       1       P
-... will    MD      -1      ROOT
-... join    VB      7       VC
-... the     DT      10      NMOD
-... board   NN      8       OBJ
-... as      IN      8       VMOD
-... a       DT      14      NMOD
-... nonexecutive    JJ      14      NMOD
-... director        NN      11      PMOD
-... Nov.    NNP     8       VMOD
-... 29      CD      15      NMOD
-... .       .       7       P
-... """
-
-
->>> zdg = DependencyGraph(zpar_data, zero_based=True)
->>> print(zdg.tree())
-(will
-  (Vinken Pierre , (old (years 61)) ,)
-  (join (board the) (as (director a nonexecutive)) (Nov. 29))
-  .)
-
-
+
>>> zpar_data = """
+... Pierre  NNP     1       NMOD
+... Vinken  NNP     7       SUB
+... ,       ,       1       P
+... 61      CD      4       NMOD
+... years   NNS     5       AMOD
+... old     JJ      1       NMOD
+... ,       ,       1       P
+... will    MD      -1      ROOT
+... join    VB      7       VC
+... the     DT      10      NMOD
+... board   NN      8       OBJ
+... as      IN      8       VMOD
+... a       DT      14      NMOD
+... nonexecutive    JJ      14      NMOD
+... director        NN      11      PMOD
+... Nov.    NNP     8       VMOD
+... 29      CD      15      NMOD
+... .       .       7       P
+... """
+
-
-

Projective Dependency Parsing

-
-
->>> grammar = DependencyGrammar.fromstring("""
-... 'fell' -> 'price' | 'stock'
-... 'price' -> 'of' 'the'
-... 'of' -> 'stock'
-... 'stock' -> 'the'
-... """)
->>> print(grammar)
-Dependency grammar with 5 productions
-  'fell' -> 'price'
-  'fell' -> 'stock'
-  'price' -> 'of' 'the'
-  'of' -> 'stock'
-  'stock' -> 'the'
-
-
->>> dp = ProjectiveDependencyParser(grammar)
->>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
-...     print(t)
-(fell (price the (of (stock the))))
-(fell (price the of) (stock the))
-(fell (price the of the) stock)
-
-
+
>>> zdg = DependencyGraph(zpar_data, zero_based=True)
+>>> print(zdg.tree())
+(will
+  (Vinken Pierre , (old (years 61)) ,)
+  (join (board the) (as (director a nonexecutive)) (Nov. 29))
+  .)
+
-
-

Non-Projective Dependency Parsing

-
-
->>> grammar = DependencyGrammar.fromstring("""
-... 'taught' -> 'play' | 'man'
-... 'man' -> 'the'
-... 'play' -> 'golf' | 'dog' | 'to'
-... 'dog' -> 'his'
-... """)
->>> print(grammar)
-Dependency grammar with 7 productions
-  'taught' -> 'play'
-  'taught' -> 'man'
-  'man' -> 'the'
-  'play' -> 'golf'
-  'play' -> 'dog'
-  'play' -> 'to'
-  'dog' -> 'his'
-
-
->>> dp = NonprojectiveDependencyParser(grammar)
->>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
-
-
->>> print(g.root['word'])
-taught
-
-
->>> for _, node in sorted(g.nodes.items()):
-...     if node['word'] is not None:
-...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
-1 the: []
-2 man: [1]
-3 taught: [2, 7]
-4 his: []
-5 dog: [4]
-6 to: []
-7 play: [5, 6, 8]
-8 golf: []
-
-
->>> print(g.tree())
-(taught (man the) (play (dog his) to golf))
-
-
+
+
+

Projective Dependency Parsing

+
>>> grammar = DependencyGrammar.fromstring("""
+... 'fell' -> 'price' | 'stock'
+... 'price' -> 'of' 'the'
+... 'of' -> 'stock'
+... 'stock' -> 'the'
+... """)
+>>> print(grammar)
+Dependency grammar with 5 productions
+  'fell' -> 'price'
+  'fell' -> 'stock'
+  'price' -> 'of' 'the'
+  'of' -> 'stock'
+  'stock' -> 'the'
+
+
>>> dp = ProjectiveDependencyParser(grammar)
+>>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
+...     print(t)
+(fell (price the (of (stock the))))
+(fell (price the of) (stock the))
+(fell (price the of the) stock)
+
+
+
+

Non-Projective Dependency Parsing

+
>>> grammar = DependencyGrammar.fromstring("""
+... 'taught' -> 'play' | 'man'
+... 'man' -> 'the'
+... 'play' -> 'golf' | 'dog' | 'to'
+... 'dog' -> 'his'
+... """)
+>>> print(grammar)
+Dependency grammar with 7 productions
+  'taught' -> 'play'
+  'taught' -> 'man'
+  'man' -> 'the'
+  'play' -> 'golf'
+  'play' -> 'dog'
+  'play' -> 'to'
+  'dog' -> 'his'
+
+
+
>>> dp = NonprojectiveDependencyParser(grammar)
+>>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
+
+
+
>>> print(g.root['word'])
+taught
+
+
+
>>> for _, node in sorted(g.nodes.items()):
+...     if node['word'] is not None:
+...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
+1 the: []
+2 man: [1]
+3 taught: [2, 7]
+4 his: []
+5 dog: [4]
+6 to: []
+7 play: [5, 6, 8]
+8 golf: []
+
+
+
>>> print(g.tree())
+(taught (man the) (play (dog his) to golf))
+
+
+
+

Integration with MALT parser

+

If the top relation is different from the default, we can set it explicitly. In the case +of the MALT parser, it’s set to ‘null’.

+
>>> dg_str = """1       I       _       NN      NN      _       2       nn      _       _
+... 2   shot    _       NN      NN      _       0       null    _       _
+... 3   an      _       AT      AT      _       2       dep     _       _
+... 4   elephant        _       NN      NN      _       7       nn      _       _
+... 5   in      _       NN      NN      _       7       nn      _       _
+... 6   my      _       NN      NN      _       7       nn      _       _
+... 7   pajamas _       NNS     NNS     _       3       dobj    _       _
+... """
+>>> dg = DependencyGraph(dg_str, top_relation_label='null')
+
+
+
>>> len(dg.nodes)
+8
+
+
+
>>> dg.root['word'], dg.root['address']
+('shot', 2)
+
+
+
>>> print(dg.to_conll(10))
+1   I       _       NN      NN      _       2       nn      _       _
+2   shot    _       NN      NN      _       0       null    _       _
+3   an      _       AT      AT      _       2       dep     _       _
+4   elephant        _       NN      NN      _       7       nn      _       _
+5   in      _       NN      NN      _       7       nn      _       _
+6   my      _       NN      NN      _       7       nn      _       _
+7   pajamas _       NNS     NNS     _       3       dobj    _       _
+
+
+
+
+
+
+ - + \ No newline at end of file diff --git a/howto/discourse.html b/howto/discourse.html index edfc7d439..5bf25d4be 100644 --- a/howto/discourse.html +++ b/howto/discourse.html @@ -1,925 +1,669 @@ - - - + - - -Discourse Checking - + + + + + + + NLTK :: Sample usage for discourse + + + + + + + + + + + + + + -
-

Discourse Checking

- - - -
-
->>> from nltk import *
->>> from nltk.sem import logic
->>> logic._counter._value = 0
-
-
-
-

Introduction


Sample usage for discourse

+
+

Discourse Checking

+
>>> from nltk import *
+>>> from nltk.sem import logic
+>>> logic._counter._value = 0
+
+
+
+

Setup

+
>>> from nltk.test.childes_fixt import setup_module
+>>> setup_module()
+
+
+
+
+

Introduction

The NLTK discourse module makes it possible to test consistency and redundancy of simple discourses, using theorem-proving and model-building from nltk.inference.

-

The DiscourseTester constructor takes a list of sentences as a +

The DiscourseTester constructor takes a list of sentences as a parameter.

-
-
->>> dt = DiscourseTester(['a boxer walks', 'every boxer chases a girl'])
-
-
-

The DiscourseTester parses each sentence into a list of logical -forms. Once we have created DiscourseTester object, we can +

>>> dt = DiscourseTester(['a boxer walks', 'every boxer chases a girl'])
+
+
+

The DiscourseTester parses each sentence into a list of logical +forms. Once we have created a DiscourseTester object, we can inspect various properties of the discourse. First off, we might want to double-check what sentences are currently stored as the discourse.

-
-
->>> dt.sentences()
-s0: a boxer walks
-s1: every boxer chases a girl
-
-
+
>>> dt.sentences()
+s0: a boxer walks
+s1: every boxer chases a girl
+
+

As you will see, each sentence receives an identifier si. -We might also want to check what grammar the DiscourseTester is -using (by default, book_grammars/discourse.fcfg):

-
-
->>> dt.grammar() # doctest: +ELLIPSIS
-% start S
-# Grammar Rules
-S[SEM = <app(?subj,?vp)>] -> NP[NUM=?n,SEM=?subj] VP[NUM=?n,SEM=?vp]
-NP[NUM=?n,SEM=<app(?det,?nom)> ] -> Det[NUM=?n,SEM=?det]  Nom[NUM=?n,SEM=?nom]
-NP[LOC=?l,NUM=?n,SEM=?np] -> PropN[LOC=?l,NUM=?n,SEM=?np]
-...
-
-
-

A different grammar can be invoked by using the optional gramfile -parameter when a DiscourseTester object is created.

+We might also want to check what grammar the DiscourseTester is +using (by default, book_grammars/discourse.fcfg):

+
>>> dt.grammar()
+% start S
+# Grammar Rules
+S[SEM = <app(?subj,?vp)>] -> NP[NUM=?n,SEM=?subj] VP[NUM=?n,SEM=?vp]
+NP[NUM=?n,SEM=<app(?det,?nom)> ] -> Det[NUM=?n,SEM=?det]  Nom[NUM=?n,SEM=?nom]
+NP[LOC=?l,NUM=?n,SEM=?np] -> PropN[LOC=?l,NUM=?n,SEM=?np]
+...
+
-
-

Readings and Threads

+

A different grammar can be invoked by using the optional gramfile +parameter when a DiscourseTester object is created.

+
+
+

Readings and Threads

Depending on the grammar used, we may find some sentences have more than one -logical form. To check this, use the readings() method. Given a +logical form. To check this, use the readings() method. Given a sentence identifier of the form si, each reading of that sentence is given an identifier si-rj.

-
-
->>> dt.readings()
-<BLANKLINE>
-s0 readings:
-<BLANKLINE>
-s0-r0: exists z1.(boxer(z1) & walk(z1))
-s0-r1: exists z1.(boxerdog(z1) & walk(z1))
-<BLANKLINE>
-s1 readings:
-<BLANKLINE>
-s1-r0: all z2.(boxer(z2) -> exists z3.(girl(z3) & chase(z2,z3)))
-s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-
-
+
>>> dt.readings()
+
+s0 readings:
+
+s0-r0: exists z1.(boxer(z1) & walk(z1))
+s0-r1: exists z1.(boxerdog(z1) & walk(z1))
+
+s1 readings:
+
+s1-r0: all z2.(boxer(z2) -> exists z3.(girl(z3) & chase(z2,z3)))
+s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+
+

In this case, the only source of ambiguity lies in the word boxer, -which receives two translations: boxer and boxerdog. The -intention is that one of these corresponds to the person sense and -one to the dog sense. In principle, we would also expect to see a -quantifier scope ambiguity in s1. However, the simple grammar we -are using, namely discourse.fcfg, doesn't support quantifier +which receives two translations: boxer and boxerdog. The +intention is that one of these corresponds to the person sense and +one to the dog sense. In principle, we would also expect to see a +quantifier scope ambiguity in s1. However, the simple grammar we +are using, namely discourse.fcfg, doesn’t support quantifier scope ambiguity.

We can also investigate the readings of a specific sentence:

-
-
->>> dt.readings('a boxer walks')
-The sentence 'a boxer walks' has these readings:
-    exists x.(boxer(x) & walk(x))
-    exists x.(boxerdog(x) & walk(x))
-
-
+
>>> dt.readings('a boxer walks')
+The sentence 'a boxer walks' has these readings:
+    exists x.(boxer(x) & walk(x))
+    exists x.(boxerdog(x) & walk(x))
+
+

Given that each sentence is two-ways ambiguous, we potentially have -four different discourse 'threads', taking all combinations of -readings. To see these, specify the threaded=True parameter on -the readings() method. Again, each thread is assigned an +four different discourse ‘threads’, taking all combinations of +readings. To see these, specify the threaded=True parameter on +the readings() method. Again, each thread is assigned an identifier of the form di. Following the identifier is a list of the readings that constitute that thread.

-
-
->>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
-d0: ['s0-r0', 's1-r0']
-d1: ['s0-r0', 's1-r1']
-d2: ['s0-r1', 's1-r0']
-d3: ['s0-r1', 's1-r1']
-
-
-

Of course, this simple-minded approach doesn't scale: a discourse with, say, three +

>>> dt.readings(threaded=True)
+d0: ['s0-r0', 's1-r0']
+d1: ['s0-r0', 's1-r1']
+d2: ['s0-r1', 's1-r0']
+d3: ['s0-r1', 's1-r1']
+
+
+

Of course, this simple-minded approach doesn’t scale: a discourse with, say, three sentences, each of which has 3 readings, will generate 27 different threads. It is an interesting exercise to consider how to manage discourse ambiguity more efficiently.
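A hedged sketch of the arithmetic (not part of the doctest suite): threads are the Cartesian product of per-sentence readings, so their number is the product of the reading counts.

# Illustrative only: 3 sentences with 3 readings each yield 27 threads.
from itertools import product

readings = [['s0-r0', 's0-r1', 's0-r2'],
            ['s1-r0', 's1-r1', 's1-r2'],
            ['s2-r0', 's2-r1', 's2-r2']]
print(len(list(product(*readings))))   # 3 ** 3 == 27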

-
-
-

Checking Consistency

+ +
+

Checking Consistency

Now, we can check whether some or all of the discourse threads are -consistent, using the models() method. With no parameter, this +consistent, using the models() method. With no parameter, this method will try to find a model for every discourse thread in the -current discourse. However, we can also specify just one thread, say d1.

-
-
->>> dt.models('d1')
---------------------------------------------------------------------------------
-Model for Discourse Thread d1
---------------------------------------------------------------------------------
-% number = 1
-% seconds = 0
-<BLANKLINE>
-% Interpretation of size 2
-<BLANKLINE>
-c1 = 0.
-<BLANKLINE>
-f1(0) = 0.
-f1(1) = 0.
-<BLANKLINE>
-  boxer(0).
-- boxer(1).
-<BLANKLINE>
-- boxerdog(0).
-- boxerdog(1).
-<BLANKLINE>
-- girl(0).
-- girl(1).
-<BLANKLINE>
-  walk(0).
-- walk(1).
-<BLANKLINE>
-- chase(0,0).
-- chase(0,1).
-- chase(1,0).
-- chase(1,1).
-<BLANKLINE>
-Consistent discourse: d1 ['s0-r0', 's1-r1']:
-    s0-r0: exists z1.(boxer(z1) & walk(z1))
-    s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-<BLANKLINE>
-
-
-

There are various formats for rendering Mace4 models --- here, -we have used the 'cooked' format (which is intended to be +current discourse. However, we can also specify just one thread, say d1.

+
>>> dt.models('d1')
+--------------------------------------------------------------------------------
+Model for Discourse Thread d1
+--------------------------------------------------------------------------------
+% number = 1
+% seconds = 0
+
+% Interpretation of size 2
+
+c1 = 0.
+
+f1(0) = 0.
+f1(1) = 0.
+
+  boxer(0).
+- boxer(1).
+
+- boxerdog(0).
+- boxerdog(1).
+
+- girl(0).
+- girl(1).
+
+  walk(0).
+- walk(1).
+
+- chase(0,0).
+- chase(0,1).
+- chase(1,0).
+- chase(1,1).
+
+Consistent discourse: d1 ['s0-r0', 's1-r1']:
+    s0-r0: exists z1.(boxer(z1) & walk(z1))
+    s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+
+
+

There are various formats for rendering Mace4 models — here, +we have used the ‘cooked’ format (which is intended to be human-readable). There are a number of points to note.

1. The entities in the domain are all treated as non-negative
integers. In this case, there are only two entities, 0 and 1.
2. The - symbol indicates negation. So 0 is the only boxer
and the only thing that walks. Nothing is a boxerdog, or a
girl, or in the chase relation. Thus the universal sentence
is vacuously true.
3. c1 is an introduced constant that denotes 0.
4. f1 is a Skolem function, but it plays no significant role
in this model.

We might now want to add another sentence to the discourse, and there is a method add_sentence() for doing just this.

-
-
->>> dt.add_sentence('John is a boxer')
->>> dt.sentences()
-s0: a boxer walks
-s1: every boxer chases a girl
-s2: John is a boxer
-
-
+is method add_sentence() for doing just this.

+
>>> dt.add_sentence('John is a boxer')
+>>> dt.sentences()
+s0: a boxer walks
+s1: every boxer chases a girl
+s2: John is a boxer
+
+

We can now test all the properties as before; here, we just show a couple of them.

-
-
->>> dt.readings()
-<BLANKLINE>
-s0 readings:
-<BLANKLINE>
-s0-r0: exists z1.(boxer(z1) & walk(z1))
-s0-r1: exists z1.(boxerdog(z1) & walk(z1))
-<BLANKLINE>
-s1 readings:
-<BLANKLINE>
-s1-r0: all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-<BLANKLINE>
-s2 readings:
-<BLANKLINE>
-s2-r0: boxer(John)
-s2-r1: boxerdog(John)
->>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
-d0: ['s0-r0', 's1-r0', 's2-r0']
-d1: ['s0-r0', 's1-r0', 's2-r1']
-d2: ['s0-r0', 's1-r1', 's2-r0']
-d3: ['s0-r0', 's1-r1', 's2-r1']
-d4: ['s0-r1', 's1-r0', 's2-r0']
-d5: ['s0-r1', 's1-r0', 's2-r1']
-d6: ['s0-r1', 's1-r1', 's2-r0']
-d7: ['s0-r1', 's1-r1', 's2-r1']
-
-
-

If you are interested in a particular thread, the expand_threads() +

>>> dt.readings()
+
+s0 readings:
+
+s0-r0: exists z1.(boxer(z1) & walk(z1))
+s0-r1: exists z1.(boxerdog(z1) & walk(z1))
+
+s1 readings:
+
+s1-r0: all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+
+s2 readings:
+
+s2-r0: boxer(John)
+s2-r1: boxerdog(John)
+>>> dt.readings(threaded=True)
+d0: ['s0-r0', 's1-r0', 's2-r0']
+d1: ['s0-r0', 's1-r0', 's2-r1']
+d2: ['s0-r0', 's1-r1', 's2-r0']
+d3: ['s0-r0', 's1-r1', 's2-r1']
+d4: ['s0-r1', 's1-r0', 's2-r0']
+d5: ['s0-r1', 's1-r0', 's2-r1']
+d6: ['s0-r1', 's1-r1', 's2-r0']
+d7: ['s0-r1', 's1-r1', 's2-r1']
+
+
+

If you are interested in a particular thread, the expand_threads() method will remind you of what readings it consists of:

-
-
->>> thread = dt.expand_threads('d1')
->>> for rid, reading in thread:
-...     print(rid, str(reading.normalize()))
-s0-r0 exists z1.(boxer(z1) & walk(z1))
-s1-r0 all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-s2-r1 boxerdog(John)
-
-
+
>>> thread = dt.expand_threads('d1')
+>>> for rid, reading in thread:
+...     print(rid, str(reading.normalize()))
+s0-r0 exists z1.(boxer(z1) & walk(z1))
+s1-r0 all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+s2-r1 boxerdog(John)
+
+

Suppose we have already defined a discourse, as follows:

-
-
->>> dt = DiscourseTester(['A student dances', 'Every student is a person'])
-
-
+
>>> dt = DiscourseTester(['A student dances', 'Every student is a person'])
+
+

Now, when we add a new sentence, is it consistent with what we already -have? The consistchk=True parameter of add_sentence() allows +have? The consistchk=True parameter of add_sentence() allows us to check:

-
-
->>> dt.add_sentence('No person dances', consistchk=True)
-Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
-    s0-r0: exists z1.(student(z1) & dance(z1))
-    s1-r0: all z1.(student(z1) -> person(z1))
-    s2-r0: -exists z1.(person(z1) & dance(z1))
-<BLANKLINE>
->>> dt.readings()
-<BLANKLINE>
-s0 readings:
-<BLANKLINE>
-s0-r0: exists z1.(student(z1) & dance(z1))
-<BLANKLINE>
-s1 readings:
-<BLANKLINE>
-s1-r0: all z1.(student(z1) -> person(z1))
-<BLANKLINE>
-s2 readings:
-<BLANKLINE>
-s2-r0: -exists z1.(person(z1) & dance(z1))
-
-
-

So let's retract the inconsistent sentence:

-
-
->>> dt.retract_sentence('No person dances', verbose=True) # doctest: +NORMALIZE_WHITESPACE
-Current sentences are
-s0: A student dances
-s1: Every student is a person
-
-
+
>>> dt.add_sentence('No person dances', consistchk=True)
+Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
+    s0-r0: exists z1.(student(z1) & dance(z1))
+    s1-r0: all z1.(student(z1) -> person(z1))
+    s2-r0: -exists z1.(person(z1) & dance(z1))
+
+>>> dt.readings()
+
+s0 readings:
+
+s0-r0: exists z1.(student(z1) & dance(z1))
+
+s1 readings:
+
+s1-r0: all z1.(student(z1) -> person(z1))
+
+s2 readings:
+
+s2-r0: -exists z1.(person(z1) & dance(z1))
+
+
+

So let’s retract the inconsistent sentence:

+
>>> dt.retract_sentence('No person dances', verbose=True)
+Current sentences are
+s0: A student dances
+s1: Every student is a person
+
+

We can now verify that result is consistent.

-
-
->>> dt.models()
---------------------------------------------------------------------------------
-Model for Discourse Thread d0
---------------------------------------------------------------------------------
-% number = 1
-% seconds = 0
-<BLANKLINE>
-% Interpretation of size 2
-<BLANKLINE>
-c1 = 0.
-<BLANKLINE>
-  dance(0).
-- dance(1).
-<BLANKLINE>
-  person(0).
-- person(1).
-<BLANKLINE>
-  student(0).
-- student(1).
-<BLANKLINE>
-Consistent discourse: d0 ['s0-r0', 's1-r0']:
-    s0-r0: exists z1.(student(z1) & dance(z1))
-    s1-r0: all z1.(student(z1) -> person(z1))
-<BLANKLINE>
-
-
+
>>> dt.models()
+--------------------------------------------------------------------------------
+Model for Discourse Thread d0
+--------------------------------------------------------------------------------
+% number = 1
+% seconds = 0
+
+% Interpretation of size 2
+
+c1 = 0.
+
+  dance(0).
+- dance(1).
+
+  person(0).
+- person(1).
+
+  student(0).
+- student(1).
+
+Consistent discourse: d0 ['s0-r0', 's1-r0']:
+    s0-r0: exists z1.(student(z1) & dance(z1))
+    s1-r0: all z1.(student(z1) -> person(z1))
+
-
-

Checking Informativity

-

Let's assume that we are still trying to extend the discourse A +

+
+

Checking Informativity

+

Let’s assume that we are still trying to extend the discourse A student dances. Every student is a person. We add a new sentence, but this time, we check whether it is informative with respect to what has gone before.

-
-
->>> dt.add_sentence('A person dances', informchk=True)
-Sentence 'A person dances' under reading 'exists x.(person(x) & dance(x))':
-Not informative relative to thread 'd0'
-
-
+
>>> dt.add_sentence('A person dances', informchk=True)
+Sentence 'A person dances' under reading 'exists x.(person(x) & dance(x))':
+Not informative relative to thread 'd0'
+
+

In fact, we are just checking whether the new sentence is entailed by the preceding discourse.
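
The same entailment can be checked directly with one of NLTK's provers. The following is a minimal sketch, not part of the original doctest; it assumes that TableauProver can handle these simple quantified formulas (DiscourseTester itself delegates to whatever prover and model builder it was configured with):

>>> from nltk.sem import Expression
>>> from nltk.inference import TableauProver
>>> read_expr = Expression.fromstring
>>> premises = [read_expr('exists x.(student(x) & dance(x))'),
...             read_expr('all x.(student(x) -> person(x))')]
>>> goal = read_expr('exists x.(person(x) & dance(x))')
>>> TableauProver().prove(goal, premises)
True
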

-
-
->>> dt.models()
---------------------------------------------------------------------------------
-Model for Discourse Thread d0
---------------------------------------------------------------------------------
-% number = 1
-% seconds = 0
-<BLANKLINE>
-% Interpretation of size 2
-<BLANKLINE>
-c1 = 0.
-<BLANKLINE>
-c2 = 0.
-<BLANKLINE>
-  dance(0).
-- dance(1).
-<BLANKLINE>
-  person(0).
-- person(1).
-<BLANKLINE>
-  student(0).
-- student(1).
-<BLANKLINE>
-Consistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
-    s0-r0: exists z1.(student(z1) & dance(z1))
-    s1-r0: all z1.(student(z1) -> person(z1))
-    s2-r0: exists z1.(person(z1) & dance(z1))
-<BLANKLINE>
-
-
+
>>> dt.models()
+--------------------------------------------------------------------------------
+Model for Discourse Thread d0
+--------------------------------------------------------------------------------
+% number = 1
+% seconds = 0
+
+% Interpretation of size 2
+
+c1 = 0.
+
+c2 = 0.
+
+  dance(0).
+- dance(1).
+
+  person(0).
+- person(1).
+
+  student(0).
+- student(1).
+
+Consistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
+    s0-r0: exists z1.(student(z1) & dance(z1))
+    s1-r0: all z1.(student(z1) -> person(z1))
+    s2-r0: exists z1.(person(z1) & dance(z1))
+
+
+
+
+

Adding Background Knowledge

+

Let’s build a new discourse, and look at the readings of the component sentences:

+
>>> dt = DiscourseTester(['Vincent is a boxer', 'Fido is a boxer', 'Vincent is married', 'Fido barks'])
+>>> dt.readings()
+
+s0 readings:
+
+s0-r0: boxer(Vincent)
+s0-r1: boxerdog(Vincent)
+
+s1 readings:
+
+s1-r0: boxer(Fido)
+s1-r1: boxerdog(Fido)
+
+s2 readings:
+
+s2-r0: married(Vincent)
+
+s3 readings:
+
+s3-r0: bark(Fido)
+
-
-

Adding Background Knowledge

-

Let's build a new discourse, and look at the readings of the component sentences:

-
-
->>> dt = DiscourseTester(['Vincent is a boxer', 'Fido is a boxer', 'Vincent is married', 'Fido barks'])
->>> dt.readings()
-<BLANKLINE>
-s0 readings:
-<BLANKLINE>
-s0-r0: boxer(Vincent)
-s0-r1: boxerdog(Vincent)
-<BLANKLINE>
-s1 readings:
-<BLANKLINE>
-s1-r0: boxer(Fido)
-s1-r1: boxerdog(Fido)
-<BLANKLINE>
-s2 readings:
-<BLANKLINE>
-s2-r0: married(Vincent)
-<BLANKLINE>
-s3 readings:
-<BLANKLINE>
-s3-r0: bark(Fido)
-
-

This gives us a lot of threads:

-
-
->>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
-d0: ['s0-r0', 's1-r0', 's2-r0', 's3-r0']
-d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0']
-d2: ['s0-r1', 's1-r0', 's2-r0', 's3-r0']
-d3: ['s0-r1', 's1-r1', 's2-r0', 's3-r0']
-
-
+
>>> dt.readings(threaded=True)
+d0: ['s0-r0', 's1-r0', 's2-r0', 's3-r0']
+d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0']
+d2: ['s0-r1', 's1-r0', 's2-r0', 's3-r0']
+d3: ['s0-r1', 's1-r1', 's2-r0', 's3-r0']
+
+

We can eliminate some of the readings, and hence some of the threads, by adding background information.

-
-
->>> import nltk.data
->>> bg = nltk.data.load('grammars/book_grammars/background.fol')
->>> dt.add_background(bg)
->>> dt.background()
-all x.(boxerdog(x) -> dog(x))
-all x.(boxer(x) -> person(x))
-all x.-(dog(x) & person(x))
-all x.(married(x) <-> exists y.marry(x,y))
-all x.(bark(x) -> dog(x))
-all x y.(marry(x,y) -> (person(x) & person(y)))
--(Vincent = Mia)
--(Vincent = Fido)
--(Mia = Fido)
-
-
+
>>> import nltk.data
+>>> bg = nltk.data.load('grammars/book_grammars/background.fol')
+>>> dt.add_background(bg)
+>>> dt.background()
+all x.(boxerdog(x) -> dog(x))
+all x.(boxer(x) -> person(x))
+all x.-(dog(x) & person(x))
+all x.(married(x) <-> exists y.marry(x,y))
+all x.(bark(x) -> dog(x))
+all x y.(marry(x,y) -> (person(x) & person(y)))
+-(Vincent = Mia)
+-(Vincent = Fido)
+-(Mia = Fido)
+
+

The background information allows us to reject three of the threads as inconsistent. To see what remains, use the filter=True parameter on readings().

-
-
->>> dt.readings(filter=True) # doctest: +NORMALIZE_WHITESPACE
-d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0']
-
-
-

The models() method gives us more information about the surviving thread.

-
-
->>> dt.models()
---------------------------------------------------------------------------------
-Model for Discourse Thread d0
---------------------------------------------------------------------------------
-No model found!
-<BLANKLINE>
---------------------------------------------------------------------------------
-Model for Discourse Thread d1
---------------------------------------------------------------------------------
-% number = 1
-% seconds = 0
-<BLANKLINE>
-% Interpretation of size 3
-<BLANKLINE>
-Fido = 0.
-<BLANKLINE>
-Mia = 1.
-<BLANKLINE>
-Vincent = 2.
-<BLANKLINE>
-f1(0) = 0.
-f1(1) = 0.
-f1(2) = 2.
-<BLANKLINE>
-  bark(0).
-- bark(1).
-- bark(2).
-<BLANKLINE>
-- boxer(0).
-- boxer(1).
-  boxer(2).
-<BLANKLINE>
-  boxerdog(0).
-- boxerdog(1).
-- boxerdog(2).
-<BLANKLINE>
-  dog(0).
-- dog(1).
-- dog(2).
-<BLANKLINE>
-- married(0).
-- married(1).
-  married(2).
-<BLANKLINE>
-- person(0).
-- person(1).
-  person(2).
-<BLANKLINE>
-- marry(0,0).
-- marry(0,1).
-- marry(0,2).
-- marry(1,0).
-- marry(1,1).
-- marry(1,2).
-- marry(2,0).
-- marry(2,1).
-  marry(2,2).
-<BLANKLINE>
---------------------------------------------------------------------------------
-Model for Discourse Thread d2
---------------------------------------------------------------------------------
-No model found!
-<BLANKLINE>
---------------------------------------------------------------------------------
-Model for Discourse Thread d3
---------------------------------------------------------------------------------
-No model found!
-<BLANKLINE>
-Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0', 's3-r0']:
-    s0-r0: boxer(Vincent)
-    s1-r0: boxer(Fido)
-    s2-r0: married(Vincent)
-    s3-r0: bark(Fido)
-<BLANKLINE>
-Consistent discourse: d1 ['s0-r0', 's1-r1', 's2-r0', 's3-r0']:
-    s0-r0: boxer(Vincent)
-    s1-r1: boxerdog(Fido)
-    s2-r0: married(Vincent)
-    s3-r0: bark(Fido)
-<BLANKLINE>
-Inconsistent discourse: d2 ['s0-r1', 's1-r0', 's2-r0', 's3-r0']:
-    s0-r1: boxerdog(Vincent)
-    s1-r0: boxer(Fido)
-    s2-r0: married(Vincent)
-    s3-r0: bark(Fido)
-<BLANKLINE>
-Inconsistent discourse: d3 ['s0-r1', 's1-r1', 's2-r0', 's3-r0']:
-    s0-r1: boxerdog(Vincent)
-    s1-r1: boxerdog(Fido)
-    s2-r0: married(Vincent)
-    s3-r0: bark(Fido)
-<BLANKLINE>
-
-
The background information allows us to reject three of the threads as inconsistent. To see what remains, use the filter=True parameter on readings().

+
>>> dt.readings(filter=True)
+d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0']
+
+
+

The models() method gives us more information about the surviving thread.

+
>>> dt.models()
+--------------------------------------------------------------------------------
+Model for Discourse Thread d0
+--------------------------------------------------------------------------------
+No model found!
+
+--------------------------------------------------------------------------------
+Model for Discourse Thread d1
+--------------------------------------------------------------------------------
+% number = 1
+% seconds = 0
+
+% Interpretation of size 3
+
+Fido = 0.
+
+Mia = 1.
+
+Vincent = 2.
+
+f1(0) = 0.
+f1(1) = 0.
+f1(2) = 2.
+
+  bark(0).
+- bark(1).
+- bark(2).
+
+- boxer(0).
+- boxer(1).
+  boxer(2).
+
+  boxerdog(0).
+- boxerdog(1).
+- boxerdog(2).
+
+  dog(0).
+- dog(1).
+- dog(2).
+
+- married(0).
+- married(1).
+  married(2).
+
+- person(0).
+- person(1).
+  person(2).
+
+- marry(0,0).
+- marry(0,1).
+- marry(0,2).
+- marry(1,0).
+- marry(1,1).
+- marry(1,2).
+- marry(2,0).
+- marry(2,1).
+  marry(2,2).
+
+--------------------------------------------------------------------------------
+Model for Discourse Thread d2
+--------------------------------------------------------------------------------
+No model found!
+
+--------------------------------------------------------------------------------
+Model for Discourse Thread d3
+--------------------------------------------------------------------------------
+No model found!
+
+Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0', 's3-r0']:
+    s0-r0: boxer(Vincent)
+    s1-r0: boxer(Fido)
+    s2-r0: married(Vincent)
+    s3-r0: bark(Fido)
+
+Consistent discourse: d1 ['s0-r0', 's1-r1', 's2-r0', 's3-r0']:
+    s0-r0: boxer(Vincent)
+    s1-r1: boxerdog(Fido)
+    s2-r0: married(Vincent)
+    s3-r0: bark(Fido)
+
+Inconsistent discourse: d2 ['s0-r1', 's1-r0', 's2-r0', 's3-r0']:
+    s0-r1: boxerdog(Vincent)
+    s1-r0: boxer(Fido)
+    s2-r0: married(Vincent)
+    s3-r0: bark(Fido)
+
+Inconsistent discourse: d3 ['s0-r1', 's1-r1', 's2-r0', 's3-r0']:
+    s0-r1: boxerdog(Vincent)
+    s1-r1: boxerdog(Fido)
+    s2-r0: married(Vincent)
+    s3-r0: bark(Fido)
+
+

In order to play around with your own version of background knowledge, you might want to start off with a local copy of background.fol:

-
-
->>> nltk.data.retrieve('grammars/book_grammars/background.fol')
-Retrieving 'nltk:grammars/book_grammars/background.fol', saving to 'background.fol'
-
-
-

After you have modified the file, the load_fol() function will parse the strings in the file into expressions of nltk.sem.logic.

-
-
->>> from nltk.inference.discourse import load_fol
->>> mybg = load_fol(open('background.fol').read())
-
-
-

In order to play around with your own version of background knowledge, you might want to start off with a local copy of background.fol:

+
>>> nltk.data.retrieve('grammars/book_grammars/background.fol')
+Retrieving 'nltk:grammars/book_grammars/background.fol', saving to 'background.fol'
+
+
+

After you have modified the file, the load_fol() function will parse the strings in the file into expressions of nltk.sem.logic.

+
>>> from nltk.inference.discourse import load_fol
+>>> mybg = load_fol(open('background.fol').read())
+
+
+

The result can be loaded as an argument of add_background() in the manner shown earlier.
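
For instance, mybg can be passed straight back in; this is a sketch rather than part of the original doctest, reusing the dt and mybg objects defined above:

>>> dt.add_background(mybg)

The loaded assumptions then take part in consistency and informativity checking exactly as the packaged background.fol did.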

- +
+
+

Regression Testing from book

+
>>> logic._counter._value = 0
+
+
+
>>> from nltk.tag import RegexpTagger
+>>> tagger = RegexpTagger(
+...     [('^(chases|runs)$', 'VB'),
+...      ('^(a)$', 'ex_quant'),
+...      ('^(every)$', 'univ_quant'),
+...      ('^(dog|boy)$', 'NN'),
+...      ('^(He)$', 'PRP')
+... ])
+>>> rc = DrtGlueReadingCommand(depparser=MaltParser(tagger=tagger))
+>>> dt = DiscourseTester(map(str.split, ['Every dog chases a boy', 'He runs']), rc)
+>>> dt.readings()
+
+s0 readings:
+
+s0-r0: ([z2],[boy(z2), (([z5],[dog(z5)]) -> ([],[chases(z5,z2)]))])
+s0-r1: ([],[(([z1],[dog(z1)]) -> ([z2],[boy(z2), chases(z1,z2)]))])
+
+s1 readings:
+
+s1-r0: ([z1],[PRO(z1), runs(z1)])
+>>> dt.readings(show_thread_readings=True)
+d0: ['s0-r0', 's1-r0'] : ([z1,z2],[boy(z1), (([z3],[dog(z3)]) -> ([],[chases(z3,z1)])), (z2 = z1), runs(z2)])
+d1: ['s0-r1', 's1-r0'] : INVALID: AnaphoraResolutionException
+>>> dt.readings(filter=True, show_thread_readings=True)
+d0: ['s0-r0', 's1-r0'] : ([z1,z3],[boy(z1), (([z2],[dog(z2)]) -> ([],[chases(z2,z1)])), (z3 = z1), runs(z3)])
+
-
-

Regression Testing from book

-
-
->>> logic._counter._value = 0
-
-
->>> from nltk.tag import RegexpTagger
->>> tagger = RegexpTagger(
-...     [('^(chases|runs)$', 'VB'),
-...      ('^(a)$', 'ex_quant'),
-...      ('^(every)$', 'univ_quant'),
-...      ('^(dog|boy)$', 'NN'),
-...      ('^(He)$', 'PRP')
-... ])
->>> rc = DrtGlueReadingCommand(depparser=MaltParser(tagger=tagger))
->>> dt = DiscourseTester(map(str.split, ['Every dog chases a boy', 'He runs']), rc)
->>> dt.readings()
-<BLANKLINE>
-s0 readings:
-<BLANKLINE>
-s0-r0: ([z2],[boy(z2), (([z5],[dog(z5)]) -> ([],[chases(z5,z2)]))])
-s0-r1: ([],[(([z1],[dog(z1)]) -> ([z2],[boy(z2), chases(z1,z2)]))])
-<BLANKLINE>
-s1 readings:
-<BLANKLINE>
-s1-r0: ([z1],[PRO(z1), runs(z1)])
->>> dt.readings(show_thread_readings=True)
-d0: ['s0-r0', 's1-r0'] : ([z1,z2],[boy(z1), (([z3],[dog(z3)]) -> ([],[chases(z3,z1)])), (z2 = z1), runs(z2)])
-d1: ['s0-r1', 's1-r0'] : INVALID: AnaphoraResolutionException
->>> dt.readings(filter=True, show_thread_readings=True)
-d0: ['s0-r0', 's1-r0'] : ([z1,z3],[boy(z1), (([z2],[dog(z2)]) -> ([],[chases(z2,z1)])), (z3 = z1), runs(z3)])
-
-
->>> logic._counter._value = 0
-
-
->>> from nltk.parse import FeatureEarleyChartParser
->>> from nltk.sem.drt import DrtParser
->>> grammar = nltk.data.load('grammars/book_grammars/drt.fcfg', logic_parser=DrtParser())
->>> parser = FeatureEarleyChartParser(grammar, trace=0)
->>> trees = parser.parse('Angus owns a dog'.split())
->>> print(list(trees)[0].label()['SEM'].simplify().normalize())
-([z1,z2],[Angus(z1), dog(z2), own(z1,z2)])
-
-
+
>>> logic._counter._value = 0
+
+
>>> from nltk.parse import FeatureEarleyChartParser
+>>> from nltk.sem.drt import DrtParser
+>>> grammar = nltk.data.load('grammars/book_grammars/drt.fcfg', logic_parser=DrtParser())
+>>> parser = FeatureEarleyChartParser(grammar, trace=0)
+>>> trees = parser.parse('Angus owns a dog'.split())
+>>> print(list(trees)[0].label()['SEM'].simplify().normalize())
+([z1,z2],[Angus(z1), dog(z2), own(z1,z2)])
+
+
\ No newline at end of file
diff --git a/howto/drt.html b/howto/drt.html
index 33bbcff7e..4efad2efa 100644
--- a/howto/drt.html
+++ b/howto/drt.html
@@ -1,944 +1,690 @@
-

Discourse Representation Theory

-
->>> from nltk.sem import logic
->>> from nltk.inference import TableauProver
-
-
-
-

Overview

-

A DRS can be created with the DRS() constructor. This takes two arguments: a list of discourse referents and a list of conditions.

+
+ +
+

Sample usage for drt

+
+

Discourse Representation Theory

+
>>> from nltk.sem import logic
+>>> from nltk.inference import TableauProver
+
+
+
+

Overview

+

A DRS can be created with the DRS() constructor. This takes two arguments: a list of discourse referents and a list of conditions.

-
-
->>> from nltk.sem.drt import *
->>> dexpr = DrtExpression.fromstring
->>> man_x = dexpr('man(x)')
->>> walk_x = dexpr('walk(x)')
->>> x = dexpr('x')
->>> print(DRS([x], [man_x, walk_x]))
-([x],[man(x), walk(x)])
-
-
-

The parse() method can also be applied directly to DRS expressions, which allows them to be specified more easily.

>>> from nltk.sem.drt import *
+>>> dexpr = DrtExpression.fromstring
+>>> man_x = dexpr('man(x)')
+>>> walk_x = dexpr('walk(x)')
+>>> x = dexpr('x')
+>>> print(DRS([x], [man_x, walk_x]))
+([x],[man(x), walk(x)])
+
+
+

The parse() method can also be applied directly to DRS expressions, which allows them to be specified more easily.

-
-
->>> drs1 = dexpr('([x],[man(x),walk(x)])')
->>> print(drs1)
-([x],[man(x), walk(x)])
-
-
-

DRSs can be merged using the + operator.

-
-
->>> drs2 = dexpr('([y],[woman(y),stop(y)])')
->>> drs3 = drs1 + drs2
->>> print(drs3)
-(([x],[man(x), walk(x)]) + ([y],[woman(y), stop(y)]))
->>> print(drs3.simplify())
-([x,y],[man(x), walk(x), woman(y), stop(y)])
-
-
-

We can embed DRSs as components of an implies condition.

-
-
->>> s = '([], [(%s -> %s)])' % (drs1, drs2)
->>> print(dexpr(s))
-([],[(([x],[man(x), walk(x)]) -> ([y],[woman(y), stop(y)]))])
-
-
-

The fol() method converts DRSs into FOL formulae.

-
-
->>> print(dexpr(r'([x],[man(x), walks(x)])').fol())
-exists x.(man(x) & walks(x))
->>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
-all x.(man(x) -> walks(x))
-
-
-

In order to visualize a DRS, the pretty_format() method can be used.

-
-
->>> print(drs3.pretty_format())
-  _________     __________
- | x       |   | y        |
-(|---------| + |----------|)
- | man(x)  |   | woman(y) |
- | walk(x) |   | stop(y)  |
- |_________|   |__________|
-
-
-
-

Parse to semantics

- +
>>> drs1 = dexpr('([x],[man(x),walk(x)])')
+>>> print(drs1)
+([x],[man(x), walk(x)])
+
+
+

DRSs can be merged using the + operator.

+
>>> drs2 = dexpr('([y],[woman(y),stop(y)])')
+>>> drs3 = drs1 + drs2
+>>> print(drs3)
+(([x],[man(x), walk(x)]) + ([y],[woman(y), stop(y)]))
+>>> print(drs3.simplify())
+([x,y],[man(x), walk(x), woman(y), stop(y)])
+
+
+

We can embed DRSs as components of an implies condition.

+
>>> s = '([], [(%s -> %s)])' % (drs1, drs2)
+>>> print(dexpr(s))
+([],[(([x],[man(x), walk(x)]) -> ([y],[woman(y), stop(y)]))])
+
+
+

The fol() method converts DRSs into FOL formulae.

+
>>> print(dexpr(r'([x],[man(x), walks(x)])').fol())
+exists x.(man(x) & walks(x))
+>>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
+all x.(man(x) -> walks(x))
+
+
+
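
The objects returned by fol() are ordinary expressions of nltk.sem.logic, so they can be handed to the provers imported above. A quick sketch, not part of the original doctest:

>>> e = dexpr(r'([x],[man(x), walks(x)])').fol()
>>> isinstance(e, logic.Expression)
True
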

In order to visualize a DRS, the pretty_format() method can be used.

+
>>> print(drs3.pretty_format())
+  _________     __________
+ | x       |   | y        |
+(|---------| + |----------|)
+ | man(x)  |   | woman(y) |
+ | walk(x) |   | stop(y)  |
+ |_________|   |__________|
+
+
+
+

Parse to semantics

DRSs can be used for building compositional semantics in a feature based grammar. To specify that we want to use DRSs, the appropriate logic parser needs to be passed as a parameter to load_earley().

-
-
->>> from nltk.parse import load_parser
->>> from nltk.sem.drt import DrtParser
->>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, logic_parser=DrtParser())
->>> for tree in parser.parse('a dog barks'.split()):
-...     print(tree.label()['SEM'].simplify())
-...
-([x],[dog(x), bark(x)])
-
-
-

Alternatively, a FeatStructReader can be passed with the logic_parser set on it

-
-
->>> from nltk.featstruct import FeatStructReader
->>> from nltk.grammar import FeatStructNonterminal
->>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, fstruct_reader=FeatStructReader(fdict_class=FeatStructNonterminal, logic_parser=DrtParser()))
->>> for tree in parser.parse('every girl chases a dog'.split()):
-...     print(tree.label()['SEM'].simplify().normalize())
-...
-([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chase(z1,z2)]))])
-
-
-
-
-
-

Unit Tests

-
-

Parser

-
-
->>> print(dexpr(r'([x,y],[sees(x,y)])'))
-([x,y],[sees(x,y)])
->>> print(dexpr(r'([x],[man(x), walks(x)])'))
-([x],[man(x), walks(x)])
->>> print(dexpr(r'\x.([],[man(x), walks(x)])'))
-\x.([],[man(x), walks(x)])
->>> print(dexpr(r'\x.\y.([],[sees(x,y)])'))
-\x y.([],[sees(x,y)])
-
-
->>> print(dexpr(r'([x,y],[(x = y)])'))
-([x,y],[(x = y)])
->>> print(dexpr(r'([x,y],[(x != y)])'))
-([x,y],[-(x = y)])
-
-
->>> print(dexpr(r'\x.([],[walks(x)])(john)'))
-(\x.([],[walks(x)]))(john)
->>> print(dexpr(r'\R.\x.([],[big(x,R)])(\y.([],[mouse(y)]))'))
-(\R x.([],[big(x,R)]))(\y.([],[mouse(y)]))
-
-
->>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))'))
-(([x],[walks(x)]) + ([y],[runs(y)]))
->>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))'))
-(([x,y],[walks(x), jumps(y)]) + ([z],[twos(z)]) + ([w],[runs(w)]))
->>> print(dexpr(r'((([],[walks(x)]) + ([],[twos(x)])) + ([],[runs(x)]))'))
-(([],[walks(x)]) + ([],[twos(x)]) + ([],[runs(x)]))
->>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)])) + (([],[threes(x)]) + ([],[fours(x)])))'))
-(([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)]))
-
-
->>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))'))
-(([],[walks(x)]) -> ([],[runs(x)]))
-
-
->>> print(dexpr(r'([x],[PRO(x), sees(John,x)])'))
-([x],[PRO(x), sees(John,x)])
->>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])'))
-([x],[man(x), -([],[walks(x)])])
->>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])'))
-([],[(([x],[man(x)]) -> ([],[walks(x)]))])
-
-
->>> print(dexpr(r'DRS([x],[walk(x)])'))
-([x],[walk(x)])
->>> print(dexpr(r'DRS([x][walk(x)])'))
-([x],[walk(x)])
->>> print(dexpr(r'([x][walk(x)])'))
-([x],[walk(x)])
-
-
-
-
-

simplify()

-
-
->>> print(dexpr(r'\x.([],[man(x), walks(x)])(john)').simplify())
-([],[man(john), walks(john)])
->>> print(dexpr(r'\x.\y.([z],[dog(z),sees(x,y)])(john)(mary)').simplify())
-([z],[dog(z), sees(john,mary)])
->>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').simplify())
-\x.([],[big(x,\y.([],[mouse(y)]))])
-
-
->>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').simplify())
-([x,y],[walks(x), runs(y)])
->>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))').simplify())
-([w,x,y,z],[walks(x), jumps(y), twos(z), runs(w)])
->>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)])))').simplify())
-([],[walks(x), runs(x), threes(x), fours(x)])
->>> dexpr(r'([x],[man(x)])+([x],[walks(x)])').simplify() == \
-... dexpr(r'([x,z1],[man(x), walks(z1)])')
-True
->>> dexpr(r'([y],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)]))])+([x],[run(x)])').simplify() == \
-... dexpr(r'([y,z1],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)])), run(z1)])')
-True
-
-
->>> dexpr(r'\Q.(([x],[john(x),walks(x)]) + Q)(([x],[PRO(x),leaves(x)]))').simplify() == \
-... dexpr(r'([x,z1],[john(x), walks(x), PRO(z1), leaves(z1)])')
-True
-
-
->>> logic._counter._value = 0
->>> print(dexpr('([],[(([x],[dog(x)]) -> ([e,y],[boy(y), chase(e), subj(e,x), obj(e,y)]))])+([e,x],[PRO(x), run(e), subj(e,x)])').simplify().normalize().normalize())
-([e02,z5],[(([z3],[dog(z3)]) -> ([e01,z4],[boy(z4), chase(e01), subj(e01,z3), obj(e01,z4)])), PRO(z5), run(e02), subj(e02,z5)])
-
-
-
-
-

fol()

-
-
->>> print(dexpr(r'([x,y],[sees(x,y)])').fol())
-exists x y.sees(x,y)
->>> print(dexpr(r'([x],[man(x), walks(x)])').fol())
-exists x.(man(x) & walks(x))
->>> print(dexpr(r'\x.([],[man(x), walks(x)])').fol())
-\x.(man(x) & walks(x))
->>> print(dexpr(r'\x y.([],[sees(x,y)])').fol())
-\x y.sees(x,y)
-
-
->>> print(dexpr(r'\x.([],[walks(x)])(john)').fol())
-\x.walks(x)(john)
->>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').fol())
-(\R x.big(x,R))(\y.mouse(y))
-
-
->>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').fol())
-(exists x.walks(x) & exists y.runs(y))
-
-
->>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))').fol())
-(walks(x) -> runs(x))
-
-
->>> print(dexpr(r'([x],[PRO(x), sees(John,x)])').fol())
-exists x.(PRO(x) & sees(John,x))
->>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])').fol())
-exists x.(man(x) & -walks(x))
->>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
-all x.(man(x) -> walks(x))
-
-
->>> print(dexpr(r'([x],[man(x) | walks(x)])').fol())
-exists x.(man(x) | walks(x))
->>> print(dexpr(r'P(x) + ([x],[walks(x)])').fol())
-(P(x) & exists x.walks(x))
-
-
-
-
-

resolve_anaphora()

-
-
->>> from nltk.sem.drt import AnaphoraResolutionException
-
-
->>> print(resolve_anaphora(dexpr(r'([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])')))
-([x,y,z],[dog(x), cat(y), walks(z), (z = [x,y])])
->>> print(resolve_anaphora(dexpr(r'([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])')))
-([],[(([x],[dog(x)]) -> ([y],[walks(y), (y = x)]))])
->>> print(resolve_anaphora(dexpr(r'(([x,y],[]) + ([],[PRO(x)]))')).simplify())
-([x,y],[(x = y)])
->>> try: print(resolve_anaphora(dexpr(r'([x],[walks(x), PRO(x)])')))
-... except AnaphoraResolutionException as e: print(e)
-Variable 'x' does not resolve to anything.
->>> print(resolve_anaphora(dexpr('([e01,z6,z7],[boy(z6), PRO(z7), run(e01), subj(e01,z7)])')))
-([e01,z6,z7],[boy(z6), (z7 = z6), run(e01), subj(e01,z7)])
-
-
-
-
-

equiv():

-
-
->>> a = dexpr(r'([x],[man(x), walks(x)])')
->>> b = dexpr(r'([x],[walks(x), man(x)])')
->>> print(a.equiv(b, TableauProver()))
-True
-
-
-
-
-

replace():

-
-
->>> a = dexpr(r'a')
->>> w = dexpr(r'w')
->>> x = dexpr(r'x')
->>> y = dexpr(r'y')
->>> z = dexpr(r'z')
-
-
-
-
-

replace bound

-
-
->>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, False))
-([x],[give(x,y,z)])
->>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, True))
-([a],[give(a,y,z)])
-
-
-
-
-

replace unbound

-
-
->>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, False))
-([x],[give(x,a,z)])
->>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, True))
-([x],[give(x,a,z)])
-
-
-
-
-

replace unbound with bound

-
-
->>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, False) == \
-... dexpr('([z1],[give(z1,x,z)])')
-True
->>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, True) == \
-... dexpr('([z1],[give(z1,x,z)])')
-True
-
-
-
-
-

replace unbound with unbound

-
-
->>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, False))
-([x],[give(x,z,z)])
->>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, True))
-([x],[give(x,z,z)])
-
-
-
-
-

replace unbound

-
-
->>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False))
-(([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
->>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True))
-(([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
-
-
-
-
-

replace bound

-
-
->>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, False))
-(([x],[P(x,y,z)]) + ([y],[Q(x,y,z)]))
->>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, True))
-(([a],[P(a,y,z)]) + ([y],[Q(a,y,z)]))
-
-
-
-
-

replace unbound with unbound

-
-
->>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False))
-(([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
->>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True))
-(([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
-
-
-
-
-

replace unbound with bound on same side

-
-
->>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, False) == \
-... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))')
-True
->>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, True) == \
-... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))')
-True
-
-
-
-
-

replace unbound with bound on other side

-
-
->>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, False) == \
-... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))')
-True
->>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, True) == \
-... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))')
-True
-
-
-
-
-

replace unbound with double bound

-
-
->>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, False) == \
-... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))')
-True
->>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, True) == \
-... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))')
-True
-
-
-
-
-

regression tests

-
-
->>> d = dexpr('([x],[A(c), ([y],[B(x,y,z,a)])->([z],[C(x,y,z,a)])])')
->>> print(d)
-([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
->>> print(d.pretty_format())
- ____________________________________
-| x                                  |
-|------------------------------------|
-| A(c)                               |
-|   ____________      ____________   |
-|  | y          |    | z          |  |
-| (|------------| -> |------------|) |
-|  | B(x,y,z,a) |    | C(x,y,z,a) |  |
-|  |____________|    |____________|  |
-|____________________________________|
->>> print(str(d))
-([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
->>> print(d.fol())
-exists x.(A(c) & all y.(B(x,y,z,a) -> exists z.C(x,y,z,a)))
->>> print(d.replace(Variable('a'), DrtVariableExpression(Variable('r'))))
-([x],[A(c), (([y],[B(x,y,z,r)]) -> ([z],[C(x,y,z,r)]))])
->>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r'))))
-([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
->>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r'))))
-([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
->>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r'))))
-([x],[A(c), (([y],[B(x,y,r,a)]) -> ([z],[C(x,y,z,a)]))])
->>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r')), True))
-([r],[A(c), (([y],[B(r,y,z,a)]) -> ([z],[C(r,y,z,a)]))])
->>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r')), True))
-([x],[A(c), (([r],[B(x,r,z,a)]) -> ([z],[C(x,r,z,a)]))])
->>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r')), True))
-([x],[A(c), (([y],[B(x,y,r,a)]) -> ([r],[C(x,y,r,a)]))])
->>> print(d == dexpr('([l],[A(c), ([m],[B(l,m,z,a)])->([n],[C(l,m,n,a)])])'))
-True
->>> d = dexpr('([],[([x,y],[B(x,y,h), ([a,b],[dee(x,a,g)])])->([z,w],[cee(x,y,f), ([c,d],[E(x,c,d,e)])])])')
->>> sorted(d.free())
-[Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')]
->>> sorted(d.variables())
-[Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')]
->>> sorted(d.get_refs(True))
-[Variable('a'), Variable('b'), Variable('c'), Variable('d'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
->>> sorted(d.conds[0].get_refs(False))
-[Variable('x'), Variable('y')]
->>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])->([],[C(x,y)]), ([x,y],[D(x,y)])->([],[E(x,y)]), ([],[F(x,y)])->([x,y],[G(x,y)])])').eliminate_equality())
-([x],[A(x,x), (([],[B(x,x)]) -> ([],[C(x,x)])), (([x,y],[D(x,y)]) -> ([],[E(x,y)])), (([],[F(x,x)]) -> ([x,y],[G(x,y)]))])
->>> print(dexpr('([x,y],[A(x,y), (x=y)]) -> ([],[B(x,y)])').eliminate_equality())
-(([x],[A(x,x)]) -> ([],[B(x,x)]))
->>> print(dexpr('([x,y],[A(x,y)]) -> ([],[B(x,y), (x=y)])').eliminate_equality())
-(([x,y],[A(x,y)]) -> ([],[B(x,x)]))
->>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])])').eliminate_equality())
-([x],[A(x,x), ([],[B(x,x)])])
->>> print(dexpr('([x,y],[A(x,y), ([],[B(x,y), (x=y)])])').eliminate_equality())
-([x,y],[A(x,y), ([],[B(x,x)])])
->>> print(dexpr('([z8 z9 z10],[A(z8), z8=z10, z9=z10, B(z9), C(z10), D(z10)])').eliminate_equality())
-([z9],[A(z9), B(z9), C(z9), D(z9)])
-
-
->>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)]), ([x,y],[C(x,y)])])').eliminate_equality())
-([x],[A(x,x), ([],[B(x,x)]), ([x,y],[C(x,y)])])
->>> print(dexpr('([x,y],[A(x,y)]) + ([],[B(x,y), (x=y)]) + ([],[C(x,y)])').eliminate_equality())
-([x],[A(x,x), B(x,x), C(x,x)])
->>> print(dexpr('([x,y],[B(x,y)])+([x,y],[C(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
-(([x,y],[B(x,y)]) + ([x,y],[C(x,y)]))
->>> print(dexpr('(([x,y],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
-(([x,y],[B(x,y)]) + ([],[C(x,y)]) + ([],[D(x,y)]))
->>> print(dexpr('(([],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
-(([],[B(x,x)]) + ([],[C(x,x)]) + ([],[D(x,x)]))
->>> print(dexpr('(([],[B(x,y), ([x,y],[A(x,y)])])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))).normalize())
-(([],[B(z3,z1), ([z2,z3],[A(z3,z2)])]) + ([],[C(z3,z1)]) + ([],[D(z3,z1)]))
-
-
-
-
-
-

Parse errors

-
-
->>> def parse_error(drtstring):
-...     try: dexpr(drtstring)
-...     except logic.LogicalExpressionException as e: print(e)
-
-
->>> parse_error(r'')
-End of input found.  Expression expected.
-<BLANKLINE>
-^
->>> parse_error(r'(')
-End of input found.  Expression expected.
-(
- ^
->>> parse_error(r'()')
-Unexpected token: ')'.  Expression expected.
-()
- ^
->>> parse_error(r'([')
-End of input found.  Expected token ']'.
-([
-  ^
->>> parse_error(r'([,')
-',' is an illegal variable name.  Constants may not be quantified.
-([,
-  ^
->>> parse_error(r'([x,')
-End of input found.  Variable expected.
-([x,
-    ^
->>> parse_error(r'([]')
-End of input found.  Expected token '['.
-([]
-   ^
->>> parse_error(r'([][')
-End of input found.  Expected token ']'.
-([][
-    ^
->>> parse_error(r'([][,')
-Unexpected token: ','.  Expression expected.
-([][,
-    ^
->>> parse_error(r'([][]')
-End of input found.  Expected token ')'.
-([][]
-     ^
->>> parse_error(r'([x][man(x)]) |')
-End of input found.  Expression expected.
-([x][man(x)]) |
-               ^
-
-
-
-
-

Pretty Printing

-
-
->>> dexpr(r"([],[])").pretty_print()
- __
-|  |
-|--|
-|__|
-
-
->>> dexpr(r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])").pretty_print()
- _____________________________
-|                             |
-|-----------------------------|
-|   ________      _________   |
-|  | x      |    |         |  |
-| (|--------| -> |---------|) |
-|  | big(x) |    | bark(x) |  |
-|  | dog(x) |    |_________|  |
-|  |________|                 |
-|      _________              |
-|     | x       |             |
-| __  |---------|             |
-|   | | walk(x) |             |
-|     |_________|             |
-|_____________________________|
-
-
->>> dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pretty_print()
-  _________     _________
- | x y     |   | z       |
-(|---------| + |---------|)
- | (x = y) |   | dog(z)  |
- |_________|   | walk(z) |
-               |_________|
-
-
->>> dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pretty_print()
- _______________________________
-|                               |
-|-------------------------------|
-|   ___     ___     _________   |
-|  | x |   | y |   | z       |  |
-| (|---| | |---| | |---------|) |
-|  |___|   |___|   | dog(z)  |  |
-|                  | walk(z) |  |
-|                  |_________|  |
-|_______________________________|
-
-
->>> dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pretty_print()
-          ___                        ________
- \       | x |                 \    |        |
- /\ P Q.(|---| + P(x) + Q(x))( /\ x.|--------|)
-         |___|                      | dog(x) |
-                                    |________|
-
-
DRSs can be used for building compositional semantics in a feature based grammar. To specify that we want to use DRSs, the appropriate logic parser needs to be passed as a parameter to load_earley().

+
>>> from nltk.parse import load_parser
+>>> from nltk.sem.drt import DrtParser
+>>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, logic_parser=DrtParser())
+>>> for tree in parser.parse('a dog barks'.split()):
+...     print(tree.label()['SEM'].simplify())
+...
+([x],[dog(x), bark(x)])
+
+
+

Alternatively, a FeatStructReader can be passed with the logic_parser set on it

+
>>> from nltk.featstruct import FeatStructReader
+>>> from nltk.grammar import FeatStructNonterminal
+>>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, fstruct_reader=FeatStructReader(fdict_class=FeatStructNonterminal, logic_parser=DrtParser()))
+>>> for tree in parser.parse('every girl chases a dog'.split()):
+...     print(tree.label()['SEM'].simplify().normalize())
+...
+([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chase(z1,z2)]))])
+
+
+ + +
+

Unit Tests

+
+

Parser

+
>>> print(dexpr(r'([x,y],[sees(x,y)])'))
+([x,y],[sees(x,y)])
+>>> print(dexpr(r'([x],[man(x), walks(x)])'))
+([x],[man(x), walks(x)])
+>>> print(dexpr(r'\x.([],[man(x), walks(x)])'))
+\x.([],[man(x), walks(x)])
+>>> print(dexpr(r'\x.\y.([],[sees(x,y)])'))
+\x y.([],[sees(x,y)])
+
+
+
>>> print(dexpr(r'([x,y],[(x = y)])'))
+([x,y],[(x = y)])
+>>> print(dexpr(r'([x,y],[(x != y)])'))
+([x,y],[-(x = y)])
+
+
+
>>> print(dexpr(r'\x.([],[walks(x)])(john)'))
+(\x.([],[walks(x)]))(john)
+>>> print(dexpr(r'\R.\x.([],[big(x,R)])(\y.([],[mouse(y)]))'))
+(\R x.([],[big(x,R)]))(\y.([],[mouse(y)]))
+
+
+
>>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))'))
+(([x],[walks(x)]) + ([y],[runs(y)]))
+>>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))'))
+(([x,y],[walks(x), jumps(y)]) + ([z],[twos(z)]) + ([w],[runs(w)]))
+>>> print(dexpr(r'((([],[walks(x)]) + ([],[twos(x)])) + ([],[runs(x)]))'))
+(([],[walks(x)]) + ([],[twos(x)]) + ([],[runs(x)]))
+>>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)])) + (([],[threes(x)]) + ([],[fours(x)])))'))
+(([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)]))
+
+
+
>>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))'))
+(([],[walks(x)]) -> ([],[runs(x)]))
+
+
+
>>> print(dexpr(r'([x],[PRO(x), sees(John,x)])'))
+([x],[PRO(x), sees(John,x)])
+>>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])'))
+([x],[man(x), -([],[walks(x)])])
+>>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])'))
+([],[(([x],[man(x)]) -> ([],[walks(x)]))])
+
+
+
>>> print(dexpr(r'DRS([x],[walk(x)])'))
+([x],[walk(x)])
+>>> print(dexpr(r'DRS([x][walk(x)])'))
+([x],[walk(x)])
+>>> print(dexpr(r'([x][walk(x)])'))
+([x],[walk(x)])
+
+
+
+
+

simplify()

+
>>> print(dexpr(r'\x.([],[man(x), walks(x)])(john)').simplify())
+([],[man(john), walks(john)])
+>>> print(dexpr(r'\x.\y.([z],[dog(z),sees(x,y)])(john)(mary)').simplify())
+([z],[dog(z), sees(john,mary)])
+>>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').simplify())
+\x.([],[big(x,\y.([],[mouse(y)]))])
+
+
+
>>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').simplify())
+([x,y],[walks(x), runs(y)])
+>>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))').simplify())
+([w,x,y,z],[walks(x), jumps(y), twos(z), runs(w)])
+>>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)])))').simplify())
+([],[walks(x), runs(x), threes(x), fours(x)])
+>>> dexpr(r'([x],[man(x)])+([x],[walks(x)])').simplify() == \
+... dexpr(r'([x,z1],[man(x), walks(z1)])')
+True
+>>> dexpr(r'([y],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)]))])+([x],[run(x)])').simplify() == \
+... dexpr(r'([y,z1],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)])), run(z1)])')
+True
+
+
+
>>> dexpr(r'\Q.(([x],[john(x),walks(x)]) + Q)(([x],[PRO(x),leaves(x)]))').simplify() == \
+... dexpr(r'([x,z1],[john(x), walks(x), PRO(z1), leaves(z1)])')
+True
+
+
>>> logic._counter._value = 0
+>>> print(dexpr('([],[(([x],[dog(x)]) -> ([e,y],[boy(y), chase(e), subj(e,x), obj(e,y)]))])+([e,x],[PRO(x), run(e), subj(e,x)])').simplify().normalize().normalize())
+([e02,z5],[(([z3],[dog(z3)]) -> ([e01,z4],[boy(z4), chase(e01), subj(e01,z3), obj(e01,z4)])), PRO(z5), run(e02), subj(e02,z5)])
+
+
+
+

fol()

+
>>> print(dexpr(r'([x,y],[sees(x,y)])').fol())
+exists x y.sees(x,y)
+>>> print(dexpr(r'([x],[man(x), walks(x)])').fol())
+exists x.(man(x) & walks(x))
+>>> print(dexpr(r'\x.([],[man(x), walks(x)])').fol())
+\x.(man(x) & walks(x))
+>>> print(dexpr(r'\x y.([],[sees(x,y)])').fol())
+\x y.sees(x,y)
+
+
+
>>> print(dexpr(r'\x.([],[walks(x)])(john)').fol())
+\x.walks(x)(john)
+>>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').fol())
+(\R x.big(x,R))(\y.mouse(y))
+
+
+
>>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').fol())
+(exists x.walks(x) & exists y.runs(y))
+
+
+
>>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))').fol())
+(walks(x) -> runs(x))
+
+
+
>>> print(dexpr(r'([x],[PRO(x), sees(John,x)])').fol())
+exists x.(PRO(x) & sees(John,x))
+>>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])').fol())
+exists x.(man(x) & -walks(x))
+>>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
+all x.(man(x) -> walks(x))
+
+
+
>>> print(dexpr(r'([x],[man(x) | walks(x)])').fol())
+exists x.(man(x) | walks(x))
+>>> print(dexpr(r'P(x) + ([x],[walks(x)])').fol())
+(P(x) & exists x.walks(x))
+
+
+
+
+

resolve_anaphora()

+
>>> from nltk.sem.drt import AnaphoraResolutionException
+
+
+
>>> print(resolve_anaphora(dexpr(r'([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])')))
+([x,y,z],[dog(x), cat(y), walks(z), (z = [x,y])])
+>>> print(resolve_anaphora(dexpr(r'([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])')))
+([],[(([x],[dog(x)]) -> ([y],[walks(y), (y = x)]))])
+>>> print(resolve_anaphora(dexpr(r'(([x,y],[]) + ([],[PRO(x)]))')).simplify())
+([x,y],[(x = y)])
+>>> try: print(resolve_anaphora(dexpr(r'([x],[walks(x), PRO(x)])')))
+... except AnaphoraResolutionException as e: print(e)
+Variable 'x' does not resolve to anything.
+>>> print(resolve_anaphora(dexpr('([e01,z6,z7],[boy(z6), PRO(z7), run(e01), subj(e01,z7)])')))
+([e01,z6,z7],[boy(z6), (z7 = z6), run(e01), subj(e01,z7)])
+
+
+
+
+

equiv():

+
>>> a = dexpr(r'([x],[man(x), walks(x)])')
+>>> b = dexpr(r'([x],[walks(x), man(x)])')
+>>> print(a.equiv(b, TableauProver()))
+True
+
+
+
+
+

replace():

+
>>> a = dexpr(r'a')
+>>> w = dexpr(r'w')
+>>> x = dexpr(r'x')
+>>> y = dexpr(r'y')
+>>> z = dexpr(r'z')
+
+
+
+
+

replace bound

+
>>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, False))
+([x],[give(x,y,z)])
+>>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, True))
+([a],[give(a,y,z)])
+
+
+
+
+

replace unbound

+
>>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, False))
+([x],[give(x,a,z)])
+>>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, True))
+([x],[give(x,a,z)])
+
+
+
+
+

replace unbound with bound

+
>>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, False) == \
+... dexpr('([z1],[give(z1,x,z)])')
+True
+>>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, True) == \
+... dexpr('([z1],[give(z1,x,z)])')
+True
+
+
+
+
+

replace unbound with unbound

+
>>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, False))
+([x],[give(x,z,z)])
+>>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, True))
+([x],[give(x,z,z)])
+
+
+
+
+

replace unbound

+
>>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False))
+(([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
+>>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True))
+(([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
+
+
+
+
+

replace bound

+
>>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, False))
+(([x],[P(x,y,z)]) + ([y],[Q(x,y,z)]))
+>>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, True))
+(([a],[P(a,y,z)]) + ([y],[Q(a,y,z)]))
+
+
+
+
+

replace unbound with unbound

+
>>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False))
+(([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
+>>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True))
+(([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
+
+
+
+
+

replace unbound with bound on same side

+
>>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, False) == \
+... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))')
+True
+>>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, True) == \
+... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))')
+True
+
+
+
+
+

replace unbound with bound on other side

+
>>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, False) == \
+... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))')
+True
+>>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, True) == \
+... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))')
+True
+
+
+
+
+

replace unbound with double bound

+
>>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, False) == \
+... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))')
+True
+>>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, True) == \
+... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))')
+True
+
+
+
+
+

regression tests

+
>>> d = dexpr('([x],[A(c), ([y],[B(x,y,z,a)])->([z],[C(x,y,z,a)])])')
+>>> print(d)
+([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
+>>> print(d.pretty_format())
+ ____________________________________
+| x                                  |
+|------------------------------------|
+| A(c)                               |
+|   ____________      ____________   |
+|  | y          |    | z          |  |
+| (|------------| -> |------------|) |
+|  | B(x,y,z,a) |    | C(x,y,z,a) |  |
+|  |____________|    |____________|  |
+|____________________________________|
+>>> print(str(d))
+([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
+>>> print(d.fol())
+exists x.(A(c) & all y.(B(x,y,z,a) -> exists z.C(x,y,z,a)))
+>>> print(d.replace(Variable('a'), DrtVariableExpression(Variable('r'))))
+([x],[A(c), (([y],[B(x,y,z,r)]) -> ([z],[C(x,y,z,r)]))])
+>>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r'))))
+([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
+>>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r'))))
+([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
+>>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r'))))
+([x],[A(c), (([y],[B(x,y,r,a)]) -> ([z],[C(x,y,z,a)]))])
+>>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r')), True))
+([r],[A(c), (([y],[B(r,y,z,a)]) -> ([z],[C(r,y,z,a)]))])
+>>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r')), True))
+([x],[A(c), (([r],[B(x,r,z,a)]) -> ([z],[C(x,r,z,a)]))])
+>>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r')), True))
+([x],[A(c), (([y],[B(x,y,r,a)]) -> ([r],[C(x,y,r,a)]))])
+>>> print(d == dexpr('([l],[A(c), ([m],[B(l,m,z,a)])->([n],[C(l,m,n,a)])])'))
+True
+>>> d = dexpr('([],[([x,y],[B(x,y,h), ([a,b],[dee(x,a,g)])])->([z,w],[cee(x,y,f), ([c,d],[E(x,c,d,e)])])])')
+>>> sorted(d.free())
+[Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')]
+>>> sorted(d.variables())
+[Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')]
+>>> sorted(d.get_refs(True))
+[Variable('a'), Variable('b'), Variable('c'), Variable('d'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
+>>> sorted(d.conds[0].get_refs(False))
+[Variable('x'), Variable('y')]
+>>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])->([],[C(x,y)]), ([x,y],[D(x,y)])->([],[E(x,y)]), ([],[F(x,y)])->([x,y],[G(x,y)])])').eliminate_equality())
+([x],[A(x,x), (([],[B(x,x)]) -> ([],[C(x,x)])), (([x,y],[D(x,y)]) -> ([],[E(x,y)])), (([],[F(x,x)]) -> ([x,y],[G(x,y)]))])
+>>> print(dexpr('([x,y],[A(x,y), (x=y)]) -> ([],[B(x,y)])').eliminate_equality())
+(([x],[A(x,x)]) -> ([],[B(x,x)]))
+>>> print(dexpr('([x,y],[A(x,y)]) -> ([],[B(x,y), (x=y)])').eliminate_equality())
+(([x,y],[A(x,y)]) -> ([],[B(x,x)]))
+>>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])])').eliminate_equality())
+([x],[A(x,x), ([],[B(x,x)])])
+>>> print(dexpr('([x,y],[A(x,y), ([],[B(x,y), (x=y)])])').eliminate_equality())
+([x,y],[A(x,y), ([],[B(x,x)])])
+>>> print(dexpr('([z8 z9 z10],[A(z8), z8=z10, z9=z10, B(z9), C(z10), D(z10)])').eliminate_equality())
+([z9],[A(z9), B(z9), C(z9), D(z9)])
+
+
+
>>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)]), ([x,y],[C(x,y)])])').eliminate_equality())
+([x],[A(x,x), ([],[B(x,x)]), ([x,y],[C(x,y)])])
+>>> print(dexpr('([x,y],[A(x,y)]) + ([],[B(x,y), (x=y)]) + ([],[C(x,y)])').eliminate_equality())
+([x],[A(x,x), B(x,x), C(x,x)])
+>>> print(dexpr('([x,y],[B(x,y)])+([x,y],[C(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
+(([x,y],[B(x,y)]) + ([x,y],[C(x,y)]))
+>>> print(dexpr('(([x,y],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
+(([x,y],[B(x,y)]) + ([],[C(x,y)]) + ([],[D(x,y)]))
+>>> print(dexpr('(([],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
+(([],[B(x,x)]) + ([],[C(x,x)]) + ([],[D(x,x)]))
+>>> print(dexpr('(([],[B(x,y), ([x,y],[A(x,y)])])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))).normalize())
+(([],[B(z3,z1), ([z2,z3],[A(z3,z2)])]) + ([],[C(z3,z1)]) + ([],[D(z3,z1)]))
+
+
+
+
+
+

Parse errors

+
>>> def parse_error(drtstring):
+...     try: dexpr(drtstring)
+...     except logic.LogicalExpressionException as e: print(e)
+
+
+
>>> parse_error(r'')
+End of input found.  Expression expected.
+
+^
+>>> parse_error(r'(')
+End of input found.  Expression expected.
+(
+ ^
+>>> parse_error(r'()')
+Unexpected token: ')'.  Expression expected.
+()
+ ^
+>>> parse_error(r'([')
+End of input found.  Expected token ']'.
+([
+  ^
+>>> parse_error(r'([,')
+',' is an illegal variable name.  Constants may not be quantified.
+([,
+  ^
+>>> parse_error(r'([x,')
+End of input found.  Variable expected.
+([x,
+    ^
+>>> parse_error(r'([]')
+End of input found.  Expected token '['.
+([]
+   ^
+>>> parse_error(r'([][')
+End of input found.  Expected token ']'.
+([][
+    ^
+>>> parse_error(r'([][,')
+Unexpected token: ','.  Expression expected.
+([][,
+    ^
+>>> parse_error(r'([][]')
+End of input found.  Expected token ')'.
+([][]
+     ^
+>>> parse_error(r'([x][man(x)]) |')
+End of input found.  Expression expected.
+([x][man(x)]) |
+               ^
+
+
+
+
+

Pretty Printing

+
>>> dexpr(r"([],[])").pretty_print()
+ __
+|  |
+|--|
+|__|
+
+
+
>>> dexpr(r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])").pretty_print()
+ _____________________________
+|                             |
+|-----------------------------|
+|   ________      _________   |
+|  | x      |    |         |  |
+| (|--------| -> |---------|) |
+|  | big(x) |    | bark(x) |  |
+|  | dog(x) |    |_________|  |
+|  |________|                 |
+|      _________              |
+|     | x       |             |
+| __  |---------|             |
+|   | | walk(x) |             |
+|     |_________|             |
+|_____________________________|
+
+
+
>>> dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pretty_print()
+  _________     _________
+ | x y     |   | z       |
+(|---------| + |---------|)
+ | (x = y) |   | dog(z)  |
+ |_________|   | walk(z) |
+               |_________|
+
+
+
>>> dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pretty_print()
+ _______________________________
+|                               |
+|-------------------------------|
+|   ___     ___     _________   |
+|  | x |   | y |   | z       |  |
+| (|---| | |---| | |---------|) |
+|  |___|   |___|   | dog(z)  |  |
+|                  | walk(z) |  |
+|                  |_________|  |
+|_______________________________|
+
+
+
>>> dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pretty_print()
+          ___                        ________
+ \       | x |                 \    |        |
+ /\ P Q.(|---| + P(x) + Q(x))( /\ x.|--------|)
+         |___|                      | dog(x) |
+                                    |________|
+
+
+
\ No newline at end of file
diff --git a/howto/featgram.html b/howto/featgram.html
index 99abbf7ff..4beb0d1e9 100644
--- a/howto/featgram.html
+++ b/howto/featgram.html
@@ -1,484 +1,200 @@
-

Feature Grammar Parsing


Sample usage for featgram

+
+

Feature Grammar Parsing

Grammars can be parsed from strings.

-
-
->>> from __future__ import print_function
->>> import nltk
->>> from nltk import grammar, parse
->>> g = """
-... % start DP
-... DP[AGR=?a] -> D[AGR=?a] N[AGR=?a]
-... D[AGR=[NUM='sg', PERS=3]] -> 'this' | 'that'
-... D[AGR=[NUM='pl', PERS=3]] -> 'these' | 'those'
-... D[AGR=[NUM='pl', PERS=1]] -> 'we'
-... D[AGR=[PERS=2]] -> 'you'
-... N[AGR=[NUM='sg', GND='m']] -> 'boy'
-... N[AGR=[NUM='pl', GND='m']] -> 'boys'
-... N[AGR=[NUM='sg', GND='f']] -> 'girl'
-... N[AGR=[NUM='pl', GND='f']] -> 'girls'
-... N[AGR=[NUM='sg']] -> 'student'
-... N[AGR=[NUM='pl']] -> 'students'
-... """
->>> grammar = grammar.FeatureGrammar.fromstring(g)
->>> tokens = 'these girls'.split()
->>> parser = parse.FeatureEarleyChartParser(grammar)
->>> trees = parser.parse(tokens)
->>> for tree in trees: print(tree)
-(DP[AGR=[GND='f', NUM='pl', PERS=3]]
-  (D[AGR=[NUM='pl', PERS=3]] these)
-  (N[AGR=[GND='f', NUM='pl']] girls))
-
-
+
>>> import nltk
+>>> from nltk import grammar, parse
+>>> g = """
+... % start DP
+... DP[AGR=?a] -> D[AGR=?a] N[AGR=?a]
+... D[AGR=[NUM='sg', PERS=3]] -> 'this' | 'that'
+... D[AGR=[NUM='pl', PERS=3]] -> 'these' | 'those'
+... D[AGR=[NUM='pl', PERS=1]] -> 'we'
+... D[AGR=[PERS=2]] -> 'you'
+... N[AGR=[NUM='sg', GND='m']] -> 'boy'
+... N[AGR=[NUM='pl', GND='m']] -> 'boys'
+... N[AGR=[NUM='sg', GND='f']] -> 'girl'
+... N[AGR=[NUM='pl', GND='f']] -> 'girls'
+... N[AGR=[NUM='sg']] -> 'student'
+... N[AGR=[NUM='pl']] -> 'students'
+... """
+>>> grammar = grammar.FeatureGrammar.fromstring(g)
+>>> tokens = 'these girls'.split()
+>>> parser = parse.FeatureEarleyChartParser(grammar)
+>>> trees = parser.parse(tokens)
+>>> for tree in trees: print(tree)
+(DP[AGR=[GND='f', NUM='pl', PERS=3]]
+  (D[AGR=[NUM='pl', PERS=3]] these)
+  (N[AGR=[GND='f', NUM='pl']] girls))
+
+

In general, when we are trying to develop even a very small grammar, it is convenient to put the rules in a file where they can be edited, tested and revised. Let's assume that we have saved feat0cfg as a file named 'feat0.fcfg' and placed it in the NLTK data directory. We can inspect it as follows:

-
-
->>> nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')
-% start S
-# ###################
-# Grammar Productions
-# ###################
-# S expansion productions
-S -> NP[NUM=?n] VP[NUM=?n]
-# NP expansion productions
-NP[NUM=?n] -> N[NUM=?n]
-NP[NUM=?n] -> PropN[NUM=?n]
-NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
-NP[NUM=pl] -> N[NUM=pl]
-# VP expansion productions
-VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
-VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
-# ###################
-# Lexical Productions
-# ###################
-Det[NUM=sg] -> 'this' | 'every'
-Det[NUM=pl] -> 'these' | 'all'
-Det -> 'the' | 'some' | 'several'
-PropN[NUM=sg]-> 'Kim' | 'Jody'
-N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
-N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children'
-IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks'
-TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
-IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk'
-TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
-IV[TENSE=past] -> 'disappeared' | 'walked'
-TV[TENSE=past] -> 'saw' | 'liked'
-
-
-

Assuming we have saved feat0cfg as a file named 'feat0.fcfg', the function parse.load_parser allows us to read the grammar into NLTK, ready for use in parsing.

>>> nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')
+% start S
+# ###################
+# Grammar Productions
+# ###################
+# S expansion productions
+S -> NP[NUM=?n] VP[NUM=?n]
+# NP expansion productions
+NP[NUM=?n] -> N[NUM=?n]
+NP[NUM=?n] -> PropN[NUM=?n]
+NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
+NP[NUM=pl] -> N[NUM=pl]
+# VP expansion productions
+VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
+VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
+# ###################
+# Lexical Productions
+# ###################
+Det[NUM=sg] -> 'this' | 'every'
+Det[NUM=pl] -> 'these' | 'all'
+Det -> 'the' | 'some' | 'several'
+PropN[NUM=sg]-> 'Kim' | 'Jody'
+N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
+N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children'
+IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks'
+TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
+IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk'
+TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
+IV[TENSE=past] -> 'disappeared' | 'walked'
+TV[TENSE=past] -> 'saw' | 'liked'
+
+
+

Assuming we have saved feat0cfg as a file named 'feat0.fcfg', the function parse.load_parser allows us to read the grammar into NLTK, ready for use in parsing.

-
-
->>> cp = parse.load_parser('grammars/book_grammars/feat0.fcfg', trace=1)
->>> sent = 'Kim likes children'
->>> tokens = sent.split()
->>> tokens
-['Kim', 'likes', 'children']
->>> trees = cp.parse(tokens)
-|.Kim .like.chil.|
-|[----]    .    .| [0:1] 'Kim'
-|.    [----]    .| [1:2] 'likes'
-|.    .    [----]| [2:3] 'children'
-|[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
-|[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
-|[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
-|.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
-|.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
-|.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
-|.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
-|.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
-|.    [---------]| [1:3] VP[NUM='sg', TENSE='pres'] -> TV[NUM='sg', TENSE='pres'] NP[] *
-|[==============]| [0:3] S[] -> NP[NUM='sg'] VP[NUM='sg'] *
->>> for tree in trees: print(tree)
-(S[]
-  (NP[NUM='sg'] (PropN[NUM='sg'] Kim))
-  (VP[NUM='sg', TENSE='pres']
-    (TV[NUM='sg', TENSE='pres'] likes)
-    (NP[NUM='pl'] (N[NUM='pl'] children))))
-
-
+
>>> cp = parse.load_parser('grammars/book_grammars/feat0.fcfg', trace=1)
+>>> sent = 'Kim likes children'
+>>> tokens = sent.split()
+>>> tokens
+['Kim', 'likes', 'children']
+>>> trees = cp.parse(tokens)
+|.Kim .like.chil.|
+|[----]    .    .| [0:1] 'Kim'
+|.    [----]    .| [1:2] 'likes'
+|.    .    [----]| [2:3] 'children'
+|[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
+|[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
+|[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
+|.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
+|.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
+|.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
+|.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
+|.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
+|.    [---------]| [1:3] VP[NUM='sg', TENSE='pres'] -> TV[NUM='sg', TENSE='pres'] NP[] *
+|[==============]| [0:3] S[] -> NP[NUM='sg'] VP[NUM='sg'] *
+>>> for tree in trees: print(tree)
+(S[]
+  (NP[NUM='sg'] (PropN[NUM='sg'] Kim))
+  (VP[NUM='sg', TENSE='pres']
+    (TV[NUM='sg', TENSE='pres'] likes)
+    (NP[NUM='pl'] (N[NUM='pl'] children))))
+
+

The parser works directly with the underspecified productions given by the grammar. That is, the Predictor rule does not attempt to compile out all admissible feature combinations before trying to expand the non-terminals on the left hand side of a production. However, when the Scanner matches an input word against a lexical production that has been predicted, the new edge will typically contain fully specified features; e.g., the edge [PropN[num=sg] → 'Kim', (0, 1)]. Recall from Chapter 8 that the Fundamental (or Completer) Rule in standard CFGs is used to combine an incomplete edge that's expecting a nonterminal B with a following, complete edge whose left hand side matches B. In our current setting, rather than checking for a complete match, we test whether the expected category B will unify with the left hand side B' of a following complete edge. We will explain in more detail in Section 9.2 how unification works; for the moment, it is enough to know that as a result of unification, any variable values of features in B will be instantiated by constant values in the corresponding feature structure in B', and these instantiated values will be used in the new edge added by the Completer. This instantiation can be seen, for example, in the edge [NP[num=sg] → PropN[num=sg] •, (0, 1)] in Example 9.2, where the feature num has been assigned the value sg.
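This instantiation step can be reproduced in isolation with nltk.unify; the following is a minimal sketch, where the two feature structures stand in for the categories B and B' above:

>>> import nltk
>>> B = nltk.FeatStruct('[NUM=?n]')        # expected category, NUM still a variable
>>> Bprime = nltk.FeatStruct('[NUM=sg]')   # left hand side of the complete edge
>>> print(nltk.unify(B, Bprime))           # ?n is instantiated to 'sg'
[ NUM = 'sg' ]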

-
-



Feature structures in NLTK are … Atomic feature values can be strings or integers.

-
-
->>> fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
->>> print(fs1)
-[ NUM   = 'sg'   ]
-[ TENSE = 'past' ]
-
-
+
>>> fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
+>>> print(fs1)
+[ NUM   = 'sg'   ]
+[ TENSE = 'past' ]
+
+

We can think of a feature structure as being like a Python dictionary, and access its values by indexing in the usual way.

-
-
->>> fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
->>> print(fs1['GND'])
-fem
-
-
+
>>> fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
+>>> print(fs1['GND'])
+fem
+
+

We can also define feature structures which have complex values, as discussed earlier.

-
-
->>> fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
->>> print(fs2)
-[       [ GND = 'fem' ] ]
-[ AGR = [ NUM = 'pl'  ] ]
-[       [ PER = 3     ] ]
-[                       ]
-[ POS = 'N'             ]
->>> print(fs2['AGR'])
-[ GND = 'fem' ]
-[ NUM = 'pl'  ]
-[ PER = 3     ]
->>> print(fs2['AGR']['PER'])
-3
-
-
-


>>> fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
+>>> print(fs2)
+[       [ GND = 'fem' ] ]
+[ AGR = [ NUM = 'pl'  ] ]
+[       [ PER = 3     ] ]
+[                       ]
+[ POS = 'N'             ]
+>>> print(fs2['AGR'])
+[ GND = 'fem' ]
+[ NUM = 'pl'  ]
+[ PER = 3     ]
+>>> print(fs2['AGR']['PER'])
+3
+
+
+

Feature structures can also be constructed using the parse() method of the nltk.FeatStruct class. Note that in this case, atomic feature values do not need to be enclosed in quotes.

-
-
->>> f1 = nltk.FeatStruct("[NUMBER = sg]")
->>> f2 = nltk.FeatStruct("[PERSON = 3]")
->>> print(nltk.unify(f1, f2))
-[ NUMBER = 'sg' ]
-[ PERSON = 3    ]
-
-
->>> f1 = nltk.FeatStruct("[A = [B = b, D = d]]")
->>> f2 = nltk.FeatStruct("[A = [C = c, D = d]]")
->>> print(nltk.unify(f1, f2))
-[     [ B = 'b' ] ]
-[ A = [ C = 'c' ] ]
-[     [ D = 'd' ] ]
-
-
-
-

Feature Structures as Graphs

+
>>> f1 = nltk.FeatStruct("[NUMBER = sg]")
+>>> f2 = nltk.FeatStruct("[PERSON = 3]")
+>>> print(nltk.unify(f1, f2))
+[ NUMBER = 'sg' ]
+[ PERSON = 3    ]
+
+
+
>>> f1 = nltk.FeatStruct("[A = [B = b, D = d]]")
+>>> f2 = nltk.FeatStruct("[A = [C = c, D = d]]")
+>>> print(nltk.unify(f1, f2))
+[     [ B = 'b' ] ]
+[ A = [ C = 'c' ] ]
+[     [ D = 'd' ] ]
+
+
+
+

Feature Structures as Graphs

Feature structures are not inherently tied to linguistic objects; they are general purpose structures for representing knowledge. For example, we could encode information about a person in a feature structure:

-
-
->>> person01 = nltk.FeatStruct("[NAME=Lee, TELNO='01 27 86 42 96',AGE=33]")
->>> print(person01)
-[ AGE   = 33               ]
-[ NAME  = 'Lee'            ]
-[ TELNO = '01 27 86 42 96' ]
-
-
+
>>> person01 = nltk.FeatStruct("[NAME=Lee, TELNO='01 27 86 42 96',AGE=33]")
+>>> print(person01)
+[ AGE   = 33               ]
+[ NAME  = 'Lee'            ]
+[ TELNO = '01 27 86 42 96' ]
+
+

There are a number of notations for representing reentrancy in matrix-style representations of feature structures. In NLTK, we adopt the following convention: the first occurrence of a shared feature structure is prefixed with an integer in parentheses, such as (1), and any subsequent reference to that structure uses the notation ->(1), as shown below.

-
-
->>> fs = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
-...                               SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
->>> print(fs)
-[ ADDRESS = (1) [ NUMBER = 74           ] ]
-[               [ STREET = 'rue Pascal' ] ]
-[                                         ]
-[ NAME    = 'Lee'                         ]
-[                                         ]
-[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
-[           [ NAME    = 'Kim' ]           ]
-
-

+
>>> fs = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
+...                               SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
+>>> print(fs)
+[ ADDRESS = (1) [ NUMBER = 74           ] ]
+[               [ STREET = 'rue Pascal' ] ]
+[                                         ]
+[ NAME    = 'Lee'                         ]
+[                                         ]
+[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
+[           [ NAME    = 'Kim' ]           ]
+
+

There can be any number of tags within a single feature structure.

-
-
->>> fs3 = nltk.FeatStruct("[A=(1)[B=b], C=(2)[], D->(1), E->(2)]")
->>> print(fs3)
-[ A = (1) [ B = 'b' ] ]
-[                     ]
-[ C = (2) []          ]
-[                     ]
-[ D -> (1)            ]
-[ E -> (2)            ]
->>> fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
->>> fs2 = nltk.FeatStruct(CITY='Paris')
->>> print(nltk.unify(fs1, fs2))
-[ CITY   = 'Paris'      ]
-[ NUMBER = 74           ]
-[ STREET = 'rue Pascal' ]
-
-
+
>>> fs3 = nltk.FeatStruct("[A=(1)[B=b], C=(2)[], D->(1), E->(2)]")
+>>> print(fs3)
+[ A = (1) [ B = 'b' ] ]
+[                     ]
+[ C = (2) []          ]
+[                     ]
+[ D -> (1)            ]
+[ E -> (2)            ]
+>>> fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
+>>> fs2 = nltk.FeatStruct(CITY='Paris')
+>>> print(nltk.unify(fs1, fs2))
+[ CITY   = 'Paris'      ]
+[ NUMBER = 74           ]
+[ STREET = 'rue Pascal' ]
+
+

Unification is symmetric:

-
-
->>> nltk.unify(fs1, fs2) == nltk.unify(fs2, fs1)
-True
-
-
+
>>> nltk.unify(fs1, fs2) == nltk.unify(fs2, fs1)
+True
+
+

Unification is associative:

-
-
->>> fs3 = nltk.FeatStruct(TELNO='01 27 86 42 96')
->>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3))
-True
-
-
+
>>> fs3 = nltk.FeatStruct(TELNO='01 27 86 42 96')
+>>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3))
+True
+
+

Unification between FS0 and FS1 will fail if the two feature structures share a path π, but the value of π in FS0 is a distinct atom from the value of π in FS1. In NLTK, this is implemented by setting the result of unification to be None.

-
-
->>> fs0 = nltk.FeatStruct(A='a')
->>> fs1 = nltk.FeatStruct(A='b')
->>> print(nltk.unify(fs0, fs1))
-None
-
-

+
>>> fs0 = nltk.FeatStruct(A='a')
+>>> fs1 = nltk.FeatStruct(A='b')
+>>> print(nltk.unify(fs0, fs1))
+None
+
+

Now, if we look at how unification interacts with structure-sharing, things become really interesting.

-
-
->>> fs0 = nltk.FeatStruct("""[NAME=Lee,
-...                                ADDRESS=[NUMBER=74,
-...                                         STREET='rue Pascal'],
-...                                SPOUSE= [NAME=Kim,
-...                                         ADDRESS=[NUMBER=74,
-...                                                  STREET='rue Pascal']]]""")
->>> print(fs0)
-[ ADDRESS = [ NUMBER = 74           ]               ]
-[           [ STREET = 'rue Pascal' ]               ]
-[                                                   ]
-[ NAME    = 'Lee'                                   ]
-[                                                   ]
-[           [ ADDRESS = [ NUMBER = 74           ] ] ]
-[ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
-[           [                                     ] ]
-[           [ NAME    = 'Kim'                     ] ]
-
-
->>> fs1 = nltk.FeatStruct("[SPOUSE=[ADDRESS=[CITY=Paris]]]")
->>> print(nltk.unify(fs0, fs1))
-[ ADDRESS = [ NUMBER = 74           ]               ]
-[           [ STREET = 'rue Pascal' ]               ]
-[                                                   ]
-[ NAME    = 'Lee'                                   ]
-[                                                   ]
-[           [           [ CITY   = 'Paris'      ] ] ]
-[           [ ADDRESS = [ NUMBER = 74           ] ] ]
-[ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
-[           [                                     ] ]
-[           [ NAME    = 'Kim'                     ] ]
-
-
->>> fs2 = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
-...                                SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
-
-
->>> print(fs2)
-[ ADDRESS = (1) [ NUMBER = 74           ] ]
-[               [ STREET = 'rue Pascal' ] ]
-[                                         ]
-[ NAME    = 'Lee'                         ]
-[                                         ]
-[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
-[           [ NAME    = 'Kim' ]           ]
-
-
->>> print(nltk.unify(fs2, fs1))
-[               [ CITY   = 'Paris'      ] ]
-[ ADDRESS = (1) [ NUMBER = 74           ] ]
-[               [ STREET = 'rue Pascal' ] ]
-[                                         ]
-[ NAME    = 'Lee'                         ]
-[                                         ]
-[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
-[           [ NAME    = 'Kim' ]           ]
-
-
->>> fs1 = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
->>> fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
->>> print(fs2)
-[ ADDRESS1 = ?x ]
-[ ADDRESS2 = ?x ]
->>> print(nltk.unify(fs1, fs2))
-[ ADDRESS1 = (1) [ NUMBER = 74           ] ]
-[                [ STREET = 'rue Pascal' ] ]
-[                                          ]
-[ ADDRESS2 -> (1)                          ]
-
-
->>> sent = 'who do you claim that you like'
->>> tokens = sent.split()
->>> cp = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1)
->>> trees = cp.parse(tokens)
-|.w.d.y.c.t.y.l.|
-|[-] . . . . . .| [0:1] 'who'
-|. [-] . . . . .| [1:2] 'do'
-|. . [-] . . . .| [2:3] 'you'
-|. . . [-] . . .| [3:4] 'claim'
-|. . . . [-] . .| [4:5] 'that'
-|. . . . . [-] .| [5:6] 'you'
-|. . . . . . [-]| [6:7] 'like'
-|# . . . . . . .| [0:0] NP[]/NP[] -> *
-|. # . . . . . .| [1:1] NP[]/NP[] -> *
-|. . # . . . . .| [2:2] NP[]/NP[] -> *
-|. . . # . . . .| [3:3] NP[]/NP[] -> *
-|. . . . # . . .| [4:4] NP[]/NP[] -> *
-|. . . . . # . .| [5:5] NP[]/NP[] -> *
-|. . . . . . # .| [6:6] NP[]/NP[] -> *
-|. . . . . . . #| [7:7] NP[]/NP[] -> *
-|[-] . . . . . .| [0:1] NP[+WH] -> 'who' *
-|[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {}
-|[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-|[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {}
-|. [-] . . . . .| [1:2] V[+AUX] -> 'do' *
-|. [-> . . . . .| [1:2] S[+INV] -> V[+AUX] * NP[] VP[] {}
-|. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {}
-|. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {}
-|. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {}
-|. . [-] . . . .| [2:3] NP[-WH] -> 'you' *
-|. . [-> . . . .| [2:3] S[-INV] -> NP[] * VP[] {}
-|. . [-> . . . .| [2:3] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-|. . [-> . . . .| [2:3] S[-INV] -> NP[] * S[]/NP[] {}
-|. [---> . . . .| [1:3] S[+INV] -> V[+AUX] NP[] * VP[] {}
-|. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {}
-|. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' *
-|. . . [-> . . .| [3:4] VP[] -> V[-AUX, SUBCAT='clause'] * SBar[] {}
-|. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {}
-|. . . . [-] . .| [4:5] Comp[] -> 'that' *
-|. . . . [-> . .| [4:5] SBar[] -> Comp[] * S[-INV] {}
-|. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {}
-|. . . . . [-] .| [5:6] NP[-WH] -> 'you' *
-|. . . . . [-> .| [5:6] S[-INV] -> NP[] * VP[] {}
-|. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-|. . . . . [-> .| [5:6] S[-INV] -> NP[] * S[]/NP[] {}
-|. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' *
-|. . . . . . [->| [6:7] VP[] -> V[-AUX, SUBCAT='trans'] * NP[] {}
-|. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {}
-|. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] *
-|. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
-|. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] *
-|. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] *
-|. . [---------]| [2:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
-|. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] *
-|[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] *
-
-
->>> trees = list(trees)
->>> for tree in trees: print(tree)
-(S[-INV]
-  (NP[+WH] who)
-  (S[+INV]/NP[]
-    (V[+AUX] do)
-    (NP[-WH] you)
-    (VP[]/NP[]
-      (V[-AUX, SUBCAT='clause'] claim)
-      (SBar[]/NP[]
-        (Comp[] that)
-        (S[-INV]/NP[]
-          (NP[-WH] you)
-          (VP[]/NP[] (V[-AUX, SUBCAT='trans'] like) (NP[]/NP[] )))))))
-
-
+
>>> fs0 = nltk.FeatStruct("""[NAME=Lee,
+...                                ADDRESS=[NUMBER=74,
+...                                         STREET='rue Pascal'],
+...                                SPOUSE= [NAME=Kim,
+...                                         ADDRESS=[NUMBER=74,
+...                                                  STREET='rue Pascal']]]""")
+>>> print(fs0)
+[ ADDRESS = [ NUMBER = 74           ]               ]
+[           [ STREET = 'rue Pascal' ]               ]
+[                                                   ]
+[ NAME    = 'Lee'                                   ]
+[                                                   ]
+[           [ ADDRESS = [ NUMBER = 74           ] ] ]
+[ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
+[           [                                     ] ]
+[           [ NAME    = 'Kim'                     ] ]
+
+
+
>>> fs1 = nltk.FeatStruct("[SPOUSE=[ADDRESS=[CITY=Paris]]]")
+>>> print(nltk.unify(fs0, fs1))
+[ ADDRESS = [ NUMBER = 74           ]               ]
+[           [ STREET = 'rue Pascal' ]               ]
+[                                                   ]
+[ NAME    = 'Lee'                                   ]
+[                                                   ]
+[           [           [ CITY   = 'Paris'      ] ] ]
+[           [ ADDRESS = [ NUMBER = 74           ] ] ]
+[ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
+[           [                                     ] ]
+[           [ NAME    = 'Kim'                     ] ]
+
+
+
>>> fs2 = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
+...                                SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
+
+
+
>>> print(fs2)
+[ ADDRESS = (1) [ NUMBER = 74           ] ]
+[               [ STREET = 'rue Pascal' ] ]
+[                                         ]
+[ NAME    = 'Lee'                         ]
+[                                         ]
+[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
+[           [ NAME    = 'Kim' ]           ]
+
+
+
>>> print(nltk.unify(fs2, fs1))
+[               [ CITY   = 'Paris'      ] ]
+[ ADDRESS = (1) [ NUMBER = 74           ] ]
+[               [ STREET = 'rue Pascal' ] ]
+[                                         ]
+[ NAME    = 'Lee'                         ]
+[                                         ]
+[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
+[           [ NAME    = 'Kim' ]           ]
+
+
+
>>> fs1 = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
+>>> fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
+>>> print(fs2)
+[ ADDRESS1 = ?x ]
+[ ADDRESS2 = ?x ]
+>>> print(nltk.unify(fs1, fs2))
+[ ADDRESS1 = (1) [ NUMBER = 74           ] ]
+[                [ STREET = 'rue Pascal' ] ]
+[                                          ]
+[ ADDRESS2 -> (1)                          ]
+
+
+
>>> sent = 'who do you claim that you like'
+>>> tokens = sent.split()
+>>> cp = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1)
+>>> trees = cp.parse(tokens)
+|.w.d.y.c.t.y.l.|
+|[-] . . . . . .| [0:1] 'who'
+|. [-] . . . . .| [1:2] 'do'
+|. . [-] . . . .| [2:3] 'you'
+|. . . [-] . . .| [3:4] 'claim'
+|. . . . [-] . .| [4:5] 'that'
+|. . . . . [-] .| [5:6] 'you'
+|. . . . . . [-]| [6:7] 'like'
+|# . . . . . . .| [0:0] NP[]/NP[] -> *
+|. # . . . . . .| [1:1] NP[]/NP[] -> *
+|. . # . . . . .| [2:2] NP[]/NP[] -> *
+|. . . # . . . .| [3:3] NP[]/NP[] -> *
+|. . . . # . . .| [4:4] NP[]/NP[] -> *
+|. . . . . # . .| [5:5] NP[]/NP[] -> *
+|. . . . . . # .| [6:6] NP[]/NP[] -> *
+|. . . . . . . #| [7:7] NP[]/NP[] -> *
+|[-] . . . . . .| [0:1] NP[+WH] -> 'who' *
+|[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {}
+|[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+|[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {}
+|. [-] . . . . .| [1:2] V[+AUX] -> 'do' *
+|. [-> . . . . .| [1:2] S[+INV] -> V[+AUX] * NP[] VP[] {}
+|. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {}
+|. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {}
+|. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {}
+|. . [-] . . . .| [2:3] NP[-WH] -> 'you' *
+|. . [-> . . . .| [2:3] S[-INV] -> NP[] * VP[] {}
+|. . [-> . . . .| [2:3] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+|. . [-> . . . .| [2:3] S[-INV] -> NP[] * S[]/NP[] {}
+|. [---> . . . .| [1:3] S[+INV] -> V[+AUX] NP[] * VP[] {}
+|. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {}
+|. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' *
+|. . . [-> . . .| [3:4] VP[] -> V[-AUX, SUBCAT='clause'] * SBar[] {}
+|. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {}
+|. . . . [-] . .| [4:5] Comp[] -> 'that' *
+|. . . . [-> . .| [4:5] SBar[] -> Comp[] * S[-INV] {}
+|. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {}
+|. . . . . [-] .| [5:6] NP[-WH] -> 'you' *
+|. . . . . [-> .| [5:6] S[-INV] -> NP[] * VP[] {}
+|. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+|. . . . . [-> .| [5:6] S[-INV] -> NP[] * S[]/NP[] {}
+|. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' *
+|. . . . . . [->| [6:7] VP[] -> V[-AUX, SUBCAT='trans'] * NP[] {}
+|. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {}
+|. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] *
+|. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
+|. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] *
+|. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] *
+|. . [---------]| [2:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
+|. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] *
+|[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] *
+
+
+
>>> trees = list(trees)
+>>> for tree in trees: print(tree)
+(S[-INV]
+  (NP[+WH] who)
+  (S[+INV]/NP[]
+    (V[+AUX] do)
+    (NP[-WH] you)
+    (VP[]/NP[]
+      (V[-AUX, SUBCAT='clause'] claim)
+      (SBar[]/NP[]
+        (Comp[] that)
+        (S[-INV]/NP[]
+          (NP[-WH] you)
+          (VP[]/NP[] (V[-AUX, SUBCAT='trans'] like) (NP[]/NP[] )))))))
+
+

A different parser should give the same parse trees, but perhaps in a different order:

-
-
->>> cp2 = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1,
-...                         parser=parse.FeatureEarleyChartParser)
->>> trees2 = cp2.parse(tokens)
-|.w.d.y.c.t.y.l.|
-|[-] . . . . . .| [0:1] 'who'
-|. [-] . . . . .| [1:2] 'do'
-|. . [-] . . . .| [2:3] 'you'
-|. . . [-] . . .| [3:4] 'claim'
-|. . . . [-] . .| [4:5] 'that'
-|. . . . . [-] .| [5:6] 'you'
-|. . . . . . [-]| [6:7] 'like'
-|> . . . . . . .| [0:0] S[-INV] -> * NP[] VP[] {}
-|> . . . . . . .| [0:0] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
-|> . . . . . . .| [0:0] S[-INV] -> * NP[] S[]/NP[] {}
-|> . . . . . . .| [0:0] S[-INV] -> * Adv[+NEG] S[+INV] {}
-|> . . . . . . .| [0:0] S[+INV] -> * V[+AUX] NP[] VP[] {}
-|> . . . . . . .| [0:0] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {}
-|> . . . . . . .| [0:0] NP[+WH] -> * 'who' {}
-|[-] . . . . . .| [0:1] NP[+WH] -> 'who' *
-|[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {}
-|[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-|[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {}
-|. > . . . . . .| [1:1] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
-|. > . . . . . .| [1:1] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {}
-|. > . . . . . .| [1:1] V[+AUX] -> * 'do' {}
-|. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
-|. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
-|. > . . . . . .| [1:1] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
-|. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='intrans'] {}
-|. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {}
-|. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {}
-|. > . . . . . .| [1:1] VP[] -> * V[+AUX] VP[] {}
-|. [-] . . . . .| [1:2] V[+AUX] -> 'do' *
-|. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {}
-|. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {}
-|. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {}
-|. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='intrans'] {}
-|. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {}
-|. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {}
-|. . > . . . . .| [2:2] VP[] -> * V[+AUX] VP[] {}
-|. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
-|. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
-|. . > . . . . .| [2:2] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
-|. . > . . . . .| [2:2] NP[-WH] -> * 'you' {}
-|. . [-] . . . .| [2:3] NP[-WH] -> 'you' *
-|. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {}
-|. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
-|. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
-|. . . > . . . .| [3:3] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
-|. . . > . . . .| [3:3] V[-AUX, SUBCAT='clause'] -> * 'claim' {}
-|. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' *
-|. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {}
-|. . . . > . . .| [4:4] SBar[]/?x[] -> * Comp[] S[-INV]/?x[] {}
-|. . . . > . . .| [4:4] Comp[] -> * 'that' {}
-|. . . . [-] . .| [4:5] Comp[] -> 'that' *
-|. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {}
-|. . . . . > . .| [5:5] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
-|. . . . . > . .| [5:5] NP[-WH] -> * 'you' {}
-|. . . . . [-] .| [5:6] NP[-WH] -> 'you' *
-|. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-|. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
-|. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
-|. . . . . . > .| [6:6] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
-|. . . . . . > .| [6:6] V[-AUX, SUBCAT='trans'] -> * 'like' {}
-|. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' *
-|. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {}
-|. . . . . . . #| [7:7] NP[]/NP[] -> *
-|. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] *
-|. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
-|. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] *
-|. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] *
-|. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] *
-|[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] *
-
-
->>> sorted(trees) == sorted(trees2)
-True
-
-
-

Let's load a German grammar:

-
-
->>> cp = parse.load_parser('grammars/book_grammars/german.fcfg', trace=0)
->>> sent = 'die Katze sieht den Hund'
->>> tokens = sent.split()
->>> trees = cp.parse(tokens)
->>> for tree in trees: print(tree)
-(S[]
-  (NP[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom']
-    (Det[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom'] die)
-    (N[AGR=[GND='fem', NUM='sg', PER=3]] Katze))
-  (VP[AGR=[NUM='sg', PER=3]]
-    (TV[AGR=[NUM='sg', PER=3], OBJCASE='acc'] sieht)
-    (NP[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc']
-      (Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] den)
-      (N[AGR=[GND='masc', NUM='sg', PER=3]] Hund))))
-
-
-
-
-


>>> cp2 = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1,
+...                         parser=parse.FeatureEarleyChartParser)
+>>> trees2 = cp2.parse(tokens)
+|.w.d.y.c.t.y.l.|
+|[-] . . . . . .| [0:1] 'who'
+|. [-] . . . . .| [1:2] 'do'
+|. . [-] . . . .| [2:3] 'you'
+|. . . [-] . . .| [3:4] 'claim'
+|. . . . [-] . .| [4:5] 'that'
+|. . . . . [-] .| [5:6] 'you'
+|. . . . . . [-]| [6:7] 'like'
+|> . . . . . . .| [0:0] S[-INV] -> * NP[] VP[] {}
+|> . . . . . . .| [0:0] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
+|> . . . . . . .| [0:0] S[-INV] -> * NP[] S[]/NP[] {}
+|> . . . . . . .| [0:0] S[-INV] -> * Adv[+NEG] S[+INV] {}
+|> . . . . . . .| [0:0] S[+INV] -> * V[+AUX] NP[] VP[] {}
+|> . . . . . . .| [0:0] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {}
+|> . . . . . . .| [0:0] NP[+WH] -> * 'who' {}
+|[-] . . . . . .| [0:1] NP[+WH] -> 'who' *
+|[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {}
+|[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+|[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {}
+|. > . . . . . .| [1:1] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
+|. > . . . . . .| [1:1] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {}
+|. > . . . . . .| [1:1] V[+AUX] -> * 'do' {}
+|. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
+|. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
+|. > . . . . . .| [1:1] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
+|. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='intrans'] {}
+|. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {}
+|. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {}
+|. > . . . . . .| [1:1] VP[] -> * V[+AUX] VP[] {}
+|. [-] . . . . .| [1:2] V[+AUX] -> 'do' *
+|. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {}
+|. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {}
+|. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {}
+|. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='intrans'] {}
+|. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {}
+|. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {}
+|. . > . . . . .| [2:2] VP[] -> * V[+AUX] VP[] {}
+|. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
+|. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
+|. . > . . . . .| [2:2] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
+|. . > . . . . .| [2:2] NP[-WH] -> * 'you' {}
+|. . [-] . . . .| [2:3] NP[-WH] -> 'you' *
+|. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {}
+|. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
+|. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
+|. . . > . . . .| [3:3] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
+|. . . > . . . .| [3:3] V[-AUX, SUBCAT='clause'] -> * 'claim' {}
+|. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' *
+|. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {}
+|. . . . > . . .| [4:4] SBar[]/?x[] -> * Comp[] S[-INV]/?x[] {}
+|. . . . > . . .| [4:4] Comp[] -> * 'that' {}
+|. . . . [-] . .| [4:5] Comp[] -> 'that' *
+|. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {}
+|. . . . . > . .| [5:5] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
+|. . . . . > . .| [5:5] NP[-WH] -> * 'you' {}
+|. . . . . [-] .| [5:6] NP[-WH] -> 'you' *
+|. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+|. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
+|. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
+|. . . . . . > .| [6:6] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
+|. . . . . . > .| [6:6] V[-AUX, SUBCAT='trans'] -> * 'like' {}
+|. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' *
+|. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {}
+|. . . . . . . #| [7:7] NP[]/NP[] -> *
+|. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] *
+|. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
+|. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] *
+|. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] *
+|. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] *
+|[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] *
+
+
+
>>> sorted(trees) == sorted(trees2)
+True
+
+
+

Let’s load a German grammar:

+
>>> cp = parse.load_parser('grammars/book_grammars/german.fcfg', trace=0)
+>>> sent = 'die Katze sieht den Hund'
+>>> tokens = sent.split()
+>>> trees = cp.parse(tokens)
+>>> for tree in trees: print(tree)
+(S[]
+  (NP[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom']
+    (Det[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom'] die)
+    (N[AGR=[GND='fem', NUM='sg', PER=3]] Katze))
+  (VP[AGR=[NUM='sg', PER=3]]
+    (TV[AGR=[NUM='sg', PER=3], OBJCASE='acc'] sieht)
+    (NP[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc']
+      (Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] den)
+      (N[AGR=[GND='masc', NUM='sg', PER=3]] Hund))))
+
+
+
+
+

Grammar with Binding Operators

+

The bindop.fcfg grammar is a semantic grammar that uses lambda calculus. Each element has a core semantics, which is a single lambda calculus expression; and a set of binding operators, which bind variables.

In particular, the binding operators need to instantiate their bound variable every time they are added to the chart. To do this, we use a special subclass of Chart, called InstantiateVarsChart.

-
-
->>> from nltk.parse.featurechart import InstantiateVarsChart
->>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=1,
-...                        chart_class=InstantiateVarsChart)
->>> print(cp.grammar())
-Grammar with 15 productions (start state = S[])
-    S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] VP[SEM=[BO=?b2, CORE=?vp]]
-    VP[SEM=[BO={?b1+?b2}, CORE=<?v(?obj)>]] -> TV[SEM=[BO=?b1, CORE=?v]] NP[SEM=[BO=?b2, CORE=?obj]]
-    VP[SEM=?s] -> IV[SEM=?s]
-    NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] N[SEM=[BO=?b2, CORE=?n]]
-    Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a'
-    N[SEM=[BO={/}, CORE=<dog>]] -> 'dog'
-    N[SEM=[BO={/}, CORE=<dog>]] -> 'cat'
-    N[SEM=[BO={/}, CORE=<dog>]] -> 'mouse'
-    IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks'
-    IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'eats'
-    IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'walks'
-    TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds'
-    TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'walks'
-    NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'john'
-    NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'alex'
-
-
+
>>> from nltk.parse.featurechart import InstantiateVarsChart
+>>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=1,
+...                        chart_class=InstantiateVarsChart)
+>>> print(cp.grammar())
+Grammar with 15 productions (start state = S[])
+    S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] VP[SEM=[BO=?b2, CORE=?vp]]
+    VP[SEM=[BO={?b1+?b2}, CORE=<?v(?obj)>]] -> TV[SEM=[BO=?b1, CORE=?v]] NP[SEM=[BO=?b2, CORE=?obj]]
+    VP[SEM=?s] -> IV[SEM=?s]
+    NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] N[SEM=[BO=?b2, CORE=?n]]
+    Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a'
+    N[SEM=[BO={/}, CORE=<dog>]] -> 'dog'
+    N[SEM=[BO={/}, CORE=<dog>]] -> 'cat'
+    N[SEM=[BO={/}, CORE=<dog>]] -> 'mouse'
+    IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks'
+    IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'eats'
+    IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'walks'
+    TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds'
+    TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'walks'
+    NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'john'
+    NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'alex'
+
+

A simple intransitive sentence:

-
-
->>> from nltk.sem import logic
->>> logic._counter._value = 100
-
-
->>> trees = cp.parse('john barks'.split())
-|. john.barks.|
-|[-----]     .| [0:1] 'john'
-|.     [-----]| [1:2] 'barks'
-|[-----]     .| [0:1] NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=<z101>]] -> 'john' *
-|[----->     .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: <IndividualVariableExpression z2>}
-|.     [-----]| [1:2] IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks' *
-|.     [-----]| [1:2] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] *
-|[===========]| [0:2] S[SEM=[BO={bo(\P.P(John),z2)}, CORE=<bark(z2)>]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<z2>]] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] *
->>> for tree in trees: print(tree)
-(S[SEM=[BO={bo(\P.P(John),z2)}, CORE=<bark(z2)>]]
-  (NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=<z101>]] john)
-  (VP[SEM=[BO={/}, CORE=<\x.bark(x)>]]
-    (IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] barks)))
-
-
+
>>> from nltk.sem import logic
+>>> logic._counter._value = 100
+
+
+
>>> trees = cp.parse('john barks'.split())
+|. john.barks.|
+|[-----]     .| [0:1] 'john'
+|.     [-----]| [1:2] 'barks'
+|[-----]     .| [0:1] NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=<z101>]] -> 'john' *
+|[----->     .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: <IndividualVariableExpression z2>}
+|.     [-----]| [1:2] IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks' *
+|.     [-----]| [1:2] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] *
+|[===========]| [0:2] S[SEM=[BO={bo(\P.P(John),z2)}, CORE=<bark(z2)>]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<z2>]] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] *
+>>> for tree in trees: print(tree)
+(S[SEM=[BO={bo(\P.P(John),z2)}, CORE=<bark(z2)>]]
+  (NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=<z101>]] john)
+  (VP[SEM=[BO={/}, CORE=<\x.bark(x)>]]
+    (IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] barks)))
+
+

A transitive sentence:

-
-
->>> trees = cp.parse('john feeds a dog'.split())
-|.joh.fee. a .dog.|
-|[---]   .   .   .| [0:1] 'john'
-|.   [---]   .   .| [1:2] 'feeds'
-|.   .   [---]   .| [2:3] 'a'
-|.   .   .   [---]| [3:4] 'dog'
-|[---]   .   .   .| [0:1] NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=<z102>]] -> 'john' *
-|[--->   .   .   .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: <IndividualVariableExpression z2>}
-|.   [---]   .   .| [1:2] TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds' *
-|.   [--->   .   .| [1:2] VP[SEM=[BO={?b1+?b2}, CORE=<?v(?obj)>]] -> TV[SEM=[BO=?b1, CORE=?v]] * NP[SEM=[BO=?b2, CORE=?obj]] {?b1: {/}, ?v: <LambdaExpression \x y.feed(y,x)>}
-|.   .   [---]   .| [2:3] Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a' *
-|.   .   [--->   .| [2:3] NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] * N[SEM=[BO=?b2, CORE=?n]] {?b1: {/}, ?det: <LambdaExpression \Q P.exists x.(Q(x) & P(x))>}
-|.   .   .   [---]| [3:4] N[SEM=[BO={/}, CORE=<dog>]] -> 'dog' *
-|.   .   [-------]| [2:4] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=<z103>]] -> Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] N[SEM=[BO={/}, CORE=<dog>]] *
-|.   .   [------->| [2:4] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.exists x.(dog(x) & P(x)),z2)}, ?subj: <IndividualVariableExpression z2>}
-|.   [-----------]| [1:4] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]] -> TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<z2>]] *
-|[===============]| [0:4] S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<z2>]] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<\y.feed(y,z3)>]] *
-
-
->>> for tree in trees: print(tree)
-(S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]]
-  (NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=<z102>]] john)
-  (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]]
-    (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
-    (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=<z103>]]
-      (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
-      (N[SEM=[BO={/}, CORE=<dog>]] dog))))
-
-
+
>>> trees = cp.parse('john feeds a dog'.split())
+|.joh.fee. a .dog.|
+|[---]   .   .   .| [0:1] 'john'
+|.   [---]   .   .| [1:2] 'feeds'
+|.   .   [---]   .| [2:3] 'a'
+|.   .   .   [---]| [3:4] 'dog'
+|[---]   .   .   .| [0:1] NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=<z102>]] -> 'john' *
+|[--->   .   .   .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: <IndividualVariableExpression z2>}
+|.   [---]   .   .| [1:2] TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds' *
+|.   [--->   .   .| [1:2] VP[SEM=[BO={?b1+?b2}, CORE=<?v(?obj)>]] -> TV[SEM=[BO=?b1, CORE=?v]] * NP[SEM=[BO=?b2, CORE=?obj]] {?b1: {/}, ?v: <LambdaExpression \x y.feed(y,x)>}
+|.   .   [---]   .| [2:3] Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a' *
+|.   .   [--->   .| [2:3] NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] * N[SEM=[BO=?b2, CORE=?n]] {?b1: {/}, ?det: <LambdaExpression \Q P.exists x.(Q(x) & P(x))>}
+|.   .   .   [---]| [3:4] N[SEM=[BO={/}, CORE=<dog>]] -> 'dog' *
+|.   .   [-------]| [2:4] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=<z103>]] -> Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] N[SEM=[BO={/}, CORE=<dog>]] *
+|.   .   [------->| [2:4] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.exists x.(dog(x) & P(x)),z2)}, ?subj: <IndividualVariableExpression z2>}
+|.   [-----------]| [1:4] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]] -> TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<z2>]] *
+|[===============]| [0:4] S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<z2>]] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<\y.feed(y,z3)>]] *
+
+
+
>>> for tree in trees: print(tree)
+(S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]]
+  (NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=<z102>]] john)
+  (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]]
+    (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
+    (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=<z103>]]
+      (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
+      (N[SEM=[BO={/}, CORE=<dog>]] dog))))
+
+

Turn down the verbosity:

-
-
->>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=0,
-...                       chart_class=InstantiateVarsChart)
-
-
+
>>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=0,
+...                       chart_class=InstantiateVarsChart)
+
+

Reuse the same lexical item twice:

-
-
->>> trees = cp.parse('john feeds john'.split())
->>> for tree in trees: print(tree)
-(S[SEM=[BO={bo(\P.P(John),z2), bo(\P.P(John),z3)}, CORE=<feed(z2,z3)>]]
-  (NP[SEM=[BO={bo(\P.P(John),z104)}, CORE=<z104>]] john)
-  (VP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<\y.feed(y,z2)>]]
-    (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
-    (NP[SEM=[BO={bo(\P.P(John),z105)}, CORE=<z105>]] john)))
-
-
->>> trees = cp.parse('a dog feeds a dog'.split())
->>> for tree in trees: print(tree)
-(S[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]]
-  (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z106)}, CORE=<z106>]]
-    (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
-    (N[SEM=[BO={/}, CORE=<dog>]] dog))
-  (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]]
-    (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
-    (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z107)}, CORE=<z107>]]
-      (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
-      (N[SEM=[BO={/}, CORE=<dog>]] dog))))
-
-
+
>>> trees = cp.parse('john feeds john'.split())
+>>> for tree in trees: print(tree)
+(S[SEM=[BO={bo(\P.P(John),z2), bo(\P.P(John),z3)}, CORE=<feed(z2,z3)>]]
+  (NP[SEM=[BO={bo(\P.P(John),z104)}, CORE=<z104>]] john)
+  (VP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<\y.feed(y,z2)>]]
+    (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
+    (NP[SEM=[BO={bo(\P.P(John),z105)}, CORE=<z105>]] john)))
+
+
>>> trees = cp.parse('a dog feeds a dog'.split())
+>>> for tree in trees: print(tree)
+(S[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]]
+  (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z106)}, CORE=<z106>]]
+    (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
+    (N[SEM=[BO={/}, CORE=<dog>]] dog))
+  (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]]
+    (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
+    (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z107)}, CORE=<z107>]]
+      (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
+      (N[SEM=[BO={/}, CORE=<dog>]] dog))))
+
+
+
\ No newline at end of file
diff --git a/howto/featstruct.html b/howto/featstruct.html
index eacc7aa9b..5c705f846 100644
--- a/howto/featstruct.html
+++ b/howto/featstruct.html
@@ -1,465 +1,203 @@
->>> from __future__ import print_function
->>> from nltk.featstruct import FeatStruct
->>> from nltk.sem.logic import Variable, VariableExpression, Expression
-
-
-
-


+
+ +
+

Sample usage for featstruct

+
+

Feature Structures & Unification

+
>>> from nltk.featstruct import FeatStruct
+>>> from nltk.sem.logic import Variable, VariableExpression, Expression
+
+
+
+

Note

+

For now, featstruct uses the older lambdalogic semantics module. Eventually, it should be updated to use the new first order predicate logic module.

-
-


+
+

Overview

A feature structure is a mapping from feature identifiers to feature values, where feature values can be simple values (like strings or ints), nested feature structures, or variables:

-
-
->>> fs1 = FeatStruct(number='singular', person=3)
->>> print(fs1)
-[ number = 'singular' ]
-[ person = 3          ]
-
-
+
>>> fs1 = FeatStruct(number='singular', person=3)
+>>> print(fs1)
+[ number = 'singular' ]
+[ person = 3          ]
+
+

Feature structures may be nested:

-
-
->>> fs2 = FeatStruct(type='NP', agr=fs1)
->>> print(fs2)
-[ agr  = [ number = 'singular' ] ]
-[        [ person = 3          ] ]
-[                                ]
-[ type = 'NP'                    ]
-
-
+
>>> fs2 = FeatStruct(type='NP', agr=fs1)
+>>> print(fs2)
+[ agr  = [ number = 'singular' ] ]
+[        [ person = 3          ] ]
+[                                ]
+[ type = 'NP'                    ]
+
+

Variables are used to indicate that two features should be assigned the same value. For example, the following feature structure requires that the feature fs3['agr']['number'] be bound to the same value as the feature fs3['subj']['number'].

-
-
->>> fs3 = FeatStruct(agr=FeatStruct(number=Variable('?n')),
-...                  subj=FeatStruct(number=Variable('?n')))
->>> print(fs3)
-[ agr  = [ number = ?n ] ]
-[                        ]
-[ subj = [ number = ?n ] ]
-
-

+
>>> fs3 = FeatStruct(agr=FeatStruct(number=Variable('?n')),
+...                  subj=FeatStruct(number=Variable('?n')))
+>>> print(fs3)
+[ agr  = [ number = ?n ] ]
+[                        ]
+[ subj = [ number = ?n ] ]
+
+

Feature structures are typically used to represent partial information about objects. A feature name that is not mapped to a value stands for a feature whose value is unknown (not a feature without a value). Two feature structures that represent (potentially overlapping) information about the same object can be combined by unification.

-
-
->>> print(fs2.unify(fs3))
-[ agr  = [ number = 'singular' ] ]
-[        [ person = 3          ] ]
-[                                ]
-[ subj = [ number = 'singular' ] ]
-[                                ]
-[ type = 'NP'                    ]
-
-
+
>>> print(fs2.unify(fs3))
+[ agr  = [ number = 'singular' ] ]
+[        [ person = 3          ] ]
+[                                ]
+[ subj = [ number = 'singular' ] ]
+[                                ]
+[ type = 'NP'                    ]
+
+

When two inconsistent feature structures are unified, the unification fails and returns None.

-
-
->>> fs4 = FeatStruct(agr=FeatStruct(person=1))
->>> print(fs4.unify(fs2))
-None
->>> print(fs2.unify(fs4))
-None
-
-
- -
-

Feature Structure Types


+
>>> fs4 = FeatStruct(agr=FeatStruct(person=1))
+>>> print(fs4.unify(fs2))
+None
+>>> print(fs2.unify(fs4))
+None
+
+
+
+

Feature Structure Types

There are actually two types of feature structure:

    -
  • feature dictionaries, implemented by FeatDict, act like Python dictionaries. Feature identifiers may be strings or instances of the Feature class.

  • feature lists, implemented by FeatList, act like Python lists. Feature identifiers are integers.

When you construct a feature structure using the FeatStruct constructor, it will automatically decide which type is appropriate:

-
-
->>> type(FeatStruct(number='singular'))
-<class 'nltk.featstruct.FeatDict'>
->>> type(FeatStruct([1,2,3]))
-<class 'nltk.featstruct.FeatList'>
-
-
+
>>> type(FeatStruct(number='singular'))
+<class 'nltk.featstruct.FeatDict'>
+>>> type(FeatStruct([1,2,3]))
+<class 'nltk.featstruct.FeatList'>
+
+

Usually, we will just use feature dictionaries; but sometimes feature lists can be useful too. Two feature lists will unify with each other only if they have equal lengths, and all of their feature values match. If you wish to write a feature list that contains 'unknown' values, you must use variables:

-
-
->>> fs1 = FeatStruct([1,2,Variable('?y')])
->>> fs2 = FeatStruct([1,Variable('?x'),3])
->>> fs1.unify(fs2)
-[1, 2, 3]
-
-
- -
-
-

Parsing Feature Structure Strings

+
>>> fs1 = FeatStruct([1,2,Variable('?y')])
+>>> fs2 = FeatStruct([1,Variable('?x'),3])
+>>> fs1.unify(fs2)
+[1, 2, 3]
+
+
+
+
+

Parsing Feature Structure Strings

Feature structures can be constructed directly from strings. Often, this is more convenient than constructing them directly. NLTK can parse most feature strings to produce the corresponding feature structures; the atomic values it understands include strings, ints, logic expressions (nltk.sem.logic.Expression), and a few other types discussed below.

Feature dictionaries are written like Python dictionaries, except that keys are not put in quotes; and square brackets ([]) are used instead of braces ({}):

-
-
->>> FeatStruct('[tense="past", agr=[number="sing", person=3]]')
-[agr=[number='sing', person=3], tense='past']
-
-

+
>>> FeatStruct('[tense="past", agr=[number="sing", person=3]]')
+[agr=[number='sing', person=3], tense='past']
+
+

If a feature value is a single alphanumeric word, then it does not need to be quoted -- it will be automatically treated as a string:

-
-
->>> FeatStruct('[tense=past, agr=[number=sing, person=3]]')
-[agr=[number='sing', person=3], tense='past']
-
-

+
>>> FeatStruct('[tense=past, agr=[number=sing, person=3]]')
+[agr=[number='sing', person=3], tense='past']
+
+

Feature lists are written like python lists:

-
-
->>> FeatStruct('[1, 2, 3]')
-[1, 2, 3]
-
-
-

The expression [] is treated as an empty feature dictionary, not +

>>> FeatStruct('[1, 2, 3]')
+[1, 2, 3]
+
+
+

The expression [] is treated as an empty feature dictionary, not an empty feature list:

-
-
->>> type(FeatStruct('[]'))
-<class 'nltk.featstruct.FeatDict'>
-
-
-
-
-

Feature Paths

+
>>> type(FeatStruct('[]'))
+<class 'nltk.featstruct.FeatDict'>
+
+
+
+
+

Feature Paths

Features can be specified using feature paths, or tuples of feature identifiers that specify a path through the nested feature structures to a value.

-
-
->>> fs1 = FeatStruct('[x=1, y=[1,2,[z=3]]]')
->>> fs1['y']
-[1, 2, [z=3]]
->>> fs1['y', 2]
-[z=3]
->>> fs1['y', 2, 'z']
-3
-
-
- -
-
-

Reentrance

+
>>> fs1 = FeatStruct('[x=1, y=[1,2,[z=3]]]')
+>>> fs1['y']
+[1, 2, [z=3]]
+>>> fs1['y', 2]
+[z=3]
+>>> fs1['y', 2, 'z']
+3
+
+
+ +
+

Reentrance

Feature structures may contain reentrant feature values. A reentrant feature value is a single feature structure that can be accessed via multiple feature paths.

-
-
->>> fs1 = FeatStruct(x='val')
->>> fs2 = FeatStruct(a=fs1, b=fs1)
->>> print(fs2)
-[ a = (1) [ x = 'val' ] ]
-[                       ]
-[ b -> (1)              ]
->>> fs2
-[a=(1)[x='val'], b->(1)]
-
-
+
>>> fs1 = FeatStruct(x='val')
+>>> fs2 = FeatStruct(a=fs1, b=fs1)
+>>> print(fs2)
+[ a = (1) [ x = 'val' ] ]
+[                       ]
+[ b -> (1)              ]
+>>> fs2
+[a=(1)[x='val'], b->(1)]
+
+

As you can see, reentrance is displayed by marking a feature structure with a unique identifier, in this case (1), the first time it is encountered, and then using the special form var -> id whenever it is encountered again. You can use the same notation to directly create reentrant feature structures from strings.

-
-
->>> FeatStruct('[a=(1)[], b->(1), c=[d->(1)]]')
-[a=(1)[], b->(1), c=[d->(1)]]
-
-
+
>>> FeatStruct('[a=(1)[], b->(1), c=[d->(1)]]')
+[a=(1)[], b->(1), c=[d->(1)]]
+
+

Reentrant feature structures may contain cycles:

-
-
->>> fs3 = FeatStruct('(1)[a->(1)]')
->>> fs3['a', 'a', 'a', 'a']
-(1)[a->(1)]
->>> fs3['a', 'a', 'a', 'a'] is fs3
-True
-
-
+
>>> fs3 = FeatStruct('(1)[a->(1)]')
+>>> fs3['a', 'a', 'a', 'a']
+(1)[a->(1)]
+>>> fs3['a', 'a', 'a', 'a'] is fs3
+True
+
+

Unification preserves the reentrance relations imposed by both of the unified feature structures. In the feature structure resulting from unification, any modifications to a reentrant feature value will be visible using any of its feature paths.

-
-
->>> fs3.unify(FeatStruct('[a=[b=12], c=33]'))
-(1)[a->(1), b=12, c=33]
-
-
- -
-
-

Feature Structure Equality

+
>>> fs3.unify(FeatStruct('[a=[b=12], c=33]'))
+(1)[a->(1), b=12, c=33]
+
+
+ +
+

Feature Structure Equality

Two feature structures are considered equal if they assign the same values to all features, and they contain the same reentrances.

-
-
->>> fs1 = FeatStruct('[a=(1)[x=1], b->(1)]')
->>> fs2 = FeatStruct('[a=(1)[x=1], b->(1)]')
->>> fs3 = FeatStruct('[a=[x=1], b=[x=1]]')
->>> fs1 == fs1, fs1 is fs1
-(True, True)
->>> fs1 == fs2, fs1 is fs2
-(True, False)
->>> fs1 == fs3, fs1 is fs3
-(False, False)
-
-
+
>>> fs1 = FeatStruct('[a=(1)[x=1], b->(1)]')
+>>> fs2 = FeatStruct('[a=(1)[x=1], b->(1)]')
+>>> fs3 = FeatStruct('[a=[x=1], b=[x=1]]')
+>>> fs1 == fs1, fs1 is fs1
+(True, True)
+>>> fs1 == fs2, fs1 is fs2
+(True, False)
+>>> fs1 == fs3, fs1 is fs3
+(False, False)
+
+

Note that this differs from how Python dictionaries and lists define equality -- in particular, Python dictionaries and lists ignore reentrance relations. To test two feature structures for equality while ignoring reentrance relations, use the equal_values() method:

-
-
->>> fs1.equal_values(fs1)
-True
->>> fs1.equal_values(fs2)
-True
->>> fs1.equal_values(fs3)
-True
-
-
- -
-
-

Feature Value Sets & Feature Value Tuples

+
>>> fs1.equal_values(fs1)
+True
+>>> fs1.equal_values(fs2)
+True
+>>> fs1.equal_values(fs3)
+True
+
+
+ +
+

Feature Value Sets & Feature Value Tuples

nltk.featstruct defines two new data types that are intended to be used as feature values: FeatureValueTuple and FeatureValueSet. Both of these types are considered base values -- i.e., unification does not apply to them. However, variable binding does apply to any values that they contain.

Feature value tuples are written with parentheses:

-
-
->>> fs1 = FeatStruct('[x=(?x, ?y)]')
->>> fs1
-[x=(?x, ?y)]
->>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2})
-[x=(1, 2)]
-
-
+
>>> fs1 = FeatStruct('[x=(?x, ?y)]')
+>>> fs1
+[x=(?x, ?y)]
+>>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2})
+[x=(1, 2)]
+
+

Feature sets are written with braces:

-
-
->>> fs1 = FeatStruct('[x={?x, ?y}]')
->>> fs1
-[x={?x, ?y}]
->>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2})
-[x={1, 2}]
-
-
+
>>> fs1 = FeatStruct('[x={?x, ?y}]')
+>>> fs1
+[x={?x, ?y}]
+>>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2})
+[x={1, 2}]
+
+

In addition to the basic feature value tuple & set classes, nltk defines feature value unions (for sets) and feature value concatenations (for tuples). These are written using '+', and can be used to combine sets & tuples:

-
-
->>> fs1 = FeatStruct('[x=((1, 2)+?z), z=?z]')
->>> fs1
-[x=((1, 2)+?z), z=?z]
->>> fs1.unify(FeatStruct('[z=(3, 4, 5)]'))
-[x=(1, 2, 3, 4, 5), z=(3, 4, 5)]
-
-
+
>>> fs1 = FeatStruct('[x=((1, 2)+?z), z=?z]')
+>>> fs1
+[x=((1, 2)+?z), z=?z]
+>>> fs1.unify(FeatStruct('[z=(3, 4, 5)]'))
+[x=(1, 2, 3, 4, 5), z=(3, 4, 5)]
+
+

Thus, feature value tuples and sets can be used to build up tuples and sets of values over the course of unification. For example, when parsing sentences using a semantic feature grammar, feature sets or feature tuples can be used to build a list of semantic predicates as the sentence is parsed.
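The union form for sets uses the same '+' notation; a small sketch parallel to the tuple example above:

>>> fs1 = FeatStruct('[x=({1, 2}+?z), z=?z]')
>>> fs1.unify(FeatStruct('[z={3, 4}]'))
[x={1, 2, 3, 4}, z={3, 4}]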

As was mentioned above, unification does not apply to feature value tuples and sets. One reason for this is that it's impossible to define a single correct answer for unification when concatenation is used. Consider the following example:

-
-
->>> fs1 = FeatStruct('[x=(1, 2, 3, 4)]')
->>> fs2 = FeatStruct('[x=(?a+?b), a=?a, b=?b]')
-
-
+
>>> fs1 = FeatStruct('[x=(1, 2, 3, 4)]')
+>>> fs2 = FeatStruct('[x=(?a+?b), a=?a, b=?b]')
+
+

If unification applied to feature tuples, then the unification algorithm would have to arbitrarily choose how to divide the tuple (1,2,3,4) into two parts. Instead, the unification algorithm refuses to make this decision, and simply unifies based on value. Because (1,2,3,4) is not equal to (?a+?b), fs1 and fs2 will not unify:

-
-
->>> print(fs1.unify(fs2))
-None
-
-
+
>>> print(fs1.unify(fs2))
+None
+
+

If you need a list-like structure that unification does apply to, use FeatList.
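For example (a minimal sketch, not part of the original test suite; it assumes FeatList unifies element-by-element and prints like a Python list):

>>> from nltk.featstruct import FeatList
>>> fl1 = FeatList([1, Variable('?x'), 3])
>>> fl2 = FeatList([1, 2, 3])
>>> fl1.unify(fl2)  # unlike a base-value tuple, this binds ?x to 2
[1, 2, 3]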

- -
-
-

Light-weight Feature Structures

+ +
+

Light-weight Feature Structures

Many of the functions defined by nltk.featstruct can be applied directly to simple Python dictionaries and lists, rather than to full-fledged FeatDict and FeatList objects. In other words, -Python dicts and lists can be used as "light-weight" feature +Python dicts and lists can be used as “light-weight” feature structures.

-
-
->>> # Note: pprint prints dicts sorted
->>> from pprint import pprint
->>> from nltk.featstruct import unify
->>> pprint(unify(dict(x=1, y=dict()), dict(a='a', y=dict(b='b'))))
-{'a': 'a', 'x': 1, 'y': {'b': 'b'}}
-
-
+
>>> # Note: pprint prints dicts sorted
+>>> from pprint import pprint
+>>> from nltk.featstruct import unify
+>>> pprint(unify(dict(x=1, y=dict()), dict(a='a', y=dict(b='b'))))
+{'a': 'a', 'x': 1, 'y': {'b': 'b'}}
+
+

However, you should keep in mind the following caveats:

    -
  • Python dictionaries & lists ignore reentrance when checking for +
  • Python dictionaries & lists ignore reentrance when checking for equality between values. But two FeatStructs with different reentrances are considered nonequal, even if all their base -values are equal.

  • -
  • FeatStructs can be easily frozen, allowing them to be used as -keys in hash tables. Python dictionaries and lists can not.
  • -
  • FeatStructs display reentrance in their string representations; -Python dictionaries and lists do not.
  • -
  • FeatStructs may not be mixed with Python dictionaries and lists -(e.g., when performing unification).
  • -
  • FeatStructs provide a number of useful methods, such as walk() -and cyclic(), which are not available for Python dicts & lists.
  • +values are equal.

    +
  • FeatStructs can be easily frozen, allowing them to be used as +keys in hash tables. Python dictionaries and lists can not.

  • +
  • FeatStructs display reentrance in their string representations; +Python dictionaries and lists do not.

  • +
  • FeatStructs may not be mixed with Python dictionaries and lists +(e.g., when performing unification).

  • +
  • FeatStructs provide a number of useful methods, such as walk() +and cyclic(), which are not available for Python dicts & lists.

In general, if your feature structures will contain any reentrances, or if you plan to use them as dictionary keys, it is strongly recommended that you use full-fledged FeatStruct objects.
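To make the freezing and inspection caveats above concrete, here is a minimal sketch (not from the original test suite; it uses only the freeze() and cyclic() methods discussed in this document):

>>> fs = FeatStruct('[a=1]')
>>> fs.freeze()            # freeze() makes the structure immutable and hashable
>>> table = {fs: 'found'}  # a frozen FeatStruct can serve as a dictionary key
>>> table[fs]
'found'
>>> FeatStruct('(1)[x->(1)]').cyclic()  # cyclic() detects reentrant cycles
True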

-
-
-

Custom Feature Values

+ +
+

Custom Feature Values

The abstract base class CustomFeatureValue can be used to define new base value types that have custom unification methods. For example, the following feature value type encodes a range, and defines unification as taking the intersection on the ranges:

-
-
->>> from nltk.compat import total_ordering
->>> from nltk.featstruct import CustomFeatureValue, UnificationFailure
->>> @total_ordering
-... class Range(CustomFeatureValue):
-...     def __init__(self, low, high):
-...         assert low <= high
-...         self.low = low
-...         self.high = high
-...     def unify(self, other):
-...         if not isinstance(other, Range):
-...             return UnificationFailure
-...         low = max(self.low, other.low)
-...         high = min(self.high, other.high)
-...         if low <= high: return Range(low, high)
-...         else: return UnificationFailure
-...     def __repr__(self):
-...         return '(%s<x<%s)' % (self.low, self.high)
-...     def __eq__(self, other):
-...         if not isinstance(other, Range):
-...             return False
-...         return (self.low == other.low) and (self.high == other.high)
-...     def __lt__(self, other):
-...         if not isinstance(other, Range):
-...             return True
-...         return (self.low, self.high) < (other.low, other.high)
-
-
->>> fs1 = FeatStruct(x=Range(5,8), y=FeatStruct(z=Range(7,22)))
->>> print(fs1.unify(FeatStruct(x=Range(6, 22))))
-[ x = (6<x<8)          ]
-[                      ]
-[ y = [ z = (7<x<22) ] ]
->>> print(fs1.unify(FeatStruct(x=Range(9, 12))))
-None
->>> print(fs1.unify(FeatStruct(x=12)))
-None
->>> print(fs1.unify(FeatStruct('[x=?x, y=[z=?x]]')))
-[ x = (7<x<8)         ]
-[                     ]
-[ y = [ z = (7<x<8) ] ]
-
-
-
-
-
-

Regression Tests

-
-

Dictionary access methods (non-mutating)

-
-
->>> fs1 = FeatStruct(a=1, b=2, c=3)
->>> fs2 = FeatStruct(x=fs1, y='x')
-
-
+
>>> from functools import total_ordering
+>>> from nltk.featstruct import CustomFeatureValue, UnificationFailure
+>>> @total_ordering
+... class Range(CustomFeatureValue):
+...     def __init__(self, low, high):
+...         assert low <= high
+...         self.low = low
+...         self.high = high
+...     def unify(self, other):
+...         if not isinstance(other, Range):
+...             return UnificationFailure
+...         low = max(self.low, other.low)
+...         high = min(self.high, other.high)
+...         if low <= high: return Range(low, high)
+...         else: return UnificationFailure
+...     def __repr__(self):
+...         return '(%s<x<%s)' % (self.low, self.high)
+...     def __eq__(self, other):
+...         if not isinstance(other, Range):
+...             return False
+...         return (self.low == other.low) and (self.high == other.high)
+...     def __lt__(self, other):
+...         if not isinstance(other, Range):
+...             return True
+...         return (self.low, self.high) < (other.low, other.high)
+
+
+
>>> fs1 = FeatStruct(x=Range(5,8), y=FeatStruct(z=Range(7,22)))
+>>> print(fs1.unify(FeatStruct(x=Range(6, 22))))
+[ x = (6<x<8)          ]
+[                      ]
+[ y = [ z = (7<x<22) ] ]
+>>> print(fs1.unify(FeatStruct(x=Range(9, 12))))
+None
+>>> print(fs1.unify(FeatStruct(x=12)))
+None
+>>> print(fs1.unify(FeatStruct('[x=?x, y=[z=?x]]')))
+[ x = (7<x<8)         ]
+[                     ]
+[ y = [ z = (7<x<8) ] ]
+
+
+ + +
+

Regression Tests

+
+

Dictionary access methods (non-mutating)

+
>>> fs1 = FeatStruct(a=1, b=2, c=3)
+>>> fs2 = FeatStruct(x=fs1, y='x')
+
+

Feature structures support all dictionary methods (excluding the class method dict.fromkeys()). Non-mutating methods:

-
-
->>> sorted(fs2.keys())                               # keys()
-['x', 'y']
->>> sorted(fs2.values())                             # values()
-[[a=1, b=2, c=3], 'x']
->>> sorted(fs2.items())                              # items()
-[('x', [a=1, b=2, c=3]), ('y', 'x')]
->>> sorted(fs2)                                      # __iter__()
-['x', 'y']
->>> 'a' in fs2, 'x' in fs2                           # __contains__()
-(False, True)
->>> fs2.has_key('a'), fs2.has_key('x')               # has_key()
-(False, True)
->>> fs2['x'], fs2['y']                               # __getitem__()
-([a=1, b=2, c=3], 'x')
->>> fs2['a']                                         # __getitem__()
-Traceback (most recent call last):
-  . . .
-KeyError: 'a'
->>> fs2.get('x'), fs2.get('y'), fs2.get('a')         # get()
-([a=1, b=2, c=3], 'x', None)
->>> fs2.get('x', 'hello'), fs2.get('a', 'hello')     # get()
-([a=1, b=2, c=3], 'hello')
->>> len(fs1), len(fs2)                               # __len__
-(3, 2)
->>> fs2.copy()                                       # copy()
-[x=[a=1, b=2, c=3], y='x']
->>> fs2.copy() is fs2                                # copy()
-False
-
-
+
>>> sorted(fs2.keys())                               # keys()
+['x', 'y']
+>>> sorted(fs2.values())                             # values()
+[[a=1, b=2, c=3], 'x']
+>>> sorted(fs2.items())                              # items()
+[('x', [a=1, b=2, c=3]), ('y', 'x')]
+>>> sorted(fs2)                                      # __iter__()
+['x', 'y']
+>>> 'a' in fs2, 'x' in fs2                           # __contains__()
+(False, True)
+>>> fs2.has_key('a'), fs2.has_key('x')               # has_key()
+(False, True)
+>>> fs2['x'], fs2['y']                               # __getitem__()
+([a=1, b=2, c=3], 'x')
+>>> fs2['a']                                         # __getitem__()
+Traceback (most recent call last):
+  . . .
+KeyError: 'a'
+>>> fs2.get('x'), fs2.get('y'), fs2.get('a')         # get()
+([a=1, b=2, c=3], 'x', None)
+>>> fs2.get('x', 'hello'), fs2.get('a', 'hello')     # get()
+([a=1, b=2, c=3], 'hello')
+>>> len(fs1), len(fs2)                               # __len__
+(3, 2)
+>>> fs2.copy()                                       # copy()
+[x=[a=1, b=2, c=3], y='x']
+>>> fs2.copy() is fs2                                # copy()
+False
+
+

Note: by default, FeatStruct.copy() does a deep copy. Use FeatStruct.copy(deep=False) for a shallow copy.
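For instance (a minimal sketch, not part of the original test suite):

>>> fs = FeatStruct(a=FeatStruct(b=1))
>>> fs.copy()['a'] is fs['a']            # deep copy duplicates nested structures
False
>>> fs.copy(deep=False)['a'] is fs['a']  # shallow copy shares nested structures
True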

- -
-
-

Dictionary access methods (mutating)

-
-
->>> fs1 = FeatStruct(a=1, b=2, c=3)
->>> fs2 = FeatStruct(x=fs1, y='x')
-
-
+ +
+

Dictionary access methods (mutating)

+
>>> fs1 = FeatStruct(a=1, b=2, c=3)
+>>> fs2 = FeatStruct(x=fs1, y='x')
+
+

Setting features (__setitem__())

-
-
->>> fs1['c'] = 5
->>> fs1
-[a=1, b=2, c=5]
->>> fs1['x'] = 12
->>> fs1
-[a=1, b=2, c=5, x=12]
->>> fs2['x', 'a'] = 2
->>> fs2
-[x=[a=2, b=2, c=5, x=12], y='x']
->>> fs1
-[a=2, b=2, c=5, x=12]
-
-
+
>>> fs1['c'] = 5
+>>> fs1
+[a=1, b=2, c=5]
+>>> fs1['x'] = 12
+>>> fs1
+[a=1, b=2, c=5, x=12]
+>>> fs2['x', 'a'] = 2
+>>> fs2
+[x=[a=2, b=2, c=5, x=12], y='x']
+>>> fs1
+[a=2, b=2, c=5, x=12]
+
+

Deleting features (__delitem__())

-
-
->>> del fs1['x']
->>> fs1
-[a=2, b=2, c=5]
->>> del fs2['x', 'a']
->>> fs1
-[b=2, c=5]
-
-
+
>>> del fs1['x']
+>>> fs1
+[a=2, b=2, c=5]
+>>> del fs2['x', 'a']
+>>> fs1
+[b=2, c=5]
+
+

setdefault():

-
-
->>> fs1.setdefault('b', 99)
-2
->>> fs1
-[b=2, c=5]
->>> fs1.setdefault('x', 99)
-99
->>> fs1
-[b=2, c=5, x=99]
-
-
+
>>> fs1.setdefault('b', 99)
+2
+>>> fs1
+[b=2, c=5]
+>>> fs1.setdefault('x', 99)
+99
+>>> fs1
+[b=2, c=5, x=99]
+
+

update():

-
-
->>> fs2.update({'a':'A', 'b':'B'}, c='C')
->>> fs2
-[a='A', b='B', c='C', x=[b=2, c=5, x=99], y='x']
-
-
+
>>> fs2.update({'a':'A', 'b':'B'}, c='C')
+>>> fs2
+[a='A', b='B', c='C', x=[b=2, c=5, x=99], y='x']
+
+

pop():

-
-
->>> fs2.pop('a')
-'A'
->>> fs2
-[b='B', c='C', x=[b=2, c=5, x=99], y='x']
->>> fs2.pop('a')
-Traceback (most recent call last):
-  . . .
-KeyError: 'a'
->>> fs2.pop('a', 'foo')
-'foo'
->>> fs2
-[b='B', c='C', x=[b=2, c=5, x=99], y='x']
-
-
+
>>> fs2.pop('a')
+'A'
+>>> fs2
+[b='B', c='C', x=[b=2, c=5, x=99], y='x']
+>>> fs2.pop('a')
+Traceback (most recent call last):
+  . . .
+KeyError: 'a'
+>>> fs2.pop('a', 'foo')
+'foo'
+>>> fs2
+[b='B', c='C', x=[b=2, c=5, x=99], y='x']
+
+

clear():

-
-
->>> fs1.clear()
->>> fs1
-[]
->>> fs2
-[b='B', c='C', x=[], y='x']
-
-
+
>>> fs1.clear()
+>>> fs1
+[]
+>>> fs2
+[b='B', c='C', x=[], y='x']
+
+

popitem():

-
-
->>> sorted([fs2.popitem() for i in range(len(fs2))])
-[('b', 'B'), ('c', 'C'), ('x', []), ('y', 'x')]
->>> fs2
-[]
-
-
+
>>> sorted([fs2.popitem() for i in range(len(fs2))])
+[('b', 'B'), ('c', 'C'), ('x', []), ('y', 'x')]
+>>> fs2
+[]
+
+

Once a feature structure has been frozen, it may not be mutated.

-
-
->>> fs1 = FeatStruct('[x=1, y=2, z=[a=3]]')
->>> fs1.freeze()
->>> fs1.frozen()
-True
->>> fs1['z'].frozen()
-True
-
-
->>> fs1['x'] = 5
-Traceback (most recent call last):
-  . . .
-ValueError: Frozen FeatStructs may not be modified.
->>> del fs1['x']
-Traceback (most recent call last):
-  . . .
-ValueError: Frozen FeatStructs may not be modified.
->>> fs1.clear()
-Traceback (most recent call last):
-  . . .
-ValueError: Frozen FeatStructs may not be modified.
->>> fs1.pop('x')
-Traceback (most recent call last):
-  . . .
-ValueError: Frozen FeatStructs may not be modified.
->>> fs1.popitem()
-Traceback (most recent call last):
-  . . .
-ValueError: Frozen FeatStructs may not be modified.
->>> fs1.setdefault('x')
-Traceback (most recent call last):
-  . . .
-ValueError: Frozen FeatStructs may not be modified.
->>> fs1.update(z=22)
-Traceback (most recent call last):
-  . . .
-ValueError: Frozen FeatStructs may not be modified.
-
-
- -
-
-

Feature Paths

+
>>> fs1 = FeatStruct('[x=1, y=2, z=[a=3]]')
+>>> fs1.freeze()
+>>> fs1.frozen()
+True
+>>> fs1['z'].frozen()
+True
+
+
+
>>> fs1['x'] = 5
+Traceback (most recent call last):
+  . . .
+ValueError: Frozen FeatStructs may not be modified.
+>>> del fs1['x']
+Traceback (most recent call last):
+  . . .
+ValueError: Frozen FeatStructs may not be modified.
+>>> fs1.clear()
+Traceback (most recent call last):
+  . . .
+ValueError: Frozen FeatStructs may not be modified.
+>>> fs1.pop('x')
+Traceback (most recent call last):
+  . . .
+ValueError: Frozen FeatStructs may not be modified.
+>>> fs1.popitem()
+Traceback (most recent call last):
+  . . .
+ValueError: Frozen FeatStructs may not be modified.
+>>> fs1.setdefault('x')
+Traceback (most recent call last):
+  . . .
+ValueError: Frozen FeatStructs may not be modified.
+>>> fs1.update(z=22)
+Traceback (most recent call last):
+  . . .
+ValueError: Frozen FeatStructs may not be modified.
+
+
+ +
+

Feature Paths

Make sure that __getitem__ with feature paths works as intended:

-
-
->>> fs1 = FeatStruct(a=1, b=2,
-...                 c=FeatStruct(
-...                     d=FeatStruct(e=12),
-...                     f=FeatStruct(g=55, h='hello')))
->>> fs1[()]
-[a=1, b=2, c=[d=[e=12], f=[g=55, h='hello']]]
->>> fs1['a'], fs1[('a',)]
-(1, 1)
->>> fs1['c','d','e']
-12
->>> fs1['c','f','g']
-55
-
-
+
>>> fs1 = FeatStruct(a=1, b=2,
+...                 c=FeatStruct(
+...                     d=FeatStruct(e=12),
+...                     f=FeatStruct(g=55, h='hello')))
+>>> fs1[()]
+[a=1, b=2, c=[d=[e=12], f=[g=55, h='hello']]]
+>>> fs1['a'], fs1[('a',)]
+(1, 1)
+>>> fs1['c','d','e']
+12
+>>> fs1['c','f','g']
+55
+
+

Feature paths that select unknown features raise KeyError:

-
-
->>> fs1['c', 'f', 'e']
-Traceback (most recent call last):
-  . . .
-KeyError: ('c', 'f', 'e')
->>> fs1['q', 'p']
-Traceback (most recent call last):
-  . . .
-KeyError: ('q', 'p')
-
-
-

Feature paths that try to go 'through' a feature that's not a feature +

>>> fs1['c', 'f', 'e']
+Traceback (most recent call last):
+  . . .
+KeyError: ('c', 'f', 'e')
+>>> fs1['q', 'p']
+Traceback (most recent call last):
+  . . .
+KeyError: ('q', 'p')
+
+
+

Feature paths that try to go ‘through’ a feature that’s not a feature structure raise KeyError:

-
-
->>> fs1['a', 'b']
-Traceback (most recent call last):
-  . . .
-KeyError: ('a', 'b')
-
-
+
>>> fs1['a', 'b']
+Traceback (most recent call last):
+  . . .
+KeyError: ('a', 'b')
+
+

Feature paths can go through reentrant structures:

-
-
->>> fs2 = FeatStruct('(1)[a=[b=[c->(1), d=5], e=11]]')
->>> fs2['a', 'b', 'c', 'a', 'e']
-11
->>> fs2['a', 'b', 'c', 'a', 'b', 'd']
-5
->>> fs2[tuple('abcabcabcabcabcabcabcabcabcabca')]
-(1)[b=[c=[a->(1)], d=5], e=11]
-
-
+
>>> fs2 = FeatStruct('(1)[a=[b=[c->(1), d=5], e=11]]')
+>>> fs2['a', 'b', 'c', 'a', 'e']
+11
+>>> fs2['a', 'b', 'c', 'a', 'b', 'd']
+5
+>>> fs2[tuple('abcabcabcabcabcabcabcabcabcabca')]
+(1)[b=[c=[a->(1)], d=5], e=11]
+
+

Indexing requires strings, Features, or tuples; other types raise a TypeError:

-
-
->>> fs2[12]
-Traceback (most recent call last):
-  . . .
-TypeError: Expected feature name or path.  Got 12.
->>> fs2[list('abc')]
-Traceback (most recent call last):
-  . . .
-TypeError: Expected feature name or path.  Got ['a', 'b', 'c'].
-
-
+
>>> fs2[12]
+Traceback (most recent call last):
+  . . .
+TypeError: Expected feature name or path.  Got 12.
+>>> fs2[list('abc')]
+Traceback (most recent call last):
+  . . .
+TypeError: Expected feature name or path.  Got ['a', 'b', 'c'].
+
+

Feature paths can also be used with get(), has_key(), and __contains__().

-
-
->>> fpath1 = tuple('abcabc')
->>> fpath2 = tuple('abcabz')
->>> fs2.get(fpath1), fs2.get(fpath2)
-((1)[a=[b=[c->(1), d=5], e=11]], None)
->>> fpath1 in fs2, fpath2 in fs2
-(True, False)
->>> fs2.has_key(fpath1), fs2.has_key(fpath2)
-(True, False)
-
-
- -
-
-

Reading Feature Structures

+
>>> fpath1 = tuple('abcabc')
+>>> fpath2 = tuple('abcabz')
+>>> fs2.get(fpath1), fs2.get(fpath2)
+((1)[a=[b=[c->(1), d=5], e=11]], None)
+>>> fpath1 in fs2, fpath2 in fs2
+(True, False)
+>>> fs2.has_key(fpath1), fs2.has_key(fpath2)
+(True, False)
+
+
+ +
+

Reading Feature Structures

Empty feature struct:

-
-
->>> FeatStruct('[]')
-[]
-
-
+
>>> FeatStruct('[]')
+[]
+
+

Test features with integer values:

-
-
->>> FeatStruct('[a=12, b=-33, c=0]')
-[a=12, b=-33, c=0]
-
-
+
>>> FeatStruct('[a=12, b=-33, c=0]')
+[a=12, b=-33, c=0]
+
+

Test features with string values. Either single or double quotes may -be used. Strings are evaluated just like python strings -- in -particular, you can use escape sequences and 'u' and 'r' prefixes, and +be used. Strings are evaluated just like python strings – in +particular, you can use escape sequences and ‘u’ and ‘r’ prefixes, and triple-quoted strings.

-
-
->>> FeatStruct('[a="", b="hello", c="\'", d=\'\', e=\'"\']')
-[a='', b='hello', c="'", d='', e='"']
->>> FeatStruct(r'[a="\\", b="\"", c="\x6f\\y", d="12"]')
-[a='\\', b='"', c='o\\y', d='12']
->>> FeatStruct(r'[b=r"a\b\c"]')
-[b='a\\b\\c']
->>> FeatStruct('[x="""a"""]')
-[x='a']
-
-
+
>>> FeatStruct('[a="", b="hello", c="\'", d=\'\', e=\'"\']')
+[a='', b='hello', c="'", d='', e='"']
+>>> FeatStruct(r'[a="\\", b="\"", c="\x6f\\y", d="12"]')
+[a='\\', b='"', c='o\\y', d='12']
+>>> FeatStruct(r'[b=r"a\b\c"]')
+[b='a\\b\\c']
+>>> FeatStruct('[x="""a"""]')
+[x='a']
+
+

Test parsing of reentrant feature structures.

-
-
->>> FeatStruct('[a=(1)[], b->(1)]')
-[a=(1)[], b->(1)]
->>> FeatStruct('[a=(1)[x=1, y=2], b->(1)]')
-[a=(1)[x=1, y=2], b->(1)]
-
-
+
>>> FeatStruct('[a=(1)[], b->(1)]')
+[a=(1)[], b->(1)]
+>>> FeatStruct('[a=(1)[x=1, y=2], b->(1)]')
+[a=(1)[x=1, y=2], b->(1)]
+
+

Test parsing of cyclic feature structures.

-
-
->>> FeatStruct('[a=(1)[b->(1)]]')
-[a=(1)[b->(1)]]
->>> FeatStruct('(1)[a=[b=[c->(1)]]]')
-(1)[a=[b=[c->(1)]]]
-
-
-

Strings of the form "+name" and "-name" may be used to specify boolean +

>>> FeatStruct('[a=(1)[b->(1)]]')
+[a=(1)[b->(1)]]
+>>> FeatStruct('(1)[a=[b=[c->(1)]]]')
+(1)[a=[b=[c->(1)]]]
+
+
+

Strings of the form “+name” and “-name” may be used to specify boolean values.

-
-
->>> FeatStruct('[-bar, +baz, +foo]')
-[-bar, +baz, +foo]
-
-
+
>>> FeatStruct('[-bar, +baz, +foo]')
+[-bar, +baz, +foo]
+
+

None, True, and False are recognized as values:

-
-
->>> FeatStruct('[bar=True, baz=False, foo=None]')
-[+bar, -baz, foo=None]
-
-
+
>>> FeatStruct('[bar=True, baz=False, foo=None]')
+[+bar, -baz, foo=None]
+
+

Special features:

-
-
->>> FeatStruct('NP/VP')
-NP[]/VP[]
->>> FeatStruct('?x/?x')
-?x[]/?x[]
->>> print(FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'))
-[ *type*  = 'VP'              ]
-[                             ]
-[           [ *type* = 'NP' ] ]
-[ *slash* = [ agr    = ?x   ] ]
-[           [ pl     = True ] ]
-[                             ]
-[ agr     = ?x                ]
-[ fin     = True              ]
-[ tense   = 'past'            ]
-
-
-
-
Here the slash feature gets coerced:
-
->>> FeatStruct('[*slash*=a, x=b, *type*="NP"]')
-NP[x='b']/a[]
-
-
->>> FeatStruct('NP[sem=<bob>]/NP')
-NP[sem=<bob>]/NP[]
->>> FeatStruct('S[sem=<walk(bob)>]')
-S[sem=<walk(bob)>]
->>> print(FeatStruct('NP[sem=<bob>]/NP'))
-[ *type*  = 'NP'              ]
-[                             ]
-[ *slash* = [ *type* = 'NP' ] ]
-[                             ]
-[ sem     = <bob>             ]
-
+
>>> FeatStruct('NP/VP')
+NP[]/VP[]
+>>> FeatStruct('?x/?x')
+?x[]/?x[]
+>>> print(FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'))
+[ *type*  = 'VP'              ]
+[                             ]
+[           [ *type* = 'NP' ] ]
+[ *slash* = [ agr    = ?x   ] ]
+[           [ pl     = True ] ]
+[                             ]
+[ agr     = ?x                ]
+[ fin     = True              ]
+[ tense   = 'past'            ]
+
+
+
+
Here the slash feature gets coerced:
>>> FeatStruct('[*slash*=a, x=b, *type*="NP"]')
+NP[x='b']/a[]
+
+
+
>>> FeatStruct('NP[sem=<bob>]/NP')
+NP[sem=<bob>]/NP[]
+>>> FeatStruct('S[sem=<walk(bob)>]')
+S[sem=<walk(bob)>]
+>>> print(FeatStruct('NP[sem=<bob>]/NP'))
+[ *type*  = 'NP'              ]
+[                             ]
+[ *slash* = [ *type* = 'NP' ] ]
+[                             ]
+[ sem     = <bob>             ]
+
+

Playing with ranges:

-
-
->>> from nltk.featstruct import RangeFeature, FeatStructReader
->>> width = RangeFeature('width')
->>> reader = FeatStructReader([width])
->>> fs1 = reader.fromstring('[*width*=-5:12]')
->>> fs2 = reader.fromstring('[*width*=2:123]')
->>> fs3 = reader.fromstring('[*width*=-7:-2]')
->>> fs1.unify(fs2)
-[*width*=(2, 12)]
->>> fs1.unify(fs3)
-[*width*=(-5, -2)]
->>> print(fs2.unify(fs3)) # no overlap in width.
-None
-
-
-

The slash feature has a default value of 'False':

-
-
->>> print(FeatStruct('NP[]/VP').unify(FeatStruct('NP[]'), trace=1))
-<BLANKLINE>
-Unification trace:
-   / NP[]/VP[]
-  |\ NP[]
-  |
-  | Unify feature: *type*
-  |    / 'NP'
-  |   |\ 'NP'
-  |   |
-  |   +-->'NP'
-  |
-  | Unify feature: *slash*
-  |    / VP[]
-  |   |\ False
-  |   |
-  X   X <-- FAIL
-None
-
-
-

The demo structures from category.py. They all parse, but they don't -do quite the right thing, -- ?x vs x.

-
-
->>> FeatStruct(pos='n', agr=FeatStruct(number='pl', gender='f'))
-[agr=[gender='f', number='pl'], pos='n']
->>> FeatStruct(r'NP[sem=<bob>]/NP')
-NP[sem=<bob>]/NP[]
->>> FeatStruct(r'S[sem=<app(?x, ?y)>]')
-S[sem=<?x(?y)>]
->>> FeatStruct('?x/?x')
-?x[]/?x[]
->>> FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')
-VP[agr=?x, +fin, tense='past']/NP[agr=?x, +pl]
->>> FeatStruct('S[sem = <app(?subj, ?vp)>]')
-S[sem=<?subj(?vp)>]
-
-
->>> FeatStruct('S')
-S[]
-
-
+
>>> from nltk.featstruct import RangeFeature, FeatStructReader
+>>> width = RangeFeature('width')
+>>> reader = FeatStructReader([width])
+>>> fs1 = reader.fromstring('[*width*=-5:12]')
+>>> fs2 = reader.fromstring('[*width*=2:123]')
+>>> fs3 = reader.fromstring('[*width*=-7:-2]')
+>>> fs1.unify(fs2)
+[*width*=(2, 12)]
+>>> fs1.unify(fs3)
+[*width*=(-5, -2)]
+>>> print(fs2.unify(fs3)) # no overlap in width.
+None
+
+
+

The slash feature has a default value of ‘False’:

+
>>> print(FeatStruct('NP[]/VP').unify(FeatStruct('NP[]'), trace=1))
+
+Unification trace:
+   / NP[]/VP[]
+  |\ NP[]
+  |
+  | Unify feature: *type*
+  |    / 'NP'
+  |   |\ 'NP'
+  |   |
+  |   +-->'NP'
+  |
+  | Unify feature: *slash*
+  |    / VP[]
+  |   |\ False
+  |   |
+  X   X <-- FAIL
+None
+
+
+

The demo structures from category.py. They all parse, but they don’t +do quite the right thing (?x vs x).

+
>>> FeatStruct(pos='n', agr=FeatStruct(number='pl', gender='f'))
+[agr=[gender='f', number='pl'], pos='n']
+>>> FeatStruct(r'NP[sem=<bob>]/NP')
+NP[sem=<bob>]/NP[]
+>>> FeatStruct(r'S[sem=<app(?x, ?y)>]')
+S[sem=<?x(?y)>]
+>>> FeatStruct('?x/?x')
+?x[]/?x[]
+>>> FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')
+VP[agr=?x, +fin, tense='past']/NP[agr=?x, +pl]
+>>> FeatStruct('S[sem = <app(?subj, ?vp)>]')
+S[sem=<?subj(?vp)>]
+
+
+
>>> FeatStruct('S')
+S[]
+
+

The parser also includes support for reading sets and tuples.

-
-
->>> FeatStruct('[x={1,2,2,2}, y={/}]')
-[x={1, 2}, y={/}]
->>> FeatStruct('[x=(1,2,2,2), y=()]')
-[x=(1, 2, 2, 2), y=()]
->>> print(FeatStruct('[x=(1,[z=(1,2,?x)],?z,{/})]'))
-[ x = (1, [ z = (1, 2, ?x) ], ?z, {/}) ]
-
-
-

Note that we can't put a featstruct inside a set, because doing so -would hash it, and it's not frozen yet:

-
-
->>> print(FeatStruct('[x={[]}]'))
-Traceback (most recent call last):
-  . . .
-TypeError: FeatStructs must be frozen before they can be hashed.
-
-
-

There's a special syntax for taking the union of sets: "{...+...}". +

>>> FeatStruct('[x={1,2,2,2}, y={/}]')
+[x={1, 2}, y={/}]
+>>> FeatStruct('[x=(1,2,2,2), y=()]')
+[x=(1, 2, 2, 2), y=()]
+>>> print(FeatStruct('[x=(1,[z=(1,2,?x)],?z,{/})]'))
+[ x = (1, [ z = (1, 2, ?x) ], ?z, {/}) ]
+
+
+

Note that we can’t put a featstruct inside a set, because doing so +would hash it, and it’s not frozen yet:

+
>>> print(FeatStruct('[x={[]}]'))
+Traceback (most recent call last):
+  . . .
+TypeError: FeatStructs must be frozen before they can be hashed.
+
+
+

There’s a special syntax for taking the union of sets: “{…+…}”. The elements should only be variables or sets.

-
-
->>> FeatStruct('[x={?a+?b+{1,2,3}}]')
-[x={?a+?b+{1, 2, 3}}]
-
-
-

There's a special syntax for taking the concatenation of tuples: -"(...+...)". The elements should only be variables or tuples.

-
-
->>> FeatStruct('[x=(?a+?b+(1,2,3))]')
-[x=(?a+?b+(1, 2, 3))]
-
-
+
>>> FeatStruct('[x={?a+?b+{1,2,3}}]')
+[x={?a+?b+{1, 2, 3}}]
+
+
+

There’s a special syntax for taking the concatenation of tuples: +“(…+…)”. The elements should only be variables or tuples.

+
>>> FeatStruct('[x=(?a+?b+(1,2,3))]')
+[x=(?a+?b+(1, 2, 3))]
+
+

Parsing gives helpful messages if your string contains an error.

-
-
->>> FeatStruct('[a=, b=5]]')
-Traceback (most recent call last):
-  . . .
-ValueError: Error parsing feature structure
-    [a=, b=5]]
-       ^ Expected value
->>> FeatStruct('[a=12 22, b=33]')
-Traceback (most recent call last):
-  . . .
-ValueError: Error parsing feature structure
-    [a=12 22, b=33]
-         ^ Expected comma
->>> FeatStruct('[a=5] [b=6]')
-Traceback (most recent call last):
-  . . .
-ValueError: Error parsing feature structure
-    [a=5] [b=6]
-          ^ Expected end of string
->>> FeatStruct(' *++*')
-Traceback (most recent call last):
-  . . .
-ValueError: Error parsing feature structure
-    *++*
-    ^ Expected open bracket or identifier
->>> FeatStruct('[x->(1)]')
-Traceback (most recent call last):
-  . . .
-ValueError: Error parsing feature structure
-    [x->(1)]
-        ^ Expected bound identifier
->>> FeatStruct('[x->y]')
-Traceback (most recent call last):
-  . . .
-ValueError: Error parsing feature structure
-    [x->y]
-        ^ Expected identifier
->>> FeatStruct('')
-Traceback (most recent call last):
-  . . .
-ValueError: Error parsing feature structure
-<BLANKLINE>
-    ^ Expected open bracket or identifier
-
-
-
-
-

Unification

+
>>> FeatStruct('[a=, b=5]]')
+Traceback (most recent call last):
+  . . .
+ValueError: Error parsing feature structure
+    [a=, b=5]]
+       ^ Expected value
+>>> FeatStruct('[a=12 22, b=33]')
+Traceback (most recent call last):
+  . . .
+ValueError: Error parsing feature structure
+    [a=12 22, b=33]
+         ^ Expected comma
+>>> FeatStruct('[a=5] [b=6]')
+Traceback (most recent call last):
+  . . .
+ValueError: Error parsing feature structure
+    [a=5] [b=6]
+          ^ Expected end of string
+>>> FeatStruct(' *++*')
+Traceback (most recent call last):
+  . . .
+ValueError: Error parsing feature structure
+    *++*
+    ^ Expected open bracket or identifier
+>>> FeatStruct('[x->(1)]')
+Traceback (most recent call last):
+  . . .
+ValueError: Error parsing feature structure
+    [x->(1)]
+        ^ Expected bound identifier
+>>> FeatStruct('[x->y]')
+Traceback (most recent call last):
+  . . .
+ValueError: Error parsing feature structure
+    [x->y]
+        ^ Expected identifier
+>>> FeatStruct('')
+Traceback (most recent call last):
+  . . .
+ValueError: Error parsing feature structure
+
+    ^ Expected open bracket or identifier
+
+
+ +
+

Unification

Very simple unifications give the expected results:

-
-
->>> FeatStruct().unify(FeatStruct())
-[]
->>> FeatStruct(number='singular').unify(FeatStruct())
-[number='singular']
->>> FeatStruct().unify(FeatStruct(number='singular'))
-[number='singular']
->>> FeatStruct(number='singular').unify(FeatStruct(person=3))
-[number='singular', person=3]
-
-
+
>>> FeatStruct().unify(FeatStruct())
+[]
+>>> FeatStruct(number='singular').unify(FeatStruct())
+[number='singular']
+>>> FeatStruct().unify(FeatStruct(number='singular'))
+[number='singular']
+>>> FeatStruct(number='singular').unify(FeatStruct(person=3))
+[number='singular', person=3]
+
+

Merging nested structures:

-
-
->>> fs1 = FeatStruct('[A=[B=b]]')
->>> fs2 = FeatStruct('[A=[C=c]]')
->>> fs1.unify(fs2)
-[A=[B='b', C='c']]
->>> fs2.unify(fs1)
-[A=[B='b', C='c']]
-
-
+
>>> fs1 = FeatStruct('[A=[B=b]]')
+>>> fs2 = FeatStruct('[A=[C=c]]')
+>>> fs1.unify(fs2)
+[A=[B='b', C='c']]
+>>> fs2.unify(fs1)
+[A=[B='b', C='c']]
+
+

A basic case of reentrant unification

-
-
->>> fs4 = FeatStruct('[A=(1)[B=b], E=[F->(1)]]')
->>> fs5 = FeatStruct("[A=[C='c'], E=[F=[D='d']]]")
->>> fs4.unify(fs5)
-[A=(1)[B='b', C='c', D='d'], E=[F->(1)]]
->>> fs5.unify(fs4)
-[A=(1)[B='b', C='c', D='d'], E=[F->(1)]]
-
-
+
>>> fs4 = FeatStruct('[A=(1)[B=b], E=[F->(1)]]')
+>>> fs5 = FeatStruct("[A=[C='c'], E=[F=[D='d']]]")
+>>> fs4.unify(fs5)
+[A=(1)[B='b', C='c', D='d'], E=[F->(1)]]
+>>> fs5.unify(fs4)
+[A=(1)[B='b', C='c', D='d'], E=[F->(1)]]
+
+

More than 2 paths to a value

-
-
->>> fs1 = FeatStruct("[a=[],b=[],c=[],d=[]]")
->>> fs2 = FeatStruct('[a=(1)[], b->(1), c->(1), d->(1)]')
->>> fs1.unify(fs2)
-[a=(1)[], b->(1), c->(1), d->(1)]
-
-
+
>>> fs1 = FeatStruct("[a=[],b=[],c=[],d=[]]")
+>>> fs2 = FeatStruct('[a=(1)[], b->(1), c->(1), d->(1)]')
+>>> fs1.unify(fs2)
+[a=(1)[], b->(1), c->(1), d->(1)]
+
+

fs1[a] gets unified with itself

-
-
->>> fs1 = FeatStruct('[x=(1)[], y->(1)]')
->>> fs2 = FeatStruct('[x=(1)[], y->(1)]')
->>> fs1.unify(fs2)
-[x=(1)[], y->(1)]
-
-
+
>>> fs1 = FeatStruct('[x=(1)[], y->(1)]')
+>>> fs2 = FeatStruct('[x=(1)[], y->(1)]')
+>>> fs1.unify(fs2)
+[x=(1)[], y->(1)]
+
+

Bound variables should get forwarded appropriately

-
-
->>> fs1 = FeatStruct('[A=(1)[X=x], B->(1), C=?cvar, D=?dvar]')
->>> fs2 = FeatStruct('[A=(1)[Y=y], B=(2)[Z=z], C->(1), D->(2)]')
->>> fs1.unify(fs2)
-[A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]
->>> fs2.unify(fs1)
-[A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]
-
-
+
>>> fs1 = FeatStruct('[A=(1)[X=x], B->(1), C=?cvar, D=?dvar]')
+>>> fs2 = FeatStruct('[A=(1)[Y=y], B=(2)[Z=z], C->(1), D->(2)]')
+>>> fs1.unify(fs2)
+[A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]
+>>> fs2.unify(fs1)
+[A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]
+
+

Cyclic structure created by unification.

-
-
->>> fs1 = FeatStruct('[F=(1)[], G->(1)]')
->>> fs2 = FeatStruct('[F=[H=(2)[]], G->(2)]')
->>> fs3 = fs1.unify(fs2)
->>> fs3
-[F=(1)[H->(1)], G->(1)]
->>> fs3['F'] is fs3['G']
-True
->>> fs3['F'] is fs3['G']['H']
-True
->>> fs3['F'] is fs3['G']['H']['H']
-True
->>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H']
-True
-
-
+
>>> fs1 = FeatStruct('[F=(1)[], G->(1)]')
+>>> fs2 = FeatStruct('[F=[H=(2)[]], G->(2)]')
+>>> fs3 = fs1.unify(fs2)
+>>> fs3
+[F=(1)[H->(1)], G->(1)]
+>>> fs3['F'] is fs3['G']
+True
+>>> fs3['F'] is fs3['G']['H']
+True
+>>> fs3['F'] is fs3['G']['H']['H']
+True
+>>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H']
+True
+
+

Cyclic structure created w/ variables.

-
-
->>> fs1 = FeatStruct('[F=[H=?x]]')
->>> fs2 = FeatStruct('[F=?x]')
->>> fs3 = fs1.unify(fs2, rename_vars=False)
->>> fs3
-[F=(1)[H->(1)]]
->>> fs3['F'] is fs3['F']['H']
-True
->>> fs3['F'] is fs3['F']['H']['H']
-True
->>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H']
-True
-
-
+
>>> fs1 = FeatStruct('[F=[H=?x]]')
+>>> fs2 = FeatStruct('[F=?x]')
+>>> fs3 = fs1.unify(fs2, rename_vars=False)
+>>> fs3
+[F=(1)[H->(1)]]
+>>> fs3['F'] is fs3['F']['H']
+True
+>>> fs3['F'] is fs3['F']['H']['H']
+True
+>>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H']
+True
+
+

Unifying w/ a cyclic feature structure.

-
-
->>> fs4 = FeatStruct('[F=[H=[H=[H=(1)[]]]], K->(1)]')
->>> fs3.unify(fs4)
-[F=(1)[H->(1)], K->(1)]
->>> fs4.unify(fs3)
-[F=(1)[H->(1)], K->(1)]
-
-
+
>>> fs4 = FeatStruct('[F=[H=[H=[H=(1)[]]]], K->(1)]')
+>>> fs3.unify(fs4)
+[F=(1)[H->(1)], K->(1)]
+>>> fs4.unify(fs3)
+[F=(1)[H->(1)], K->(1)]
+
+

Variable bindings should preserve reentrance.

-
-
->>> bindings = {}
->>> fs1 = FeatStruct("[a=?x]")
->>> fs2 = fs1.unify(FeatStruct("[a=[]]"), bindings)
->>> fs2['a'] is bindings[Variable('?x')]
-True
->>> fs2.unify(FeatStruct("[b=?x]"), bindings)
-[a=(1)[], b->(1)]
-
-
+
>>> bindings = {}
+>>> fs1 = FeatStruct("[a=?x]")
+>>> fs2 = fs1.unify(FeatStruct("[a=[]]"), bindings)
+>>> fs2['a'] is bindings[Variable('?x')]
+True
+>>> fs2.unify(FeatStruct("[b=?x]"), bindings)
+[a=(1)[], b->(1)]
+
+

Aliased variable tests

-
-
->>> fs1 = FeatStruct("[a=?x, b=?x]")
->>> fs2 = FeatStruct("[b=?y, c=?y]")
->>> bindings = {}
->>> fs3 = fs1.unify(fs2, bindings)
->>> fs3
-[a=?x, b=?x, c=?x]
->>> bindings
-{Variable('?y'): Variable('?x')}
->>> fs3.unify(FeatStruct("[a=1]"))
-[a=1, b=1, c=1]
-
-
+
>>> fs1 = FeatStruct("[a=?x, b=?x]")
+>>> fs2 = FeatStruct("[b=?y, c=?y]")
+>>> bindings = {}
+>>> fs3 = fs1.unify(fs2, bindings)
+>>> fs3
+[a=?x, b=?x, c=?x]
+>>> bindings
+{Variable('?y'): Variable('?x')}
+>>> fs3.unify(FeatStruct("[a=1]"))
+[a=1, b=1, c=1]
+
+

If we keep track of the bindings, then we can use the same variable over multiple calls to unify.

-
-
->>> bindings = {}
->>> fs1 = FeatStruct('[a=?x]')
->>> fs2 = fs1.unify(FeatStruct('[a=[]]'), bindings)
->>> fs2.unify(FeatStruct('[b=?x]'), bindings)
-[a=(1)[], b->(1)]
->>> bindings
-{Variable('?x'): []}
-
-
- -
-
-

Unification Bindings

-
-
->>> bindings = {}
->>> fs1 = FeatStruct('[a=?x]')
->>> fs2 = FeatStruct('[a=12]')
->>> fs3 = FeatStruct('[b=?x]')
->>> fs1.unify(fs2, bindings)
-[a=12]
->>> bindings
-{Variable('?x'): 12}
->>> fs3.substitute_bindings(bindings)
-[b=12]
->>> fs3 # substitute_bindings didn't mutate fs3.
-[b=?x]
->>> fs2.unify(fs3, bindings)
-[a=12, b=12]
-
-
->>> bindings = {}
->>> fs1 = FeatStruct('[a=?x, b=1]')
->>> fs2 = FeatStruct('[a=5, b=?x]')
->>> fs1.unify(fs2, bindings)
-[a=5, b=1]
->>> sorted(bindings.items())
-[(Variable('?x'), 5), (Variable('?x2'), 1)]
-
-
- -
-
-

Expressions

-
-
->>> e = Expression.fromstring('\\P y.P(z,y)')
->>> fs1 = FeatStruct(x=e, y=Variable('z'))
->>> fs2 = FeatStruct(y=VariableExpression(Variable('John')))
->>> fs1.unify(fs2)
-[x=<\P y.P(John,y)>, y=<John>]
-
-
-
-
-

Remove Variables

-
-
->>> FeatStruct('[a=?x, b=12, c=[d=?y]]').remove_variables()
-[b=12, c=[]]
->>> FeatStruct('(1)[a=[b=?x,c->(1)]]').remove_variables()
-(1)[a=[c->(1)]]
-
-
-
-
-

Equality & Hashing

+
>>> bindings = {}
+>>> fs1 = FeatStruct('[a=?x]')
+>>> fs2 = fs1.unify(FeatStruct('[a=[]]'), bindings)
+>>> fs2.unify(FeatStruct('[b=?x]'), bindings)
+[a=(1)[], b->(1)]
+>>> bindings
+{Variable('?x'): []}
+
+
+ +
+

Unification Bindings

+
>>> bindings = {}
+>>> fs1 = FeatStruct('[a=?x]')
+>>> fs2 = FeatStruct('[a=12]')
+>>> fs3 = FeatStruct('[b=?x]')
+>>> fs1.unify(fs2, bindings)
+[a=12]
+>>> bindings
+{Variable('?x'): 12}
+>>> fs3.substitute_bindings(bindings)
+[b=12]
+>>> fs3 # substitute_bindings didn't mutate fs3.
+[b=?x]
+>>> fs2.unify(fs3, bindings)
+[a=12, b=12]
+
+
+
>>> bindings = {}
+>>> fs1 = FeatStruct('[a=?x, b=1]')
+>>> fs2 = FeatStruct('[a=5, b=?x]')
+>>> fs1.unify(fs2, bindings)
+[a=5, b=1]
+>>> sorted(bindings.items())
+[(Variable('?x'), 5), (Variable('?x2'), 1)]
+
+
+
+
+

Expressions

+
>>> e = Expression.fromstring('\\P y.P(z,y)')
+>>> fs1 = FeatStruct(x=e, y=Variable('z'))
+>>> fs2 = FeatStruct(y=VariableExpression(Variable('John')))
+>>> fs1.unify(fs2)
+[x=<\P y.P(John,y)>, y=<John>]
+
+
+
+
+

Remove Variables

+
>>> FeatStruct('[a=?x, b=12, c=[d=?y]]').remove_variables()
+[b=12, c=[]]
+>>> FeatStruct('(1)[a=[b=?x,c->(1)]]').remove_variables()
+(1)[a=[c->(1)]]
+
+
+
+
+

Equality & Hashing

The equal_values method checks whether two feature structures assign the same value to every feature. If the optional argument -check_reentrances is supplied, then it also returns false if there +check_reentrances is supplied, then it also returns false if there is any difference in the reentrances.

-
-
->>> a = FeatStruct('(1)[x->(1)]')
->>> b = FeatStruct('(1)[x->(1)]')
->>> c = FeatStruct('(1)[x=[x->(1)]]')
->>> d = FeatStruct('[x=(1)[x->(1)]]')
->>> e = FeatStruct('(1)[x=[x->(1), y=1], y=1]')
->>> def compare(x,y):
-...     assert x.equal_values(y, True) == y.equal_values(x, True)
-...     assert x.equal_values(y, False) == y.equal_values(x, False)
-...     if x.equal_values(y, True):
-...         assert x.equal_values(y, False)
-...         print('equal values, same reentrance')
-...     elif x.equal_values(y, False):
-...         print('equal values, different reentrance')
-...     else:
-...         print('different values')
-
-
->>> compare(a, a)
-equal values, same reentrance
->>> compare(a, b)
-equal values, same reentrance
->>> compare(a, c)
-equal values, different reentrance
->>> compare(a, d)
-equal values, different reentrance
->>> compare(c, d)
-equal values, different reentrance
->>> compare(a, e)
-different values
->>> compare(c, e)
-different values
->>> compare(d, e)
-different values
->>> compare(e, e)
-equal values, same reentrance
-
-
+
>>> a = FeatStruct('(1)[x->(1)]')
+>>> b = FeatStruct('(1)[x->(1)]')
+>>> c = FeatStruct('(1)[x=[x->(1)]]')
+>>> d = FeatStruct('[x=(1)[x->(1)]]')
+>>> e = FeatStruct('(1)[x=[x->(1), y=1], y=1]')
+>>> def compare(x,y):
+...     assert x.equal_values(y, True) == y.equal_values(x, True)
+...     assert x.equal_values(y, False) == y.equal_values(x, False)
+...     if x.equal_values(y, True):
+...         assert x.equal_values(y, False)
+...         print('equal values, same reentrance')
+...     elif x.equal_values(y, False):
+...         print('equal values, different reentrance')
+...     else:
+...         print('different values')
+
+
+
>>> compare(a, a)
+equal values, same reentrance
+>>> compare(a, b)
+equal values, same reentrance
+>>> compare(a, c)
+equal values, different reentrance
+>>> compare(a, d)
+equal values, different reentrance
+>>> compare(c, d)
+equal values, different reentrance
+>>> compare(a, e)
+different values
+>>> compare(c, e)
+different values
+>>> compare(d, e)
+different values
+>>> compare(e, e)
+equal values, same reentrance
+
+

Feature structures may not be hashed until they are frozen:

-
-
->>> hash(a)
-Traceback (most recent call last):
-  . . .
-TypeError: FeatStructs must be frozen before they can be hashed.
->>> a.freeze()
->>> v = hash(a)
-
-
+
>>> hash(a)
+Traceback (most recent call last):
+  . . .
+TypeError: FeatStructs must be frozen before they can be hashed.
+>>> a.freeze()
+>>> v = hash(a)
+
+

Feature structures define hash consistently. The following example looks at the hash value for each (fs1,fs2) pair; if their hash values are not equal, then they must not be equal. If their hash values are equal, then display a message, and indicate whether their values are indeed equal. Note that c and d currently have the same hash value, even though they are not equal. That is not a bug, strictly speaking, -but it wouldn't be a bad thing if it changed.

-
-
->>> for fstruct in (a, b, c, d, e):
-...     fstruct.freeze()
->>> for fs1_name in 'abcde':
-...     for fs2_name in 'abcde':
-...         fs1 = locals()[fs1_name]
-...         fs2 = locals()[fs2_name]
-...         if hash(fs1) != hash(fs2):
-...             assert fs1 != fs2
-...         else:
-...             print('%s and %s have the same hash value,' %
-...                    (fs1_name, fs2_name))
-...             if fs1 == fs2: print('and are equal')
-...             else: print('and are not equal')
-a and a have the same hash value, and are equal
-a and b have the same hash value, and are equal
-b and a have the same hash value, and are equal
-b and b have the same hash value, and are equal
-c and c have the same hash value, and are equal
-c and d have the same hash value, and are not equal
-d and c have the same hash value, and are not equal
-d and d have the same hash value, and are equal
-e and e have the same hash value, and are equal
-
-
- -
-
-

Tracing

-
-
->>> fs1 = FeatStruct('[a=[b=(1)[], c=?x], d->(1), e=[f=?x]]')
->>> fs2 = FeatStruct('[a=(1)[c="C"], e=[g->(1)]]')
->>> fs1.unify(fs2, trace=True)
-<BLANKLINE>
-Unification trace:
-   / [a=[b=(1)[], c=?x], d->(1), e=[f=?x]]
-  |\ [a=(1)[c='C'], e=[g->(1)]]
-  |
-  | Unify feature: a
-  |    / [b=[], c=?x]
-  |   |\ [c='C']
-  |   |
-  |   | Unify feature: a.c
-  |   |    / ?x
-  |   |   |\ 'C'
-  |   |   |
-  |   |   +-->Variable('?x')
-  |   |
-  |   +-->[b=[], c=?x]
-  |       Bindings: {?x: 'C'}
-  |
-  | Unify feature: e
-  |    / [f=?x]
-  |   |\ [g=[c='C']]
-  |   |
-  |   +-->[f=?x, g=[b=[], c=?x]]
-  |       Bindings: {?x: 'C'}
-  |
-  +-->[a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]]
-      Bindings: {?x: 'C'}
-[a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]]
->>>
->>> fs1 = FeatStruct('[a=?x, b=?z, c=?z]')
->>> fs2 = FeatStruct('[a=?y, b=?y, c=?q]')
->>> #fs1.unify(fs2, trace=True)
->>>
-
-
- -
-
-

Unification on Dicts & Lists

-

It's possible to do unification on dictionaries:

-
-
->>> from nltk.featstruct import unify
->>> pprint(unify(dict(x=1, y=dict(z=2)), dict(x=1, q=5)), width=1)
-{'q': 5, 'x': 1, 'y': {'z': 2}}
-
-
-

It's possible to do unification on lists as well:

-
-
->>> unify([1, 2, 3], [1, Variable('x'), 3])
-[1, 2, 3]
-
-
+but it wouldn’t be a bad thing if it changed.

+
>>> for fstruct in (a, b, c, d, e):
+...     fstruct.freeze()
+>>> for fs1_name in 'abcde':
+...     for fs2_name in 'abcde':
+...         fs1 = locals()[fs1_name]
+...         fs2 = locals()[fs2_name]
+...         if hash(fs1) != hash(fs2):
+...             assert fs1 != fs2
+...         else:
+...             print('%s and %s have the same hash value,' %
+...                    (fs1_name, fs2_name))
+...             if fs1 == fs2: print('and are equal')
+...             else: print('and are not equal')
+a and a have the same hash value, and are equal
+a and b have the same hash value, and are equal
+b and a have the same hash value, and are equal
+b and b have the same hash value, and are equal
+c and c have the same hash value, and are equal
+c and d have the same hash value, and are not equal
+d and c have the same hash value, and are not equal
+d and d have the same hash value, and are equal
+e and e have the same hash value, and are equal
+
+
+ +
+

Tracing

+
>>> fs1 = FeatStruct('[a=[b=(1)[], c=?x], d->(1), e=[f=?x]]')
+>>> fs2 = FeatStruct('[a=(1)[c="C"], e=[g->(1)]]')
+>>> fs1.unify(fs2, trace=True)
+
+Unification trace:
+   / [a=[b=(1)[], c=?x], d->(1), e=[f=?x]]
+  |\ [a=(1)[c='C'], e=[g->(1)]]
+  |
+  | Unify feature: a
+  |    / [b=[], c=?x]
+  |   |\ [c='C']
+  |   |
+  |   | Unify feature: a.c
+  |   |    / ?x
+  |   |   |\ 'C'
+  |   |   |
+  |   |   +-->Variable('?x')
+  |   |
+  |   +-->[b=[], c=?x]
+  |       Bindings: {?x: 'C'}
+  |
+  | Unify feature: e
+  |    / [f=?x]
+  |   |\ [g=[c='C']]
+  |   |
+  |   +-->[f=?x, g=[b=[], c=?x]]
+  |       Bindings: {?x: 'C'}
+  |
+  +-->[a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]]
+      Bindings: {?x: 'C'}
+[a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]]
+>>>
+>>> fs1 = FeatStruct('[a=?x, b=?z, c=?z]')
+>>> fs2 = FeatStruct('[a=?y, b=?y, c=?q]')
+>>> #fs1.unify(fs2, trace=True)
+>>>
+
+
+
+
+

Unification on Dicts & Lists

+

It’s possible to do unification on dictionaries:

+
>>> from nltk.featstruct import unify
+>>> pprint(unify(dict(x=1, y=dict(z=2)), dict(x=1, q=5)), width=1)
+{'q': 5, 'x': 1, 'y': {'z': 2}}
+
+
+

It’s possible to do unification on lists as well:

+
>>> unify([1, 2, 3], [1, Variable('x'), 3])
+[1, 2, 3]
+
+

Mixing dicts and lists is fine:

-
-
->>> pprint(unify([dict(x=1, y=dict(z=2)),3], [dict(x=1, q=5),3]),
-...               width=1)
-[{'q': 5, 'x': 1, 'y': {'z': 2}}, 3]
-
-
+
>>> pprint(unify([dict(x=1, y=dict(z=2)),3], [dict(x=1, q=5),3]),
+...               width=1)
+[{'q': 5, 'x': 1, 'y': {'z': 2}}, 3]
+
+

Mixing dicts and FeatStructs is discouraged:

-
-
->>> unify(dict(x=1), FeatStruct(x=1))
-Traceback (most recent call last):
-  . . .
-ValueError: Mixing FeatStruct objects with Python dicts and lists is not supported.
-
-
+
>>> unify(dict(x=1), FeatStruct(x=1))
+Traceback (most recent call last):
+  . . .
+ValueError: Mixing FeatStruct objects with Python dicts and lists is not supported.
+
+

But you can do it if you really want, by explicitly stating that both dictionaries and FeatStructs should be treated as feature structures:

-
-
->>> unify(dict(x=1), FeatStruct(x=1), fs_class=(dict, FeatStruct))
-{'x': 1}
-
-
-
-
-

Finding Conflicts

-
-
->>> from nltk.featstruct import conflicts
->>> fs1 = FeatStruct('[a=[b=(1)[c=2], d->(1), e=[f->(1)]]]')
->>> fs2 = FeatStruct('[a=[b=[c=[x=5]], d=[c=2], e=[f=[c=3]]]]')
->>> for path in conflicts(fs1, fs2):
-...     print('%-8s: %r vs %r' % ('.'.join(path), fs1[path], fs2[path]))
-a.b.c   : 2 vs [x=5]
-a.e.f.c : 2 vs 3
-
-
- -
-
-

Retracting Bindings

-
-
->>> from nltk.featstruct import retract_bindings
->>> bindings = {}
->>> fs1 = FeatStruct('[a=?x, b=[c=?y]]')
->>> fs2 = FeatStruct('[a=(1)[c=[d=1]], b->(1)]')
->>> fs3 = fs1.unify(fs2, bindings)
->>> print(fs3)
-[ a = (1) [ c = [ d = 1 ] ] ]
-[                           ]
-[ b -> (1)                  ]
->>> pprint(bindings)
-{Variable('?x'): [c=[d=1]], Variable('?y'): [d=1]}
->>> retract_bindings(fs3, bindings)
-[a=?x, b=?x]
->>> pprint(bindings)
-{Variable('?x'): [c=?y], Variable('?y'): [d=1]}
-
-
-
-
-
-

Squashed Bugs

+
>>> unify(dict(x=1), FeatStruct(x=1), fs_class=(dict, FeatStruct))
+{'x': 1}
+
+
+ +
+

Finding Conflicts

+
>>> from nltk.featstruct import conflicts
+>>> fs1 = FeatStruct('[a=[b=(1)[c=2], d->(1), e=[f->(1)]]]')
+>>> fs2 = FeatStruct('[a=[b=[c=[x=5]], d=[c=2], e=[f=[c=3]]]]')
+>>> for path in conflicts(fs1, fs2):
+...     print('%-8s: %r vs %r' % ('.'.join(path), fs1[path], fs2[path]))
+a.b.c   : 2 vs [x=5]
+a.e.f.c : 2 vs 3
+
+
+
+
+

Retracting Bindings

+
>>> from nltk.featstruct import retract_bindings
+>>> bindings = {}
+>>> fs1 = FeatStruct('[a=?x, b=[c=?y]]')
+>>> fs2 = FeatStruct('[a=(1)[c=[d=1]], b->(1)]')
+>>> fs3 = fs1.unify(fs2, bindings)
+>>> print(fs3)
+[ a = (1) [ c = [ d = 1 ] ] ]
+[                           ]
+[ b -> (1)                  ]
+>>> pprint(bindings)
+{Variable('?x'): [c=[d=1]], Variable('?y'): [d=1]}
+>>> retract_bindings(fs3, bindings)
+[a=?x, b=?x]
+>>> pprint(bindings)
+{Variable('?x'): [c=?y], Variable('?y'): [d=1]}
+
+
+
+ +
+

Squashed Bugs

In svn rev 5167, unifying two feature structures that used the same variable would cause those variables to become aliased in the output.

-
-
->>> fs1 = FeatStruct('[a=?x]')
->>> fs2 = FeatStruct('[b=?x]')
->>> fs1.unify(fs2)
-[a=?x, b=?x2]
-
-
+
>>> fs1 = FeatStruct('[a=?x]')
+>>> fs2 = FeatStruct('[b=?x]')
+>>> fs1.unify(fs2)
+[a=?x, b=?x2]
+
+

There was a bug in svn revision 5172 that caused rename_variables to rename variables to names that are already used.

-
-
->>> FeatStruct('[a=?x, b=?x2]').rename_variables(
-...     vars=[Variable('?x')])
-[a=?x3, b=?x2]
->>> fs1 = FeatStruct('[a=?x]')
->>> fs2 = FeatStruct('[a=?x, b=?x2]')
->>> fs1.unify(fs2)
-[a=?x, b=?x2]
-
-
+
>>> FeatStruct('[a=?x, b=?x2]').rename_variables(
+...     vars=[Variable('?x')])
+[a=?x3, b=?x2]
+>>> fs1 = FeatStruct('[a=?x]')
+>>> fs2 = FeatStruct('[a=?x, b=?x2]')
+>>> fs1.unify(fs2)
+[a=?x, b=?x2]
+
+

There was a bug in svn rev 5167 that caused us to get the following example wrong. Basically the problem was that we only followed -'forward' pointers for other, not self, when unifying two feature +‘forward’ pointers for other, not self, when unifying two feature structures. (nb: this test assumes that features are unified in -alphabetical order -- if they are not, it might pass even if the bug +alphabetical order – if they are not, it might pass even if the bug is present.)

-
-
->>> fs1 = FeatStruct('[a=[x=1], b=?x, c=?x]')
->>> fs2 = FeatStruct('[a=(1)[], b->(1), c=[x=2]]')
->>> print(fs1.unify(fs2))
-None
-
-
- +
>>> fs1 = FeatStruct('[a=[x=1], b=?x, c=?x]')
+>>> fs2 = FeatStruct('[a=(1)[], b->(1), c=[x=2]]')
+>>> print(fs1.unify(fs2))
+None
+
+
+ + + + +
+
+ +
+ +
+ +
+
+ - + \ No newline at end of file diff --git a/howto/framenet.html b/howto/framenet.html index 7fdf0731b..07c61460c 100644 --- a/howto/framenet.html +++ b/howto/framenet.html @@ -1,348 +1,105 @@ - - - + - - -FrameNet - - - -
-

FrameNet

- - - +
+
+ +
+

Sample usage for framenet

+
+

FrameNet

The FrameNet corpus is a lexical database of English that is both human- and machine-readable, based on annotating examples of how words are used in actual texts. FrameNet is based on a theory of meaning called Frame @@ -360,116 +117,124 @@

FrameNet

the Apply_heat frame. The job of FrameNet is to define the frames and to annotate sentences to show how the FEs fit syntactically around the word that evokes the frame.

-
-

Frames

+
+

Frames

A Frame is a script-like conceptual structure that describes a particular type of situation, object, or event along with the participants and props that are needed for that Frame. For -example, the "Apply_heat" frame describes a common situation +example, the “Apply_heat” frame describes a common situation involving a Cook, some Food, and a Heating_Instrument, and is evoked by words such as bake, blanch, boil, broil, brown, simmer, steam, etc.

-

We call the roles of a Frame "frame elements" (FEs) and the -frame-evoking words are called "lexical units" (LUs).

+

We call the roles of a Frame “frame elements” (FEs) and the +frame-evoking words are called “lexical units” (LUs).

FrameNet includes relations between Frames. Several types of relations are defined, of which the most important are:

    -
  • Inheritance: An IS-A relation. The child frame is a subtype +
  • Inheritance: An IS-A relation. The child frame is a subtype of the parent frame, and each FE in the parent is bound to a corresponding FE in the child. An example is the -"Revenge" frame which inherits from the -"Rewards_and_punishments" frame.

  • -
  • Using: The child frame presupposes the parent frame as -background, e.g the "Speed" frame "uses" (or presupposes) -the "Motion" frame; however, not all parent FEs need to be -bound to child FEs.
  • -
  • Subframe: The child frame is a subevent of a complex event -represented by the parent, e.g. the "Criminal_process" frame -has subframes of "Arrest", "Arraignment", "Trial", and -"Sentencing".
  • -
  • Perspective_on: The child frame provides a particular +“Revenge” frame which inherits from the +“Rewards_and_punishments” frame.

  • +
  • Using: The child frame presupposes the parent frame as +background, e.g. the “Speed” frame “uses” (or presupposes) +the “Motion” frame; however, not all parent FEs need to be +bound to child FEs.

  • +
  • Subframe: The child frame is a subevent of a complex event +represented by the parent, e.g. the “Criminal_process” frame +has subframes of “Arrest”, “Arraignment”, “Trial”, and +“Sentencing”.

  • +
  • Perspective_on: The child frame provides a particular perspective on an un-perspectivized parent frame. A pair of -examples consists of the "Hiring" and "Get_a_job" frames, -which perspectivize the "Employment_start" frame from the -Employer's and the Employee's point of view, respectively.

  • +examples consists of the “Hiring” and “Get_a_job” frames, +which perspectivize the “Employment_start” frame from the +Employer’s and the Employee’s point of view, respectively.

To get a list of all of the Frames in FrameNet, you can use the frames() function. If you supply a regular expression pattern to the frames() function, you will get a list of all Frames whose names match that pattern:

-
-
->>> from pprint import pprint
->>> from nltk.corpus import framenet as fn
->>> len(fn.frames())
-1019
->>> pprint(fn.frames(r'(?i)medical'))
-[<frame ID=256 name=Medical_specialties>, <frame ID=257 name=Medical_instruments>, ...]
-
-
+
>>> from pprint import pprint
+>>> from operator import itemgetter
+>>> from nltk.corpus import framenet as fn
+>>> from nltk.corpus.reader.framenet import PrettyList
+>>> x = fn.frames(r'(?i)crim')
+>>> x.sort(key=itemgetter('ID'))
+>>> x
+[<frame ID=200 name=Criminal_process>, <frame ID=500 name=Criminal_investigation>, ...]
+>>> PrettyList(sorted(x, key=itemgetter('ID')))
+[<frame ID=200 name=Criminal_process>, <frame ID=500 name=Criminal_investigation>, ...]
+
+

To get the details of a particular Frame, you can use the frame() function passing in the frame number:

-
-
->>> from pprint import pprint
->>> from nltk.corpus import framenet as fn
->>> f = fn.frame(256)
->>> f.ID
-256
->>> f.name
-'Medical_specialties'
->>> f.definition # doctest: +ELLIPSIS
-"This frame includes words that name ..."
->>> len(f.lexUnit)
-29
->>> pprint(sorted([x for x in f.FE]))
-['Affliction', 'Body_system', 'Specialty', 'Type']
->>> pprint(f.frameRelations)
-[<Parent=Cure -- Using -> Child=Medical_specialties>]
-
-
+
>>> from pprint import pprint
+>>> from nltk.corpus import framenet as fn
+>>> f = fn.frame(202)
+>>> f.ID
+202
+>>> f.name
+'Arrest'
+>>> f.definition
+"Authorities charge a Suspect, who is under suspicion of having committed a crime..."
+>>> len(f.lexUnit)
+11
+>>> pprint(sorted([x for x in f.FE]))
+['Authorities',
+ 'Charges',
+ 'Co-participant',
+ 'Manner',
+ 'Means',
+ 'Offense',
+ 'Place',
+ 'Purpose',
+ 'Source_of_legal_authority',
+ 'Suspect',
+ 'Time',
+ 'Type']
+>>> pprint(f.frameRelations)
+[<Parent=Intentionally_affect -- Inheritance -> Child=Arrest>, <Complex=Criminal_process -- Subframe -> Component=Arrest>, ...]
+
+

The frame() function shown above returns a dict object containing detailed information about the Frame. See the documentation on the frame() function for the specifics.
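Since the return value is a dict subclass, key-style access works alongside the attribute-style access shown above. A small sketch (not from the original test suite; it reuses the f object from the example above):

>>> f.name, f['name']  # attribute access and item access give the same value
('Arrest', 'Arrest')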

You can also search for Frames by their Lexical Units (LUs). The frames_by_lemma() function returns a list of all frames that contain -LUs in which the 'name' attribute of the LU matchs the given regular -expression. Note that LU names are composed of "lemma.POS", where the -"lemma" part can be made up of either a single lexeme (e.g. 'run') or -multiple lexemes (e.g. 'a little') (see below).

-
-
->>> from nltk.corpus import framenet as fn
->>> fn.frames_by_lemma(r'(?i)a little')
-[<frame ID=189 name=Quantity>, <frame ID=2001 name=Degree>]
-
-
+LUs in which the ‘name’ attribute of the LU matches the given regular +expression. Note that LU names are composed of “lemma.POS”, where the +“lemma” part can be made up of either a single lexeme (e.g. ‘run’) or +multiple lexemes (e.g. ‘a little’) (see below).

+
>>> PrettyList(sorted(fn.frames_by_lemma(r'(?i)a little'), key=itemgetter('ID')))
+[<frame ID=189 name=Quanti...>, <frame ID=2001 name=Degree>]
+
-
-

Lexical Units

+
+
+

Lexical Units

A lexical unit (LU) is a pairing of a word with a meaning. For -example, the "Apply_heat" Frame describes a common situation +example, the “Apply_heat” Frame describes a common situation involving a Cook, some Food, and a Heating Instrument, and is _evoked_ by words such as bake, blanch, boil, broil, brown, simmer, steam, etc. These frame-evoking words are the LUs in the Apply_heat frame. Each sense of a polysemous word is a different LU.

-

We have used the word "word" in talking about LUs. The reality -is actually rather complex. When we say that the word "bake" is -polysemous, we mean that the lemma "bake.v" (which has the -word-forms "bake", "bakes", "baked", and "baking") is linked to +

We have used the word “word” in talking about LUs. The reality +is actually rather complex. When we say that the word “bake” is +polysemous, we mean that the lemma “bake.v” (which has the +word-forms “bake”, “bakes”, “baked”, and “baking”) is linked to three different frames:

    -
  • Apply_heat: "Michelle baked the potatoes for 45 minutes."
  • -
  • Cooking_creation: "Michelle baked her mother a cake for her birthday."
  • -
  • Absorb_heat: "The potatoes have to bake for more than 30 minutes."
  • +
  • Apply_heat: “Michelle baked the potatoes for 45 minutes.”

  • +
  • Cooking_creation: “Michelle baked her mother a cake for her birthday.”

  • +
  • Absorb_heat: “The potatoes have to bake for more than 30 minutes.”

These constitute three different LUs, with different definitions.
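A hedged sketch of how to verify this with the lus() function described below (the output assumes these three senses are the only bake.v entries in the bundled FrameNet release):

>>> sorted(lu.frame.name for lu in fn.lus(r'(?i)^bake\.v$'))
['Absorb_heat', 'Apply_heat', 'Cooking_creation']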

-

Multiword expressions such as "given name" and hyphenated words -like "shut-eye" can also be LUs. Idiomatic phrases such as -"middle of nowhere" and "give the slip (to)" are also defined as -LUs in the appropriate frames ("Isolated_places" and "Evading", +

Multiword expressions such as “given name” and hyphenated words +like “shut-eye” can also be LUs. Idiomatic phrases such as +“middle of nowhere” and “give the slip (to)” are also defined as +LUs in the appropriate frames (“Isolated_places” and “Evading”, respectively), and their internal structure is not analyzed.

Framenet provides multiple annotated examples of each sense of a
word (i.e. each LU). Moreover, the set of examples (approximately
20 per LU) illustrates all of the combinatorial possibilities of the
lexical unit. Each LU is linked to a Frame, and hence to the other words which
evoke that Frame. This makes the FrameNet database similar to a thesaurus, grouping together semantically similar words.

In the simplest case, frame-evoking words are verbs such as
“fried” in:

“Matilde fried the catfish in a heavy iron skillet.”

Sometimes event nouns may evoke a Frame. For example,
“reduction” evokes “Cause_change_of_scalar_position” in:

“…the reduction of debt levels to $665 million from $2.6 billion.”

Adjectives may also evoke a Frame. For example, “asleep” may
evoke the “Sleep” frame as in:

“They were asleep for hours.”

Many common nouns, such as artifacts like “hat” or “tower”,
typically serve as dependents rather than clearly evoking their
own frames.

Details for a specific lexical unit can be obtained using this class’s
lus() function, which takes an optional regular expression pattern that
will be matched against the name of the lexical unit:

>>> from pprint import pprint
>>> PrettyList(sorted(fn.lus(r'(?i)a little'), key=itemgetter('ID')))
[<lu ID=14733 name=a little.n>, <lu ID=14743 name=a little.adv>, ...]

You can obtain detailed information on a particular LU by calling the
lu() function and passing in an LU’s ‘ID’ number:

>>> from pprint import pprint
>>> from nltk.corpus import framenet as fn
>>> fn.lu(256).name
'foresee.v'
>>> fn.lu(256).definition
'COD: be aware of beforehand; predict.'
>>> fn.lu(256).frame.name
'Expectation'
>>> fn.lu(256).lexemes[0].name
'foresee'

Note that LU names take the form of a dotted string (e.g. “run.v” or “a +little.adv”) in which a lemma precedes the “.” and a part of speech (POS) follows the dot. The lemma may be composed of a single lexeme -(e.g. "run") or of multiple lexemes (e.g. "a little"). The list of +(e.g. “run”) or of multiple lexemes (e.g. “a little”). The list of POSs used in the LUs is:

v - verb
n - noun
a - adjective
adv - adverb
prep - preposition
num - numbers
intj - interjection
art - article
c - conjunction
scon - subordinating conjunction
For more detailed information about the contents of the dict that is
returned by the lu() function, see the documentation on the lu() function.
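Since the LU is returned as a dict-like object, the attributes
demonstrated above can also be checked as keys (a minimal sketch,
limited to attributes already shown):

>>> all(k in fn.lu(256).keys() for k in ('name', 'definition', 'lexemes'))  # doctest: +SKIP
True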

Annotated Documents

The FrameNet corpus contains a small set of annotated documents. A list
of these documents can be obtained by calling the docs() function:
>>> from pprint import pprint
+>>> from nltk.corpus import framenet as fn
+>>> d = fn.docs('BellRinging')[0]
+>>> d.corpname
+'PropBank'
+>>> d.sentence[49]
+full-text sentence (...) in BellRinging:
+
+
+[POS] 17 tags
+
+[POS_tagset] PENN
+
+[text] + [annotationSet]
+
+`` I live in hopes that the ringers themselves will be drawn into
+             *****          *******                    *****
+             Desir          Cause_t                    Cause
+             [1]            [3]                        [2]
+
+ that fuller life .
+      ******
+      Comple
+      [4]
+ (Desir=Desiring, Cause_t=Cause_to_make_noise, Cause=Cause_motion, Comple=Completeness)
+
+
>>> d.sentence[49].annotationSet[1]
+annotation set (...):
+
+[status] MANUAL
+
+[LU] (6605) hope.n in Desiring
+
+[frame] (366) Desiring
+
+[GF] 2 relations
+
+[PT] 2 phrases
+
+[text] + [Target] + [FE] + [Noun]
+
+`` I live in hopes that the ringers themselves will be drawn into
+   - ^^^^ ^^ ***** ----------------------------------------------
+   E supp su       Event
+
+ that fuller life .
+-----------------
+
+ (E=Experiencer, su=supp)
+
+
+
+
diff --git a/howto/generate.html b/howto/generate.html
Sample usage for generate
Generating sentences from context-free grammars

An example grammar:

>>> from nltk.parse.generate import generate, demo_grammar
>>> from nltk import CFG
>>> grammar = CFG.fromstring(demo_grammar)
>>> print(grammar)
Grammar with 13 productions (start state = S)
    S -> NP VP
    NP -> Det N
    PP -> P NP
    VP -> 'slept'
    VP -> 'saw' NP
    VP -> 'walked' PP
    Det -> 'the'
    Det -> 'a'
    N -> 'man'
    N -> 'park'
    N -> 'dog'
    P -> 'in'
    P -> 'with'

The first 10 generated sentences:

>>> for sentence in generate(grammar, n=10):
...     print(' '.join(sentence))
the man slept
the man saw the man
the man saw the park
the man saw the dog
the man saw a man
the man saw a park
the man saw a dog
the man walked in the man
the man walked in the park
the man walked in the dog

All sentences of max depth 4:

>>> for sentence in generate(grammar, depth=4):
...     print(' '.join(sentence))
the man slept
the park slept
the dog slept
a man slept
a park slept
a dog slept

The number of sentences of different max depths:

>>> len(list(generate(grammar, depth=3)))
0
>>> len(list(generate(grammar, depth=4)))
6
>>> len(list(generate(grammar, depth=5)))
42
>>> len(list(generate(grammar, depth=6)))
114
>>> len(list(generate(grammar)))
114
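generate() also accepts an optional start symbol, so fragments can be
produced from any category of the grammar (a minimal sketch reusing
demo_grammar; the first fragments follow the grammar's production order):

>>> from nltk.grammar import Nonterminal
>>> for fragment in generate(grammar, start=Nonterminal('NP'), n=3):
...     print(' '.join(fragment))
the man
the park
the dog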
diff --git a/howto/gensim.html b/howto/gensim.html
Sample usage for gensim
Demonstrate word embedding using Gensim

+
>>> from nltk.test.gensim_fixt import setup_module
+>>> setup_module()
+
+

We demonstrate three functions:

  • Train the word embeddings using the Brown Corpus;

  • Load the pre-trained model and perform simple tasks; and

  • Prune the pre-trained binary model.

>>> import gensim

Train the model

Here we train a word embedding using the Brown Corpus:

>>> from nltk.corpus import brown
>>> train_set = brown.sents()[:10000]
>>> model = gensim.models.Word2Vec(train_set)

It might take some time to train the model. So, after it is trained, it can be saved as follows:

>>> model.save('brown.embedding')
>>> new_model = gensim.models.Word2Vec.load('brown.embedding')

The model is a collection of words with their embeddings. We can easily
get the vector representation of a word.

>>> len(new_model['university'])
100

Gensim already implements several supporting functions for manipulating
word embeddings. For example, to compute the cosine similarity between
two words:

>>> new_model.wv.similarity('university','school') > 0.3
True
Using the pre-trained model

NLTK includes a pre-trained model which is part of a model trained on
100 billion words from the Google News dataset. The full model is
available from https://code.google.com/p/word2vec/ (about 3 GB).

>>> from nltk.data import find
>>> word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))
>>> model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_sample, binary=False)

We pruned the model to only include the most common words (~44k words).

>>> len(model.vocab)
43981
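Membership in the pruned vocabulary can be tested the same way
(a small sketch):

>>> 'university' in model.vocab
True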

Each word is represented in the space of 300 dimensions:

>>> len(model['university'])
300

Finding the top n words that are similar to a target word is simple. The result is the list of n words with the score.

>>> model.most_similar(positive=['university'], topn = 3)
[('universities', 0.70039...), ('faculty', 0.67809...), ('undergraduate', 0.65870...)]

Finding a word that does not belong in a list is also supported,
although implementing this yourself would be simple.

>>> model.doesnt_match('breakfast cereal dinner lunch'.split())
'cereal'

Mikolov et al. (2013) figured out that word embeddings capture many
syntactic and semantic regularities. For example, the vector
‘King - Man + Woman’ is close to ‘Queen’, and ‘Germany - Berlin + Paris’
is close to ‘France’.

>>> model.most_similar(positive=['woman','king'], negative=['man'], topn = 1)
[('queen', 0.71181...)]

>>> model.most_similar(positive=['Paris','Germany'], negative=['Berlin'], topn = 1)
[('France', 0.78840...)]
We can visualize the word embeddings using t-SNE (https://lvdmaaten.github.io/tsne/). For this demonstration, we visualize the first 1000 words.

import numpy as np
labels = []
count = 0
max_count = 1000
X = np.zeros(shape=(max_count, len(model['university'])))

for term in model.vocab:
    # Collect the embedding and the label for the first max_count words
    X[count] = model[term]
    labels.append(term)
    count += 1
    if count >= max_count: break

# It is recommended to use PCA first to reduce to ~50 dimensions
from sklearn.decomposition import PCA
pca = PCA(n_components=50)
X_50 = pca.fit_transform(X)

# Using TSNE to further reduce to 2 dimensions
from sklearn.manifold import TSNE
model_tsne = TSNE(n_components=2, random_state=0)
Y = model_tsne.fit_transform(X_50)

# Show the scatter plot
import matplotlib.pyplot as plt
plt.scatter(Y[:, 0], Y[:, 1], 20)

# Add labels
for label, x, y in zip(labels, Y[:, 0], Y[:, 1]):
    plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points', size=10)

plt.show()
Prune the trained binary model

Here is the supporting code to extract part of the binary model
(GoogleNews-vectors-negative300.bin.gz) from
https://code.google.com/p/word2vec/. We used this code to produce the
word2vec_sample model.

import gensim
from gensim.models.word2vec import Word2Vec

# Load the binary model
model = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)

# Only output words that appear in the Brown corpus
from nltk.corpus import brown
words = set(brown.words())
print(len(words))

# Output the selected words to a temporary file
out_file = 'pruned.word2vec.txt'
f = open(out_file, 'w')

word_presented = words.intersection(model.vocab.keys())
f.write('{} {}\n'.format(len(word_presented), len(model['word'])))

for word in word_presented:
    f.write('{} {}\n'.format(word, ' '.join(str(value) for value in model[word])))

f.close()
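The resulting text file can then be reloaded exactly like the bundled
sample model above (a sketch; 'pruned.word2vec.txt' is the file written
by the code above):

from gensim.models import KeyedVectors
pruned_model = KeyedVectors.load_word2vec_format('pruned.word2vec.txt', binary=False)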
diff --git a/howto/gluesemantics.html b/howto/gluesemantics.html

Sample usage for gluesemantics

Glue Semantics
+
+
+

Linear logic

+
>>> from nltk.sem import logic
+>>> from nltk.sem.glue import *
+>>> from nltk.sem.linearlogic import *
+
+
+
>>> from nltk.sem.linearlogic import Expression
+>>> read_expr = Expression.fromstring
+
+

Parser

>>> print(read_expr(r'f'))
+f
+>>> print(read_expr(r'(g -o f)'))
+(g -o f)
+>>> print(read_expr(r'(g -o (h -o f))'))
+(g -o (h -o f))
+>>> print(read_expr(r'((g -o G) -o G)'))
+((g -o G) -o G)
+>>> print(read_expr(r'(g -o f)(g)'))
+(g -o f)(g)
+>>> print(read_expr(r'((g -o G) -o G)((g -o f))'))
+((g -o G) -o G)((g -o f))
+
+

Simplify

>>> print(read_expr(r'f').simplify())
+f
+>>> print(read_expr(r'(g -o f)').simplify())
+(g -o f)
+>>> print(read_expr(r'((g -o G) -o G)').simplify())
+((g -o G) -o G)
+>>> print(read_expr(r'(g -o f)(g)').simplify())
+f
+>>> try: read_expr(r'(g -o f)(f)').simplify()
+... except LinearLogicApplicationException as e: print(e)
+...
+Cannot apply (g -o f) to f. Cannot unify g with f given {}
+>>> print(read_expr(r'(G -o f)(g)').simplify())
+f
+>>> print(read_expr(r'((g -o G) -o G)((g -o f))').simplify())
+f
+
+

Test BindingDict

>>> h = ConstantExpression('h')
+>>> g = ConstantExpression('g')
+>>> f = ConstantExpression('f')
+
+
+
>>> H = VariableExpression('H')
+>>> G = VariableExpression('G')
+>>> F = VariableExpression('F')
+
+
+
>>> d1 = BindingDict({H: h})
+>>> d2 = BindingDict({F: f, G: F})
+>>> d12 = d1 + d2
+>>> all12 = ['%s: %s' % (v, d12[v]) for v in d12.d]
+>>> all12.sort()
+>>> print(all12)
+['F: f', 'G: f', 'H: h']
+
+
+
>>> BindingDict([(F,f),(G,g),(H,h)]) == BindingDict({F:f, G:g, H:h})
+True
+
+
+
>>> d4 = BindingDict({F: f})
+>>> try: d4[F] = g
+... except VariableBindingException as e: print(e)
+Variable F already bound to another value
+
+

Test Unify

>>> try: f.unify(g, BindingDict())
+... except UnificationException as e: print(e)
+...
+Cannot unify f with g given {}
+
+
+
>>> f.unify(G, BindingDict()) == BindingDict({G: f})
+True
+>>> try: f.unify(G, BindingDict({G: h}))
+... except UnificationException as e: print(e)
+...
+Cannot unify f with G given {G: h}
+>>> f.unify(G, BindingDict({G: f})) == BindingDict({G: f})
+True
+>>> f.unify(G, BindingDict({H: f})) == BindingDict({G: f, H: f})
+True
+
+
+
>>> G.unify(f, BindingDict()) == BindingDict({G: f})
+True
+>>> try: G.unify(f, BindingDict({G: h}))
+... except UnificationException as e: print(e)
+...
+Cannot unify G with f given {G: h}
+>>> G.unify(f, BindingDict({G: f})) == BindingDict({G: f})
+True
+>>> G.unify(f, BindingDict({H: f})) == BindingDict({G: f, H: f})
+True
+
+
+
>>> G.unify(F, BindingDict()) == BindingDict({G: F})
+True
+>>> try: G.unify(F, BindingDict({G: H}))
+... except UnificationException as e: print(e)
+...
+Cannot unify G with F given {G: H}
+>>> G.unify(F, BindingDict({G: F})) == BindingDict({G: F})
+True
+>>> G.unify(F, BindingDict({H: F})) == BindingDict({G: F, H: F})
+True
+
+

Test Compile

>>> print(read_expr('g').compile_pos(Counter(), GlueFormula))
+(<ConstantExpression g>, [])
+>>> print(read_expr('(g -o f)').compile_pos(Counter(), GlueFormula))
+(<ImpExpression (g -o f)>, [])
+>>> print(read_expr('(g -o (h -o f))').compile_pos(Counter(), GlueFormula))
+(<ImpExpression (g -o (h -o f))>, [])
+
+
+ +
+

Glue

+
+

Demo of “John walks”

+
>>> john = GlueFormula("John", "g")
+>>> print(john)
+John : g
+>>> walks = GlueFormula(r"\x.walks(x)", "(g -o f)")
+>>> print(walks)
+\x.walks(x) : (g -o f)
+>>> print(walks.applyto(john))
+\x.walks(x)(John) : (g -o f)(g)
+>>> print(walks.applyto(john).simplify())
+walks(John) : f
+
+
+
+
+

Demo of “A dog walks”

+
>>> a = GlueFormula("\\P Q.some x.(P(x) and Q(x))", "((gv -o gr) -o ((g -o G) -o G))")
+>>> print(a)
+\P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
+>>> man = GlueFormula(r"\x.man(x)", "(gv -o gr)")
+>>> print(man)
+\x.man(x) : (gv -o gr)
+>>> walks = GlueFormula(r"\x.walks(x)", "(g -o f)")
+>>> print(walks)
+\x.walks(x) : (g -o f)
+>>> a_man = a.applyto(man)
+>>> print(a_man.simplify())
+\Q.exists x.(man(x) & Q(x)) : ((g -o G) -o G)
+>>> a_man_walks = a_man.applyto(walks)
+>>> print(a_man_walks.simplify())
+exists x.(man(x) & walks(x)) : f
+
+
+
+
+

Demo of ‘every girl chases a dog’

Individual words:

>>> every = GlueFormula("\\P Q.all x.(P(x) -> Q(x))", "((gv -o gr) -o ((g -o G) -o G))")
+>>> print(every)
+\P Q.all x.(P(x) -> Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
+>>> girl = GlueFormula(r"\x.girl(x)", "(gv -o gr)")
+>>> print(girl)
+\x.girl(x) : (gv -o gr)
+>>> chases = GlueFormula(r"\x y.chases(x,y)", "(g -o (h -o f))")
+>>> print(chases)
+\x y.chases(x,y) : (g -o (h -o f))
+>>> a = GlueFormula("\\P Q.some x.(P(x) and Q(x))", "((hv -o hr) -o ((h -o H) -o H))")
+>>> print(a)
+\P Q.exists x.(P(x) & Q(x)) : ((hv -o hr) -o ((h -o H) -o H))
+>>> dog = GlueFormula(r"\x.dog(x)", "(hv -o hr)")
+>>> print(dog)
+\x.dog(x) : (hv -o hr)
+
+

Noun Quantification can only be done one way:

>>> every_girl = every.applyto(girl)
+>>> print(every_girl.simplify())
+\Q.all x.(girl(x) -> Q(x)) : ((g -o G) -o G)
+>>> a_dog = a.applyto(dog)
+>>> print(a_dog.simplify())
+\Q.exists x.(dog(x) & Q(x)) : ((h -o H) -o H)
+
+
+

The first reading is achieved by combining ‘chases’ with ‘a dog’ first.
Since ‘a girl’ requires something of the form ‘(h -o H)’ we must
get rid of the ‘g’ in the glue of ‘see’. We will do this with
the ‘-o elimination’ rule. So, x1 will be our subject placeholder.

+
>>> xPrime = GlueFormula("x1", "g")
+>>> print(xPrime)
+x1 : g
+>>> xPrime_chases = chases.applyto(xPrime)
+>>> print(xPrime_chases.simplify())
+\y.chases(x1,y) : (h -o f)
+>>> xPrime_chases_a_dog = a_dog.applyto(xPrime_chases)
+>>> print(xPrime_chases_a_dog.simplify())
+exists x.(dog(x) & chases(x1,x)) : f
+
+

Now we can retract our subject placeholder using lambda-abstraction and combine with the true subject.

>>> chases_a_dog = xPrime_chases_a_dog.lambda_abstract(xPrime)
+>>> print(chases_a_dog.simplify())
+\x1.exists x.(dog(x) & chases(x1,x)) : (g -o f)
+>>> every_girl_chases_a_dog = every_girl.applyto(chases_a_dog)
+>>> r1 = every_girl_chases_a_dog.simplify()
+>>> r2 = GlueFormula(r'all x.(girl(x) -> exists z1.(dog(z1) & chases(x,z1)))', 'f')
+>>> r1 == r2
+True
+
+
+

The second reading is achieved by combining ‘every girl’ with ‘chases’ first.

+
>>> xPrime = GlueFormula("x1", "g")
+>>> print(xPrime)
+x1 : g
+>>> xPrime_chases = chases.applyto(xPrime)
+>>> print(xPrime_chases.simplify())
+\y.chases(x1,y) : (h -o f)
+>>> yPrime = GlueFormula("x2", "h")
+>>> print(yPrime)
+x2 : h
+>>> xPrime_chases_yPrime = xPrime_chases.applyto(yPrime)
+>>> print(xPrime_chases_yPrime.simplify())
+chases(x1,x2) : f
+>>> chases_yPrime = xPrime_chases_yPrime.lambda_abstract(xPrime)
+>>> print(chases_yPrime.simplify())
+\x1.chases(x1,x2) : (g -o f)
+>>> every_girl_chases_yPrime = every_girl.applyto(chases_yPrime)
+>>> print(every_girl_chases_yPrime.simplify())
+all x.(girl(x) -> chases(x,x2)) : f
+>>> every_girl_chases = every_girl_chases_yPrime.lambda_abstract(yPrime)
+>>> print(every_girl_chases.simplify())
+\x2.all x.(girl(x) -> chases(x,x2)) : (h -o f)
+>>> every_girl_chases_a_dog = a_dog.applyto(every_girl_chases)
+>>> r1 = every_girl_chases_a_dog.simplify()
+>>> r2 = GlueFormula(r'exists x.(dog(x) & all z2.(girl(z2) -> chases(z2,x)))', 'f')
+>>> r1 == r2
+True
+
+
+ +
+

Compilation

+
>>> for cp in GlueFormula('m', '(b -o a)').compile(Counter()): print(cp)
+m : (b -o a) : {1}
+>>> for cp in GlueFormula('m', '((c -o b) -o a)').compile(Counter()): print(cp)
+v1 : c : {1}
+m : (b[1] -o a) : {2}
+>>> for cp in GlueFormula('m', '((d -o (c -o b)) -o a)').compile(Counter()): print(cp)
+v1 : c : {1}
+v2 : d : {2}
+m : (b[1, 2] -o a) : {3}
+>>> for cp in GlueFormula('m', '((d -o e) -o ((c -o b) -o a))').compile(Counter()): print(cp)
+v1 : d : {1}
+v2 : c : {2}
+m : (e[1] -o (b[2] -o a)) : {3}
+>>> for cp in GlueFormula('m', '(((d -o c) -o b) -o a)').compile(Counter()): print(cp)
+v1 : (d -o c) : {1}
+m : (b[1] -o a) : {2}
+>>> for cp in GlueFormula('m', '((((e -o d) -o c) -o b) -o a)').compile(Counter()): print(cp)
+v1 : e : {1}
+v2 : (d[1] -o c) : {2}
+m : (b[2] -o a) : {3}
+
+
+
+
+

Demo of ‘a man walks’ using Compilation

Premises

>>> a = GlueFormula('\\P Q.some x.(P(x) and Q(x))', '((gv -o gr) -o ((g -o G) -o G))')
+>>> print(a)
+\P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
+
+
+
>>> man = GlueFormula('\\x.man(x)', '(gv -o gr)')
+>>> print(man)
+\x.man(x) : (gv -o gr)
+
+
+
>>> walks = GlueFormula('\\x.walks(x)', '(g -o f)')
+>>> print(walks)
+\x.walks(x) : (g -o f)
+
+

Compiled Premises:

>>> counter = Counter()
+>>> ahc = a.compile(counter)
+>>> g1 = ahc[0]
+>>> print(g1)
+v1 : gv : {1}
+>>> g2 = ahc[1]
+>>> print(g2)
+v2 : g : {2}
+>>> g3 = ahc[2]
+>>> print(g3)
+\P Q.exists x.(P(x) & Q(x)) : (gr[1] -o (G[2] -o G)) : {3}
+>>> g4 = man.compile(counter)[0]
+>>> print(g4)
+\x.man(x) : (gv -o gr) : {4}
+>>> g5 = walks.compile(counter)[0]
+>>> print(g5)
+\x.walks(x) : (g -o f) : {5}
+
+

Derivation:

>>> g14 = g4.applyto(g1)
+>>> print(g14.simplify())
+man(v1) : gr : {1, 4}
+>>> g134 = g3.applyto(g14)
+>>> print(g134.simplify())
+\Q.exists x.(man(x) & Q(x)) : (G[2] -o G) : {1, 3, 4}
+>>> g25 = g5.applyto(g2)
+>>> print(g25.simplify())
+walks(v2) : f : {2, 5}
+>>> g12345 = g134.applyto(g25)
+>>> print(g12345.simplify())
+exists x.(man(x) & walks(x)) : f : {1, 2, 3, 4, 5}
+
+
+

Dependency Graph to Glue Formulas

+
>>> from nltk.corpus.reader.dependency import DependencyGraph
+
+
>>> depgraph = DependencyGraph("""1 John    _       NNP     NNP     _       2       SUBJ    _       _
+... 2       sees    _       VB      VB      _       0       ROOT    _       _
+... 3       a       _       ex_quant        ex_quant        _       4       SPEC    _       _
+... 4       dog     _       NN      NN      _       2       OBJ     _       _
+... """)
+>>> gfl = GlueDict('nltk:grammars/sample_grammars/glue.semtype').to_glueformula_list(depgraph)
+>>> print(gfl) 
+[\x y.sees(x,y) : (f -o (i -o g)),
+ \x.dog(x) : (iv -o ir),
+ \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I3) -o I3)),
+ \P Q.exists x.(P(x) & Q(x)) : ((fv -o fr) -o ((f -o F4) -o F4)),
+ \x.John(x) : (fv -o fr)]
+>>> glue = Glue()
+>>> for r in sorted([r.simplify().normalize() for r in glue.get_readings(glue.gfl_to_compiled(gfl))], key=str):
+...     print(r)
+exists z1.(John(z1) & exists z2.(dog(z2) & sees(z1,z2)))
+exists z1.(dog(z1) & exists z2.(John(z2) & sees(z2,z1)))
+
+
+
+

Dependency Graph to LFG f-structure

+
>>> from nltk.sem.lfg import FStructure
+
+
>>> fstruct = FStructure.read_depgraph(depgraph)
+
+
+
>>> print(fstruct) 
+f:[pred 'sees'
+   obj h:[pred 'dog'
+          spec 'a']
+   subj g:[pred 'John']]
+
+
+
>>> fstruct.to_depgraph().tree().pprint()
+(sees (dog a) John)
+
+
+
+
+

LFG f-structure to Glue

+
>>> fstruct.to_glueformula_list(GlueDict('nltk:grammars/sample_grammars/glue.semtype')) 
+[\x y.sees(x,y) : (i -o (g -o f)),
+ \x.dog(x) : (gv -o gr),
+ \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G3) -o G3)),
+ \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I4) -o I4)),
+ \x.John(x) : (iv -o ir)]
+
+
diff --git a/howto/gluesemantics_malt.html b/howto/gluesemantics_malt.html

Sample usage for gluesemantics_malt
Glue Semantics

+
>>> from nltk.test.gluesemantics_malt_fixt import setup_module
+>>> setup_module()
+
+
+
>>> from nltk.sem.glue import *
+>>> nltk.sem.logic._counter._value = 0
+
+
+
+

Initialize the Dependency Parser

+
>>> from nltk.parse.malt import MaltParser
+
>>> tagger = RegexpTagger(
+...     [('^(John|Mary)$', 'NNP'),
+...      ('^(sees|chases)$', 'VB'),
+...      ('^(a)$', 'ex_quant'),
+...      ('^(every)$', 'univ_quant'),
+...      ('^(girl|dog)$', 'NN')
+... ]).tag
+>>> depparser = MaltParser(tagger=tagger)
+
+
+
+

Automated Derivation

+
>>> glue = Glue(depparser=depparser)
+>>> readings = glue.parse_to_meaning('every girl chases a dog'.split())
+>>> for reading in sorted([r.simplify().normalize() for r in readings], key=str):
+...     print(reading.normalize())
+all z1.(girl(z1) -> exists z2.(dog(z2) & chases(z1,z2)))
+exists z1.(dog(z1) & all z2.(girl(z2) -> chases(z2,z1)))
+
+
+
>>> drtglue = DrtGlue(depparser=depparser)
+>>> readings = drtglue.parse_to_meaning('every girl chases a dog'.split())
+>>> for reading in sorted([r.simplify().normalize() for r in readings], key=str):
+...     print(reading)
+([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chases(z1,z2)]))])
+([z1],[dog(z1), (([z2],[girl(z2)]) -> ([],[chases(z2,z1)]))])
+
+
+
+
+

With inference

Checking for equality of two DRSs is very useful when generating readings
of a sentence. For example, the glue module generates two readings for
the sentence John sees Mary:

>>> from nltk.sem.glue import DrtGlue
+>>> readings = drtglue.parse_to_meaning('John sees Mary'.split())
+>>> for drs in sorted([r.simplify().normalize() for r in readings], key=str):
+...     print(drs)
+([z1,z2],[John(z1), Mary(z2), sees(z1,z2)])
+([z1,z2],[Mary(z1), John(z2), sees(z2,z1)])
+
+

However, it is easy to tell that these two readings are logically the same, and therefore one of them is superfluous. We can use the theorem prover to determine this equivalence, and then delete one of them. A particular theorem prover may be specified, or the argument may be left off to use the default.

>>> readings[0].equiv(readings[1])
+True
+
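A particular prover can also be passed in explicitly (a sketch;
TableauProver is one of the provers bundled with NLTK, and the call may
be slow):

>>> from nltk.inference import TableauProver
>>> readings[0].equiv(readings[1], TableauProver())  # doctest: +SKIP
True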
+
diff --git a/howto/grammar.html b/howto/grammar.html

Sample usage for grammar
Grammar Parsing

Grammars can be parsed from strings:

>>> from nltk import CFG
+>>> grammar = CFG.fromstring("""
+... S -> NP VP
+... PP -> P NP
+... NP -> Det N | NP PP
+... VP -> V NP | VP PP
+... Det -> 'a' | 'the'
+... N -> 'dog' | 'cat'
+... V -> 'chased' | 'sat'
+... P -> 'on' | 'in'
+... """)
+>>> grammar
+<Grammar with 14 productions>
+>>> grammar.start()
+S
+>>> grammar.productions()
+[S -> NP VP, PP -> P NP, NP -> Det N, NP -> NP PP, VP -> V NP, VP -> VP PP,
+Det -> 'a', Det -> 'the', N -> 'dog', N -> 'cat', V -> 'chased', V -> 'sat',
+P -> 'on', P -> 'in']
+
+
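A grammar parsed this way can be handed directly to one of NLTK's
parsers (a minimal sketch; the sentence is built from the grammar's own
terminals):

>>> from nltk.parse import ChartParser
>>> parser = ChartParser(grammar)
>>> for tree in parser.parse('the dog chased a cat'.split()):
...     print(tree)
(S (NP (Det the) (N dog)) (VP (V chased) (NP (Det a) (N cat))))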

Probabilistic CFGs:

>>> from nltk import PCFG
+>>> toy_pcfg1 = PCFG.fromstring("""
+... S -> NP VP [1.0]
+... NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
+... Det -> 'the' [0.8] | 'my' [0.2]
+... N -> 'man' [0.5] | 'telescope' [0.5]
+... VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
+... V -> 'ate' [0.35] | 'saw' [0.65]
+... PP -> P NP [1.0]
+... P -> 'with' [0.61] | 'under' [0.39]
+... """)
+
+
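toy_pcfg1 can likewise drive NLTK's probabilistic parsers (a sketch;
the exact probability printed with the tree is elided here):

>>> from nltk.parse import ViterbiParser
>>> parser = ViterbiParser(toy_pcfg1)
>>> for tree in parser.parse('I saw the man'.split()):  # doctest: +SKIP
...     print(tree)
(S (NP I) (VP (V saw) (NP (Det the) (N man)))) (p=...)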

Chomsky Normal Form grammar (Test for bug 474)

>>> g = CFG.fromstring("VP^<TOP> -> VBP NP^<VP-TOP>")
+>>> g.productions()[0].lhs()
+VP^<TOP>
+
+
diff --git a/howto/grammartestsuites.html b/howto/grammartestsuites.html

Sample usage for grammartestsuites
Test Suites for Grammars

Sentences in the test suite are divided into two classes:

  • grammatical (accept) and

  • ungrammatical (reject).

If a sentence should parse according to the grammar, the value of
trees will be a non-empty list. If a sentence should be rejected
according to the grammar, then the value of trees will be None.

+
>>> from nltk.parse import TestGrammar
+>>> germantest1 = {}
+>>> germantest1['doc'] = "Tests for person agreement"
+>>> germantest1['accept'] = [
+... 'ich komme',
+... 'ich sehe mich',
+... 'du kommst',
+... 'du siehst mich',
+... 'sie kommt',
+... 'sie sieht mich',
+... 'ihr kommt',
+... 'wir kommen',
+... 'sie kommen',
+... 'du magst mich',
+... 'er mag mich',
+... 'du folgst mir',
+... 'sie hilft mir',
+... ]
+>>> germantest1['reject'] = [
+... 'ich kommt',
+... 'ich kommst',
+... 'ich siehst mich',
+... 'du komme',
+... 'du sehe mich',
+... 'du kommt',
+... 'er komme',
+... 'er siehst mich',
+... 'wir komme',
+... 'wir kommst',
+... 'die Katzen kommst',
+... 'sie komme',
+... 'sie kommst',
+... 'du mag mich',
+... 'er magst mich',
+... 'du folgt mir',
+... 'sie hilfst mir',
+... ]
+>>> germantest2 = {}
+>>> germantest2['doc'] = "Tests for number agreement"
+>>> germantest2['accept'] = [
+... 'der Hund kommt',
+... 'die Hunde kommen',
+... 'ich komme',
+... 'wir kommen',
+... 'ich sehe die Katzen',
+... 'ich folge den Katzen',
+... 'ich sehe die Katzen',
+... 'ich folge den Katzen',
+... 'wir sehen die Katzen',
+... 'wir folgen den Katzen'
+... ]
+>>> germantest2['reject'] = [
+... 'ich kommen',
+... 'wir komme',
+... 'der Hunde kommt',
+... 'der Hunde kommen',
+... 'die Katzen kommt',
+... 'ich sehe der Hunde',
+... 'ich folge den Hund',
+... 'ich sehen der Hunde',
+... 'ich folgen den Hund',
+... 'wir sehe die Katzen',
+... 'wir folge den Katzen'
+... ]
+>>> germantest3 = {}
+>>> germantest3['doc'] = "Tests for case government and subcategorization"
+>>> germantest3['accept'] = [
+... 'der Hund sieht mich',
+... 'der Hund kommt',
+... 'ich sehe den Hund',
+... 'ich helfe dem Hund',
+... ]
+>>> germantest3['reject'] = [
+... 'ich sehe',
+... 'ich helfe',
+... 'ich komme den Hund',
+... 'ich sehe den Hund die Katzen',
+... 'du hilfst mich',
+... 'du siehst mir',
+... 'du siehst ich',
+... 'der Hunde kommt mich',
+... 'die Hunde sehe die Hunde',
+... 'der Hund sehe die Hunde',
+... 'ich hilft den Hund',
+... 'ich hilft der Hund',
+... 'ich sehe dem Hund',
+... ]
+>>> germantestsuites = [germantest1, germantest2, germantest3]
+>>> tester = TestGrammar('grammars/book_grammars/german.fcfg', germantestsuites)
+>>> tester.run()
+Tests for person agreement: All tests passed!
+Tests for number agreement: All tests passed!
+Tests for case government and subcategorization: All tests passed!
+
+
diff --git a/howto/inference.html b/howto/inference.html

Sample usage for inference
Logical Inference and Model Building

+
>>> from nltk.test.inference_fixt import setup_module
+>>> setup_module()
+
+
+
>>> from nltk import *
+>>> from nltk.sem.drt import DrtParser
+>>> from nltk.sem import logic
+>>> logic._counter._value = 0
+
+
+
+

Introduction

Within the area of automated reasoning, first order theorem proving
and model building (or model generation) have both received much
attention, and have given rise to highly sophisticated techniques. We
focus therefore on providing an NLTK interface to third party tools
for these tasks. In particular, the module nltk.inference can be
used to access both theorem provers and model builders.


+

NLTK Interface to Theorem Provers

+

The main class used to interface with a theorem prover is the Prover
class, found in nltk.api. The prove() method takes three optional
arguments: a goal, a list of assumptions, and a verbose boolean to
indicate whether the proof should be printed to the console. The proof goal
and any assumptions need to be instances of the Expression class
specified by nltk.sem.logic. There are currently three theorem provers
included with NLTK: Prover9, TableauProver, and
ResolutionProver. The first is an off-the-shelf prover, while the other
two are written in Python and included in the nltk.inference package.

>>> from nltk.sem import Expression
+>>> read_expr = Expression.fromstring
+>>> p1 = read_expr('man(socrates)')
+>>> p2 = read_expr('all x.(man(x) -> mortal(x))')
+>>> c  = read_expr('mortal(socrates)')
+>>> Prover9().prove(c, [p1,p2])
+True
+>>> TableauProver().prove(c, [p1,p2])
+True
+>>> ResolutionProver().prove(c, [p1,p2], verbose=True)
+[1] {-mortal(socrates)}     A
+[2] {man(socrates)}         A
+[3] {-man(z2), mortal(z2)}  A
+[4] {-man(socrates)}        (1, 3)
+[5] {mortal(socrates)}      (2, 3)
+[6] {}                      (1, 5)
+
+True
+
+
+ +
+

The ProverCommand

+

A ProverCommand is a stateful holder for a theorem
prover. The command stores a theorem prover instance (of type Prover),
a goal, a list of assumptions, the result of the proof, and a string version
of the entire proof. Corresponding to the three included Prover
implementations, there are three ProverCommand implementations:
Prover9Command, TableauProverCommand, and
ResolutionProverCommand.

-

The ProverCommand’s constructor takes its goal and assumptions. The
prove() command executes the Prover, and proof() returns a String form
of the proof. If the prove() method has not been called, then the
prover command will be unable to display a proof.


>>> prover = ResolutionProverCommand(c, [p1,p2])
+>>> print(prover.proof())
+Traceback (most recent call last):
+  File "...", line 1212, in __run
+    compileflags, 1) in test.globs
+  File "<doctest nltk/test/inference.doctest[10]>", line 1, in <module>
+  File "...", line ..., in proof
+    raise LookupError("You have to call prove() first to get a proof!")
+LookupError: You have to call prove() first to get a proof!
+>>> prover.prove()
+True
+>>> print(prover.proof())
+[1] {-mortal(socrates)}     A
+[2] {man(socrates)}         A
+[3] {-man(z4), mortal(z4)}  A
+[4] {-man(socrates)}        (1, 3)
+[5] {mortal(socrates)}      (2, 3)
+[6] {}                      (1, 5)
+
+
+

The prover command stores the result of proving so that if prove() is called again, then the command can return the result without executing the prover again. This allows the user to access the result of the proof without wasting time re-computing what it already knows.

>>> prover.prove()
True
>>> prover.prove()
True

The assumptions and goal may be accessed using the assumptions() and goal() methods, respectively.

>>> prover.assumptions()
[<ApplicationExpression man(socrates)>, <AllExpression all x.(man(x) -> mortal(x))>]
>>> prover.goal()
<ApplicationExpression mortal(socrates)>

The assumptions list may be modified using the add_assumptions() and retract_assumptions() methods. Both methods take a list of Expression objects. Since adding or removing assumptions may change the result of the proof, the stored result is cleared when either of these methods is called. That means that proof() will be unavailable until prove() is called, and a call to prove() will execute the theorem prover.

>>> prover.retract_assumptions([read_expr('man(socrates)')])
>>> print(prover.proof())
Traceback (most recent call last):
  File "...", line 1212, in __run
    compileflags, 1) in test.globs
  File "<doctest nltk/test/inference.doctest[10]>", line 1, in <module>
  File "...", line ..., in proof
    raise LookupError("You have to call prove() first to get a proof!")
LookupError: You have to call prove() first to get a proof!
>>> prover.prove()
False
>>> print(prover.proof())
[1] {-mortal(socrates)}     A
[2] {-man(z6), mortal(z6)}  A
[3] {-man(socrates)}        (1, 2)

>>> prover.add_assumptions([read_expr('man(socrates)')])
>>> prover.prove()
True

Prover9

Prover9 Installation

You can download Prover9 from https://www.cs.unm.edu/~mccune/prover9/.

Extract the source code into a suitable directory and follow the instructions in the Prover9 README.make file to compile the executables. Install these into an appropriate location; the prover9_search variable is currently configured to look in the following locations:

>>> p = Prover9()
>>> p.binary_locations()
['/usr/local/bin/prover9',
 '/usr/local/bin/prover9/bin',
 '/usr/local/bin',
 '/usr/bin',
 '/usr/local/prover9',
 '/usr/local/share/prover9']

Alternatively, the environment variable PROVER9HOME may be configured with the binary’s location.

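For example, the variable can be set from within Python before Prover9 is first used (a minimal sketch; the directory shown is illustrative and should be wherever your prover9 binary actually lives):

>>> import os
>>> os.environ['PROVER9HOME'] = '/usr/local/bin'  # illustrative path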
The path to the correct directory can be set manually in the following manner:

>>> config_prover9(path='/usr/local/bin')
[Found prover9: /usr/local/bin/prover9]

If the executables cannot be found, Prover9 will issue a warning message:

>>> p.prove()
Traceback (most recent call last):
  ...
LookupError:
===========================================================================
  NLTK was unable to find the prover9 executable!  Use config_prover9() or
  set the PROVER9HOME environment variable.

    >> config_prover9('/path/to/prover9')

  For more information, on prover9, see:
    <https://www.cs.unm.edu/~mccune/prover9/>
===========================================================================

Using Prover9


The general case in theorem proving is to determine whether S |- g holds, where S is a possibly empty set of assumptions, and g is a proof goal.

As mentioned earlier, NLTK input to Prover9 must be Expressions of nltk.sem.logic. A Prover9 instance is initialized with a proof goal and, possibly, some assumptions. The prove() method attempts to find a proof of the goal, given the list of assumptions (in this case, none).

>>> goal = read_expr('(man(x) <-> --man(x))')
>>> prover = Prover9Command(goal)
>>> prover.prove()
True

Given a ProverCommand instance prover, the method prover.proof() will return a String of the extensive proof information provided by Prover9, shown in abbreviated form here:

============================== Prover9 ===============================
Prover9 (32) version ...
Process ... was started by ... on ...
...
The command was ".../prover9 -f ...".
============================== end of head ===========================

============================== INPUT =================================

% Reading from file /var/...


formulas(goals).
(all x (man(x) -> man(x))).
end_of_list.

...
============================== end of search =========================

THEOREM PROVED

Exiting with 1 proof.

Process 6317 exit (max_proofs) Mon Jan 21 15:23:28 2008

As mentioned earlier, we may want to list some assumptions for the proof, as shown here.

>>> g = read_expr('mortal(socrates)')
>>> a1 = read_expr('all x.(man(x) -> mortal(x))')
>>> prover = Prover9Command(g, assumptions=[a1])
>>> prover.print_assumptions()
all x.(man(x) -> mortal(x))

However, the assumptions are not sufficient to derive the goal:

>>> print(prover.prove())
False

So let’s add another assumption:

>>> a2 = read_expr('man(socrates)')
>>> prover.add_assumptions([a2])
>>> prover.print_assumptions()
all x.(man(x) -> mortal(x))
man(socrates)
>>> print(prover.prove())
True

We can also show the assumptions in Prover9 format.

>>> prover.print_assumptions(output_format='Prover9')
all x (man(x) -> mortal(x))
man(socrates)

>>> prover.print_assumptions(output_format='Spass')
Traceback (most recent call last):
  . . .
NameError: Unrecognized value for 'output_format': Spass

Assumptions can be retracted from the list of assumptions.

>>> prover.retract_assumptions([a1])
>>> prover.print_assumptions()
man(socrates)
>>> prover.retract_assumptions([a1])

Statements can be loaded from a file and parsed. We can then add these statements as new assumptions.

>>> g = read_expr('all x.(boxer(x) -> -boxerdog(x))')
>>> prover = Prover9Command(g)
>>> prover.prove()
False
>>> import nltk.data
>>> new = nltk.data.load('grammars/sample_grammars/background0.fol')
>>> for a in new:
...     print(a)
all x.(boxerdog(x) -> dog(x))
all x.(boxer(x) -> person(x))
all x.-(dog(x) & person(x))
exists x.boxer(x)
exists x.boxerdog(x)
>>> prover.add_assumptions(new)
>>> print(prover.prove())
True
>>> print(prover.proof())
============================== prooftrans ============================
Prover9 (...) version ...
Process ... was started by ... on ...
...
The command was ".../prover9".
============================== end of head ===========================

============================== end of input ==========================

============================== PROOF =================================

% -------- Comments from original proof --------
% Proof 1 at ... seconds.
% Length of proof is 13.
% Level of proof is 4.
% Maximum clause weight is 0.
% Given clauses 0.

1 (all x (boxerdog(x) -> dog(x))).  [assumption].
2 (all x (boxer(x) -> person(x))).  [assumption].
3 (all x -(dog(x) & person(x))).  [assumption].
6 (all x (boxer(x) -> -boxerdog(x))).  [goal].
8 -boxerdog(x) | dog(x).  [clausify(1)].
9 boxerdog(c3).  [deny(6)].
11 -boxer(x) | person(x).  [clausify(2)].
12 boxer(c3).  [deny(6)].
14 -dog(x) | -person(x).  [clausify(3)].
15 dog(c3).  [resolve(9,a,8,a)].
18 person(c3).  [resolve(12,a,11,a)].
19 -person(c3).  [resolve(15,a,14,a)].
20 $F.  [resolve(19,a,18,a)].

============================== end of proof ==========================

The equiv() method

One application of the theorem prover functionality is to check if two Expressions have the same meaning. The equiv() method calls a theorem prover to determine whether two Expressions are logically equivalent.

>>> a = read_expr(r'exists x.(man(x) & walks(x))')
>>> b = read_expr(r'exists x.(walks(x) & man(x))')
>>> print(a.equiv(b))
True
The same method can be used on Discourse Representation Structures (DRSs). In this case, each DRS is converted to a first order logic form, and then passed to the theorem prover.

>>> dp = DrtParser()
>>> a = dp.parse(r'([x],[man(x), walks(x)])')
>>> b = dp.parse(r'([x],[walks(x), man(x)])')
>>> print(a.equiv(b))
True

NLTK Interface to Model Builders

The top-level interface to model builders is parallel to that for theorem provers. The ModelBuilder interface is located in nltk.inference.api. It is currently only implemented by Mace, which interfaces with the Mace4 model builder.

Typically we use a model builder to show that some set of formulas has a model, and is therefore consistent. One way of doing this is by treating our candidate set of sentences as assumptions, and leaving the goal unspecified. Thus, the following interaction shows how both {a, c1} and {a, c2} are consistent sets, since Mace succeeds in building a model for each of them, while {c1, c2} is inconsistent.

>>> a3 = read_expr('exists x.(man(x) and walks(x))')
>>> c1 = read_expr('mortal(socrates)')
>>> c2 = read_expr('-mortal(socrates)')
>>> mace = Mace()
>>> print(mace.build_model(None, [a3, c1]))
True
>>> print(mace.build_model(None, [a3, c2]))
True
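
By contrast, no model can be built for the inconsistent pair itself. A minimal sketch, assuming the default search behaviour, in which Mace reports failure by returning False once its search is exhausted:

>>> print(mace.build_model(None, [c1, c2]))
False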

We can also use the model builder as an adjunct to the theorem prover. Let’s suppose we are trying to prove S |- g, i.e. that g is logically entailed by assumptions S = {s1, s2, ..., sn}. We can give this same input to Mace4, and the model builder will try to find a counterexample, that is, to show that g does not follow from S. So, given this input, Mace4 will try to find a model for the set S' = {s1, s2, ..., sn, (not g)}. If g fails to follow from S, then Mace4 may well return with a counterexample faster than Prover9 concludes that it cannot find the required proof. Conversely, if g is provable from S, Mace4 may take a long time unsuccessfully trying to find a countermodel, and will eventually give up.

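A minimal sequential sketch of this strategy, reusing the Socrates example (the names s1, s2 and sg are illustrative; in practice the two searches would be run in parallel):

>>> s1 = read_expr('all x.(man(x) -> mortal(x))')
>>> s2 = read_expr('man(socrates)')
>>> sg = read_expr('mortal(socrates)')
>>> if Mace(end_size=50).build_model(sg, [s1, s2]):  # countermodel of {s1, s2, -sg}?
...     print('countermodel found: sg does not follow')
... else:
...     print(Prover9().prove(sg, [s1, s2]))
True
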
In the following example, we see that the model builder does succeed in building a model of the assumptions together with the negation of the goal. That is, it succeeds in finding a model where there is a woman that every man loves; Adam is a man; Eve is a woman; but Adam does not love Eve.

>>> a4 = read_expr('exists y. (woman(y) & all x. (man(x) -> love(x,y)))')
>>> a5 = read_expr('man(adam)')
>>> a6 = read_expr('woman(eve)')
>>> g = read_expr('love(adam,eve)')
>>> print(mace.build_model(g, [a4, a5, a6]))
True

The Model Builder will fail to find a model if the assumptions do entail the goal. Mace will continue to look for models of ever-increasing sizes until the end_size number is reached. By default, end_size is 500, but it can be set manually for quicker response time.

>>> a7 = read_expr('all x.(man(x) -> mortal(x))')
>>> a8 = read_expr('man(socrates)')
>>> g2 = read_expr('mortal(socrates)')
>>> print(Mace(end_size=50).build_model(g2, [a7, a8]))
False

There is also a ModelBuilderCommand class that, like ProverCommand, stores a ModelBuilder, a goal, assumptions, a result, and a model. The only implementation in NLTK is MaceCommand.

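A quick sketch of the command API (it is demonstrated in full under Using Mace4 below; mbc is just an illustrative name, reusing the consistent assumptions from above):

>>> mbc = MaceCommand(assumptions=[a7, a8])
>>> mbc.build_model()
True
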
Mace4

Mace4 Installation

Mace4 is packaged with Prover9, and can be downloaded from the same source, namely https://www.cs.unm.edu/~mccune/prover9/. It is installed in the same manner as Prover9.


Using Mace4

Check whether Mace4 can find a model.

>>> a = read_expr('(see(mary,john) & -(mary = john))')
>>> mb = MaceCommand(assumptions=[a])
>>> mb.build_model()
True

Show the model in ‘tabular’ format.

>>> print(mb.model(format='tabular'))
% number = 1
% seconds = 0

% Interpretation of size 2

 john : 0

 mary : 1

 see :
       | 0 1
    ---+----
     0 | 0 0
     1 | 1 0

Show the model in ‘cooked’ format.

>>> print(mb.model(format='cooked'))
% number = 1
% seconds = 0

% Interpretation of size 2

john = 0.

mary = 1.

- see(0,0).
- see(0,1).
  see(1,0).
- see(1,1).

The property valuation accesses the stored Valuation.

>>> print(mb.valuation)
{'john': 'a', 'mary': 'b', 'see': {('b', 'a')}}
We can return to our earlier example and inspect the model:

>>> mb = MaceCommand(g, assumptions=[a4, a5, a6])
>>> m = mb.build_model()
>>> print(mb.model(format='cooked'))
% number = 1
% seconds = 0

% Interpretation of size 2

adam = 0.

eve = 0.

c1 = 1.

  man(0).
- man(1).

  woman(0).
  woman(1).

- love(0,0).
  love(0,1).
- love(1,0).
- love(1,1).

Here, we can see that adam and eve have been assigned the same individual, namely 0 as value; 0 is both a man and a woman; a second individual 1 is also a woman; and 0 loves 1. Thus, this is an interpretation in which there is a woman that every man loves but Adam doesn’t love Eve.

Mace can also be used with propositional logic.

>>> p = read_expr('P')
>>> q = read_expr('Q')
>>> mb = MaceCommand(q, [p, p>-q])
>>> mb.build_model()
True
>>> mb.valuation['P']
True
>>> mb.valuation['Q']
False
diff --git a/howto/internals.html b/howto/internals.html
index e50cc40d9..6e79a7290 100644
--- a/howto/internals.html
+++ b/howto/internals.html
@@ -1,503 +1,316 @@
Sample usage for internals

Unit tests for the nltk.utilities module

overridden()

>>> from nltk.internals import overridden

The typical use case is in defining methods for an interface or abstract base class, in such a way that subclasses don’t have to implement all of the methods:

>>> class EaterI(object):
...     '''Subclass must define eat() or batch_eat().'''
...     def eat(self, food):
...         if overridden(self.batch_eat):
...             return self.batch_eat([food])[0]
...         else:
...             raise NotImplementedError()
...     def batch_eat(self, foods):
...         return [self.eat(food) for food in foods]

As long as a subclass implements one method, it will be used to perform the other method:

>>> class GoodEater1(EaterI):
...     def eat(self, food):
...         return 'yum'
>>> GoodEater1().eat('steak')
'yum'
>>> GoodEater1().batch_eat(['steak', 'peas'])
['yum', 'yum']

>>> class GoodEater2(EaterI):
...     def batch_eat(self, foods):
...         return ['yum' for food in foods]
>>> GoodEater2().eat('steak')
'yum'
>>> GoodEater2().batch_eat(['steak', 'peas'])
['yum', 'yum']

But if a subclass doesn’t implement either one, then they’ll get an error when they try to call them. (nb this is better than infinite recursion):

>>> class BadEater1(EaterI):
...     pass
>>> BadEater1().eat('steak')
Traceback (most recent call last):
  . . .
NotImplementedError
>>> BadEater1().batch_eat(['steak', 'peas'])
Traceback (most recent call last):
  . . .
NotImplementedError

Trying to use the abstract base class itself will also result in an error:

>>> class EaterI(EaterI):
...     pass
>>> EaterI().eat('steak')
Traceback (most recent call last):
  . . .
NotImplementedError
>>> EaterI().batch_eat(['steak', 'peas'])
Traceback (most recent call last):
  . . .
NotImplementedError

It’s ok to use intermediate abstract classes:

>>> class AbstractEater(EaterI):
...     pass

>>> class GoodEater3(AbstractEater):
...     def eat(self, food):
...         return 'yum'
...
>>> GoodEater3().eat('steak')
'yum'
>>> GoodEater3().batch_eat(['steak', 'peas'])
['yum', 'yum']

>>> class GoodEater4(AbstractEater):
...     def batch_eat(self, foods):
...         return ['yum' for food in foods]
>>> GoodEater4().eat('steak')
'yum'
>>> GoodEater4().batch_eat(['steak', 'peas'])
['yum', 'yum']
>>> class BadEater2(AbstractEater):
...     pass
>>> BadEater2().eat('steak')
Traceback (most recent call last):
  . . .
NotImplementedError
>>> BadEater2().batch_eat(['steak', 'peas'])
Traceback (most recent call last):
  . . .
NotImplementedError


Here are some extra tests:

>>> class A(object):
...     def f(x): pass
>>> class B(A):
...     def f(x): pass
>>> class C(A): pass
>>> class D(B): pass

>>> overridden(A().f)
False
>>> overridden(B().f)
True
>>> overridden(C().f)
False
>>> overridden(D().f)
True

It works for classic classes, too:

>>> class A:
...     def f(x): pass
>>> class B(A):
...     def f(x): pass
>>> class C(A): pass
>>> class D(B): pass
>>> overridden(A().f)
False
>>> overridden(B().f)
True
>>> overridden(C().f)
False
>>> overridden(D().f)
True

read_str()

>>> from nltk.internals import read_str

Test valid scenarios

>>> read_str("'valid string'", 0)
('valid string', 14)

Now test invalid scenarios

>>> read_str("should error", 0)
Traceback (most recent call last):
...
nltk.internals.ReadError: Expected open quote at 0
>>> read_str("'should error", 0)
Traceback (most recent call last):
...
nltk.internals.ReadError: Expected close quote at 1
diff --git a/howto/japanese.html b/howto/japanese.html
index 751bb849f..998a70bb7 100644
--- a/howto/japanese.html
+++ b/howto/japanese.html
@@ -1,407 +1,194 @@

Sample usage for japanese


Japanese Language Processing

>>> from nltk import *

Corpus Access


KNB Corpus

>>> from nltk.corpus import knbc

Access the words: this should produce a list of strings:

>>> type(knbc.words()[0]) is not bytes
True

Access the sentences: this should produce a list of lists of strings:

>>> type(knbc.sents()[0][0]) is not bytes
True

Access the tagged words: this should produce a list of word, tag pairs:

>>> type(knbc.tagged_words()[0])
<... 'tuple'>

Access the tagged sentences: this should produce a list of lists of word, tag pairs:

>>> type(knbc.tagged_sents()[0][0])
<... 'tuple'>

JEITA Corpus

>>> from nltk.corpus import jeita

Access the tagged words: this should produce a list of word, tag pairs, where a tag is a string:

>>> type(jeita.tagged_words()[0][1]) is not bytes
True
diff --git a/howto/lm.html b/howto/lm.html
new file mode 100644
index 000000000..2e94162b4
--- /dev/null
+++ b/howto/lm.html
@@ -0,0 +1,276 @@

Sample usage for lm


Regression Tests


Issue 167


https://github.com/nltk/nltk/issues/167

>>> from nltk.corpus import brown
>>> from nltk.lm.preprocessing import padded_everygram_pipeline
>>> ngram_order = 3
>>> train_data, vocab_data = padded_everygram_pipeline(
...     ngram_order,
...     brown.sents(categories="news")
... )

>>> from nltk.lm import WittenBellInterpolated
>>> lm = WittenBellInterpolated(ngram_order)
>>> lm.fit(train_data, vocab_data)

A sentence containing an unseen word should result in infinite entropy, because Witten-Bell is based ultimately on MLE, which cannot handle unseen ngrams. Crucially, it shouldn’t raise any exceptions for unseen words.

>>> from nltk.util import ngrams
>>> sent = ngrams("This is a sentence with the word aaddvark".split(), 3)
>>> lm.entropy(sent)
inf

If we remove all unseen ngrams from the sentence, we’ll get a non-infinite value for the entropy.

>>> sent = ngrams("This is a sentence".split(), 3)
>>> round(lm.entropy(sent), 14)
10.23701322869105

Issue 367


https://github.com/nltk/nltk/issues/367


Reproducing Dan Blanchard’s example: https://github.com/nltk/nltk/issues/367#issuecomment-14646110

>>> from nltk.lm import Lidstone, Vocabulary
>>> word_seq = list('aaaababaaccbacb')
>>> ngram_order = 2
>>> from nltk.util import everygrams
>>> train_data = [everygrams(word_seq, max_len=ngram_order)]
>>> V = Vocabulary(['a', 'b', 'c', ''])
>>> lm = Lidstone(0.2, ngram_order, vocabulary=V)
>>> lm.fit(train_data)

For doctest to work we have to sort the vocabulary keys.

>>> V_keys = sorted(V)
>>> round(sum(lm.score(w, ("b",)) for w in V_keys), 6)
1.0
>>> round(sum(lm.score(w, ("a",)) for w in V_keys), 6)
1.0

>>> [lm.score(w, ("b",)) for w in V_keys]
[0.05, 0.05, 0.8, 0.05, 0.05]
>>> [round(lm.score(w, ("a",)), 4) for w in V_keys]
[0.0222, 0.0222, 0.4667, 0.2444, 0.2444]

Here’s reproducing @afourney’s comment: https://github.com/nltk/nltk/issues/367#issuecomment-15686289

>>> sent = ['foo', 'foo', 'foo', 'foo', 'bar', 'baz']
>>> ngram_order = 3
>>> from nltk.lm.preprocessing import padded_everygram_pipeline
>>> train_data, vocab_data = padded_everygram_pipeline(ngram_order, [sent])
>>> from nltk.lm import Lidstone
>>> lm = Lidstone(0.2, ngram_order)
>>> lm.fit(train_data, vocab_data)

The vocabulary includes the “UNK” symbol as well as two padding symbols.

>>> len(lm.vocab)
6
>>> word = "foo"
>>> context = ("bar", "baz")

The raw counts.

>>> lm.context_counts(context)[word]
0
>>> lm.context_counts(context).N()
1

Counts with Lidstone smoothing.

>>> lm.context_counts(context)[word] + lm.gamma
0.2
>>> lm.context_counts(context).N() + len(lm.vocab) * lm.gamma
2.2

Without any backoff, just using Lidstone smoothing, P(“foo” | “bar”, “baz”) should be: 0.2 / 2.2 ~= 0.090909

>>> round(lm.score(word, context), 6)
0.090909

Issue 380


https://github.com/nltk/nltk/issues/380


Reproducing setup akin to this comment: https://github.com/nltk/nltk/issues/380#issue-12879030

For speed take only the first 100 sentences of reuters. Shouldn’t affect the test.

>>> from nltk.corpus import reuters
>>> sents = reuters.sents()[:100]
>>> ngram_order = 3
>>> from nltk.lm.preprocessing import padded_everygram_pipeline
>>> train_data, vocab_data = padded_everygram_pipeline(ngram_order, sents)

>>> from nltk.lm import Lidstone
>>> lm = Lidstone(0.2, ngram_order)
>>> lm.fit(train_data, vocab_data)
>>> lm.score("said", ("",)) < 1
True
diff --git a/howto/logic.html b/howto/logic.html
index 80136e6f9..72cb2c1f2 100644
--- a/howto/logic.html
+++ b/howto/logic.html
@@ -1,1531 +1,1291 @@

Sample usage for logic


Logic & Lambda Calculus

The nltk.logic package allows expressions of First-Order Logic (FOL) to be parsed into Expression objects. In addition to FOL, the parser handles lambda-abstraction with variables of higher order.

Overview

+
>>> from nltk.sem.logic import *
+
+

The default inventory of logical constants is the following:

-
>>> boolean_ops()
negation           -
conjunction        &
disjunction        |
implication        ->
equivalence        <->
>>> equality_preds()
equality           =
inequality         !=
>>> binding_ops()
existential        exists
universal          all
lambda             \
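
As a quick illustration, these operators can be combined freely when reading an expression (a small sketch; Expression.fromstring is introduced in the regression tests below):

>>> print(Expression.fromstring(r'exists x.(man(x) & -walks(x))'))
exists x.(man(x) & -walks(x))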

Regression Tests


Untyped Logic

Process logical expressions conveniently:

>>> read_expr = Expression.fromstring

Test for equality under alpha-conversion

>>> e1 = read_expr('exists x.P(x)')
>>> print(e1)
exists x.P(x)
>>> e2 = e1.alpha_convert(Variable('z'))
>>> print(e2)
exists z.P(z)
>>> e1 == e2
True

>>> l = read_expr(r'\X.\X.X(X)(1)').simplify()
>>> id = read_expr(r'\X.X(X)')
>>> l == id
True
Test numerals
>>> zero = read_expr(r'\F x.x')
>>> one = read_expr(r'\F x.F(x)')
>>> two = read_expr(r'\F x.F(F(x))')
>>> three = read_expr(r'\F x.F(F(F(x)))')
>>> four = read_expr(r'\F x.F(F(F(F(x))))')
>>> succ = read_expr(r'\N F x.F(N(F,x))')
>>> plus = read_expr(r'\M N F x.M(F,N(F,x))')
>>> mult = read_expr(r'\M N F.M(N(F))')
>>> pred = read_expr(r'\N F x.(N(\G H.H(G(F)))(\u.x)(\u.u))')
>>> v1 = ApplicationExpression(succ, zero).simplify()
>>> v1 == one
True
>>> v2 = ApplicationExpression(succ, v1).simplify()
>>> v2 == two
True
>>> v3 = ApplicationExpression(ApplicationExpression(plus, v1), v2).simplify()
>>> v3 == three
True
>>> v4 = ApplicationExpression(ApplicationExpression(mult, v2), v2).simplify()
>>> v4 == four
True
>>> v5 = ApplicationExpression(pred, ApplicationExpression(pred, v4)).simplify()
>>> v5 == two
True

Overloaded operators also exist, for convenience.

>>> print(succ(zero).simplify() == one)
True
>>> print(plus(one,two).simplify() == three)
True
>>> print(mult(two,two).simplify() == four)
True
>>> print(pred(pred(four)).simplify() == two)
True
>>> john = read_expr(r'john')
>>> man = read_expr(r'\x.man(x)')
>>> walk = read_expr(r'\x.walk(x)')
>>> man(john).simplify()
<ApplicationExpression man(john)>
>>> print(-walk(john).simplify())
-walk(john)
>>> print((man(john) & walk(john)).simplify())
(man(john) & walk(john))
>>> print((man(john) | walk(john)).simplify())
(man(john) | walk(john))
>>> print((man(john) > walk(john)).simplify())
(man(john) -> walk(john))
>>> print((man(john) < walk(john)).simplify())
(man(john) <-> walk(john))

Python’s built-in lambda operator can also be used with Expressions

>>> john = VariableExpression(Variable('john'))
>>> run_var = VariableExpression(Variable('run'))
>>> run = lambda x: run_var(x)
>>> run(john)
<ApplicationExpression run(john)>
betaConversionTestSuite.pl

Tests based on Blackburn & Bos’ book, Representation and Inference for Natural Language.

>>> x1 = read_expr(r'\P.P(mia)(\x.walk(x))').simplify()
+>>> x2 = read_expr(r'walk(mia)').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'exists x.(man(x) & ((\P.exists x.(woman(x) & P(x)))(\y.love(x,y))))').simplify()
+>>> x2 = read_expr(r'exists x.(man(x) & exists y.(woman(y) & love(x,y)))').simplify()
+>>> x1 == x2
+True
+>>> x1 = read_expr(r'\a.sleep(a)(mia)').simplify()
+>>> x2 = read_expr(r'sleep(mia)').simplify()
+>>> x1 == x2
+True
+>>> x1 = read_expr(r'\a.\b.like(b,a)(mia)').simplify()
+>>> x2 = read_expr(r'\b.like(b,mia)').simplify()
+>>> x1 == x2
+True
+>>> x1 = read_expr(r'\a.(\b.like(b,a)(vincent))').simplify()
+>>> x2 = read_expr(r'\a.like(vincent,a)').simplify()
+>>> x1 == x2
+True
+>>> x1 = read_expr(r'\a.((\b.like(b,a)(vincent)) & sleep(a))').simplify()
+>>> x2 = read_expr(r'\a.(like(vincent,a) & sleep(a))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'(\a.\b.like(b,a)(mia)(vincent))').simplify()
+>>> x2 = read_expr(r'like(vincent,mia)').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'P((\a.sleep(a)(vincent)))').simplify()
+>>> x2 = read_expr(r'P(sleep(vincent))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'\A.A((\b.sleep(b)(vincent)))').simplify()
+>>> x2 = read_expr(r'\A.A(sleep(vincent))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'\A.A(sleep(vincent))').simplify()
+>>> x2 = read_expr(r'\A.A(sleep(vincent))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'(\A.A(vincent)(\b.sleep(b)))').simplify()
+>>> x2 = read_expr(r'sleep(vincent)').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'\A.believe(mia,A(vincent))(\b.sleep(b))').simplify()
+>>> x2 = read_expr(r'believe(mia,sleep(vincent))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'(\A.(A(vincent) & A(mia)))(\b.sleep(b))').simplify()
+>>> x2 = read_expr(r'(sleep(vincent) & sleep(mia))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'\A.\B.(\C.C(A(vincent))(\d.probably(d)) & (\C.C(B(mia))(\d.improbably(d))))(\f.walk(f))(\f.talk(f))').simplify()
+>>> x2 = read_expr(r'(probably(walk(vincent)) & improbably(talk(mia)))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'(\a.\b.(\C.C(a,b)(\d.\f.love(d,f))))(jules)(mia)').simplify()
+>>> x2 = read_expr(r'love(jules,mia)').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'(\A.\B.exists c.(A(c) & B(c)))(\d.boxer(d),\d.sleep(d))').simplify()
+>>> x2 = read_expr(r'exists c.(boxer(c) & sleep(c))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'\A.Z(A)(\c.\a.like(a,c))').simplify()
+>>> x2 = read_expr(r'Z(\c.\a.like(a,c))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'\A.\b.A(b)(\c.\b.like(b,c))').simplify()
+>>> x2 = read_expr(r'\b.(\c.\b.like(b,c)(b))').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'(\a.\b.(\C.C(a,b)(\b.\a.loves(b,a))))(jules)(mia)').simplify()
+>>> x2 = read_expr(r'loves(jules,mia)').simplify()
+>>> x1 == x2
+True
+
+
+
>>> x1 = read_expr(r'(\A.\b.(exists b.A(b) & A(b)))(\c.boxer(c))(vincent)').simplify()
+>>> x2 = read_expr(r'((exists b.boxer(b)) & boxer(vincent))').simplify()
+>>> x1 == x2
+True
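+
+>>> # Note: expression equality is tested modulo alpha-conversion (consistent
+>>> # renaming of bound variables), so each beta-reduced x1 compares equal to
+>>> # the hand-written normal form x2.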
+
+
+
+
+
+
Test Parser
+
>>> print(read_expr(r'john'))
+john
+>>> print(read_expr(r'x'))
+x
+>>> print(read_expr(r'-man(x)'))
+-man(x)
+>>> print(read_expr(r'--man(x)'))
+--man(x)
+>>> print(read_expr(r'(man(x))'))
+man(x)
+>>> print(read_expr(r'((man(x)))'))
+man(x)
+>>> print(read_expr(r'man(x) <-> tall(x)'))
+(man(x) <-> tall(x))
+>>> print(read_expr(r'(man(x) <-> tall(x))'))
+(man(x) <-> tall(x))
+>>> print(read_expr(r'(man(x) & tall(x) & walks(x))'))
+(man(x) & tall(x) & walks(x))
+>>> print(read_expr(r'(man(x) & tall(x) & walks(x))').first)
+(man(x) & tall(x))
+>>> print(read_expr(r'man(x) | tall(x) & walks(x)'))
+(man(x) | (tall(x) & walks(x)))
+>>> print(read_expr(r'((man(x) & tall(x)) | walks(x))'))
+((man(x) & tall(x)) | walks(x))
+>>> print(read_expr(r'man(x) & (tall(x) | walks(x))'))
+(man(x) & (tall(x) | walks(x)))
+>>> print(read_expr(r'(man(x) & (tall(x) | walks(x)))'))
+(man(x) & (tall(x) | walks(x)))
+>>> print(read_expr(r'P(x) -> Q(x) <-> R(x) | S(x) & T(x)'))
+((P(x) -> Q(x)) <-> (R(x) | (S(x) & T(x))))
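+>>> # The grouping above shows the operator precedence, from tightest to
+>>> # loosest binding: & then | then -> then <->.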
+>>> print(read_expr(r'exists x.man(x)'))
+exists x.man(x)
+>>> print(read_expr(r'exists x.(man(x) & tall(x))'))
+exists x.(man(x) & tall(x))
+>>> print(read_expr(r'exists x.(man(x) & tall(x) & walks(x))'))
+exists x.(man(x) & tall(x) & walks(x))
+>>> print(read_expr(r'-P(x) & Q(x)'))
+(-P(x) & Q(x))
+>>> read_expr(r'-P(x) & Q(x)') == read_expr(r'(-P(x)) & Q(x)')
+True
+>>> print(read_expr(r'\x.man(x)'))
+\x.man(x)
+>>> print(read_expr(r'\x.man(x)(john)'))
+\x.man(x)(john)
+>>> print(read_expr(r'\x.man(x)(john) & tall(x)'))
+(\x.man(x)(john) & tall(x))
+>>> print(read_expr(r'\x.\y.sees(x,y)'))
+\x y.sees(x,y)
+>>> print(read_expr(r'\x  y.sees(x,y)'))
+\x y.sees(x,y)
+>>> print(read_expr(r'\x.\y.sees(x,y)(a)'))
+(\x y.sees(x,y))(a)
+>>> print(read_expr(r'\x  y.sees(x,y)(a)'))
+(\x y.sees(x,y))(a)
+>>> print(read_expr(r'\x.\y.sees(x,y)(a)(b)'))
+((\x y.sees(x,y))(a))(b)
+>>> print(read_expr(r'\x  y.sees(x,y)(a)(b)'))
+((\x y.sees(x,y))(a))(b)
+>>> print(read_expr(r'\x.\y.sees(x,y)(a,b)'))
+((\x y.sees(x,y))(a))(b)
+>>> print(read_expr(r'\x  y.sees(x,y)(a,b)'))
+((\x y.sees(x,y))(a))(b)
+>>> print(read_expr(r'((\x.\y.sees(x,y))(a))(b)'))
+((\x y.sees(x,y))(a))(b)
+>>> print(read_expr(r'P(x)(y)(z)'))
+P(x,y,z)
+>>> print(read_expr(r'P(Q)'))
+P(Q)
+>>> print(read_expr(r'P(Q(x))'))
+P(Q(x))
+>>> print(read_expr(r'(\x.exists y.walks(x,y))(x)'))
+(\x.exists y.walks(x,y))(x)
+>>> print(read_expr(r'exists x.(x = john)'))
+exists x.(x = john)
+>>> print(read_expr(r'((\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))'))
+((\P Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))
+>>> a = read_expr(r'exists c.exists b.A(b,c) & A(b,c)')
+>>> b = read_expr(r'(exists c.(exists b.A(b,c))) & A(b,c)')
+>>> print(a == b)
+True
+>>> a = read_expr(r'exists c.(exists b.A(b,c) & A(b,c))')
+>>> b = read_expr(r'exists c.((exists b.A(b,c)) & A(b,c))')
+>>> print(a == b)
+True
+>>> print(read_expr(r'exists x.x = y'))
+exists x.(x = y)
+>>> print(read_expr('A(B)(C)'))
+A(B,C)
+>>> print(read_expr('(A(B))(C)'))
+A(B,C)
+>>> print(read_expr('A((B)(C))'))
+A(B(C))
+>>> print(read_expr('A(B(C))'))
+A(B(C))
+>>> print(read_expr('(A)(B(C))'))
+A(B(C))
+>>> print(read_expr('(((A)))(((B))(((C))))'))
+A(B(C))
+>>> print(read_expr(r'A != B'))
+-(A = B)
+>>> print(read_expr('P(x) & x=y & P(y)'))
+(P(x) & (x = y) & P(y))
+>>> try: print(read_expr(r'\walk.walk(x)'))
+... except LogicalExpressionException as e: print(e)
+'walk' is an illegal variable name.  Constants may not be abstracted.
+\walk.walk(x)
+ ^
+>>> try: print(read_expr(r'all walk.walk(john)'))
+... except LogicalExpressionException as e: print(e)
+'walk' is an illegal variable name.  Constants may not be quantified.
+all walk.walk(john)
+    ^
+>>> try: print(read_expr(r'x(john)'))
+... except LogicalExpressionException as e: print(e)
+'x' is an illegal predicate name.  Individual variables may not be used as predicates.
+x(john)
+^
+
+
+
>>> from nltk.sem.logic import LogicParser # hack to give access to custom quote chars
+>>> lpq = LogicParser()
+>>> lpq.quote_chars = [("'", "'", "\\", False)]
+>>> print(lpq.parse(r"(man(x) & 'tall\'s,' (x) & walks (x) )"))
+(man(x) & tall's,(x) & walks(x))
+>>> lpq.quote_chars = [("'", "'", "\\", True)]
+>>> print(lpq.parse(r"'tall\'s,'"))
+'tall\'s,'
+>>> print(lpq.parse(r"'spaced name(x)'"))
+'spaced name(x)'
+>>> print(lpq.parse(r"-'tall\'s,'(x)"))
+-'tall\'s,'(x)
+>>> print(lpq.parse(r"(man(x) & 'tall\'s,' (x) & walks (x) )"))
+(man(x) & 'tall\'s,'(x) & walks(x))
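+>>> # Each quote_chars entry appears to be (open, close, escape, incl_quotes):
+>>> # with incl_quotes=False the quote marks are stripped from the parsed
+>>> # token, and with True they are kept, as the two parses above show.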
+
+
+
+
+
Simplify
+
>>> print(read_expr(r'\x.man(x)(john)').simplify())
+man(john)
+>>> print(read_expr(r'\x.((man(x)))(john)').simplify())
+man(john)
+>>> print(read_expr(r'\x.\y.sees(x,y)(john, mary)').simplify())
+sees(john,mary)
+>>> print(read_expr(r'\x  y.sees(x,y)(john, mary)').simplify())
+sees(john,mary)
+>>> print(read_expr(r'\x.\y.sees(x,y)(john)(mary)').simplify())
+sees(john,mary)
+>>> print(read_expr(r'\x  y.sees(x,y)(john)(mary)').simplify())
+sees(john,mary)
+>>> print(read_expr(r'\x.\y.sees(x,y)(john)').simplify())
+\y.sees(john,y)
+>>> print(read_expr(r'\x  y.sees(x,y)(john)').simplify())
+\y.sees(john,y)
+>>> print(read_expr(r'(\x.\y.sees(x,y)(john))(mary)').simplify())
+sees(john,mary)
+>>> print(read_expr(r'(\x  y.sees(x,y)(john))(mary)').simplify())
+sees(john,mary)
+>>> print(read_expr(r'exists x.(man(x) & (\x.exists y.walks(x,y))(x))').simplify())
+exists x.(man(x) & exists y.walks(x,y))
+>>> e1 = read_expr(r'exists x.(man(x) & (\x.exists y.walks(x,y))(y))').simplify()
+>>> e2 = read_expr(r'exists x.(man(x) & exists z1.walks(y,z1))')
+>>> e1 == e2
+True
+>>> print(read_expr(r'(\P Q.exists x.(P(x) & Q(x)))(\x.dog(x))').simplify())
+\Q.exists x.(dog(x) & Q(x))
+>>> print(read_expr(r'((\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))').simplify())
+exists x.(dog(x) & bark(x))
+>>> print(read_expr(r'\P.(P(x)(y))(\a b.Q(a,b))').simplify())
+Q(x,y)
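+>>> # simplify() performs beta-reduction, alpha-converting where a
+>>> # substitution would capture a variable; that is why the bound x is
+>>> # renamed to z1 in the e1/e2 example above.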
+
+
+
+
+
Replace
+
>>> a = read_expr(r'a')
+>>> x = read_expr(r'x')
+>>> y = read_expr(r'y')
+>>> z = read_expr(r'z')
+
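+>>> # The third argument of replace() controls whether bound occurrences are
+>>> # replaced too: with False only free occurrences of the variable change,
+>>> # while True also renames the binder itself.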
+
+
>>> print(read_expr(r'man(x)').replace(x.variable, a, False))
+man(a)
+>>> print(read_expr(r'(man(x) & tall(x))').replace(x.variable, a, False))
+(man(a) & tall(a))
+>>> print(read_expr(r'exists x.man(x)').replace(x.variable, a, False))
+exists x.man(x)
+>>> print(read_expr(r'exists x.man(x)').replace(x.variable, a, True))
+exists a.man(a)
+>>> print(read_expr(r'exists x.give(x,y,z)').replace(y.variable, a, False))
+exists x.give(x,a,z)
+>>> print(read_expr(r'exists x.give(x,y,z)').replace(y.variable, a, True))
+exists x.give(x,a,z)
+>>> e1 = read_expr(r'exists x.give(x,y,z)').replace(y.variable, x, False)
+>>> e2 = read_expr(r'exists z1.give(z1,x,z)')
+>>> e1 == e2
+True
+>>> e1 = read_expr(r'exists x.give(x,y,z)').replace(y.variable, x, True)
+>>> e2 = read_expr(r'exists z1.give(z1,x,z)')
+>>> e1 == e2
+True
+>>> print(read_expr(r'\x y z.give(x,y,z)').replace(y.variable, a, False))
+\x y z.give(x,y,z)
+>>> print(read_expr(r'\x y z.give(x,y,z)').replace(y.variable, a, True))
+\x a z.give(x,a,z)
+>>> print(read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, a, False))
+\x y.give(x,y,a)
+>>> print(read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, a, True))
+\x y.give(x,y,a)
+>>> e1 = read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, x, False)
+>>> e2 = read_expr(r'\z1.\y.give(z1,y,x)')
+>>> e1 == e2
+True
+>>> e1 = read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, x, True)
+>>> e2 = read_expr(r'\z1.\y.give(z1,y,x)')
+>>> e1 == e2
+True
+>>> print(read_expr(r'\x.give(x,y,z)').replace(z.variable, y, False))
+\x.give(x,y,y)
+>>> print(read_expr(r'\x.give(x,y,z)').replace(z.variable, y, True))
+\x.give(x,y,y)
+
+
+
>>> from nltk.sem import logic
+>>> logic._counter._value = 0
+>>> e1 = read_expr('e1')
+>>> e2 = read_expr('e2')
+>>> print(read_expr('exists e1 e2.(walk(e1) & talk(e2))').replace(e1.variable, e2, True))
+exists e2 e01.(walk(e2) & talk(e01))
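+>>> # Substituting e2 for e1 would capture the inner bound e2, so that binder
+>>> # is first renamed to a fresh variable (e01) drawn from logic._counter.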
+
+
+
+
+
Variables / Free
+
>>> examples = [r'walk(john)',
+...             r'walk(x)',
+...             r'?vp(?np)',
+...             r'see(john,mary)',
+...             r'exists x.walk(x)',
+...             r'\x.see(john,x)',
+...             r'\x.see(john,x)(mary)',
+...             r'P(x)',
+...             r'\P.P(x)',
+...             r'aa(x,bb(y),cc(z),P(w),u)',
+...             r'bo(?det(?n),@x)']
+>>> examples = [read_expr(e) for e in examples]
+
+
+
>>> for e in examples:
+...     print('%-25s' % e, sorted(e.free()))
+walk(john)                []
+walk(x)                   [Variable('x')]
+?vp(?np)                  []
+see(john,mary)            []
+exists x.walk(x)          []
+\x.see(john,x)            []
+(\x.see(john,x))(mary)    []
+P(x)                      [Variable('P'), Variable('x')]
+\P.P(x)                   [Variable('x')]
+aa(x,bb(y),cc(z),P(w),u)  [Variable('P'), Variable('u'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
+bo(?det(?n),@x)           []
+
+
+
>>> for e in examples:
+...     print('%-25s' % e, sorted(e.constants()))
+walk(john)                [Variable('john')]
+walk(x)                   []
+?vp(?np)                  [Variable('?np')]
+see(john,mary)            [Variable('john'), Variable('mary')]
+exists x.walk(x)          []
+\x.see(john,x)            [Variable('john')]
+(\x.see(john,x))(mary)    [Variable('john'), Variable('mary')]
+P(x)                      []
+\P.P(x)                   []
+aa(x,bb(y),cc(z),P(w),u)  []
+bo(?det(?n),@x)           [Variable('?n'), Variable('@x')]
+
+
+
>>> for e in examples:
+...     print('%-25s' % e, sorted(e.predicates()))
+walk(john)                [Variable('walk')]
+walk(x)                   [Variable('walk')]
+?vp(?np)                  [Variable('?vp')]
+see(john,mary)            [Variable('see')]
+exists x.walk(x)          [Variable('walk')]
+\x.see(john,x)            [Variable('see')]
+(\x.see(john,x))(mary)    [Variable('see')]
+P(x)                      []
+\P.P(x)                   []
+aa(x,bb(y),cc(z),P(w),u)  [Variable('aa'), Variable('bb'), Variable('cc')]
+bo(?det(?n),@x)           [Variable('?det'), Variable('bo')]
+
+
+
>>> for e in examples:
+...     print('%-25s' % e, sorted(e.variables()))
+walk(john)                []
+walk(x)                   [Variable('x')]
+?vp(?np)                  [Variable('?np'), Variable('?vp')]
+see(john,mary)            []
+exists x.walk(x)          []
+\x.see(john,x)            []
+(\x.see(john,x))(mary)    []
+P(x)                      [Variable('P'), Variable('x')]
+\P.P(x)                   [Variable('x')]
+aa(x,bb(y),cc(z),P(w),u)  [Variable('P'), Variable('u'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
+bo(?det(?n),@x)           [Variable('?det'), Variable('?n'), Variable('@x')]
+
+
+
+
normalize
>>> print(read_expr(r'\e083.(walk(e083, z472) & talk(e092, z938))').normalize())
+\e01.(walk(e01,z3) & talk(e02,z4))
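+>>> # normalize() renames variables to canonical, counter-generated names
+>>> # (e01, e02 for the event variables above, z3, z4 for the others).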
+
+
+
+
+
+

Typed Logic

+
>>> from nltk.sem.logic import LogicParser
+>>> tlp = LogicParser(True)
+>>> print(tlp.parse(r'man(x)').type)
+?
+>>> print(tlp.parse(r'walk(angus)').type)
+?
+>>> print(tlp.parse(r'-man(x)').type)
+t
+>>> print(tlp.parse(r'(man(x) <-> tall(x))').type)
+t
+>>> print(tlp.parse(r'exists x.(man(x) & tall(x))').type)
+t
+>>> print(tlp.parse(r'\x.man(x)').type)
+<e,?>
+>>> print(tlp.parse(r'john').type)
+e
+>>> print(tlp.parse(r'\x y.sees(x,y)').type)
+<e,<e,?>>
+>>> print(tlp.parse(r'\x.man(x)(john)').type)
+?
+>>> print(tlp.parse(r'\x.\y.sees(x,y)(john)').type)
+<e,?>
+>>> print(tlp.parse(r'\x.\y.sees(x,y)(john)(mary)').type)
+?
+>>> print(tlp.parse(r'\P.\Q.exists x.(P(x) & Q(x))').type)
+<<e,t>,<<e,t>,t>>
+>>> print(tlp.parse(r'\x.y').type)
+<?,e>
+>>> print(tlp.parse(r'\P.P(x)').type)
+<<e,?>,?>
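+>>> # '?' is the ANY type, a placeholder for a type that has not yet been
+>>> # resolved; it unifies with any concrete type (see matches() below).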
+
+
+
>>> parsed = tlp.parse('see(john,mary)')
+>>> print(parsed.type)
+?
+>>> print(parsed.function)
+see(john)
+>>> print(parsed.function.type)
+<e,?>
+>>> print(parsed.function.function)
+see
+>>> print(parsed.function.function.type)
+<e,<e,?>>
+
+
+
>>> parsed = tlp.parse('P(x,y)')
+>>> print(parsed)
+P(x,y)
+>>> print(parsed.type)
+?
+>>> print(parsed.function)
+P(x)
+>>> print(parsed.function.type)
+<e,?>
+>>> print(parsed.function.function)
+P
+>>> print(parsed.function.function.type)
+<e,<e,?>>
+
+
+
>>> print(tlp.parse(r'P').type)
+?
+
+
+
>>> print(tlp.parse(r'P', {'P': 't'}).type)
+t
+
+
+
>>> a = tlp.parse(r'P(x)')
+>>> print(a.type)
+?
+>>> print(a.function.type)
+<e,?>
+>>> print(a.argument.type)
+e
+
+
+
>>> a = tlp.parse(r'-P(x)')
+>>> print(a.type)
+t
+>>> print(a.term.type)
+t
+>>> print(a.term.function.type)
+<e,t>
+>>> print(a.term.argument.type)
+e
+
+
+
>>> a = tlp.parse(r'P & Q')
+>>> print(a.type)
+t
+>>> print(a.first.type)
+t
+>>> print(a.second.type)
+t
+
+
+
>>> a = tlp.parse(r'(P(x) & Q(x))')
+>>> print(a.type)
+t
+>>> print(a.first.type)
+t
+>>> print(a.first.function.type)
+<e,t>
+>>> print(a.first.argument.type)
+e
+>>> print(a.second.type)
+t
+>>> print(a.second.function.type)
+<e,t>
+>>> print(a.second.argument.type)
+e
+
+
+
>>> a = tlp.parse(r'\x.P(x)')
+>>> print(a.type)
+<e,?>
+>>> print(a.term.function.type)
+<e,?>
+>>> print(a.term.argument.type)
+e
+
+
+
>>> a = tlp.parse(r'\P.P(x)')
+>>> print(a.type)
+<<e,?>,?>
+>>> print(a.term.function.type)
+<e,?>
+>>> print(a.term.argument.type)
+e
+
+
+
>>> a = tlp.parse(r'(\x.P(x)(john)) & Q(x)')
+>>> print(a.type)
+t
+>>> print(a.first.type)
+t
+>>> print(a.first.function.type)
+<e,t>
+>>> print(a.first.function.term.function.type)
+<e,t>
+>>> print(a.first.function.term.argument.type)
+e
+>>> print(a.first.argument.type)
+e
+
+
+
>>> a = tlp.parse(r'\x y.P(x,y)(john)(mary) & Q(x)')
+>>> print(a.type)
+t
+>>> print(a.first.type)
+t
+>>> print(a.first.function.type)
+<e,t>
+>>> print(a.first.function.function.type)
+<e,<e,t>>
+
+
>>> a = tlp.parse(r'--P')
+>>> print(a.type)
+t
+>>> print(a.term.type)
+t
+>>> print(a.term.term.type)
+t
+
+
+
>>> tlp.parse(r'\x y.P(x,y)').type
+<e,<e,?>>
+>>> tlp.parse(r'\x y.P(x,y)', {'P': '<e,<e,t>>'}).type
+<e,<e,t>>
+
+
+
>>> a = tlp.parse(r'\P y.P(john,y)(\x y.see(x,y))')
+>>> a.type
+<e,?>
+>>> a.function.type
+<<e,<e,?>>,<e,?>>
+>>> a.function.term.term.function.function.type
+<e,<e,?>>
+>>> a.argument.type
+<e,<e,?>>
+
+
+
>>> a = tlp.parse(r'exists c f.(father(c) = f)')
+>>> a.type
+t
+>>> a.term.term.type
+t
+>>> a.term.term.first.type
+e
+>>> a.term.term.first.function.type
+<e,e>
+>>> a.term.term.second.type
+e
+
-
-

Typed Logic

-
-
->>> from nltk.sem.logic import LogicParser
->>> tlp = LogicParser(True)
->>> print(tlp.parse(r'man(x)').type)
-?
->>> print(tlp.parse(r'walk(angus)').type)
-?
->>> print(tlp.parse(r'-man(x)').type)
-t
->>> print(tlp.parse(r'(man(x) <-> tall(x))').type)
-t
->>> print(tlp.parse(r'exists x.(man(x) & tall(x))').type)
-t
->>> print(tlp.parse(r'\x.man(x)').type)
-<e,?>
->>> print(tlp.parse(r'john').type)
-e
->>> print(tlp.parse(r'\x y.sees(x,y)').type)
-<e,<e,?>>
->>> print(tlp.parse(r'\x.man(x)(john)').type)
-?
->>> print(tlp.parse(r'\x.\y.sees(x,y)(john)').type)
-<e,?>
->>> print(tlp.parse(r'\x.\y.sees(x,y)(john)(mary)').type)
-?
->>> print(tlp.parse(r'\P.\Q.exists x.(P(x) & Q(x))').type)
-<<e,t>,<<e,t>,t>>
->>> print(tlp.parse(r'\x.y').type)
-<?,e>
->>> print(tlp.parse(r'\P.P(x)').type)
-<<e,?>,?>
-
-
->>> parsed = tlp.parse('see(john,mary)')
->>> print(parsed.type)
-?
->>> print(parsed.function)
-see(john)
->>> print(parsed.function.type)
-<e,?>
->>> print(parsed.function.function)
-see
->>> print(parsed.function.function.type)
-<e,<e,?>>
-
-
->>> parsed = tlp.parse('P(x,y)')
->>> print(parsed)
-P(x,y)
->>> print(parsed.type)
-?
->>> print(parsed.function)
-P(x)
->>> print(parsed.function.type)
-<e,?>
->>> print(parsed.function.function)
-P
->>> print(parsed.function.function.type)
-<e,<e,?>>
-
-
->>> print(tlp.parse(r'P').type)
-?
-
-
->>> print(tlp.parse(r'P', {'P': 't'}).type)
-t
-
-
->>> a = tlp.parse(r'P(x)')
->>> print(a.type)
-?
->>> print(a.function.type)
-<e,?>
->>> print(a.argument.type)
-e
-
-
->>> a = tlp.parse(r'-P(x)')
->>> print(a.type)
-t
->>> print(a.term.type)
-t
->>> print(a.term.function.type)
-<e,t>
->>> print(a.term.argument.type)
-e
-
-
->>> a = tlp.parse(r'P & Q')
->>> print(a.type)
-t
->>> print(a.first.type)
-t
->>> print(a.second.type)
-t
-
-
->>> a = tlp.parse(r'(P(x) & Q(x))')
->>> print(a.type)
-t
->>> print(a.first.type)
-t
->>> print(a.first.function.type)
-<e,t>
->>> print(a.first.argument.type)
-e
->>> print(a.second.type)
-t
->>> print(a.second.function.type)
-<e,t>
->>> print(a.second.argument.type)
-e
-
-
->>> a = tlp.parse(r'\x.P(x)')
->>> print(a.type)
-<e,?>
->>> print(a.term.function.type)
-<e,?>
->>> print(a.term.argument.type)
-e
-
-
->>> a = tlp.parse(r'\P.P(x)')
->>> print(a.type)
-<<e,?>,?>
->>> print(a.term.function.type)
-<e,?>
->>> print(a.term.argument.type)
-e
-
-
->>> a = tlp.parse(r'(\x.P(x)(john)) & Q(x)')
->>> print(a.type)
-t
->>> print(a.first.type)
-t
->>> print(a.first.function.type)
-<e,t>
->>> print(a.first.function.term.function.type)
-<e,t>
->>> print(a.first.function.term.argument.type)
-e
->>> print(a.first.argument.type)
-e
-
-
->>> a = tlp.parse(r'\x y.P(x,y)(john)(mary) & Q(x)')
->>> print(a.type)
-t
->>> print(a.first.type)
-t
->>> print(a.first.function.type)
-<e,t>
->>> print(a.first.function.function.type)
-<e,<e,t>>
-
-
->>> a = tlp.parse(r'--P')
->>> print(a.type)
-t
->>> print(a.term.type)
-t
->>> print(a.term.term.type)
-t
-
-
->>> tlp.parse(r'\x y.P(x,y)').type
-<e,<e,?>>
->>> tlp.parse(r'\x y.P(x,y)', {'P': '<e,<e,t>>'}).type
-<e,<e,t>>
-
-
->>> a = tlp.parse(r'\P y.P(john,y)(\x y.see(x,y))')
->>> a.type
-<e,?>
->>> a.function.type
-<<e,<e,?>>,<e,?>>
->>> a.function.term.term.function.function.type
-<e,<e,?>>
->>> a.argument.type
-<e,<e,?>>
-
-
->>> a = tlp.parse(r'exists c f.(father(c) = f)')
->>> a.type
-t
->>> a.term.term.type
-t
->>> a.term.term.first.type
-e
->>> a.term.term.first.function.type
-<e,e>
->>> a.term.term.second.type
-e
-
-

typecheck()

-
-
->>> a = tlp.parse('P(x)')
->>> b = tlp.parse('Q(x)')
->>> a.type
-?
->>> c = a & b
->>> c.first.type
-?
->>> c.typecheck() # doctest: +ELLIPSIS
-{...}
->>> c.first.type
-t
-
-
->>> a = tlp.parse('P(x)')
->>> b = tlp.parse('P(x) & Q(x)')
->>> a.type
-?
->>> typecheck([a,b]) # doctest: +ELLIPSIS
-{...}
->>> a.type
-t
-
-
->>> e = tlp.parse(r'man(x)')
->>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': '<e,?>'})
-True
->>> sig = {'man': '<e, t>'}
->>> e = tlp.parse(r'man(x)', sig)
->>> print(e.function.type)
-<e,t>
->>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': '<e,t>'})
-True
->>> print(e.function.type)
-<e,t>
->>> print(dict((k,str(v)) for k,v in e.typecheck(sig).items()) == {'x': 'e', 'man': '<e,t>'})
-True
-
-
+
>>> a = tlp.parse('P(x)')
+>>> b = tlp.parse('Q(x)')
+>>> a.type
+?
+>>> c = a & b
+>>> c.first.type
+?
+>>> c.typecheck()
+{...}
+>>> c.first.type
+t
+
+
+
>>> a = tlp.parse('P(x)')
+>>> b = tlp.parse('P(x) & Q(x)')
+>>> a.type
+?
+>>> typecheck([a,b])
+{...}
+>>> a.type
+t
+
+
+
>>> e = tlp.parse(r'man(x)')
+>>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': '<e,?>'})
+True
+>>> sig = {'man': '<e, t>'}
+>>> e = tlp.parse(r'man(x)', sig)
+>>> print(e.function.type)
+<e,t>
+>>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': '<e,t>'})
+True
+>>> print(e.function.type)
+<e,t>
+>>> print(dict((k,str(v)) for k,v in e.typecheck(sig).items()) == {'x': 'e', 'man': '<e,t>'})
+True
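+>>> # typecheck() returns a signature dict mapping variable names to inferred
+>>> # types, and it also updates the types on the expression itself, which is
+>>> # why c.first.type changes from '?' to 't' above.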
+
+

findtype()

-
-
->>> print(tlp.parse(r'man(x)').findtype(Variable('man')))
-<e,?>
->>> print(tlp.parse(r'see(x,y)').findtype(Variable('see')))
-<e,<e,?>>
->>> print(tlp.parse(r'P(Q(R(x)))').findtype(Variable('Q')))
-?
-
-
+
>>> print(tlp.parse(r'man(x)').findtype(Variable('man')))
+<e,?>
+>>> print(tlp.parse(r'see(x,y)').findtype(Variable('see')))
+<e,<e,?>>
+>>> print(tlp.parse(r'P(Q(R(x)))').findtype(Variable('Q')))
+?
+
+

reading types from strings

-
-
->>> Type.fromstring('e')
-e
->>> Type.fromstring('<e,t>')
-<e,t>
->>> Type.fromstring('<<e,t>,<e,t>>')
-<<e,t>,<e,t>>
->>> Type.fromstring('<<e,?>,?>')
-<<e,?>,?>
-
-
+
>>> Type.fromstring('e')
+e
+>>> Type.fromstring('<e,t>')
+<e,t>
+>>> Type.fromstring('<<e,t>,<e,t>>')
+<<e,t>,<e,t>>
+>>> Type.fromstring('<<e,?>,?>')
+<<e,?>,?>
+
+

alternative type format

-
-
->>> Type.fromstring('e').str()
-'IND'
->>> Type.fromstring('<e,?>').str()
-'(IND -> ANY)'
->>> Type.fromstring('<<e,t>,t>').str()
-'((IND -> BOOL) -> BOOL)'
-
-
+
>>> Type.fromstring('e').str()
+'IND'
+>>> Type.fromstring('<e,?>').str()
+'(IND -> ANY)'
+>>> Type.fromstring('<<e,t>,t>').str()
+'((IND -> BOOL) -> BOOL)'
+
+

Type.__eq__()

-
-
->>> from nltk.sem.logic import *
-
-
->>> e = ENTITY_TYPE
->>> t = TRUTH_TYPE
->>> a = ANY_TYPE
->>> et = ComplexType(e,t)
->>> eet = ComplexType(e,ComplexType(e,t))
->>> at = ComplexType(a,t)
->>> ea = ComplexType(e,a)
->>> aa = ComplexType(a,a)
-
-
->>> e == e
-True
->>> t == t
-True
->>> e == t
-False
->>> a == t
-False
->>> t == a
-False
->>> a == a
-True
->>> et == et
-True
->>> a == et
-False
->>> et == a
-False
->>> a == ComplexType(a,aa)
-True
->>> ComplexType(a,aa) == a
-True
-
-
+
>>> from nltk.sem.logic import *
+
+
+
>>> e = ENTITY_TYPE
+>>> t = TRUTH_TYPE
+>>> a = ANY_TYPE
+>>> et = ComplexType(e,t)
+>>> eet = ComplexType(e,ComplexType(e,t))
+>>> at = ComplexType(a,t)
+>>> ea = ComplexType(e,a)
+>>> aa = ComplexType(a,a)
+
+
+
>>> e == e
+True
+>>> t == t
+True
+>>> e == t
+False
+>>> a == t
+False
+>>> t == a
+False
+>>> a == a
+True
+>>> et == et
+True
+>>> a == et
+False
+>>> et == a
+False
+>>> a == ComplexType(a,aa)
+True
+>>> ComplexType(a,aa) == a
+True
+
+

matches()

-
-
->>> e.matches(t)
-False
->>> a.matches(t)
-True
->>> t.matches(a)
-True
->>> a.matches(et)
-True
->>> et.matches(a)
-True
->>> ea.matches(eet)
-True
->>> eet.matches(ea)
-True
->>> aa.matches(et)
-True
->>> aa.matches(t)
-True
-
-
-
-

Type error during parsing

-
-
->>> try: print(tlp.parse(r'exists x y.(P(x) & P(x,y))'))
-... except InconsistentTypeHierarchyException as e: print(e)
-The variable 'P' was found in multiple places with different types.
->>> try: tlp.parse(r'\x y.see(x,y)(\x.man(x))')
-... except TypeException as e: print(e)
-The function '\x y.see(x,y)' is of type '<e,<e,?>>' and cannot be applied to '\x.man(x)' of type '<e,?>'.  Its argument must match type 'e'.
->>> try: tlp.parse(r'\P x y.-P(x,y)(\x.-man(x))')
-... except TypeException as e: print(e)
-The function '\P x y.-P(x,y)' is of type '<<e,<e,t>>,<e,<e,t>>>' and cannot be applied to '\x.-man(x)' of type '<e,t>'.  Its argument must match type '<e,<e,t>>'.
-
-
->>> a = tlp.parse(r'-talk(x)')
->>> signature = a.typecheck()
->>> try: print(tlp.parse(r'-talk(x,y)', signature))
-... except InconsistentTypeHierarchyException as e: print(e)
-The variable 'talk' was found in multiple places with different types.
-
-
->>> a = tlp.parse(r'-P(x)')
->>> b = tlp.parse(r'-P(x,y)')
->>> a.typecheck() # doctest: +ELLIPSIS
-{...}
->>> b.typecheck() # doctest: +ELLIPSIS
-{...}
->>> try: typecheck([a,b])
-... except InconsistentTypeHierarchyException as e: print(e)
-The variable 'P' was found in multiple places with different types.
-
-
->>> a = tlp.parse(r'P(x)')
->>> b = tlp.parse(r'P(x,y)')
->>> signature = {'P': '<e,t>'}
->>> a.typecheck(signature) # doctest: +ELLIPSIS
-{...}
->>> try: typecheck([a,b], signature)
-... except InconsistentTypeHierarchyException as e: print(e)
-The variable 'P' was found in multiple places with different types.
-
-
-
-
-

Parse errors

-
-
->>> try: read_expr(r'')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-<BLANKLINE>
-^
->>> try: read_expr(r'(')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-(
- ^
->>> try: read_expr(r')')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-)
-^
->>> try: read_expr(r'()')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-()
- ^
->>> try: read_expr(r'(P(x) & Q(x)')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expected token ')'.
-(P(x) & Q(x)
-            ^
->>> try: read_expr(r'(P(x) &')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-(P(x) &
-       ^
->>> try: read_expr(r'(P(x) | )')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-(P(x) | )
-        ^
->>> try: read_expr(r'P(x) ->')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-P(x) ->
-       ^
->>> try: read_expr(r'P(x')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expected token ')'.
-P(x
-   ^
->>> try: read_expr(r'P(x,')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-P(x,
-    ^
->>> try: read_expr(r'P(x,)')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-P(x,)
-    ^
->>> try: read_expr(r'exists')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Variable and Expression expected following quantifier 'exists'.
-exists
-       ^
->>> try: read_expr(r'exists x')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-exists x
-         ^
->>> try: read_expr(r'exists x.')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-exists x.
-         ^
->>> try: read_expr(r'\  ')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Variable and Expression expected following lambda operator.
-\
-  ^
->>> try: read_expr(r'\ x')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-\ x
-    ^
->>> try: read_expr(r'\ x y')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-\ x y
-      ^
->>> try: read_expr(r'\ x.')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-\ x.
-    ^
->>> try: read_expr(r'P(x)Q(x)')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: 'Q'.
-P(x)Q(x)
-    ^
->>> try: read_expr(r'(P(x)Q(x)')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: 'Q'.  Expected token ')'.
-(P(x)Q(x)
-     ^
->>> try: read_expr(r'exists x y')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-exists x y
-           ^
->>> try: read_expr(r'exists x y.')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expression expected.
-exists x y.
-           ^
->>> try: read_expr(r'exists x -> y')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: '->'.  Expression expected.
-exists x -> y
-         ^
-
-
->>> try: read_expr(r'A -> ((P(x) & Q(x)) -> Z')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expected token ')'.
-A -> ((P(x) & Q(x)) -> Z
-                        ^
->>> try: read_expr(r'A -> ((P(x) &) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> ((P(x) &) -> Z
-             ^
->>> try: read_expr(r'A -> ((P(x) | )) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> ((P(x) | )) -> Z
-              ^
->>> try: read_expr(r'A -> (P(x) ->) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (P(x) ->) -> Z
-             ^
->>> try: read_expr(r'A -> (P(x) -> Z')
-... except LogicalExpressionException as e: print(e)
-End of input found.  Expected token ')'.
-A -> (P(x) -> Z
-               ^
->>> try: read_expr(r'A -> (P(x,) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (P(x,) -> Z
-          ^
->>> try: read_expr(r'A -> (P(x,)) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (P(x,)) -> Z
-          ^
->>> try: read_expr(r'A -> (exists) -> Z')
-... except LogicalExpressionException as e: print(e)
-')' is an illegal variable name.  Constants may not be quantified.
-A -> (exists) -> Z
-            ^
->>> try: read_expr(r'A -> (exists x) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (exists x) -> Z
-              ^
->>> try: read_expr(r'A -> (exists x.) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (exists x.) -> Z
-               ^
->>> try: read_expr(r'A -> (\  ) -> Z')
-... except LogicalExpressionException as e: print(e)
-')' is an illegal variable name.  Constants may not be abstracted.
-A -> (\  ) -> Z
-         ^
->>> try: read_expr(r'A -> (\ x) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (\ x) -> Z
-         ^
->>> try: read_expr(r'A -> (\ x y) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (\ x y) -> Z
-           ^
->>> try: read_expr(r'A -> (\ x.) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (\ x.) -> Z
-          ^
->>> try: read_expr(r'A -> (P(x)Q(x)) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: 'Q'.  Expected token ')'.
-A -> (P(x)Q(x)) -> Z
-          ^
->>> try: read_expr(r'A -> ((P(x)Q(x)) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: 'Q'.  Expected token ')'.
-A -> ((P(x)Q(x)) -> Z
-           ^
->>> try: read_expr(r'A -> (all x y) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (all x y) -> Z
-             ^
->>> try: read_expr(r'A -> (exists x y.) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: ')'.  Expression expected.
-A -> (exists x y.) -> Z
-                 ^
->>> try: read_expr(r'A -> (exists x -> y) -> Z')
-... except LogicalExpressionException as e: print(e)
-Unexpected token: '->'.  Expression expected.
-A -> (exists x -> y) -> Z
-               ^
-
-
+
>>> e.matches(t)
+False
+>>> a.matches(t)
+True
+>>> t.matches(a)
+True
+>>> a.matches(et)
+True
+>>> et.matches(a)
+True
+>>> ea.matches(eet)
+True
+>>> eet.matches(ea)
+True
+>>> aa.matches(et)
+True
+>>> aa.matches(t)
+True
+
+
+
+
Type error during parsing
+
>>> try: print(tlp.parse(r'exists x y.(P(x) & P(x,y))'))
+... except InconsistentTypeHierarchyException as e: print(e)
+The variable 'P' was found in multiple places with different types.
+>>> try: tlp.parse(r'\x y.see(x,y)(\x.man(x))')
+... except TypeException as e: print(e)
+The function '\x y.see(x,y)' is of type '<e,<e,?>>' and cannot be applied to '\x.man(x)' of type '<e,?>'.  Its argument must match type 'e'.
+>>> try: tlp.parse(r'\P x y.-P(x,y)(\x.-man(x))')
+... except TypeException as e: print(e)
+The function '\P x y.-P(x,y)' is of type '<<e,<e,t>>,<e,<e,t>>>' and cannot be applied to '\x.-man(x)' of type '<e,t>'.  Its argument must match type '<e,<e,t>>'.
+
+
+
>>> a = tlp.parse(r'-talk(x)')
+>>> signature = a.typecheck()
+>>> try: print(tlp.parse(r'-talk(x,y)', signature))
+... except InconsistentTypeHierarchyException as e: print(e)
+The variable 'talk' was found in multiple places with different types.
+
+
>>> a = tlp.parse(r'-P(x)')
+>>> b = tlp.parse(r'-P(x,y)')
+>>> a.typecheck()
+{...}
+>>> b.typecheck()
+{...}
+>>> try: typecheck([a,b])
+... except InconsistentTypeHierarchyException as e: print(e)
+The variable 'P' was found in multiple places with different types.
+
+
>>> a = tlp.parse(r'P(x)')
+>>> b = tlp.parse(r'P(x,y)')
+>>> signature = {'P': '<e,t>'}
+>>> a.typecheck(signature)
+{...}
+>>> try: typecheck([a,b], signature)
+... except InconsistentTypeHierarchyException as e: print(e)
+The variable 'P' was found in multiple places with different types.
+
+
+
+
Parse errors
+
>>> try: read_expr(r'')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+
+^
+>>> try: read_expr(r'(')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+(
+ ^
+>>> try: read_expr(r')')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+)
+^
+>>> try: read_expr(r'()')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+()
+ ^
+>>> try: read_expr(r'(P(x) & Q(x)')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expected token ')'.
+(P(x) & Q(x)
+            ^
+>>> try: read_expr(r'(P(x) &')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+(P(x) &
+       ^
+>>> try: read_expr(r'(P(x) | )')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+(P(x) | )
+        ^
+>>> try: read_expr(r'P(x) ->')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+P(x) ->
+       ^
+>>> try: read_expr(r'P(x')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expected token ')'.
+P(x
+   ^
+>>> try: read_expr(r'P(x,')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+P(x,
+    ^
+>>> try: read_expr(r'P(x,)')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+P(x,)
+    ^
+>>> try: read_expr(r'exists')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Variable and Expression expected following quantifier 'exists'.
+exists
+       ^
+>>> try: read_expr(r'exists x')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+exists x
+         ^
+>>> try: read_expr(r'exists x.')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+exists x.
+         ^
+>>> try: read_expr(r'\  ')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Variable and Expression expected following lambda operator.
+\
+  ^
+>>> try: read_expr(r'\ x')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+\ x
+    ^
+>>> try: read_expr(r'\ x y')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+\ x y
+      ^
+>>> try: read_expr(r'\ x.')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+\ x.
+    ^
+>>> try: read_expr(r'P(x)Q(x)')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: 'Q'.
+P(x)Q(x)
+    ^
+>>> try: read_expr(r'(P(x)Q(x)')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: 'Q'.  Expected token ')'.
+(P(x)Q(x)
+     ^
+>>> try: read_expr(r'exists x y')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+exists x y
+           ^
+>>> try: read_expr(r'exists x y.')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expression expected.
+exists x y.
+           ^
+>>> try: read_expr(r'exists x -> y')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: '->'.  Expression expected.
+exists x -> y
+         ^
+
+
>>> try: read_expr(r'A -> ((P(x) & Q(x)) -> Z')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expected token ')'.
+A -> ((P(x) & Q(x)) -> Z
+                        ^
+>>> try: read_expr(r'A -> ((P(x) &) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> ((P(x) &) -> Z
+             ^
+>>> try: read_expr(r'A -> ((P(x) | )) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> ((P(x) | )) -> Z
+              ^
+>>> try: read_expr(r'A -> (P(x) ->) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (P(x) ->) -> Z
+             ^
+>>> try: read_expr(r'A -> (P(x) -> Z')
+... except LogicalExpressionException as e: print(e)
+End of input found.  Expected token ')'.
+A -> (P(x) -> Z
+               ^
+>>> try: read_expr(r'A -> (P(x,) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (P(x,) -> Z
+          ^
+>>> try: read_expr(r'A -> (P(x,)) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (P(x,)) -> Z
+          ^
+>>> try: read_expr(r'A -> (exists) -> Z')
+... except LogicalExpressionException as e: print(e)
+')' is an illegal variable name.  Constants may not be quantified.
+A -> (exists) -> Z
+            ^
+>>> try: read_expr(r'A -> (exists x) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (exists x) -> Z
+              ^
+>>> try: read_expr(r'A -> (exists x.) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (exists x.) -> Z
+               ^
+>>> try: read_expr(r'A -> (\  ) -> Z')
+... except LogicalExpressionException as e: print(e)
+')' is an illegal variable name.  Constants may not be abstracted.
+A -> (\  ) -> Z
+         ^
+>>> try: read_expr(r'A -> (\ x) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (\ x) -> Z
+         ^
+>>> try: read_expr(r'A -> (\ x y) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (\ x y) -> Z
+           ^
+>>> try: read_expr(r'A -> (\ x.) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (\ x.) -> Z
+          ^
+>>> try: read_expr(r'A -> (P(x)Q(x)) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: 'Q'.  Expected token ')'.
+A -> (P(x)Q(x)) -> Z
+          ^
+>>> try: read_expr(r'A -> ((P(x)Q(x)) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: 'Q'.  Expected token ')'.
+A -> ((P(x)Q(x)) -> Z
+           ^
+>>> try: read_expr(r'A -> (all x y) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (all x y) -> Z
+             ^
+>>> try: read_expr(r'A -> (exists x y.) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: ')'.  Expression expected.
+A -> (exists x y.) -> Z
+                 ^
+>>> try: read_expr(r'A -> (exists x -> y) -> Z')
+... except LogicalExpressionException as e: print(e)
+Unexpected token: '->'.  Expression expected.
+A -> (exists x -> y) -> Z
+               ^
+
+
+
+
+
\ No newline at end of file
diff --git a/howto/meteor.html b/howto/meteor.html
new file mode 100644
index 000000000..41b1a8ea0
--- /dev/null
+++ b/howto/meteor.html
@@ -0,0 +1,200 @@
+NLTK :: Sample usage for meteor

Sample usage for meteor

+
+

METEOR tests

+
+

No Alignment test

+
>>> from nltk.translate import meteor
+>>> from nltk import word_tokenize
+
+
+

If the candidate has no alignment to any of the references, the METEOR score is 0.

+
>>> round(meteor(
+...     [word_tokenize('The candidate has no alignment to any of the references')],
+...     word_tokenize('John loves Mary')
+... ), 4)
+0.0
+
+
+
+
+

Tests based on Wikipedia examples

+

Testing on Wikipedia examples

+
>>> same_res = round(meteor(
+...       [word_tokenize('The cat sat on the mat')],
+...       word_tokenize('The cat sat on the mat')
+...       ), 4)
+>>> abs(same_res - 0.9977) < 1e-2
+True
+
+
+
>>> meteor(
+...       [word_tokenize('The cat sat on the mat')],
+...       word_tokenize('on the mat sat the cat')
+...       )
+0.5
+
+
+
>>> round(meteor(
+...       [word_tokenize('The cat sat on the mat')],
+...       word_tokenize('The cat was sat on the mat')
+...       ), 4)
+0.9654
+
+
+

Test corresponding to issue #2751, where METEOR score > 1

+
>>> round(meteor(
+...       [word_tokenize('create or update a vm set')],
+...       word_tokenize('creates or updates a virtual machine scale set')
+...       ), 4)
+0.7806
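+
+
+Under the hood the score combines unigram precision P and recall R with a
+fragmentation penalty. A sketch of the computation, assuming the default
+parameters of nltk.translate.meteor_score (alpha=0.9, beta=3, gamma=0.5):
+
+>>> # fmean   = P * R / (alpha * P + (1 - alpha) * R)
+>>> # penalty = gamma * (chunks / matches) ** beta
+>>> # score   = fmean * (1 - penalty)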
+
+
+
+
+
\ No newline at end of file
diff --git a/howto/metrics.html b/howto/metrics.html
index 047fb20dc..4d5b600e8 100644
--- a/howto/metrics.html
+++ b/howto/metrics.html
@@ -1,631 +1,434 @@
-Metrics
+NLTK :: Sample usage for metrics
Sample usage for metrics

+
+

Metrics

+
+

Setup

+
>>> import pytest
+>>> _ = pytest.importorskip("numpy")
+
+

The nltk.metrics package provides a variety of evaluation measures which can be used for a wide variety of NLP tasks.

-
-
->>> from __future__ import print_function
->>> from nltk.metrics import *
-
-
-
-

Standard IR Scores

+
>>> from nltk.metrics import *
+
+
+
+
+

Standard IR Scores

We can use standard scores from information retrieval to test the performance of taggers, chunkers, etc.

-
-
->>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
->>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
->>> print(accuracy(reference, test))
-0.8
-
-
+
>>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
+>>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
+>>> print(accuracy(reference, test))
+0.8
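+>>> # 8 of the 10 tags agree with the reference, so accuracy = 8/10 = 0.8.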
+
+

The following measures apply to sets:

-
-
->>> reference_set = set(reference)
->>> test_set = set(test)
->>> precision(reference_set, test_set)
-1.0
->>> print(recall(reference_set, test_set))
-0.8
->>> print(f_measure(reference_set, test_set))
-0.88888888888...
-
-
+
>>> reference_set = set(reference)
+>>> test_set = set(test)
+>>> precision(reference_set, test_set)
+1.0
+>>> print(recall(reference_set, test_set))
+0.8
+>>> print(f_measure(reference_set, test_set))
+0.88888888888...
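+>>> # The reference set is {DET, NN, VB, JJ, IN} and the test set is
+>>> # {DET, VB, NN, IN}, so precision = 4/4 = 1.0, recall = 4/5 = 0.8, and
+>>> # f_measure is their harmonic mean: 2*1.0*0.8/1.8 = 0.888...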
+
+

Measuring the likelihood of the data, given probability distributions:

-
-
->>> from nltk import FreqDist, MLEProbDist
->>> pdist1 = MLEProbDist(FreqDist("aldjfalskfjaldsf"))
->>> pdist2 = MLEProbDist(FreqDist("aldjfalssjjlldss"))
->>> print(log_likelihood(['a', 'd'], [pdist1, pdist2]))
--2.7075187496...
-
-
+
>>> from nltk import FreqDist, MLEProbDist
+>>> pdist1 = MLEProbDist(FreqDist("aldjfalskfjaldsf"))
+>>> pdist2 = MLEProbDist(FreqDist("aldjfalssjjlldss"))
+>>> print(log_likelihood(['a', 'd'], [pdist1, pdist2]))
+-2.7075187496...
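+>>> # This is the average log2 probability of each reference value under the
+>>> # corresponding distribution: (log2(3/16) + log2(2/16)) / 2 = -2.7075...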
+
-
-

Distance Metrics

+
+
+

Distance Metrics

String edit distance (Levenshtein):

-
-
->>> edit_distance("rain", "shine")
-3
-
-
+
>>> edit_distance("rain", "shine")
+3
+>>> edit_distance_align("shine", "shine")
+[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
+>>> edit_distance_align("rain", "brainy")
+[(0, 0), (1, 1), (1, 2), (2, 3), (3, 4), (4, 5), (4, 6)]
+>>> edit_distance_align("", "brainy")
+[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6)]
+>>> edit_distance_align("", "")
+[(0, 0)]
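+>>> # edit_distance also accepts substitution_cost and transpositions keyword
+>>> # arguments; with transpositions=True a swap of two adjacent characters
+>>> # counts as a single edit:
+>>> edit_distance("ab", "ba")
+2
+>>> edit_distance("ab", "ba", transpositions=True)
+1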
+
+

Other distance measures:

-
-
->>> s1 = set([1,2,3,4])
->>> s2 = set([3,4,5])
->>> binary_distance(s1, s2)
-1.0
->>> print(jaccard_distance(s1, s2))
-0.6
->>> print(masi_distance(s1, s2))
-0.868...
-
-
+
>>> s1 = set([1,2,3,4])
+>>> s2 = set([3,4,5])
+>>> binary_distance(s1, s2)
+1.0
+>>> print(jaccard_distance(s1, s2))
+0.6
+>>> print(masi_distance(s1, s2))
+0.868
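+>>> # jaccard_distance is 1 - |intersection|/|union| = 1 - 2/5 = 0.6;
+>>> # masi_distance additionally weights the Jaccard similarity by 0.33
+>>> # because the sets merely overlap (neither equal nor a subset):
+>>> # 1 - (2/5)*0.33 = 0.868.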
+
-
-

Miscellaneous Measures

+
+
+

Miscellaneous Measures

Rank Correlation works with two dictionaries mapping keys to ranks. The dictionaries should have the same set of keys.

-
-
->>> spearman_correlation({'e':1, 't':2, 'a':3}, {'e':1, 'a':2, 't':3})
-0.5
-
-
+
>>> spearman_correlation({'e':1, 't':2, 'a':3}, {'e':1, 'a':2, 't':3})
+0.5
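+>>> # Spearman's rho = 1 - 6*sum(d**2) / (n*(n**2 - 1)); here the ranks of
+>>> # 't' and 'a' each differ by 1, so rho = 1 - 6*2/(3*8) = 0.5.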
+
+

Windowdiff uses a sliding window in comparing two segmentations of the same input (e.g. tokenizations, chunkings). Segmentations are represented using strings of zeros and ones.

-
-
->>> s1 = "000100000010"
->>> s2 = "000010000100"
->>> s3 = "100000010000"
->>> s4 = "000000000000"
->>> s5 = "111111111111"
->>> windowdiff(s1, s1, 3)
-0.0
->>> abs(windowdiff(s1, s2, 3) - 0.3)  < 1e-6  # windowdiff(s1, s2, 3) == 0.3
-True
->>> abs(windowdiff(s2, s3, 3) - 0.8)  < 1e-6  # windowdiff(s2, s3, 3) == 0.8
-True
->>> windowdiff(s1, s4, 3)
-0.5
->>> windowdiff(s1, s5, 3)
-1.0
-
-
+
>>> s1 = "000100000010"
+>>> s2 = "000010000100"
+>>> s3 = "100000010000"
+>>> s4 = "000000000000"
+>>> s5 = "111111111111"
+>>> windowdiff(s1, s1, 3)
+0.0
+>>> abs(windowdiff(s1, s2, 3) - 0.3)  < 1e-6  # windowdiff(s1, s2, 3) == 0.3
+True
+>>> abs(windowdiff(s2, s3, 3) - 0.8)  < 1e-6  # windowdiff(s2, s3, 3) == 0.8
+True
+>>> windowdiff(s1, s4, 3)
+0.5
+>>> windowdiff(s1, s5, 3)
+1.0
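+>>> # windowdiff slides a window of size k (here 3) across both segmentations
+>>> # and counts the window positions whose boundary counts differ,
+>>> # normalized by the number of window positions.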
+
+
+
+
+

Confusion Matrix

+
>>> reference = 'This is the reference data.  Testing 123.  aoaeoeoe'
+>>> test =      'Thos iz_the rifirenci data.  Testeng 123.  aoaeoeoe'
+>>> print(ConfusionMatrix(reference, test))
+  |   . 1 2 3 T _ a c d e f g h i n o r s t z |
+--+-------------------------------------------+
+  |<8>. . . . . 1 . . . . . . . . . . . . . . |
+. | .<2>. . . . . . . . . . . . . . . . . . . |
+1 | . .<1>. . . . . . . . . . . . . . . . . . |
+2 | . . .<1>. . . . . . . . . . . . . . . . . |
+3 | . . . .<1>. . . . . . . . . . . . . . . . |
+T | . . . . .<2>. . . . . . . . . . . . . . . |
+_ | . . . . . .<.>. . . . . . . . . . . . . . |
+a | . . . . . . .<4>. . . . . . . . . . . . . |
+c | . . . . . . . .<1>. . . . . . . . . . . . |
+d | . . . . . . . . .<1>. . . . . . . . . . . |
+e | . . . . . . . . . .<6>. . . 3 . . . . . . |
+f | . . . . . . . . . . .<1>. . . . . . . . . |
+g | . . . . . . . . . . . .<1>. . . . . . . . |
+h | . . . . . . . . . . . . .<2>. . . . . . . |
+i | . . . . . . . . . . 1 . . .<1>. 1 . . . . |
+n | . . . . . . . . . . . . . . .<2>. . . . . |
+o | . . . . . . . . . . . . . . . .<3>. . . . |
+r | . . . . . . . . . . . . . . . . .<2>. . . |
+s | . . . . . . . . . . . . . . . . . .<2>. 1 |
+t | . . . . . . . . . . . . . . . . . . .<3>. |
+z | . . . . . . . . . . . . . . . . . . . .<.>|
+--+-------------------------------------------+
+(row = reference; col = test)
+
+
+
>>> cm = ConfusionMatrix(reference, test)
+>>> print(cm.pretty_format(sort_by_count=True))
+  |   e a i o s t . T h n r 1 2 3 c d f g _ z |
+--+-------------------------------------------+
+  |<8>. . . . . . . . . . . . . . . . . . 1 . |
+e | .<6>. 3 . . . . . . . . . . . . . . . . . |
+a | . .<4>. . . . . . . . . . . . . . . . . . |
+i | . 1 .<1>1 . . . . . . . . . . . . . . . . |
+o | . . . .<3>. . . . . . . . . . . . . . . . |
+s | . . . . .<2>. . . . . . . . . . . . . . 1 |
+t | . . . . . .<3>. . . . . . . . . . . . . . |
+. | . . . . . . .<2>. . . . . . . . . . . . . |
+T | . . . . . . . .<2>. . . . . . . . . . . . |
+h | . . . . . . . . .<2>. . . . . . . . . . . |
+n | . . . . . . . . . .<2>. . . . . . . . . . |
+r | . . . . . . . . . . .<2>. . . . . . . . . |
+1 | . . . . . . . . . . . .<1>. . . . . . . . |
+2 | . . . . . . . . . . . . .<1>. . . . . . . |
+3 | . . . . . . . . . . . . . .<1>. . . . . . |
+c | . . . . . . . . . . . . . . .<1>. . . . . |
+d | . . . . . . . . . . . . . . . .<1>. . . . |
+f | . . . . . . . . . . . . . . . . .<1>. . . |
+g | . . . . . . . . . . . . . . . . . .<1>. . |
+_ | . . . . . . . . . . . . . . . . . . .<.>. |
+z | . . . . . . . . . . . . . . . . . . . .<.>|
+--+-------------------------------------------+
+(row = reference; col = test)
+
+
+
>>> print(cm.pretty_format(sort_by_count=True, truncate=10))
+  |   e a i o s t . T h |
+--+---------------------+
+  |<8>. . . . . . . . . |
+e | .<6>. 3 . . . . . . |
+a | . .<4>. . . . . . . |
+i | . 1 .<1>1 . . . . . |
+o | . . . .<3>. . . . . |
+s | . . . . .<2>. . . . |
+t | . . . . . .<3>. . . |
+. | . . . . . . .<2>. . |
+T | . . . . . . . .<2>. |
+h | . . . . . . . . .<2>|
+--+---------------------+
+(row = reference; col = test)
+
-
-

Confusion Matrix

-
-
->>> reference = 'This is the reference data.  Testing 123.  aoaeoeoe'
->>> test =      'Thos iz_the rifirenci data.  Testeng 123.  aoaeoeoe'
->>> print(ConfusionMatrix(reference, test))
-  |   . 1 2 3 T _ a c d e f g h i n o r s t z |
---+-------------------------------------------+
-  |<8>. . . . . 1 . . . . . . . . . . . . . . |
-. | .<2>. . . . . . . . . . . . . . . . . . . |
-1 | . .<1>. . . . . . . . . . . . . . . . . . |
-2 | . . .<1>. . . . . . . . . . . . . . . . . |
-3 | . . . .<1>. . . . . . . . . . . . . . . . |
-T | . . . . .<2>. . . . . . . . . . . . . . . |
-_ | . . . . . .<.>. . . . . . . . . . . . . . |
-a | . . . . . . .<4>. . . . . . . . . . . . . |
-c | . . . . . . . .<1>. . . . . . . . . . . . |
-d | . . . . . . . . .<1>. . . . . . . . . . . |
-e | . . . . . . . . . .<6>. . . 3 . . . . . . |
-f | . . . . . . . . . . .<1>. . . . . . . . . |
-g | . . . . . . . . . . . .<1>. . . . . . . . |
-h | . . . . . . . . . . . . .<2>. . . . . . . |
-i | . . . . . . . . . . 1 . . .<1>. 1 . . . . |
-n | . . . . . . . . . . . . . . .<2>. . . . . |
-o | . . . . . . . . . . . . . . . .<3>. . . . |
-r | . . . . . . . . . . . . . . . . .<2>. . . |
-s | . . . . . . . . . . . . . . . . . .<2>. 1 |
-t | . . . . . . . . . . . . . . . . . . .<3>. |
-z | . . . . . . . . . . . . . . . . . . . .<.>|
---+-------------------------------------------+
-(row = reference; col = test)
-<BLANKLINE>
-
-
->>> cm = ConfusionMatrix(reference, test)
->>> print(cm.pretty_format(sort_by_count=True))
-  |   e a i o s t . T h n r 1 2 3 c d f g _ z |
---+-------------------------------------------+
-  |<8>. . . . . . . . . . . . . . . . . . 1 . |
-e | .<6>. 3 . . . . . . . . . . . . . . . . . |
-a | . .<4>. . . . . . . . . . . . . . . . . . |
-i | . 1 .<1>1 . . . . . . . . . . . . . . . . |
-o | . . . .<3>. . . . . . . . . . . . . . . . |
-s | . . . . .<2>. . . . . . . . . . . . . . 1 |
-t | . . . . . .<3>. . . . . . . . . . . . . . |
-. | . . . . . . .<2>. . . . . . . . . . . . . |
-T | . . . . . . . .<2>. . . . . . . . . . . . |
-h | . . . . . . . . .<2>. . . . . . . . . . . |
-n | . . . . . . . . . .<2>. . . . . . . . . . |
-r | . . . . . . . . . . .<2>. . . . . . . . . |
-1 | . . . . . . . . . . . .<1>. . . . . . . . |
-2 | . . . . . . . . . . . . .<1>. . . . . . . |
-3 | . . . . . . . . . . . . . .<1>. . . . . . |
-c | . . . . . . . . . . . . . . .<1>. . . . . |
-d | . . . . . . . . . . . . . . . .<1>. . . . |
-f | . . . . . . . . . . . . . . . . .<1>. . . |
-g | . . . . . . . . . . . . . . . . . .<1>. . |
-_ | . . . . . . . . . . . . . . . . . . .<.>. |
-z | . . . . . . . . . . . . . . . . . . . .<.>|
---+-------------------------------------------+
-(row = reference; col = test)
-<BLANKLINE>
-
-
->>> print(cm.pretty_format(sort_by_count=True, truncate=10))
-  |   e a i o s t . T h |
---+---------------------+
-  |<8>. . . . . . . . . |
-e | .<6>. 3 . . . . . . |
-a | . .<4>. . . . . . . |
-i | . 1 .<1>1 . . . . . |
-o | . . . .<3>. . . . . |
-s | . . . . .<2>. . . . |
-t | . . . . . .<3>. . . |
-. | . . . . . . .<2>. . |
-T | . . . . . . . .<2>. |
-h | . . . . . . . . .<2>|
---+---------------------+
-(row = reference; col = test)
-<BLANKLINE>
-
-
->>> print(cm.pretty_format(sort_by_count=True, truncate=10, values_in_chart=False))
-   |                   1 |
-   | 1 2 3 4 5 6 7 8 9 0 |
----+---------------------+
- 1 |<8>. . . . . . . . . |
- 2 | .<6>. 3 . . . . . . |
- 3 | . .<4>. . . . . . . |
- 4 | . 1 .<1>1 . . . . . |
- 5 | . . . .<3>. . . . . |
- 6 | . . . . .<2>. . . . |
- 7 | . . . . . .<3>. . . |
- 8 | . . . . . . .<2>. . |
- 9 | . . . . . . . .<2>. |
-10 | . . . . . . . . .<2>|
----+---------------------+
-(row = reference; col = test)
-Value key:
-     1:
-     2: e
-     3: a
-     4: i
-     5: o
-     6: s
-     7: t
-     8: .
-     9: T
-    10: h
-<BLANKLINE>
-
-
+
>>> print(cm.pretty_format(sort_by_count=True, truncate=10, values_in_chart=False))
+   |                   1 |
+   | 1 2 3 4 5 6 7 8 9 0 |
+---+---------------------+
+ 1 |<8>. . . . . . . . . |
+ 2 | .<6>. 3 . . . . . . |
+ 3 | . .<4>. . . . . . . |
+ 4 | . 1 .<1>1 . . . . . |
+ 5 | . . . .<3>. . . . . |
+ 6 | . . . . .<2>. . . . |
+ 7 | . . . . . .<3>. . . |
+ 8 | . . . . . . .<2>. . |
+ 9 | . . . . . . . .<2>. |
+10 | . . . . . . . . .<2>|
+---+---------------------+
+(row = reference; col = test)
+Value key:
+     1:
+     2: e
+     3: a
+     4: i
+     5: o
+     6: s
+     7: t
+     8: .
+     9: T
+    10: h
+
-
-

Association measures

+
+
+

Association measures

These measures are useful to determine whether the cooccurrence of two random events is meaningful. They are used, for instance, to distinguish collocations from other pairs of adjacent words.

We present some examples of bigram association calculations from Manning and
-Schutze's SNLP, 2nd Ed. chapter 5.

-
-
->>> n_new_companies, n_new, n_companies, N = 8, 15828, 4675, 14307668
->>> bam = BigramAssocMeasures
->>> bam.raw_freq(20, (42, 20), N) == 20. / N
-True
->>> bam.student_t(n_new_companies, (n_new, n_companies), N)
-0.999...
->>> bam.chi_sq(n_new_companies, (n_new, n_companies), N)
-1.54...
->>> bam.likelihood_ratio(150, (12593, 932), N)
-1291...
-
-
+Schutze’s SNLP, 2nd Ed. chapter 5.

+
>>> n_new_companies, n_new, n_companies, N = 8, 15828, 4675, 14307668
+>>> bam = BigramAssocMeasures
+>>> bam.raw_freq(20, (42, 20), N) == 20. / N
+True
+>>> bam.student_t(n_new_companies, (n_new, n_companies), N)
+0.999...
+>>> bam.chi_sq(n_new_companies, (n_new, n_companies), N)
+1.54...
+>>> bam.likelihood_ratio(150, (12593, 932), N)
+1291...
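+>>> # Each bigram measure takes the joint count n_ii, the marginal counts
+>>> # (n_ix, n_xi), and the total bigram count N; pmi, for instance, is
+>>> # computed as log2(n_ii * N / (n_ix * n_xi)).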
+
+

For other associations, we ensure the ordering of the measures:

-
-
->>> bam.mi_like(20, (42, 20), N) > bam.mi_like(20, (41, 27), N)
-True
->>> bam.pmi(20, (42, 20), N) > bam.pmi(20, (41, 27), N)
-True
->>> bam.phi_sq(20, (42, 20), N) > bam.phi_sq(20, (41, 27), N)
-True
->>> bam.poisson_stirling(20, (42, 20), N) > bam.poisson_stirling(20, (41, 27), N)
-True
->>> bam.jaccard(20, (42, 20), N) > bam.jaccard(20, (41, 27), N)
-True
->>> bam.dice(20, (42, 20), N) > bam.dice(20, (41, 27), N)
-True
->>> bam.fisher(20, (42, 20), N) > bam.fisher(20, (41, 27), N)
-False
-
-
+
>>> bam.mi_like(20, (42, 20), N) > bam.mi_like(20, (41, 27), N)
+True
+>>> bam.pmi(20, (42, 20), N) > bam.pmi(20, (41, 27), N)
+True
+>>> bam.phi_sq(20, (42, 20), N) > bam.phi_sq(20, (41, 27), N)
+True
+>>> bam.poisson_stirling(20, (42, 20), N) > bam.poisson_stirling(20, (41, 27), N)
+True
+>>> bam.jaccard(20, (42, 20), N) > bam.jaccard(20, (41, 27), N)
+True
+>>> bam.dice(20, (42, 20), N) > bam.dice(20, (41, 27), N)
+True
+>>> bam.fisher(20, (42, 20), N) > bam.fisher(20, (41, 27), N) 
+False
+
+

For trigrams, we have to provide more count information:

-
-
->>> n_w1_w2_w3 = 20
->>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40
->>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3)
->>> n_w1, n_w2, n_w3 = 100, 200, 300
->>> uni_counts = (n_w1, n_w2, n_w3)
->>> N = 14307668
->>> tam = TrigramAssocMeasures
->>> tam.raw_freq(n_w1_w2_w3, pair_counts, uni_counts, N) == 1. * n_w1_w2_w3 / N
-True
->>> uni_counts2 = (n_w1, n_w2, 100)
->>> tam.student_t(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.student_t(n_w1_w2_w3, pair_counts, uni_counts, N)
-True
->>> tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts, N)
-True
->>> tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts, N)
-True
->>> tam.pmi(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.pmi(n_w1_w2_w3, pair_counts, uni_counts, N)
-True
->>> tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts, N)
-True
->>> tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts, N)
-True
->>> tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts, N)
-True
-
-
+
>>> n_w1_w2_w3 = 20
+>>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40
+>>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3)
+>>> n_w1, n_w2, n_w3 = 100, 200, 300
+>>> uni_counts = (n_w1, n_w2, n_w3)
+>>> N = 14307668
+>>> tam = TrigramAssocMeasures
+>>> tam.raw_freq(n_w1_w2_w3, pair_counts, uni_counts, N) == 1. * n_w1_w2_w3 / N
+True
+>>> uni_counts2 = (n_w1, n_w2, 100)
+>>> tam.student_t(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.student_t(n_w1_w2_w3, pair_counts, uni_counts, N)
+True
+>>> tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts, N)
+True
+>>> tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts, N)
+True
+>>> tam.pmi(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.pmi(n_w1_w2_w3, pair_counts, uni_counts, N)
+True
+>>> tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts, N)
+True
+>>> tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts, N)
+True
+>>> tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts, N)
+True
+
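The same idea generalizes to higher orders: for an n-gram the joint count is compared against the product of the unigram counts, scaled by N ** (n - 1). A hedged sketch of that identity for the trigram above (again an addition, assuming the generalized PMI definition; the tolerance absorbs floating-point error):

>>> import math
>>> by_hand = math.log(n_w1_w2_w3 * N ** 2, 2) - math.log(n_w1 * n_w2 * n_w3, 2)
>>> abs(tam.pmi(n_w1_w2_w3, pair_counts, uni_counts, N) - by_hand) < 1e-6
True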
+

For fourgrams, we have to provide more count information:

+
>>> n_w1_w2_w3_w4 = 5
+>>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40
+>>> n_w1_w2_w3, n_w2_w3_w4 = 20, 10
+>>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3)
+>>> triplet_counts = (n_w1_w2_w3, n_w2_w3_w4)
+>>> n_w1, n_w2, n_w3, n_w4 = 100, 200, 300, 400
+>>> uni_counts = (n_w1, n_w2, n_w3, n_w4)
+>>> N = 14307668
+>>> qam = QuadgramAssocMeasures
+>>> qam.raw_freq(n_w1_w2_w3_w4, pair_counts, triplet_counts, uni_counts, N) == 1. * n_w1_w2_w3_w4 / N
+True
+
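The intermediate pair and triplet marginals feed the remaining measures. As a hedged check (an addition, assuming the inherited measures accept the same argument layout), the t-score is positive here, since the observed count of 5 far exceeds the count expected under independence:

>>> qam.student_t(n_w1_w2_w3_w4, pair_counts, triplet_counts, uni_counts, N) > 0
True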
+
+
+
\ No newline at end of file
diff --git a/howto/misc.html b/howto/misc.html
index 55063436e..e23bae519 100644
--- a/howto/misc.html
+++ b/howto/misc.html
@@ -1,481 +1,273 @@

Unit tests for the miscellaneous sort functions.

-
-
->>> from copy import deepcopy
->>> from nltk.misc.sort import *
-
-

Sample usage for misc

+
+

Unit tests for the miscellaneous sort functions.

+
>>> from copy import deepcopy
+>>> from nltk.misc.sort import *
+
+

A (very) small list of unsorted integers.

-
-
->>> test_data = [12, 67, 7, 28, 92, 56, 53, 720, 91, 57, 20, 20]
-
-
+
>>> test_data = [12, 67, 7, 28, 92, 56, 53, 720, 91, 57, 20, 20]
+
+

Test each sorting method: each method returns the number of operations required to sort the data, and sorts in place (destructively, hence the need for multiple copies).

-
-
->>> sorted_data = deepcopy(test_data)
->>> selection(sorted_data)
-66
-
-
->>> sorted_data
-[7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
-
-
->>> sorted_data = deepcopy(test_data)
->>> bubble(sorted_data)
-30
-
-
->>> sorted_data
-[7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
-
-
->>> sorted_data = deepcopy(test_data)
->>> merge(sorted_data)
-30
-
-
->>> sorted_data
-[7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
-
-
->>> sorted_data = deepcopy(test_data)
->>> quick(sorted_data)
-13
-
-
->>> sorted_data
-[7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
-
-
+
>>> sorted_data = deepcopy(test_data)
+>>> selection(sorted_data)
+66
+
-
-

Unit tests for Wordfinder class

-
-
->>> import random
-
-
->>> # The following is not enough for reproducibility under Python 2/3
->>> # (see http://bugs.python.org/issue9025) so this test is skipped.
->>> random.seed(12345)
-
-
->>> from nltk.misc import wordfinder
->>> wordfinder.word_finder() # doctest: +SKIP
-Word Finder
-<BLANKLINE>
-J V L A I R O T A T I S I V O D E R E T
-H U U B E A R O E P O C S O R E T N E P
-A D A U Z E E S R A P P A L L M E N T R
-C X A D Q S Z T P E O R S N G P J A D E
-I G Y K K T I A A R G F I D T E L C N S
-R E C N B H T R L T N N B W N T A O A I
-A Y I L O E I A M E I A A Y U R P L L D
-G L T V S T S F E A D I P H D O O H N I
-R L S E C I N I L R N N M E C G R U E A
-A A Y G I C E N L L E O I G Q R T A E L
-M R C E T I S T A E T L L E U A E N R L
-O U O T A S E E C S O O N H Y P A T G Y
-E M H O M M D R E S F P U L T H C F N V
-L A C A I M A M A N L B R U T E D O M I
-O R I L N E E E E E U A R S C R Y L I P
-H T R K E S N N M S I L A S R E V I N U
-T X T A A O U T K S E T A R R E S I B J
-A E D L E L J I F O O R P E L K N I R W
-K H A I D E Q O P R I C K T I M B E R P
-Z K D O O H G N I H T U R V E Y D R O P
-<BLANKLINE>
-1: INTERCHANGER
-2: TEARLESSNESS
-3: UNIVERSALISM
-4: DESENSITIZER
-5: INTERMENTION
-6: TRICHOCYSTIC
-7: EXTRAMURALLY
-8: VEGETOALKALI
-9: PALMELLACEAE
-10: AESTHETICISM
-11: PETROGRAPHER
-12: VISITATORIAL
-13: OLEOMARGARIC
-14: WRINKLEPROOF
-15: PRICKTIMBER
-16: PRESIDIALLY
-17: SCITAMINEAE
-18: ENTEROSCOPE
-19: APPALLMENT
-20: TURVEYDROP
-21: THINGHOOD
-22: BISERRATE
-23: GREENLAND
-24: BRUTEDOM
-25: POLONIAN
-26: ACOLHUAN
-27: LAPORTEA
-28: TENDING
-29: TEREDO
-30: MESOLE
-31: UNLIMP
-32: OSTARA
-33: PILY
-34: DUNT
-35: ONYX
-36: KATH
-37: JUNE
-
-
+
>>> sorted_data
+[7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
+
+
>>> sorted_data = deepcopy(test_data)
+>>> bubble(sorted_data)
+30
+
+
>>> sorted_data
+[7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
+
+
+
>>> sorted_data = deepcopy(test_data)
+>>> merge(sorted_data)
+30
+
+
+
>>> sorted_data
+[7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
+
+
+
>>> sorted_data = deepcopy(test_data)
+>>> quick(sorted_data)
+13
+
+
+
>>> sorted_data
+[7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
+
+
+
+
+

Unit tests for Wordfinder class

+
>>> import random
+
+
+
>>> # The following is not enough for reproducibility under Python 2/3
+>>> # (see https://bugs.python.org/issue9025) so this test is skipped.
+>>> random.seed(12345)
+
+
+
>>> from nltk.misc import wordfinder
+>>> wordfinder.word_finder() 
+Word Finder
+
+J V L A I R O T A T I S I V O D E R E T
+H U U B E A R O E P O C S O R E T N E P
+A D A U Z E E S R A P P A L L M E N T R
+C X A D Q S Z T P E O R S N G P J A D E
+I G Y K K T I A A R G F I D T E L C N S
+R E C N B H T R L T N N B W N T A O A I
+A Y I L O E I A M E I A A Y U R P L L D
+G L T V S T S F E A D I P H D O O H N I
+R L S E C I N I L R N N M E C G R U E A
+A A Y G I C E N L L E O I G Q R T A E L
+M R C E T I S T A E T L L E U A E N R L
+O U O T A S E E C S O O N H Y P A T G Y
+E M H O M M D R E S F P U L T H C F N V
+L A C A I M A M A N L B R U T E D O M I
+O R I L N E E E E E U A R S C R Y L I P
+H T R K E S N N M S I L A S R E V I N U
+T X T A A O U T K S E T A R R E S I B J
+A E D L E L J I F O O R P E L K N I R W
+K H A I D E Q O P R I C K T I M B E R P
+Z K D O O H G N I H T U R V E Y D R O P
+
+1: INTERCHANGER
+2: TEARLESSNESS
+3: UNIVERSALISM
+4: DESENSITIZER
+5: INTERMENTION
+6: TRICHOCYSTIC
+7: EXTRAMURALLY
+8: VEGETOALKALI
+9: PALMELLACEAE
+10: AESTHETICISM
+11: PETROGRAPHER
+12: VISITATORIAL
+13: OLEOMARGARIC
+14: WRINKLEPROOF
+15: PRICKTIMBER
+16: PRESIDIALLY
+17: SCITAMINEAE
+18: ENTEROSCOPE
+19: APPALLMENT
+20: TURVEYDROP
+21: THINGHOOD
+22: BISERRATE
+23: GREENLAND
+24: BRUTEDOM
+25: POLONIAN
+26: ACOLHUAN
+27: LAPORTEA
+28: TENDING
+29: TEREDO
+30: MESOLE
+31: UNLIMP
+32: OSTARA
+33: PILY
+34: DUNT
+35: ONYX
+36: KATH
+37: JUNE
+
+
+
+
\ No newline at end of file
diff --git a/howto/nonmonotonic.html b/howto/nonmonotonic.html
index d8669e4e1..e57abfe88 100644
--- a/howto/nonmonotonic.html
+++ b/howto/nonmonotonic.html
@@ -1,656 +1,446 @@
-

Nonmonotonic Reasoning

-
->>> from nltk import *
->>> from nltk.inference.nonmonotonic import *
->>> from nltk.sem import logic
->>> logic._counter._value = 0
->>> read_expr = logic.Expression.fromstring
-
-
-
-

Closed Domain Assumption


Sample usage for nonmonotonic

+
+

Nonmonotonic Reasoning

+
>>> from nltk.test.nonmonotonic_fixt import setup_module
+>>> setup_module()
+
+
+
>>> from nltk import *
+>>> from nltk.inference.nonmonotonic import *
+>>> from nltk.sem import logic
+>>> logic._counter._value = 0
+>>> read_expr = logic.Expression.fromstring
+
+
+
+

Closed Domain Assumption

The only entities in the domain are those found in the assumptions or goal. -If the domain only contains "A" and "B", then the expression "exists x.P(x)" can -be replaced with "P(A) | P(B)" and an expression "all x.P(x)" can be replaced -with "P(A) & P(B)".

-
-
->>> p1 = read_expr(r'all x.(man(x) -> mortal(x))')
->>> p2 = read_expr(r'man(Socrates)')
->>> c = read_expr(r'mortal(Socrates)')
->>> prover = Prover9Command(c, [p1,p2])
->>> prover.prove()
-True
->>> cdp = ClosedDomainProver(prover)
->>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-(man(Socrates) -> mortal(Socrates))
-man(Socrates)
->>> cdp.prove()
-True
-
-
->>> p1 = read_expr(r'exists x.walk(x)')
->>> p2 = read_expr(r'man(Socrates)')
->>> c = read_expr(r'walk(Socrates)')
->>> prover = Prover9Command(c, [p1,p2])
->>> prover.prove()
-False
->>> cdp = ClosedDomainProver(prover)
->>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-walk(Socrates)
-man(Socrates)
->>> cdp.prove()
-True
-
-
->>> p1 = read_expr(r'exists x.walk(x)')
->>> p2 = read_expr(r'man(Socrates)')
->>> p3 = read_expr(r'-walk(Bill)')
->>> c = read_expr(r'walk(Socrates)')
->>> prover = Prover9Command(c, [p1,p2,p3])
->>> prover.prove()
-False
->>> cdp = ClosedDomainProver(prover)
->>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-(walk(Socrates) | walk(Bill))
-man(Socrates)
--walk(Bill)
->>> cdp.prove()
-True
-
-
->>> p1 = read_expr(r'walk(Socrates)')
->>> p2 = read_expr(r'walk(Bill)')
->>> c = read_expr(r'all x.walk(x)')
->>> prover = Prover9Command(c, [p1,p2])
->>> prover.prove()
-False
->>> cdp = ClosedDomainProver(prover)
->>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-walk(Socrates)
-walk(Bill)
->>> print(cdp.goal()) # doctest: +SKIP
-(walk(Socrates) & walk(Bill))
->>> cdp.prove()
-True
-
-
->>> p1 = read_expr(r'girl(mary)')
->>> p2 = read_expr(r'dog(rover)')
->>> p3 = read_expr(r'all x.(girl(x) -> -dog(x))')
->>> p4 = read_expr(r'all x.(dog(x) -> -girl(x))')
->>> p5 = read_expr(r'chase(mary, rover)')
->>> c = read_expr(r'exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))')
->>> prover = Prover9Command(c, [p1,p2,p3,p4,p5])
->>> print(prover.prove())
-False
->>> cdp = ClosedDomainProver(prover)
->>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-girl(mary)
-dog(rover)
-((girl(rover) -> -dog(rover)) & (girl(mary) -> -dog(mary)))
-((dog(rover) -> -girl(rover)) & (dog(mary) -> -girl(mary)))
-chase(mary,rover)
->>> print(cdp.goal()) # doctest: +SKIP
-((dog(rover) & (girl(rover) -> chase(rover,rover)) & (girl(mary) -> chase(mary,rover))) | (dog(mary) & (girl(rover) -> chase(rover,mary)) & (girl(mary) -> chase(mary,mary))))
->>> print(cdp.prove())
-True
-
-
+If the domain only contains “A” and “B”, then the expression “exists x.P(x)” can +be replaced with “P(A) | P(B)” and an expression “all x.P(x)” can be replaced +with “P(A) & P(B)”.
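The transformation itself is easy to sketch with a hypothetical helper (illustration only; this is not how ClosedDomainProver is implemented): an existential over a finite domain becomes a disjunction, and a universal becomes a conjunction:

>>> def close_quantifier(pred, domain, op=' | '):
...     # ' | ' closes an existential; pass op=' & ' to close a universal.
...     return op.join('%s(%s)' % (pred, e) for e in domain)
>>> close_quantifier('P', ['A', 'B'])
'P(A) | P(B)'
>>> close_quantifier('P', ['A', 'B'], op=' & ')
'P(A) & P(B)'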

+
>>> p1 = read_expr(r'all x.(man(x) -> mortal(x))')
+>>> p2 = read_expr(r'man(Socrates)')
+>>> c = read_expr(r'mortal(Socrates)')
+>>> prover = Prover9Command(c, [p1,p2])
+>>> prover.prove()
+True
+>>> cdp = ClosedDomainProver(prover)
+>>> for a in cdp.assumptions(): print(a) 
+(man(Socrates) -> mortal(Socrates))
+man(Socrates)
+>>> cdp.prove()
+True
+
+
+
>>> p1 = read_expr(r'exists x.walk(x)')
+>>> p2 = read_expr(r'man(Socrates)')
+>>> c = read_expr(r'walk(Socrates)')
+>>> prover = Prover9Command(c, [p1,p2])
+>>> prover.prove()
+False
+>>> cdp = ClosedDomainProver(prover)
+>>> for a in cdp.assumptions(): print(a) 
+walk(Socrates)
+man(Socrates)
+>>> cdp.prove()
+True
+
+
+
>>> p1 = read_expr(r'exists x.walk(x)')
+>>> p2 = read_expr(r'man(Socrates)')
+>>> p3 = read_expr(r'-walk(Bill)')
+>>> c = read_expr(r'walk(Socrates)')
+>>> prover = Prover9Command(c, [p1,p2,p3])
+>>> prover.prove()
+False
+>>> cdp = ClosedDomainProver(prover)
+>>> for a in cdp.assumptions(): print(a) 
+(walk(Socrates) | walk(Bill))
+man(Socrates)
+-walk(Bill)
+>>> cdp.prove()
+True
+
-
-

Unique Names Assumption

+
>>> p1 = read_expr(r'walk(Socrates)')
+>>> p2 = read_expr(r'walk(Bill)')
+>>> c = read_expr(r'all x.walk(x)')
+>>> prover = Prover9Command(c, [p1,p2])
+>>> prover.prove()
+False
+>>> cdp = ClosedDomainProver(prover)
+>>> for a in cdp.assumptions(): print(a) 
+walk(Socrates)
+walk(Bill)
+>>> print(cdp.goal()) 
+(walk(Socrates) & walk(Bill))
+>>> cdp.prove()
+True
+
+
+
>>> p1 = read_expr(r'girl(mary)')
+>>> p2 = read_expr(r'dog(rover)')
+>>> p3 = read_expr(r'all x.(girl(x) -> -dog(x))')
+>>> p4 = read_expr(r'all x.(dog(x) -> -girl(x))')
+>>> p5 = read_expr(r'chase(mary, rover)')
+>>> c = read_expr(r'exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))')
+>>> prover = Prover9Command(c, [p1,p2,p3,p4,p5])
+>>> print(prover.prove())
+False
+>>> cdp = ClosedDomainProver(prover)
+>>> for a in cdp.assumptions(): print(a) 
+girl(mary)
+dog(rover)
+((girl(rover) -> -dog(rover)) & (girl(mary) -> -dog(mary)))
+((dog(rover) -> -girl(rover)) & (dog(mary) -> -girl(mary)))
+chase(mary,rover)
+>>> print(cdp.goal()) 
+((dog(rover) & (girl(rover) -> chase(rover,rover)) & (girl(mary) -> chase(mary,rover))) | (dog(mary) & (girl(rover) -> chase(rover,mary)) & (girl(mary) -> chase(mary,mary))))
+>>> print(cdp.prove())
+True
+
+
+
+
+

Unique Names Assumption

No two entities in the domain represent the same entity unless it can be -explicitly proven that they do. Therefore, if the domain contains "A" and "B", -then add the assumption "-(A = B)" if it is not the case that -"<assumptions> |- (A = B)".

-
-
->>> p1 = read_expr(r'man(Socrates)')
->>> p2 = read_expr(r'man(Bill)')
->>> c = read_expr(r'exists x.exists y.-(x = y)')
->>> prover = Prover9Command(c, [p1,p2])
->>> prover.prove()
-False
->>> unp = UniqueNamesProver(prover)
->>> for a in unp.assumptions(): print(a) # doctest: +SKIP
-man(Socrates)
-man(Bill)
--(Socrates = Bill)
->>> unp.prove()
-True
-
-
->>> p1 = read_expr(r'all x.(walk(x) -> (x = Socrates))')
->>> p2 = read_expr(r'Bill = William')
->>> p3 = read_expr(r'Bill = Billy')
->>> c = read_expr(r'-walk(William)')
->>> prover = Prover9Command(c, [p1,p2,p3])
->>> prover.prove()
-False
->>> unp = UniqueNamesProver(prover)
->>> for a in unp.assumptions(): print(a) # doctest: +SKIP
-all x.(walk(x) -> (x = Socrates))
-(Bill = William)
-(Bill = Billy)
--(William = Socrates)
--(Billy = Socrates)
--(Socrates = Bill)
->>> unp.prove()
-True
-
-
+explicitly proven that they do. Therefore, if the domain contains “A” and “B”, +then add the assumption “-(A = B)” if it is not the case that +“<assumptions> |- (A = B)”.
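The bookkeeping can be sketched with a hypothetical helper (illustration only; not UniqueNamesProver's actual implementation): one inequality assumption per unordered pair of constants that is not provably equal:

>>> from itertools import combinations
>>> def unique_names(constants):
...     # Emit '-(a = b)' for every unordered pair of constants.
...     return ['-(%s = %s)' % (a, b) for a, b in combinations(constants, 2)]
>>> unique_names(['Socrates', 'Bill'])
['-(Socrates = Bill)']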

+
>>> p1 = read_expr(r'man(Socrates)')
+>>> p2 = read_expr(r'man(Bill)')
+>>> c = read_expr(r'exists x.exists y.-(x = y)')
+>>> prover = Prover9Command(c, [p1,p2])
+>>> prover.prove()
+False
+>>> unp = UniqueNamesProver(prover)
+>>> for a in unp.assumptions(): print(a) 
+man(Socrates)
+man(Bill)
+-(Socrates = Bill)
+>>> unp.prove()
+True
+
+
+
>>> p1 = read_expr(r'all x.(walk(x) -> (x = Socrates))')
+>>> p2 = read_expr(r'Bill = William')
+>>> p3 = read_expr(r'Bill = Billy')
+>>> c = read_expr(r'-walk(William)')
+>>> prover = Prover9Command(c, [p1,p2,p3])
+>>> prover.prove()
+False
+>>> unp = UniqueNamesProver(prover)
+>>> for a in unp.assumptions(): print(a) 
+all x.(walk(x) -> (x = Socrates))
+(Bill = William)
+(Bill = Billy)
+-(William = Socrates)
+-(Billy = Socrates)
+-(Socrates = Bill)
+>>> unp.prove()
+True
+
-
-


Closed World Assumption

The only entities that have certain properties are those that it is stated -have the properties. We accomplish this assumption by "completing" predicates.

-

If the assumptions contain "P(A)", then "all x.(P(x) -> (x=A))" is the completion -of "P". If the assumptions contain "all x.(ostrich(x) -> bird(x))", then -"all x.(bird(x) -> ostrich(x))" is the completion of "bird". If the -assumptions don't contain anything that is "P", then "all x.-P(x)" is the -completion of "P".

-
-
->>> p1 = read_expr(r'walk(Socrates)')
->>> p2 = read_expr(r'-(Socrates = Bill)')
->>> c = read_expr(r'-walk(Bill)')
->>> prover = Prover9Command(c, [p1,p2])
->>> prover.prove()
-False
->>> cwp = ClosedWorldProver(prover)
->>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
-walk(Socrates)
--(Socrates = Bill)
-all z1.(walk(z1) -> (z1 = Socrates))
->>> cwp.prove()
-True
-
-
->>> p1 = read_expr(r'see(Socrates, John)')
->>> p2 = read_expr(r'see(John, Mary)')
->>> p3 = read_expr(r'-(Socrates = John)')
->>> p4 = read_expr(r'-(John = Mary)')
->>> c = read_expr(r'-see(Socrates, Mary)')
->>> prover = Prover9Command(c, [p1,p2,p3,p4])
->>> prover.prove()
-False
->>> cwp = ClosedWorldProver(prover)
->>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
-see(Socrates,John)
-see(John,Mary)
--(Socrates = John)
--(John = Mary)
-all z3 z4.(see(z3,z4) -> (((z3 = Socrates) & (z4 = John)) | ((z3 = John) & (z4 = Mary))))
->>> cwp.prove()
-True
-
-
->>> p1 = read_expr(r'all x.(ostrich(x) -> bird(x))')
->>> p2 = read_expr(r'bird(Tweety)')
->>> p3 = read_expr(r'-ostrich(Sam)')
->>> p4 = read_expr(r'Sam != Tweety')
->>> c = read_expr(r'-bird(Sam)')
->>> prover = Prover9Command(c, [p1,p2,p3,p4])
->>> prover.prove()
-False
->>> cwp = ClosedWorldProver(prover)
->>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
-all x.(ostrich(x) -> bird(x))
-bird(Tweety)
--ostrich(Sam)
--(Sam = Tweety)
-all z7.-ostrich(z7)
-all z8.(bird(z8) -> ((z8 = Tweety) | ostrich(z8)))
->>> print(cwp.prove())
-True
-
-
+have the properties. We accomplish this assumption by “completing” predicates.

+

If the assumptions contain “P(A)”, then “all x.(P(x) -> (x=A))” is the completion +of “P”. If the assumptions contain “all x.(ostrich(x) -> bird(x))”, then +“all x.(bird(x) -> ostrich(x))” is the completion of “bird”. If the +assumptions don’t contain anything that is “P”, then “all x.-P(x)” is the +completion of “P”.
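The first case can be sketched with a hypothetical helper (illustration only; not ClosedWorldProver's actual implementation): collect the individuals a predicate is asserted of and emit the completion axiom:

>>> def complete(pred, individuals):
...     # Ground facts P(a), P(b), ... yield: all x.(P(x) -> ((x = a) | (x = b)))
...     body = ' | '.join('(x = %s)' % i for i in individuals)
...     return 'all x.(%s(x) -> %s)' % (pred, body)
>>> complete('walk', ['Socrates'])
'all x.(walk(x) -> (x = Socrates))'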

+
>>> p1 = read_expr(r'walk(Socrates)')
+>>> p2 = read_expr(r'-(Socrates = Bill)')
+>>> c = read_expr(r'-walk(Bill)')
+>>> prover = Prover9Command(c, [p1,p2])
+>>> prover.prove()
+False
+>>> cwp = ClosedWorldProver(prover)
+>>> for a in cwp.assumptions(): print(a) 
+walk(Socrates)
+-(Socrates = Bill)
+all z1.(walk(z1) -> (z1 = Socrates))
+>>> cwp.prove()
+True
+
-
-

Multi-Decorator Example

+
>>> p1 = read_expr(r'see(Socrates, John)')
+>>> p2 = read_expr(r'see(John, Mary)')
+>>> p3 = read_expr(r'-(Socrates = John)')
+>>> p4 = read_expr(r'-(John = Mary)')
+>>> c = read_expr(r'-see(Socrates, Mary)')
+>>> prover = Prover9Command(c, [p1,p2,p3,p4])
+>>> prover.prove()
+False
+>>> cwp = ClosedWorldProver(prover)
+>>> for a in cwp.assumptions(): print(a) 
+see(Socrates,John)
+see(John,Mary)
+-(Socrates = John)
+-(John = Mary)
+all z3 z4.(see(z3,z4) -> (((z3 = Socrates) & (z4 = John)) | ((z3 = John) & (z4 = Mary))))
+>>> cwp.prove()
+True
+
+
+
>>> p1 = read_expr(r'all x.(ostrich(x) -> bird(x))')
+>>> p2 = read_expr(r'bird(Tweety)')
+>>> p3 = read_expr(r'-ostrich(Sam)')
+>>> p4 = read_expr(r'Sam != Tweety')
+>>> c = read_expr(r'-bird(Sam)')
+>>> prover = Prover9Command(c, [p1,p2,p3,p4])
+>>> prover.prove()
+False
+>>> cwp = ClosedWorldProver(prover)
+>>> for a in cwp.assumptions(): print(a) 
+all x.(ostrich(x) -> bird(x))
+bird(Tweety)
+-ostrich(Sam)
+-(Sam = Tweety)
+all z7.-ostrich(z7)
+all z8.(bird(z8) -> ((z8 = Tweety) | ostrich(z8)))
+>>> print(cwp.prove())
+True
+
+
+
+
+

Multi-Decorator Example

Decorators can be nested to utilize multiple assumptions.

-
-
->>> p1 = read_expr(r'see(Socrates, John)')
->>> p2 = read_expr(r'see(John, Mary)')
->>> c = read_expr(r'-see(Socrates, Mary)')
->>> prover = Prover9Command(c, [p1,p2])
->>> print(prover.prove())
-False
->>> cmd = ClosedDomainProver(UniqueNamesProver(ClosedWorldProver(prover)))
->>> print(cmd.prove())
-True
-
-
+
>>> p1 = read_expr(r'see(Socrates, John)')
+>>> p2 = read_expr(r'see(John, Mary)')
+>>> c = read_expr(r'-see(Socrates, Mary)')
+>>> prover = Prover9Command(c, [p1,p2])
+>>> print(prover.prove())
+False
+>>> cmd = ClosedDomainProver(UniqueNamesProver(ClosedWorldProver(prover)))
+>>> print(cmd.prove())
+True
+
+
+
+
+

Default Reasoning

+
>>> logic._counter._value = 0
+>>> premises = []
+
+
+
+
define the taxonomy
>>> premises.append(read_expr(r'all x.(elephant(x)        -> animal(x))'))
+>>> premises.append(read_expr(r'all x.(bird(x)            -> animal(x))'))
+>>> premises.append(read_expr(r'all x.(dove(x)            -> bird(x))'))
+>>> premises.append(read_expr(r'all x.(ostrich(x)         -> bird(x))'))
+>>> premises.append(read_expr(r'all x.(flying_ostrich(x)  -> ostrich(x))'))
+
-
-

Default Reasoning

-
-
->>> logic._counter._value = 0
->>> premises = []
-
-
-
-
define the taxonomy
-
->>> premises.append(read_expr(r'all x.(elephant(x)        -> animal(x))'))
->>> premises.append(read_expr(r'all x.(bird(x)            -> animal(x))'))
->>> premises.append(read_expr(r'all x.(dove(x)            -> bird(x))'))
->>> premises.append(read_expr(r'all x.(ostrich(x)         -> bird(x))'))
->>> premises.append(read_expr(r'all x.(flying_ostrich(x)  -> ostrich(x))'))
-
-
default the properties using abnormalities
-
->>> premises.append(read_expr(r'all x.((animal(x)  & -Ab1(x)) -> -fly(x))')) #normal animals don't fly
->>> premises.append(read_expr(r'all x.((bird(x)    & -Ab2(x)) -> fly(x))'))  #normal birds fly
->>> premises.append(read_expr(r'all x.((ostrich(x) & -Ab3(x)) -> -fly(x))')) #normal ostriches don't fly
-
+
default the properties using abnormalities
>>> premises.append(read_expr(r'all x.((animal(x)  & -Ab1(x)) -> -fly(x))')) #normal animals don't fly
+>>> premises.append(read_expr(r'all x.((bird(x)    & -Ab2(x)) -> fly(x))'))  #normal birds fly
+>>> premises.append(read_expr(r'all x.((ostrich(x) & -Ab3(x)) -> -fly(x))')) #normal ostriches don't fly
+
+
-
specify abnormal entities
-
->>> premises.append(read_expr(r'all x.(bird(x)           -> Ab1(x))')) #flight
->>> premises.append(read_expr(r'all x.(ostrich(x)        -> Ab2(x))')) #non-flying bird
->>> premises.append(read_expr(r'all x.(flying_ostrich(x) -> Ab3(x))')) #flying ostrich
-
+
specify abnormal entities
>>> premises.append(read_expr(r'all x.(bird(x)           -> Ab1(x))')) #flight
+>>> premises.append(read_expr(r'all x.(ostrich(x)        -> Ab2(x))')) #non-flying bird
+>>> premises.append(read_expr(r'all x.(flying_ostrich(x) -> Ab3(x))')) #flying ostrich
+
+
-
define entities
-
->>> premises.append(read_expr(r'elephant(el)'))
->>> premises.append(read_expr(r'dove(do)'))
->>> premises.append(read_expr(r'ostrich(os)'))
-
+
define entities
>>> premises.append(read_expr(r'elephant(el)'))
+>>> premises.append(read_expr(r'dove(do)'))
+>>> premises.append(read_expr(r'ostrich(os)'))
+
+
-
print the augmented assumptions list
-
->>> prover = Prover9Command(None, premises)
->>> command = UniqueNamesProver(ClosedWorldProver(prover))
->>> for a in command.assumptions(): print(a) # doctest: +SKIP
-all x.(elephant(x) -> animal(x))
-all x.(bird(x) -> animal(x))
-all x.(dove(x) -> bird(x))
-all x.(ostrich(x) -> bird(x))
-all x.(flying_ostrich(x) -> ostrich(x))
-all x.((animal(x) & -Ab1(x)) -> -fly(x))
-all x.((bird(x) & -Ab2(x)) -> fly(x))
-all x.((ostrich(x) & -Ab3(x)) -> -fly(x))
-all x.(bird(x) -> Ab1(x))
-all x.(ostrich(x) -> Ab2(x))
-all x.(flying_ostrich(x) -> Ab3(x))
-elephant(el)
-dove(do)
-ostrich(os)
-all z1.(animal(z1) -> (elephant(z1) | bird(z1)))
-all z2.(Ab1(z2) -> bird(z2))
-all z3.(bird(z3) -> (dove(z3) | ostrich(z3)))
-all z4.(dove(z4) -> (z4 = do))
-all z5.(Ab2(z5) -> ostrich(z5))
-all z6.(Ab3(z6) -> flying_ostrich(z6))
-all z7.(ostrich(z7) -> ((z7 = os) | flying_ostrich(z7)))
-all z8.-flying_ostrich(z8)
-all z9.(elephant(z9) -> (z9 = el))
--(el = os)
--(el = do)
--(os = do)
-
-
->>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(el)'), premises))).prove()
-True
->>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('fly(do)'), premises))).prove()
-True
->>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(os)'), premises))).prove()
-True
-
+
print the augmented assumptions list
>>> prover = Prover9Command(None, premises)
+>>> command = UniqueNamesProver(ClosedWorldProver(prover))
+>>> for a in command.assumptions(): print(a) 
+all x.(elephant(x) -> animal(x))
+all x.(bird(x) -> animal(x))
+all x.(dove(x) -> bird(x))
+all x.(ostrich(x) -> bird(x))
+all x.(flying_ostrich(x) -> ostrich(x))
+all x.((animal(x) & -Ab1(x)) -> -fly(x))
+all x.((bird(x) & -Ab2(x)) -> fly(x))
+all x.((ostrich(x) & -Ab3(x)) -> -fly(x))
+all x.(bird(x) -> Ab1(x))
+all x.(ostrich(x) -> Ab2(x))
+all x.(flying_ostrich(x) -> Ab3(x))
+elephant(el)
+dove(do)
+ostrich(os)
+all z1.(animal(z1) -> (elephant(z1) | bird(z1)))
+all z2.(Ab1(z2) -> bird(z2))
+all z3.(bird(z3) -> (dove(z3) | ostrich(z3)))
+all z4.(dove(z4) -> (z4 = do))
+all z5.(Ab2(z5) -> ostrich(z5))
+all z6.(Ab3(z6) -> flying_ostrich(z6))
+all z7.(ostrich(z7) -> ((z7 = os) | flying_ostrich(z7)))
+all z8.-flying_ostrich(z8)
+all z9.(elephant(z9) -> (z9 = el))
+-(el = os)
+-(el = do)
+-(os = do)
+
+
+
>>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(el)'), premises))).prove()
+True
+>>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('fly(do)'), premises))).prove()
+True
+>>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(os)'), premises))).prove()
+True
+
+
+
+
+
\ No newline at end of file
diff --git a/howto/paice.html b/howto/paice.html
index 506b539f6..d3436e761 100644
--- a/howto/paice.html
+++ b/howto/paice.html
@@ -1,383 +1,184 @@
-

PAICE's evaluation statistics for stemming algorithms


Sample usage for paice

+
+

PAICE’s evaluation statistics for stemming algorithms

Given a list of words with their true lemmas, and their stems according to the stemming algorithm under evaluation, this computes the Understemming Index (UI), Overstemming Index (OI), Stemming Weight (SW) and Error-rate relative to truncation (ERRT).

-
-
->>> from nltk.metrics import Paice
-
-
-
-

Understemming and Overstemming values

-
-
->>> lemmas = {'kneel': ['kneel', 'knelt'],
-...           'range': ['range', 'ranged'],
-...           'ring': ['ring', 'rang', 'rung']}
->>> stems = {'kneel': ['kneel'],
-...          'knelt': ['knelt'],
-...          'rang': ['rang', 'range', 'ranged'],
-...          'ring': ['ring'],
-...          'rung': ['rung']}
->>> p = Paice(lemmas, stems)
->>> p.gumt, p.gdmt, p.gwmt, p.gdnt
-(4.0, 5.0, 2.0, 16.0)
-
-
->>> p.ui, p.oi, p.sw
-(0.8..., 0.125..., 0.15625...)
-
-
->>> p.errt
-1.0
-
-
->>> [('{0:.3f}'.format(a), '{0:.3f}'.format(b)) for a, b in p.coords]
-[('0.000', '1.000'), ('0.000', '0.375'), ('0.600', '0.125'), ('0.800', '0.125')]
-
-
+
>>> from nltk.metrics import Paice
+
+
+
+

Understemming and Overstemming values

+
>>> lemmas = {'kneel': ['kneel', 'knelt'],
+...           'range': ['range', 'ranged'],
+...           'ring': ['ring', 'rang', 'rung']}
+>>> stems = {'kneel': ['kneel'],
+...          'knelt': ['knelt'],
+...          'rang': ['rang', 'range', 'ranged'],
+...          'ring': ['ring'],
+...          'rung': ['rung']}
+>>> p = Paice(lemmas, stems)
+>>> p.gumt, p.gdmt, p.gwmt, p.gdnt
+(4.0, 5.0, 2.0, 16.0)
+
+
>>> p.ui, p.oi, p.sw
+(0.8..., 0.125..., 0.15625...)
+
+
>>> p.errt
+1.0
+
+
+
>>> [('{0:.3f}'.format(a), '{0:.3f}'.format(b)) for a, b in p.coords]
+[('0.000', '1.000'), ('0.000', '0.375'), ('0.600', '0.125'), ('0.800', '0.125')]
+
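By Paice's definition the stemming weight is the ratio of the overstemming index to the understemming index, which the values above satisfy (a small added check, not part of the original tests):

>>> abs(p.sw - p.oi / p.ui) < 1e-9
True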
+
+
+
+
\ No newline at end of file
diff --git a/howto/parse.html b/howto/parse.html
index 051450d28..1f606655f 100644
--- a/howto/parse.html
+++ b/howto/parse.html
@@ -1,1324 +1,1058 @@
-

Parsing


Unit tests for the Context Free Grammar class

-
-
->>> from nltk import Nonterminal, nonterminals, Production, CFG
-
-
->>> nt1 = Nonterminal('NP')
->>> nt2 = Nonterminal('VP')
-
-
->>> nt1.symbol()
-'NP'
-
-
->>> nt1 == Nonterminal('NP')
-True
-
-
->>> nt1 == nt2
-False
-
-
->>> S, NP, VP, PP = nonterminals('S, NP, VP, PP')
->>> N, V, P, DT = nonterminals('N, V, P, DT')
-
-
->>> prod1 = Production(S, [NP, VP])
->>> prod2 = Production(NP, [DT, NP])
-
-
->>> prod1.lhs()
-S
-
-
->>> prod1.rhs()
-(NP, VP)
-
-
->>> prod1 == Production(S, [NP, VP])
-True
-
-
->>> prod1 == prod2
-False
-
-
->>> grammar = CFG.fromstring("""
-... S -> NP VP
-... PP -> P NP
-... NP -> 'the' N | N PP | 'the' N PP
-... VP -> V NP | V PP | V NP PP
-... N -> 'cat'
-... N -> 'dog'
-... N -> 'rug'
-... V -> 'chased'
-... V -> 'sat'
-... P -> 'in'
-... P -> 'on'
-... """)
-
-
-
-
-

Unit tests for the rd (Recursive Descent Parser) class


Sample usage for parse

+
+

Parsing

+
+

Unit tests for the Context Free Grammar class

+
>>> import pickle
+>>> import subprocess
+>>> import sys
+>>> from nltk import Nonterminal, nonterminals, Production, CFG
+
+
+
>>> nt1 = Nonterminal('NP')
+>>> nt2 = Nonterminal('VP')
+
+
+
>>> nt1.symbol()
+'NP'
+
+
+
>>> nt1 == Nonterminal('NP')
+True
+
+
+
>>> nt1 == nt2
+False
+
+
+
>>> S, NP, VP, PP = nonterminals('S, NP, VP, PP')
+>>> N, V, P, DT = nonterminals('N, V, P, DT')
+
+
+
>>> prod1 = Production(S, [NP, VP])
+>>> prod2 = Production(NP, [DT, NP])
+
+
+
>>> prod1.lhs()
+S
+
+
+
>>> prod1.rhs()
+(NP, VP)
+
+
+
>>> prod1 == Production(S, [NP, VP])
+True
+
+
+
>>> prod1 == prod2
+False
+
+
+
>>> grammar = CFG.fromstring("""
+... S -> NP VP
+... PP -> P NP
+... NP -> 'the' N | N PP | 'the' N PP
+... VP -> V NP | V PP | V NP PP
+... N -> 'cat'
+... N -> 'dog'
+... N -> 'rug'
+... V -> 'chased'
+... V -> 'sat'
+... P -> 'in'
+... P -> 'on'
+... """)
+
+
+
>>> cmd = """import pickle
+... from nltk import Production
+... p = Production('S', ['NP', 'VP'])
+... print(pickle.dumps(p))
+... """
+
+
+
>>> # Start a subprocess to simulate pickling in another process
+>>> proc = subprocess.run([sys.executable, '-c', cmd], stdout=subprocess.PIPE)
+>>> p1 = pickle.loads(eval(proc.stdout))
+>>> p2 = Production('S', ['NP', 'VP'])
+>>> print(hash(p1) == hash(p2))
+True
+
+
+
+
+

Unit tests for the rd (Recursive Descent Parser) class

Create and run a recursive descent parser over both a syntactically ambiguous and a syntactically unambiguous sentence.

-
-
->>> from nltk.parse import RecursiveDescentParser
->>> rd = RecursiveDescentParser(grammar)
-
-
->>> sentence1 = 'the cat chased the dog'.split()
->>> sentence2 = 'the cat chased the dog on the rug'.split()
-
-
->>> for t in rd.parse(sentence1):
-...     print(t)
-(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
-
-
->>> for t in rd.parse(sentence2):
-...     print(t)
-(S
-  (NP the (N cat))
-  (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
-(S
-  (NP the (N cat))
-  (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
-
-
-
-

+
>>> from nltk.parse import RecursiveDescentParser
+>>> rd = RecursiveDescentParser(grammar)
+
+
+
>>> sentence1 = 'the cat chased the dog'.split()
+>>> sentence2 = 'the cat chased the dog on the rug'.split()
+
+
+
>>> for t in rd.parse(sentence1):
+...     print(t)
+(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
+
+
+
>>> for t in rd.parse(sentence2):
+...     print(t)
+(S
+  (NP the (N cat))
+  (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
+(S
+  (NP the (N cat))
+  (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
+
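Both trees arise from the usual PP-attachment ambiguity, so the second sentence receives exactly two parses (a small added check, not part of the original tests):

>>> len(list(rd.parse(sentence2)))
2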
+
+
+


Unit tests for the sr (Shift Reduce Parser) class

Create and run a shift reduce parser over both a syntactically ambiguous and a syntactically unambiguous sentence. Note that, unlike the recursive descent parser, at most one parse is ever returned.

-
-
->>> from nltk.parse import ShiftReduceParser
->>> sr = ShiftReduceParser(grammar)
-
-
->>> sentence1 = 'the cat chased the dog'.split()
->>> sentence2 = 'the cat chased the dog on the rug'.split()
-
-
->>> for t in sr.parse(sentence1):
-...     print(t)
-(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
-
-
+
>>> from nltk.parse import ShiftReduceParser
+>>> sr = ShiftReduceParser(grammar)
+
+
+
>>> sentence1 = 'the cat chased the dog'.split()
+>>> sentence2 = 'the cat chased the dog on the rug'.split()
+
+
+
>>> for t in sr.parse(sentence1):
+...     print(t)
+(S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
+
+

The shift reduce parser uses heuristics to decide what to do when there are multiple possible shift or reduce operations available; for the supplied grammar, it clearly selects the wrong operation, so no parse is found.

-
-
->>> for t in sr.parse(sentence2):
-...     print(t)
-
-
-
-
-

Unit tests for the Chart Parser class

+
>>> for t in sr.parse(sentence2):
+...     print(t)
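To make the failure explicit (a small added check), the parser yields no trees at all for this sentence:

>>> list(sr.parse(sentence2))
[]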
+
+
+ +
+

Unit tests for the Chart Parser class

We use the demo() function for testing, and turn off the display of parse times.

-
-
->>> import nltk
-
-
+
>>> import nltk
+
+

First we test tracing with a short sentence

-
-
->>> nltk.parse.chart.demo(2, print_times=False, trace=1,
-...                       sent='I saw a dog', numparses=1)
-* Sentence:
-I saw a dog
-['I', 'saw', 'a', 'dog']
-<BLANKLINE>
-* Strategy: Bottom-up
-<BLANKLINE>
-|.    I    .   saw   .    a    .   dog   .|
-|[---------]         .         .         .| [0:1] 'I'
-|.         [---------]         .         .| [1:2] 'saw'
-|.         .         [---------]         .| [2:3] 'a'
-|.         .         .         [---------]| [3:4] 'dog'
-|>         .         .         .         .| [0:0] NP -> * 'I'
-|[---------]         .         .         .| [0:1] NP -> 'I' *
-|>         .         .         .         .| [0:0] S  -> * NP VP
-|>         .         .         .         .| [0:0] NP -> * NP PP
-|[--------->         .         .         .| [0:1] S  -> NP * VP
-|[--------->         .         .         .| [0:1] NP -> NP * PP
-|.         >         .         .         .| [1:1] Verb -> * 'saw'
-|.         [---------]         .         .| [1:2] Verb -> 'saw' *
-|.         >         .         .         .| [1:1] VP -> * Verb NP
-|.         >         .         .         .| [1:1] VP -> * Verb
-|.         [--------->         .         .| [1:2] VP -> Verb * NP
-|.         [---------]         .         .| [1:2] VP -> Verb *
-|.         >         .         .         .| [1:1] VP -> * VP PP
-|[-------------------]         .         .| [0:2] S  -> NP VP *
-|.         [--------->         .         .| [1:2] VP -> VP * PP
-|.         .         >         .         .| [2:2] Det -> * 'a'
-|.         .         [---------]         .| [2:3] Det -> 'a' *
-|.         .         >         .         .| [2:2] NP -> * Det Noun
-|.         .         [--------->         .| [2:3] NP -> Det * Noun
-|.         .         .         >         .| [3:3] Noun -> * 'dog'
-|.         .         .         [---------]| [3:4] Noun -> 'dog' *
-|.         .         [-------------------]| [2:4] NP -> Det Noun *
-|.         .         >         .         .| [2:2] S  -> * NP VP
-|.         .         >         .         .| [2:2] NP -> * NP PP
-|.         [-----------------------------]| [1:4] VP -> Verb NP *
-|.         .         [------------------->| [2:4] S  -> NP * VP
-|.         .         [------------------->| [2:4] NP -> NP * PP
-|[=======================================]| [0:4] S  -> NP VP *
-|.         [----------------------------->| [1:4] VP -> VP * PP
-Nr edges in chart: 33
-(S (NP I) (VP (Verb saw) (NP (Det a) (Noun dog))))
-<BLANKLINE>
-
-
+
>>> nltk.parse.chart.demo(2, print_times=False, trace=1,
+...                       sent='I saw a dog', numparses=1)
+* Sentence:
+I saw a dog
+['I', 'saw', 'a', 'dog']
+
+* Strategy: Bottom-up
+
+|.    I    .   saw   .    a    .   dog   .|
+|[---------]         .         .         .| [0:1] 'I'
+|.         [---------]         .         .| [1:2] 'saw'
+|.         .         [---------]         .| [2:3] 'a'
+|.         .         .         [---------]| [3:4] 'dog'
+|>         .         .         .         .| [0:0] NP -> * 'I'
+|[---------]         .         .         .| [0:1] NP -> 'I' *
+|>         .         .         .         .| [0:0] S  -> * NP VP
+|>         .         .         .         .| [0:0] NP -> * NP PP
+|[--------->         .         .         .| [0:1] S  -> NP * VP
+|[--------->         .         .         .| [0:1] NP -> NP * PP
+|.         >         .         .         .| [1:1] Verb -> * 'saw'
+|.         [---------]         .         .| [1:2] Verb -> 'saw' *
+|.         >         .         .         .| [1:1] VP -> * Verb NP
+|.         >         .         .         .| [1:1] VP -> * Verb
+|.         [--------->         .         .| [1:2] VP -> Verb * NP
+|.         [---------]         .         .| [1:2] VP -> Verb *
+|.         >         .         .         .| [1:1] VP -> * VP PP
+|[-------------------]         .         .| [0:2] S  -> NP VP *
+|.         [--------->         .         .| [1:2] VP -> VP * PP
+|.         .         >         .         .| [2:2] Det -> * 'a'
+|.         .         [---------]         .| [2:3] Det -> 'a' *
+|.         .         >         .         .| [2:2] NP -> * Det Noun
+|.         .         [--------->         .| [2:3] NP -> Det * Noun
+|.         .         .         >         .| [3:3] Noun -> * 'dog'
+|.         .         .         [---------]| [3:4] Noun -> 'dog' *
+|.         .         [-------------------]| [2:4] NP -> Det Noun *
+|.         .         >         .         .| [2:2] S  -> * NP VP
+|.         .         >         .         .| [2:2] NP -> * NP PP
+|.         [-----------------------------]| [1:4] VP -> Verb NP *
+|.         .         [------------------->| [2:4] S  -> NP * VP
+|.         .         [------------------->| [2:4] NP -> NP * PP
+|[=======================================]| [0:4] S  -> NP VP *
+|.         [----------------------------->| [1:4] VP -> VP * PP
+Nr edges in chart: 33
+(S (NP I) (VP (Verb saw) (NP (Det a) (Noun dog))))
+
+

Then we test the different parsing strategies. Note that the number of edges differs between the strategies.

Top-down

-
-
->>> nltk.parse.chart.demo(1, print_times=False, trace=0,
-...                       sent='I saw John with a dog', numparses=2)
-* Sentence:
-I saw John with a dog
-['I', 'saw', 'John', 'with', 'a', 'dog']
-<BLANKLINE>
-* Strategy: Top-down
-<BLANKLINE>
-Nr edges in chart: 48
-(S
-  (NP I)
-  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-(S
-  (NP I)
-  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-<BLANKLINE>
-
-
+
>>> nltk.parse.chart.demo(1, print_times=False, trace=0,
+...                       sent='I saw John with a dog', numparses=2)
+* Sentence:
+I saw John with a dog
+['I', 'saw', 'John', 'with', 'a', 'dog']
+
+* Strategy: Top-down
+
+Nr edges in chart: 48
+(S
+  (NP I)
+  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+(S
+  (NP I)
+  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+
+

Bottom-up

-
-
->>> nltk.parse.chart.demo(2, print_times=False, trace=0,
-...                       sent='I saw John with a dog', numparses=2)
-* Sentence:
-I saw John with a dog
-['I', 'saw', 'John', 'with', 'a', 'dog']
-<BLANKLINE>
-* Strategy: Bottom-up
-<BLANKLINE>
-Nr edges in chart: 53
-(S
-  (NP I)
-  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-(S
-  (NP I)
-  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-<BLANKLINE>
-
-
+
>>> nltk.parse.chart.demo(2, print_times=False, trace=0,
+...                       sent='I saw John with a dog', numparses=2)
+* Sentence:
+I saw John with a dog
+['I', 'saw', 'John', 'with', 'a', 'dog']
+
+* Strategy: Bottom-up
+
+Nr edges in chart: 53
+(S
+  (NP I)
+  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+(S
+  (NP I)
+  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+
+

Bottom-up Left-Corner

-
-
->>> nltk.parse.chart.demo(3, print_times=False, trace=0,
-...                       sent='I saw John with a dog', numparses=2)
-* Sentence:
-I saw John with a dog
-['I', 'saw', 'John', 'with', 'a', 'dog']
-<BLANKLINE>
-* Strategy: Bottom-up left-corner
-<BLANKLINE>
-Nr edges in chart: 36
-(S
-  (NP I)
-  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-(S
-  (NP I)
-  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-<BLANKLINE>
-
-
+
>>> nltk.parse.chart.demo(3, print_times=False, trace=0,
+...                       sent='I saw John with a dog', numparses=2)
+* Sentence:
+I saw John with a dog
+['I', 'saw', 'John', 'with', 'a', 'dog']
+
+* Strategy: Bottom-up left-corner
+
+Nr edges in chart: 36
+(S
+  (NP I)
+  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+(S
+  (NP I)
+  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+
+

Left-Corner with Bottom-Up Filter

-
-
->>> nltk.parse.chart.demo(4, print_times=False, trace=0,
-...                       sent='I saw John with a dog', numparses=2)
-* Sentence:
-I saw John with a dog
-['I', 'saw', 'John', 'with', 'a', 'dog']
-<BLANKLINE>
-* Strategy: Filtered left-corner
-<BLANKLINE>
-Nr edges in chart: 28
-(S
-  (NP I)
-  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-(S
-  (NP I)
-  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-<BLANKLINE>
-
-
+
>>> nltk.parse.chart.demo(4, print_times=False, trace=0,
+...                       sent='I saw John with a dog', numparses=2)
+* Sentence:
+I saw John with a dog
+['I', 'saw', 'John', 'with', 'a', 'dog']
+
+* Strategy: Filtered left-corner
+
+Nr edges in chart: 28
+(S
+  (NP I)
+  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+(S
+  (NP I)
+  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+
+

The stepping chart parser

-
-
->>> nltk.parse.chart.demo(5, print_times=False, trace=1,
-...                       sent='I saw John with a dog', numparses=2)
-* Sentence:
-I saw John with a dog
-['I', 'saw', 'John', 'with', 'a', 'dog']
-<BLANKLINE>
-* Strategy: Stepping (top-down vs bottom-up)
-<BLANKLINE>
-*** SWITCH TO TOP DOWN
-|[------]      .      .      .      .      .| [0:1] 'I'
-|.      [------]      .      .      .      .| [1:2] 'saw'
-|.      .      [------]      .      .      .| [2:3] 'John'
-|.      .      .      [------]      .      .| [3:4] 'with'
-|.      .      .      .      [------]      .| [4:5] 'a'
-|.      .      .      .      .      [------]| [5:6] 'dog'
-|>      .      .      .      .      .      .| [0:0] S  -> * NP VP
-|>      .      .      .      .      .      .| [0:0] NP -> * NP PP
-|>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
-|>      .      .      .      .      .      .| [0:0] NP -> * 'I'
-|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
-|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
-|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
-|.      >      .      .      .      .      .| [1:1] VP -> * VP PP
-|.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
-|.      >      .      .      .      .      .| [1:1] VP -> * Verb
-|.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
-|.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
-|.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
-|.      [------]      .      .      .      .| [1:2] VP -> Verb *
-|[-------------]      .      .      .      .| [0:2] S  -> NP VP *
-|.      [------>      .      .      .      .| [1:2] VP -> VP * PP
-*** SWITCH TO BOTTOM UP
-|.      .      >      .      .      .      .| [2:2] NP -> * 'John'
-|.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
-|.      .      .      >      .      .      .| [3:3] Prep -> * 'with'
-|.      .      .      .      >      .      .| [4:4] Det -> * 'a'
-|.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
-|.      .      [------]      .      .      .| [2:3] NP -> 'John' *
-|.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
-|.      .      .      [------]      .      .| [3:4] Prep -> 'with' *
-|.      .      .      .      [------]      .| [4:5] Det -> 'a' *
-|.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
-|.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
-|[--------------------]      .      .      .| [0:3] S  -> NP VP *
-|.      [------------->      .      .      .| [1:3] VP -> VP * PP
-|.      .      >      .      .      .      .| [2:2] S  -> * NP VP
-|.      .      >      .      .      .      .| [2:2] NP -> * NP PP
-|.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
-|.      .      [------>      .      .      .| [2:3] S  -> NP * VP
-|.      .      [------>      .      .      .| [2:3] NP -> NP * PP
-|.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
-|.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
-|.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
-|.      [----------------------------------]| [1:6] VP -> VP PP *
-*** SWITCH TO TOP DOWN
-|.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
-|.      .      .      .      >      .      .| [4:4] NP -> * NP PP
-|.      .      .      >      .      .      .| [3:3] VP -> * VP PP
-|.      .      .      >      .      .      .| [3:3] VP -> * Verb NP
-|.      .      .      >      .      .      .| [3:3] VP -> * Verb
-|[=========================================]| [0:6] S  -> NP VP *
-|.      [---------------------------------->| [1:6] VP -> VP * PP
-|.      .      [---------------------------]| [2:6] NP -> NP PP *
-|.      .      .      .      [------------->| [4:6] NP -> NP * PP
-|.      [----------------------------------]| [1:6] VP -> Verb NP *
-|.      .      [--------------------------->| [2:6] S  -> NP * VP
-|.      .      [--------------------------->| [2:6] NP -> NP * PP
-|[=========================================]| [0:6] S  -> NP VP *
-|.      [---------------------------------->| [1:6] VP -> VP * PP
-|.      .      .      .      .      .      >| [6:6] VP -> * VP PP
-|.      .      .      .      .      .      >| [6:6] VP -> * Verb NP
-|.      .      .      .      .      .      >| [6:6] VP -> * Verb
-*** SWITCH TO BOTTOM UP
-|.      .      .      .      >      .      .| [4:4] S  -> * NP VP
-|.      .      .      .      [------------->| [4:6] S  -> NP * VP
-*** SWITCH TO TOP DOWN
-*** SWITCH TO BOTTOM UP
-*** SWITCH TO TOP DOWN
-*** SWITCH TO BOTTOM UP
-*** SWITCH TO TOP DOWN
-*** SWITCH TO BOTTOM UP
-Nr edges in chart: 61
-(S
-  (NP I)
-  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-(S
-  (NP I)
-  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-<BLANKLINE>
-
-
-
-
-

Unit tests for the Incremental Chart Parser class

+
>>> nltk.parse.chart.demo(5, print_times=False, trace=1,
+...                       sent='I saw John with a dog', numparses=2)
+* Sentence:
+I saw John with a dog
+['I', 'saw', 'John', 'with', 'a', 'dog']
+
+* Strategy: Stepping (top-down vs bottom-up)
+
+*** SWITCH TO TOP DOWN
+|[------]      .      .      .      .      .| [0:1] 'I'
+|.      [------]      .      .      .      .| [1:2] 'saw'
+|.      .      [------]      .      .      .| [2:3] 'John'
+|.      .      .      [------]      .      .| [3:4] 'with'
+|.      .      .      .      [------]      .| [4:5] 'a'
+|.      .      .      .      .      [------]| [5:6] 'dog'
+|>      .      .      .      .      .      .| [0:0] S  -> * NP VP
+|>      .      .      .      .      .      .| [0:0] NP -> * NP PP
+|>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
+|>      .      .      .      .      .      .| [0:0] NP -> * 'I'
+|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
+|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
+|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
+|.      >      .      .      .      .      .| [1:1] VP -> * VP PP
+|.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
+|.      >      .      .      .      .      .| [1:1] VP -> * Verb
+|.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
+|.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
+|.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
+|.      [------]      .      .      .      .| [1:2] VP -> Verb *
+|[-------------]      .      .      .      .| [0:2] S  -> NP VP *
+|.      [------>      .      .      .      .| [1:2] VP -> VP * PP
+*** SWITCH TO BOTTOM UP
+|.      .      >      .      .      .      .| [2:2] NP -> * 'John'
+|.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
+|.      .      .      >      .      .      .| [3:3] Prep -> * 'with'
+|.      .      .      .      >      .      .| [4:4] Det -> * 'a'
+|.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
+|.      .      [------]      .      .      .| [2:3] NP -> 'John' *
+|.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
+|.      .      .      [------]      .      .| [3:4] Prep -> 'with' *
+|.      .      .      .      [------]      .| [4:5] Det -> 'a' *
+|.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
+|.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
+|[--------------------]      .      .      .| [0:3] S  -> NP VP *
+|.      [------------->      .      .      .| [1:3] VP -> VP * PP
+|.      .      >      .      .      .      .| [2:2] S  -> * NP VP
+|.      .      >      .      .      .      .| [2:2] NP -> * NP PP
+|.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
+|.      .      [------>      .      .      .| [2:3] S  -> NP * VP
+|.      .      [------>      .      .      .| [2:3] NP -> NP * PP
+|.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
+|.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
+|.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
+|.      [----------------------------------]| [1:6] VP -> VP PP *
+*** SWITCH TO TOP DOWN
+|.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
+|.      .      .      .      >      .      .| [4:4] NP -> * NP PP
+|.      .      .      >      .      .      .| [3:3] VP -> * VP PP
+|.      .      .      >      .      .      .| [3:3] VP -> * Verb NP
+|.      .      .      >      .      .      .| [3:3] VP -> * Verb
+|[=========================================]| [0:6] S  -> NP VP *
+|.      [---------------------------------->| [1:6] VP -> VP * PP
+|.      .      [---------------------------]| [2:6] NP -> NP PP *
+|.      .      .      .      [------------->| [4:6] NP -> NP * PP
+|.      [----------------------------------]| [1:6] VP -> Verb NP *
+|.      .      [--------------------------->| [2:6] S  -> NP * VP
+|.      .      [--------------------------->| [2:6] NP -> NP * PP
+|[=========================================]| [0:6] S  -> NP VP *
+|.      [---------------------------------->| [1:6] VP -> VP * PP
+|.      .      .      .      .      .      >| [6:6] VP -> * VP PP
+|.      .      .      .      .      .      >| [6:6] VP -> * Verb NP
+|.      .      .      .      .      .      >| [6:6] VP -> * Verb
+*** SWITCH TO BOTTOM UP
+|.      .      .      .      >      .      .| [4:4] S  -> * NP VP
+|.      .      .      .      [------------->| [4:6] S  -> NP * VP
+*** SWITCH TO TOP DOWN
+*** SWITCH TO BOTTOM UP
+*** SWITCH TO TOP DOWN
+*** SWITCH TO BOTTOM UP
+*** SWITCH TO TOP DOWN
+*** SWITCH TO BOTTOM UP
+Nr edges in chart: 61
+(S
+  (NP I)
+  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+(S
+  (NP I)
+  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+
+
+ +
+

Unit tests for the Incremental Chart Parser class

The incremental chart parsers are defined in earleychart.py. We use the demo() function for testing, and turn off the display of parse times.

-
-
->>> import nltk
-
-
+
>>> import nltk
+
+

Earley Chart Parser

+
>>> nltk.parse.earleychart.demo(print_times=False, trace=1,
+...                             sent='I saw John with a dog', numparses=2)
+* Sentence:
+I saw John with a dog
+['I', 'saw', 'John', 'with', 'a', 'dog']
+
+|.  I   . saw  . John . with .  a   . dog  .|
+|[------]      .      .      .      .      .| [0:1] 'I'
+|.      [------]      .      .      .      .| [1:2] 'saw'
+|.      .      [------]      .      .      .| [2:3] 'John'
+|.      .      .      [------]      .      .| [3:4] 'with'
+|.      .      .      .      [------]      .| [4:5] 'a'
+|.      .      .      .      .      [------]| [5:6] 'dog'
+|>      .      .      .      .      .      .| [0:0] S  -> * NP VP
+|>      .      .      .      .      .      .| [0:0] NP -> * NP PP
+|>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
+|>      .      .      .      .      .      .| [0:0] NP -> * 'I'
+|[------]      .      .      .      .      .| [0:1] NP -> 'I' *
+|[------>      .      .      .      .      .| [0:1] S  -> NP * VP
+|[------>      .      .      .      .      .| [0:1] NP -> NP * PP
+|.      >      .      .      .      .      .| [1:1] VP -> * VP PP
+|.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
+|.      >      .      .      .      .      .| [1:1] VP -> * Verb
+|.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
+|.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
+|.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
+|.      [------]      .      .      .      .| [1:2] VP -> Verb *
+|[-------------]      .      .      .      .| [0:2] S  -> NP VP *
+|.      [------>      .      .      .      .| [1:2] VP -> VP * PP
+|.      .      >      .      .      .      .| [2:2] NP -> * NP PP
+|.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
+|.      .      >      .      .      .      .| [2:2] NP -> * 'John'
+|.      .      [------]      .      .      .| [2:3] NP -> 'John' *
+|.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
+|.      .      [------>      .      .      .| [2:3] NP -> NP * PP
+|.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
+|[--------------------]      .      .      .| [0:3] S  -> NP VP *
+|.      [------------->      .      .      .| [1:3] VP -> VP * PP
+|.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
+|.      .      .      .      >      .      .| [4:4] NP -> * NP PP
+|.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
+|.      .      .      .      >      .      .| [4:4] Det -> * 'a'
+|.      .      .      .      [------]      .| [4:5] Det -> 'a' *
+|.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
+|.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
+|.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
+|.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
+|.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
+|.      .      .      .      [------------->| [4:6] NP -> NP * PP
+|.      .      [---------------------------]| [2:6] NP -> NP PP *
+|.      [----------------------------------]| [1:6] VP -> VP PP *
+|[=========================================]| [0:6] S  -> NP VP *
+|.      [---------------------------------->| [1:6] VP -> VP * PP
+|.      [----------------------------------]| [1:6] VP -> Verb NP *
+|.      .      [--------------------------->| [2:6] NP -> NP * PP
+|[=========================================]| [0:6] S  -> NP VP *
+|.      [---------------------------------->| [1:6] VP -> VP * PP
+(S
+  (NP I)
+  (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+(S
+  (NP I)
+  (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+
+
+

Unit tests for LARGE context-free grammars

Reading the ATIS grammar.

+
>>> grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
+>>> grammar
+<Grammar with 5517 productions>
+
+

Reading the test sentences.

+
>>> sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
+>>> sentences = nltk.parse.util.extract_test_sentences(sentences)
+>>> len(sentences)
+98
+>>> testsentence = sentences[22]
+>>> testsentence[0]
+['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.']
+>>> testsentence[1]
+17
+>>> sentence = testsentence[0]
+
+
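
Each entry returned by extract_test_sentences pairs a token list with the
expected number of parses, so testsentence[1] above is the number of parses we
should find for sentence 22. A quick sketch (assuming only the two-element
format shown above) that scans the first few entries:

    for tokens, numparses in sentences[:5]:
        print(len(tokens), 'tokens,', numparses, 'expected parses')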

Now we test all the different parsing strategies. Note that the number of edges differs between strategies; a programmatic comparison is sketched below.
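
A compact way to run the same comparison programmatically (this is not part of
the original test suite; it assumes the ATIS grammar and test sentence loaded
above):

    import nltk

    grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
    sentences = nltk.parse.util.extract_test_sentences(
        nltk.data.load('grammars/large_grammars/atis_sentences.txt'))
    sentence = sentences[22][0]

    # Every strategy should find the same parses; only the chart size differs.
    for cls in [nltk.parse.BottomUpChartParser,
                nltk.parse.BottomUpLeftCornerChartParser,
                nltk.parse.LeftCornerChartParser,
                nltk.parse.TopDownChartParser]:
        chart = cls(grammar).chart_parse(sentence)
        nparses = len(list(chart.parses(grammar.start())))
        print(cls.__name__, chart.num_edges(), nparses)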

Bottom-up parsing.

+
>>> parser = nltk.parse.BottomUpChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+7661
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+

Bottom-up Left-corner parsing.

+
>>> parser = nltk.parse.BottomUpLeftCornerChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+4986
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+

Left-corner parsing with bottom-up filter.

+
>>> parser = nltk.parse.LeftCornerChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+1342
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+

Top-down parsing.

+
>>> parser = nltk.parse.TopDownChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+28352
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+

Incremental Bottom-up parsing.

+
>>> parser = nltk.parse.IncrementalBottomUpChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+7661
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+

Incremental Bottom-up Left-corner parsing.

+
>>> parser = nltk.parse.IncrementalBottomUpLeftCornerChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+4986
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+

Incremental Left-corner parsing with bottom-up filter.

+
>>> parser = nltk.parse.IncrementalLeftCornerChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+1342
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+

Incremental Top-down parsing.

+
>>> parser = nltk.parse.IncrementalTopDownChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+28352
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+

Earley parsing. This is similar to the incremental top-down algorithm.

+
>>> parser = nltk.parse.EarleyChartParser(grammar)
+>>> chart = parser.chart_parse(sentence)
+>>> print((chart.num_edges()))
+28352
+>>> print((len(list(chart.parses(grammar.start())))))
+17
+
+
+

Unit tests for the Probabilistic CFG class

+
>>> from nltk.corpus import treebank
+>>> from itertools import islice
+>>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2
+
+

Create a set of PCFG productions.

+
>>> grammar = PCFG.fromstring("""
+... A -> B B [.3] | C B C [.7]
+... B -> B D [.5] | C [.5]
+... C -> 'a' [.1] | 'b' [0.9]
+... D -> 'b' [1.0]
+... """)
+>>> prod = grammar.productions()[0]
+>>> prod
+A -> B B [0.3]
+
+
+
>>> prod.lhs()
+A
+
+
+
>>> prod.rhs()
+(B, B)
+
+
+
>>> print((prod.prob()))
+0.3
+
+
+
>>> grammar.start()
+A
+
+
+
>>> grammar.productions()
+[A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
+
+
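
Since the bracketed numbers are probabilities, the expansions of each
nonterminal must sum to one; PCFG.fromstring() raises a ValueError otherwise.
A small sketch (not part of the original tests; it reuses the toy grammar just
defined) that checks this invariant directly:

    from collections import defaultdict

    totals = defaultdict(float)
    for production in grammar.productions():
        totals[production.lhs()] += production.prob()
    for lhs, total in totals.items():
        assert abs(total - 1.0) < 1e-9, (lhs, total)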

Induce some productions using parsed Treebank data.

+
>>> productions = []
+>>> for fileid in treebank.fileids()[:2]:
+...     for t in treebank.parsed_sents(fileid):
+...         productions += t.productions()
+
+
+
>>> from nltk.grammar import Nonterminal
+>>> S = Nonterminal('S')
+>>> grammar = induce_pcfg(S, productions)
+>>> grammar
+<Grammar with 71 productions>
+
+
+
>>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
+[PP -> IN NP [1.0]]
+>>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
+[NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
+>>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
+[JJ -> 'British' [0.142857], JJ -> 'former' [0.142857]]
+>>> sorted(grammar.productions(lhs=Nonterminal('NP')))[:2]
+[NP -> CD NNS [0.133333], NP -> DT JJ JJ NN [0.0666667]]
+
+
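
The induced productions carry relative-frequency probabilities, so we can ask
for the most likely expansions of a category. A sketch (not part of the
original tests; it reuses the grammar induced above):

    np = Nonterminal('NP')
    for production in sorted(grammar.productions(lhs=np),
                             key=lambda p: -p.prob())[:3]:
        print(production)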
+

Unit tests for the Probabilistic Chart Parse classes

+
>>> tokens = "Jack saw Bob with my cookie".split()
+>>> grammar = toy_pcfg2
+>>> print(grammar)
+Grammar with 23 productions (start state = S)
+    S -> NP VP [1.0]
+    VP -> V NP [0.59]
+    VP -> V [0.4]
+    VP -> VP PP [0.01]
+    NP -> Det N [0.41]
+    NP -> Name [0.28]
+    NP -> NP PP [0.31]
+    PP -> P NP [1.0]
+    V -> 'saw' [0.21]
+    V -> 'ate' [0.51]
+    V -> 'ran' [0.28]
+    N -> 'boy' [0.11]
+    N -> 'cookie' [0.12]
+    N -> 'table' [0.13]
+    N -> 'telescope' [0.14]
+    N -> 'hill' [0.5]
+    Name -> 'Jack' [0.52]
+    Name -> 'Bob' [0.48]
+    P -> 'with' [0.61]
+    P -> 'under' [0.39]
+    Det -> 'the' [0.41]
+    Det -> 'a' [0.31]
+    Det -> 'my' [0.28]
+
+

Create several parsers using different queuing strategies and show the resulting parses.

+
>>> from nltk.parse import pchart
+
+
+
>>> parser = pchart.InsideChartParser(grammar)
+>>> for t in parser.parse(tokens):
+...     print(t)
+(S
+  (NP (Name Jack))
+  (VP
+    (V saw)
+    (NP
+      (NP (Name Bob))
+      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+(S
+  (NP (Name Jack))
+  (VP
+    (VP (V saw) (NP (Name Bob)))
+    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
+
+
+
>>> parser = pchart.RandomChartParser(grammar)
+>>> for t in parser.parse(tokens):
+...     print(t)
+(S
+  (NP (Name Jack))
+  (VP
+    (V saw)
+    (NP
+      (NP (Name Bob))
+      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+(S
+  (NP (Name Jack))
+  (VP
+    (VP (V saw) (NP (Name Bob)))
+    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
+
+
+
>>> parser = pchart.UnsortedChartParser(grammar)
+>>> for t in parser.parse(tokens):
+...     print(t)
+(S
+  (NP (Name Jack))
+  (VP
+    (V saw)
+    (NP
+      (NP (Name Bob))
+      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+(S
+  (NP (Name Jack))
+  (VP
+    (VP (V saw) (NP (Name Bob)))
+    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
+
+
+
>>> parser = pchart.LongestChartParser(grammar)
+>>> for t in parser.parse(tokens):
+...     print(t)
+(S
+  (NP (Name Jack))
+  (VP
+    (V saw)
+    (NP
+      (NP (Name Bob))
+      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+(S
+  (NP (Name Jack))
+  (VP
+    (VP (V saw) (NP (Name Bob)))
+    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
+
+
+
>>> parser = pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)
+>>> for t in parser.parse(tokens):
+...     print(t)
+
+
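
The empty output above is expected: with such a small beam, the pruned chart
no longer contains a complete parse. A sketch probing the effect of beam width
(beam_size=0 disables pruning in pchart; grammar and tokens are those defined
above):

    for beam in (len(tokens) + 1, 50, 0):
        parser = pchart.InsideChartParser(grammar, beam_size=beam)
        print('beam_size=%d: %d parse(s)' % (beam, len(list(parser.parse(tokens)))))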
+

Unit tests for the Viterbi Parse classes

+
>>> from nltk.parse import ViterbiParser
+>>> tokens = "Jack saw Bob with my cookie".split()
+>>> grammar = toy_pcfg2
+
+

Parse the tokenized sentence.

+
>>> parser = ViterbiParser(grammar)
+>>> for t in parser.parse(tokens):
+...     print(t)
+(S
+  (NP (Name Jack))
+  (VP
+    (V saw)
+    (NP
+      (NP (Name Bob))
+      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+
+
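
The trees returned by ViterbiParser are ProbabilisticTree objects, so the
parse probability can be read back directly. A short sketch (reusing the
parser and tokens above; not part of the original tests):

    best = next(parser.parse(tokens))
    print(best.prob())   # same value as the (p=...) annotation above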
+

Unit tests for the FeatStructNonterminal class

+
>>> from nltk.grammar import FeatStructNonterminal
+>>> FeatStructNonterminal(
+...     pos='n', agr=FeatStructNonterminal(number='pl', gender='f'))
+[agr=[gender='f', number='pl'], pos='n']
+
+
+
>>> FeatStructNonterminal('VP[+fin]/NP[+pl]')
+VP[+fin]/NP[+pl]
+
+
+
+
+
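
The feature bindings shown in the trace below (e.g. {?x: False}) come from
unification of feature structures. A minimal sketch of unification itself,
using nltk.featstruct (not part of the original doctest):

    from nltk.featstruct import FeatStruct

    fs1 = FeatStruct(agr=FeatStruct(number='pl'))
    fs2 = FeatStruct(agr=FeatStruct(gender='f'))
    print(fs1.unify(fs2))   # [agr=[gender='f', number='pl']]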

Tracing the Feature Chart Parser

We use the featurechart.demo() function for tracing the Feature Chart Parser.

+
>>> nltk.parse.featurechart.demo(print_times=False,
+...                              print_grammar=True,
+...                              parser=nltk.parse.featurechart.FeatureChartParser,
+...                              sent='I saw John with a dog')
+
+Grammar with 18 productions (start state = S[])
+    S[] -> NP[] VP[]
+    PP[] -> Prep[] NP[]
+    NP[] -> NP[] PP[]
+    VP[] -> VP[] PP[]
+    VP[] -> Verb[] NP[]
+    VP[] -> Verb[]
+    NP[] -> Det[pl=?x] Noun[pl=?x]
+    NP[] -> 'John'
+    NP[] -> 'I'
+    Det[] -> 'the'
+    Det[] -> 'my'
+    Det[-pl] -> 'a'
+    Noun[-pl] -> 'dog'
+    Noun[-pl] -> 'cookie'
+    Verb[] -> 'ate'
+    Verb[] -> 'saw'
+    Prep[] -> 'with'
+    Prep[] -> 'under'
+
+* FeatureChartParser
+Sentence: I saw John with a dog
+|.I.s.J.w.a.d.|
+|[-] . . . . .| [0:1] 'I'
+|. [-] . . . .| [1:2] 'saw'
+|. . [-] . . .| [2:3] 'John'
+|. . . [-] . .| [3:4] 'with'
+|. . . . [-] .| [4:5] 'a'
+|. . . . . [-]| [5:6] 'dog'
+|[-] . . . . .| [0:1] NP[] -> 'I' *
+|[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
+|[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
+|. [-] . . . .| [1:2] Verb[] -> 'saw' *
+|. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
+|. [-] . . . .| [1:2] VP[] -> Verb[] *
+|. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
+|[---] . . . .| [0:2] S[] -> NP[] VP[] *
+|. . [-] . . .| [2:3] NP[] -> 'John' *
+|. . [-> . . .| [2:3] S[] -> NP[] * VP[] {}
+|. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
+|. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
+|. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
+|[-----] . . .| [0:3] S[] -> NP[] VP[] *
+|. . . [-] . .| [3:4] Prep[] -> 'with' *
+|. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
+|. . . . [-] .| [4:5] Det[-pl] -> 'a' *
+|. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
+|. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
+|. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
+|. . . . [--->| [4:6] S[] -> NP[] * VP[] {}
+|. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
+|. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
+|. . [-------]| [2:6] NP[] -> NP[] PP[] *
+|. [---------]| [1:6] VP[] -> VP[] PP[] *
+|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
+|[===========]| [0:6] S[] -> NP[] VP[] *
+|. . [------->| [2:6] S[] -> NP[] * VP[] {}
+|. . [------->| [2:6] NP[] -> NP[] * PP[] {}
+|. [---------]| [1:6] VP[] -> Verb[] NP[] *
+|. [--------->| [1:6] VP[] -> VP[] * PP[] {}
+|[===========]| [0:6] S[] -> NP[] VP[] *
+(S[]
+  (NP[] I)
+  (VP[]
+    (VP[] (Verb[] saw) (NP[] John))
+    (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))
+(S[]
+  (NP[] I)
+  (VP[]
+    (Verb[] saw)
+    (NP[]
+      (NP[] John)
+      (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
+
+
+

Unit tests for the Feature Chart Parser classes

The list of parsers we want to test.

+
>>> parsers = [nltk.parse.featurechart.FeatureChartParser,
+...            nltk.parse.featurechart.FeatureTopDownChartParser,
+...            nltk.parse.featurechart.FeatureBottomUpChartParser,
+...            nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser,
+...            nltk.parse.earleychart.FeatureIncrementalChartParser,
+...            nltk.parse.earleychart.FeatureEarleyChartParser,
+...            nltk.parse.earleychart.FeatureIncrementalTopDownChartParser,
+...            nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser,
+...            nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser,
+...            ]
+
+

A helper function that tests each parser on the given grammar and sentence. We check that the number of trees is correct, and that all parsers return the same trees. Otherwise an error is printed.

+
>>> def unittest(grammar, sentence, nr_trees):
+...     sentence = sentence.split()
+...     trees = None
+...     for P in parsers:
+...         result = P(grammar).parse(sentence)
+...         result = set(tree.freeze() for tree in result)
+...         if len(result) != nr_trees:
+...             print("Wrong nr of trees:", len(result))
+...         elif trees is None:
+...             trees = result
+...         elif result != trees:
+...             print("Trees differ for parser:", P.__name__)
+
+

The demo grammar from before, with an ambiguous sentence.

+
>>> isawjohn = nltk.parse.featurechart.demo_grammar()
+>>> unittest(isawjohn, "I saw John with a dog with my cookie", 5)
+
+

This grammar tests that variables in different grammar rules are renamed before unification. (The problematic variable in this case is ?X.)

+
>>> whatwasthat = nltk.grammar.FeatureGrammar.fromstring('''
+... S[] -> NP[num=?N] VP[num=?N, slash=?X]
+... NP[num=?X] -> "what"
+... NP[num=?X] -> "that"
+... VP[num=?P, slash=none] -> V[num=?P] NP[]
+... V[num=sg] -> "was"
+... ''')
+>>> unittest(whatwasthat, "what was that", 1)
+
+

This grammar tests that the same rule can be used in different places in another rule, and that the variables are properly renamed.

+
>>> thislovesthat = nltk.grammar.FeatureGrammar.fromstring('''
+... S[] -> NP[case=nom] V[] NP[case=acc]
+... NP[case=?X] -> Pron[case=?X]
+... Pron[] -> "this"
+... Pron[] -> "that"
+... V[] -> "loves"
+... ''')
+>>> unittest(thislovesthat, "this loves that", 1)
+
+
+

Tests for loading feature grammar files

Alternative 1: first load the grammar, then create the parser.

+
>>> fcfg = nltk.data.load('grammars/book_grammars/feat0.fcfg')
+>>> fcp1 = nltk.parse.FeatureChartParser(fcfg)
+>>> print((type(fcp1)))
+<class 'nltk.parse.featurechart.FeatureChartParser'>
+
+

Alternative 2: directly load the parser.

+
>>> fcp2 = nltk.parse.load_parser('grammars/book_grammars/feat0.fcfg')
+>>> print((type(fcp2)))
+<class 'nltk.parse.featurechart.FeatureChartParser'>
+
+
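
Either way, the resulting parser can be applied directly to a tokenized
sentence. A sketch (it assumes, as in Chapter 9 of the NLTK book, that
feat0.fcfg covers the sentence below):

    for tree in fcp2.parse('Kim likes children'.split()):
        print(tree)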
diff --git a/howto/portuguese_en.html b/howto/portuguese_en.html
index 7cfbc09d8..c0657a693 100644
--- a/howto/portuguese_en.html
+++ b/howto/portuguese_en.html
@@ -1,972 +1,702 @@

Sample usage for portuguese_en
Examples for Portuguese Processing

This HOWTO contains a variety of examples relating to the Portuguese language. It is intended to be read in conjunction with the NLTK book (https://www.nltk.org/book). For instructions on running the Python interpreter, please see the section Getting Started with Python, in Chapter 1.


Python Programming, with Portuguese Examples

Chapter 1 of the NLTK book contains many elementary programming examples, all with English texts. In this section, we’ll see some corresponding examples using Portuguese. Please refer to the chapter for full discussion. Vamos!

+
>>> from nltk.test.portuguese_en_fixt import setup_module
+>>> setup_module()
+
+
+
>>> from nltk.examples.pt import *
+*** Introductory Examples for the NLTK Book ***
+Loading ptext1, ... and psent1, ...
+Type the name of the text or sentence to view it.
+Type: 'texts()' or 'sents()' to list the materials.
+ptext1: Memórias Póstumas de Brás Cubas (1881)
+ptext2: Dom Casmurro (1899)
+ptext3: Gênesis
+ptext4: Folha de Sao Paulo (1994)
+
+

Any time we want to find out about these texts, we just have to enter their names at the Python prompt:

+
>>> ptext2
+<Text: Dom Casmurro (1899)>
+
+
+
+

Searching Text

A concordance permits us to see words in context.

+
>>> ptext1.concordance('olhos')
+Building index...
+Displaying 25 of 138 matches:
+De pé , à cabeceira da cama , com os olhos estúpidos , a boca entreaberta , a t
+orelhas . Pela minha parte fechei os olhos e deixei - me ir à ventura . Já agor
+xões de cérebro enfermo . Como ia de olhos fechados , não via o caminho ; lembr
+gelos eternos . Com efeito , abri os olhos e vi que o meu animal galopava numa
+me apareceu então , fitando - me uns olhos rutilantes como o sol . Tudo nessa f
+ mim mesmo . Então , encarei - a com olhos súplices , e pedi mais alguns anos .
+...
+
+

For a given word, we can find words with a similar text distribution:

+
>>> ptext1.similar('chegar')
+Building word-context index...
+acabada acudir aludir avistar bramanismo casamento cheguei com contar
+contrário corpo dali deixei desferirem dizer fazer filhos já leitor lhe
+>>> ptext3.similar('chegar')
+Building word-context index...
+achar alumiar arrombar destruir governar guardar ir lavrar passar que
+toda tomar ver vir
+
+

We can search for the statistically significant collocations in a text:

+
>>> ptext1.collocations()
+Building collocations list
+Quincas Borba; Lobo Neves; alguma coisa; Brás Cubas; meu pai; dia
+seguinte; não sei; Meu pai; alguns instantes; outra vez; outra coisa;
+por exemplo; mim mesmo; coisa nenhuma; mesma coisa; não era; dias
+depois; Passeio Público; olhar para; das coisas
+
+

We can search for words in context, with the help of regular expressions, e.g.:

+
>>> ptext1.findall("<olhos> (<.*>)")
+estúpidos; e; fechados; rutilantes; súplices; a; do; babavam;
+na; moles; se; da; umas; espraiavam; chamejantes; espetados;
+...
+
+

We can automatically generate random text based on a given text, e.g.:

+
>>> ptext3.generate() 
+No princípio , criou Deus os abençoou , dizendo : Onde { estão } e até
+à ave dos céus , { que } será . Disse mais Abrão : Dá - me a mulher
+que tomaste ; porque daquele poço Eseque , { tinha .} E disse : Não
+poderemos descer ; mas , do campo ainda não estava na casa do teu
+pescoço . E viveu Serugue , depois Simeão e Levi { são } estes ? E o
+varão , porque habitava na terra de Node , da mão de Esaú : Jeús ,
+Jalão e Corá
+
+
+
+
+

Texts as List of Words

A few sentences have been defined for you.

+
>>> psent1
+['o', 'amor', 'da', 'gl\xf3ria', 'era', 'a', 'coisa', 'mais',
+'verdadeiramente', 'humana', 'que', 'h\xe1', 'no', 'homem', ',',
+'e', ',', 'conseq\xfcentemente', ',', 'a', 'sua', 'mais',
+'genu\xedna', 'fei\xe7\xe3o', '.']
+>>>
+
+

Notice that the sentence has been tokenized. Each token is represented as a string, represented using quotes, e.g. 'coisa'. Some strings contain special characters, e.g. \xf3, the internal representation for ó. The tokens are combined in the form of a list. How long is this list?

+
>>> len(psent1)
+25
+>>>
+
+

What is the vocabulary of this sentence?


>>> sorted(set(psent1))
+[',', '.', 'a', 'amor', 'coisa', 'conseqüentemente', 'da', 'e', 'era',
+ 'feição', 'genuína', 'glória', 'homem', 'humana', 'há', 'mais', 'no',
+ 'o', 'que', 'sua', 'verdadeiramente']
+>>>
+
+
+

Let’s iterate over each item in psent2, and print information for each:

+
>>> for w in psent2:
+...     print(w, len(w), w[-1])
+...
+Não 3 o
+consultes 9 s
+dicionários 11 s
+. 1 .
+
+
+

Observe how we make a human-readable version of a string, using decode(). Also notice that we accessed the last character of a string w using w[-1].

+

We just saw a for loop above. Another useful control structure is a list comprehension.

+
>>> [w.upper() for w in psent2]
+['N\xc3O', 'CONSULTES', 'DICION\xc1RIOS', '.']
+>>> [w for w in psent1 if w.endswith('a')]
+['da', 'gl\xf3ria', 'era', 'a', 'coisa', 'humana', 'a', 'sua', 'genu\xedna']
+>>> [w for w in ptext4 if len(w) > 15]
+['norte-irlandeses', 'pan-nacionalismo', 'predominatemente', 'primeiro-ministro',
+'primeiro-ministro', 'irlandesa-americana', 'responsabilidades', 'significativamente']
+
+
+

We can examine the relative frequency of words in a text, using FreqDist:

+
>>> fd1 = FreqDist(ptext1)
+>>> fd1
+<FreqDist with 10848 samples and 77098 outcomes>
+>>> fd1['olhos']
+137
+>>> fd1.max()
+','
+>>> fd1.samples()[:100]
+[',', '.', 'a', 'que', 'de', 'e', '-', 'o', ';', 'me', 'um', 'n\xe3o',
+'\x97', 'se', 'do', 'da', 'uma', 'com', 'os', '\xe9', 'era', 'as', 'eu',
+'lhe', 'ao', 'em', 'para', 'mas', '...', '!', '\xe0', 'na', 'mais', '?',
+'no', 'como', 'por', 'N\xe3o', 'dos', 'ou', 'ele', ':', 'Virg\xedlia',
+'meu', 'disse', 'minha', 'das', 'O', '/', 'A', 'CAP\xcdTULO', 'muito',
+'depois', 'coisa', 'foi', 'sem', 'olhos', 'ela', 'nos', 'tinha', 'nem',
+'E', 'outro', 'vida', 'nada', 'tempo', 'menos', 'outra', 'casa', 'homem',
+'porque', 'quando', 'mim', 'mesmo', 'ser', 'pouco', 'estava', 'dia',
+'t\xe3o', 'tudo', 'Mas', 'at\xe9', 'D', 'ainda', 's\xf3', 'alguma',
+'la', 'vez', 'anos', 'h\xe1', 'Era', 'pai', 'esse', 'lo', 'dizer', 'assim',
+'ent\xe3o', 'dizia', 'aos', 'Borba']
+
+
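
Note that fd1.samples() is NLTK 2 API; under NLTK 3 the equivalent,
frequency-ordered listing comes from most_common(). A sketch:

    for word, count in fd1.most_common(10):
        print(word, count)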
+

Reading Corpora

+
+

Accessing the Machado Text Corpus

NLTK includes the complete works of Machado de Assis.

+
>>> from nltk.corpus import machado
+>>> machado.fileids()
+['contos/macn001.txt', 'contos/macn002.txt', 'contos/macn003.txt', ...]
+
+

Each file corresponds to one of the works of Machado de Assis. To see a complete list of works, you can look at the corpus README file: print machado.readme(). Let’s access the text of the Posthumous Memories of Brás Cubas.

We can access the text as a list of characters, and access 200 characters starting from position 10,000.

+
>>> raw_text = machado.raw('romance/marm05.txt')
+>>> raw_text[10000:10200]
+', primou no\nEstado, e foi um dos amigos particulares do vice-rei Conde
+da Cunha.\n\nComo este apelido de Cubas lhe\ncheirasse excessivamente a
+tanoaria, alegava meu pai, bisneto de Dami\xe3o, que o\ndito ape'
+
+

However, this is not a very useful way to work with a text. We generally think of a text as a sequence of words and punctuation, not characters:


>>> text1 = machado.words('romance/marm05.txt')
+>>> text1
+['Romance', ',', 'Mem\xf3rias', 'P\xf3stumas', 'de', ...]
+>>> len(text1)
+77098
+>>> len(set(text1))
+10848
+
+
+

Here’s a program that finds the most common ngrams that contain a particular target word.

+
>>> from nltk import ngrams, FreqDist
+>>> target_word = 'olhos'
+>>> fd = FreqDist(ng
+...               for ng in ngrams(text1, 5)
+...               if target_word in ng)
+>>> for hit in fd.samples():
+...     print(' '.join(hit))
+...
+, com os olhos no
+com os olhos no ar
+com os olhos no chão
+e todos com os olhos
+me estar com os olhos
+os olhos estúpidos , a
+os olhos na costura ,
+os olhos no ar ,
+, com os olhos espetados
+, com os olhos estúpidos
+, com os olhos fitos
+, com os olhos naquele
+, com os olhos para
+
+
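
As above, fd.samples() is NLTK 2 API; under NLTK 3 one would write, e.g.:

    for ngram, count in fd.most_common(10):
        print(count, ' '.join(ngram))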
+

Accessing the MacMorpho Tagged Corpus

NLTK includes the MAC-MORPHO Brazilian Portuguese POS-tagged news text, with over a million words of journalistic texts extracted from ten sections of the daily newspaper Folha de Sao Paulo, 1994.

+
+
We can access this corpus as a sequence of words or tagged words as follows:
>>> import nltk.corpus
+>>> nltk.corpus.mac_morpho.words()
+['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', ...]
+>>> nltk.corpus.mac_morpho.sents()
+[['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', 'milh\xe3o',
+'em', 'a', 'venda', 'de', 'a', 'Pinhal', 'em', 'S\xe3o', 'Paulo'],
+['Programe', 'sua', 'viagem', 'a', 'a', 'Exposi\xe7\xe3o', 'Nacional',
+'do', 'Zebu', ',', 'que', 'come\xe7a', 'dia', '25'], ...]
+>>> nltk.corpus.mac_morpho.tagged_words()
+[('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ...]
+
+

We can also access it in sentence chunks.

-
>>> nltk.corpus.mac_morpho.tagged_sents()
+[[('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ('de', 'PREP'),
+  ('Cr$', 'CUR'), ('1,4', 'NUM'), ('milh\xe3o', 'N'), ('em', 'PREP|+'),
+  ('a', 'ART'), ('venda', 'N'), ('de', 'PREP|+'), ('a', 'ART'),
+  ('Pinhal', 'NPROP'), ('em', 'PREP'), ('S\xe3o', 'NPROP'),
+  ('Paulo', 'NPROP')],
+ [('Programe', 'V'), ('sua', 'PROADJ'), ('viagem', 'N'), ('a', 'PREP|+'),
+  ('a', 'ART'), ('Exposi\xe7\xe3o', 'NPROP'), ('Nacional', 'NPROP'),
+  ('do', 'NPROP'), ('Zebu', 'NPROP'), (',', ','), ('que', 'PRO-KS-REL'),
+  ('come\xe7a', 'V'), ('dia', 'N'), ('25', 'N|AP')], ...]
+
+

This data can be used to train taggers (examples below for the Floresta treebank).
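
A minimal sketch of that idea on MAC-MORPHO itself (illustrative only, not
from the original doctest; the slice keeps training fast):

    import nltk
    from nltk.corpus import mac_morpho

    tsents = list(mac_morpho.tagged_sents()[:1000])
    train, test = tsents[100:], tsents[:100]
    tagger = nltk.UnigramTagger(train, backoff=nltk.DefaultTagger('N'))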

+
+
+

Accessing the Floresta Portuguese Treebank

The NLTK data distribution includes the “Floresta Sinta(c)tica Corpus” version 7.4, available from https://www.linguateca.pt/Floresta/.

We can access this corpus as a sequence of words or tagged words as follows:

-
>>> from nltk.corpus import floresta
+>>> floresta.words()
+['Um', 'revivalismo', 'refrescante', 'O', '7_e_Meio', ...]
+>>> floresta.tagged_words()
+[('Um', '>N+art'), ('revivalismo', 'H+n'), ...]
+
+

The tags consist of some syntactic information, followed by a plus sign, followed by a conventional part-of-speech tag. Let’s strip off the material before the plus sign:

-
+
>>> def simplify_tag(t):
+...     if "+" in t:
+...         return t[t.index("+")+1:]
+...     else:
+...         return t
+>>> twords = floresta.tagged_words()
+>>> twords = [(w.lower(), simplify_tag(t)) for (w,t) in twords]
+>>> twords[:10]
+[('um', 'art'), ('revivalismo', 'n'), ('refrescante', 'adj'), ('o', 'art'), ('7_e_meio', 'prop'),
+('\xe9', 'v-fin'), ('um', 'art'), ('ex-libris', 'n'), ('de', 'prp'), ('a', 'art')]
+
+

Pretty printing the tagged words:

+
>>> print(' '.join(word + '/' + tag for (word, tag) in twords[:10]))
+um/art revivalismo/n refrescante/adj o/art 7_e_meio/prop é/v-fin um/art ex-libris/n de/prp a/art
+
+

Count the word tokens and types, and determine the most common word:

+
>>> words = floresta.words()
+>>> len(words)
+211852
+>>> fd = nltk.FreqDist(words)
+>>> len(fd)
+29421
+>>> fd.max()
+'de'
+
+

List the 20 most frequent tags, in order of decreasing frequency:

+
>>> tags = [simplify_tag(tag) for (word,tag) in floresta.tagged_words()]
+>>> fd = nltk.FreqDist(tags)
+>>> fd.keys()[:20]
+['n', 'prp', 'art', 'v-fin', ',', 'prop', 'adj', 'adv', '.',
+ 'conj-c', 'v-inf', 'pron-det', 'v-pcp', 'num', 'pron-indp',
+ 'pron-pers', '\xab', '\xbb', 'conj-s', '}']
+
+

We can also access the corpus grouped by sentence:

+
>>> floresta.sents()
+[['Um', 'revivalismo', 'refrescante'],
+ ['O', '7_e_Meio', '\xe9', 'um', 'ex-libris', 'de', 'a', 'noite',
+  'algarvia', '.'], ...]
+>>> floresta.tagged_sents()
+[[('Um', '>N+art'), ('revivalismo', 'H+n'), ('refrescante', 'N<+adj')],
+ [('O', '>N+art'), ('7_e_Meio', 'H+prop'), ('\xe9', 'P+v-fin'),
+  ('um', '>N+art'), ('ex-libris', 'H+n'), ('de', 'H+prp'),
+  ('a', '>N+art'), ('noite', 'H+n'), ('algarvia', 'N<+adj'), ('.', '.')],
+ ...]
+>>> floresta.parsed_sents()
+[Tree('UTT+np', [Tree('>N+art', ['Um']), Tree('H+n', ['revivalismo']),
+                 Tree('N<+adj', ['refrescante'])]),
+ Tree('STA+fcl',
+     [Tree('SUBJ+np', [Tree('>N+art', ['O']),
+                       Tree('H+prop', ['7_e_Meio'])]),
+      Tree('P+v-fin', ['\xe9']),
+      Tree('SC+np',
+         [Tree('>N+art', ['um']),
+          Tree('H+n', ['ex-libris']),
+          Tree('N<+pp', [Tree('H+prp', ['de']),
+                         Tree('P<+np', [Tree('>N+art', ['a']),
+                                        Tree('H+n', ['noite']),
+                                        Tree('N<+adj', ['algarvia'])])])]),
+      Tree('.', ['.'])]), ...]
+
+
+

To view a parse tree, use the draw() method, e.g.:

+
>>> psents = floresta.parsed_sents()
+>>> psents[5].draw() 
+
+
+

Character Encodings

Python understands the common character encoding used for Portuguese, ISO 8859-1 (ISO Latin 1).

+
>>> import os, nltk.test
+>>> testdir = os.path.split(nltk.test.__file__)[0]
+>>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO 8859-1')
+>>> text[:60]
+'O 7 e Meio \xe9 um ex-libris da noite algarvia.\n\xc9 uma das mais '
+>>> print(text[:60])
+O 7 e Meio é um ex-libris da noite algarvia.
+É uma das mais
+
+

For more information about character encodings and Python, please see section 3.3 of the book.

+
+

Processing Tasks

+
+

Simple Concordancing

+

Here’s a function that takes a word and a specified amount of context (measured in characters), and generates a concordance for that word.

+
>>> def concordance(word, context=30):
+...     for sent in floresta.sents():
+...         if word in sent:
+...             pos = sent.index(word)
+...             left = ' '.join(sent[:pos])
+...             right = ' '.join(sent[pos+1:])
+...             print('%*s %s %-*s' %
+...                 (context, left[-context:], word, context, right[:context]))
+
+
>>> concordance("dar") 
+anduru , foi o suficiente para dar a volta a o resultado .
+             1. O PÚBLICO veio dar a a imprensa diária portuguesa
+  A fartura de pensamento pode dar maus resultados e nós não quer
+                      Começa a dar resultados a política de a Uni
+ial começar a incorporar- lo e dar forma a um ' site ' que tem se
+r com Constantino para ele lhe dar também os papéis assinados .
+va a brincar , pois não lhe ia dar procuração nenhuma enquanto nã
+érica como o antídoto capaz de dar sentido a o seu enorme poder .
+. . .
+>>> concordance("vender")
+er recebido uma encomenda para vender 4000 blindados a o Iraque .
+mérico_Amorim caso conseguisse vender o lote de acções de o empresár
+mpre ter jovens simpáticos a « vender » chega ! }
+       Disse que o governo vai vender ? desde automóvel até particip
+ndiciou ontem duas pessoas por vender carro com ágio .
+        A intenção de Fleury é vender as ações para equilibrar as fi
+
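+
+NLTK's Text class offers a comparable concordancer out of the box; a minimal sketch (the output is interactive, hence skipped):
+
+>>> from nltk.text import Text
+>>> Text(floresta.words()).concordance("dar") # doctest: +SKIP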
-
-

Processing Tasks

-
-

Simple Concordancing

-

Here's a function that takes a word and a specified amount of context (measured in characters), and generates a concordance for that word.

-
-
->>> def concordance(word, context=30):
-...     for sent in floresta.sents():
-...         if word in sent:
-...             pos = sent.index(word)
-...             left = ' '.join(sent[:pos])
-...             right = ' '.join(sent[pos+1:])
-...             print('%*s %s %-*s' %
-...                 (context, left[-context:], word, context, right[:context]))
-
-
->>> concordance("dar") # doctest: +SKIP
-anduru , foi o suficiente para dar a volta a o resultado .
-             1. O P?BLICO veio dar a a imprensa di?ria portuguesa
-  A fartura de pensamento pode dar maus resultados e n?s n?o quer
-                      Come?a a dar resultados a pol?tica de a Uni
-ial come?ar a incorporar- lo e dar forma a um ' site ' que tem se
-r com Constantino para ele lhe dar tamb?m os pap?is assinados .
-va a brincar , pois n?o lhe ia dar procura??o nenhuma enquanto n?
-?rica como o ant?doto capaz de dar sentido a o seu enorme poder .
-. . .
->>> concordance("vender") # doctest: +SKIP
-er recebido uma encomenda para vender 4000 blindados a o Iraque .
-m?rico_Amorim caso conseguisse vender o lote de ac??es de o empres?r
-mpre ter jovens simp?ticos a ? vender ? chega ! }
-       Disse que o governo vai vender ? desde autom?vel at? particip
-ndiciou ontem duas pessoas por vender carro com ?gio .
-        A inten??o de Fleury ? vender as a??es para equilibrar as fi
-
-
-
-
-

Part-of-Speech Tagging

-

Let's begin by getting the tagged sentence data, and simplifying the tags as described earlier.

+
+

Part-of-Speech Tagging

+

Let’s begin by getting the tagged sentence data, and simplifying the tags as described earlier.

-
-
->>> from nltk.corpus import floresta
->>> tsents = floresta.tagged_sents()
->>> tsents = [[(w.lower(),simplify_tag(t)) for (w,t) in sent] for sent in tsents if sent]
->>> train = tsents[100:]
->>> test = tsents[:100]
-
-
-

We already know that n is the most common tag, so we can set up a default tagger that tags every word as a noun, and see how well it does:

>>> from nltk.corpus import floresta
+>>> tsents = floresta.tagged_sents()
+>>> tsents = [[(w.lower(),simplify_tag(t)) for (w,t) in sent] for sent in tsents if sent]
+>>> train = tsents[100:]
+>>> test = tsents[:100]
+
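+
+(The simplify_tag helper used above is defined earlier in this HOWTO; as a reminder, a minimal sketch of such a helper, which keeps only the part of a Floresta tag after the "+" separator:)
+
+>>> def simplify_tag(t):
+...     if "+" in t:
+...         return t[t.index("+")+1:]
+...     else:
+...         return t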
+
+

We already know that n is the most common tag, so we can set up a default tagger that tags every word as a noun, and see how well it does:

-
-
->>> tagger0 = nltk.DefaultTagger('n')
->>> nltk.tag.accuracy(tagger0, test)
-0.17697228144989338
-
-
-

Evidently, about one in every six words is a noun. Let's improve on this by training a unigram tagger:

>>> tagger0 = nltk.DefaultTagger('n')
+>>> nltk.tag.accuracy(tagger0, test)
+0.17697228144989338
+
+
+

Evidently, about one in every six words is a noun. Let’s improve on this by training a unigram tagger:

-
-
->>> tagger1 = nltk.UnigramTagger(train, backoff=tagger0)
->>> nltk.tag.accuracy(tagger1, test)
-0.87029140014214645
-
-
+
>>> tagger1 = nltk.UnigramTagger(train, backoff=tagger0)
+>>> nltk.tag.accuracy(tagger1, test)
+0.87029140014214645
+
+

Next a bigram tagger:

-
-
->>> tagger2 = nltk.BigramTagger(train, backoff=tagger1)
->>> nltk.tag.accuracy(tagger2, test)
-0.89019189765458417
-
-
-
-
-

Sentence Segmentation

+
>>> tagger2 = nltk.BigramTagger(train, backoff=tagger1)
+>>> nltk.tag.accuracy(tagger2, test)
+0.89019189765458417
+
+
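+
+The trained tagger can now be applied to any tokenized (and, given the training data, lowercased) sentence; a small illustrative sketch:
+
+>>> tagger2.tag('o resultado de o jogo'.split()) # doctest: +SKIP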
+ +
+

Sentence Segmentation

Punkt is a language-neutral sentence segmentation tool. We use the pre-trained Portuguese model to segment some raw text from the machado corpus:

-
-
->>> sent_tokenizer=nltk.data.load('tokenizers/punkt/portuguese.pickle')
->>> raw_text = machado.raw('romance/marm05.txt')
->>> sentences = sent_tokenizer.tokenize(raw_text)
->>> for sent in sentences[1000:1005]:
-...     print("<<", sent, ">>")
-...
-<< Em verdade, parecia ainda mais mulher do que era;
-seria criança nos seus folgares de moça; mas assim quieta, impassível, tinha a
-compostura da mulher casada. >>
-<< Talvez essa circunstância lhe diminuía um pouco da
-graça virginal. >>
-<< Depressa nos familiarizamos; a mãe fazia-lhe grandes elogios, eu
-escutava-os de boa sombra, e ela sorria com os olhos fúlgidos, como se lá dentro
-do cérebro lhe estivesse a voar uma borboletinha de asas de ouro e olhos de
-diamante... >>
-<< Digo lá dentro, porque cá fora o
-que esvoaçou foi uma borboleta preta, que subitamente penetrou na varanda, e
-começou a bater as asas em derredor de D. Eusébia. >>
-<< D. Eusébia deu um grito,
-levantou-se, praguejou umas palavras soltas: - T'esconjuro!... >>
-
-
+
>>> sent_tokenizer=nltk.data.load('tokenizers/punkt/portuguese.pickle')
+>>> raw_text = machado.raw('romance/marm05.txt')
+>>> sentences = sent_tokenizer.tokenize(raw_text)
+>>> for sent in sentences[1000:1005]:
+...     print("<<", sent, ">>")
+...
+<< Em verdade, parecia ainda mais mulher do que era;
+seria criança nos seus folgares de moça; mas assim quieta, impassível, tinha a
+compostura da mulher casada. >>
+<< Talvez essa circunstância lhe diminuía um pouco da
+graça virginal. >>
+<< Depressa nos familiarizamos; a mãe fazia-lhe grandes elogios, eu
+escutava-os de boa sombra, e ela sorria com os olhos fúlgidos, como se lá dentro
+do cérebro lhe estivesse a voar uma borboletinha de asas de ouro e olhos de
+diamante... >>
+<< Digo lá dentro, porque cá fora o
+que esvoaçou foi uma borboleta preta, que subitamente penetrou na varanda, e
+começou a bater as asas em derredor de D. Eusébia. >>
+<< D. Eusébia deu um grito,
+levantou-se, praguejou umas palavras soltas: - T'esconjuro!... >>
+
+

The sentence tokenizer can be trained and evaluated on other text. The source text (from the Floresta Portuguese Treebank) contains one sentence per line. We read the text, split it into its lines, and then join these lines together using spaces. Now the information about sentence breaks has been discarded. We split this material into training and testing data:

-
-
->>> import os, nltk.test
->>> testdir = os.path.split(nltk.test.__file__)[0]
->>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO-8859-1')
->>> lines = text.split('\n')
->>> train = ' '.join(lines[10:])
->>> test = ' '.join(lines[:10])
-
-
+
>>> import os, nltk.test
+>>> testdir = os.path.split(nltk.test.__file__)[0]
+>>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO-8859-1')
+>>> lines = text.split('\n')
+>>> train = ' '.join(lines[10:])
+>>> test = ' '.join(lines[:10])
+
+

Now we train the sentence segmenter (or sentence tokenizer) and use it on our test sentences:

-
-
->>> stok = nltk.PunktSentenceTokenizer(train)
->>> print(stok.tokenize(test))
-['O 7 e Meio \xe9 um ex-libris da noite algarvia.',
-'\xc9 uma das mais antigas discotecas do Algarve, situada em Albufeira,
-que continua a manter os tra\xe7os decorativos e as clientelas de sempre.',
-'\xc9 um pouco a vers\xe3o de uma esp\xe9cie de \xaboutro lado\xbb da noite,
-a meio caminho entre os devaneios de uma fauna perif\xe9rica, seja de Lisboa,
-Londres, Dublin ou Faro e Portim\xe3o, e a postura circunspecta dos fi\xe9is da casa,
-que dela esperam a m\xfasica \xabgeracionista\xbb dos 60 ou dos 70.',
-'N\xe3o deixa de ser, nos tempos que correm, um certo \xabvery typical\xbb algarvio,
-cabe\xe7a de cartaz para os que querem fugir a algumas movimenta\xe7\xf5es nocturnas
-j\xe1 a caminho da ritualiza\xe7\xe3o de massas, do g\xe9nero \xabvamos todos ao
-Calypso e encontramo-nos na Locomia\xbb.',
-'E assim, aos 2,5 milh\xf5es que o Minist\xe9rio do Planeamento e Administra\xe7\xe3o
-do Territ\xf3rio j\xe1 gasta no pagamento do pessoal afecto a estes organismos,
-v\xeam juntar-se os montantes das obras propriamente ditas, que os munic\xedpios,
-j\xe1 com projectos na m\xe3o, v\xeam reivindicar junto do Executivo, como salienta
-aquele membro do Governo.',
-'E o dinheiro \xabn\xe3o falta s\xf3 \xe0s c\xe2maras\xbb, lembra o secret\xe1rio de Estado,
-que considera que a solu\xe7\xe3o para as autarquias \xe9 \xabespecializarem-se em
-fundos comunit\xe1rios\xbb.',
-'Mas como, se muitas n\xe3o disp\xf5em, nos seus quadros, dos t\xe9cnicos necess\xe1rios?',
-'\xabEncomendem-nos a projectistas de fora\xbb porque, se as obras vierem a ser financiadas,
-eles at\xe9 saem de gra\xe7a, j\xe1 que, nesse caso, \xabos fundos comunit\xe1rios pagam
-os projectos, o mesmo n\xe3o acontecendo quando eles s\xe3o feitos pelos GAT\xbb,
-dado serem organismos do Estado.',
-'Essa poder\xe1 vir a ser uma hip\xf3tese, at\xe9 porque, no terreno, a capacidade dos GAT
-est\xe1 cada vez mais enfraquecida.',
-'Alguns at\xe9 j\xe1 desapareceram, como o de Castro Verde, e outros t\xeam vindo a perder quadros.']
-
-
-

NLTK's data collection includes a trained model for Portuguese sentence segmentation, which can be loaded as follows. It is faster to load a trained model than to retrain it.

>>> stok = nltk.PunktSentenceTokenizer(train)
+>>> print(stok.tokenize(test))
+['O 7 e Meio \xe9 um ex-libris da noite algarvia.',
+'\xc9 uma das mais antigas discotecas do Algarve, situada em Albufeira,
+que continua a manter os tra\xe7os decorativos e as clientelas de sempre.',
+'\xc9 um pouco a vers\xe3o de uma esp\xe9cie de \xaboutro lado\xbb da noite,
+a meio caminho entre os devaneios de uma fauna perif\xe9rica, seja de Lisboa,
+Londres, Dublin ou Faro e Portim\xe3o, e a postura circunspecta dos fi\xe9is da casa,
+que dela esperam a m\xfasica \xabgeracionista\xbb dos 60 ou dos 70.',
+'N\xe3o deixa de ser, nos tempos que correm, um certo \xabvery typical\xbb algarvio,
+cabe\xe7a de cartaz para os que querem fugir a algumas movimenta\xe7\xf5es nocturnas
+j\xe1 a caminho da ritualiza\xe7\xe3o de massas, do g\xe9nero \xabvamos todos ao
+Calypso e encontramo-nos na Locomia\xbb.',
+'E assim, aos 2,5 milh\xf5es que o Minist\xe9rio do Planeamento e Administra\xe7\xe3o
+do Territ\xf3rio j\xe1 gasta no pagamento do pessoal afecto a estes organismos,
+v\xeam juntar-se os montantes das obras propriamente ditas, que os munic\xedpios,
+j\xe1 com projectos na m\xe3o, v\xeam reivindicar junto do Executivo, como salienta
+aquele membro do Governo.',
+'E o dinheiro \xabn\xe3o falta s\xf3 \xe0s c\xe2maras\xbb, lembra o secret\xe1rio de Estado,
+que considera que a solu\xe7\xe3o para as autarquias \xe9 \xabespecializarem-se em
+fundos comunit\xe1rios\xbb.',
+'Mas como, se muitas n\xe3o disp\xf5em, nos seus quadros, dos t\xe9cnicos necess\xe1rios?',
+'\xabEncomendem-nos a projectistas de fora\xbb porque, se as obras vierem a ser financiadas,
+eles at\xe9 saem de gra\xe7a, j\xe1 que, nesse caso, \xabos fundos comunit\xe1rios pagam
+os projectos, o mesmo n\xe3o acontecendo quando eles s\xe3o feitos pelos GAT\xbb,
+dado serem organismos do Estado.',
+'Essa poder\xe1 vir a ser uma hip\xf3tese, at\xe9 porque, no terreno, a capacidade dos GAT
+est\xe1 cada vez mais enfraquecida.',
+'Alguns at\xe9 j\xe1 desapareceram, como o de Castro Verde, e outros t\xeam vindo a perder quadros.']
+
+
+

NLTK’s data collection includes a trained model for Portuguese sentence segmentation, which can be loaded as follows. It is faster to load a trained model than to retrain it.

-
-
->>> stok = nltk.data.load('tokenizers/punkt/portuguese.pickle')
-
-
-
-
-

Stemming

+
>>> stok = nltk.data.load('tokenizers/punkt/portuguese.pickle')
+
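+
+The loaded tokenizer is used exactly like the one trained above; for example (a sketch, output as before):
+
+>>> stok.tokenize(test) # doctest: +SKIP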
+
+ +
+

Stemming

NLTK includes the RSLP Portuguese stemmer. Here we use it to stem some Portuguese text:

-
-
->>> stemmer = nltk.stem.RSLPStemmer()
->>> stemmer.stem("copiar")
-'copi'
->>> stemmer.stem("paisagem")
-'pais'
-
-
-
-
-

Stopwords

+
>>> stemmer = nltk.stem.RSLPStemmer()
+>>> stemmer.stem("copiar")
+'copi'
+>>> stemmer.stem("paisagem")
+'pais'
+
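+
+The stemmer can be mapped over a whole token list in the obvious way; a minimal sketch (the word list is illustrative):
+
+>>> stems = [stemmer.stem(w) for w in ['copiar', 'copiamos', 'paisagens']]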
+
+ +
+

Stopwords

NLTK includes Portuguese stopwords:

-
-
->>> stopwords = nltk.corpus.stopwords.words('portuguese')
->>> stopwords[:10]
-['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', 'aquilo', 'as', 'at\xe9']
-
-
-

Now we can use these to filter text. Let's find the most frequent words (other than stopwords) and print them in descending order of frequency:

-
-
->>> fd = nltk.FreqDist(w.lower() for w in floresta.words() if w not in stopwords)
->>> for word in list(fd.keys())[:20]:
-...     print(word, fd[word])
-, 13444
-. 7725
-« 2369
-» 2310
-é 1305
-o 1086
-} 1047
-{ 1044
-a 897
-; 633
-em 516
-ser 466
-sobre 349
-os 313
-anos 301
-ontem 292
-ainda 279
-segundo 256
-ter 249
-dois 231
-
-
+
>>> stopwords = nltk.corpus.stopwords.words('portuguese')
+>>> stopwords[:10]
+['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', 'aquilo', 'as', 'at\xe9']
+
+

Now we can use these to filter text. Let’s find the most frequent words (other than stopwords) and print them in descending order of frequency:

+
>>> fd = nltk.FreqDist(w.lower() for w in floresta.words() if w not in stopwords)
+>>> for word in list(fd.keys())[:20]:
+...     print(word, fd[word])
+, 13444
+. 7725
+« 2369
+» 2310
+é 1305
+o 1086
+} 1047
+{ 1044
+a 897
+; 633
+em 516
+ser 466
+sobre 349
+os 313
+anos 301
+ontem 292
+ainda 279
+segundo 256
+ter 249
+dois 231
+
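+
+Much of this list is punctuation. A natural refinement (a sketch, not part of the original HOWTO) keeps alphabetic tokens only:
+
+>>> fd2 = nltk.FreqDist(w.lower() for w in floresta.words()
+...                     if w.isalpha() and w.lower() not in stopwords)
+>>> fd2.most_common(5) # doctest: +SKIP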
+
\ No newline at end of file
diff --git a/howto/probability.html b/howto/probability.html
index 2dfb441c2..fa43e9e0b 100644
--- a/howto/probability.html
+++ b/howto/probability.html
@@ -1,672 +1,480 @@
-

Probability

-
->>> import nltk
->>> from nltk.probability import *
-
-
-
-

FreqDist

-
-
->>> text1 = ['no', 'good', 'fish', 'goes', 'anywhere', 'without', 'a', 'porpoise', '!']
->>> text2 = ['no', 'good', 'porpoise', 'likes', 'to', 'fish', 'fish', 'anywhere', '.']
-
-
->>> fd1 = nltk.FreqDist(text1)
->>> fd1 == nltk.FreqDist(text1)
-True
-
-
+
+
+ +
+

Sample usage for probability

+
+

Probability

+
>>> from nltk.test.probability_fixt import setup_module
+>>> setup_module()
+
+
+
>>> import nltk
+>>> from nltk.probability import *
+
+
+
+

FreqDist

+
>>> text1 = ['no', 'good', 'fish', 'goes', 'anywhere', 'without', 'a', 'porpoise', '!']
+>>> text2 = ['no', 'good', 'porpoise', 'likes', 'to', 'fish', 'fish', 'anywhere', '.']
+
+
+
>>> fd1 = nltk.FreqDist(text1)
+>>> fd1 == nltk.FreqDist(text1)
+True
+
+

Note that items are sorted in order of decreasing frequency; two items of the same frequency appear in indeterminate order.

-
-
->>> import itertools
->>> both = nltk.FreqDist(text1 + text2)
->>> both_most_common = both.most_common()
->>> list(itertools.chain(*(sorted(ys) for k, ys in itertools.groupby(both_most_common, key=lambda t: t[1]))))
-[('fish', 3), ('anywhere', 2), ('good', 2), ('no', 2), ('porpoise', 2), ('!', 1), ('.', 1), ('a', 1), ('goes', 1), ('likes', 1), ('to', 1), ('without', 1)]
-
-
->>> both == fd1 + nltk.FreqDist(text2)
-True
->>> fd1 == nltk.FreqDist(text1) # But fd1 is unchanged
-True
-
-
->>> fd2 = nltk.FreqDist(text2)
->>> fd1.update(fd2)
->>> fd1 == both
-True
-
-
->>> fd1 = nltk.FreqDist(text1)
->>> fd1.update(text2)
->>> fd1 == both
-True
-
-
->>> fd1 = nltk.FreqDist(text1)
->>> fd2 = nltk.FreqDist(fd1)
->>> fd2 == fd1
-True
-
-
-

nltk.FreqDist can be pickled:

-
-
->>> import pickle
->>> fd1 = nltk.FreqDist(text1)
->>> pickled = pickle.dumps(fd1)
->>> fd1 == pickle.loads(pickled)
-True
-
-
-
-
-

Testing some HMM estimators

+
>>> import itertools
+>>> both = nltk.FreqDist(text1 + text2)
+>>> both_most_common = both.most_common()
+>>> list(itertools.chain(*(sorted(ys) for k, ys in itertools.groupby(both_most_common, key=lambda t: t[1]))))
+[('fish', 3), ('anywhere', 2), ('good', 2), ('no', 2), ('porpoise', 2), ('!', 1), ('.', 1), ('a', 1), ('goes', 1), ('likes', 1), ('to', 1), ('without', 1)]
+
+
+
>>> both == fd1 + nltk.FreqDist(text2)
+True
+>>> fd1 == nltk.FreqDist(text1) # But fd1 is unchanged
+True
+
+
+
>>> fd2 = nltk.FreqDist(text2)
+>>> fd1.update(fd2)
+>>> fd1 == both
+True
+
+
+
>>> fd1 = nltk.FreqDist(text1)
+>>> fd1.update(text2)
+>>> fd1 == both
+True
+
+
+
>>> fd1 = nltk.FreqDist(text1)
+>>> fd2 = nltk.FreqDist(fd1)
+>>> fd2 == fd1
+True
+
+
+

nltk.FreqDist can be pickled:

+
>>> import pickle
+>>> fd1 = nltk.FreqDist(text1)
+>>> pickled = pickle.dumps(fd1)
+>>> fd1 == pickle.loads(pickled)
+True
+
+
+

Mathematical operations:

+
>>> FreqDist('abbb') + FreqDist('bcc')
+FreqDist({'b': 4, 'c': 2, 'a': 1})
+>>> FreqDist('abbbc') - FreqDist('bccd')
+FreqDist({'b': 2, 'a': 1})
+>>> FreqDist('abbb') | FreqDist('bcc')
+FreqDist({'b': 3, 'c': 2, 'a': 1})
+>>> FreqDist('abbb') & FreqDist('bcc')
+FreqDist({'b': 1})
+
+
+ +
+

ConditionalFreqDist

+
>>> cfd1 = ConditionalFreqDist()
+>>> cfd1[1] = FreqDist('abbbb')
+>>> cfd1[2] = FreqDist('xxxxyy')
+>>> cfd1
+<ConditionalFreqDist with 2 conditions>
+
+
+
>>> cfd2 = ConditionalFreqDist()
+>>> cfd2[1] = FreqDist('bbccc')
+>>> cfd2[2] = FreqDist('xxxyyyzz')
+>>> cfd2[3] = FreqDist('m')
+>>> cfd2
+<ConditionalFreqDist with 3 conditions>
+
+
+
>>> r = cfd1 + cfd2
+>>> [(i,r[i]) for i in r.conditions()]
+[(1, FreqDist({'b': 6, 'c': 3, 'a': 1})), (2, FreqDist({'x': 7, 'y': 5, 'z': 2})), (3, FreqDist({'m': 1}))]
+
+
+
>>> r = cfd1 - cfd2
+>>> [(i,r[i]) for i in r.conditions()]
+[(1, FreqDist({'b': 2, 'a': 1})), (2, FreqDist({'x': 1}))]
+
+
+
>>> r = cfd1 | cfd2
+>>> [(i,r[i]) for i in r.conditions()]
+[(1, FreqDist({'b': 4, 'c': 3, 'a': 1})), (2, FreqDist({'x': 4, 'y': 3, 'z': 2})), (3, FreqDist({'m': 1}))]
+
+
+
>>> r = cfd1 & cfd2
+>>> [(i,r[i]) for i in r.conditions()]
+[(1, FreqDist({'b': 2})), (2, FreqDist({'x': 3, 'y': 2}))]
+
+
+
+
+

Testing some HMM estimators

We extract a small part (500 sentences) of the Brown corpus

-
-
->>> corpus = nltk.corpus.brown.tagged_sents(categories='adventure')[:500]
->>> print(len(corpus))
-500
-
-
+
>>> corpus = nltk.corpus.brown.tagged_sents(categories='adventure')[:500]
+>>> print(len(corpus))
+500
+
+

We create an HMM trainer. Note that we need the tags and symbols from the whole corpus, not just the training corpus:

-
-
->>> from nltk.util import unique_list
->>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
->>> print(len(tag_set))
-92
->>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
->>> print(len(symbols))
-1464
->>> print(len(tag_set))
-92
->>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
->>> print(len(symbols))
-1464
->>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
-
-
+
>>> from nltk.util import unique_list
+>>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
+>>> print(len(tag_set))
+92
+>>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
+>>> print(len(symbols))
+1464
+>>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
+
+

We divide the corpus into 90% training and 10% testing

-
-
->>> train_corpus = []
->>> test_corpus = []
->>> for i in range(len(corpus)):
-...     if i % 10:
-...         train_corpus += [corpus[i]]
-...     else:
-...         test_corpus += [corpus[i]]
->>> print(len(train_corpus))
-450
->>> print(len(test_corpus))
-50
-
-
+
>>> train_corpus = []
+>>> test_corpus = []
+>>> for i in range(len(corpus)):
+...     if i % 10:
+...         train_corpus += [corpus[i]]
+...     else:
+...         test_corpus += [corpus[i]]
+>>> print(len(train_corpus))
+450
+>>> print(len(test_corpus))
+50
+
+

And now we can test the estimators

-
-
->>> def train_and_test(est):
-...     hmm = trainer.train_supervised(train_corpus, estimator=est)
-...     print('%.2f%%' % (100 * hmm.evaluate(test_corpus)))
-
-
-
-
-

Maximum Likelihood Estimation

+
>>> def train_and_test(est):
+...     hmm = trainer.train_supervised(train_corpus, estimator=est)
+...     print('%.2f%%' % (100 * hmm.evaluate(test_corpus)))
+
+
+ +
+

Maximum Likelihood Estimation

    -
  • this resulted in an initialization error before r7209

    -
    -
    ->>> mle = lambda fd, bins: MLEProbDist(fd)
    ->>> train_and_test(mle)
    -22.75%
    -
    -
    +
  • this resulted in an initialization error before r7209

    +
    >>> mle = lambda fd, bins: MLEProbDist(fd)
    +>>> train_and_test(mle)
    +22.75%
    +
    +

Laplace (= Lidstone with gamma==1)

-
-
->>> train_and_test(LaplaceProbDist)
-66.04%
-
-
+
>>> train_and_test(LaplaceProbDist)
+66.04%
+
+

Expected Likelihood Estimation (= Lidstone with gamma==0.5)

-
-
->>> train_and_test(ELEProbDist)
-73.01%
-
-
+
>>> train_and_test(ELEProbDist)
+73.01%
+
+

Lidstone Estimation, for gamma==0.1, 0.5 and 1 (the latter two should be exactly equal to ELE and Laplace above)

-
-
->>> def lidstone(gamma):
-...     return lambda fd, bins: LidstoneProbDist(fd, gamma, bins)
->>> train_and_test(lidstone(0.1))
-82.51%
->>> train_and_test(lidstone(0.5))
-73.01%
->>> train_and_test(lidstone(1.0))
-66.04%
-
-
-
-
-

Witten Bell Estimation

+
>>> def lidstone(gamma):
+...     return lambda fd, bins: LidstoneProbDist(fd, gamma, bins)
+>>> train_and_test(lidstone(0.1))
+82.51%
+>>> train_and_test(lidstone(0.5))
+73.01%
+>>> train_and_test(lidstone(1.0))
+66.04%
+
+
+ +
+

Witten Bell Estimation

    -
  • This resulted in ZeroDivisionError before r7209

    -
    -
    ->>> train_and_test(WittenBellProbDist)
    -88.12%
    -
    -
    +
  • This resulted in ZeroDivisionError before r7209

    +
    >>> train_and_test(WittenBellProbDist)
    +88.12%
    +
    +

Good Turing Estimation

-
-
->>> gt = lambda fd, bins: SimpleGoodTuringProbDist(fd, bins=1e5)
->>> train_and_test(gt)
-86.93%
-
-
-
-
-

Kneser Ney Estimation

+
>>> gt = lambda fd, bins: SimpleGoodTuringProbDist(fd, bins=1e5)
+>>> train_and_test(gt)
+86.93%
+
+
+ +
+

Kneser Ney Estimation

Since the Kneser-Ney distribution is best suited for trigrams, we must adjust our testing accordingly.

-
-
->>> corpus = [[((x[0],y[0],z[0]),(x[1],y[1],z[1]))
-...     for x, y, z in nltk.trigrams(sent)]
-...         for sent in corpus[:100]]
-
-
-
-
We will then need to redefine the rest of the training/testing variables
-
->>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
->>> len(tag_set)
-906
-
-
->>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
->>> len(symbols)
-1341
-
-
->>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
->>> train_corpus = []
->>> test_corpus = []
-
-
->>> for i in range(len(corpus)):
-...    if i % 10:
-...        train_corpus += [corpus[i]]
-...    else:
-...        test_corpus += [corpus[i]]
-
-
->>> len(train_corpus)
-90
->>> len(test_corpus)
-10
-
-
->>> kn = lambda fd, bins: KneserNeyProbDist(fd)
->>> train_and_test(kn)
-0.86%
-
+
>>> corpus = [[((x[0],y[0],z[0]),(x[1],y[1],z[1]))
+...     for x, y, z in nltk.trigrams(sent)]
+...         for sent in corpus[:100]]
+
+
+
+
We will then need to redefine the rest of the training/testing variables
>>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
+>>> len(tag_set)
+906
+
+
+
>>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
+>>> len(symbols)
+1341
+
+
+
>>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
+>>> train_corpus = []
+>>> test_corpus = []
+
+
+
>>> for i in range(len(corpus)):
+...    if i % 10:
+...        train_corpus += [corpus[i]]
+...    else:
+...        test_corpus += [corpus[i]]
+
+
+
>>> len(train_corpus)
+90
+>>> len(test_corpus)
+10
+
+
+
>>> kn = lambda fd, bins: KneserNeyProbDist(fd)
+>>> train_and_test(kn)
+0.86%
+
+

Remains to be added: tests for HeldoutProbDist, CrossValidationProbDist and MutableProbDist.
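+
+Until then, a minimal sketch of how the first two estimators might be exercised (the frequency distributions here are illustrative, and the outputs are skipped rather than asserted):
+
+>>> base, held = FreqDist('aaab'), FreqDist('aabb')
+>>> hpd = HeldoutProbDist(base, held, bins=2)
+>>> hpd.prob('a') # doctest: +SKIP
+>>> cvpd = CrossValidationProbDist([base, held], bins=2)
+>>> cvpd.prob('a') # doctest: +SKIP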

-
-
-

Squashed bugs

+ +
+

Squashed bugs

Issue 511: override pop and popitem to invalidate the cache

-
-
->>> fd = nltk.FreqDist('a')
->>> list(fd.keys())
-['a']
->>> fd.pop('a')
-1
->>> list(fd.keys())
-[]
-
-
+
>>> fd = nltk.FreqDist('a')
+>>> list(fd.keys())
+['a']
+>>> fd.pop('a')
+1
+>>> list(fd.keys())
+[]
+
+

Issue 533: access cumulative frequencies with no arguments

-
-
->>> fd = nltk.FreqDist('aab')
->>> list(fd._cumulative_frequencies(['a']))
-[2.0]
->>> list(fd._cumulative_frequencies(['a', 'b']))
-[2.0, 3.0]
-
-
+
>>> fd = nltk.FreqDist('aab')
+>>> list(fd._cumulative_frequencies(['a']))
+[2.0]
+>>> list(fd._cumulative_frequencies(['a', 'b']))
+[2.0, 3.0]
+
+

Issue 579: override clear to reset some variables

-
-
->>> fd = FreqDist('aab')
->>> fd.clear()
->>> fd.N()
-0
-
-
+
>>> fd = FreqDist('aab')
+>>> fd.clear()
+>>> fd.N()
+0
+
+

Issue 351: fix fileids method of CategorizedCorpusReader so that it does not inadvertently add errant categories

-
-
->>> from nltk.corpus import brown
->>> brown.fileids('blah')
-Traceback (most recent call last):
-  ...
-ValueError: Category blah not found
->>> brown.categories()
-['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', 'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
-
-
+
>>> from nltk.corpus import brown
+>>> brown.fileids('blah')
+Traceback (most recent call last):
+  ...
+ValueError: Category blah not found
+>>> brown.categories()
+['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', 'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
+
+

Issue 175: add the unseen bin to SimpleGoodTuringProbDist by default; otherwise any unseen events get a probability of zero, i.e., they don't get smoothed

-
-
->>> from nltk import SimpleGoodTuringProbDist, FreqDist
->>> fd = FreqDist({'a':1, 'b':1, 'c': 2, 'd': 3, 'e': 4, 'f': 4, 'g': 4, 'h': 5, 'i': 5, 'j': 6, 'k': 6, 'l': 6, 'm': 7, 'n': 7, 'o': 8, 'p': 9, 'q': 10})
->>> p = SimpleGoodTuringProbDist(fd)
->>> p.prob('a')
-0.017649766667026317...
->>> p.prob('o')
-0.08433050215340411...
->>> p.prob('z')
-0.022727272727272728...
->>> p.prob('foobar')
-0.022727272727272728...
-
-
-

MLEProbDist, ConditionalProbDist, DictionaryConditionalProbDist and ConditionalFreqDist can be pickled:

-
-
->>> import pickle
->>> pd = MLEProbDist(fd)
->>> sorted(pd.samples()) == sorted(pickle.loads(pickle.dumps(pd)).samples())
-True
->>> dpd = DictionaryConditionalProbDist({'x': pd})
->>> unpickled = pickle.loads(pickle.dumps(dpd))
->>> dpd['x'].prob('a')
-0.011363636...
->>> dpd['x'].prob('a') == unpickled['x'].prob('a')
-True
->>> cfd = nltk.probability.ConditionalFreqDist()
->>> cfd['foo']['hello'] += 1
->>> cfd['foo']['hello'] += 1
->>> cfd['bar']['hello'] += 1
->>> cfd2 = pickle.loads(pickle.dumps(cfd))
->>> cfd2 == cfd
-True
->>> cpd = ConditionalProbDist(cfd, SimpleGoodTuringProbDist)
->>> cpd2 = pickle.loads(pickle.dumps(cpd))
->>> cpd['foo'].prob('hello') == cpd2['foo'].prob('hello')
-True
-
-
+they don’t get smoothed

+
>>> from nltk import SimpleGoodTuringProbDist, FreqDist
+>>> fd = FreqDist({'a':1, 'b':1, 'c': 2, 'd': 3, 'e': 4, 'f': 4, 'g': 4, 'h': 5, 'i': 5, 'j': 6, 'k': 6, 'l': 6, 'm': 7, 'n': 7, 'o': 8, 'p': 9, 'q': 10})
+>>> p = SimpleGoodTuringProbDist(fd)
+>>> p.prob('a')
+0.017649766667026317...
+>>> p.prob('o')
+0.08433050215340411...
+>>> p.prob('z')
+0.022727272727272728...
+>>> p.prob('foobar')
+0.022727272727272728...
+
+

MLEProbDist, ConditionalProbDist, DictionaryConditionalProbDist and ConditionalFreqDist can be pickled:

+
>>> import pickle
+>>> pd = MLEProbDist(fd)
+>>> sorted(pd.samples()) == sorted(pickle.loads(pickle.dumps(pd)).samples())
+True
+>>> dpd = DictionaryConditionalProbDist({'x': pd})
+>>> unpickled = pickle.loads(pickle.dumps(dpd))
+>>> dpd['x'].prob('a')
+0.011363636...
+>>> dpd['x'].prob('a') == unpickled['x'].prob('a')
+True
+>>> cfd = nltk.probability.ConditionalFreqDist()
+>>> cfd['foo']['hello'] += 1
+>>> cfd['foo']['hello'] += 1
+>>> cfd['bar']['hello'] += 1
+>>> cfd2 = pickle.loads(pickle.dumps(cfd))
+>>> cfd2 == cfd
+True
+>>> cpd = ConditionalProbDist(cfd, SimpleGoodTuringProbDist)
+>>> cpd2 = pickle.loads(pickle.dumps(cpd))
+>>> cpd['foo'].prob('hello') == cpd2['foo'].prob('hello')
+True
+
+
\ No newline at end of file
diff --git a/howto/propbank.html b/howto/propbank.html
index 903c1f08c..7dc2808c1 100644
--- a/howto/propbank.html
+++ b/howto/propbank.html
@@ -1,441 +1,188 @@
-

PropBank

- - - +
+
+ +
+

Sample usage for propbank

+
+

PropBank

The PropBank Corpus provides predicate-argument annotation for the entire Penn Treebank. Each verb in the treebank is annotated by a single instance in PropBank, containing information about the location of the verb, and the location and identity of its arguments:

-
-
->>> from nltk.corpus import propbank
->>> pb_instances = propbank.instances()
->>> print(pb_instances) # doctest: +NORMALIZE_WHITESPACE
-[<PropbankInstance: wsj_0001.mrg, sent 0, word 8>,
- <PropbankInstance: wsj_0001.mrg, sent 1, word 10>, ...]
-
-
+
>>> from nltk.corpus import propbank
+>>> pb_instances = propbank.instances()
+>>> print(pb_instances)
+[<PropbankInstance: wsj_0001.mrg, sent 0, word 8>,
+ <PropbankInstance: wsj_0001.mrg, sent 1, word 10>, ...]
+
+

Each propbank instance defines the following member variables:

-
-  • Location information: fileid, sentnum, wordnum
-  • Annotator information: tagger
-  • Inflection information: inflection
-  • Roleset identifier: roleset
-  • Verb (aka predicate) location: predicate
-  • Argument locations and types: arguments
+  • Location information: fileid, sentnum, wordnum
+  • Annotator information: tagger
+  • Inflection information: inflection
+  • Roleset identifier: roleset
+  • Verb (aka predicate) location: predicate
+  • Argument locations and types: arguments

The following examples show the types of these arguments:

-
-
->>> inst = pb_instances[103]
->>> (inst.fileid, inst.sentnum, inst.wordnum)
-('wsj_0004.mrg', 8, 16)
->>> inst.tagger
-'gold'
->>> inst.inflection
-<PropbankInflection: vp--a>
->>> infl = inst.inflection
->>> infl.form, infl.tense, infl.aspect, infl.person, infl.voice
-('v', 'p', '-', '-', 'a')
->>> inst.roleset
-'rise.01'
->>> inst.predicate
-PropbankTreePointer(16, 0)
->>> inst.arguments # doctest: +NORMALIZE_WHITESPACE
-((PropbankTreePointer(0, 2), 'ARG1'),
- (PropbankTreePointer(13, 1), 'ARGM-DIS'),
- (PropbankTreePointer(17, 1), 'ARG4-to'),
- (PropbankTreePointer(20, 1), 'ARG3-from'))
-
-
+
>>> inst = pb_instances[103]
+>>> (inst.fileid, inst.sentnum, inst.wordnum)
+('wsj_0004.mrg', 8, 16)
+>>> inst.tagger
+'gold'
+>>> inst.inflection
+<PropbankInflection: vp--a>
+>>> infl = inst.inflection
+>>> infl.form, infl.tense, infl.aspect, infl.person, infl.voice
+('v', 'p', '-', '-', 'a')
+>>> inst.roleset
+'rise.01'
+>>> inst.predicate
+PropbankTreePointer(16, 0)
+>>> inst.arguments
+((PropbankTreePointer(0, 2), 'ARG1'),
+ (PropbankTreePointer(13, 1), 'ARGM-DIS'),
+ (PropbankTreePointer(17, 1), 'ARG4-to'),
+ (PropbankTreePointer(20, 1), 'ARG3-from'))
+
+

The location of the predicate and of the arguments are encoded using PropbankTreePointer objects, as well as PropbankChainTreePointer objects and PropbankSplitTreePointer objects. A PropbankTreePointer consists of a wordnum and a height:

-
-
->>> print(inst.predicate.wordnum, inst.predicate.height)
-16 0
-
-
+
>>> print(inst.predicate.wordnum, inst.predicate.height)
+16 0
+
+

This identifies the tree constituent that is headed by the word that is the wordnum'th token in the sentence, and whose span is found by going height nodes up in the tree. This type of pointer is only useful if we also have the corresponding tree structure, since it includes empty elements such as traces in the word number count. The trees for 10% of the standard PropBank Corpus are contained in the treebank corpus:

-
-
->>> tree = inst.tree
-
-
->>> from nltk.corpus import treebank
->>> assert tree == treebank.parsed_sents(inst.fileid)[inst.sentnum]
-
-
->>> inst.predicate.select(tree)
-Tree('VBD', ['rose'])
->>> for (argloc, argid) in inst.arguments:
-...     print('%-10s %s' % (argid, argloc.select(tree).pformat(500)[:50]))
-ARG1       (NP-SBJ (NP (DT The) (NN yield)) (PP (IN on) (NP (
-ARGM-DIS   (PP (IN for) (NP (NN example)))
-ARG4-to    (PP-DIR (TO to) (NP (CD 8.04) (NN %)))
-ARG3-from  (PP-DIR (IN from) (NP (CD 7.90) (NN %)))
-
-
+
>>> tree = inst.tree
+
+
+
>>> from nltk.corpus import treebank
+>>> assert tree == treebank.parsed_sents(inst.fileid)[inst.sentnum]
+
+
+
>>> inst.predicate.select(tree)
+Tree('VBD', ['rose'])
+>>> for (argloc, argid) in inst.arguments:
+...     print('%-10s %s' % (argid, argloc.select(tree).pformat(500)[:50]))
+ARG1       (NP-SBJ (NP (DT The) (NN yield)) (PP (IN on) (NP (
+ARGM-DIS   (PP (IN for) (NP (NN example)))
+ARG4-to    (PP-DIR (TO to) (NP (CD 8.04) (NN %)))
+ARG3-from  (PP-DIR (IN from) (NP (CD 7.90) (NN %)))
+
+

Propbank tree pointers can be converted to standard tree locations, which are usually easier to work with, using the treepos() method:

-
-
->>> treepos = inst.predicate.treepos(tree)
->>> print (treepos, tree[treepos])
-(4, 0) (VBD rose)
-
-
+
>>> treepos = inst.predicate.treepos(tree)
+>>> print(treepos, tree[treepos])
+(4, 0) (VBD rose)
+
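+
+The same conversion applies to the argument pointers; a short sketch (output skipped):
+
+>>> for (argloc, argid) in inst.arguments: # doctest: +SKIP
+...     print(argid, argloc.treepos(tree))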
+

In some cases, argument locations will be encoded using PropbankChainTreePointers (for trace chains) or PropbankSplitTreePointers (for discontinuous constituents). Both
@@ -443,86 +190,80 @@

PropBank

containing a list of the constituent pieces. They also define the method select(), which will return a tree containing all the elements of the argument. (A new head node is created, labeled “CHAIN” or “SPLIT”, since the argument is not a single constituent in the original tree). Sentence #6 contains an example of an argument that is both discontinuous and contains a chain:

-
-
->>> inst = pb_instances[6]
->>> inst.roleset
-'expose.01'
->>> argloc, argid = inst.arguments[2]
->>> argloc
-<PropbankChainTreePointer: 22:1,24:0,25:1*27:0>
->>> argloc.pieces
-[<PropbankSplitTreePointer: 22:1,24:0,25:1>, PropbankTreePointer(27, 0)]
->>> argloc.pieces[0].pieces
-... # doctest: +NORMALIZE_WHITESPACE
-[PropbankTreePointer(22, 1), PropbankTreePointer(24, 0),
- PropbankTreePointer(25, 1)]
->>> print(argloc.select(inst.tree))
-(*CHAIN*
-  (*SPLIT* (NP (DT a) (NN group)) (IN of) (NP (NNS workers)))
-  (-NONE- *))
-
-
+
>>> inst = pb_instances[6]
+>>> inst.roleset
+'expose.01'
+>>> argloc, argid = inst.arguments[2]
+>>> argloc
+<PropbankChainTreePointer: 22:1,24:0,25:1*27:0>
+>>> argloc.pieces
+[<PropbankSplitTreePointer: 22:1,24:0,25:1>, PropbankTreePointer(27, 0)]
+>>> argloc.pieces[0].pieces
+...
+[PropbankTreePointer(22, 1), PropbankTreePointer(24, 0),
+ PropbankTreePointer(25, 1)]
+>>> print(argloc.select(inst.tree))
+(*CHAIN*
+  (*SPLIT* (NP (DT a) (NN group)) (IN of) (NP (NNS workers)))
+  (-NONE- *))
+
+

The PropBank Corpus also provides access to the frameset files, which define the argument labels used by the annotations, on a per-verb basis. Each frameset file contains one or more predicates, such as ‘turn’ or ‘turn_on’, each of which is divided into coarse-grained word senses called rolesets. For each roleset, the frameset file provides descriptions of the argument roles, along with examples.

-
-
->>> expose_01 = propbank.roleset('expose.01')
->>> turn_01 = propbank.roleset('turn.01')
->>> print(turn_01) # doctest: +ELLIPSIS
-<Element 'roleset' at ...>
->>> for role in turn_01.findall("roles/role"):
-...     print(role.attrib['n'], role.attrib['descr'])
-0 turner
-1 thing turning
-m direction, location
-
-
->>> from xml.etree import ElementTree
->>> print(ElementTree.tostring(turn_01.find('example')).decode('utf8').strip())
-<example name="transitive agentive">
-  <text>
-  John turned the key in the lock.
-  </text>
-  <arg n="0">John</arg>
-  <rel>turned</rel>
-  <arg n="1">the key</arg>
-  <arg f="LOC" n="m">in the lock</arg>
-</example>
-
-
+
>>> expose_01 = propbank.roleset('expose.01')
+>>> turn_01 = propbank.roleset('turn.01')
+>>> print(turn_01)
+<Element 'roleset' at ...>
+>>> for role in turn_01.findall("roles/role"):
+...     print(role.attrib['n'], role.attrib['descr'])
+0 turner
+1 thing turning
+m direction, location
+
+
+
>>> from xml.etree import ElementTree
+>>> print(ElementTree.tostring(turn_01.find('example')).decode('utf8').strip())
+<example name="transitive agentive">
+  <text>
+  John turned the key in the lock.
+  </text>
+  <arg n="0">John</arg>
+  <rel>turned</rel>
+  <arg n="1">the key</arg>
+  <arg f="LOC" n="m">in the lock</arg>
+</example>
+
+

Note that the standard corpus distribution only contains 10% of the treebank, so the parse trees are not available for instances starting at 9353:

-
-
->>> inst = pb_instances[9352]
->>> inst.fileid
-'wsj_0199.mrg'
->>> print(inst.tree) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-(S (NP-SBJ (NNP Trinity)) (VP (VBD said) (SBAR (-NONE- 0) ...))
->>> print(inst.predicate.select(inst.tree))
-(VB begin)
-
-
->>> inst = pb_instances[9353]
->>> inst.fileid
-'wsj_0200.mrg'
->>> print(inst.tree)
-None
->>> print(inst.predicate.select(inst.tree))
-Traceback (most recent call last):
-  . . .
-ValueError: Parse tree not avaialable
-
-
+
>>> inst = pb_instances[9352]
+>>> inst.fileid
+'wsj_0199.mrg'
+>>> print(inst.tree)
+(S (NP-SBJ (NNP Trinity)) (VP (VBD said) (SBAR (-NONE- 0) ...))
+>>> print(inst.predicate.select(inst.tree))
+(VB begin)
+
+
+
>>> inst = pb_instances[9353]
+>>> inst.fileid
+'wsj_0200.mrg'
+>>> print(inst.tree)
+None
+>>> print(inst.predicate.select(inst.tree))
+Traceback (most recent call last):
+  . . .
+ValueError: Parse tree not available
+
+

However, if you supply your own version of the treebank corpus (by putting it before the nltk-provided version on nltk.data.path, or by creating a ptb directory as described above and using the
@@ -530,12 +271,56 @@

PropBank

instances.
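+
+For example (a sketch; the path is hypothetical), a local copy can be placed ahead of the bundled data:
+
+>>> import nltk
+>>> nltk.data.path.insert(0, '/path/to/local/nltk_data') # hypothetical path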

A list of the verb lemmas contained in PropBank is returned by the propbank.verbs() method:

-
-
->>> propbank.verbs()
-['abandon', 'abate', 'abdicate', 'abet', 'abide', ...]
-
-
+
>>> propbank.verbs()
+['abandon', 'abate', 'abdicate', 'abet', 'abide', ...]
+
\ No newline at end of file
diff --git a/howto/relextract.html b/howto/relextract.html
index ae228066d..00a40cfb5 100644
--- a/howto/relextract.html
+++ b/howto/relextract.html
@@ -1,356 +1,113 @@
-

Information Extraction

+
+
+
+ +

+ NLTK +

+ +
+
+ +

Documentation

+ +
+ + + + +
+ +
+ +
+ + - - + + + +
+ +
+
+ +
+

Sample usage for relextract

+
+

Information Extraction

Information Extraction standardly consists of three subtasks:

-  1. Named Entity Recognition
-  2. Relation Extraction
-  3. Template Filling
+  1. Named Entity Recognition
+  2. Relation Extraction
+  3. Template Filling

-
-

Named Entities

+
+

Named Entities

The IEER corpus is marked up for a variety of Named Entities. A Named Entity (more strictly, a Named Entity mention) is a name of an entity belonging to a specified class. For example, the Named Entity
@@ -360,74 +117,67 @@

Named Entities

entity mention itself appears as the leaves of the subtree. This is illustrated below, where we show an extract of the chunk representation of document NYT_19980315.064:

-
-
->>> from nltk.corpus import ieer
->>> docs = ieer.parsed_docs('NYT_19980315')
->>> tree = docs[1].text
->>> print(tree) # doctest: +ELLIPSIS
-(DOCUMENT
-...
-  ``It's
-  a
-  chance
-  to
-  think
-  about
-  first-level
-  questions,''
-  said
-  Ms.
-  (PERSON Cohn)
-  ,
-  a
-  partner
-  in
-  the
-  (ORGANIZATION McGlashan &AMP; Sarrail)
-  firm
-  in
-  (LOCATION San Mateo)
-  ,
-  (LOCATION Calif.)
-  ...)
-
-
+
>>> from nltk.corpus import ieer
+>>> docs = ieer.parsed_docs('NYT_19980315')
+>>> tree = docs[1].text
+>>> print(tree)
+(DOCUMENT
+...
+  ``It's
+  a
+  chance
+  to
+  think
+  about
+  first-level
+  questions,''
+  said
+  Ms.
+  (PERSON Cohn)
+  ,
+  a
+  partner
+  in
+  the
+  (ORGANIZATION McGlashan &AMP; Sarrail)
+  firm
+  in
+  (LOCATION San Mateo)
+  ,
+  (LOCATION Calif.)
+  ...)
+
+

Thus, the Named Entity mentions in this example are Cohn, McGlashan &AMP; Sarrail, San Mateo and Calif..

The CoNLL2002 Dutch and Spanish data is treated similarly, although in this case, the strings are also POS tagged.

-
-
->>> from nltk.corpus import conll2002
->>> for doc in conll2002.chunked_sents('ned.train')[27]:
-...     print(doc)
-(u'Het', u'Art')
-(ORG Hof/N van/Prep Cassatie/N)
-(u'verbrak', u'V')
-(u'het', u'Art')
-(u'arrest', u'N')
-(u'zodat', u'Conj')
-(u'het', u'Pron')
-(u'moest', u'V')
-(u'worden', u'V')
-(u'overgedaan', u'V')
-(u'door', u'Prep')
-(u'het', u'Art')
-(u'hof', u'N')
-(u'van', u'Prep')
-(u'beroep', u'N')
-(u'van', u'Prep')
-(LOC Antwerpen/N)
-(u'.', u'Punc')
-
-
+
>>> from nltk.corpus import conll2002
+>>> for doc in conll2002.chunked_sents('ned.train')[27]:
+...     print(doc)
+('Het', 'Art')
+(ORG Hof/N van/Prep Cassatie/N)
+('verbrak', 'V')
+('het', 'Art')
+('arrest', 'N')
+('zodat', 'Conj')
+('het', 'Pron')
+('moest', 'V')
+('worden', 'V')
+('overgedaan', 'V')
+('door', 'Prep')
+('het', 'Art')
+('hof', 'N')
+('van', 'Prep')
+('beroep', 'N')
+('van', 'Prep')
+(LOC Antwerpen/N)
+('.', 'Punc')
+
-
-

Relation Extraction

+
+
+

Relation Extraction

Relation Extraction standardly consists of identifying specified relations between Named Entities. For example, assuming that we can recognize ORGANIZATIONs and LOCATIONs in text, we might want to also @@ -437,184 +187,218 @@

Relation Extraction

simple version of this task. The tree2semi_rel() function splits a chunk document into a list of two-member lists, each of which consists of a (possibly empty) string followed by a Tree (i.e., a Named Entity):

-
-
->>> from nltk.sem import relextract
->>> pairs = relextract.tree2semi_rel(tree)
->>> for s, tree in pairs[18:22]:
-...     print('("...%s", %s)' % (" ".join(s[-5:]),tree))
-("...about first-level questions,'' said Ms.", (PERSON Cohn))
-("..., a partner in the", (ORGANIZATION McGlashan &AMP; Sarrail))
-("...firm in", (LOCATION San Mateo))
-("...,", (LOCATION Calif.))
-
-
+
>>> from nltk.sem import relextract
+>>> pairs = relextract.tree2semi_rel(tree)
+>>> for s, tree in pairs[18:22]:
+...     print('("...%s", %s)' % (" ".join(s[-5:]),tree))
+("...about first-level questions,'' said Ms.", (PERSON Cohn))
+("..., a partner in the", (ORGANIZATION McGlashan &AMP; Sarrail))
+("...firm in", (LOCATION San Mateo))
+("...,", (LOCATION Calif.))
+
+

The function semi_rel2reldict() processes triples of these pairs, i.e., pairs of the form ((string1, Tree1), (string2, Tree2), (string3, Tree3)) and outputs a dictionary (a reldict) in which Tree1 is the subject of the relation, string2 is the filler and Tree3 is the object of the relation. string1 and string3 are stored as left and right context respectively.

-
-
->>> reldicts = relextract.semi_rel2reldict(pairs)
->>> for k, v in sorted(reldicts[0].items()):
-...     print(k, '=>', v) # doctest: +ELLIPSIS
-filler => of messages to their own ``Cyberia'' ...
-lcon => transactions.'' Each week, they post
-objclass => ORGANIZATION
-objsym => white_house
-objtext => White House
-rcon => for access to its planned
-subjclass => CARDINAL
-subjsym => hundreds
-subjtext => hundreds
-untagged_filler => of messages to their own ``Cyberia'' ...
-
-
+
>>> reldicts = relextract.semi_rel2reldict(pairs)
+>>> for k, v in sorted(reldicts[0].items()):
+...     print(k, '=>', v)
+filler => of messages to their own ``Cyberia'' ...
+lcon => transactions.'' Each week, they post
+objclass => ORGANIZATION
+objsym => white_house
+objtext => White House
+rcon => for access to its planned
+subjclass => CARDINAL
+subjsym => hundreds
+subjtext => hundreds
+untagged_filler => of messages to their own ``Cyberia'' ...
+
+

The next example shows some of the values for two reldicts

-
-
->>> for r in reldicts[18:20]:
-...     print('=' * 20)
-...     print(r['subjtext'])
-...     print(r['filler'])
-...     print(r['objtext'])
-====================
-Cohn
-, a partner in the
-McGlashan &AMP; Sarrail
-====================
-McGlashan &AMP; Sarrail
-firm in
-San Mateo
-
-
+corresponding to the 'NYT_19980315' text extract shown earlier.

+
>>> for r in reldicts[18:20]:
+...     print('=' * 20)
+...     print(r['subjtext'])
+...     print(r['filler'])
+...     print(r['objtext'])
+====================
+Cohn
+, a partner in the
+McGlashan &AMP; Sarrail
+====================
+McGlashan &AMP; Sarrail
+firm in
+San Mateo
+
+

The function relextract() allows us to filter the reldicts according to the classes of the subject and object named entities. In addition, we can specify that the filler text has to match a given regular expression, as illustrated in the next example. Here, we are looking for pairs of entities in the IN relation, where IN has signature <ORG, LOC>.

-
-
->>> import re
->>> IN = re.compile(r'.*\bin\b(?!\b.+ing\b)')
->>> for fileid in ieer.fileids():
-...     for doc in ieer.parsed_docs(fileid):
-...         for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern = IN):
-...             print(relextract.rtuple(rel))  # doctest: +ELLIPSIS
-[ORG: 'Christian Democrats'] ', the leading political forces in' [LOC: 'Italy']
-[ORG: 'AP'] ') _ Lebanese guerrillas attacked Israeli forces in southern' [LOC: 'Lebanon']
-[ORG: 'Security Council'] 'adopted Resolution 425. Huge yellow banners hung across intersections in' [LOC: 'Beirut']
-[ORG: 'U.N.'] 'failures in' [LOC: 'Africa']
-[ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia']
-[ORG: 'U.N.'] 'partners on a more effective role in' [LOC: 'Africa']
-[ORG: 'AP'] ') _ A bomb exploded in a mosque in central' [LOC: 'San`a']
-[ORG: 'Krasnoye Sormovo'] 'shipyard in the Soviet city of' [LOC: 'Gorky']
-[ORG: 'Kelab Golf Darul Ridzuan'] 'in' [LOC: 'Perak']
-[ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia']
-[ORG: 'WHYY'] 'in' [LOC: 'Philadelphia']
-[ORG: 'McGlashan &AMP; Sarrail'] 'firm in' [LOC: 'San Mateo']
-[ORG: 'Freedom Forum'] 'in' [LOC: 'Arlington']
-[ORG: 'Brookings Institution'] ', the research group in' [LOC: 'Washington']
-[ORG: 'Idealab'] ', a self-described business incubator based in' [LOC: 'Los Angeles']
-[ORG: 'Open Text'] ', based in' [LOC: 'Waterloo']
-...
-
-
-

The next example illustrates a case where the pattern is a disjunction of roles that a PERSON can occupy in an ORGANIZATION.

>>> import re
+>>> IN = re.compile(r'.*\bin\b(?!\b.+ing\b)')
+>>> for fileid in ieer.fileids():
+...     for doc in ieer.parsed_docs(fileid):
+...         for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern = IN):
+...             print(relextract.rtuple(rel))
+[ORG: 'Christian Democrats'] ', the leading political forces in' [LOC: 'Italy']
+[ORG: 'AP'] ') _ Lebanese guerrillas attacked Israeli forces in southern' [LOC: 'Lebanon']
+[ORG: 'Security Council'] 'adopted Resolution 425. Huge yellow banners hung across intersections in' [LOC: 'Beirut']
+[ORG: 'U.N.'] 'failures in' [LOC: 'Africa']
+[ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia']
+[ORG: 'U.N.'] 'partners on a more effective role in' [LOC: 'Africa']
+[ORG: 'AP'] ') _ A bomb exploded in a mosque in central' [LOC: 'San`a']
+[ORG: 'Krasnoye Sormovo'] 'shipyard in the Soviet city of' [LOC: 'Gorky']
+[ORG: 'Kelab Golf Darul Ridzuan'] 'in' [LOC: 'Perak']
+[ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia']
+[ORG: 'WHYY'] 'in' [LOC: 'Philadelphia']
+[ORG: 'McGlashan &AMP; Sarrail'] 'firm in' [LOC: 'San Mateo']
+[ORG: 'Freedom Forum'] 'in' [LOC: 'Arlington']
+[ORG: 'Brookings Institution'] ', the research group in' [LOC: 'Washington']
+[ORG: 'Idealab'] ', a self-described business incubator based in' [LOC: 'Los Angeles']
+[ORG: 'Open Text'] ', based in' [LOC: 'Waterloo']
+...
+
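+
+The same machinery adapts to more specific fillers; for instance (a sketch along the lines of the example above):
+
+>>> BASED_IN = re.compile(r'.*\bbased\s+in\b')
+>>> for doc in ieer.parsed_docs('NYT_19980315'): # doctest: +SKIP
+...     for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern=BASED_IN):
+...         print(relextract.rtuple(rel))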
+
+

The next example illustrates a case where the pattern is a disjunction of roles that a PERSON can occupy in an ORGANIZATION.

-
-
->>> roles = """
-... (.*(
-... analyst|
-... chair(wo)?man|
-... commissioner|
-... counsel|
-... director|
-... economist|
-... editor|
-... executive|
-... foreman|
-... governor|
-... head|
-... lawyer|
-... leader|
-... librarian).*)|
-... manager|
-... partner|
-... president|
-... producer|
-... professor|
-... researcher|
-... spokes(wo)?man|
-... writer|
-... ,\sof\sthe?\s*  # "X, of (the) Y"
-... """
->>> ROLES = re.compile(roles, re.VERBOSE)
->>> for fileid in ieer.fileids():
-...     for doc in ieer.parsed_docs(fileid):
-...         for rel in relextract.extract_rels('PER', 'ORG', doc, corpus='ieer', pattern=ROLES):
-...             print(relextract.rtuple(rel)) # doctest: +ELLIPSIS
-[PER: 'Kivutha Kibwana'] ', of the' [ORG: 'National Convention Assembly']
-[PER: 'Boban Boskovic'] ', chief executive of the' [ORG: 'Plastika']
-[PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
-[PER: 'Kiriyenko'] 'became a foreman at the' [ORG: 'Krasnoye Sormovo']
-[PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
-[PER: 'Mike Godwin'] ', chief counsel for the' [ORG: 'Electronic Frontier Foundation']
-...
-
-
+
>>> roles = r"""
+... (.*(
+... analyst|
+... chair(wo)?man|
+... commissioner|
+... counsel|
+... director|
+... economist|
+... editor|
+... executive|
+... foreman|
+... governor|
+... head|
+... lawyer|
+... leader|
+... librarian).*)|
+... manager|
+... partner|
+... president|
+... producer|
+... professor|
+... researcher|
+... spokes(wo)?man|
+... writer|
+... ,\sof\sthe?\s*  # "X, of (the) Y"
+... """
+>>> ROLES = re.compile(roles, re.VERBOSE)
+>>> for fileid in ieer.fileids():
+...     for doc in ieer.parsed_docs(fileid):
+...         for rel in relextract.extract_rels('PER', 'ORG', doc, corpus='ieer', pattern=ROLES):
+...             print(relextract.rtuple(rel))
+[PER: 'Kivutha Kibwana'] ', of the' [ORG: 'National Convention Assembly']
+[PER: 'Boban Boskovic'] ', chief executive of the' [ORG: 'Plastika']
+[PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
+[PER: 'Kiriyenko'] 'became a foreman at the' [ORG: 'Krasnoye Sormovo']
+[PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
+[PER: 'Mike Godwin'] ', chief counsel for the' [ORG: 'Electronic Frontier Foundation']
+...
+
+

In the case of the CoNLL2002 data, we can include POS tags in the query pattern. This example also illustrates how the output can be presented as something that looks more like a clause in a logical language.

-
-
->>> de = """
-... .*
-... (
-... de/SP|
-... del/SP
-... )
-... """
->>> DE = re.compile(de, re.VERBOSE)
->>> rels = [rel for doc in conll2002.chunked_sents('esp.train')
-...         for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)]
->>> for r in rels[:10]:
-...     print(relextract.clause(r, relsym='DE'))    # doctest: +NORMALIZE_WHITESPACE
-DE(u'tribunal_supremo', u'victoria')
-DE(u'museo_de_arte', u'alcorc\xf3n')
-DE(u'museo_de_bellas_artes', u'a_coru\xf1a')
-DE(u'siria', u'l\xedbano')
-DE(u'uni\xf3n_europea', u'pek\xedn')
-DE(u'ej\xe9rcito', u'rogberi')
-DE(u'juzgado_de_instrucci\xf3n_n\xfamero_1', u'san_sebasti\xe1n')
-DE(u'psoe', u'villanueva_de_la_serena')
-DE(u'ej\xe9rcito', u'l\xedbano')
-DE(u'juzgado_de_lo_penal_n\xfamero_2', u'ceuta')
->>> vnv = """
-... (
-... is/V|
-... was/V|
-... werd/V|
-... wordt/V
-... )
-... .*
-... van/Prep
-... """
->>> VAN = re.compile(vnv, re.VERBOSE)
->>> for doc in conll2002.chunked_sents('ned.train'):
-...     for r in relextract.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN):
-...         print(relextract.clause(r, relsym="VAN"))
-VAN(u"cornet_d'elzius", u'buitenlandse_handel')
-VAN(u'johan_rottiers', u'kardinaal_van_roey_instituut')
-VAN(u'annie_lennox', u'eurythmics')
-
-
+
>>> de = """
+... .*
+... (
+... de/SP|
+... del/SP
+... )
+... """
+>>> DE = re.compile(de, re.VERBOSE)
+>>> rels = [rel for doc in conll2002.chunked_sents('esp.train')
+...         for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)]
+>>> for r in rels[:10]:
+...     print(relextract.clause(r, relsym='DE'))
+DE('tribunal_supremo', 'victoria')
+DE('museo_de_arte', 'alcorc\xf3n')
+DE('museo_de_bellas_artes', 'a_coru\xf1a')
+DE('siria', 'l\xedbano')
+DE('uni\xf3n_europea', 'pek\xedn')
+DE('ej\xe9rcito', 'rogberi')
+DE('juzgado_de_instrucci\xf3n_n\xfamero_1', 'san_sebasti\xe1n')
+DE('psoe', 'villanueva_de_la_serena')
+DE('ej\xe9rcito', 'l\xedbano')
+DE('juzgado_de_lo_penal_n\xfamero_2', 'ceuta')
+>>> vnv = """
+... (
+... is/V|
+... was/V|
+... werd/V|
+... wordt/V
+... )
+... .*
+... van/Prep
+... """
+>>> VAN = re.compile(vnv, re.VERBOSE)
+>>> for doc in conll2002.chunked_sents('ned.train'):
+...     for r in relextract.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN):
+...         print(relextract.clause(r, relsym="VAN"))
+VAN("cornet_d'elzius", 'buitenlandse_handel')
+VAN('johan_rottiers', 'kardinaal_van_roey_instituut')
+VAN('annie_lennox', 'eurythmics')
+
+
+
\ No newline at end of file
diff --git a/howto/resolution.html b/howto/resolution.html
index 54c786f17..5b6360219 100644
--- a/howto/resolution.html
+++ b/howto/resolution.html
@@ -1,594 +1,378 @@
-

Resolution Theorem Prover

+
-
->>> from nltk.inference.resolution import *
->>> from nltk.sem import logic
->>> from nltk.sem.logic import *
->>> logic._counter._value = 0
->>> read_expr = logic.Expression.fromstring
-
-
->>> P = read_expr('P')
->>> Q = read_expr('Q')
->>> R = read_expr('R')
->>> A = read_expr('A')
->>> B = read_expr('B')
->>> x = read_expr('x')
->>> y = read_expr('y')
->>> z = read_expr('z')
-
-
-
-

Test most_general_unification()

-
-
->>> print(most_general_unification(x, x))
-{}
->>> print(most_general_unification(A, A))
-{}
->>> print(most_general_unification(A, x))
-{x: A}
->>> print(most_general_unification(x, A))
-{x: A}
->>> print(most_general_unification(x, y))
-{x: y}
->>> print(most_general_unification(P(x), P(A)))
-{x: A}
->>> print(most_general_unification(P(x,B), P(A,y)))
-{x: A, y: B}
->>> print(most_general_unification(P(x,B), P(B,x)))
-{x: B}
->>> print(most_general_unification(P(x,y), P(A,x)))
-{x: A, y: x}
->>> print(most_general_unification(P(Q(x)), P(y)))
-{y: Q(x)}
-
-
+ + + +
+ +
+
+ +
+

Sample usage for resolution

+
+

Resolution Theorem Prover

+
>>> from nltk.inference.resolution import *
+>>> from nltk.sem import logic
+>>> from nltk.sem.logic import *
+>>> logic._counter._value = 0
+>>> read_expr = logic.Expression.fromstring
+
-
-

Test unify()

-
-
->>> print(Clause([]).unify(Clause([])))
-[]
->>> print(Clause([P(x)]).unify(Clause([-P(A)])))
-[{}]
->>> print(Clause([P(A), Q(x)]).unify(Clause([-P(x), R(x)])))
-[{R(A), Q(A)}]
->>> print(Clause([P(A), Q(x), R(x,y)]).unify(Clause([-P(x), Q(y)])))
-[{Q(y), Q(A), R(A,y)}]
->>> print(Clause([P(A), -Q(y)]).unify(Clause([-P(x), Q(B)])))
-[{}]
->>> print(Clause([P(x), Q(x)]).unify(Clause([-P(A), -Q(B)])))
-[{-Q(B), Q(A)}, {-P(A), P(B)}]
->>> print(Clause([P(x,x), Q(x), R(x)]).unify(Clause([-P(A,z), -Q(B)])))
-[{-Q(B), Q(A), R(A)}, {-P(A,z), R(B), P(B,B)}]
-
-
->>> a = clausify(read_expr('P(A)'))
->>> b = clausify(read_expr('A=B'))
->>> print(a[0].unify(b[0]))
-[{P(B)}]
-
-
+
>>> P = read_expr('P')
+>>> Q = read_expr('Q')
+>>> R = read_expr('R')
+>>> A = read_expr('A')
+>>> B = read_expr('B')
+>>> x = read_expr('x')
+>>> y = read_expr('y')
+>>> z = read_expr('z')
+
-
-

Test is_tautology()

-
-
->>> print(Clause([P(A), -P(A)]).is_tautology())
-True
->>> print(Clause([-P(A), P(A)]).is_tautology())
-True
->>> print(Clause([P(x), -P(A)]).is_tautology())
-False
->>> print(Clause([Q(B), -P(A), P(A)]).is_tautology())
-True
->>> print(Clause([-Q(A), P(R(A)), -P(R(A)), Q(x), -R(y)]).is_tautology())
-True
->>> print(Clause([P(x), -Q(A)]).is_tautology())
-False
-
-
+
+

Test most_general_unification()

+
>>> print(most_general_unification(x, x))
+{}
+>>> print(most_general_unification(A, A))
+{}
+>>> print(most_general_unification(A, x))
+{x: A}
+>>> print(most_general_unification(x, A))
+{x: A}
+>>> print(most_general_unification(x, y))
+{x: y}
+>>> print(most_general_unification(P(x), P(A)))
+{x: A}
+>>> print(most_general_unification(P(x,B), P(A,y)))
+{x: A, y: B}
+>>> print(most_general_unification(P(x,B), P(B,x)))
+{x: B}
+>>> print(most_general_unification(P(x,y), P(A,x)))
+{x: A, y: x}
+>>> print(most_general_unification(P(Q(x)), P(y)))
+{y: Q(x)}
+
-
-

Test subsumes()

-
-
->>> print(Clause([P(A), Q(B)]).subsumes(Clause([P(A), Q(B)])))
-True
->>> print(Clause([-P(A)]).subsumes(Clause([P(A)])))
-False
->>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), P(A)])))
-True
->>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), R(A), P(A)])))
-True
->>> print(Clause([P(A), R(A), Q(B)]).subsumes(Clause([Q(B), P(A)])))
-False
->>> print(Clause([P(x)]).subsumes(Clause([P(A)])))
-True
->>> print(Clause([P(A)]).subsumes(Clause([P(x)])))
-True
-
-
+
+
+

Test unify()

+
>>> print(Clause([]).unify(Clause([])))
+[]
+>>> print(Clause([P(x)]).unify(Clause([-P(A)])))
+[{}]
+>>> print(Clause([P(A), Q(x)]).unify(Clause([-P(x), R(x)])))
+[{R(A), Q(A)}]
+>>> print(Clause([P(A), Q(x), R(x,y)]).unify(Clause([-P(x), Q(y)])))
+[{Q(y), Q(A), R(A,y)}]
+>>> print(Clause([P(A), -Q(y)]).unify(Clause([-P(x), Q(B)])))
+[{}]
+>>> print(Clause([P(x), Q(x)]).unify(Clause([-P(A), -Q(B)])))
+[{-Q(B), Q(A)}, {-P(A), P(B)}]
+>>> print(Clause([P(x,x), Q(x), R(x)]).unify(Clause([-P(A,z), -Q(B)])))
+[{-Q(B), Q(A), R(A)}, {-P(A,z), R(B), P(B,B)}]
+
-
-

Test prove()

-
-
->>> print(ResolutionProverCommand(read_expr('man(x)')).prove())
-False
->>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('(man(x) -> --man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('(man(x) <-> man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('-(man(x) <-> -man(x))')).prove())
-True
->>> print(ResolutionProverCommand(read_expr('all x.man(x)')).prove())
-False
->>> print(ResolutionProverCommand(read_expr('-all x.some y.F(x,y) & some x.all y.(-F(x,y))')).prove())
-False
->>> print(ResolutionProverCommand(read_expr('some x.all y.sees(x,y)')).prove())
-False
-
-
->>> p1 = read_expr('all x.(man(x) -> mortal(x))')
->>> p2 = read_expr('man(Socrates)')
->>> c = read_expr('mortal(Socrates)')
->>> ResolutionProverCommand(c, [p1,p2]).prove()
-True
-
-
->>> p1 = read_expr('all x.(man(x) -> walks(x))')
->>> p2 = read_expr('man(John)')
->>> c = read_expr('some y.walks(y)')
->>> ResolutionProverCommand(c, [p1,p2]).prove()
-True
-
-
->>> p = read_expr('some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))')
->>> c = read_expr('some e0.walk(e0,mary)')
->>> ResolutionProverCommand(c, [p]).prove()
-True
-
-
+
>>> a = clausify(read_expr('P(A)'))
+>>> b = clausify(read_expr('A=B'))
+>>> print(a[0].unify(b[0]))
+[{P(B)}]
+
-
-

Test proof()

-
-
->>> p1 = read_expr('all x.(man(x) -> mortal(x))')
->>> p2 = read_expr('man(Socrates)')
->>> c = read_expr('mortal(Socrates)')
->>> logic._counter._value = 0
->>> tp = ResolutionProverCommand(c, [p1,p2])
->>> tp.prove()
-True
->>> print(tp.proof())
-[1] {-mortal(Socrates)}     A
-[2] {-man(z2), mortal(z2)}  A
-[3] {man(Socrates)}         A
-[4] {-man(Socrates)}        (1, 2)
-[5] {mortal(Socrates)}      (2, 3)
-[6] {}                      (1, 5)
-<BLANKLINE>
-
-
+
+
+

Test is_tautology()

+
>>> print(Clause([P(A), -P(A)]).is_tautology())
+True
+>>> print(Clause([-P(A), P(A)]).is_tautology())
+True
+>>> print(Clause([P(x), -P(A)]).is_tautology())
+False
+>>> print(Clause([Q(B), -P(A), P(A)]).is_tautology())
+True
+>>> print(Clause([-Q(A), P(R(A)), -P(R(A)), Q(x), -R(y)]).is_tautology())
+True
+>>> print(Clause([P(x), -Q(A)]).is_tautology())
+False
+
+
+
+
+

Test subsumes()

+
>>> print(Clause([P(A), Q(B)]).subsumes(Clause([P(A), Q(B)])))
+True
+>>> print(Clause([-P(A)]).subsumes(Clause([P(A)])))
+False
+>>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), P(A)])))
+True
+>>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), R(A), P(A)])))
+True
+>>> print(Clause([P(A), R(A), Q(B)]).subsumes(Clause([Q(B), P(A)])))
+False
+>>> print(Clause([P(x)]).subsumes(Clause([P(A)])))
+True
+>>> print(Clause([P(A)]).subsumes(Clause([P(x)])))
+True
+
+
+
+
+

Test prove()

+
>>> print(ResolutionProverCommand(read_expr('man(x)')).prove())
+False
+>>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('(man(x) -> --man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('(man(x) <-> man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('-(man(x) <-> -man(x))')).prove())
+True
+>>> print(ResolutionProverCommand(read_expr('all x.man(x)')).prove())
+False
+>>> print(ResolutionProverCommand(read_expr('-all x.some y.F(x,y) & some x.all y.(-F(x,y))')).prove())
+False
+>>> print(ResolutionProverCommand(read_expr('some x.all y.sees(x,y)')).prove())
+False
+
+
+
>>> p1 = read_expr('all x.(man(x) -> mortal(x))')
+>>> p2 = read_expr('man(Socrates)')
+>>> c = read_expr('mortal(Socrates)')
+>>> ResolutionProverCommand(c, [p1,p2]).prove()
+True
+
+
+
>>> p1 = read_expr('all x.(man(x) -> walks(x))')
+>>> p2 = read_expr('man(John)')
+>>> c = read_expr('some y.walks(y)')
+>>> ResolutionProverCommand(c, [p1,p2]).prove()
+True
+
+
+
>>> p = read_expr('some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))')
+>>> c = read_expr('some e0.walk(e0,mary)')
+>>> ResolutionProverCommand(c, [p]).prove()
+True
+
+
+
+
+

Test proof()

+
>>> p1 = read_expr('all x.(man(x) -> mortal(x))')
+>>> p2 = read_expr('man(Socrates)')
+>>> c = read_expr('mortal(Socrates)')
+>>> logic._counter._value = 0
+>>> tp = ResolutionProverCommand(c, [p1,p2])
+>>> tp.prove()
+True
+>>> print(tp.proof())
+[1] {-mortal(Socrates)}     A
+[2] {-man(z2), mortal(z2)}  A
+[3] {man(Socrates)}         A
+[4] {-man(Socrates)}        (1, 2)
+[5] {mortal(Socrates)}      (2, 3)
+[6] {}                      (1, 5)
+
+
+
+
+

Question Answering

+
+
One answer
>>> p1 = read_expr('father_of(art,john)')
+>>> p2 = read_expr('father_of(bob,kim)')
+>>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))')
+>>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))')
+>>> logic._counter._value = 0
+>>> tp = ResolutionProverCommand(None, [p1,p2,p3,c])
+>>> sorted(tp.find_answers())
+[<ConstantExpression art>]
+>>> print(tp.proof()) 
+[1] {father_of(art,john)}                  A
+[2] {father_of(bob,kim)}                   A
+[3] {-father_of(z3,z4), parent_of(z3,z4)}  A
+[4] {-parent_of(z6,john), ANSWER(z6)}      A
+[5] {parent_of(art,john)}                  (1, 3)
+[6] {parent_of(bob,kim)}                   (2, 3)
+[7] {ANSWER(z6), -father_of(z6,john)}      (3, 4)
+[8] {ANSWER(art)}                          (1, 7)
+[9] {ANSWER(art)}                          (4, 5)
+
-
-

Question Answering

-
-
One answer
-
->>> p1 = read_expr('father_of(art,john)')
->>> p2 = read_expr('father_of(bob,kim)')
->>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))')
->>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))')
->>> logic._counter._value = 0
->>> tp = ResolutionProverCommand(None, [p1,p2,p3,c])
->>> sorted(tp.find_answers())
-[<ConstantExpression art>]
->>> print(tp.proof()) # doctest: +SKIP
-[1] {father_of(art,john)}                  A
-[2] {father_of(bob,kim)}                   A
-[3] {-father_of(z3,z4), parent_of(z3,z4)}  A
-[4] {-parent_of(z6,john), ANSWER(z6)}      A
-[5] {parent_of(art,john)}                  (1, 3)
-[6] {parent_of(bob,kim)}                   (2, 3)
-[7] {ANSWER(z6), -father_of(z6,john)}      (3, 4)
-[8] {ANSWER(art)}                          (1, 7)
-[9] {ANSWER(art)}                          (4, 5)
-<BLANKLINE>
-
-
Multiple answers
-
->>> p1 = read_expr('father_of(art,john)')
->>> p2 = read_expr('mother_of(ann,john)')
->>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))')
->>> p4 = read_expr('all x.all y.(mother_of(x,y) -> parent_of(x,y))')
->>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))')
->>> logic._counter._value = 0
->>> tp = ResolutionProverCommand(None, [p1,p2,p3,p4,c])
->>> sorted(tp.find_answers())
-[<ConstantExpression ann>, <ConstantExpression art>]
->>> print(tp.proof()) # doctest: +SKIP
-[ 1] {father_of(art,john)}                  A
-[ 2] {mother_of(ann,john)}                  A
-[ 3] {-father_of(z3,z4), parent_of(z3,z4)}  A
-[ 4] {-mother_of(z7,z8), parent_of(z7,z8)}  A
-[ 5] {-parent_of(z10,john), ANSWER(z10)}    A
-[ 6] {parent_of(art,john)}                  (1, 3)
-[ 7] {parent_of(ann,john)}                  (2, 4)
-[ 8] {ANSWER(z10), -father_of(z10,john)}    (3, 5)
-[ 9] {ANSWER(art)}                          (1, 8)
-[10] {ANSWER(z10), -mother_of(z10,john)}    (4, 5)
-[11] {ANSWER(ann)}                          (2, 10)
-[12] {ANSWER(art)}                          (5, 6)
-[13] {ANSWER(ann)}                          (5, 7)
-<BLANKLINE>
-
+
Multiple answers
>>> p1 = read_expr('father_of(art,john)')
+>>> p2 = read_expr('mother_of(ann,john)')
+>>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))')
+>>> p4 = read_expr('all x.all y.(mother_of(x,y) -> parent_of(x,y))')
+>>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))')
+>>> logic._counter._value = 0
+>>> tp = ResolutionProverCommand(None, [p1,p2,p3,p4,c])
+>>> sorted(tp.find_answers())
+[<ConstantExpression ann>, <ConstantExpression art>]
+>>> print(tp.proof()) 
+[ 1] {father_of(art,john)}                  A
+[ 2] {mother_of(ann,john)}                  A
+[ 3] {-father_of(z3,z4), parent_of(z3,z4)}  A
+[ 4] {-mother_of(z7,z8), parent_of(z7,z8)}  A
+[ 5] {-parent_of(z10,john), ANSWER(z10)}    A
+[ 6] {parent_of(art,john)}                  (1, 3)
+[ 7] {parent_of(ann,john)}                  (2, 4)
+[ 8] {ANSWER(z10), -father_of(z10,john)}    (3, 5)
+[ 9] {ANSWER(art)}                          (1, 8)
+[10] {ANSWER(z10), -mother_of(z10,john)}    (4, 5)
+[11] {ANSWER(ann)}                          (2, 10)
+[12] {ANSWER(art)}                          (5, 6)
+[13] {ANSWER(ann)}                          (5, 7)
+
+
+
+
+
\ No newline at end of file
diff --git a/howto/semantics.html b/howto/semantics.html
index 98fb4bc97..a38ed52c6 100644
--- a/howto/semantics.html
+++ b/howto/semantics.html
@@ -1,1052 +1,822 @@
-Semantics
+NLTK :: Sample usage for semantics
-
->>> import nltk
->>> from nltk.sem import Valuation, Model
->>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
-... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
-... ('dog', set(['d1'])),
-... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
->>> val = Valuation(v)
->>> dom = val.domain
->>> m = Model(dom, val)
-
-
-
-

Evaluation

-

The top-level method of a Model instance is evaluate(), which -assigns a semantic value to expressions of the logic module, under -an assignment g:

-
-
->>> dom = val.domain
->>> g = nltk.sem.Assignment(dom)
->>> m.evaluate('all x.(boy(x) -> - girl(x))', g)
-True
-
-
-

evaluate() calls a recursive function satisfy(), which in turn -calls a function i() to interpret non-logical constants and -individual variables. i() delegates the interpretation of these to -the model's Valuation and the variable assignment g


Sample usage for semantics

+
+

Semantics

+
>>> # Setup tests by setting the counter to 0
+>>> from nltk.sem import logic
+>>> logic._counter._value = 0
+
+
+
>>> import nltk
+>>> from nltk.sem import Valuation, Model
+>>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
+... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
+... ('dog', set(['d1'])),
+... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
+>>> val = Valuation(v)
+>>> dom = val.domain
+>>> m = Model(dom, val)
+
+
+
+

Evaluation

+

The top-level method of a Model instance is evaluate(), which +assigns a semantic value to expressions of the logic module, under +an assignment g:

+
>>> dom = val.domain
+>>> g = nltk.sem.Assignment(dom)
+>>> m.evaluate('all x.(boy(x) -> - girl(x))', g)
+True
+
+
+

evaluate() calls a recursive function satisfy(), which in turn +calls a function i() to interpret non-logical constants and +individual variables. i() delegates the interpretation of these to +the model’s Valuation and the variable assignment g respectively. Any atomic expression which cannot be assigned a value -by i raises an Undefined exception; this is caught by -evaluate, which returns the string 'Undefined'.

-
-
->>> m.evaluate('walk(adam)', g, trace=2)
-<BLANKLINE>
-'walk(adam)' is undefined under M, g
-'Undefined'
-
-
+by i raises an Undefined exception; this is caught by +evaluate, which returns the string 'Undefined'.

+
>>> m.evaluate('walk(adam)', g, trace=2)
+
+'walk(adam)' is undefined under M, g
+'Undefined'
+
-
-

Batch Processing

-

The utility functions interpret_sents() and evaluate_sents() are intended to -help with processing multiple sentences. Here's an example of the first of these:

-
-
->>> sents = ['Mary walks']
->>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')
->>> for result in results:
-...     for (synrep, semrep) in result:
-...         print(synrep)
-(S[SEM=<walk(mary)>]
-  (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>]
-    (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary))
-  (VP[NUM='sg', SEM=<\x.walk(x)>]
-    (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
-
-
-

In order to provide backwards compatibility with 'legacy' grammars where the semantics value +

+
+

Batch Processing

+

The utility functions interpret_sents() and evaluate_sents() are intended to +help with processing multiple sentences. Here’s an example of the first of these:

+
>>> sents = ['Mary walks']
+>>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')
+>>> for result in results:
+...     for (synrep, semrep) in result:
+...         print(synrep)
+(S[SEM=<walk(mary)>]
+  (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>]
+    (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary))
+  (VP[NUM='sg', SEM=<\x.walk(x)>]
+    (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
+
+
+

In order to provide backwards compatibility with ‘legacy’ grammars where the semantics value is specified with a lowercase -sem feature, the relevant feature name can be passed to the function using the -semkey parameter, as shown here:

-
-
->>> sents = ['raining']
->>> g = nltk.grammar.FeatureGrammar.fromstring("""
-... % start S
-... S[sem=<raining>] -> 'raining'
-... """)
->>> results = nltk.sem.util.interpret_sents(sents, g, semkey='sem')
->>> for result in results:
-...     for (synrep, semrep) in result:
-...         print(semrep)
-raining
-
-
-

The function evaluate_sents() works in a similar manner, but also needs to be -passed a Model against which the semantic representations are evaluated.

-
-

Unit Tests

+sem feature, the relevant feature name can be passed to the function using the +semkey parameter, as shown here:

+
>>> sents = ['raining']
+>>> g = nltk.grammar.FeatureGrammar.fromstring("""
+... % start S
+... S[sem=<raining>] -> 'raining'
+... """)
+>>> results = nltk.sem.util.interpret_sents(sents, g, semkey='sem')
+>>> for result in results:
+...     for (synrep, semrep) in result:
+...         print(semrep)
+raining
+
+

The function evaluate_sents() works in a similar manner, but also needs to be +passed a Model against which the semantic representations are evaluated.

+
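A minimal sketch of the evaluate_sents() call pattern, assuming the grammar file gramfile and the matching Model m and Assignment g that are only constructed later (in the mapping tests below); it is marked # doctest: +SKIP since it depends on that setup:
+>>> results = nltk.sem.util.evaluate_sents(['every dog barks'], gramfile, m, g) # doctest: +SKIP
+>>> for (syntree, semrep, value) in results[0]: # doctest: +SKIP
+...     print(semrep, value)
+all x.(dog(x) -> bark(x)) True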
+

Unit Tests

+
+
+
+

Unit tests for relations and valuations

+
>>> from nltk.sem import *
+
-
-

Unit tests for relations and valuations

-
-
->>> from nltk.sem import *
-
-

Relations are sets of tuples, all of the same length.

-
-
->>> s1 = set([('d1', 'd2'), ('d1', 'd1'), ('d2', 'd1')])
->>> is_rel(s1)
-True
->>> s2 = set([('d1', 'd2'), ('d1', 'd2'), ('d1',)])
->>> is_rel(s2)
-Traceback (most recent call last):
-  . . .
-ValueError: Set set([('d1', 'd2'), ('d1',)]) contains sequences of different lengths
->>> s3 = set(['d1', 'd2'])
->>> is_rel(s3)
-Traceback (most recent call last):
-  . . .
-ValueError: Set set(['d2', 'd1']) contains sequences of different lengths
->>> s4 = set2rel(s3)
->>> is_rel(s4)
-True
->>> is_rel(set())
-True
->>> null_binary_rel = set([(None, None)])
->>> is_rel(null_binary_rel)
-True
-
-
+
>>> s1 = set([('d1', 'd2'), ('d1', 'd1'), ('d2', 'd1')])
+>>> is_rel(s1)
+True
+>>> s2 = set([('d1', 'd2'), ('d1', 'd2'), ('d1',)])
+>>> is_rel(s2)
+Traceback (most recent call last):
+  . . .
+ValueError: Set set([('d1', 'd2'), ('d1',)]) contains sequences of different lengths
+>>> s3 = set(['d1', 'd2'])
+>>> is_rel(s3)
+Traceback (most recent call last):
+  . . .
+ValueError: Set set(['d2', 'd1']) contains sequences of different lengths
+>>> s4 = set2rel(s3)
+>>> is_rel(s4)
+True
+>>> is_rel(set())
+True
+>>> null_binary_rel = set([(None, None)])
+>>> is_rel(null_binary_rel)
+True
+
+

Sets of entities are converted into sets of singleton tuples (containing strings).

-
-
->>> sorted(set2rel(s3))
-[('d1',), ('d2',)]
->>> sorted(set2rel(set([1,3,5,])))
-['1', '3', '5']
->>> set2rel(set()) == set()
-True
->>> set2rel(set2rel(s3)) == set2rel(s3)
-True
-
-
+
>>> sorted(set2rel(s3))
+[('d1',), ('d2',)]
+>>> sorted(set2rel(set([1,3,5,])))
+['1', '3', '5']
+>>> set2rel(set()) == set()
+True
+>>> set2rel(set2rel(s3)) == set2rel(s3)
+True
+
+

Predication is evaluated by set membership.

-
-
->>> ('d1', 'd2') in s1
-True
->>> ('d2', 'd2') in s1
-False
->>> ('d1',) in s1
-False
->>> 'd2' in s1
-False
->>> ('d1',) in s4
-True
->>> ('d1',) in set()
-False
->>> 'd1' in  null_binary_rel
-False
-
-
->>> val = Valuation([('Fido', 'd1'), ('dog', set(['d1', 'd2'])), ('walk', set())])
->>> sorted(val['dog'])
-[('d1',), ('d2',)]
->>> val.domain == set(['d1', 'd2'])
-True
->>> print(val.symbols)
-['Fido', 'dog', 'walk']
-
-
+
>>> ('d1', 'd2') in s1
+True
+>>> ('d2', 'd2') in s1
+False
+>>> ('d1',) in s1
+False
+>>> 'd2' in s1
+False
+>>> ('d1',) in s4
+True
+>>> ('d1',) in set()
+False
+>>> 'd1' in  null_binary_rel
+False
+
+
+
>>> val = Valuation([('Fido', 'd1'), ('dog', set(['d1', 'd2'])), ('walk', set())])
+>>> sorted(val['dog'])
+[('d1',), ('d2',)]
+>>> val.domain == set(['d1', 'd2'])
+True
+>>> print(val.symbols)
+['Fido', 'dog', 'walk']
+
+

Parse a valuation from a string.

-
-
->>> v = """
-... john => b1
-... mary => g1
-... suzie => g2
-... fido => d1
-... tess => d2
-... noosa => n
-... girl => {g1, g2}
-... boy => {b1, b2}
-... dog => {d1, d2}
-... bark => {d1, d2}
-... walk => {b1, g2, d1}
-... chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}
-... see => {(b1, g1), (b2, d2), (g1, b1),(d2, b1), (g2, n)}
-... in => {(b1, n), (b2, n), (d2, n)}
-... with => {(b1, g1), (g1, b1), (d1, b1), (b1, d1)}
-... """
->>> val = Valuation.fromstring(v)
-
-
->>> print(val) # doctest: +SKIP
-{'bark': set([('d1',), ('d2',)]),
- 'boy': set([('b1',), ('b2',)]),
- 'chase': set([('b1', 'g1'), ('g2', 'd2'), ('g1', 'd1'), ('b2', 'g1')]),
- 'dog': set([('d1',), ('d2',)]),
- 'fido': 'd1',
- 'girl': set([('g2',), ('g1',)]),
- 'in': set([('d2', 'n'), ('b1', 'n'), ('b2', 'n')]),
- 'john': 'b1',
- 'mary': 'g1',
- 'noosa': 'n',
- 'see': set([('b1', 'g1'), ('b2', 'd2'), ('d2', 'b1'), ('g2', 'n'), ('g1', 'b1')]),
- 'suzie': 'g2',
- 'tess': 'd2',
- 'walk': set([('d1',), ('b1',), ('g2',)]),
- 'with': set([('b1', 'g1'), ('d1', 'b1'), ('b1', 'd1'), ('g1', 'b1')])}
-
-
+
>>> v = """
+... john => b1
+... mary => g1
+... suzie => g2
+... fido => d1
+... tess => d2
+... noosa => n
+... girl => {g1, g2}
+... boy => {b1, b2}
+... dog => {d1, d2}
+... bark => {d1, d2}
+... walk => {b1, g2, d1}
+... chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}
+... see => {(b1, g1), (b2, d2), (g1, b1),(d2, b1), (g2, n)}
+... in => {(b1, n), (b2, n), (d2, n)}
+... with => {(b1, g1), (g1, b1), (d1, b1), (b1, d1)}
+... """
+>>> val = Valuation.fromstring(v)
+
+
+
>>> print(val) 
+{'bark': set([('d1',), ('d2',)]),
+ 'boy': set([('b1',), ('b2',)]),
+ 'chase': set([('b1', 'g1'), ('g2', 'd2'), ('g1', 'd1'), ('b2', 'g1')]),
+ 'dog': set([('d1',), ('d2',)]),
+ 'fido': 'd1',
+ 'girl': set([('g2',), ('g1',)]),
+ 'in': set([('d2', 'n'), ('b1', 'n'), ('b2', 'n')]),
+ 'john': 'b1',
+ 'mary': 'g1',
+ 'noosa': 'n',
+ 'see': set([('b1', 'g1'), ('b2', 'd2'), ('d2', 'b1'), ('g2', 'n'), ('g1', 'b1')]),
+ 'suzie': 'g2',
+ 'tess': 'd2',
+ 'walk': set([('d1',), ('b1',), ('g2',)]),
+ 'with': set([('b1', 'g1'), ('d1', 'b1'), ('b1', 'd1'), ('g1', 'b1')])}
+
-
-

Unit tests for function argument application in a Model

-
-
->>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
-...      ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
-...      ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')])),
-...      ('kiss', null_binary_rel)]
->>> val = Valuation(v)
->>> dom = val.domain
->>> m = Model(dom, val)
->>> g = Assignment(dom)
->>> sorted(val['boy'])
-[('b1',), ('b2',)]
->>> ('b1',) in val['boy']
-True
->>> ('g1',) in val['boy']
-False
->>> ('foo',) in val['boy']
-False
->>> ('b1', 'g1') in val['love']
-True
->>> ('b1', 'b1') in val['kiss']
-False
->>> sorted(val.domain)
-['b1', 'b2', 'd1', 'g1', 'g2']
-
-
-
-

Model Tests

+
+
+

Unit tests for function argument application in a Model

+
>>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
+...      ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
+...      ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')])),
+...      ('kiss', null_binary_rel)]
+>>> val = Valuation(v)
+>>> dom = val.domain
+>>> m = Model(dom, val)
+>>> g = Assignment(dom)
+>>> sorted(val['boy'])
+[('b1',), ('b2',)]
+>>> ('b1',) in val['boy']
+True
+>>> ('g1',) in val['boy']
+False
+>>> ('foo',) in val['boy']
+False
+>>> ('b1', 'g1') in val['love']
+True
+>>> ('b1', 'b1') in val['kiss']
+False
+>>> sorted(val.domain)
+['b1', 'b2', 'd1', 'g1', 'g2']
+
+
+
+

Model Tests

Extension of Lambda expressions

-
-
->>> v0 = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
-... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
-... ('dog', set(['d1'])),
-... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
-
-
->>> val0 = Valuation(v0)
->>> dom0 = val0.domain
->>> m0 = Model(dom0, val0)
->>> g0 = Assignment(dom0)
-
-
->>> print(m0.evaluate(r'\x. \y. love(x, y)', g0) == {'g2': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'b2': {'g2': True, 'b2': False, 'b1': False, 'g1': False, 'd1': False}, 'b1': {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False}, 'g1': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'd1': {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False}})
-True
->>> print(m0.evaluate(r'\x. dog(x) (adam)', g0))
-False
->>> print(m0.evaluate(r'\x. (dog(x) | boy(x)) (adam)', g0))
-True
->>> print(m0.evaluate(r'\x. \y. love(x, y)(fido)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False})
-True
->>> print(m0.evaluate(r'\x. \y. love(x, y)(adam)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False})
-True
->>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)', g0) == {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False})
-True
->>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)(adam)', g0))
-True
->>> print(m0.evaluate(r'\x. \y. love(x, y)(betty, adam)', g0))
-True
->>> print(m0.evaluate(r'\y. \x. love(x, y)(fido)(adam)', g0))
-False
->>> print(m0.evaluate(r'\y. \x. love(x, y)(betty, adam)', g0))
-True
->>> print(m0.evaluate(r'\x. exists y. love(x, y)', g0) == {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False})
-True
->>> print(m0.evaluate(r'\z. adam', g0) == {'g2': 'b1', 'b2': 'b1', 'b1': 'b1', 'g1': 'b1', 'd1': 'b1'})
-True
->>> print(m0.evaluate(r'\z. love(x, y)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False})
-True
-
-
+
>>> v0 = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
+... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
+... ('dog', set(['d1'])),
+... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
+
+
+
>>> val0 = Valuation(v0)
+>>> dom0 = val0.domain
+>>> m0 = Model(dom0, val0)
+>>> g0 = Assignment(dom0)
+
+
+
>>> print(m0.evaluate(r'\x. \y. love(x, y)', g0) == {'g2': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'b2': {'g2': True, 'b2': False, 'b1': False, 'g1': False, 'd1': False}, 'b1': {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False}, 'g1': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'd1': {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False}})
+True
+>>> print(m0.evaluate(r'\x. dog(x) (adam)', g0))
+False
+>>> print(m0.evaluate(r'\x. (dog(x) | boy(x)) (adam)', g0))
+True
+>>> print(m0.evaluate(r'\x. \y. love(x, y)(fido)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False})
+True
+>>> print(m0.evaluate(r'\x. \y. love(x, y)(adam)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False})
+True
+>>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)', g0) == {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False})
+True
+>>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)(adam)', g0))
+True
+>>> print(m0.evaluate(r'\x. \y. love(x, y)(betty, adam)', g0))
+True
+>>> print(m0.evaluate(r'\y. \x. love(x, y)(fido)(adam)', g0))
+False
+>>> print(m0.evaluate(r'\y. \x. love(x, y)(betty, adam)', g0))
+True
+>>> print(m0.evaluate(r'\x. exists y. love(x, y)', g0) == {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False})
+True
+>>> print(m0.evaluate(r'\z. adam', g0) == {'g2': 'b1', 'b2': 'b1', 'b1': 'b1', 'g1': 'b1', 'd1': 'b1'})
+True
+>>> print(m0.evaluate(r'\z. love(x, y)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False})
+True
+
+
+
+
+
+

Propositional Model Test

+
>>> tests = [
+...     ('P & Q', True),
+...     ('P & R', False),
+...     ('- P', False),
+...     ('- R', True),
+...     ('- - P', True),
+...     ('- (P & R)', True),
+...     ('P | R', True),
+...     ('R | P', True),
+...     ('R | R', False),
+...     ('- P | R', False),
+...     ('P | - P', True),
+...     ('P -> Q', True),
+...     ('P -> R', False),
+...     ('R -> P', True),
+...     ('P <-> P', True),
+...     ('R <-> R', True),
+...     ('P <-> R', False),
+...     ]
+>>> val1 = Valuation([('P', True), ('Q', True), ('R', False)])
+>>> dom = set([])
+>>> m = Model(dom, val1)
+>>> g = Assignment(dom)
+>>> for (sent, testvalue) in tests:
+...     semvalue = m.evaluate(sent, g)
+...     if semvalue == testvalue:
+...         print('*', end=' ')
+* * * * * * * * * * * * * * * * *
+
+
+
+
+

Test of i Function

+
>>> from nltk.sem import Expression
+>>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
+...      ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
+...      ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
+>>> val = Valuation(v)
+>>> dom = val.domain
+>>> m = Model(dom, val)
+>>> g = Assignment(dom, [('x', 'b1'), ('y', 'g2')])
+>>> exprs = ['adam', 'girl', 'love', 'walks', 'x', 'y', 'z']
+>>> parsed_exprs = [Expression.fromstring(e) for e in exprs]
+>>> sorted_set = lambda x: sorted(x) if isinstance(x, set) else x
+>>> for parsed in parsed_exprs:
+...     try:
+...         print("'%s' gets value %s" % (parsed, sorted_set(m.i(parsed, g))))
+...     except Undefined:
+...         print("'%s' is Undefined" % parsed)
+'adam' gets value b1
+'girl' gets value [('g1',), ('g2',)]
+'love' gets value [('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]
+'walks' is Undefined
+'x' gets value b1
+'y' gets value g2
+'z' is Undefined
+
+
+
+
+

Test for formulas in Model

+
>>> tests = [
+...     ('love(adam, betty)', True),
+...     ('love(adam, sue)', 'Undefined'),
+...     ('dog(fido)', True),
+...     ('- dog(fido)', False),
+...     ('- - dog(fido)', True),
+...     ('- dog(sue)', 'Undefined'),
+...     ('dog(fido) & boy(adam)', True),
+...     ('- (dog(fido) & boy(adam))', False),
+...     ('- dog(fido) & boy(adam)', False),
+...     ('dog(fido) | boy(adam)', True),
+...     ('- (dog(fido) | boy(adam))', False),
+...     ('- dog(fido) | boy(adam)', True),
+...     ('- dog(fido) | - boy(adam)', False),
+...     ('dog(fido) -> boy(adam)', True),
+...     ('- (dog(fido) -> boy(adam))', False),
+...     ('- dog(fido) -> boy(adam)', True),
+...     ('exists x . love(adam, x)', True),
+...     ('all x . love(adam, x)', False),
+...     ('fido = fido', True),
+...     ('exists x . all y. love(x, y)', False),
+...     ('exists x . (x = fido)', True),
+...     ('all x . (dog(x) | - dog(x))', True),
+...     ('adam = mia', 'Undefined'),
+...     ('\\x. (boy(x) | girl(x))', {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False}),
+...     ('\\x. exists y. (boy(x) & love(x, y))', {'g2': False, 'b2': True, 'b1': True, 'g1': False, 'd1': False}),
+...     ('exists z1. boy(z1)', True),
+...     ('exists x. (boy(x) & - (x = adam))', True),
+...     ('exists x. (boy(x) & all y. love(y, x))', False),
+...     ('all x. (boy(x) | girl(x))', False),
+...     ('all x. (girl(x) -> exists y. boy(y) & love(x, y))', False),
+...     ('exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', True),
+...     ('exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', False),
+...     ('all x. (dog(x) -> - girl(x))', True),
+...     ('exists x. exists y. (love(x, y) & love(x, y))', True),
+...     ]
+>>> for (sent, testvalue) in tests:
+...     semvalue = m.evaluate(sent, g)
+...     if semvalue == testvalue:
+...         print('*', end=' ')
+...     else:
+...         print(sent, semvalue)
+* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+
+
+
+

Satisfier Tests

+
>>> formulas = [
+...     'boy(x)',
+...     '(x = x)',
+...     '(boy(x) | girl(x))',
+...     '(boy(x) & girl(x))',
+...     'love(adam, x)',
+...     'love(x, adam)',
+...     '- (x = adam)',
+...     'exists z22. love(x, z22)',
+...     'exists y. love(y, x)',
+...     'all y. (girl(y) -> love(x, y))',
+...     'all y. (girl(y) -> love(y, x))',
+...     'all y. (girl(y) -> (boy(x) & love(y, x)))',
+...     'boy(x) & all y. (girl(y) -> love(x, y))',
+...     'boy(x) & all y. (girl(y) -> love(y, x))',
+...     'boy(x) & exists y. (girl(y) & love(y, x))',
+...     'girl(x) -> dog(x)',
+...     'all y. (dog(y) -> (x = y))',
+...     '- exists y. love(y, x)',
+...     'exists y. (love(adam, y) & love(y, x))'
+...     ]
+>>> g.purge()
+>>> g.add('x', 'b1')
+{'x': 'b1'}
+>>> for f in formulas:
+...     try:
+...         print("'%s' gets value: %s" % (f, m.evaluate(f, g)))
+...     except Undefined:
+...         print("'%s' is Undefined" % f)
+'boy(x)' gets value: True
+'(x = x)' gets value: True
+'(boy(x) | girl(x))' gets value: True
+'(boy(x) & girl(x))' gets value: False
+'love(adam, x)' gets value: False
+'love(x, adam)' gets value: False
+'- (x = adam)' gets value: False
+'exists z22. love(x, z22)' gets value: True
+'exists y. love(y, x)' gets value: True
+'all y. (girl(y) -> love(x, y))' gets value: False
+'all y. (girl(y) -> love(y, x))' gets value: True
+'all y. (girl(y) -> (boy(x) & love(y, x)))' gets value: True
+'boy(x) & all y. (girl(y) -> love(x, y))' gets value: False
+'boy(x) & all y. (girl(y) -> love(y, x))' gets value: True
+'boy(x) & exists y. (girl(y) & love(y, x))' gets value: True
+'girl(x) -> dog(x)' gets value: True
+'all y. (dog(y) -> (x = y))' gets value: False
+'- exists y. love(y, x)' gets value: False
+'exists y. (love(adam, y) & love(y, x))' gets value: True
+
+
>>> from nltk.sem import Expression
+>>> for fmla in formulas:
+...     p = Expression.fromstring(fmla)
+...     g.purge()
+...     print("Satisfiers of '%s':\n\t%s" % (p, sorted(m.satisfiers(p, 'x', g))))
+Satisfiers of 'boy(x)':
+['b1', 'b2']
+Satisfiers of '(x = x)':
+['b1', 'b2', 'd1', 'g1', 'g2']
+Satisfiers of '(boy(x) | girl(x))':
+['b1', 'b2', 'g1', 'g2']
+Satisfiers of '(boy(x) & girl(x))':
+[]
+Satisfiers of 'love(adam,x)':
+['g1']
+Satisfiers of 'love(x,adam)':
+['g1', 'g2']
+Satisfiers of '-(x = adam)':
+['b2', 'd1', 'g1', 'g2']
+Satisfiers of 'exists z22.love(x,z22)':
+['b1', 'b2', 'g1', 'g2']
+Satisfiers of 'exists y.love(y,x)':
+['b1', 'g1', 'g2']
+Satisfiers of 'all y.(girl(y) -> love(x,y))':
+[]
+Satisfiers of 'all y.(girl(y) -> love(y,x))':
+['b1']
+Satisfiers of 'all y.(girl(y) -> (boy(x) & love(y,x)))':
+['b1']
+Satisfiers of '(boy(x) & all y.(girl(y) -> love(x,y)))':
+[]
+Satisfiers of '(boy(x) & all y.(girl(y) -> love(y,x)))':
+['b1']
+Satisfiers of '(boy(x) & exists y.(girl(y) & love(y,x)))':
+['b1']
+Satisfiers of '(girl(x) -> dog(x))':
+['b1', 'b2', 'd1']
+Satisfiers of 'all y.(dog(y) -> (x = y))':
+['d1']
+Satisfiers of '-exists y.love(y,x)':
+['b2', 'd1']
+Satisfiers of 'exists y.(love(adam,y) & love(y,x))':
+['b1']
+
-
-

Propositional Model Test

-
-
->>> tests = [
-...     ('P & Q', True),
-...     ('P & R', False),
-...     ('- P', False),
-...     ('- R', True),
-...     ('- - P', True),
-...     ('- (P & R)', True),
-...     ('P | R', True),
-...     ('R | P', True),
-...     ('R | R', False),
-...     ('- P | R', False),
-...     ('P | - P', True),
-...     ('P -> Q', True),
-...     ('P -> R', False),
-...     ('R -> P', True),
-...     ('P <-> P', True),
-...     ('R <-> R', True),
-...     ('P <-> R', False),
-...     ]
->>> val1 = Valuation([('P', True), ('Q', True), ('R', False)])
->>> dom = set([])
->>> m = Model(dom, val1)
->>> g = Assignment(dom)
->>> for (sent, testvalue) in tests:
-...     semvalue = m.evaluate(sent, g)
-...     if semvalue == testvalue:
-...         print('*', end=' ')
-* * * * * * * * * * * * * * * * *
-
-
+
+
+

Tests based on the Blackburn & Bos testsuite

+
>>> v1 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'),
+...       ('honey_bunny', 'd4'), ('yolanda', 'd5'),
+...       ('customer', set(['d1', 'd2'])),
+...       ('robber', set(['d3', 'd4'])),
+...       ('love', set([('d3', 'd4')]))]
+>>> val1 = Valuation(v1)
+>>> dom1 = val1.domain
+>>> m1 = Model(dom1, val1)
+>>> g1 = Assignment(dom1)
+
-
-

Test of i Function

-
-
->>> from nltk.sem import Expression
->>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
-...      ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
-...      ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
->>> val = Valuation(v)
->>> dom = val.domain
->>> m = Model(dom, val)
->>> g = Assignment(dom, [('x', 'b1'), ('y', 'g2')])
->>> exprs = ['adam', 'girl', 'love', 'walks', 'x', 'y', 'z']
->>> parsed_exprs = [Expression.fromstring(e) for e in exprs]
->>> sorted_set = lambda x: sorted(x) if isinstance(x, set) else x
->>> for parsed in parsed_exprs:
-...     try:
-...         print("'%s' gets value %s" % (parsed, sorted_set(m.i(parsed, g))))
-...     except Undefined:
-...         print("'%s' is Undefined" % parsed)
-'adam' gets value b1
-'girl' gets value [('g1',), ('g2',)]
-'love' gets value [('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]
-'walks' is Undefined
-'x' gets value b1
-'y' gets value g2
-'z' is Undefined
-
-
+
>>> v2 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'),
+...       ('honey_bunny', 'd4'), ('yolanda', 'd4'),
+...       ('customer', set(['d1', 'd2', 'd5', 'd6'])),
+...       ('robber', set(['d3', 'd4'])),
+...       ('love', set([(None, None)]))]
+>>> val2 = Valuation(v2)
+>>> dom2 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6'])
+>>> m2 = Model(dom2, val2)
+>>> g2 = Assignment(dom2)
+>>> g21 = Assignment(dom2)
+>>> g21.add('y', 'd3')
+{'y': 'd3'}
+
-
-

Test for formulas in Model

-
-
->>> tests = [
-...     ('love(adam, betty)', True),
-...     ('love(adam, sue)', 'Undefined'),
-...     ('dog(fido)', True),
-...     ('- dog(fido)', False),
-...     ('- - dog(fido)', True),
-...     ('- dog(sue)', 'Undefined'),
-...     ('dog(fido) & boy(adam)', True),
-...     ('- (dog(fido) & boy(adam))', False),
-...     ('- dog(fido) & boy(adam)', False),
-...     ('dog(fido) | boy(adam)', True),
-...     ('- (dog(fido) | boy(adam))', False),
-...     ('- dog(fido) | boy(adam)', True),
-...     ('- dog(fido) | - boy(adam)', False),
-...     ('dog(fido) -> boy(adam)', True),
-...     ('- (dog(fido) -> boy(adam))', False),
-...     ('- dog(fido) -> boy(adam)', True),
-...     ('exists x . love(adam, x)', True),
-...     ('all x . love(adam, x)', False),
-...     ('fido = fido', True),
-...     ('exists x . all y. love(x, y)', False),
-...     ('exists x . (x = fido)', True),
-...     ('all x . (dog(x) | - dog(x))', True),
-...     ('adam = mia', 'Undefined'),
-...     ('\\x. (boy(x) | girl(x))', {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False}),
-...     ('\\x. exists y. (boy(x) & love(x, y))', {'g2': False, 'b2': True, 'b1': True, 'g1': False, 'd1': False}),
-...     ('exists z1. boy(z1)', True),
-...     ('exists x. (boy(x) & - (x = adam))', True),
-...     ('exists x. (boy(x) & all y. love(y, x))', False),
-...     ('all x. (boy(x) | girl(x))', False),
-...     ('all x. (girl(x) -> exists y. boy(y) & love(x, y))', False),
-...     ('exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', True),
-...     ('exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', False),
-...     ('all x. (dog(x) -> - girl(x))', True),
-...     ('exists x. exists y. (love(x, y) & love(x, y))', True),
-...     ]
->>> for (sent, testvalue) in tests:
-...     semvalue = m.evaluate(sent, g)
-...     if semvalue == testvalue:
-...         print('*', end=' ')
-...     else:
-...         print(sent, semvalue)
-* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
-
-
+
>>> v3 = [('mia', 'd1'), ('jody', 'd2'), ('jules', 'd3'),
+...       ('vincent', 'd4'),
+...       ('woman', set(['d1', 'd2'])), ('man', set(['d3', 'd4'])),
+...       ('joke', set(['d5', 'd6'])), ('episode', set(['d7', 'd8'])),
+...       ('in', set([('d5', 'd7'), ('d5', 'd8')])),
+...       ('tell', set([('d1', 'd5'), ('d2', 'd6')]))]
+>>> val3 = Valuation(v3)
+>>> dom3 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8'])
+>>> m3 = Model(dom3, val3)
+>>> g3 = Assignment(dom3)
+
-
-

Satisfier Tests

-
-
->>> formulas = [
-...     'boy(x)',
-...     '(x = x)',
-...     '(boy(x) | girl(x))',
-...     '(boy(x) & girl(x))',
-...     'love(adam, x)',
-...     'love(x, adam)',
-...     '- (x = adam)',
-...     'exists z22. love(x, z22)',
-...     'exists y. love(y, x)',
-...     'all y. (girl(y) -> love(x, y))',
-...     'all y. (girl(y) -> love(y, x))',
-...     'all y. (girl(y) -> (boy(x) & love(y, x)))',
-...     'boy(x) & all y. (girl(y) -> love(x, y))',
-...     'boy(x) & all y. (girl(y) -> love(y, x))',
-...     'boy(x) & exists y. (girl(y) & love(y, x))',
-...     'girl(x) -> dog(x)',
-...     'all y. (dog(y) -> (x = y))',
-...     '- exists y. love(y, x)',
-...     'exists y. (love(adam, y) & love(y, x))'
-...     ]
->>> g.purge()
->>> g.add('x', 'b1')
-{'x': 'b1'}
->>> for f in formulas: # doctest: +NORMALIZE_WHITESPACE
-...     try:
-...         print("'%s' gets value: %s" % (f, m.evaluate(f, g)))
-...     except Undefined:
-...         print("'%s' is Undefined" % f)
-'boy(x)' gets value: True
-'(x = x)' gets value: True
-'(boy(x) | girl(x))' gets value: True
-'(boy(x) & girl(x))' gets value: False
-'love(adam, x)' gets value: False
-'love(x, adam)' gets value: False
-'- (x = adam)' gets value: False
-'exists z22. love(x, z22)' gets value: True
-'exists y. love(y, x)' gets value: True
-'all y. (girl(y) -> love(x, y))' gets value: False
-'all y. (girl(y) -> love(y, x))' gets value: True
-'all y. (girl(y) -> (boy(x) & love(y, x)))' gets value: True
-'boy(x) & all y. (girl(y) -> love(x, y))' gets value: False
-'boy(x) & all y. (girl(y) -> love(y, x))' gets value: True
-'boy(x) & exists y. (girl(y) & love(y, x))' gets value: True
-'girl(x) -> dog(x)' gets value: True
-'all y. (dog(y) -> (x = y))' gets value: False
-'- exists y. love(y, x)' gets value: False
-'exists y. (love(adam, y) & love(y, x))' gets value: True
-
-
->>> from nltk.sem import Expression
->>> for fmla in formulas: # doctest: +NORMALIZE_WHITESPACE
-...     p = Expression.fromstring(fmla)
-...     g.purge()
-...     print("Satisfiers of '%s':\n\t%s" % (p, sorted(m.satisfiers(p, 'x', g))))
-Satisfiers of 'boy(x)':
-['b1', 'b2']
-Satisfiers of '(x = x)':
-['b1', 'b2', 'd1', 'g1', 'g2']
-Satisfiers of '(boy(x) | girl(x))':
-['b1', 'b2', 'g1', 'g2']
-Satisfiers of '(boy(x) & girl(x))':
-[]
-Satisfiers of 'love(adam,x)':
-['g1']
-Satisfiers of 'love(x,adam)':
-['g1', 'g2']
-Satisfiers of '-(x = adam)':
-['b2', 'd1', 'g1', 'g2']
-Satisfiers of 'exists z22.love(x,z22)':
-['b1', 'b2', 'g1', 'g2']
-Satisfiers of 'exists y.love(y,x)':
-['b1', 'g1', 'g2']
-Satisfiers of 'all y.(girl(y) -> love(x,y))':
-[]
-Satisfiers of 'all y.(girl(y) -> love(y,x))':
-['b1']
-Satisfiers of 'all y.(girl(y) -> (boy(x) & love(y,x)))':
-['b1']
-Satisfiers of '(boy(x) & all y.(girl(y) -> love(x,y)))':
-[]
-Satisfiers of '(boy(x) & all y.(girl(y) -> love(y,x)))':
-['b1']
-Satisfiers of '(boy(x) & exists y.(girl(y) & love(y,x)))':
-['b1']
-Satisfiers of '(girl(x) -> dog(x))':
-['b1', 'b2', 'd1']
-Satisfiers of 'all y.(dog(y) -> (x = y))':
-['d1']
-Satisfiers of '-exists y.love(y,x)':
-['b2', 'd1']
-Satisfiers of 'exists y.(love(adam,y) & love(y,x))':
-['b1']
-
-
+
>>> tests = [
+...     ('exists x. robber(x)', m1, g1, True),
+...     ('exists x. exists y. love(y, x)', m1, g1, True),
+...     ('exists x0. exists x1. love(x1, x0)', m2, g2, False),
+...     ('all x. all y. love(y, x)', m2, g2, False),
+...     ('- (all x. all y. love(y, x))', m2, g2, True),
+...     ('all x. all y. - love(y, x)', m2, g2, True),
+...     ('yolanda = honey_bunny', m2, g2, True),
+...     ('mia = honey_bunny', m2, g2, 'Undefined'),
+...     ('- (yolanda = honey_bunny)', m2, g2, False),
+...     ('- (mia = honey_bunny)', m2, g2, 'Undefined'),
+...     ('all x. (robber(x) | customer(x))', m2, g2, True),
+...     ('- (all x. (robber(x) | customer(x)))', m2, g2, False),
+...     ('(robber(x) | customer(x))', m2, g2, 'Undefined'),
+...     ('(robber(y) | customer(y))', m2, g21, True),
+...     ('exists x. (man(x) & exists x. woman(x))', m3, g3, True),
+...     ('exists x. (man(x) & exists x. woman(x))', m3, g3, True),
+...     ('- exists x. woman(x)', m3, g3, False),
+...     ('exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'),
+...     ('- exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'),
+...     ('exists x. (man(x) & - exists y. woman(y))', m3, g3, False),
+...     ('exists x. (man(x) & - exists x. woman(x))', m3, g3, False),
+...     ('exists x. (woman(x) & - exists x. customer(x))', m2, g2, 'Undefined'),
+... ]
+
-
-

Tests based on the Blackburn & Bos testsuite

-
-
->>> v1 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'),
-...       ('honey_bunny', 'd4'), ('yolanda', 'd5'),
-...       ('customer', set(['d1', 'd2'])),
-...       ('robber', set(['d3', 'd4'])),
-...       ('love', set([('d3', 'd4')]))]
->>> val1 = Valuation(v1)
->>> dom1 = val1.domain
->>> m1 = Model(dom1, val1)
->>> g1 = Assignment(dom1)
-
-
->>> v2 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'),
-...       ('honey_bunny', 'd4'), ('yolanda', 'd4'),
-...       ('customer', set(['d1', 'd2', 'd5', 'd6'])),
-...       ('robber', set(['d3', 'd4'])),
-...       ('love', set([(None, None)]))]
->>> val2 = Valuation(v2)
->>> dom2 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6'])
->>> m2 = Model(dom2, val2)
->>> g2 = Assignment(dom2)
->>> g21 = Assignment(dom2)
->>> g21.add('y', 'd3')
-{'y': 'd3'}
-
-
->>> v3 = [('mia', 'd1'), ('jody', 'd2'), ('jules', 'd3'),
-...       ('vincent', 'd4'),
-...       ('woman', set(['d1', 'd2'])), ('man', set(['d3', 'd4'])),
-...       ('joke', set(['d5', 'd6'])), ('episode', set(['d7', 'd8'])),
-...       ('in', set([('d5', 'd7'), ('d5', 'd8')])),
-...       ('tell', set([('d1', 'd5'), ('d2', 'd6')]))]
->>> val3 = Valuation(v3)
->>> dom3 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8'])
->>> m3 = Model(dom3, val3)
->>> g3 = Assignment(dom3)
-
-
->>> tests = [
-...     ('exists x. robber(x)', m1, g1, True),
-...     ('exists x. exists y. love(y, x)', m1, g1, True),
-...     ('exists x0. exists x1. love(x1, x0)', m2, g2, False),
-...     ('all x. all y. love(y, x)', m2, g2, False),
-...     ('- (all x. all y. love(y, x))', m2, g2, True),
-...     ('all x. all y. - love(y, x)', m2, g2, True),
-...     ('yolanda = honey_bunny', m2, g2, True),
-...     ('mia = honey_bunny', m2, g2, 'Undefined'),
-...     ('- (yolanda = honey_bunny)', m2, g2, False),
-...     ('- (mia = honey_bunny)', m2, g2, 'Undefined'),
-...     ('all x. (robber(x) | customer(x))', m2, g2, True),
-...     ('- (all x. (robber(x) | customer(x)))', m2, g2, False),
-...     ('(robber(x) | customer(x))', m2, g2, 'Undefined'),
-...     ('(robber(y) | customer(y))', m2, g21, True),
-...     ('exists x. (man(x) & exists x. woman(x))', m3, g3, True),
-...     ('exists x. (man(x) & exists x. woman(x))', m3, g3, True),
-...     ('- exists x. woman(x)', m3, g3, False),
-...     ('exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'),
-...     ('- exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'),
-...     ('exists x. (man(x) & - exists y. woman(y))', m3, g3, False),
-...     ('exists x. (man(x) & - exists x. woman(x))', m3, g3, False),
-...     ('exists x. (woman(x) & - exists x. customer(x))', m2, g2, 'Undefined'),
-... ]
-
-
->>> for item in tests:
-...     sentence, model, g, testvalue = item
-...     semvalue = model.evaluate(sentence, g)
-...     if semvalue == testvalue:
-...         print('*', end=' ')
-...     g.purge()
-* * * * * * * * * * * * * * * * * * * * * *
-
-
+
>>> for item in tests:
+...     sentence, model, g, testvalue = item
+...     semvalue = model.evaluate(sentence, g)
+...     if semvalue == testvalue:
+...         print('*', end=' ')
+...     g.purge()
+* * * * * * * * * * * * * * * * * * * * * *
+
-
-

Tests for mapping from syntax to semantics

+
+
+

Tests for mapping from syntax to semantics

Load a valuation from a file.

-
-
->>> import nltk.data
->>> from nltk.sem.util import parse_sents
->>> val = nltk.data.load('grammars/sample_grammars/valuation1.val')
->>> dom = val.domain
->>> m = Model(dom, val)
->>> g = Assignment(dom)
->>> gramfile = 'grammars/sample_grammars/sem2.fcfg'
->>> inputs = ['John sees a girl', 'every dog barks']
->>> parses = parse_sents(inputs, gramfile)
->>> for sent, trees in zip(inputs, parses):
-...     print()
-...     print("Sentence: %s" % sent)
-...     for tree in trees:
-...         print("Parse:\n %s" %tree)
-...         print("Semantics: %s" %  root_semrep(tree))
-<BLANKLINE>
-Sentence: John sees a girl
-Parse:
- (S[SEM=<exists x.(girl(x) & see(john,x))>]
-  (NP[-LOC, NUM='sg', SEM=<\P.P(john)>]
-    (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John))
-  (VP[NUM='sg', SEM=<\y.exists x.(girl(x) & see(y,x))>]
-    (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees)
-    (NP[NUM='sg', SEM=<\Q.exists x.(girl(x) & Q(x))>]
-      (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a)
-      (Nom[NUM='sg', SEM=<\x.girl(x)>]
-        (N[NUM='sg', SEM=<\x.girl(x)>] girl)))))
-Semantics: exists x.(girl(x) & see(john,x))
-<BLANKLINE>
-Sentence: every dog barks
-Parse:
- (S[SEM=<all x.(dog(x) -> bark(x))>]
-  (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
-    (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
-    (Nom[NUM='sg', SEM=<\x.dog(x)>]
-      (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
-  (VP[NUM='sg', SEM=<\x.bark(x)>]
-    (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
-Semantics: all x.(dog(x) -> bark(x))
-
-
->>> sent = "every dog barks"
->>> result = nltk.sem.util.interpret_sents([sent], gramfile)[0]
->>> for (syntree, semrep) in result:
-...     print(syntree)
-...     print()
-...     print(semrep)
-(S[SEM=<all x.(dog(x) -> bark(x))>]
-  (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
-    (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
-    (Nom[NUM='sg', SEM=<\x.dog(x)>]
-      (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
-  (VP[NUM='sg', SEM=<\x.bark(x)>]
-    (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
-<BLANKLINE>
-all x.(dog(x) -> bark(x))
-
-
->>> result = nltk.sem.util.evaluate_sents([sent], gramfile, m, g)[0]
->>> for (syntree, semrep, value) in result:
-...     print(syntree)
-...     print()
-...     print(semrep)
-...     print()
-...     print(value)
-(S[SEM=<all x.(dog(x) -> bark(x))>]
-  (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
-    (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
-    (Nom[NUM='sg', SEM=<\x.dog(x)>]
-      (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
-  (VP[NUM='sg', SEM=<\x.bark(x)>]
-    (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
-<BLANKLINE>
-all x.(dog(x) -> bark(x))
-<BLANKLINE>
-True
-
-
->>> sents = ['Mary walks', 'John sees a dog']
->>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')
->>> for result in results:
-...     for (synrep, semrep) in result:
-...         print(synrep)
-(S[SEM=<walk(mary)>]
-  (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>]
-    (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary))
-  (VP[NUM='sg', SEM=<\x.walk(x)>]
-    (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
-(S[SEM=<exists x.(dog(x) & see(john,x))>]
-  (NP[-LOC, NUM='sg', SEM=<\P.P(john)>]
-    (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John))
-  (VP[NUM='sg', SEM=<\y.exists x.(dog(x) & see(y,x))>]
-    (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees)
-    (NP[NUM='sg', SEM=<\Q.exists x.(dog(x) & Q(x))>]
-      (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a)
-      (Nom[NUM='sg', SEM=<\x.dog(x)>]
-        (N[NUM='sg', SEM=<\x.dog(x)>] dog)))))
-
-
+
>>> import nltk.data
+>>> from nltk.sem.util import parse_sents
+>>> val = nltk.data.load('grammars/sample_grammars/valuation1.val')
+>>> dom = val.domain
+>>> m = Model(dom, val)
+>>> g = Assignment(dom)
+>>> gramfile = 'grammars/sample_grammars/sem2.fcfg'
+>>> inputs = ['John sees a girl', 'every dog barks']
+>>> parses = parse_sents(inputs, gramfile)
+>>> for sent, trees in zip(inputs, parses):
+...     print()
+...     print("Sentence: %s" % sent)
+...     for tree in trees:
+...         print("Parse:\n %s" %tree)
+...         print("Semantics: %s" %  root_semrep(tree))
+
+Sentence: John sees a girl
+Parse:
+ (S[SEM=<exists x.(girl(x) & see(john,x))>]
+  (NP[-LOC, NUM='sg', SEM=<\P.P(john)>]
+    (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John))
+  (VP[NUM='sg', SEM=<\y.exists x.(girl(x) & see(y,x))>]
+    (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees)
+    (NP[NUM='sg', SEM=<\Q.exists x.(girl(x) & Q(x))>]
+      (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a)
+      (Nom[NUM='sg', SEM=<\x.girl(x)>]
+        (N[NUM='sg', SEM=<\x.girl(x)>] girl)))))
+Semantics: exists x.(girl(x) & see(john,x))
+
+Sentence: every dog barks
+Parse:
+ (S[SEM=<all x.(dog(x) -> bark(x))>]
+  (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
+    (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
+    (Nom[NUM='sg', SEM=<\x.dog(x)>]
+      (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
+  (VP[NUM='sg', SEM=<\x.bark(x)>]
+    (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
+Semantics: all x.(dog(x) -> bark(x))
+
-
-

Cooper Storage

-
-
->>> from nltk.sem import cooper_storage as cs
->>> sentence = 'every girl chases a dog'
->>> trees = cs.parse_with_bindops(sentence, grammar='grammars/book_grammars/storage.fcfg')
->>> semrep = trees[0].label()['SEM']
->>> cs_semrep = cs.CooperStore(semrep)
->>> print(cs_semrep.core)
-chase(z2,z4)
->>> for bo in cs_semrep.store:
-...     print(bo)
-bo(\P.all x.(girl(x) -> P(x)),z2)
-bo(\P.exists x.(dog(x) & P(x)),z4)
->>> cs_semrep.s_retrieve(trace=True)
-Permutation 1
-   (\P.all x.(girl(x) -> P(x)))(\z2.chase(z2,z4))
-   (\P.exists x.(dog(x) & P(x)))(\z4.all x.(girl(x) -> chase(x,z4)))
-Permutation 2
-   (\P.exists x.(dog(x) & P(x)))(\z4.chase(z2,z4))
-   (\P.all x.(girl(x) -> P(x)))(\z2.exists x.(dog(x) & chase(z2,x)))
-
-
->>> for reading in cs_semrep.readings:
-...     print(reading)
-exists x.(dog(x) & all z3.(girl(z3) -> chase(z3,x)))
-all x.(girl(x) -> exists z4.(dog(z4) & chase(x,z4)))
-
-
+
>>> sent = "every dog barks"
+>>> result = nltk.sem.util.interpret_sents([sent], gramfile)[0]
+>>> for (syntree, semrep) in result:
+...     print(syntree)
+...     print()
+...     print(semrep)
+(S[SEM=<all x.(dog(x) -> bark(x))>]
+  (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
+    (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
+    (Nom[NUM='sg', SEM=<\x.dog(x)>]
+      (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
+  (VP[NUM='sg', SEM=<\x.bark(x)>]
+    (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
+
+all x.(dog(x) -> bark(x))
+
+
>>> result = nltk.sem.util.evaluate_sents([sent], gramfile, m, g)[0]
+>>> for (syntree, semrep, value) in result:
+...     print(syntree)
+...     print()
+...     print(semrep)
+...     print()
+...     print(value)
+(S[SEM=<all x.(dog(x) -> bark(x))>]
+  (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
+    (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
+    (Nom[NUM='sg', SEM=<\x.dog(x)>]
+      (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
+  (VP[NUM='sg', SEM=<\x.bark(x)>]
+    (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
+
+all x.(dog(x) -> bark(x))
+
+True
+
+
>>> sents = ['Mary walks', 'John sees a dog']
+>>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')
+>>> for result in results:
+...     for (synrep, semrep) in result:
+...         print(synrep)
+(S[SEM=<walk(mary)>]
+  (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>]
+    (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary))
+  (VP[NUM='sg', SEM=<\x.walk(x)>]
+    (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
+(S[SEM=<exists x.(dog(x) & see(john,x))>]
+  (NP[-LOC, NUM='sg', SEM=<\P.P(john)>]
+    (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John))
+  (VP[NUM='sg', SEM=<\y.exists x.(dog(x) & see(y,x))>]
+    (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees)
+    (NP[NUM='sg', SEM=<\Q.exists x.(dog(x) & Q(x))>]
+      (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a)
+      (Nom[NUM='sg', SEM=<\x.dog(x)>]
+        (N[NUM='sg', SEM=<\x.dog(x)>] dog)))))
+
+
+
+
+

Cooper Storage

+
>>> from nltk.sem import cooper_storage as cs
+>>> sentence = 'every girl chases a dog'
+>>> trees = cs.parse_with_bindops(sentence, grammar='grammars/book_grammars/storage.fcfg')
+>>> semrep = trees[0].label()['SEM']
+>>> cs_semrep = cs.CooperStore(semrep)
+>>> print(cs_semrep.core)
+chase(z2,z4)
+>>> for bo in cs_semrep.store:
+...     print(bo)
+bo(\P.all x.(girl(x) -> P(x)),z2)
+bo(\P.exists x.(dog(x) & P(x)),z4)
+>>> cs_semrep.s_retrieve(trace=True)
+Permutation 1
+   (\P.all x.(girl(x) -> P(x)))(\z2.chase(z2,z4))
+   (\P.exists x.(dog(x) & P(x)))(\z4.all x.(girl(x) -> chase(x,z4)))
+Permutation 2
+   (\P.exists x.(dog(x) & P(x)))(\z4.chase(z2,z4))
+   (\P.all x.(girl(x) -> P(x)))(\z2.exists x.(dog(x) & chase(z2,x)))
+
+
+
>>> for reading in cs_semrep.readings:
+...     print(reading)
+exists x.(dog(x) & all z3.(girl(z3) -> chase(z3,x)))
+all x.(girl(x) -> exists z4.(dog(z4) & chase(x,z4)))
+
+
+
+
+
+ + +
+
+ +
+ +
+ +
+ +
\ No newline at end of file
diff --git a/howto/sentiment.html b/howto/sentiment.html
index 75b62215c..34ff0a710 100644
--- a/howto/sentiment.html
+++ b/howto/sentiment.html
@@ -1,594 +1,386 @@
-Sentiment Analysis
+NLTK :: Sample usage for sentiment
-
->>> from nltk.classify import NaiveBayesClassifier
->>> from nltk.corpus import subjectivity
->>> from nltk.sentiment import SentimentAnalyzer
->>> from nltk.sentiment.util import *
-
-
->>> n_instances = 100
->>> subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
->>> obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
->>> len(subj_docs), len(obj_docs)
-(100, 100)
-
-

Sample usage for sentiment

+
+

Sentiment Analysis

+
>>> from nltk.classify import NaiveBayesClassifier
+>>> from nltk.corpus import subjectivity
+>>> from nltk.sentiment import SentimentAnalyzer
+>>> from nltk.sentiment.util import *
+
+
+
>>> n_instances = 100
+>>> subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
+>>> obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
+>>> len(subj_docs), len(obj_docs)
+(100, 100)
+
+

Each document is represented by a tuple (sentence, label). The sentence is tokenized, so it is represented by a list of strings:

-
-
->>> subj_docs[0]
-(['smart', 'and', 'alert', ',', 'thirteen', 'conversations', 'about', 'one',
-'thing', 'is', 'a', 'small', 'gem', '.'], 'subj')
-
-
+
>>> subj_docs[0]
+(['smart', 'and', 'alert', ',', 'thirteen', 'conversations', 'about', 'one',
+'thing', 'is', 'a', 'small', 'gem', '.'], 'subj')
+
+

We separately split subjective and objective instances to keep a balanced uniform class distribution in both train and test sets.

-
-
->>> train_subj_docs = subj_docs[:80]
->>> test_subj_docs = subj_docs[80:100]
->>> train_obj_docs = obj_docs[:80]
->>> test_obj_docs = obj_docs[80:100]
->>> training_docs = train_subj_docs+train_obj_docs
->>> testing_docs = test_subj_docs+test_obj_docs
-
-
->>> sentim_analyzer = SentimentAnalyzer()
->>> all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
-
-
+
>>> train_subj_docs = subj_docs[:80]
+>>> test_subj_docs = subj_docs[80:100]
+>>> train_obj_docs = obj_docs[:80]
+>>> test_obj_docs = obj_docs[80:100]
+>>> training_docs = train_subj_docs+train_obj_docs
+>>> testing_docs = test_subj_docs+test_obj_docs
+
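A quick sanity check on the split sizes (80+80 training instances, 20+20 testing instances):
+>>> len(training_docs), len(testing_docs)
+(160, 40)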
+
+
>>> sentim_analyzer = SentimentAnalyzer()
+>>> all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
+
+
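For reference, mark_negation appends a _NEG suffix to tokens that follow a negation cue, up to the next punctuation mark; the illustration below is adapted from the function's docstring:
+>>> mark_negation("I didn't like this movie . It was bad .".split())
+['I', "didn't", 'like_NEG', 'this_NEG', 'movie_NEG', '.', 'It', 'was', 'bad', '.']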

We use simple unigram word features, handling negation:

-
-
->>> unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
->>> len(unigram_feats)
-83
->>> sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
-
-
+
>>> unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
+>>> len(unigram_feats)
+83
+>>> sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
+
+

We apply features to obtain a feature-value representation of our datasets:

-
-
->>> training_set = sentim_analyzer.apply_features(training_docs)
->>> test_set = sentim_analyzer.apply_features(testing_docs)
-
-
+
>>> training_set = sentim_analyzer.apply_features(training_docs)
+>>> test_set = sentim_analyzer.apply_features(testing_docs)
+
+

We can now train our classifier on the training set, and subsequently output the evaluation results:

-
-
->>> trainer = NaiveBayesClassifier.train
->>> classifier = sentim_analyzer.train(trainer, training_set)
-Training classifier
->>> for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
-...     print('{0}: {1}'.format(key, value))
-Evaluating NaiveBayesClassifier results...
-Accuracy: 0.8
-F-measure [obj]: 0.8
-F-measure [subj]: 0.8
-Precision [obj]: 0.8
-Precision [subj]: 0.8
-Recall [obj]: 0.8
-Recall [subj]: 0.8
-
-
-
-

Vader

-
-
->>> from nltk.sentiment.vader import SentimentIntensityAnalyzer
->>> sentences = ["VADER is smart, handsome, and funny.", # positive sentence example
-...    "VADER is smart, handsome, and funny!", # punctuation emphasis handled correctly (sentiment intensity adjusted)
-...    "VADER is very smart, handsome, and funny.",  # booster words handled correctly (sentiment intensity adjusted)
-...    "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
-...    "VADER is VERY SMART, handsome, and FUNNY!!!",# combination of signals - VADER appropriately adjusts intensity
-...    "VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!",# booster words & punctuation make this close to ceiling for score
-...    "The book was good.",         # positive sentence
-...    "The book was kind of good.", # qualified positive sentence is handled correctly (intensity adjusted)
-...    "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence
-...    "A really bad, horrible book.",       # negative sentence with booster words
-...    "At least it isn't a horrible book.", # negated negative sentence with contraction
-...    ":) and :D",     # emoticons handled
-...    "",              # an empty string is correctly handled
-...    "Today sux",     #  negative slang handled
-...    "Today sux!",    #  negative slang with punctuation emphasis handled
-...    "Today SUX!",    #  negative slang with capitalization emphasis
-...    "Today kinda sux! But I'll get by, lol" # mixed sentiment example with slang and constrastive conjunction "but"
-... ]
->>> paragraph = "It was one of the worst movies I've seen, despite good reviews. \
-... Unbelievably bad acting!! Poor direction. VERY poor production. \
-... The movie was bad. Very bad movie. VERY bad movie. VERY BAD movie. VERY BAD movie!"
-
-
->>> from nltk import tokenize
->>> lines_list = tokenize.sent_tokenize(paragraph)
->>> sentences.extend(lines_list)
-
-
->>> tricky_sentences = [
-...    "Most automated sentiment analysis tools are shit.",
-...    "VADER sentiment analysis is the shit.",
-...    "Sentiment analysis has never been good.",
-...    "Sentiment analysis with VADER has never been this good.",
-...    "Warren Beatty has never been so entertaining.",
-...    "I won't say that the movie is astounding and I wouldn't claim that \
-...    the movie is too banal either.",
-...    "I like to hate Michael Bay films, but I couldn't fault this one",
-...    "It's one thing to watch an Uwe Boll film, but another thing entirely \
-...    to pay for it",
-...    "The movie was too good",
-...    "This movie was actually neither that funny, nor super witty.",
-...    "This movie doesn't care about cleverness, wit or any other kind of \
-...    intelligent humor.",
-...    "Those who find ugly meanings in beautiful things are corrupt without \
-...    being charming.",
-...    "There are slow and repetitive parts, BUT it has just enough spice to \
-...    keep it interesting.",
-...    "The script is not fantastic, but the acting is decent and the cinematography \
-...    is EXCELLENT!",
-...    "Roger Dodger is one of the most compelling variations on this theme.",
-...    "Roger Dodger is one of the least compelling variations on this theme.",
-...    "Roger Dodger is at least compelling as a variation on the theme.",
-...    "they fall in love with the product",
-...    "but then it breaks",
-...    "usually around the time the 90 day warranty expires",
-...    "the twin towers collapsed today",
-...    "However, Mr. Carter solemnly argues, his client carried out the kidnapping \
-...    under orders and in the ''least offensive way possible.''"
-... ]
->>> sentences.extend(tricky_sentences)
->>> sid = SentimentIntensityAnalyzer()
->>> for sentence in sentences:
-...     print(sentence)
-...     ss = sid.polarity_scores(sentence)
-...     for k in sorted(ss):
-...         print('{0}: {1}, '.format(k, ss[k]), end='')
-...     print()
-VADER is smart, handsome, and funny.
-compound: 0.8316, neg: 0.0, neu: 0.254, pos: 0.746,
-VADER is smart, handsome, and funny!
-compound: 0.8439, neg: 0.0, neu: 0.248, pos: 0.752,
-VADER is very smart, handsome, and funny.
-compound: 0.8545, neg: 0.0, neu: 0.299, pos: 0.701,
-VADER is VERY SMART, handsome, and FUNNY.
-compound: 0.9227, neg: 0.0, neu: 0.246, pos: 0.754,
-VADER is VERY SMART, handsome, and FUNNY!!!
-compound: 0.9342, neg: 0.0, neu: 0.233, pos: 0.767,
-VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!
-compound: 0.9469, neg: 0.0, neu: 0.294, pos: 0.706,
-The book was good.
-compound: 0.4404, neg: 0.0, neu: 0.508, pos: 0.492,
-The book was kind of good.
-compound: 0.3832, neg: 0.0, neu: 0.657, pos: 0.343,
-The plot was good, but the characters are uncompelling and the dialog is not great.
-compound: -0.7042, neg: 0.327, neu: 0.579, pos: 0.094,
-A really bad, horrible book.
-compound: -0.8211, neg: 0.791, neu: 0.209, pos: 0.0,
-At least it isn't a horrible book.
-compound: 0.431, neg: 0.0, neu: 0.637, pos: 0.363,
-:) and :D
-compound: 0.7925, neg: 0.0, neu: 0.124, pos: 0.876,
-<BLANKLINE>
-compound: 0.0, neg: 0.0, neu: 0.0, pos: 0.0,
-Today sux
-compound: -0.3612, neg: 0.714, neu: 0.286, pos: 0.0,
-Today sux!
-compound: -0.4199, neg: 0.736, neu: 0.264, pos: 0.0,
-Today SUX!
-compound: -0.5461, neg: 0.779, neu: 0.221, pos: 0.0,
-Today kinda sux! But I'll get by, lol
-compound: 0.2228, neg: 0.195, neu: 0.531, pos: 0.274,
-It was one of the worst movies I've seen, despite good reviews.
-compound: -0.7584, neg: 0.394, neu: 0.606, pos: 0.0,
-Unbelievably bad acting!!
-compound: -0.6572, neg: 0.686, neu: 0.314, pos: 0.0,
-Poor direction.
-compound: -0.4767, neg: 0.756, neu: 0.244, pos: 0.0,
-VERY poor production.
-compound: -0.6281, neg: 0.674, neu: 0.326, pos: 0.0,
-The movie was bad.
-compound: -0.5423, neg: 0.538, neu: 0.462, pos: 0.0,
-Very bad movie.
-compound: -0.5849, neg: 0.655, neu: 0.345, pos: 0.0,
-VERY bad movie.
-compound: -0.6732, neg: 0.694, neu: 0.306, pos: 0.0,
-VERY BAD movie.
-compound: -0.7398, neg: 0.724, neu: 0.276, pos: 0.0,
-VERY BAD movie!
-compound: -0.7616, neg: 0.735, neu: 0.265, pos: 0.0,
-Most automated sentiment analysis tools are shit.
-compound: -0.5574, neg: 0.375, neu: 0.625, pos: 0.0,
-VADER sentiment analysis is the shit.
-compound: 0.6124, neg: 0.0, neu: 0.556, pos: 0.444,
-Sentiment analysis has never been good.
-compound: -0.3412, neg: 0.325, neu: 0.675, pos: 0.0,
-Sentiment analysis with VADER has never been this good.
-compound: 0.5228, neg: 0.0, neu: 0.703, pos: 0.297,
-Warren Beatty has never been so entertaining.
-compound: 0.5777, neg: 0.0, neu: 0.616, pos: 0.384,
-I won't say that the movie is astounding and I wouldn't claim that the movie is too banal either.
-compound: 0.4215, neg: 0.0, neu: 0.851, pos: 0.149,
-I like to hate Michael Bay films, but I couldn't fault this one
-compound: 0.3153, neg: 0.157, neu: 0.534, pos: 0.309,
-It's one thing to watch an Uwe Boll film, but another thing entirely to pay for it
-compound: -0.2541, neg: 0.112, neu: 0.888, pos: 0.0,
-The movie was too good
-compound: 0.4404, neg: 0.0, neu: 0.58, pos: 0.42,
-This movie was actually neither that funny, nor super witty.
-compound: -0.6759, neg: 0.41, neu: 0.59, pos: 0.0,
-This movie doesn't care about cleverness, wit or any other kind of intelligent humor.
-compound: -0.1338, neg: 0.265, neu: 0.497, pos: 0.239,
-Those who find ugly meanings in beautiful things are corrupt without being charming.
-compound: -0.3553, neg: 0.314, neu: 0.493, pos: 0.192,
-There are slow and repetitive parts, BUT it has just enough spice to keep it interesting.
-compound: 0.4678, neg: 0.079, neu: 0.735, pos: 0.186,
-The script is not fantastic, but the acting is decent and the cinematography is EXCELLENT!
-compound: 0.7565, neg: 0.092, neu: 0.607, pos: 0.301,
-Roger Dodger is one of the most compelling variations on this theme.
-compound: 0.2944, neg: 0.0, neu: 0.834, pos: 0.166,
-Roger Dodger is one of the least compelling variations on this theme.
-compound: -0.1695, neg: 0.132, neu: 0.868, pos: 0.0,
-Roger Dodger is at least compelling as a variation on the theme.
-compound: 0.2263, neg: 0.0, neu: 0.84, pos: 0.16,
-they fall in love with the product
-compound: 0.6369, neg: 0.0, neu: 0.588, pos: 0.412,
-but then it breaks
-compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
-usually around the time the 90 day warranty expires
-compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
-the twin towers collapsed today
-compound: -0.2732, neg: 0.344, neu: 0.656, pos: 0.0,
-However, Mr. Carter solemnly argues, his client carried out the kidnapping under orders and in the ''least offensive way possible.''
-compound: -0.5859, neg: 0.23, neu: 0.697, pos: 0.074,
-
-
+
>>> trainer = NaiveBayesClassifier.train
+>>> classifier = sentim_analyzer.train(trainer, training_set)
+Training classifier
+>>> for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
+...     print('{0}: {1}'.format(key, value))
+Evaluating NaiveBayesClassifier results...
+Accuracy: 0.8
+F-measure [obj]: 0.8
+F-measure [subj]: 0.8
+Precision [obj]: 0.8
+Precision [subj]: 0.8
+Recall [obj]: 0.8
+Recall [subj]: 0.8
+
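The trained analyzer can also label a single tokenized sentence via its classify method, which applies the stored feature extractors before calling the underlying classifier. Since the predicted label depends on the model just trained, no output is asserted in this sketch:
+>>> label = sentim_analyzer.classify(['smart', 'and', 'alert', ',', 'a', 'small', 'gem', '.'])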
+
+

Vader

+
>>> from nltk.sentiment.vader import SentimentIntensityAnalyzer
+>>> sentences = ["VADER is smart, handsome, and funny.", # positive sentence example
+...    "VADER is smart, handsome, and funny!", # punctuation emphasis handled correctly (sentiment intensity adjusted)
+...    "VADER is very smart, handsome, and funny.",  # booster words handled correctly (sentiment intensity adjusted)
+...    "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
+...    "VADER is VERY SMART, handsome, and FUNNY!!!",# combination of signals - VADER appropriately adjusts intensity
+...    "VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!",# booster words & punctuation make this close to ceiling for score
+...    "The book was good.",         # positive sentence
+...    "The book was kind of good.", # qualified positive sentence is handled correctly (intensity adjusted)
+...    "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence
+...    "A really bad, horrible book.",       # negative sentence with booster words
+...    "At least it isn't a horrible book.", # negated negative sentence with contraction
+...    ":) and :D",     # emoticons handled
+...    "",              # an empty string is correctly handled
+...    "Today sux",     #  negative slang handled
+...    "Today sux!",    #  negative slang with punctuation emphasis handled
+...    "Today SUX!",    #  negative slang with capitalization emphasis
+...    "Today kinda sux! But I'll get by, lol" # mixed sentiment example with slang and constrastive conjunction "but"
+... ]
+>>> paragraph = "It was one of the worst movies I've seen, despite good reviews. \
+... Unbelievably bad acting!! Poor direction. VERY poor production. \
+... The movie was bad. Very bad movie. VERY bad movie. VERY BAD movie. VERY BAD movie!"
+
+
>>> from nltk import tokenize
+>>> lines_list = tokenize.sent_tokenize(paragraph)
+>>> sentences.extend(lines_list)
+
+
+
>>> tricky_sentences = [
+...    "Most automated sentiment analysis tools are shit.",
+...    "VADER sentiment analysis is the shit.",
+...    "Sentiment analysis has never been good.",
+...    "Sentiment analysis with VADER has never been this good.",
+...    "Warren Beatty has never been so entertaining.",
+...    "I won't say that the movie is astounding and I wouldn't claim that \
+...    the movie is too banal either.",
+...    "I like to hate Michael Bay films, but I couldn't fault this one",
+...    "I like to hate Michael Bay films, BUT I couldn't help but fault this one",
+...    "It's one thing to watch an Uwe Boll film, but another thing entirely \
+...    to pay for it",
+...    "The movie was too good",
+...    "This movie was actually neither that funny, nor super witty.",
+...    "This movie doesn't care about cleverness, wit or any other kind of \
+...    intelligent humor.",
+...    "Those who find ugly meanings in beautiful things are corrupt without \
+...    being charming.",
+...    "There are slow and repetitive parts, BUT it has just enough spice to \
+...    keep it interesting.",
+...    "The script is not fantastic, but the acting is decent and the cinematography \
+...    is EXCELLENT!",
+...    "Roger Dodger is one of the most compelling variations on this theme.",
+...    "Roger Dodger is one of the least compelling variations on this theme.",
+...    "Roger Dodger is at least compelling as a variation on the theme.",
+...    "they fall in love with the product",
+...    "but then it breaks",
+...    "usually around the time the 90 day warranty expires",
+...    "the twin towers collapsed today",
+...    "However, Mr. Carter solemnly argues, his client carried out the kidnapping \
+...    under orders and in the ''least offensive way possible.''"
+... ]
+>>> sentences.extend(tricky_sentences)
+>>> sid = SentimentIntensityAnalyzer()
+>>> for sentence in sentences:
+...     print(sentence)
+...     ss = sid.polarity_scores(sentence)
+...     for k in sorted(ss):
+...         print('{0}: {1}, '.format(k, ss[k]), end='')
+...     print()
+VADER is smart, handsome, and funny.
+compound: 0.8316, neg: 0.0, neu: 0.254, pos: 0.746,
+VADER is smart, handsome, and funny!
+compound: 0.8439, neg: 0.0, neu: 0.248, pos: 0.752,
+VADER is very smart, handsome, and funny.
+compound: 0.8545, neg: 0.0, neu: 0.299, pos: 0.701,
+VADER is VERY SMART, handsome, and FUNNY.
+compound: 0.9227, neg: 0.0, neu: 0.246, pos: 0.754,
+VADER is VERY SMART, handsome, and FUNNY!!!
+compound: 0.9342, neg: 0.0, neu: 0.233, pos: 0.767,
+VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!
+compound: 0.9469, neg: 0.0, neu: 0.294, pos: 0.706,
+The book was good.
+compound: 0.4404, neg: 0.0, neu: 0.508, pos: 0.492,
+The book was kind of good.
+compound: 0.3832, neg: 0.0, neu: 0.657, pos: 0.343,
+The plot was good, but the characters are uncompelling and the dialog is not great.
+compound: -0.7042, neg: 0.327, neu: 0.579, pos: 0.094,
+A really bad, horrible book.
+compound: -0.8211, neg: 0.791, neu: 0.209, pos: 0.0,
+At least it isn't a horrible book.
+compound: 0.431, neg: 0.0, neu: 0.637, pos: 0.363,
+:) and :D
+compound: 0.7925, neg: 0.0, neu: 0.124, pos: 0.876,
+<BLANKLINE>
+compound: 0.0, neg: 0.0, neu: 0.0, pos: 0.0,
+Today sux
+compound: -0.3612, neg: 0.714, neu: 0.286, pos: 0.0,
+Today sux!
+compound: -0.4199, neg: 0.736, neu: 0.264, pos: 0.0,
+Today SUX!
+compound: -0.5461, neg: 0.779, neu: 0.221, pos: 0.0,
+Today kinda sux! But I'll get by, lol
+compound: 0.5249, neg: 0.138, neu: 0.517, pos: 0.344,
+It was one of the worst movies I've seen, despite good reviews.
+compound: -0.7584, neg: 0.394, neu: 0.606, pos: 0.0,
+Unbelievably bad acting!!
+compound: -0.6572, neg: 0.686, neu: 0.314, pos: 0.0,
+Poor direction.
+compound: -0.4767, neg: 0.756, neu: 0.244, pos: 0.0,
+VERY poor production.
+compound: -0.6281, neg: 0.674, neu: 0.326, pos: 0.0,
+The movie was bad.
+compound: -0.5423, neg: 0.538, neu: 0.462, pos: 0.0,
+Very bad movie.
+compound: -0.5849, neg: 0.655, neu: 0.345, pos: 0.0,
+VERY bad movie.
+compound: -0.6732, neg: 0.694, neu: 0.306, pos: 0.0,
+VERY BAD movie.
+compound: -0.7398, neg: 0.724, neu: 0.276, pos: 0.0,
+VERY BAD movie!
+compound: -0.7616, neg: 0.735, neu: 0.265, pos: 0.0,
+Most automated sentiment analysis tools are shit.
+compound: -0.5574, neg: 0.375, neu: 0.625, pos: 0.0,
+VADER sentiment analysis is the shit.
+compound: 0.6124, neg: 0.0, neu: 0.556, pos: 0.444,
+Sentiment analysis has never been good.
+compound: -0.3412, neg: 0.325, neu: 0.675, pos: 0.0,
+Sentiment analysis with VADER has never been this good.
+compound: 0.5228, neg: 0.0, neu: 0.703, pos: 0.297,
+Warren Beatty has never been so entertaining.
+compound: 0.5777, neg: 0.0, neu: 0.616, pos: 0.384,
+I won't say that the movie is astounding and I wouldn't claim that the movie is too banal either.
+compound: 0.4215, neg: 0.0, neu: 0.851, pos: 0.149,
+I like to hate Michael Bay films, but I couldn't fault this one
+compound: 0.3153, neg: 0.157, neu: 0.534, pos: 0.309,
+I like to hate Michael Bay films, BUT I couldn't help but fault this one
+compound: -0.1531, neg: 0.277, neu: 0.477, pos: 0.246,
+It's one thing to watch an Uwe Boll film, but another thing entirely to pay for it
+compound: -0.2541, neg: 0.112, neu: 0.888, pos: 0.0,
+The movie was too good
+compound: 0.4404, neg: 0.0, neu: 0.58, pos: 0.42,
+This movie was actually neither that funny, nor super witty.
+compound: -0.6759, neg: 0.41, neu: 0.59, pos: 0.0,
+This movie doesn't care about cleverness, wit or any other kind of intelligent humor.
+compound: -0.1338, neg: 0.265, neu: 0.497, pos: 0.239,
+Those who find ugly meanings in beautiful things are corrupt without being charming.
+compound: -0.3553, neg: 0.314, neu: 0.493, pos: 0.192,
+There are slow and repetitive parts, BUT it has just enough spice to keep it interesting.
+compound: 0.4678, neg: 0.079, neu: 0.735, pos: 0.186,
+The script is not fantastic, but the acting is decent and the cinematography is EXCELLENT!
+compound: 0.7565, neg: 0.092, neu: 0.607, pos: 0.301,
+Roger Dodger is one of the most compelling variations on this theme.
+compound: 0.2944, neg: 0.0, neu: 0.834, pos: 0.166,
+Roger Dodger is one of the least compelling variations on this theme.
+compound: -0.1695, neg: 0.132, neu: 0.868, pos: 0.0,
+Roger Dodger is at least compelling as a variation on the theme.
+compound: 0.2263, neg: 0.0, neu: 0.84, pos: 0.16,
+they fall in love with the product
+compound: 0.6369, neg: 0.0, neu: 0.588, pos: 0.412,
+but then it breaks
+compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
+usually around the time the 90 day warranty expires
+compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
+the twin towers collapsed today
+compound: -0.2732, neg: 0.344, neu: 0.656, pos: 0.0,
+However, Mr. Carter solemnly argues, his client carried out the kidnapping under orders and in the ''least offensive way possible.''
+compound: -0.5859, neg: 0.23, neu: 0.697, pos: 0.074,
+
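The compound score is a normalized value in [-1, 1] that summarizes overall polarity. A common convention (a convention only, not part of the API) treats compound >= 0.05 as positive and <= -0.05 as negative:
+>>> sid.polarity_scores("NLTK is really helpful!")['compound'] > 0.05
+True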
+
+
+
+
\ No newline at end of file
diff --git a/howto/sentiwordnet.html b/howto/sentiwordnet.html
index 07668e0df..6c73c02aa 100644
--- a/howto/sentiwordnet.html
+++ b/howto/sentiwordnet.html
@@ -1,388 +1,187 @@
-SentiWordNet Interface
+NLTK :: Sample usage for sentiwordnet

Sample usage for sentiwordnet

+
+

SentiWordNet Interface

SentiWordNet can be imported like this:

-
-
->>> from nltk.corpus import sentiwordnet as swn
-
-
-
-

SentiSynsets

-
-
->>> breakdown = swn.senti_synset('breakdown.n.03')
->>> print(breakdown)
-<breakdown.n.03: PosScore=0.0 NegScore=0.25>
->>> breakdown.pos_score()
-0.0
->>> breakdown.neg_score()
-0.25
->>> breakdown.obj_score()
-0.75
-
-
+
>>> from nltk.corpus import sentiwordnet as swn
+
-
-

Lookup

-
-
->>> list(swn.senti_synsets('slow')) # doctest: +NORMALIZE_WHITESPACE
-[SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),
-SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),
-SentiSynset('slow.a.02'), SentiSynset('slow.a.04'),
-SentiSynset('slowly.r.01'), SentiSynset('behind.r.03')]
-
-
->>> happy = swn.senti_synsets('happy', 'a')
-
-
->>> all = swn.all_senti_synsets()
-
-
+
+

SentiSynsets

+
>>> breakdown = swn.senti_synset('breakdown.n.03')
+>>> print(breakdown)
+<breakdown.n.03: PosScore=0.0 NegScore=0.25>
+>>> breakdown.pos_score()
+0.0
+>>> breakdown.neg_score()
+0.25
+>>> breakdown.obj_score()
+0.75
+
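By construction the three scores sum to one, since the objectivity score is defined as the remainder:
+>>> breakdown.pos_score() + breakdown.neg_score() + breakdown.obj_score()
+1.0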
+
+
+

Lookup

+
>>> list(swn.senti_synsets('slow'))
+[SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),
+SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),
+SentiSynset('slow.a.02'), SentiSynset('dense.s.04'),
+SentiSynset('slow.a.04'), SentiSynset('boring.s.01'),
+SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'),
+SentiSynset('behind.r.03')]
+
+
>>> happy = swn.senti_synsets('happy', 'a')
+
+
+
>>> all = swn.all_senti_synsets()
+
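senti_synsets also accepts a part-of-speech filter; restricting the 'slow' lookup above to adverbs ('r') keeps just the two adverbial entries:
+>>> list(swn.senti_synsets('slow', 'r'))
+[SentiSynset('slowly.r.01'), SentiSynset('behind.r.03')]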
+
+
+
+
\ No newline at end of file
diff --git a/howto/simple.html b/howto/simple.html
index e78260fa0..ef0816135 100644
--- a/howto/simple.html
+++ b/howto/simple.html
@@ -1,436 +1,226 @@
-EasyInstall Tests
+NLTK :: Sample usage for simple

Sample usage for simple

+
+

EasyInstall Tests

This file contains some simple tests that will be run by EasyInstall in order to test the installation when NLTK-Data is absent.

-
-
->>> from __future__ import print_function
-
-
-
-

Tokenization

-
-
->>> from nltk.tokenize import wordpunct_tokenize
->>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
-...      "two of them.\n\nThanks.")
->>> wordpunct_tokenize(s) # doctest: +NORMALIZE_WHITESPACE
-['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
-'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
-
-
+
+

Tokenization

+
>>> from nltk.tokenize import wordpunct_tokenize
+>>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
+...      "two of them.\n\nThanks.")
+>>> wordpunct_tokenize(s)
+['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
+'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
+
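wordpunct_tokenize is purely regular-expression based, which is why it works without any NLTK data packages; WhitespaceTokenizer is another data-free tokenizer, splitting only on whitespace:
+>>> from nltk.tokenize import WhitespaceTokenizer
+>>> WhitespaceTokenizer().tokenize("Good muffins cost $3.88.")
+['Good', 'muffins', 'cost', '$3.88.']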
-
-

Metrics

-
-
->>> from nltk.metrics import precision, recall, f_measure
->>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
->>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
->>> reference_set = set(reference)
->>> test_set = set(test)
->>> precision(reference_set, test_set)
-1.0
->>> print(recall(reference_set, test_set))
-0.8
->>> print(f_measure(reference_set, test_set))
-0.88888888888...
-
-
+
+
+

Metrics

+
>>> from nltk.metrics import precision, recall, f_measure
+>>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
+>>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
+>>> reference_set = set(reference)
+>>> test_set = set(test)
+>>> precision(reference_set, test_set)
+1.0
+>>> print(recall(reference_set, test_set))
+0.8
+>>> print(f_measure(reference_set, test_set))
+0.88888888888...
+
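The F-measure reported above is the harmonic mean of precision and recall, which is easy to verify by hand:
+>>> p, r = 1.0, 0.8
+>>> 2 * p * r / (p + r)
+0.88888888888...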
-
-

Feature Structures

-
-
->>> from nltk import FeatStruct
->>> fs1 = FeatStruct(PER=3, NUM='pl', GND='fem')
->>> fs2 = FeatStruct(POS='N', AGR=fs1)
->>> print(fs2)
-[       [ GND = 'fem' ] ]
-[ AGR = [ NUM = 'pl'  ] ]
-[       [ PER = 3     ] ]
-[                       ]
-[ POS = 'N'             ]
->>> print(fs2['AGR'])
-[ GND = 'fem' ]
-[ NUM = 'pl'  ]
-[ PER = 3     ]
->>> print(fs2['AGR']['PER'])
-3
-
-
+
+
+

Feature Structures

+
>>> from nltk import FeatStruct
+>>> fs1 = FeatStruct(PER=3, NUM='pl', GND='fem')
+>>> fs2 = FeatStruct(POS='N', AGR=fs1)
+>>> print(fs2)
+[       [ GND = 'fem' ] ]
+[ AGR = [ NUM = 'pl'  ] ]
+[       [ PER = 3     ] ]
+[                       ]
+[ POS = 'N'             ]
+>>> print(fs2['AGR'])
+[ GND = 'fem' ]
+[ NUM = 'pl'  ]
+[ PER = 3     ]
+>>> print(fs2['AGR']['PER'])
+3
+
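Feature structures support unification; structures with incompatible values fail to unify and return None (a small sketch reusing fs2 from above):
+>>> print(fs2.unify(FeatStruct(AGR=FeatStruct(NUM='sg'))))
+None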
-
-

Parsing

-
-
->>> from nltk.parse.recursivedescent import RecursiveDescentParser
->>> from nltk.grammar import CFG
->>> grammar = CFG.fromstring("""
-... S -> NP VP
-... PP -> P NP
-... NP -> 'the' N | N PP | 'the' N PP
-... VP -> V NP | V PP | V NP PP
-... N -> 'cat' | 'dog' | 'rug'
-... V -> 'chased'
-... P -> 'on'
-... """)
->>> rd = RecursiveDescentParser(grammar)
->>> sent = 'the cat chased the dog on the rug'.split()
->>> for t in rd.parse(sent):
-...     print(t)
-(S
-  (NP the (N cat))
-  (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
-(S
-  (NP the (N cat))
-  (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
-
-
+
+
+

Parsing

+
>>> from nltk.parse.recursivedescent import RecursiveDescentParser
+>>> from nltk.grammar import CFG
+>>> grammar = CFG.fromstring("""
+... S -> NP VP
+... PP -> P NP
+... NP -> 'the' N | N PP | 'the' N PP
+... VP -> V NP | V PP | V NP PP
+... N -> 'cat' | 'dog' | 'rug'
+... V -> 'chased'
+... P -> 'on'
+... """)
+>>> rd = RecursiveDescentParser(grammar)
+>>> sent = 'the cat chased the dog on the rug'.split()
+>>> for t in rd.parse(sent):
+...     print(t)
+(S
+  (NP the (N cat))
+  (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
+(S
+  (NP the (N cat))
+  (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
+
+
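The two parses reflect the attachment ambiguity of the prepositional phrase 'on the rug', which can modify either the dog or the chasing. Counting the analyses confirms there are exactly two:
+>>> len(list(rd.parse(sent)))
+2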
+
+
\ No newline at end of file
diff --git a/howto/stem.html b/howto/stem.html
index 7ca2e8634..51862d4f7 100644
--- a/howto/stem.html
+++ b/howto/stem.html
@@ -1,443 +1,255 @@
-Stemmers
+NLTK :: Sample usage for stem

Sample usage for stem

+
+

Stemmers

+
+

Overview

Stemmers remove morphological affixes from words, leaving only the word stem.

-
-
->>> from __future__ import print_function
->>> from nltk.stem import *
-
-
+
>>> from nltk.stem import *
+
+
+
+
+

Unit tests for the Porter stemmer

+
>>> from nltk.stem.porter import *
+
-
-

Unit tests for the Porter stemmer

-
-
->>> from nltk.stem.porter import *
-
-

Create a new Porter stemmer.

-
-
->>> stemmer = PorterStemmer()
-
-
+
>>> stemmer = PorterStemmer()
+
+

Test the stemmer on various pluralised words.

-
-
->>> plurals = ['caresses', 'flies', 'dies', 'mules', 'denied',
-...            'died', 'agreed', 'owned', 'humbled', 'sized',
-...            'meeting', 'stating', 'siezing', 'itemization',
-...            'sensational', 'traditional', 'reference', 'colonizer',
-...            'plotted']
-
-
->>> singles = [stemmer.stem(plural) for plural in plurals]
-
-
->>> print(' '.join(singles))  # doctest: +NORMALIZE_WHITESPACE
-caress fli die mule deni die agre own humbl size meet
-state siez item sensat tradit refer colon plot
-
-
+
>>> plurals = ['caresses', 'flies', 'dies', 'mules', 'denied',
+...            'died', 'agreed', 'owned', 'humbled', 'sized',
+...            'meeting', 'stating', 'siezing', 'itemization',
+...            'sensational', 'traditional', 'reference', 'colonizer',
+...            'plotted']
+
+
+
>>> singles = [stemmer.stem(plural) for plural in plurals]
+
+
+
>>> print(' '.join(singles))
+caress fli die mule deni die agre own humbl size meet
+state siez item sensat tradit refer colon plot
+
+
+
+
+

Unit tests for Snowball stemmer

+
>>> from nltk.stem.snowball import SnowballStemmer
+
-
-

Unit tests for Snowball stemmer

-
-
->>> from nltk.stem.snowball import SnowballStemmer
-
-

See which languages are supported.

-
-
->>> print(" ".join(SnowballStemmer.languages))
-danish dutch english finnish french german hungarian italian
-norwegian porter portuguese romanian russian spanish swedish
-
-
+
>>> print(" ".join(SnowballStemmer.languages))
+arabic danish dutch english finnish french german hungarian italian
+norwegian porter portuguese romanian russian spanish swedish
+
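The languages attribute can also be used to check for support programmatically before instantiating a stemmer:
+>>> "arabic" in SnowballStemmer.languages
+True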
+

Create a new instance of a language specific subclass.

-
-
->>> stemmer = SnowballStemmer("english")
-
-
+
>>> stemmer = SnowballStemmer("english")
+
+

Stem a word.

-
-
->>> print(stemmer.stem("running"))
-run
-
-
+
>>> print(stemmer.stem("running"))
+run
+
+

Decide not to stem stopwords.

-
-
->>> stemmer2 = SnowballStemmer("english", ignore_stopwords=True)
->>> print(stemmer.stem("having"))
-have
->>> print(stemmer2.stem("having"))
-having
-
-
-

The 'english' stemmer is better than the original 'porter' stemmer.

-
-
->>> print(SnowballStemmer("english").stem("generously"))
-generous
->>> print(SnowballStemmer("porter").stem("generously"))
-gener
-
-
-
-

Note

-

Extra stemmer tests can be found in nltk.test.unit.test_stem.

+
>>> stemmer2 = SnowballStemmer("english", ignore_stopwords=True)
+>>> print(stemmer.stem("having"))
+have
+>>> print(stemmer2.stem("having"))
+having
+
+
+

The ‘english’ stemmer is better than the original ‘porter’ stemmer.

+
>>> print(SnowballStemmer("english").stem("generously"))
+generous
+>>> print(SnowballStemmer("porter").stem("generously"))
+gener
+
+
+
+

Note

+

Extra stemmer tests can be found in nltk.test.unit.test_stem.

+
+
+
+

Unit tests for ARLSTem Stemmer

+
>>> from nltk.stem.arlstem import ARLSTem
+
+
+

Create a Stemmer instance.

+
>>> stemmer = ARLSTem()
+
+
+

Stem a word.

+
>>> stemmer.stem('يعمل')
+'عمل'
+
+
+
+
+

Unit tests for ARLSTem2 Stemmer

+
>>> from nltk.stem.arlstem2 import ARLSTem2
+
+
+

Create a Stemmer instance.

+
>>> stemmer = ARLSTem2()
+
+

Stem a word.

+
>>> stemmer.stem('يعمل')
+'عمل'
+
+
+
+
\ No newline at end of file
diff --git a/howto/tag.html b/howto/tag.html
index 4460a7acc..4c0c54bf3 100644
--- a/howto/tag.html
+++ b/howto/tag.html
@@ -1,366 +1,194 @@
-Regression Tests
+NLTK :: Sample usage for tag
Add tests for:
  • make sure backoff is being done correctly.
  • make sure ngram taggers don't use previous sentences for context.
  • make sure ngram taggers see 'beginning of the sentence' as a unique context
  • make sure regexp tagger's regexps are tried in order
  • train on some simple examples, & make sure that the size & the generated models are correct.
  • make sure cutoff works as intended
  • make sure that ngram models only exclude contexts covered by the backoff tagger if the backoff tagger gets that context correct at all locations.

Sample usage for tag

+
+

Regression Tests

+
+

Sequential Taggers

+
+
Add tests for:
    +
  • make sure backoff is being done correctly.

  • +
  • make sure ngram taggers don’t use previous sentences for context.

  • +
  • make sure ngram taggers see ‘beginning of the sentence’ as a unique context

  • +
  • make sure regexp tagger’s regexps are tried in order

  • +
  • train on some simple examples, & make sure that the size & the generated models are correct.

  • +
  • make sure cutoff works as intended

  • +
  • make sure that ngram models only exclude contexts covered by the backoff tagger if the backoff tagger gets that context correct at all locations.

  • +all locations.

+
+

Regression Testing for issue #1025

+

We want to ensure that a RegexpTagger can be created with more than 100 patterns and does not fail with:

+
+

“AssertionError: sorry, but this version only supports 100 named groups”

+
>>> from nltk.tag import RegexpTagger
+>>> patterns = [(str(i), 'NNP',) for i in range(200)]
+>>> tagger = RegexpTagger(patterns)
+
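The resulting tagger behaves like any other sequential tagger: tokens matching one of the patterns receive the associated tag (a brief sketch; tokens matching none of the patterns would be tagged None):
+>>> tagger.tag(['1', '2'])
+[('1', 'NNP'), ('2', 'NNP')]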
+
+
+
+
+

Regression Testing for issue #2483

+

Ensure that tagging with pos_tag (PerceptronTagger) does not throw an IndexError when attempting to tag an empty string. What it must return instead is not strictly defined.

+
>>> from nltk.tag import pos_tag
+>>> pos_tag(['', 'is', 'a', 'beautiful', 'day'])
+[...]
+
+
+
+
+
\ No newline at end of file
diff --git a/howto/tokenize.html b/howto/tokenize.html
index 6548e1dc2..95ff7dfb7 100644
--- a/howto/tokenize.html
+++ b/howto/tokenize.html
@@ -1,479 +1,480 @@
+NLTK :: Sample usage for tokenize

Sample usage for tokenize

+
+

Regression Tests: Treebank Tokenizer

Some test strings.

-
-
->>> s1 = "On a $50,000 mortgage of 30 years at 8 percent, the monthly payment would be $366.88."
->>> word_tokenize(s1)
-['On', 'a', '$', '50,000', 'mortgage', 'of', '30', 'years', 'at', '8', 'percent', ',', 'the', 'monthly', 'payment', 'would', 'be', '$', '366.88', '.']
->>> s2 = "\"We beat some pretty good teams to get here,\" Slocum said."
->>> word_tokenize(s2)
-['``', 'We', 'beat', 'some', 'pretty', 'good', 'teams', 'to', 'get', 'here', ',', "''", 'Slocum', 'said', '.']
->>> s3 = "Well, we couldn't have this predictable, cliche-ridden, \"Touched by an Angel\" (a show creator John Masius worked on) wanna-be if she didn't."
->>> word_tokenize(s3)
-['Well', ',', 'we', 'could', "n't", 'have', 'this', 'predictable', ',', 'cliche-ridden', ',', '``', 'Touched', 'by', 'an', 'Angel', "''", '(', 'a', 'show', 'creator', 'John', 'Masius', 'worked', 'on', ')', 'wanna-be', 'if', 'she', 'did', "n't", '.']
->>> s4 = "I cannot cannot work under these conditions!"
->>> word_tokenize(s4)
-['I', 'can', 'not', 'can', 'not', 'work', 'under', 'these', 'conditions', '!']
->>> s5 = "The company spent $30,000,000 last year."
->>> word_tokenize(s5)
-['The', 'company', 'spent', '$', '30,000,000', 'last', 'year', '.']
->>> s6 = "The company spent 40.75% of its income last year."
->>> word_tokenize(s6)
-['The', 'company', 'spent', '40.75', '%', 'of', 'its', 'income', 'last', 'year', '.']
->>> s7 = "He arrived at 3:00 pm."
->>> word_tokenize(s7)
-['He', 'arrived', 'at', '3:00', 'pm', '.']
->>> s8 = "I bought these items: books, pencils, and pens."
->>> word_tokenize(s8)
-['I', 'bought', 'these', 'items', ':', 'books', ',', 'pencils', ',', 'and', 'pens', '.']
->>> s9 = "Though there were 150, 100 of them were old."
->>> word_tokenize(s9)
-['Though', 'there', 'were', '150', ',', '100', 'of', 'them', 'were', 'old', '.']
->>> s10 = "There were 300,000, but that wasn't enough."
->>> word_tokenize(s10)
-['There', 'were', '300,000', ',', 'but', 'that', 'was', "n't", 'enough', '.']
-
-
+
>>> s1 = "On a $50,000 mortgage of 30 years at 8 percent, the monthly payment would be $366.88."
+>>> word_tokenize(s1)
+['On', 'a', '$', '50,000', 'mortgage', 'of', '30', 'years', 'at', '8', 'percent', ',', 'the', 'monthly', 'payment', 'would', 'be', '$', '366.88', '.']
+>>> s2 = "\"We beat some pretty good teams to get here,\" Slocum said."
+>>> word_tokenize(s2)
+['``', 'We', 'beat', 'some', 'pretty', 'good', 'teams', 'to', 'get', 'here', ',', "''", 'Slocum', 'said', '.']
+>>> s3 = "Well, we couldn't have this predictable, cliche-ridden, \"Touched by an Angel\" (a show creator John Masius worked on) wanna-be if she didn't."
+>>> word_tokenize(s3)
+['Well', ',', 'we', 'could', "n't", 'have', 'this', 'predictable', ',', 'cliche-ridden', ',', '``', 'Touched', 'by', 'an', 'Angel', "''", '(', 'a', 'show', 'creator', 'John', 'Masius', 'worked', 'on', ')', 'wanna-be', 'if', 'she', 'did', "n't", '.']
+>>> s4 = "I cannot cannot work under these conditions!"
+>>> word_tokenize(s4)
+['I', 'can', 'not', 'can', 'not', 'work', 'under', 'these', 'conditions', '!']
+>>> s5 = "The company spent $30,000,000 last year."
+>>> word_tokenize(s5)
+['The', 'company', 'spent', '$', '30,000,000', 'last', 'year', '.']
+>>> s6 = "The company spent 40.75% of its income last year."
+>>> word_tokenize(s6)
+['The', 'company', 'spent', '40.75', '%', 'of', 'its', 'income', 'last', 'year', '.']
+>>> s7 = "He arrived at 3:00 pm."
+>>> word_tokenize(s7)
+['He', 'arrived', 'at', '3:00', 'pm', '.']
+>>> s8 = "I bought these items: books, pencils, and pens."
+>>> word_tokenize(s8)
+['I', 'bought', 'these', 'items', ':', 'books', ',', 'pencils', ',', 'and', 'pens', '.']
+>>> s9 = "Though there were 150, 100 of them were old."
+>>> word_tokenize(s9)
+['Though', 'there', 'were', '150', ',', '100', 'of', 'them', 'were', 'old', '.']
+>>> s10 = "There were 300,000, but that wasn't enough."
+>>> word_tokenize(s10)
+['There', 'were', '300,000', ',', 'but', 'that', 'was', "n't", 'enough', '.']
+>>> s11 = "It's more'n enough."
+>>> word_tokenize(s11)
+['It', "'s", 'more', "'n", 'enough', '.']
+
+
+

Testing improvement made to the TreebankWordTokenizer

+
>>> sx1 = '\xabNow that I can do.\xbb'
+>>> expected = ['\xab', 'Now', 'that', 'I', 'can', 'do', '.', '\xbb']
+>>> word_tokenize(sx1) == expected
+True
+>>> sx2 = 'The unicode 201C and 201D \u201cLEFT(RIGHT) DOUBLE QUOTATION MARK\u201d is also OPEN_PUNCT and CLOSE_PUNCT.'
+>>> expected = ['The', 'unicode', '201C', 'and', '201D', '\u201c', 'LEFT', '(', 'RIGHT', ')', 'DOUBLE', 'QUOTATION', 'MARK', '\u201d', 'is', 'also', 'OPEN_PUNCT', 'and', 'CLOSE_PUNCT', '.']
+>>> word_tokenize(sx2) == expected
+True
+
+
+

Testing treebank’s detokenizer

+
>>> from nltk.tokenize.treebank import TreebankWordDetokenizer
+>>> detokenizer = TreebankWordDetokenizer()
+>>> s = "On a $50,000 mortgage of 30 years at 8 percent, the monthly payment would be $366.88."
+>>> detokenizer.detokenize(word_tokenize(s))
+'On a $50,000 mortgage of 30 years at 8 percent, the monthly payment would be $366.88.'
+>>> s = "\"We beat some pretty good teams to get here,\" Slocum said."
+>>> detokenizer.detokenize(word_tokenize(s))
+'"We beat some pretty good teams to get here," Slocum said.'
+>>> s = "Well, we couldn't have this predictable, cliche-ridden, \"Touched by an Angel\" (a show creator John Masius worked on) wanna-be if she didn't."
+>>> detokenizer.detokenize(word_tokenize(s))
+'Well, we couldn\'t have this predictable, cliche-ridden, "Touched by an Angel" (a show creator John Masius worked on) wanna-be if she didn\'t.'
+>>> s = "I cannot cannot work under these conditions!"
+>>> detokenizer.detokenize(word_tokenize(s))
+'I cannot cannot work under these conditions!'
+>>> s = "The company spent $30,000,000 last year."
+>>> detokenizer.detokenize(word_tokenize(s))
+'The company spent $30,000,000 last year.'
+>>> s = "The company spent 40.75% of its income last year."
+>>> detokenizer.detokenize(word_tokenize(s))
+'The company spent 40.75% of its income last year.'
+>>> s = "He arrived at 3:00 pm."
+>>> detokenizer.detokenize(word_tokenize(s))
+'He arrived at 3:00 pm.'
+>>> s = "I bought these items: books, pencils, and pens."
+>>> detokenizer.detokenize(word_tokenize(s))
+'I bought these items: books, pencils, and pens.'
+>>> s = "Though there were 150, 100 of them were old."
+>>> detokenizer.detokenize(word_tokenize(s))
+'Though there were 150, 100 of them were old.'
+>>> s = "There were 300,000, but that wasn't enough."
+>>> detokenizer.detokenize(word_tokenize(s))
+"There were 300,000, but that wasn't enough."
+>>> s = 'How "are" you?'
+>>> detokenizer.detokenize(word_tokenize(s))
+'How "are" you?'
+>>> s = "Hello (world)"
+>>> detokenizer.detokenize(word_tokenize(s))
+'Hello (world)'
+>>> s = '<A sentence> with (many) [kinds] of {parentheses}. "Sometimes it\'s inside (quotes)". ("Sometimes the otherway around").'
+>>> detokenizer.detokenize(word_tokenize(s))
+'<A sentence> with (many) [kinds] of {parentheses}. "Sometimes it\'s inside (quotes)". ("Sometimes the otherway around").'
+>>> s = "Sentence ending with (parentheses)"
+>>> detokenizer.detokenize(word_tokenize(s))
+'Sentence ending with (parentheses)'
+>>> s = "(Sentence) starting with parentheses."
+>>> detokenizer.detokenize(word_tokenize(s))
+'(Sentence) starting with parentheses.'
+
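For sentences like these the detokenizer inverts word_tokenize exactly, although a perfect round trip is not guaranteed for arbitrary input (reusing the last s from above):
+>>> s == detokenizer.detokenize(word_tokenize(s))
+True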
+

Sentence tokenization in word_tokenize:

-
-
->>> s11 = "I called Dr. Jones. I called Dr. Jones."
->>> word_tokenize(s11)
-['I', 'called', 'Dr.', 'Jones', '.', 'I', 'called', 'Dr.', 'Jones', '.']
->>> s12 = ("Ich muss unbedingt daran denken, Mehl, usw. fur einen "
-...        "Kuchen einzukaufen. Ich muss.")
->>> word_tokenize(s12)
-['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw',
- '.', 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
->>> word_tokenize(s12, 'german')
-['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw.',
- 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
-
-
+
>>> s11 = "I called Dr. Jones. I called Dr. Jones."
+>>> word_tokenize(s11)
+['I', 'called', 'Dr.', 'Jones', '.', 'I', 'called', 'Dr.', 'Jones', '.']
+>>> s12 = ("Ich muss unbedingt daran denken, Mehl, usw. fur einen "
+...        "Kuchen einzukaufen. Ich muss.")
+>>> word_tokenize(s12)
+['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw',
+ '.', 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
+>>> word_tokenize(s12, 'german')
+['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw.',
+ 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
+
-
-

Regression Tests: Regexp Tokenizer

+
+
+

Regression Tests: Regexp Tokenizer

Some additional test strings.

-
-
->>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
-...      "two of them.\n\nThanks.")
->>> s2 = ("Alas, it has not rained today. When, do you think, "
-...       "will it rain again?")
->>> s3 = ("<p>Although this is <b>not</b> the case here, we must "
-...       "not relax our vigilance!</p>")
-
-
->>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=False)
-[', ', '. ', ', ', ', ', '?']
->>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=True)
-['Alas', 'it has not rained today', 'When', 'do you think',
- 'will it rain again']
-
-
-

Make sure that grouping parentheses don't confuse the tokenizer:

-
-
->>> regexp_tokenize(s3, r'</?(b|p)>', gaps=False)
-['<p>', '<b>', '</b>', '</p>']
->>> regexp_tokenize(s3, r'</?(b|p)>', gaps=True)
-['Although this is ', 'not',
- ' the case here, we must not relax our vigilance!']
-
-
-

Make sure that named groups don't confuse the tokenizer:

-
-
->>> regexp_tokenize(s3, r'</?(?P<named>b|p)>', gaps=False)
-['<p>', '<b>', '</b>', '</p>']
->>> regexp_tokenize(s3, r'</?(?P<named>b|p)>', gaps=True)
-['Although this is ', 'not',
- ' the case here, we must not relax our vigilance!']
-
-
-

Make sure that nested groups don't confuse the tokenizer:

-
-
->>> regexp_tokenize(s2, r'(h|r|l)a(s|(i|n0))', gaps=False)
-['las', 'has', 'rai', 'rai']
->>> regexp_tokenize(s2, r'(h|r|l)a(s|(i|n0))', gaps=True)
-['A', ', it ', ' not ', 'ned today. When, do you think, will it ',
- 'n again?']
-
-
-

The tokenizer should reject any patterns with backreferences:

-
-
->>> regexp_tokenize(s2, r'(.)\1')
+
>>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
+...      "two of them.\n\nThanks.")
+>>> s2 = ("Alas, it has not rained today. When, do you think, "
+...       "will it rain again?")
+>>> s3 = ("<p>Although this is <b>not</b> the case here, we must "
+...       "not relax our vigilance!</p>")
+
+
+
>>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=False)
+[', ', '. ', ', ', ', ', '?']
+>>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=True)
+['Alas', 'it has not rained today', 'When', 'do you think',
+ 'will it rain again']
+
+
+

Take care to avoid using capturing groups:

+
>>> regexp_tokenize(s3, r'</?[bp]>', gaps=False)
+['<p>', '<b>', '</b>', '</p>']
+>>> regexp_tokenize(s3, r'</?(?:b|p)>', gaps=False)
+['<p>', '<b>', '</b>', '</p>']
+>>> regexp_tokenize(s3, r'</?(?:b|p)>', gaps=True)
+['Although this is ', 'not',
+ ' the case here, we must not relax our vigilance!']
+
+
+

Named groups are capturing groups, and confuse the tokenizer:

+
>>> regexp_tokenize(s3, r'</?(?P<named>b|p)>', gaps=False)
+['p', 'b', 'b', 'p']
+>>> regexp_tokenize(s3, r'</?(?P<named>b|p)>', gaps=True)
+['p', 'Although this is ', 'b', 'not', 'b',
+ ' the case here, we must not relax our vigilance!', 'p']
+
+
+

Make sure that nested groups don’t confuse the tokenizer:

+
>>> regexp_tokenize(s2, r'(?:h|r|l)a(?:s|(?:i|n0))', gaps=False)
+['las', 'has', 'rai', 'rai']
+>>> regexp_tokenize(s2, r'(?:h|r|l)a(?:s|(?:i|n0))', gaps=True)
+['A', ', it ', ' not ', 'ned today. When, do you think, will it ',
+ 'n again?']
+
+
+

Back-references require capturing groups, and these are not supported:

+
>>> regexp_tokenize("aabbbcccc", r'(.)\1')
+['a', 'b', 'c', 'c']
+
+
+

A simple sentence tokenizer ‘\.(\s+|$)’

+
>>> regexp_tokenize(s, pattern=r'\.(?:\s+|$)', gaps=True)
+['Good muffins cost $3.88\nin New York',
+ 'Please buy me\ntwo of them', 'Thanks']
+
+
+
+
+

Regression Tests: TweetTokenizer

+

TweetTokenizer is a tokenizer specifically designed for micro-blogging tokenization tasks.

+
>>> from nltk.tokenize import TweetTokenizer
+>>> tknzr = TweetTokenizer()
+>>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--"
+>>> tknzr.tokenize(s0)
+['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--']
+>>> s1 = "@Joyster2012 @CathStaincliffe Good for you, girl!! Best wishes :-)"
+>>> tknzr.tokenize(s1)
+['@Joyster2012', '@CathStaincliffe', 'Good', 'for', 'you', ',', 'girl', '!', '!', 'Best', 'wishes', ':-)']
+>>> s2 = "3Points for #DreamTeam Gooo BAILEY! :) #PBB737Gold @PBBabscbn"
+>>> tknzr.tokenize(s2)
+['3Points', 'for', '#DreamTeam', 'Gooo', 'BAILEY', '!', ':)', '#PBB737Gold', '@PBBabscbn']
+>>> s3 = "@Insanomania They do... Their mentality doesn't :("
+>>> tknzr.tokenize(s3)
+['@Insanomania', 'They', 'do', '...', 'Their', 'mentality', "doesn't", ':(']
+>>> s4 = "RT @facugambande: Ya por arrancar a grabar !!! #TirenTirenTiren vamoo !!"
+>>> tknzr.tokenize(s4)
+['RT', '@facugambande', ':', 'Ya', 'por', 'arrancar', 'a', 'grabar', '!', '!', '!', '#TirenTirenTiren', 'vamoo', '!', '!']
+>>> tknzr = TweetTokenizer(reduce_len=True)
+>>> s5 = "@crushinghes the summer holidays are great but I'm so bored already :("
+>>> tknzr.tokenize(s5)
+['@crushinghes', 'the', 'summer', 'holidays', 'are', 'great', 'but', "I'm", 'so', 'bored', 'already', ':(']
+
+
+

It is possible to specify strip_handles and reduce_len parameters for a TweetTokenizer instance. With strip_handles set to True, the tokenizer removes Twitter handles (e.g. usernames). With reduce_len set to True, repeated character sequences of length 3 or greater are replaced with sequences of length 3.

+
>>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)
+>>> s6 = '@remy: This is waaaaayyyy too much for you!!!!!!'
+>>> tknzr.tokenize(s6)
+[':', 'This', 'is', 'waaayyy', 'too', 'much', 'for', 'you', '!', '!', '!']
+>>> s7 = '@_willy65: No place for @chuck tonight. Sorry.'
+>>> tknzr.tokenize(s7)
+[':', 'No', 'place', 'for', 'tonight', '.', 'Sorry', '.']
+>>> s8 = '@mar_tin is a great developer. Contact him at mar_tin@email.com.'
+>>> tknzr.tokenize(s8)
+['is', 'a', 'great', 'developer', '.', 'Contact', 'him', 'at', 'mar_tin@email.com', '.']
+
+
+

The preserve_case parameter (default: True) controls whether uppercase tokens are converted to lowercase. Emoticons are not affected:

+
>>> tknzr = TweetTokenizer(preserve_case=False)
+>>> s9 = "@jrmy: I'm REALLY HAPPYYY about that! NICEEEE :D :P"
+>>> tknzr.tokenize(s9)
+['@jrmy', ':', "i'm", 'really', 'happyyy', 'about', 'that', '!', 'niceeee', ':D', ':P']
+
+
+

It should not hang on long sequences of the same punctuation character.

+
>>> tknzr = TweetTokenizer()
+>>> s10 = "Photo: Aujourd'hui sur http://t.co/0gebOFDUzn Projet... http://t.co/bKfIUbydz2.............................. http://fb.me/3b6uXpz0L"
+>>> tknzr.tokenize(s10)
+['Photo', ':', "Aujourd'hui", 'sur', 'http://t.co/0gebOFDUzn', 'Projet', '...', 'http://t.co/bKfIUbydz2', '...', 'http://fb.me/3b6uXpz0L']
+
+
+
+
+

Regression Tests: PunktSentenceTokenizer

+

The sentence splitter should remove whitespace following the sentence boundary.

+
+>>> from nltk.tokenize.punkt import PunktSentenceTokenizer
+>>> pst = PunktSentenceTokenizer()
+>>> pst.tokenize('See Section 3).  Or Section 2).  ')
+['See Section 3).', 'Or Section 2).']
+>>> pst.tokenize('See Section 3.)  Or Section 2.)  ')
+['See Section 3.)', 'Or Section 2.)']
+>>> pst.tokenize('See Section 3.)  Or Section 2.)  ', realign_boundaries=False)
+['See Section 3.', ')  Or Section 2.', ')']
+
+
+

Two instances of PunktSentenceTokenizer should not share PunktParameters.

+
>>> pst = PunktSentenceTokenizer()
+>>> pst2 = PunktSentenceTokenizer()
+>>> pst._params is pst2._params
+False
+
+
+

Testing mutable default arguments for https://github.com/nltk/nltk/pull/2067

+
>>> from nltk.tokenize.punkt import PunktBaseClass, PunktTrainer, PunktSentenceTokenizer
+>>> from nltk.tokenize.punkt import PunktLanguageVars, PunktParameters
+>>> pbc = PunktBaseClass(lang_vars=None, params=None)
+>>> type(pbc._params)
+<class 'nltk.tokenize.punkt.PunktParameters'>
+>>> type(pbc._lang_vars)
+<class 'nltk.tokenize.punkt.PunktLanguageVars'>
+>>> pt = PunktTrainer(lang_vars=None)
+>>> type(pt._lang_vars)
+<class 'nltk.tokenize.punkt.PunktLanguageVars'>
+>>> pst = PunktSentenceTokenizer(lang_vars=None)
+>>> type(pst._lang_vars)
+<class 'nltk.tokenize.punkt.PunktLanguageVars'>
+
+
+
+
+

Regression Tests: align_tokens

+

Post-hoc alignment of tokens with a source string

+
>>> from nltk.tokenize.util import align_tokens
+>>> list(align_tokens([''], ""))
+[(0, 0)]
+>>> list(align_tokens([''], " "))
+[(0, 0)]
+>>> list(align_tokens([], ""))
+[]
+>>> list(align_tokens([], " "))
+[]
+>>> list(align_tokens(['a'], "a"))
+[(0, 1)]
+>>> list(align_tokens(['abc', 'def'], "abcdef"))
+[(0, 3), (3, 6)]
+>>> list(align_tokens(['abc', 'def'], "abc def"))
+[(0, 3), (4, 7)]
+>>> list(align_tokens(['ab', 'cd'], "ab cd ef"))
+[(0, 2), (3, 5)]
+>>> list(align_tokens(['ab', 'cd', 'ef'], "ab cd ef"))
+[(0, 2), (3, 5), (6, 8)]
+>>> list(align_tokens(['ab', 'cd', 'efg'], "ab cd ef"))
 Traceback (most recent call last):
-   ...
-ValueError: Regular expressions with back-references are
-not supported: '(.)\\1'
->>> regexp_tokenize(s2, r'(?P<foo>)(?P=foo)')
+  ...
+ValueError: substring "efg" not found in "ab cd ef"
+>>> list(align_tokens(['ab', 'cd', 'ef', 'gh'], "ab cd ef"))
 Traceback (most recent call last):
-   ...
-ValueError: Regular expressions with back-references are
-not supported: '(?P<foo>)(?P=foo)'
-
- -

A simple sentence tokenizer '.(s+|$)'

-
-
->>> regexp_tokenize(s, pattern=r'\.(\s+|$)', gaps=True)
-['Good muffins cost $3.88\nin New York',
- 'Please buy me\ntwo of them', 'Thanks']
-
-
+  ...
+ValueError: substring "gh" not found in "ab cd ef"
+>>> list(align_tokens(['The', 'plane', ',', 'bound', 'for', 'St', 'Petersburg', ',', 'crashed', 'in', 'Egypt', "'s", 'Sinai', 'desert', 'just', '23', 'minutes', 'after', 'take-off', 'from', 'Sharm', 'el-Sheikh', 'on', 'Saturday', '.'], "The plane, bound for St Petersburg, crashed in Egypt's Sinai desert just 23 minutes after take-off from Sharm el-Sheikh on Saturday."))
+[(0, 3), (4, 9), (9, 10), (11, 16), (17, 20), (21, 23), (24, 34), (34, 35), (36, 43), (44, 46), (47, 52), (52, 54), (55, 60), (61, 67), (68, 72), (73, 75), (76, 83), (84, 89), (90, 98), (99, 103), (104, 109), (110, 119), (120, 122), (123, 131), (131, 132)]
+
+
+

Regression Tests: MWETokenizer

+

Pickle an MWETokenizer

+
>>> from nltk.tokenize import MWETokenizer
+>>> import pickle
+
+
>>> tokenizer = MWETokenizer([('hors', "d'oeuvre")], separator='+')
+>>> p = pickle.dumps(tokenizer)
+>>> unpickled = pickle.loads(p)
+>>> unpickled.tokenize("An hors d'oeuvre tonight, sir?".split())
+['An', "hors+d'oeuvre", 'tonight,', 'sir?']
+
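Further multi-word expressions can be registered on an existing tokenizer with add_mwe; the separator configured above ('+') is reused:
+>>> tokenizer.add_mwe(('red', 'wine'))
+>>> tokenizer.tokenize("I like red wine".split())
+['I', 'like', 'red+wine']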
+
+
+
+

Regression Tests: TextTilingTokenizer

+

TextTilingTokenizer tokenizes text into coherent subtopic chunks based upon Hearst’s TextTiling algorithm.

+
>>> from nltk.tokenize import TextTilingTokenizer
+>>> from nltk.corpus import brown
+>>> tt = TextTilingTokenizer()
+>>> tt.tokenize(brown.raw()[0:1000])
+["\n\n\tThe/at Fulton/np-tl County/nn-tl Grand/jj-tl Jury/nn-tl said/vbd Friday/nr an/at investigation/nn of/in Atlanta's/np$ recent/jj primary/nn election/nn produced/vbd ``/`` no/at evidence/nn ''/'' that/cs any/dti irregularities/nns took/vbd place/nn ./.\n\n\n\tThe/at jury/nn further/rbr said/vbd in/in term-end/nn presentments/nns that/cs the/at City/nn-tl Executive/jj-tl Committee/nn-tl ,/, which/wdt had/hvd over-all/jj charge/nn of/in the/at election/nn ,/, ``/`` deserves/vbz the/at praise/nn and/cc thanks/nns of/in the/at City/nn-tl of/in-tl Atlanta/np-tl ''/'' for/in the/at manner/nn in/in which/wdt the/at election/nn was/bedz conducted/vbn ./.\n\n\n\tThe/at September-October/np term/nn jury/nn had/hvd been/ben charged/vbn by/in Fulton/np-tl Superior/jj-tl Court/nn-tl Judge/nn-tl Durwood/np Pye/np to/to investigate/vb reports/nns of/in possible/jj ``/`` irregularities/nns ''/'' in/in the/at hard-fought/jj primary/nn which/wdt was/bedz won/vbn by/in Mayor-nominate/nn-tl Ivan/np Allen/np Jr./"]
+
+
+

Test that ValueError exceptions are raised when illegal arguments are used.

+
>>> TextTilingTokenizer(similarity_method='foo').tokenize(brown.raw()[0:1000])
+Traceback (most recent call last):
+  ...
+ValueError: Similarity method foo not recognized
+>>> TextTilingTokenizer(smoothing_method='bar').tokenize(brown.raw()[0:1000])
+Traceback (most recent call last):
+  ...
+ValueError: Smoothing method bar not recognized
+
+
+
+
\ No newline at end of file
diff --git a/howto/toolbox.html b/howto/toolbox.html
index 1e040bfd3..56040efb5 100644
--- a/howto/toolbox.html
+++ b/howto/toolbox.html
@@ -1,720 +1,461 @@
-Unit test cases for toolbox
+NLTK :: Sample usage for toolbox
-
->>> from nltk import toolbox
-
-
-
-

toolbox.StandardFormat

-
-
->>> f = toolbox.StandardFormat()
-
-
-
-

toolbox.StandardFormat.open()

-
-
->>> import os, tempfile
->>> (fd, fname) = tempfile.mkstemp()
->>> tf = os.fdopen(fd, "w")
->>> _ = tf.write('\\lx a value\n\\lx another value\n')
->>> tf.close()
->>> f = toolbox.StandardFormat()
->>> f.open(fname)
->>> list(f.fields())
-[('lx', 'a value'), ('lx', 'another value')]
->>> f.close()
->>> os.unlink(fname)
-
-
-
-
-

toolbox.StandardFormat.open_string()

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\n\\lx another value\n')
->>> list(f.fields())
-[('lx', 'a value'), ('lx', 'another value')]
->>> f.close()
-
-
-
-
-

toolbox.StandardFormat.close()

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\n\\lx another value\n')
->>> list(f.fields())
-[('lx', 'a value'), ('lx', 'another value')]
->>> f.close()
-
-
-
-
-

toolbox.StandardFormat.line_num

-

StandardFormat.line_num contains the line number of the last line returned:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\n\\lx another value\n\\lx a third value\n')
->>> line_nums = []
->>> for l in f.raw_fields():
-...     line_nums.append(f.line_num)
->>> line_nums
-[1, 2, 3]
-
-
-

StandardFormat.line_num contains the line number of the last line returned:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n')
->>> line_nums = []
->>> for l in f.raw_fields():
-...     line_nums.append(f.line_num)
->>> line_nums
-[2, 5, 7]
-
-
-

StandardFormat.line_num doesn't exist before opening or after closing a file or string:


Sample usage for toolbox

+
+

Unit test cases for toolbox

+
>>> from nltk import toolbox
+
+
+
+

toolbox.StandardFormat

+
>>> f = toolbox.StandardFormat()
+
+
+
+

toolbox.StandardFormat.open()

+
>>> import os, tempfile
+>>> (fd, fname) = tempfile.mkstemp()
+>>> tf = os.fdopen(fd, "w")
+>>> _ = tf.write('\\lx a value\n\\lx another value\n')
+>>> tf.close()
+>>> f = toolbox.StandardFormat()
+>>> f.open(fname)
+>>> list(f.fields())
+[('lx', 'a value'), ('lx', 'another value')]
+>>> f.close()
+>>> os.unlink(fname)
+
+
+
+
+

toolbox.StandardFormat.open_string()

+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\n\\lx another value\n')
+>>> list(f.fields())
+[('lx', 'a value'), ('lx', 'another value')]
+>>> f.close()
+
+
+
+
+

toolbox.StandardFormat.close()

+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\n\\lx another value\n')
+>>> list(f.fields())
+[('lx', 'a value'), ('lx', 'another value')]
+>>> f.close()
+
+
+
+
+

toolbox.StandardFormat.line_num

+

StandardFormat.line_num contains the line number of the last line returned:

+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\n\\lx another value\n\\lx a third value\n')
+>>> line_nums = []
+>>> for l in f.raw_fields():
+...     line_nums.append(f.line_num)
+>>> line_nums
+[1, 2, 3]
+
+
+

StandardFormat.line_num contains the line number of the last line returned:

+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n')
+>>> line_nums = []
+>>> for l in f.raw_fields():
+...     line_nums.append(f.line_num)
+>>> line_nums
+[2, 5, 7]
+
+
+

StandardFormat.line_num doesn’t exist before opening or after closing a file or string:

-
-
->>> f = toolbox.StandardFormat()
->>> f.line_num
-Traceback (most recent call last):
-    ...
-AttributeError: 'StandardFormat' object has no attribute 'line_num'
->>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n')
->>> line_nums = []
->>> for l in f.raw_fields():
-...     line_nums.append(f.line_num)
->>> line_nums
-[2, 5, 7]
->>> f.close()
->>> f.line_num
-Traceback (most recent call last):
-    ...
-AttributeError: 'StandardFormat' object has no attribute 'line_num'
-
-
-
-
-

toolbox.StandardFormat.raw_fields()

-

raw_fields() returns an iterator over tuples of two strings representing the
marker and its value. The marker is given without the backslash and the
value without its trailing newline:

>>> f = toolbox.StandardFormat()
+>>> f.line_num
+Traceback (most recent call last):
+    ...
+AttributeError: 'StandardFormat' object has no attribute 'line_num'
+>>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n')
+>>> line_nums = []
+>>> for l in f.raw_fields():
+...     line_nums.append(f.line_num)
+>>> line_nums
+[2, 5, 7]
+>>> f.close()
+>>> f.line_num
+Traceback (most recent call last):
+    ...
+AttributeError: 'StandardFormat' object has no attribute 'line_num'
+
+
+ +
+

toolbox.StandardFormat.raw_fields()

+

raw_fields() returns an iterator over tuples of two strings representing the marker and its value. The marker is given without the backslash and the value without its trailing newline:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\n\\lx another value\n')
->>> list(f.raw_fields())
-[('lx', 'a value'), ('lx', 'another value')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\n\\lx another value\n')
+>>> list(f.raw_fields())
+[('lx', 'a value'), ('lx', 'another value')]
+
+

an empty file returns nothing:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('')
->>> list(f.raw_fields())
-[]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('')
+>>> list(f.raw_fields())
+[]
+
+

file with only a newline returns a single field with a None marker and an empty value:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\n')
->>> list(f.raw_fields())
-[(None, '')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\n')
+>>> list(f.raw_fields())
+[(None, '')]
+
+

file with only one field should be parsed ok:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx one value\n')
->>> list(f.raw_fields())
-[('lx', 'one value')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx one value\n')
+>>> list(f.raw_fields())
+[('lx', 'one value')]
+
+

file without a trailing newline should be parsed ok:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\n\\lx another value')
->>> list(f.raw_fields())
-[('lx', 'a value'), ('lx', 'another value')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\n\\lx another value')
+>>> list(f.raw_fields())
+[('lx', 'a value'), ('lx', 'another value')]
+
+

trailing white space is preserved except for the final newline:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n')
->>> list(f.raw_fields())
-[('lx', 'trailing space '), ('lx', 'trailing tab\t'), ('lx', 'extra newline\n')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n')
+>>> list(f.raw_fields())
+[('lx', 'trailing space '), ('lx', 'trailing tab\t'), ('lx', 'extra newline\n')]
+
+

line wrapping is preserved:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
->>> list(f.raw_fields())
-[('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
+>>> list(f.raw_fields())
+[('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')]
+
+

file beginning with a multiline record should be parsed ok:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
->>> list(f.raw_fields())
-[('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
+>>> list(f.raw_fields())
+[('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')]
+
+

file ending with a multiline record should be parsed ok:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lc a value\n\\lx another value\nmore of the value\nand still more\n')
->>> list(f.raw_fields())
-[('lc', 'a value'), ('lx', 'another value\nmore of the value\nand still more')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lc a value\n\\lx another value\nmore of the value\nand still more\n')
+>>> list(f.raw_fields())
+[('lc', 'a value'), ('lx', 'another value\nmore of the value\nand still more')]
+
+

file beginning with a BOM should be parsed ok:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\xef\xbb\xbf\\lx a value\n\\lx another value\n')
->>> list(f.raw_fields())
-[('lx', 'a value'), ('lx', 'another value')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\xef\xbb\xbf\\lx a value\n\\lx another value\n')
+>>> list(f.raw_fields())
+[('lx', 'a value'), ('lx', 'another value')]
+
+

file beginning with two BOMs should ignore only the first one:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\xef\xbb\xbf\xef\xbb\xbf\\lx a value\n\\lx another value\n')
->>> list(f.raw_fields())
-[(None, '\xef\xbb\xbf\\lx a value'), ('lx', 'another value')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\xef\xbb\xbf\xef\xbb\xbf\\lx a value\n\\lx another value\n')
+>>> list(f.raw_fields())
+[(None, '\xef\xbb\xbf\\lx a value'), ('lx', 'another value')]
+
+

should not ignore a BOM not at the beginning of the file:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\n\xef\xbb\xbf\\lx another value\n')
->>> list(f.raw_fields())
-[('lx', 'a value\n\xef\xbb\xbf\\lx another value')]
-
-
-
-
-

toolbox.StandardFormat.fields()

+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\n\xef\xbb\xbf\\lx another value\n')
+>>> list(f.raw_fields())
+[('lx', 'a value\n\xef\xbb\xbf\\lx another value')]
+
+
+ +
+

toolbox.StandardFormat.fields()

trailing white space is not preserved:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n')
->>> list(f.fields())
-[('lx', 'trailing space'), ('lx', 'trailing tab'), ('lx', 'extra newline')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n')
+>>> list(f.fields())
+[('lx', 'trailing space'), ('lx', 'trailing tab'), ('lx', 'extra newline')]
+
+

multiline fields are unwrapped:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
->>> list(f.fields())
-[('lx', 'a value more of the value and still more'), ('lc', 'another val')]
-
-
-
-
-

markers

+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
+>>> list(f.fields())
+[('lx', 'a value more of the value and still more'), ('lc', 'another val')]
+
+
+ +
+

markers

A backslash in the first position on a new line indicates the start of a marker. The backslash is not part of the marker:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\mk a value\n')
->>> list(f.fields())
-[('mk', 'a value')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\mk a value\n')
+>>> list(f.fields())
+[('mk', 'a value')]
+
+

If the backslash occurs later in the line it does not indicate the start of a marker:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\mk a value\n \\mk another one\n')
->>> list(f.raw_fields())
-[('mk', 'a value\n \\mk another one')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\mk a value\n \\mk another one\n')
+>>> list(f.raw_fields())
+[('mk', 'a value\n \\mk another one')]
+
+

There is no specific limit to the length of a marker:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\this_is_an_extremely_long_marker value\n')
->>> list(f.fields())
-[('this_is_an_extremely_long_marker', 'value')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\this_is_an_extremely_long_marker value\n')
+>>> list(f.fields())
+[('this_is_an_extremely_long_marker', 'value')]
+
+

A marker can contain any non-whitespace character:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\`~!@#$%^&*()_-=+[{]}\|,<.>/?;:"0123456789 value\n')
->>> list(f.fields())
-[('`~!@#$%^&*()_-=+[{]}\\|,<.>/?;:"0123456789', 'value')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\`~!@#$%^&*()_-=+[{]}\\|,<.>/?;:"0123456789 value\n')
+>>> list(f.fields())
+[('`~!@#$%^&*()_-=+[{]}\\|,<.>/?;:"0123456789', 'value')]
+
+

A marker is terminated by any white space character:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\mk a value\n\\mk\tanother one\n\\mk\rthird one\n\\mk\ffourth one')
->>> list(f.fields())
-[('mk', 'a value'), ('mk', 'another one'), ('mk', 'third one'), ('mk', 'fourth one')]
-
-
+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\mk a value\n\\mk\tanother one\n\\mk\rthird one\n\\mk\ffourth one')
+>>> list(f.fields())
+[('mk', 'a value'), ('mk', 'another one'), ('mk', 'third one'), ('mk', 'fourth one')]
+
+

Consecutive whitespace characters (except newline) are treated the same as one:

-
-
->>> f = toolbox.StandardFormat()
->>> f.open_string('\\mk \t\r\fa value\n')
->>> list(f.fields())
-[('mk', 'a value')]
-
-
-
-
-
-

toolbox.ToolboxData

-
-
->>> db = toolbox.ToolboxData()
-
-
-
-

toolbox.ToolboxData.parse()

+
>>> f = toolbox.StandardFormat()
+>>> f.open_string('\\mk \t\r\fa value\n')
+>>> list(f.fields())
+[('mk', 'a value')]
+
+
+ + +
+

toolbox.ToolboxData

+
>>> db = toolbox.ToolboxData()
+
+
+
+

toolbox.ToolboxData.parse()

check that normal parsing works:

-
-
->>> from xml.etree import ElementTree
->>> td = toolbox.ToolboxData()
->>> s = """\\_sh v3.0  400  Rotokas Dictionary
-... \\_DateStampHasFourDigitYear
-...
-... \\lx kaa
-... \\ps V.A
-... \\ge gag
-... \\gp nek i pas
-...
-... \\lx kaa
-... \\ps V.B
-... \\ge strangle
-... \\gp pasim nek
-... """
->>> td.open_string(s)
->>> tree = td.parse(key='lx')
->>> tree.tag
-'toolbox_data'
->>> ElementTree.tostring(list(tree)[0]).decode('utf8')
-'<header><_sh>v3.0  400  Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>'
->>> ElementTree.tostring(list(tree)[1]).decode('utf8')
-'<record><lx>kaa</lx><ps>V.A</ps><ge>gag</ge><gp>nek i pas</gp></record>'
->>> ElementTree.tostring(list(tree)[2]).decode('utf8')
-'<record><lx>kaa</lx><ps>V.B</ps><ge>strangle</ge><gp>pasim nek</gp></record>'
-
-
-

check that guessing the key marker works:

-
-
->>> from xml.etree import ElementTree
->>> td = toolbox.ToolboxData()
->>> s = """\\_sh v3.0  400  Rotokas Dictionary
-... \\_DateStampHasFourDigitYear
-...
-... \\lx kaa
-... \\ps V.A
-... \\ge gag
-... \\gp nek i pas
-...
-... \\lx kaa
-... \\ps V.B
-... \\ge strangle
-... \\gp pasim nek
-... """
->>> td.open_string(s)
->>> tree = td.parse()
->>> ElementTree.tostring(list(tree)[0]).decode('utf8')
-'<header><_sh>v3.0  400  Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>'
->>> ElementTree.tostring(list(tree)[1]).decode('utf8')
-'<record><lx>kaa</lx><ps>V.A</ps><ge>gag</ge><gp>nek i pas</gp></record>'
->>> ElementTree.tostring(list(tree)[2]).decode('utf8')
-'<record><lx>kaa</lx><ps>V.B</ps><ge>strangle</ge><gp>pasim nek</gp></record>'
-
-
-
-
-
-

toolbox functions

-
-

toolbox.to_sfm_string()

+
>>> from xml.etree import ElementTree
+>>> td = toolbox.ToolboxData()
+>>> s = """\\_sh v3.0  400  Rotokas Dictionary
+... \\_DateStampHasFourDigitYear
+...
+... \\lx kaa
+... \\ps V.A
+... \\ge gag
+... \\gp nek i pas
+...
+... \\lx kaa
+... \\ps V.B
+... \\ge strangle
+... \\gp pasim nek
+... """
+>>> td.open_string(s)
+>>> tree = td.parse(key='lx')
+>>> tree.tag
+'toolbox_data'
+>>> ElementTree.tostring(list(tree)[0]).decode('utf8')
+'<header><_sh>v3.0  400  Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>'
+>>> ElementTree.tostring(list(tree)[1]).decode('utf8')
+'<record><lx>kaa</lx><ps>V.A</ps><ge>gag</ge><gp>nek i pas</gp></record>'
+>>> ElementTree.tostring(list(tree)[2]).decode('utf8')
+'<record><lx>kaa</lx><ps>V.B</ps><ge>strangle</ge><gp>pasim nek</gp></record>'
+
+

check that guessing the key marker works:

+
>>> from xml.etree import ElementTree
+>>> td = toolbox.ToolboxData()
+>>> s = """\\_sh v3.0  400  Rotokas Dictionary
+... \\_DateStampHasFourDigitYear
+...
+... \\lx kaa
+... \\ps V.A
+... \\ge gag
+... \\gp nek i pas
+...
+... \\lx kaa
+... \\ps V.B
+... \\ge strangle
+... \\gp pasim nek
+... """
+>>> td.open_string(s)
+>>> tree = td.parse()
+>>> ElementTree.tostring(list(tree)[0]).decode('utf8')
+'<header><_sh>v3.0  400  Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>'
+>>> ElementTree.tostring(list(tree)[1]).decode('utf8')
+'<record><lx>kaa</lx><ps>V.A</ps><ge>gag</ge><gp>nek i pas</gp></record>'
+>>> ElementTree.tostring(list(tree)[2]).decode('utf8')
+'<record><lx>kaa</lx><ps>V.B</ps><ge>strangle</ge><gp>pasim nek</gp></record>'
+
+

toolbox functions

+
+

toolbox.to_sfm_string()

+
+
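The worked example for this section is missing here. As a rough, illustrative sketch of how to_sfm_string() might be exercised (the record built below is our own, and we assume, per the function's docstring, that a single record element is accepted; the result should contain one backslash-marker line per field):

>>> from xml.etree.ElementTree import Element, SubElement
>>> from nltk import toolbox
>>> record = Element('record')
>>> for marker, value in [('lx', 'kaa'), ('ps', 'V.A'), ('ge', 'gag')]:
...     field = SubElement(record, marker)
...     field.text = value
>>> sfm = toolbox.to_sfm_string(record)  # expect lines of the form '\marker value'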
\ No newline at end of file
diff --git a/howto/translate.html b/howto/translate.html
new file mode 100644
index 000000000..d7042ac76
--- /dev/null
+++ b/howto/translate.html
@@ -0,0 +1,372 @@
+NLTK :: Sample usage for translate

Sample usage for translate

+
+

Alignment

+
+

Corpus Reader

+
>>> from nltk.corpus import comtrans
+>>> words = comtrans.words('alignment-en-fr.txt')
+>>> for word in words[:6]:
+...     print(word)
+Resumption
+of
+the
+session
+I
+declare
+>>> als = comtrans.aligned_sents('alignment-en-fr.txt')[0]
+>>> als
+AlignedSent(['Resumption', 'of', 'the', 'session'],
+['Reprise', 'de', 'la', 'session'],
+Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]))
+
+
+
+
+

Alignment Objects

+

Aligned sentences are simply a mapping between words in a sentence:

+
>>> print(" ".join(als.words))
+Resumption of the session
+>>> print(" ".join(als.mots))
+Reprise de la session
+>>> als.alignment
+Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])
+
+
+

Usually we look at them from the perspective of a source to a target language, but they are easily inverted:

+
>>> als.invert()
+AlignedSent(['Reprise', 'de', 'la', 'session'],
+['Resumption', 'of', 'the', 'session'],
+Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]))
+
+
+

We can create new alignments, but these need to be in the correct range of the corresponding sentences:

+
>>> from nltk.translate import Alignment, AlignedSent
+>>> als = AlignedSent(['Reprise', 'de', 'la', 'session'],
+...                   ['Resumption', 'of', 'the', 'session'],
+...                   Alignment([(0, 0), (1, 4), (2, 1), (3, 3)]))
+Traceback (most recent call last):
+    ...
+IndexError: Alignment is outside boundary of mots
+
+
+

You can set alignments with any sequence of tuples, so long as the first two indexes of the tuple are the alignment indices:

+
>>> als.alignment = Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))])
+
+
+
>>> Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))])
+Alignment([(0, 0), (1, 1), (2, 2, 'boat'), (3, 3, False, (1, 2))])
+
+
+
+
+

Alignment Algorithms

+
+

EM for IBM Model 1

+

Here is an example from Koehn, 2010:

+
>>> from nltk.translate import IBMModel1
+>>> corpus = [AlignedSent(['the', 'house'], ['das', 'Haus']),
+...           AlignedSent(['the', 'book'], ['das', 'Buch']),
+...           AlignedSent(['a', 'book'], ['ein', 'Buch'])]
+>>> em_ibm1 = IBMModel1(corpus, 20)
+>>> print(round(em_ibm1.translation_table['the']['das'], 1))
+1.0
+>>> print(round(em_ibm1.translation_table['book']['das'], 1))
+0.0
+>>> print(round(em_ibm1.translation_table['house']['das'], 1))
+0.0
+>>> print(round(em_ibm1.translation_table['the']['Buch'], 1))
+0.0
+>>> print(round(em_ibm1.translation_table['book']['Buch'], 1))
+1.0
+>>> print(round(em_ibm1.translation_table['a']['Buch'], 1))
+0.0
+>>> print(round(em_ibm1.translation_table['book']['ein'], 1))
+0.0
+>>> print(round(em_ibm1.translation_table['a']['ein'], 1))
+1.0
+>>> print(round(em_ibm1.translation_table['the']['Haus'], 1))
+0.0
+>>> print(round(em_ibm1.translation_table['house']['Haus'], 1))
+1.0
+>>> print(round(em_ibm1.translation_table['book'][None], 1))
+0.5
+
+
+

And using an NLTK corpus. We train on only 10 sentences, since it is so slow:

+
>>> from nltk.corpus import comtrans
+>>> com_ibm1 = IBMModel1(comtrans.aligned_sents()[:10], 20)
+>>> print(round(com_ibm1.translation_table['bitte']['Please'], 1))
+0.2
+>>> print(round(com_ibm1.translation_table['Sitzungsperiode']['session'], 1))
+1.0
+
+
+
+
+
+

Evaluation

+

The evaluation metrics for alignments are usually not concerned with the contents of alignments themselves, but with how they compare to a “gold standard” alignment that has been constructed by human experts. For this reason we often want to work just with raw set operations against the alignment points. This gives us a very clean form for defining our evaluation metrics.

+
+

Note

+

The AlignedSent class makes no distinction between “possible” and “sure” alignments. Thus all alignments are treated as “sure”.

+
+
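To make the idea of raw set operations concrete, here is a small illustrative sketch (the names a and gold are our own, not part of the original examples): an Alignment behaves as a set of index pairs, so intersection and length give the ingredients of every metric below.

>>> a = Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])
>>> gold = Alignment([(0, 0), (1, 2), (2, 2), (3, 3)])
>>> sorted(a & gold)
[(0, 0), (2, 2), (3, 3)]
>>> len(a & gold) / len(a)
0.75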

Consider the following aligned sentence for evaluation:

+
>>> my_als = AlignedSent(['Resumption', 'of', 'the', 'session'],
+...     ['Reprise', 'de', 'la', 'session'],
+...     Alignment([(0, 0), (3, 3), (1, 2), (1, 1), (1, 3)]))
+
+
+
+

Precision

+

precision = |A∩P| / |A|

+

Precision is probably the best-known evaluation metric, and it is implemented in nltk.metrics.scores.precision. Since precision is simply interested in the proportion of correct alignments, we calculate the ratio of the number of our test alignments (A) that match a possible alignment (P), over the number of test alignments provided. There is no penalty for missing a possible alignment in our test alignments. An easy way to game this metric is to provide just one test alignment that is in P [OCH2000].

+

Here are some examples:

+
>>> from nltk.metrics import precision
+>>> als.alignment = Alignment([(0,0), (1,1), (2,2), (3,3)])
+>>> precision(Alignment([]), als.alignment)
+0.0
+>>> precision(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
+1.0
+>>> precision(Alignment([(0,0), (3,3)]), als.alignment)
+0.5
+>>> precision(Alignment.fromstring('0-0 3-3'), als.alignment)
+0.5
+>>> precision(Alignment([(0,0), (1,1), (2,2), (3,3), (1,2), (2,1)]), als.alignment)
+1.0
+>>> precision(als.alignment, my_als.alignment)
+0.6
+
+
+
+
+

Recall

+

recall = |A∩S| / |S|

+

Recall is another well-known evaluation metric, with a set-based implementation in NLTK as nltk.metrics.scores.recall. Since recall is simply interested in the proportion of found alignments, we calculate the ratio of the number of our test alignments (A) that match a sure alignment (S) over the number of sure alignments. There is no penalty for producing a lot of test alignments. An easy way to game this metric is to include every possible alignment in our test alignments, regardless of whether they are correct [OCH2000].

+

Here are some examples:

+
>>> from nltk.metrics import recall
+>>> print(recall(Alignment([]), als.alignment))
+None
+>>> recall(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
+1.0
+>>> recall(Alignment.fromstring('0-0 3-3'), als.alignment)
+1.0
+>>> recall(Alignment([(0,0), (3,3)]), als.alignment)
+1.0
+>>> recall(Alignment([(0,0), (1,1), (2,2), (3,3), (1,2), (2,1)]), als.alignment)
+0.66666...
+>>> recall(als.alignment, my_als.alignment)
+0.75
+
+
+
+
+

Alignment Error Rate (AER)

+

AER = 1 - (|A∩S| + |A∩P|) / (|A| + |S|)

+

Alignment Error Rate is a commonly used metric for assessing sentence alignments. It combines the precision and recall metrics such that a perfect alignment must have all of the sure alignments and may have some possible alignments [MIHALCEA2003] [KOEHN2010].

+
+

Note

+

[KOEHN2010] defines the AER as AER = (|A∩S| + |A∩P|) / (|A| + |S|) in his book, but corrects it to the above in his online errata. This is in line with [MIHALCEA2003].

+
+
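As a sanity check (our own sketch, not an NLTK API), the AER of my_als against als can be computed directly from this set definition, taking P = S since no “possible” alignments are annotated:

>>> A = Alignment([(0, 0), (3, 3), (1, 2), (1, 1), (1, 3)])  # my_als.alignment
>>> S = Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])          # als.alignment
>>> P = S
>>> round(1 - (len(A & S) + len(A & P)) / (len(A) + len(S)), 6)
0.333333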

Here are some examples:

+
>>> from nltk.translate import alignment_error_rate
+>>> alignment_error_rate(Alignment([]), als.alignment)
+1.0
+>>> alignment_error_rate(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
+0.0
+>>> alignment_error_rate(als.alignment, my_als.alignment)
+0.333333...
+>>> alignment_error_rate(als.alignment, my_als.alignment,
+...     als.alignment | Alignment([(1,2), (2,1)]))
+0.222222...
+
+
+
+
OCH2000
+

Och, F. and Ney, H. (2000) Statistical Machine Translation, EAMT Workshop

+
+
MIHALCEA2003
+

Mihalcea, R. and Pedersen, T. (2003) An evaluation exercise for word alignment, HLT-NAACL 2003

+
+
KOEHN2010
+

Koehn, P. (2010) Statistical Machine Translation, Cambridge University Press

+
+
+
+
+
+
\ No newline at end of file
diff --git a/howto/tree.html b/howto/tree.html
index 40dcc77bb..9338ed6e9 100644
--- a/howto/tree.html
+++ b/howto/tree.html
@@ -1,1571 +1,1306 @@
-Unit tests for nltk.tree.Tree
+NLTK :: Sample usage for tree
-
->>> from nltk.tree import *
-
-

Sample usage for tree

+
+

Unit tests for nltk.tree.Tree

+
>>> from nltk.tree import *
+
+

Some trees to run tests on:

-
-
->>> dp1 = Tree('dp', [Tree('d', ['the']), Tree('np', ['dog'])])
->>> dp2 = Tree('dp', [Tree('d', ['the']), Tree('np', ['cat'])])
->>> vp = Tree('vp', [Tree('v', ['chased']), dp2])
->>> tree = Tree('s', [dp1, vp])
->>> print(tree)
-(s (dp (d the) (np dog)) (vp (v chased) (dp (d the) (np cat))))
-
-
+
>>> dp1 = Tree('dp', [Tree('d', ['the']), Tree('np', ['dog'])])
+>>> dp2 = Tree('dp', [Tree('d', ['the']), Tree('np', ['cat'])])
+>>> vp = Tree('vp', [Tree('v', ['chased']), dp2])
+>>> tree = Tree('s', [dp1, vp])
+>>> print(tree)
+(s (dp (d the) (np dog)) (vp (v chased) (dp (d the) (np cat))))
+
+

The node label is accessed using the label() method:

-
-
->>> dp1.label(), dp2.label(), vp.label(), tree.label()
-('dp', 'dp', 'vp', 's')
-
-
->>> print(tree[1,1,1,0])
-cat
-
-
+
>>> dp1.label(), dp2.label(), vp.label(), tree.label()
+('dp', 'dp', 'vp', 's')
+
+
+
>>> print(tree[1,1,1,0])
+cat
+
+

The treepositions method returns a list of the tree positions of subtrees and leaves in a tree. By default, it gives the position of every tree, subtree, and leaf, in prefix order:

-
-
->>> print(tree.treepositions())
-[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0), (1, 1, 0, 0), (1, 1, 1), (1, 1, 1, 0)]
-
-
+
>>> print(tree.treepositions())
+[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0), (1, 1, 0, 0), (1, 1, 1), (1, 1, 1, 0)]
+
+

In addition to str and repr, several methods exist to convert a tree object to one of several standard tree encodings:

-
-
->>> print(tree.pformat_latex_qtree())
-\Tree [.s
-        [.dp [.d the ] [.np dog ] ]
-        [.vp [.v chased ] [.dp [.d the ] [.np cat ] ] ] ]
-
-
+
>>> print(tree.pformat_latex_qtree())
+\Tree [.s
+        [.dp [.d the ] [.np dog ] ]
+        [.vp [.v chased ] [.dp [.d the ] [.np cat ] ] ] ]
+
+

There is also a fancy ASCII art representation:

-
-
->>> tree.pretty_print()
-              s
-      ________|_____
-     |              vp
-     |         _____|___
-     dp       |         dp
-  ___|___     |      ___|___
- d       np   v     d       np
- |       |    |     |       |
-the     dog chased the     cat
-
-
->>> tree.pretty_print(unicodelines=True, nodedist=4)
-                       s
-        ┌──────────────┴────────┐
-        │                       vp
-        │              ┌────────┴──────┐
-        dp             │               dp
- ┌──────┴──────┐       │        ┌──────┴──────┐
- d             np      v        d             np
- │             │       │        │             │
-the           dog    chased    the           cat
-
-
+
>>> tree.pretty_print()
+              s
+      ________|_____
+     |              vp
+     |         _____|___
+     dp       |         dp
+  ___|___     |      ___|___
+ d       np   v     d       np
+ |       |    |     |       |
+the     dog chased the     cat
+
+
+
>>> tree.pretty_print(unicodelines=True, nodedist=4)
+                       s
+        ┌──────────────┴────────┐
+        │                       vp
+        │              ┌────────┴──────┐
+        dp             │               dp
+ ┌──────┴──────┐       │        ┌──────┴──────┐
+ d             np      v        d             np
+ │             │       │        │             │
+the           dog    chased    the           cat
+
+

Trees can be initialized from treebank strings:

-
-
->>> tree2 = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
->>> print(tree2)
-(S (NP I) (VP (V enjoyed) (NP my cookie)))
-
-
+
>>> tree2 = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
+>>> print(tree2)
+(S (NP I) (VP (V enjoyed) (NP my cookie)))
+
+

Trees can be compared for equality:

-
-
->>> tree == Tree.fromstring(str(tree))
-True
->>> tree2 == Tree.fromstring(str(tree2))
-True
->>> tree == tree2
-False
->>> tree == Tree.fromstring(str(tree2))
-False
->>> tree2 == Tree.fromstring(str(tree))
-False
-
-
->>> tree != Tree.fromstring(str(tree))
-False
->>> tree2 != Tree.fromstring(str(tree2))
-False
->>> tree != tree2
-True
->>> tree != Tree.fromstring(str(tree2))
-True
->>> tree2 != Tree.fromstring(str(tree))
-True
-
-
->>> tree < tree2 or tree > tree2
-True
-
-
-
-

Tree Parsing

+
>>> tree == Tree.fromstring(str(tree))
+True
+>>> tree2 == Tree.fromstring(str(tree2))
+True
+>>> tree == tree2
+False
+>>> tree == Tree.fromstring(str(tree2))
+False
+>>> tree2 == Tree.fromstring(str(tree))
+False
+
+
+
>>> tree != Tree.fromstring(str(tree))
+False
+>>> tree2 != Tree.fromstring(str(tree2))
+False
+>>> tree != tree2
+True
+>>> tree != Tree.fromstring(str(tree2))
+True
+>>> tree2 != Tree.fromstring(str(tree))
+True
+
+
+
>>> tree < tree2 or tree > tree2
+True
+
+
+
+

Tree Parsing

The class method Tree.fromstring() can be used to parse trees, and it provides some additional options.

-
-
->>> tree = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
->>> print(tree)
-(S (NP I) (VP (V enjoyed) (NP my cookie)))
-
-
+
>>> tree = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
+>>> print(tree)
+(S (NP I) (VP (V enjoyed) (NP my cookie)))
+
+

When called on a subclass of Tree, it will create trees of that type:

-
-
->>> tree = ImmutableTree.fromstring('(VP (V enjoyed) (NP my cookie))')
->>> print(tree)
-(VP (V enjoyed) (NP my cookie))
->>> print(type(tree))
-<class 'nltk.tree.ImmutableTree'>
->>> tree[1] = 'x'
-Traceback (most recent call last):
-  . . .
-ValueError: ImmutableTree may not be modified
->>> del tree[0]
-Traceback (most recent call last):
-  . . .
-ValueError: ImmutableTree may not be modified
-
-
-

The brackets parameter can be used to specify two characters that
should be used as brackets:

>>> tree = ImmutableTree.fromstring('(VP (V enjoyed) (NP my cookie))')
+>>> print(tree)
+(VP (V enjoyed) (NP my cookie))
+>>> print(type(tree))
+<class 'nltk.tree.ImmutableTree'>
+>>> tree[1] = 'x'
+Traceback (most recent call last):
+  . . .
+ValueError: ImmutableTree may not be modified
+>>> del tree[0]
+Traceback (most recent call last):
+  . . .
+ValueError: ImmutableTree may not be modified
+
+
+

The brackets parameter can be used to specify two characters that should be used as brackets:

-
-
->>> print(Tree.fromstring('[S [NP I] [VP [V enjoyed] [NP my cookie]]]',
-...                  brackets='[]'))
-(S (NP I) (VP (V enjoyed) (NP my cookie)))
->>> print(Tree.fromstring('<S <NP I> <VP <V enjoyed> <NP my cookie>>>',
-...                  brackets='<>'))
-(S (NP I) (VP (V enjoyed) (NP my cookie)))
-
-
-

If brackets is not a string, or is not exactly two characters,
then Tree.fromstring raises an exception:

>>> print(Tree.fromstring('[S [NP I] [VP [V enjoyed] [NP my cookie]]]',
+...                  brackets='[]'))
+(S (NP I) (VP (V enjoyed) (NP my cookie)))
+>>> print(Tree.fromstring('<S <NP I> <VP <V enjoyed> <NP my cookie>>>',
+...                  brackets='<>'))
+(S (NP I) (VP (V enjoyed) (NP my cookie)))
+
+
+

If brackets is not a string, or is not exactly two characters, then Tree.fromstring raises an exception:

-
-
->>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets='')
-Traceback (most recent call last):
-  . . .
-TypeError: brackets must be a length-2 string
->>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets='<<>>')
-Traceback (most recent call last):
-  . . .
-TypeError: brackets must be a length-2 string
->>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets=12)
-Traceback (most recent call last):
-  . . .
-TypeError: brackets must be a length-2 string
->>> Tree.fromstring('<<NP my cookie>>', brackets=('<<','>>'))
-Traceback (most recent call last):
-  . . .
-TypeError: brackets must be a length-2 string
-
-
+
>>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets='')
+Traceback (most recent call last):
+  . . .
+TypeError: brackets must be a length-2 string
+>>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets='<<>>')
+Traceback (most recent call last):
+  . . .
+TypeError: brackets must be a length-2 string
+>>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets=12)
+Traceback (most recent call last):
+  . . .
+TypeError: brackets must be a length-2 string
+>>> Tree.fromstring('<<NP my cookie>>', brackets=('<<','>>'))
+Traceback (most recent call last):
+  . . .
+TypeError: brackets must be a length-2 string
+
+

(We may add support for multi-character brackets in the future, in
-which case the brackets=('<<','>>') example would start working.)

+which case the brackets=('<<','>>') example would start working.)

Whitespace brackets are not permitted:

-
-
->>> Tree.fromstring('(NP my cookie\n', brackets='(\n')
-Traceback (most recent call last):
-  . . .
-TypeError: whitespace brackets not allowed
-
-
+
>>> Tree.fromstring('(NP my cookie\n', brackets='(\n')
+Traceback (most recent call last):
+  . . .
+TypeError: whitespace brackets not allowed
+
+

If an invalid tree is given to Tree.fromstring, then it raises a ValueError, with a description of the problem:

-
-
->>> Tree.fromstring('(NP my cookie) (NP my milk)')
-Traceback (most recent call last):
-  . . .
-ValueError: Tree.fromstring(): expected 'end-of-string' but got '(NP'
-            at index 15.
-                "...y cookie) (NP my mil..."
-                              ^
->>> Tree.fromstring(')NP my cookie(')
-Traceback (most recent call last):
-  . . .
-ValueError: Tree.fromstring(): expected '(' but got ')'
-            at index 0.
-                ")NP my coo..."
-                 ^
->>> Tree.fromstring('(NP my cookie))')
-Traceback (most recent call last):
-  . . .
-ValueError: Tree.fromstring(): expected 'end-of-string' but got ')'
-            at index 14.
-                "...my cookie))"
-                              ^
->>> Tree.fromstring('my cookie)')
-Traceback (most recent call last):
-  . . .
-ValueError: Tree.fromstring(): expected '(' but got 'my'
-            at index 0.
-                "my cookie)"
-                 ^
->>> Tree.fromstring('(NP my cookie')
-Traceback (most recent call last):
-  . . .
-ValueError: Tree.fromstring(): expected ')' but got 'end-of-string'
-            at index 13.
-                "... my cookie"
-                              ^
->>> Tree.fromstring('')
-Traceback (most recent call last):
-  . . .
-ValueError: Tree.fromstring(): expected '(' but got 'end-of-string'
-            at index 0.
-                ""
-                 ^
-
-
+
>>> Tree.fromstring('(NP my cookie) (NP my milk)')
+Traceback (most recent call last):
+  . . .
+ValueError: Tree.fromstring(): expected 'end-of-string' but got '(NP'
+            at index 15.
+                "...y cookie) (NP my mil..."
+                              ^
+>>> Tree.fromstring(')NP my cookie(')
+Traceback (most recent call last):
+  . . .
+ValueError: Tree.fromstring(): expected '(' but got ')'
+            at index 0.
+                ")NP my coo..."
+                 ^
+>>> Tree.fromstring('(NP my cookie))')
+Traceback (most recent call last):
+  . . .
+ValueError: Tree.fromstring(): expected 'end-of-string' but got ')'
+            at index 14.
+                "...my cookie))"
+                              ^
+>>> Tree.fromstring('my cookie)')
+Traceback (most recent call last):
+  . . .
+ValueError: Tree.fromstring(): expected '(' but got 'my'
+            at index 0.
+                "my cookie)"
+                 ^
+>>> Tree.fromstring('(NP my cookie')
+Traceback (most recent call last):
+  . . .
+ValueError: Tree.fromstring(): expected ')' but got 'end-of-string'
+            at index 13.
+                "... my cookie"
+                              ^
+>>> Tree.fromstring('')
+Traceback (most recent call last):
+  . . .
+ValueError: Tree.fromstring(): expected '(' but got 'end-of-string'
+            at index 0.
+                ""
+                 ^
+
+

Trees with no children are supported:

-
-
->>> print(Tree.fromstring('(S)'))
-(S )
->>> print(Tree.fromstring('(X (Y) (Z))'))
-(X (Y ) (Z ))
-
-
+
>>> print(Tree.fromstring('(S)'))
+(S )
+>>> print(Tree.fromstring('(X (Y) (Z))'))
+(X (Y ) (Z ))
+
+

Trees with an empty node label and no children are supported:

-
-
->>> print(Tree.fromstring('()'))
-( )
->>> print(Tree.fromstring('(X () ())'))
-(X ( ) ( ))
-
-
+
>>> print(Tree.fromstring('()'))
+( )
+>>> print(Tree.fromstring('(X () ())'))
+(X ( ) ( ))
+
+

Trees with an empty node label and children are supported, but only if the first child is not a leaf (otherwise, it will be treated as the node label).

-
-
->>> print(Tree.fromstring('((A) (B) (C))'))
-( (A ) (B ) (C ))
->>> print(Tree.fromstring('((A) leaf)'))
-( (A ) leaf)
->>> print(Tree.fromstring('(((())))'))
-( ( ( ( ))))
-
-
+
>>> print(Tree.fromstring('((A) (B) (C))'))
+( (A ) (B ) (C ))
+>>> print(Tree.fromstring('((A) leaf)'))
+( (A ) leaf)
+>>> print(Tree.fromstring('(((())))'))
+( ( ( ( ))))
+
+

The optional arguments read_node and read_leaf may be used to transform the string values of nodes or leaves.

-
-
->>> print(Tree.fromstring('(A b (C d e) (F (G h i)))',
-...                  read_node=lambda s: '<%s>' % s,
-...                  read_leaf=lambda s: '"%s"' % s))
-(<A> "b" (<C> "d" "e") (<F> (<G> "h" "i")))
-
-
+
>>> print(Tree.fromstring('(A b (C d e) (F (G h i)))',
+...                  read_node=lambda s: '<%s>' % s,
+...                  read_leaf=lambda s: '"%s"' % s))
+(<A> "b" (<C> "d" "e") (<F> (<G> "h" "i")))
+
+

These transformation functions are typically used when the node or leaf labels should be parsed to a non-string value (such as a feature structure). If node and leaf labels need to be able to include whitespace, then you must also use the optional node_pattern and leaf_pattern arguments.

-
-
->>> from nltk.featstruct import FeatStruct
->>> tree = Tree.fromstring('([cat=NP] [lex=the] [lex=dog])',
-...                   read_node=FeatStruct, read_leaf=FeatStruct)
->>> tree.set_label(tree.label().unify(FeatStruct('[num=singular]')))
->>> print(tree)
-([cat='NP', num='singular'] [lex='the'] [lex='dog'])
-
-
-

The optional argument remove_empty_top_bracketing can be used to
remove any top-level empty bracketing that occurs.

>>> from nltk.featstruct import FeatStruct
+>>> tree = Tree.fromstring('([cat=NP] [lex=the] [lex=dog])',
+...                   read_node=FeatStruct, read_leaf=FeatStruct)
+>>> tree.set_label(tree.label().unify(FeatStruct('[num=singular]')))
+>>> print(tree)
+([cat='NP', num='singular'] [lex='the'] [lex='dog'])
+
+
+

The optional argument remove_empty_top_bracketing can be used to remove any top-level empty bracketing that occurs.

-
-
->>> print(Tree.fromstring('((S (NP I) (VP (V enjoyed) (NP my cookie))))',
-...                  remove_empty_top_bracketing=True))
-(S (NP I) (VP (V enjoyed) (NP my cookie)))
-
-
+
>>> print(Tree.fromstring('((S (NP I) (VP (V enjoyed) (NP my cookie))))',
+...                  remove_empty_top_bracketing=True))
+(S (NP I) (VP (V enjoyed) (NP my cookie)))
+
+

It will not remove a top-level empty bracketing with multiple children:

-
-
->>> print(Tree.fromstring('((A a) (B b))'))
-( (A a) (B b))
-
-
-
-
-

Parented Trees

+
>>> print(Tree.fromstring('((A a) (B b))'))
+( (A a) (B b))
+
+
+
+

Tree.fromlist()

+

The class method Tree.fromlist() can be used to parse trees that are expressed as nested lists, such as those produced by the tree() function from the wordnet module.

+
>>> from nltk.corpus import wordnet as wn
+>>> t=Tree.fromlist(wn.synset('dog.n.01').tree(lambda s:s.hypernyms()))
+>>> print(t.height())
+14
+>>> print(t.leaves())
+["Synset('entity.n.01')", "Synset('entity.n.01')"]
+>>> t.pretty_print()
+                  Synset('dog.n.01')
+         _________________|__________________
+Synset('canine.n.                            |
+       02')                                  |
+        |                                    |
+ Synset('carnivor                            |
+     e.n.01')                                |
+        |                                    |
+ Synset('placenta                            |
+     l.n.01')                                |
+        |                                    |
+Synset('mammal.n.                            |
+       01')                                  |
+        |                                    |
+ Synset('vertebra                            |
+    te.n.01')                                |
+        |                                    |
+Synset('chordate.                     Synset('domestic
+      n.01')                           _animal.n.01')
+        |                                    |
+Synset('animal.n.                    Synset('animal.n.
+       01')                                 01')
+        |                                    |
+Synset('organism.                    Synset('organism.
+      n.01')                               n.01')
+        |                                    |
+ Synset('living_t                     Synset('living_t
+   hing.n.01')                          hing.n.01')
+        |                                    |
+ Synset('whole.n.                     Synset('whole.n.
+       02')                                 02')
+        |                                    |
+Synset('object.n.                    Synset('object.n.
+       01')                                 01')
+        |                                    |
+ Synset('physical                     Synset('physical
+  _entity.n.01')                       _entity.n.01')
+        |                                    |
+Synset('entity.n.                    Synset('entity.n.
+       01')                                 01')
+
+
+
+
+
+

Parented Trees

ParentedTree is a subclass of Tree that automatically maintains parent pointers for single-parented trees. Parented trees can be created directly from a node label and a list of children:

-
-
->>> ptree = (
-...     ParentedTree('VP', [
-...         ParentedTree('VERB', ['saw']),
-...         ParentedTree('NP', [
-...             ParentedTree('DET', ['the']),
-...             ParentedTree('NOUN', ['dog'])])]))
->>> print(ptree)
-(VP (VERB saw) (NP (DET the) (NOUN dog)))
-
-
+
>>> ptree = (
+...     ParentedTree('VP', [
+...         ParentedTree('VERB', ['saw']),
+...         ParentedTree('NP', [
+...             ParentedTree('DET', ['the']),
+...             ParentedTree('NOUN', ['dog'])])]))
+>>> print(ptree)
+(VP (VERB saw) (NP (DET the) (NOUN dog)))
+
+

Parented trees can be created from strings using the classmethod ParentedTree.fromstring:

-
-
->>> ptree = ParentedTree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))')
->>> print(ptree)
-(VP (VERB saw) (NP (DET the) (NOUN dog)))
->>> print(type(ptree))
-<class 'nltk.tree.ParentedTree'>
-
-
+
>>> ptree = ParentedTree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))')
+>>> print(ptree)
+(VP (VERB saw) (NP (DET the) (NOUN dog)))
+>>> print(type(ptree))
+<class 'nltk.tree.ParentedTree'>
+
+

Parented trees can also be created by using the classmethod ParentedTree.convert to convert another type of tree to a parented tree:

-
-
->>> tree = Tree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))')
->>> ptree = ParentedTree.convert(tree)
->>> print(ptree)
-(VP (VERB saw) (NP (DET the) (NOUN dog)))
->>> print(type(ptree))
-<class 'nltk.tree.ParentedTree'>
-
-
- +
>>> tree = Tree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))')
+>>> ptree = ParentedTree.convert(tree)
+>>> print(ptree)
+(VP (VERB saw) (NP (DET the) (NOUN dog)))
+>>> print(type(ptree))
+<class 'nltk.tree.ParentedTree'>
+
+

ParentedTrees should never be used in the same tree as Trees or MultiParentedTrees. Mixing tree implementations may result in incorrect parent pointers and in TypeError exceptions:

-
-
->>> # Inserting a Tree in a ParentedTree gives an exception:
->>> ParentedTree('NP', [
-...     Tree('DET', ['the']), Tree('NOUN', ['dog'])])
-Traceback (most recent call last):
-  . . .
-TypeError: Can not insert a non-ParentedTree into a ParentedTree
-
-
->>> # inserting a ParentedTree in a Tree gives incorrect parent pointers:
->>> broken_tree = Tree('NP', [
-...     ParentedTree('DET', ['the']), ParentedTree('NOUN', ['dog'])])
->>> print(broken_tree[0].parent())
-None
-
-
-
-

Parented Tree Methods

+
>>> # Inserting a Tree in a ParentedTree gives an exception:
+>>> ParentedTree('NP', [
+...     Tree('DET', ['the']), Tree('NOUN', ['dog'])])
+Traceback (most recent call last):
+  . . .
+TypeError: Can not insert a non-ParentedTree into a ParentedTree
+
+
+
>>> # inserting a ParentedTree in a Tree gives incorrect parent pointers:
+>>> broken_tree = Tree('NP', [
+...     ParentedTree('DET', ['the']), ParentedTree('NOUN', ['dog'])])
+>>> print(broken_tree[0].parent())
+None
+
+
+
+

Parented Tree Methods

In addition to all the methods defined by the Tree class, the
ParentedTree class adds six new methods whose values are
-automatically updated whenver a parented tree is modified: parent(),
+automatically updated whenever a parented tree is modified: parent(),
parent_index(), left_sibling(), right_sibling(), root(), and
treeposition().

-

The parent() method contains a ParentedTree's parent, if it has
-one; and None otherwise. ParentedTrees that do not have
-parents are known as "root trees."

-
-
->>> for subtree in ptree.subtrees():
-...     print(subtree)
-...     print('  Parent = %s' % subtree.parent())
-(VP (VERB saw) (NP (DET the) (NOUN dog)))
-  Parent = None
-(VERB saw)
-  Parent = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-(NP (DET the) (NOUN dog))
-  Parent = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-(DET the)
-  Parent = (NP (DET the) (NOUN dog))
-(NOUN dog)
-  Parent = (NP (DET the) (NOUN dog))
-
-
-

The parent_index() method stores the index of a tree in its parent's +

The parent() method contains a ParentedTree’s parent, if it has one; and None otherwise. ParentedTrees that do not have parents are known as “root trees.”

+
>>> for subtree in ptree.subtrees():
+...     print(subtree)
+...     print('  Parent = %s' % subtree.parent())
+(VP (VERB saw) (NP (DET the) (NOUN dog)))
+  Parent = None
+(VERB saw)
+  Parent = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+(NP (DET the) (NOUN dog))
+  Parent = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+(DET the)
+  Parent = (NP (DET the) (NOUN dog))
+(NOUN dog)
+  Parent = (NP (DET the) (NOUN dog))
+
+
+

The parent_index() method stores the index of a tree in its parent’s
child list. If a tree does not have a parent, then its parent_index
-is None.

-
-
->>> for subtree in ptree.subtrees():
-...     print(subtree)
-...     print('  Parent Index = %s' % subtree.parent_index())
-...     assert (subtree.parent() is None or
-...             subtree.parent()[subtree.parent_index()] is subtree)
-(VP (VERB saw) (NP (DET the) (NOUN dog)))
-  Parent Index = None
-(VERB saw)
-  Parent Index = 0
-(NP (DET the) (NOUN dog))
-  Parent Index = 1
-(DET the)
-  Parent Index = 0
-(NOUN dog)
-  Parent Index = 1
-
-
-

Note that ptree.parent().index(ptree) is not equivalent to
-ptree.parent_index(). In particular, ptree.parent().index(ptree)
-will return the index of the first child of ptree.parent() that is
-equal to ptree (using ==); and that child may not be
-ptree:

-
-
->>> on_and_on = ParentedTree('CONJP', [
-...     ParentedTree('PREP', ['on']),
-...     ParentedTree('COJN', ['and']),
-...     ParentedTree('PREP', ['on'])])
->>> second_on = on_and_on[2]
->>> print(second_on.parent_index())
-2
->>> print(second_on.parent().index(second_on))
-0
-
-
+is None.

+
>>> for subtree in ptree.subtrees():
+...     print(subtree)
+...     print('  Parent Index = %s' % subtree.parent_index())
+...     assert (subtree.parent() is None or
+...             subtree.parent()[subtree.parent_index()] is subtree)
+(VP (VERB saw) (NP (DET the) (NOUN dog)))
+  Parent Index = None
+(VERB saw)
+  Parent Index = 0
+(NP (DET the) (NOUN dog))
+  Parent Index = 1
+(DET the)
+  Parent Index = 0
+(NOUN dog)
+  Parent Index = 1
+
+
+

Note that ptree.parent().index(ptree) is not equivalent to ptree.parent_index(). In particular, ptree.parent().index(ptree) will return the index of the first child of ptree.parent() that is equal to ptree (using ==); and that child may not be ptree:

+
>>> on_and_on = ParentedTree('CONJP', [
+...     ParentedTree('PREP', ['on']),
+...     ParentedTree('COJN', ['and']),
+...     ParentedTree('PREP', ['on'])])
+>>> second_on = on_and_on[2]
+>>> print(second_on.parent_index())
+2
+>>> print(second_on.parent().index(second_on))
+0
+
+

The methods left_sibling() and right_sibling() can be used to get a
-parented tree's siblings. If a tree does not have a left or right
-sibling, then the corresponding method's value is None:

-
-
->>> for subtree in ptree.subtrees():
-...     print(subtree)
-...     print('  Left Sibling  = %s' % subtree.left_sibling())
-...     print('  Right Sibling = %s' % subtree.right_sibling())
-(VP (VERB saw) (NP (DET the) (NOUN dog)))
-  Left Sibling  = None
-  Right Sibling = None
-(VERB saw)
-  Left Sibling  = None
-  Right Sibling = (NP (DET the) (NOUN dog))
-(NP (DET the) (NOUN dog))
-  Left Sibling  = (VERB saw)
-  Right Sibling = None
-(DET the)
-  Left Sibling  = None
-  Right Sibling = (NOUN dog)
-(NOUN dog)
-  Left Sibling  = (DET the)
-  Right Sibling = None
-
-
-

A parented tree's root tree can be accessed using the root()
-method. This method follows the tree's parent pointers until it
+parented tree’s siblings. If a tree does not have a left or right
+sibling, then the corresponding method’s value is None:

+
>>> for subtree in ptree.subtrees():
+...     print(subtree)
+...     print('  Left Sibling  = %s' % subtree.left_sibling())
+...     print('  Right Sibling = %s' % subtree.right_sibling())
+(VP (VERB saw) (NP (DET the) (NOUN dog)))
+  Left Sibling  = None
+  Right Sibling = None
+(VERB saw)
+  Left Sibling  = None
+  Right Sibling = (NP (DET the) (NOUN dog))
+(NP (DET the) (NOUN dog))
+  Left Sibling  = (VERB saw)
+  Right Sibling = None
+(DET the)
+  Left Sibling  = None
+  Right Sibling = (NOUN dog)
+(NOUN dog)
+  Left Sibling  = (DET the)
+  Right Sibling = None
+
+
+

A parented tree’s root tree can be accessed using the root() method. This method follows the tree’s parent pointers until it finds a tree without a parent. If a tree does not have a parent, then it is its own root:

-
-
->>> for subtree in ptree.subtrees():
-...     print(subtree)
-...     print('  Root = %s' % subtree.root())
-(VP (VERB saw) (NP (DET the) (NOUN dog)))
-  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-(VERB saw)
-  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-(NP (DET the) (NOUN dog))
-  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-(DET the)
-  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-(NOUN dog)
-  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-
-
-

The treeposition() method can be used to find a tree's treeposition
relative to its root:

>>> for subtree in ptree.subtrees():
+...     print(subtree)
+...     print('  Root = %s' % subtree.root())
+(VP (VERB saw) (NP (DET the) (NOUN dog)))
+  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+(VERB saw)
+  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+(NP (DET the) (NOUN dog))
+  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+(DET the)
+  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+(NOUN dog)
+  Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+
+
+

The treeposition() method can be used to find a tree’s treeposition relative to its root:

-
-
->>> for subtree in ptree.subtrees():
-...     print(subtree)
-...     print('  Tree Position = %s' % (subtree.treeposition(),))
-...     assert subtree.root()[subtree.treeposition()] is subtree
-(VP (VERB saw) (NP (DET the) (NOUN dog)))
-  Tree Position = ()
-(VERB saw)
-  Tree Position = (0,)
-(NP (DET the) (NOUN dog))
-  Tree Position = (1,)
-(DET the)
-  Tree Position = (1, 0)
-(NOUN dog)
-  Tree Position = (1, 1)
-
-
+
>>> for subtree in ptree.subtrees():
+...     print(subtree)
+...     print('  Tree Position = %s' % (subtree.treeposition(),))
+...     assert subtree.root()[subtree.treeposition()] is subtree
+(VP (VERB saw) (NP (DET the) (NOUN dog)))
+  Tree Position = ()
+(VERB saw)
+  Tree Position = (0,)
+(NP (DET the) (NOUN dog))
+  Tree Position = (1,)
+(DET the)
+  Tree Position = (1, 0)
+(NOUN dog)
+  Tree Position = (1, 1)
+
+

Whenever a parented tree is modified, all of the methods described
above (parent(), parent_index(), left_sibling(), right_sibling(),
root(), and treeposition()) are automatically updated. For example,
-if we replace ptree's subtree for the word "dog" with a new
-subtree for "cat," the method values for both the "dog" subtree and the
-"cat" subtree get automatically updated:

-
-
->>> # Replace the dog with a cat
->>> dog = ptree[1,1]
->>> cat = ParentedTree('NOUN', ['cat'])
->>> ptree[1,1] = cat
-
-
->>> # the noun phrase is no longer the dog's parent:
->>> print(dog.parent(), dog.parent_index(), dog.left_sibling())
-None None None
->>> # dog is now its own root.
->>> print(dog.root())
-(NOUN dog)
->>> print(dog.treeposition())
-()
-
-
->>> # the cat's parent is now the noun phrase:
->>> print(cat.parent())
-(NP (DET the) (NOUN cat))
->>> print(cat.parent_index())
-1
->>> print(cat.left_sibling())
-(DET the)
->>> print(cat.root())
-(VP (VERB saw) (NP (DET the) (NOUN cat)))
->>> print(cat.treeposition())
-(1, 1)
-
-
-
-
-

ParentedTree Regression Tests

+if we replace ptree’s subtree for the word “dog” with a new
+subtree for “cat,” the method values for both the “dog” subtree and the
+“cat” subtree get automatically updated:

+
>>> # Replace the dog with a cat
+>>> dog = ptree[1,1]
+>>> cat = ParentedTree('NOUN', ['cat'])
+>>> ptree[1,1] = cat
+
+
+
>>> # the noun phrase is no longer the dog's parent:
+>>> print(dog.parent(), dog.parent_index(), dog.left_sibling())
+None None None
+>>> # dog is now its own root.
+>>> print(dog.root())
+(NOUN dog)
+>>> print(dog.treeposition())
+()
+
+
+
>>> # the cat's parent is now the noun phrase:
+>>> print(cat.parent())
+(NP (DET the) (NOUN cat))
+>>> print(cat.parent_index())
+1
+>>> print(cat.left_sibling())
+(DET the)
+>>> print(cat.root())
+(VP (VERB saw) (NP (DET the) (NOUN cat)))
+>>> print(cat.treeposition())
+(1, 1)
+
+
+
+
+

ParentedTree Regression Tests

Keep track of all trees that we create (including subtrees) using this variable:

-
-
->>> all_ptrees = []
-
-
-

Define a helper function to create new parented trees:

-
-
->>> def make_ptree(s):
-...     ptree = ParentedTree.convert(Tree.fromstring(s))
-...     all_ptrees.extend(t for t in ptree.subtrees()
-...                       if isinstance(t, Tree))
-...     return ptree
-
-
+
>>> all_ptrees = []
+
+
+

Define a helper function to create new parented trees:

+
>>> def make_ptree(s):
+...     ptree = ParentedTree.convert(Tree.fromstring(s))
+...     all_ptrees.extend(t for t in ptree.subtrees()
+...                       if isinstance(t, Tree))
+...     return ptree
+
+

Define a test function that examines every subtree in all_ptrees and checks that all six of its methods are defined correctly. If any ptrees are passed as arguments, then they are printed.

-
-
->>> def pcheck(*print_ptrees):
-...     for ptree in all_ptrees:
-...         # Check ptree's methods.
-...         if ptree.parent() is not None:
-...             i = ptree.parent_index()
-...             assert ptree.parent()[i] is ptree
-...             if i > 0:
-...                 assert ptree.left_sibling() is ptree.parent()[i-1]
-...             if i < (len(ptree.parent())-1):
-...                 assert ptree.right_sibling() is ptree.parent()[i+1]
-...             assert len(ptree.treeposition()) > 0
-...             assert (ptree.treeposition() ==
-...                     ptree.parent().treeposition() + (ptree.parent_index(),))
-...             assert ptree.root() is not ptree
-...             assert ptree.root() is not None
-...             assert ptree.root() is ptree.parent().root()
-...             assert ptree.root()[ptree.treeposition()] is ptree
-...         else:
-...             assert ptree.parent_index() is None
-...             assert ptree.left_sibling() is None
-...             assert ptree.right_sibling() is None
-...             assert ptree.root() is ptree
-...             assert ptree.treeposition() == ()
-...         # Check ptree's children's methods:
-...         for i, child in enumerate(ptree):
-...             if isinstance(child, Tree):
-...                 # pcheck parent() & parent_index() methods
-...                 assert child.parent() is ptree
-...                 assert child.parent_index() == i
-...                 # pcheck sibling methods
-...                 if i == 0:
-...                     assert child.left_sibling() is None
-...                 else:
-...                     assert child.left_sibling() is ptree[i-1]
-...                 if i == len(ptree)-1:
-...                     assert child.right_sibling() is None
-...                 else:
-...                     assert child.right_sibling() is ptree[i+1]
-...     if print_ptrees:
-...         print('ok!', end=' ')
-...         for ptree in print_ptrees: print(ptree)
-...     else:
-...         print('ok!')
-
-
+
>>> def pcheck(*print_ptrees):
+...     for ptree in all_ptrees:
+...         # Check ptree's methods.
+...         if ptree.parent() is not None:
+...             i = ptree.parent_index()
+...             assert ptree.parent()[i] is ptree
+...             if i > 0:
+...                 assert ptree.left_sibling() is ptree.parent()[i-1]
+...             if i < (len(ptree.parent())-1):
+...                 assert ptree.right_sibling() is ptree.parent()[i+1]
+...             assert len(ptree.treeposition()) > 0
+...             assert (ptree.treeposition() ==
+...                     ptree.parent().treeposition() + (ptree.parent_index(),))
+...             assert ptree.root() is not ptree
+...             assert ptree.root() is not None
+...             assert ptree.root() is ptree.parent().root()
+...             assert ptree.root()[ptree.treeposition()] is ptree
+...         else:
+...             assert ptree.parent_index() is None
+...             assert ptree.left_sibling() is None
+...             assert ptree.right_sibling() is None
+...             assert ptree.root() is ptree
+...             assert ptree.treeposition() == ()
+...         # Check ptree's children's methods:
+...         for i, child in enumerate(ptree):
+...             if isinstance(child, Tree):
+...                 # pcheck parent() & parent_index() methods
+...                 assert child.parent() is ptree
+...                 assert child.parent_index() == i
+...                 # pcheck sibling methods
+...                 if i == 0:
+...                     assert child.left_sibling() is None
+...                 else:
+...                     assert child.left_sibling() is ptree[i-1]
+...                 if i == len(ptree)-1:
+...                     assert child.right_sibling() is None
+...                 else:
+...                     assert child.right_sibling() is ptree[i+1]
+...     if print_ptrees:
+...         print('ok!', end=' ')
+...         for ptree in print_ptrees: print(ptree)
+...     else:
+...         print('ok!')
+
+

Run our test function on a variety of newly-created trees:

-
-
->>> pcheck(make_ptree('(A)'))
-ok! (A )
->>> pcheck(make_ptree('(A (B (C (D) (E f)) g) h)'))
-ok! (A (B (C (D ) (E f)) g) h)
->>> pcheck(make_ptree('(A (B) (C c) (D d d) (E e e e))'))
-ok! (A (B ) (C c) (D d d) (E e e e))
->>> pcheck(make_ptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))'))
-ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e )))
-
-
+
>>> pcheck(make_ptree('(A)'))
+ok! (A )
+>>> pcheck(make_ptree('(A (B (C (D) (E f)) g) h)'))
+ok! (A (B (C (D ) (E f)) g) h)
+>>> pcheck(make_ptree('(A (B) (C c) (D d d) (E e e e))'))
+ok! (A (B ) (C c) (D d d) (E e e e))
+>>> pcheck(make_ptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))'))
+ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e )))
+
+

Run our test function after performing various tree-modification operations:

__delitem__()

-
-
->>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> e = ptree[0,0,1]
->>> del ptree[0,0,1]; pcheck(ptree); pcheck(e)
-ok! (A (B (C (D ) (Q p)) g) h)
-ok! (E f)
->>> del ptree[0,0,0]; pcheck(ptree)
-ok! (A (B (C (Q p)) g) h)
->>> del ptree[0,1]; pcheck(ptree)
-ok! (A (B (C (Q p))) h)
->>> del ptree[-1]; pcheck(ptree)
-ok! (A (B (C (Q p))))
->>> del ptree[-100]
-Traceback (most recent call last):
-  . . .
-IndexError: index out of range
->>> del ptree[()]
-Traceback (most recent call last):
-  . . .
-IndexError: The tree position () may not be deleted.
-
-
->>> # With slices:
->>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
->>> b = ptree[0]
->>> del ptree[0:0]; pcheck(ptree)
-ok! (A (B c) (D e) f g (H i) j (K l))
->>> del ptree[:1]; pcheck(ptree); pcheck(b)
-ok! (A (D e) f g (H i) j (K l))
-ok! (B c)
->>> del ptree[-2:]; pcheck(ptree)
-ok! (A (D e) f g (H i))
->>> del ptree[1:3]; pcheck(ptree)
-ok! (A (D e) (H i))
->>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
->>> del ptree[5:1000]; pcheck(ptree)
-ok! (A (B c) (D e) f g (H i))
->>> del ptree[-2:1000]; pcheck(ptree)
-ok! (A (B c) (D e) f)
->>> del ptree[-100:1]; pcheck(ptree)
-ok! (A (D e) f)
->>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
->>> del ptree[1:-2:2]; pcheck(ptree)
-ok! (A (B c) f (H i) j (K l))
-
-
+
>>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> e = ptree[0,0,1]
+>>> del ptree[0,0,1]; pcheck(ptree); pcheck(e)
+ok! (A (B (C (D ) (Q p)) g) h)
+ok! (E f)
+>>> del ptree[0,0,0]; pcheck(ptree)
+ok! (A (B (C (Q p)) g) h)
+>>> del ptree[0,1]; pcheck(ptree)
+ok! (A (B (C (Q p))) h)
+>>> del ptree[-1]; pcheck(ptree)
+ok! (A (B (C (Q p))))
+>>> del ptree[-100]
+Traceback (most recent call last):
+  . . .
+IndexError: index out of range
+>>> del ptree[()]
+Traceback (most recent call last):
+  . . .
+IndexError: The tree position () may not be deleted.
+
+
+
>>> # With slices:
+>>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
+>>> b = ptree[0]
+>>> del ptree[0:0]; pcheck(ptree)
+ok! (A (B c) (D e) f g (H i) j (K l))
+>>> del ptree[:1]; pcheck(ptree); pcheck(b)
+ok! (A (D e) f g (H i) j (K l))
+ok! (B c)
+>>> del ptree[-2:]; pcheck(ptree)
+ok! (A (D e) f g (H i))
+>>> del ptree[1:3]; pcheck(ptree)
+ok! (A (D e) (H i))
+>>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
+>>> del ptree[5:1000]; pcheck(ptree)
+ok! (A (B c) (D e) f g (H i))
+>>> del ptree[-2:1000]; pcheck(ptree)
+ok! (A (B c) (D e) f)
+>>> del ptree[-100:1]; pcheck(ptree)
+ok! (A (D e) f)
+>>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
+>>> del ptree[1:-2:2]; pcheck(ptree)
+ok! (A (B c) f (H i) j (K l))
+
+

__setitem__()

-
-
->>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> d, e, q = ptree[0,0]
->>> ptree[0,0,0] = 'x'; pcheck(ptree); pcheck(d)
-ok! (A (B (C x (E f) (Q p)) g) h)
-ok! (D )
->>> ptree[0,0,1] = make_ptree('(X (Y z))'); pcheck(ptree); pcheck(e)
-ok! (A (B (C x (X (Y z)) (Q p)) g) h)
-ok! (E f)
->>> ptree[1] = d; pcheck(ptree)
-ok! (A (B (C x (X (Y z)) (Q p)) g) (D ))
->>> ptree[-1] = 'x'; pcheck(ptree)
-ok! (A (B (C x (X (Y z)) (Q p)) g) x)
->>> ptree[-100] = 'y'
-Traceback (most recent call last):
-  . . .
-IndexError: index out of range
->>> ptree[()] = make_ptree('(X y)')
-Traceback (most recent call last):
-  . . .
-IndexError: The tree position () may not be assigned to.
-
-
->>> # With slices:
->>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
->>> b = ptree[0]
->>> ptree[0:0] = ('x', make_ptree('(Y)')); pcheck(ptree)
-ok! (A x (Y ) (B c) (D e) f g (H i) j (K l))
->>> ptree[2:6] = (); pcheck(ptree); pcheck(b)
-ok! (A x (Y ) (H i) j (K l))
-ok! (B c)
->>> ptree[-2:] = ('z', 'p'); pcheck(ptree)
-ok! (A x (Y ) (H i) z p)
->>> ptree[1:3] = [make_ptree('(X)') for x in range(10)]; pcheck(ptree)
-ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p)
->>> ptree[5:1000] = []; pcheck(ptree)
-ok! (A x (X ) (X ) (X ) (X ))
->>> ptree[-2:1000] = ['n']; pcheck(ptree)
-ok! (A x (X ) (X ) n)
->>> ptree[-100:1] = [make_ptree('(U v)')]; pcheck(ptree)
-ok! (A (U v) (X ) (X ) n)
->>> ptree[-1:] = (make_ptree('(X)') for x in range(3)); pcheck(ptree)
-ok! (A (U v) (X ) (X ) (X ) (X ) (X ))
->>> ptree[1:-2:2] = ['x', 'y']; pcheck(ptree)
-ok! (A (U v) x (X ) y (X ) (X ))
-
-
+
>>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> d, e, q = ptree[0,0]
+>>> ptree[0,0,0] = 'x'; pcheck(ptree); pcheck(d)
+ok! (A (B (C x (E f) (Q p)) g) h)
+ok! (D )
+>>> ptree[0,0,1] = make_ptree('(X (Y z))'); pcheck(ptree); pcheck(e)
+ok! (A (B (C x (X (Y z)) (Q p)) g) h)
+ok! (E f)
+>>> ptree[1] = d; pcheck(ptree)
+ok! (A (B (C x (X (Y z)) (Q p)) g) (D ))
+>>> ptree[-1] = 'x'; pcheck(ptree)
+ok! (A (B (C x (X (Y z)) (Q p)) g) x)
+>>> ptree[-100] = 'y'
+Traceback (most recent call last):
+  . . .
+IndexError: index out of range
+>>> ptree[()] = make_ptree('(X y)')
+Traceback (most recent call last):
+  . . .
+IndexError: The tree position () may not be assigned to.
+
+
+
>>> # With slices:
+>>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
+>>> b = ptree[0]
+>>> ptree[0:0] = ('x', make_ptree('(Y)')); pcheck(ptree)
+ok! (A x (Y ) (B c) (D e) f g (H i) j (K l))
+>>> ptree[2:6] = (); pcheck(ptree); pcheck(b)
+ok! (A x (Y ) (H i) j (K l))
+ok! (B c)
+>>> ptree[-2:] = ('z', 'p'); pcheck(ptree)
+ok! (A x (Y ) (H i) z p)
+>>> ptree[1:3] = [make_ptree('(X)') for x in range(10)]; pcheck(ptree)
+ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p)
+>>> ptree[5:1000] = []; pcheck(ptree)
+ok! (A x (X ) (X ) (X ) (X ))
+>>> ptree[-2:1000] = ['n']; pcheck(ptree)
+ok! (A x (X ) (X ) n)
+>>> ptree[-100:1] = [make_ptree('(U v)')]; pcheck(ptree)
+ok! (A (U v) (X ) (X ) n)
+>>> ptree[-1:] = (make_ptree('(X)') for x in range(3)); pcheck(ptree)
+ok! (A (U v) (X ) (X ) (X ) (X ) (X ))
+>>> ptree[1:-2:2] = ['x', 'y']; pcheck(ptree)
+ok! (A (U v) x (X ) y (X ) (X ))
+
+

append()

-
-
->>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> ptree.append('x'); pcheck(ptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x)
->>> ptree.append(make_ptree('(X (Y z))')); pcheck(ptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z)))
-
-
+
>>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> ptree.append('x'); pcheck(ptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x)
+>>> ptree.append(make_ptree('(X (Y z))')); pcheck(ptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z)))
+
+

extend()

-
-
->>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> ptree.extend(['x', 'y', make_ptree('(X (Y z))')]); pcheck(ptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
->>> ptree.extend([]); pcheck(ptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
->>> ptree.extend(make_ptree('(X)') for x in range(3)); pcheck(ptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X ))
-
-
+
>>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> ptree.extend(['x', 'y', make_ptree('(X (Y z))')]); pcheck(ptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
+>>> ptree.extend([]); pcheck(ptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
+>>> ptree.extend(make_ptree('(X)') for x in range(3)); pcheck(ptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X ))
+
+

insert()

-
-
->>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> ptree.insert(0, make_ptree('(X (Y z))')); pcheck(ptree)
-ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h)
->>> ptree.insert(-1, make_ptree('(X (Y z))')); pcheck(ptree)
-ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
->>> ptree.insert(-4, make_ptree('(X (Y z))')); pcheck(ptree)
-ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
->>> # Note: as with ``list``, inserting at a negative index that
->>> # gives a position before the start of the list does *not*
->>> # raise an IndexError exception; it just inserts at 0.
->>> ptree.insert(-400, make_ptree('(X (Y z))')); pcheck(ptree)
-ok! (A
-  (X (Y z))
-  (X (Y z))
-  (X (Y z))
-  (B (C (D ) (E f) (Q p)) g)
-  (X (Y z))
-  h)
-
-
+
>>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> ptree.insert(0, make_ptree('(X (Y z))')); pcheck(ptree)
+ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h)
+>>> ptree.insert(-1, make_ptree('(X (Y z))')); pcheck(ptree)
+ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
+>>> ptree.insert(-4, make_ptree('(X (Y z))')); pcheck(ptree)
+ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
+>>> # Note: as with ``list``, inserting at a negative index that
+>>> # gives a position before the start of the list does *not*
+>>> # raise an IndexError exception; it just inserts at 0.
+>>> ptree.insert(-400, make_ptree('(X (Y z))')); pcheck(ptree)
+ok! (A
+  (X (Y z))
+  (X (Y z))
+  (X (Y z))
+  (B (C (D ) (E f) (Q p)) g)
+  (X (Y z))
+  h)
+
+

pop()

-
-
->>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> ptree[0,0].pop(1); pcheck(ptree)
-ParentedTree('E', ['f'])
-ok! (A (B (C (D ) (Q p)) g) h)
->>> ptree[0].pop(-1); pcheck(ptree)
-'g'
-ok! (A (B (C (D ) (Q p))) h)
->>> ptree.pop(); pcheck(ptree)
-'h'
-ok! (A (B (C (D ) (Q p))))
->>> ptree.pop(-100)
-Traceback (most recent call last):
-  . . .
-IndexError: index out of range
-
-
+
>>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> ptree[0,0].pop(1); pcheck(ptree)
+ParentedTree('E', ['f'])
+ok! (A (B (C (D ) (Q p)) g) h)
+>>> ptree[0].pop(-1); pcheck(ptree)
+'g'
+ok! (A (B (C (D ) (Q p))) h)
+>>> ptree.pop(); pcheck(ptree)
+'h'
+ok! (A (B (C (D ) (Q p))))
+>>> ptree.pop(-100)
+Traceback (most recent call last):
+  . . .
+IndexError: index out of range
+
+

remove()

-
-
->>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> e = ptree[0,0,1]
->>> ptree[0,0].remove(ptree[0,0,1]); pcheck(ptree); pcheck(e)
-ok! (A (B (C (D ) (Q p)) g) h)
-ok! (E f)
->>> ptree[0,0].remove(make_ptree('(Q p)')); pcheck(ptree)
-ok! (A (B (C (D )) g) h)
->>> ptree[0,0].remove(make_ptree('(Q p)'))
-Traceback (most recent call last):
-  . . .
-ValueError: ParentedTree('Q', ['p']) is not in list
->>> ptree.remove('h'); pcheck(ptree)
-ok! (A (B (C (D )) g))
->>> ptree.remove('h');
-Traceback (most recent call last):
-  . . .
-ValueError: 'h' is not in list
->>> # remove() removes the first subtree that is equal (==) to the
->>> # given tree, which may not be the identical tree we give it:
->>> ptree = make_ptree('(A (X x) (Y y) (X x))')
->>> x1, y, x2 = ptree
->>> ptree.remove(ptree[-1]); pcheck(ptree)
-ok! (A (Y y) (X x))
->>> print(x1.parent()); pcheck(x1)
-None
-ok! (X x)
->>> print(x2.parent())
-(A (Y y) (X x))
-
-
+
>>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> e = ptree[0,0,1]
+>>> ptree[0,0].remove(ptree[0,0,1]); pcheck(ptree); pcheck(e)
+ok! (A (B (C (D ) (Q p)) g) h)
+ok! (E f)
+>>> ptree[0,0].remove(make_ptree('(Q p)')); pcheck(ptree)
+ok! (A (B (C (D )) g) h)
+>>> ptree[0,0].remove(make_ptree('(Q p)'))
+Traceback (most recent call last):
+  . . .
+ValueError: ParentedTree('Q', ['p']) is not in list
+>>> ptree.remove('h'); pcheck(ptree)
+ok! (A (B (C (D )) g))
+>>> ptree.remove('h');
+Traceback (most recent call last):
+  . . .
+ValueError: 'h' is not in list
+>>> # remove() removes the first subtree that is equal (==) to the
+>>> # given tree, which may not be the identical tree we give it:
+>>> ptree = make_ptree('(A (X x) (Y y) (X x))')
+>>> x1, y, x2 = ptree
+>>> ptree.remove(ptree[-1]); pcheck(ptree)
+ok! (A (Y y) (X x))
+>>> print(x1.parent()); pcheck(x1)
+None
+ok! (X x)
+>>> print(x2.parent())
+(A (Y y) (X x))
+
+

Test that a tree cannot be given multiple parents:

-
-
->>> ptree = make_ptree('(A (X x) (Y y) (Z z))')
->>> ptree[0] = ptree[1]
-Traceback (most recent call last):
-  . . .
-ValueError: Can not insert a subtree that already has a parent.
->>> pcheck()
-ok!
-
-
+
>>> ptree = make_ptree('(A (X x) (Y y) (Z z))')
+>>> ptree[0] = ptree[1]
+Traceback (most recent call last):
+  . . .
+ValueError: Can not insert a subtree that already has a parent.
+>>> pcheck()
+ok!
+
+
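+
+If the goal is to reuse a subtree under a second parent, one option is to
+insert a deep copy of it instead; a minimal sketch, assuming that
+``copy(deep=True)`` yields a fresh, parentless copy (it converts the
+subtree rather than sharing its nodes):
+
+>>> ptree[0] = ptree[1].copy(deep=True); pcheck(ptree)
+ok! (A (Y y) (Y y) (Z z))
+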

[more to be written]

+
+
+

ImmutableParentedTree Regression Tests

+
>>> iptree = ImmutableParentedTree.convert(ptree)
+>>> type(iptree)
+<class 'nltk.tree.ImmutableParentedTree'>
+>>> del iptree[0]
+Traceback (most recent call last):
+  . . .
+ValueError: ImmutableParentedTree may not be modified
+>>> iptree.set_label('newnode')
+Traceback (most recent call last):
+  . . .
+ValueError: ImmutableParentedTree may not be modified
+
-
-

ImmutableParentedTree Regression Tests

-
-
->>> iptree = ImmutableParentedTree.convert(ptree)
->>> type(iptree)
-<class 'nltk.tree.ImmutableParentedTree'>
->>> del iptree[0]
-Traceback (most recent call last):
-  . . .
-ValueError: ImmutableParentedTree may not be modified
->>> iptree.set_label('newnode')
-Traceback (most recent call last):
-  . . .
-ValueError: ImmutableParentedTree may not be modified
-
-
-
-
-

MultiParentedTree Regression Tests

+
+
+

MultiParentedTree Regression Tests

Keep track of all trees that we create (including subtrees) using this variable:

-
-
->>> all_mptrees = []
-
-
-

Define a helper function to create new multi-parented trees:

-
-
->>> def make_mptree(s):
-...     mptree = MultiParentedTree.convert(Tree.fromstring(s))
-...     all_mptrees.extend(t for t in mptree.subtrees()
-...                       if isinstance(t, Tree))
-...     return mptree
-
-
+
>>> all_mptrees = []
+
+
+

Define a helper function to create new multi-parented trees:

+
>>> def make_mptree(s):
+...     mptree = MultiParentedTree.convert(Tree.fromstring(s))
+...     all_mptrees.extend(t for t in mptree.subtrees()
+...                       if isinstance(t, Tree))
+...     return mptree
+
+

Define a test function that examines every subtree in all_mptrees and checks that all six of its methods are defined correctly. If any mptrees are passed as arguments, then they are printed.

-
-
->>> def mpcheck(*print_mptrees):
-...     def has(seq, val): # uses identity comparison
-...         for item in seq:
-...             if item is val: return True
-...         return False
-...     for mptree in all_mptrees:
-...         # Check mptree's methods.
-...         if len(mptree.parents()) == 0:
-...             assert len(mptree.left_siblings()) == 0
-...             assert len(mptree.right_siblings()) == 0
-...             assert len(mptree.roots()) == 1
-...             assert mptree.roots()[0] is mptree
-...             assert mptree.treepositions(mptree) == [()]
-...             left_siblings = right_siblings = ()
-...             roots = {id(mptree): 1}
-...         else:
-...             roots = dict((id(r), 0) for r in mptree.roots())
-...             left_siblings = mptree.left_siblings()
-...             right_siblings = mptree.right_siblings()
-...         for parent in mptree.parents():
-...             for i in mptree.parent_indices(parent):
-...                 assert parent[i] is mptree
-...                 # check left siblings
-...                 if i > 0:
-...                     for j in range(len(left_siblings)):
-...                         if left_siblings[j] is parent[i-1]:
-...                             del left_siblings[j]
-...                             break
-...                     else:
-...                         assert 0, 'sibling not found!'
-...                 # check right siblings
-...                 if i < (len(parent)-1):
-...                     for j in range(len(right_siblings)):
-...                         if right_siblings[j] is parent[i+1]:
-...                             del right_siblings[j]
-...                             break
-...                     else:
-...                         assert 0, 'sibling not found!'
-...             # check roots
-...             for root in parent.roots():
-...                 assert id(root) in roots, 'missing root'
-...                 roots[id(root)] += 1
-...         # check that we don't have any unexplained values
-...         assert len(left_siblings)==0, 'unexpected sibling'
-...         assert len(right_siblings)==0, 'unexpected sibling'
-...         for v in roots.values(): assert v>0, roots #'unexpected root'
-...         # check treepositions
-...         for root in mptree.roots():
-...             for treepos in mptree.treepositions(root):
-...                 assert root[treepos] is mptree
-...         # Check mptree's children's methods:
-...         for i, child in enumerate(mptree):
-...             if isinstance(child, Tree):
-...                 # mpcheck parent() & parent_index() methods
-...                 assert has(child.parents(), mptree)
-...                 assert i in child.parent_indices(mptree)
-...                 # mpcheck sibling methods
-...                 if i > 0:
-...                     assert has(child.left_siblings(), mptree[i-1])
-...                 if i < len(mptree)-1:
-...                     assert has(child.right_siblings(), mptree[i+1])
-...     if print_mptrees:
-...         print('ok!', end=' ')
-...         for mptree in print_mptrees: print(mptree)
-...     else:
-...         print('ok!')
-
-
+
>>> def mpcheck(*print_mptrees):
+...     def has(seq, val): # uses identity comparison
+...         for item in seq:
+...             if item is val: return True
+...         return False
+...     for mptree in all_mptrees:
+...         # Check mptree's methods.
+...         if len(mptree.parents()) == 0:
+...             assert len(mptree.left_siblings()) == 0
+...             assert len(mptree.right_siblings()) == 0
+...             assert len(mptree.roots()) == 1
+...             assert mptree.roots()[0] is mptree
+...             assert mptree.treepositions(mptree) == [()]
+...             left_siblings = right_siblings = ()
+...             roots = {id(mptree): 1}
+...         else:
+...             roots = dict((id(r), 0) for r in mptree.roots())
+...             left_siblings = mptree.left_siblings()
+...             right_siblings = mptree.right_siblings()
+...         for parent in mptree.parents():
+...             for i in mptree.parent_indices(parent):
+...                 assert parent[i] is mptree
+...                 # check left siblings
+...                 if i > 0:
+...                     for j in range(len(left_siblings)):
+...                         if left_siblings[j] is parent[i-1]:
+...                             del left_siblings[j]
+...                             break
+...                     else:
+...                         assert 0, 'sibling not found!'
+...                 # check right siblings
+...                 if i < (len(parent)-1):
+...                     for j in range(len(right_siblings)):
+...                         if right_siblings[j] is parent[i+1]:
+...                             del right_siblings[j]
+...                             break
+...                     else:
+...                         assert 0, 'sibling not found!'
+...             # check roots
+...             for root in parent.roots():
+...                 assert id(root) in roots, 'missing root'
+...                 roots[id(root)] += 1
+...         # check that we don't have any unexplained values
+...         assert len(left_siblings)==0, 'unexpected sibling'
+...         assert len(right_siblings)==0, 'unexpected sibling'
+...         for v in roots.values(): assert v>0, roots #'unexpected root'
+...         # check treepositions
+...         for root in mptree.roots():
+...             for treepos in mptree.treepositions(root):
+...                 assert root[treepos] is mptree
+...         # Check mptree's children's methods:
+...         for i, child in enumerate(mptree):
+...             if isinstance(child, Tree):
+...                 # mpcheck parent() & parent_index() methods
+...                 assert has(child.parents(), mptree)
+...                 assert i in child.parent_indices(mptree)
+...                 # mpcheck sibling methods
+...                 if i > 0:
+...                     assert has(child.left_siblings(), mptree[i-1])
+...                 if i < len(mptree)-1:
+...                     assert has(child.right_siblings(), mptree[i+1])
+...     if print_mptrees:
+...         print('ok!', end=' ')
+...         for mptree in print_mptrees: print(mptree)
+...     else:
+...         print('ok!')
+
+

Run our test function on a variety of newly-created trees:

-
-
->>> mpcheck(make_mptree('(A)'))
-ok! (A )
->>> mpcheck(make_mptree('(A (B (C (D) (E f)) g) h)'))
-ok! (A (B (C (D ) (E f)) g) h)
->>> mpcheck(make_mptree('(A (B) (C c) (D d d) (E e e e))'))
-ok! (A (B ) (C c) (D d d) (E e e e))
->>> mpcheck(make_mptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))'))
-ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e )))
->>> subtree = make_mptree('(A (B (C (D) (E f)) g) h)')
-
-
+
>>> mpcheck(make_mptree('(A)'))
+ok! (A )
+>>> mpcheck(make_mptree('(A (B (C (D) (E f)) g) h)'))
+ok! (A (B (C (D ) (E f)) g) h)
+>>> mpcheck(make_mptree('(A (B) (C c) (D d d) (E e e e))'))
+ok! (A (B ) (C c) (D d d) (E e e e))
+>>> mpcheck(make_mptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))'))
+ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e )))
+>>> subtree = make_mptree('(A (B (C (D) (E f)) g) h)')
+
+

Including some trees that contain multiple parents:

-
-
->>> mpcheck(MultiParentedTree('Z', [subtree, subtree]))
-ok! (Z (A (B (C (D ) (E f)) g) h) (A (B (C (D ) (E f)) g) h))
-
-
+
>>> mpcheck(MultiParentedTree('Z', [subtree, subtree]))
+ok! (Z (A (B (C (D ) (E f)) g) h) (A (B (C (D ) (E f)) g) h))
+
+

Run our test function after performing various tree-modification operations (n.b., these are the same tests that we ran for ParentedTree, above; thus, none of these trees actually uses multiple parents.)

__delitem__()

-
-
->>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> e = mptree[0,0,1]
->>> del mptree[0,0,1]; mpcheck(mptree); mpcheck(e)
-ok! (A (B (C (D ) (Q p)) g) h)
-ok! (E f)
->>> del mptree[0,0,0]; mpcheck(mptree)
-ok! (A (B (C (Q p)) g) h)
->>> del mptree[0,1]; mpcheck(mptree)
-ok! (A (B (C (Q p))) h)
->>> del mptree[-1]; mpcheck(mptree)
-ok! (A (B (C (Q p))))
->>> del mptree[-100]
-Traceback (most recent call last):
-  . . .
-IndexError: index out of range
->>> del mptree[()]
-Traceback (most recent call last):
-  . . .
-IndexError: The tree position () may not be deleted.
-
-
->>> # With slices:
->>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
->>> b = mptree[0]
->>> del mptree[0:0]; mpcheck(mptree)
-ok! (A (B c) (D e) f g (H i) j (K l))
->>> del mptree[:1]; mpcheck(mptree); mpcheck(b)
-ok! (A (D e) f g (H i) j (K l))
-ok! (B c)
->>> del mptree[-2:]; mpcheck(mptree)
-ok! (A (D e) f g (H i))
->>> del mptree[1:3]; mpcheck(mptree)
-ok! (A (D e) (H i))
->>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
->>> del mptree[5:1000]; mpcheck(mptree)
-ok! (A (B c) (D e) f g (H i))
->>> del mptree[-2:1000]; mpcheck(mptree)
-ok! (A (B c) (D e) f)
->>> del mptree[-100:1]; mpcheck(mptree)
-ok! (A (D e) f)
->>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
->>> del mptree[1:-2:2]; mpcheck(mptree)
-ok! (A (B c) f (H i) j (K l))
-
-
+
>>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> e = mptree[0,0,1]
+>>> del mptree[0,0,1]; mpcheck(mptree); mpcheck(e)
+ok! (A (B (C (D ) (Q p)) g) h)
+ok! (E f)
+>>> del mptree[0,0,0]; mpcheck(mptree)
+ok! (A (B (C (Q p)) g) h)
+>>> del mptree[0,1]; mpcheck(mptree)
+ok! (A (B (C (Q p))) h)
+>>> del mptree[-1]; mpcheck(mptree)
+ok! (A (B (C (Q p))))
+>>> del mptree[-100]
+Traceback (most recent call last):
+  . . .
+IndexError: index out of range
+>>> del mptree[()]
+Traceback (most recent call last):
+  . . .
+IndexError: The tree position () may not be deleted.
+
+
+
>>> # With slices:
+>>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
+>>> b = mptree[0]
+>>> del mptree[0:0]; mpcheck(mptree)
+ok! (A (B c) (D e) f g (H i) j (K l))
+>>> del mptree[:1]; mpcheck(mptree); mpcheck(b)
+ok! (A (D e) f g (H i) j (K l))
+ok! (B c)
+>>> del mptree[-2:]; mpcheck(mptree)
+ok! (A (D e) f g (H i))
+>>> del mptree[1:3]; mpcheck(mptree)
+ok! (A (D e) (H i))
+>>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
+>>> del mptree[5:1000]; mpcheck(mptree)
+ok! (A (B c) (D e) f g (H i))
+>>> del mptree[-2:1000]; mpcheck(mptree)
+ok! (A (B c) (D e) f)
+>>> del mptree[-100:1]; mpcheck(mptree)
+ok! (A (D e) f)
+>>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
+>>> del mptree[1:-2:2]; mpcheck(mptree)
+ok! (A (B c) f (H i) j (K l))
+
+

__setitem__()

-
-
->>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> d, e, q = mptree[0,0]
->>> mptree[0,0,0] = 'x'; mpcheck(mptree); mpcheck(d)
-ok! (A (B (C x (E f) (Q p)) g) h)
-ok! (D )
->>> mptree[0,0,1] = make_mptree('(X (Y z))'); mpcheck(mptree); mpcheck(e)
-ok! (A (B (C x (X (Y z)) (Q p)) g) h)
-ok! (E f)
->>> mptree[1] = d; mpcheck(mptree)
-ok! (A (B (C x (X (Y z)) (Q p)) g) (D ))
->>> mptree[-1] = 'x'; mpcheck(mptree)
-ok! (A (B (C x (X (Y z)) (Q p)) g) x)
->>> mptree[-100] = 'y'
-Traceback (most recent call last):
-  . . .
-IndexError: index out of range
->>> mptree[()] = make_mptree('(X y)')
-Traceback (most recent call last):
-  . . .
-IndexError: The tree position () may not be assigned to.
-
-
->>> # With slices:
->>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
->>> b = mptree[0]
->>> mptree[0:0] = ('x', make_mptree('(Y)')); mpcheck(mptree)
-ok! (A x (Y ) (B c) (D e) f g (H i) j (K l))
->>> mptree[2:6] = (); mpcheck(mptree); mpcheck(b)
-ok! (A x (Y ) (H i) j (K l))
-ok! (B c)
->>> mptree[-2:] = ('z', 'p'); mpcheck(mptree)
-ok! (A x (Y ) (H i) z p)
->>> mptree[1:3] = [make_mptree('(X)') for x in range(10)]; mpcheck(mptree)
-ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p)
->>> mptree[5:1000] = []; mpcheck(mptree)
-ok! (A x (X ) (X ) (X ) (X ))
->>> mptree[-2:1000] = ['n']; mpcheck(mptree)
-ok! (A x (X ) (X ) n)
->>> mptree[-100:1] = [make_mptree('(U v)')]; mpcheck(mptree)
-ok! (A (U v) (X ) (X ) n)
->>> mptree[-1:] = (make_mptree('(X)') for x in range(3)); mpcheck(mptree)
-ok! (A (U v) (X ) (X ) (X ) (X ) (X ))
->>> mptree[1:-2:2] = ['x', 'y']; mpcheck(mptree)
-ok! (A (U v) x (X ) y (X ) (X ))
-
-
+
>>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> d, e, q = mptree[0,0]
+>>> mptree[0,0,0] = 'x'; mpcheck(mptree); mpcheck(d)
+ok! (A (B (C x (E f) (Q p)) g) h)
+ok! (D )
+>>> mptree[0,0,1] = make_mptree('(X (Y z))'); mpcheck(mptree); mpcheck(e)
+ok! (A (B (C x (X (Y z)) (Q p)) g) h)
+ok! (E f)
+>>> mptree[1] = d; mpcheck(mptree)
+ok! (A (B (C x (X (Y z)) (Q p)) g) (D ))
+>>> mptree[-1] = 'x'; mpcheck(mptree)
+ok! (A (B (C x (X (Y z)) (Q p)) g) x)
+>>> mptree[-100] = 'y'
+Traceback (most recent call last):
+  . . .
+IndexError: index out of range
+>>> mptree[()] = make_mptree('(X y)')
+Traceback (most recent call last):
+  . . .
+IndexError: The tree position () may not be assigned to.
+
+
+
>>> # With slices:
+>>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
+>>> b = mptree[0]
+>>> mptree[0:0] = ('x', make_mptree('(Y)')); mpcheck(mptree)
+ok! (A x (Y ) (B c) (D e) f g (H i) j (K l))
+>>> mptree[2:6] = (); mpcheck(mptree); mpcheck(b)
+ok! (A x (Y ) (H i) j (K l))
+ok! (B c)
+>>> mptree[-2:] = ('z', 'p'); mpcheck(mptree)
+ok! (A x (Y ) (H i) z p)
+>>> mptree[1:3] = [make_mptree('(X)') for x in range(10)]; mpcheck(mptree)
+ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p)
+>>> mptree[5:1000] = []; mpcheck(mptree)
+ok! (A x (X ) (X ) (X ) (X ))
+>>> mptree[-2:1000] = ['n']; mpcheck(mptree)
+ok! (A x (X ) (X ) n)
+>>> mptree[-100:1] = [make_mptree('(U v)')]; mpcheck(mptree)
+ok! (A (U v) (X ) (X ) n)
+>>> mptree[-1:] = (make_mptree('(X)') for x in range(3)); mpcheck(mptree)
+ok! (A (U v) (X ) (X ) (X ) (X ) (X ))
+>>> mptree[1:-2:2] = ['x', 'y']; mpcheck(mptree)
+ok! (A (U v) x (X ) y (X ) (X ))
+
+

append()

-
-
->>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> mptree.append('x'); mpcheck(mptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x)
->>> mptree.append(make_mptree('(X (Y z))')); mpcheck(mptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z)))
-
-
+
>>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> mptree.append('x'); mpcheck(mptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x)
+>>> mptree.append(make_mptree('(X (Y z))')); mpcheck(mptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z)))
+
+

extend()

-
-
->>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> mptree.extend(['x', 'y', make_mptree('(X (Y z))')]); mpcheck(mptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
->>> mptree.extend([]); mpcheck(mptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
->>> mptree.extend(make_mptree('(X)') for x in range(3)); mpcheck(mptree)
-ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X ))
-
-
+
>>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> mptree.extend(['x', 'y', make_mptree('(X (Y z))')]); mpcheck(mptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
+>>> mptree.extend([]); mpcheck(mptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
+>>> mptree.extend(make_mptree('(X)') for x in range(3)); mpcheck(mptree)
+ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X ))
+
+

insert()

-
-
->>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> mptree.insert(0, make_mptree('(X (Y z))')); mpcheck(mptree)
-ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h)
->>> mptree.insert(-1, make_mptree('(X (Y z))')); mpcheck(mptree)
-ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
->>> mptree.insert(-4, make_mptree('(X (Y z))')); mpcheck(mptree)
-ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
->>> # Note: as with ``list``, inserting at a negative index that
->>> # gives a position before the start of the list does *not*
->>> # raise an IndexError exception; it just inserts at 0.
->>> mptree.insert(-400, make_mptree('(X (Y z))')); mpcheck(mptree)
-ok! (A
-  (X (Y z))
-  (X (Y z))
-  (X (Y z))
-  (B (C (D ) (E f) (Q p)) g)
-  (X (Y z))
-  h)
-
-
+
>>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> mptree.insert(0, make_mptree('(X (Y z))')); mpcheck(mptree)
+ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h)
+>>> mptree.insert(-1, make_mptree('(X (Y z))')); mpcheck(mptree)
+ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
+>>> mptree.insert(-4, make_mptree('(X (Y z))')); mpcheck(mptree)
+ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
+>>> # Note: as with ``list``, inserting at a negative index that
+>>> # gives a position before the start of the list does *not*
+>>> # raise an IndexError exception; it just inserts at 0.
+>>> mptree.insert(-400, make_mptree('(X (Y z))')); mpcheck(mptree)
+ok! (A
+  (X (Y z))
+  (X (Y z))
+  (X (Y z))
+  (B (C (D ) (E f) (Q p)) g)
+  (X (Y z))
+  h)
+
+

pop()

-
-
->>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> mptree[0,0].pop(1); mpcheck(mptree)
-MultiParentedTree('E', ['f'])
-ok! (A (B (C (D ) (Q p)) g) h)
->>> mptree[0].pop(-1); mpcheck(mptree)
-'g'
-ok! (A (B (C (D ) (Q p))) h)
->>> mptree.pop(); mpcheck(mptree)
-'h'
-ok! (A (B (C (D ) (Q p))))
->>> mptree.pop(-100)
-Traceback (most recent call last):
-  . . .
-IndexError: index out of range
-
-
+
>>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> mptree[0,0].pop(1); mpcheck(mptree)
+MultiParentedTree('E', ['f'])
+ok! (A (B (C (D ) (Q p)) g) h)
+>>> mptree[0].pop(-1); mpcheck(mptree)
+'g'
+ok! (A (B (C (D ) (Q p))) h)
+>>> mptree.pop(); mpcheck(mptree)
+'h'
+ok! (A (B (C (D ) (Q p))))
+>>> mptree.pop(-100)
+Traceback (most recent call last):
+  . . .
+IndexError: index out of range
+
+

remove()

-
-
->>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
->>> e = mptree[0,0,1]
->>> mptree[0,0].remove(mptree[0,0,1]); mpcheck(mptree); mpcheck(e)
-ok! (A (B (C (D ) (Q p)) g) h)
-ok! (E f)
->>> mptree[0,0].remove(make_mptree('(Q p)')); mpcheck(mptree)
-ok! (A (B (C (D )) g) h)
->>> mptree[0,0].remove(make_mptree('(Q p)'))
-Traceback (most recent call last):
-  . . .
-ValueError: MultiParentedTree('Q', ['p']) is not in list
->>> mptree.remove('h'); mpcheck(mptree)
-ok! (A (B (C (D )) g))
->>> mptree.remove('h');
-Traceback (most recent call last):
-  . . .
-ValueError: 'h' is not in list
->>> # remove() removes the first subtree that is equal (==) to the
->>> # given tree, which may not be the identical tree we give it:
->>> mptree = make_mptree('(A (X x) (Y y) (X x))')
->>> x1, y, x2 = mptree
->>> mptree.remove(mptree[-1]); mpcheck(mptree)
-ok! (A (Y y) (X x))
->>> print([str(p) for p in x1.parents()])
-[]
->>> print([str(p) for p in x2.parents()])
-['(A (Y y) (X x))']
-
-
-
-
-

ImmutableMultiParentedTree Regression Tests

-
-
->>> imptree = ImmutableMultiParentedTree.convert(mptree)
->>> type(imptree)
-<class 'nltk.tree.ImmutableMultiParentedTree'>
->>> del imptree[0]
-Traceback (most recent call last):
-  . . .
-ValueError: ImmutableMultiParentedTree may not be modified
->>> imptree.set_label('newnode')
-Traceback (most recent call last):
-  . . .
-ValueError: ImmutableMultiParentedTree may not be modified
-
-
-
-
-

ProbabilisticTree Regression Tests

-
-
->>> prtree = ProbabilisticTree("S", [ProbabilisticTree("NP", ["N"], prob=0.3)], prob=0.6)
->>> print(prtree)
-(S (NP N)) (p=0.6)
->>> import copy
->>> prtree == copy.deepcopy(prtree) == prtree.copy(deep=True) == prtree.copy()
-True
->>> prtree[0] is prtree.copy()[0]
-True
->>> prtree[0] is prtree.copy(deep=True)[0]
-False
-
-
->>> imprtree = ImmutableProbabilisticTree.convert(prtree)
->>> type(imprtree)
-<class 'nltk.tree.ImmutableProbabilisticTree'>
->>> del imprtree[0]
-Traceback (most recent call last):
-  . . .
-ValueError: ImmutableProbabilisticTree may not be modified
->>> imprtree.set_label('newnode')
-Traceback (most recent call last):
-  . . .
-ValueError: ImmutableProbabilisticTree may not be modified
-
-
-
-
-
-

Squashed Bugs

-

This used to discard the (B b) subtree (fixed in svn 6270):

-
-
->>> print(Tree.fromstring('((A a) (B b))'))
-( (A a) (B b))
-
-
+
>>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+>>> e = mptree[0,0,1]
+>>> mptree[0,0].remove(mptree[0,0,1]); mpcheck(mptree); mpcheck(e)
+ok! (A (B (C (D ) (Q p)) g) h)
+ok! (E f)
+>>> mptree[0,0].remove(make_mptree('(Q p)')); mpcheck(mptree)
+ok! (A (B (C (D )) g) h)
+>>> mptree[0,0].remove(make_mptree('(Q p)'))
+Traceback (most recent call last):
+  . . .
+ValueError: MultiParentedTree('Q', ['p']) is not in list
+>>> mptree.remove('h'); mpcheck(mptree)
+ok! (A (B (C (D )) g))
+>>> mptree.remove('h');
+Traceback (most recent call last):
+  . . .
+ValueError: 'h' is not in list
+>>> # remove() removes the first subtree that is equal (==) to the
+>>> # given tree, which may not be the identical tree we give it:
+>>> mptree = make_mptree('(A (X x) (Y y) (X x))')
+>>> x1, y, x2 = mptree
+>>> mptree.remove(mptree[-1]); mpcheck(mptree)
+ok! (A (Y y) (X x))
+>>> print([str(p) for p in x1.parents()])
+[]
+>>> print([str(p) for p in x2.parents()])
+['(A (Y y) (X x))']
+
+
+

ImmutableMultiParentedTree Regression Tests

+
>>> imptree = ImmutableMultiParentedTree.convert(mptree)
+>>> type(imptree)
+<class 'nltk.tree.ImmutableMultiParentedTree'>
+>>> del imptree[0]
+Traceback (most recent call last):
+  . . .
+ValueError: ImmutableMultiParentedTree may not be modified
+>>> imptree.set_label('newnode')
+Traceback (most recent call last):
+  . . .
+ValueError: ImmutableMultiParentedTree may not be modified
+
+
+
+
+

ProbabilisticTree Regression Tests

+
>>> prtree = ProbabilisticTree("S", [ProbabilisticTree("NP", ["N"], prob=0.3)], prob=0.6)
+>>> print(prtree)
+(S (NP N)) (p=0.6)
+>>> import copy
+>>> prtree == copy.deepcopy(prtree) == prtree.copy(deep=True) == prtree.copy()
+True
+>>> prtree[0] is prtree.copy()[0]
+True
+>>> prtree[0] is prtree.copy(deep=True)[0]
+False
+
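+
+The probability attached to each node is available via ``prob()`` (the
+standard accessor, assuming it returns exactly the values passed to the
+constructor above):
+
+>>> prtree.prob(), prtree[0].prob()
+(0.6, 0.3)
+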
+
>>> imprtree = ImmutableProbabilisticTree.convert(prtree)
+>>> type(imprtree)
+<class 'nltk.tree.ImmutableProbabilisticTree'>
+>>> del imprtree[0]
+Traceback (most recent call last):
+  . . .
+ValueError: ImmutableProbabilisticTree may not be modified
+>>> imprtree.set_label('newnode')
+Traceback (most recent call last):
+  . . .
+ValueError: ImmutableProbabilisticTree may not be modified
+
+
+

Squashed Bugs

+

This used to discard the (B b) subtree (fixed in svn 6270):

+
>>> print(Tree.fromstring('((A a) (B b))'))
+( (A a) (B b))
+
+
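+
+The outer pair of parentheses is parsed as a node with an empty label,
+which is why a space follows the opening parenthesis above; a quick check
+of that label:
+
+>>> Tree.fromstring('((A a) (B b))').label()
+''
+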
+
\ No newline at end of file
diff --git a/howto/treeprettyprinter.html b/howto/treeprettyprinter.html
index c3d4a3b8e..55fbc01e6 100644
--- a/howto/treeprettyprinter.html
+++ b/howto/treeprettyprinter.html
@@ -1,480 +1,275 @@
-Unit tests for nltk.treeprettyprinter.TreePrettyPrinter
+NLTK :: Sample usage for treeprettyprinter
-

Unit tests for nltk.treeprettyprinter.TreePrettyPrinter

+
-
->>> from nltk.tree import Tree
->>> from nltk.treeprettyprinter import TreePrettyPrinter
-
-
+

Sample usage for treeprettyprinter

+
+

Unit tests for nltk.treeprettyprinter.TreePrettyPrinter

+
>>> from nltk.tree import Tree
+>>> from nltk.treeprettyprinter import TreePrettyPrinter
+
+

Tree nr 2170 from nltk.corpus.treebank:

-
-
->>> tree = Tree.fromstring(
-...     '(S (NP-SBJ (PRP I)) (VP (VBP feel) (ADJP-PRD (RB pretty) '
-...     '(JJ good)) (PP-CLR (IN about) (NP (PRP it)))) (. .))')
->>> tpp = TreePrettyPrinter(tree)
->>> print(tpp.text())
-                             S
-   __________________________|_____________________
-  |                          VP                    |
-  |      ____________________|___________          |
-  |     |             |                PP-CLR      |
-  |     |             |             _____|_____    |
-NP-SBJ  |          ADJP-PRD        |           NP  |
-  |     |      _______|______      |           |   |
- PRP   VBP    RB             JJ    IN         PRP  .
-  |     |     |              |     |           |   |
-  I    feel pretty          good about         it  .
-
-
->>> print(tpp.text(unicodelines=True))
-                             S
-  ┌──────────────────────────┼─────────────────────┐
-  │                          VP                    │
-  │     ┌─────────────┬──────┴───────────┐         │
-  │     │             │                PP-CLR      │
-  │     │             │            ┌─────┴─────┐   │
-NP-SBJ  │          ADJP-PRD        │           NP  │
-  │     │     ┌───────┴──────┐     │           │   │
- PRP   VBP    RB             JJ    IN         PRP  .
-  │     │     │              │     │           │   │
-  I    feel pretty          good about         it  .
-
-
+
>>> tree = Tree.fromstring(
+...     '(S (NP-SBJ (PRP I)) (VP (VBP feel) (ADJP-PRD (RB pretty) '
+...     '(JJ good)) (PP-CLR (IN about) (NP (PRP it)))) (. .))')
+>>> tpp = TreePrettyPrinter(tree)
+>>> print(tpp.text())
+                             S
+   __________________________|_____________________
+  |                          VP                    |
+  |      ____________________|___________          |
+  |     |             |                PP-CLR      |
+  |     |             |             _____|_____    |
+NP-SBJ  |          ADJP-PRD        |           NP  |
+  |     |      _______|______      |           |   |
+ PRP   VBP    RB             JJ    IN         PRP  .
+  |     |     |              |     |           |   |
+  I    feel pretty          good about         it  .
+
+
+
>>> print(tpp.text(unicodelines=True))
+                             S
+  ┌──────────────────────────┼─────────────────────┐
+  │                          VP                    │
+  │     ┌─────────────┬──────┴───────────┐         │
+  │     │             │                PP-CLR      │
+  │     │             │            ┌─────┴─────┐   │
+NP-SBJ  │          ADJP-PRD        │           NP  │
+  │     │     ┌───────┴──────┐     │           │   │
+ PRP   VBP    RB             JJ    IN         PRP  .
+  │     │     │              │     │           │   │
+  I    feel pretty          good about         it  .
+
+
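+
+The same printer can also emit the tree as SVG markup via ``svg()``; a
+minimal sketch (the variable name is ours, and we only check the result
+type rather than asserting the exact markup):
+
+>>> svg_markup = tpp.svg()
+>>> isinstance(svg_markup, str)
+True
+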

A tree with long labels:

-
-
->>> tree = Tree.fromstring(
-...     '(sentence (plural-noun-phrase (plural-noun Superconductors)) '
-...     '(verb-phrase (plural-verb conduct) '
-...     '(noun-phrase (singular-noun electricity))))')
->>> tpp = TreePrettyPrinter(tree)
->>> print(tpp.text(abbreviate=8, nodedist=2))
-            sentence
-     __________|__________
-    |                 verb-phr.
-    |           __________|__________
-plural-n.      |                 noun-phr.
-    |          |                     |
-plural-n.  plural-v.             singular.
-    |          |                     |
-Supercon.   conduct              electric.
-
-
->>> print(tpp.text(maxwidth=8, nodedist=2))
-          sentence
-    _________|________
-   |                verb-
-   |                phrase
-   |          ________|_________
-plural-      |                noun-
- noun-       |                phrase
- phrase      |                  |
-   |         |                  |
-plural-   plural-           singular-
-  noun      verb               noun
-   |         |                  |
-Supercon  conduct            electric
-ductors                        ity
-
-
+
>>> tree = Tree.fromstring(
+...     '(sentence (plural-noun-phrase (plural-noun Superconductors)) '
+...     '(verb-phrase (plural-verb conduct) '
+...     '(noun-phrase (singular-noun electricity))))')
+>>> tpp = TreePrettyPrinter(tree)
+>>> print(tpp.text(abbreviate=8, nodedist=2))
+            sentence
+     __________|__________
+    |                 verb-phr.
+    |           __________|__________
+plural-n.      |                 noun-phr.
+    |          |                     |
+plural-n.  plural-v.             singular.
+    |          |                     |
+Supercon.   conduct              electric.
+
+
+
>>> print(tpp.text(maxwidth=8, nodedist=2))
+          sentence
+    _________|________
+   |                verb-
+   |                phrase
+   |          ________|_________
+plural-      |                noun-
+ noun-       |                phrase
+ phrase      |                  |
+   |         |                  |
+plural-   plural-           singular-
+  noun      verb               noun
+   |         |                  |
+Supercon  conduct            electric
+ductors                        ity
+
+

A discontinuous tree:

-
-
->>> tree = Tree.fromstring(
-...     '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
-...     '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
-...     '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
->>> sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
-...             ' zwemmen of terrassen .'.split())
->>> tpp = TreePrettyPrinter(tree, sentence)
->>> print(tpp.text())
-                                      top
-                                  _____|______________________________________________
-                               smain                      |                           |
-  _______________________________|_____                   |                           |
- |    |                               inf                 |                           |
- |    |                           _____|____              |                           |
- |    |                          |         inf            |                           |
- |    |                          |      ____|_____        |                           |
- |    |                          |     |         conj     |                           |
- |    |                    _____ | ___ | _________|______ | __________________        |
- |    |                  inf     |     |                  |      |     |      |       |
- |    |          _________|_____ | ___ | _________        |      |     |      |       |
- |    |         pp               |     |          |       |      |     |      |       |
- |    |     ____|____            |     |          |       |      |     |      |       |
- |    |    |         np          |     |          |       |     inf    |     inf      |
- |    |    |     ____|____       |     |          |       |      |     |      |       |
-noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
- |    |    |    |         |      |     |          |       |      |     |      |       |
- Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .
-
-
->>> print(tpp.text(unicodelines=True))
-                                      top
-                                 ┌─────┴──────────────────┬───────────────────────────┐
-                               smain                      │                           │
- ┌────┬──────────────────────────┴─────┐                  │                           │
- │    │                               inf                 │                           │
- │    │                          ┌─────┴────┐             │                           │
- │    │                          │         inf            │                           │
- │    │                          │     ┌────┴─────┐       │                           │
- │    │                          │     │         conj     │                           │
- │    │                   ┌───── │ ─── │ ─────────┴────── │ ─────┬─────┬──────┐       │
- │    │                  inf     │     │                  │      │     │      │       │
- │    │         ┌─────────┴───── │ ─── │ ─────────┐       │      │     │      │       │
- │    │         pp               │     │          │       │      │     │      │       │
- │    │    ┌────┴────┐           │     │          │       │      │     │      │       │
- │    │    │         np          │     │          │       │     inf    │     inf      │
- │    │    │    ┌────┴────┐      │     │          │       │      │     │      │       │
-noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
- │    │    │    │         │      │     │          │       │      │     │      │       │
- Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .
-
-
+
>>> tree = Tree.fromstring(
+...     '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
+...     '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
+...     '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
+>>> sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
+...             ' zwemmen of terrassen .'.split())
+>>> tpp = TreePrettyPrinter(tree, sentence)
+>>> print(tpp.text())
+                                      top
+                                  _____|______________________________________________
+                               smain                      |                           |
+  _______________________________|_____                   |                           |
+ |    |                               inf                 |                           |
+ |    |                           _____|____              |                           |
+ |    |                          |         inf            |                           |
+ |    |                          |      ____|_____        |                           |
+ |    |                          |     |         conj     |                           |
+ |    |                    _____ | ___ | _________|______ | __________________        |
+ |    |                  inf     |     |                  |      |     |      |       |
+ |    |          _________|_____ | ___ | _________        |      |     |      |       |
+ |    |         pp               |     |          |       |      |     |      |       |
+ |    |     ____|____            |     |          |       |      |     |      |       |
+ |    |    |         np          |     |          |       |     inf    |     inf      |
+ |    |    |     ____|____       |     |          |       |      |     |      |       |
+noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
+ |    |    |    |         |      |     |          |       |      |     |      |       |
+ Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .
+
+
+
>>> print(tpp.text(unicodelines=True))
+                                      top
+                                 ┌─────┴──────────────────┬───────────────────────────┐
+                               smain                      │                           │
+ ┌────┬──────────────────────────┴─────┐                  │                           │
+ │    │                               inf                 │                           │
+ │    │                          ┌─────┴────┐             │                           │
+ │    │                          │         inf            │                           │
+ │    │                          │     ┌────┴─────┐       │                           │
+ │    │                          │     │         conj     │                           │
+ │    │                   ┌───── │ ─── │ ─────────┴────── │ ─────┬─────┬──────┐       │
+ │    │                  inf     │     │                  │      │     │      │       │
+ │    │         ┌─────────┴───── │ ─── │ ─────────┐       │      │     │      │       │
+ │    │         pp               │     │          │       │      │     │      │       │
+ │    │    ┌────┴────┐           │     │          │       │      │     │      │       │
+ │    │    │         np          │     │          │       │     inf    │     inf      │
+ │    │    │    ┌────┴────┐      │     │          │       │      │     │      │       │
+noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
+ │    │    │    │         │      │     │          │       │      │     │      │       │
+ Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .
+
+
+
+
\ No newline at end of file
diff --git a/howto/treetransforms.html b/howto/treetransforms.html
index 637c0061a..193fb493c 100644
--- a/howto/treetransforms.html
+++ b/howto/treetransforms.html
@@ -1,512 +1,305 @@
-Unit tests for the TreeTransformation class
+NLTK :: Sample usage for treetransforms
-

Unit tests for the TreeTransformation class

+
-
->>> from copy import deepcopy
->>> from nltk.tree import *
->>> from nltk.treetransforms import *
-
-
->>> tree_string = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))"
-
-
->>> tree = Tree.fromstring(tree_string)
->>> print(tree)
-(TOP
-  (S
-    (S
-      (VP
-        (VBN Turned)
-        (ADVP (RB loose))
-        (PP
-          (IN in)
-          (NP
-            (NP (NNP Shane) (NNP Longman) (POS 's))
-            (NN trading)
-            (NN room)))))
-    (, ,)
-    (NP (DT the) (NN yuppie) (NNS dealers))
-    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
-    (. .)))
-
-
+

Sample usage for treetransforms

+
+

Unit tests for the TreeTransformation class

+
>>> from copy import deepcopy
+>>> from nltk.tree import *
+>>> from nltk.treetransforms import *
+
+
+
>>> tree_string = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))"
+
+
+
>>> tree = Tree.fromstring(tree_string)
+>>> print(tree)
+(TOP
+  (S
+    (S
+      (VP
+        (VBN Turned)
+        (ADVP (RB loose))
+        (PP
+          (IN in)
+          (NP
+            (NP (NNP Shane) (NNP Longman) (POS 's))
+            (NN trading)
+            (NN room)))))
+    (, ,)
+    (NP (DT the) (NN yuppie) (NNS dealers))
+    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
+    (. .)))
+
+

Make a copy of the original tree and collapse the subtrees with only one child:

-
-
->>> collapsedTree = deepcopy(tree)
->>> collapse_unary(collapsedTree)
->>> print(collapsedTree)
-(TOP
-  (S
-    (S+VP
-      (VBN Turned)
-      (ADVP (RB loose))
-      (PP
-        (IN in)
-        (NP
-          (NP (NNP Shane) (NNP Longman) (POS 's))
-          (NN trading)
-          (NN room))))
-    (, ,)
-    (NP (DT the) (NN yuppie) (NNS dealers))
-    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
-    (. .)))
-
-
->>> collapsedTree2 = deepcopy(tree)
->>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True)
->>> print(collapsedTree2)
-(TOP+S
-  (S+VP
-    (VBN Turned)
-    (ADVP+RB loose)
-    (PP
-      (IN in)
-      (NP
-        (NP (NNP Shane) (NNP Longman) (POS 's))
-        (NN trading)
-        (NN room))))
-  (, ,)
-  (NP (DT the) (NN yuppie) (NNS dealers))
-  (VP (AUX do) (NP (NP+RB little) (ADJP+RB right)))
-  (. .))
-
-
+
>>> collapsedTree = deepcopy(tree)
+>>> collapse_unary(collapsedTree)
+>>> print(collapsedTree)
+(TOP
+  (S
+    (S+VP
+      (VBN Turned)
+      (ADVP (RB loose))
+      (PP
+        (IN in)
+        (NP
+          (NP (NNP Shane) (NNP Longman) (POS 's))
+          (NN trading)
+          (NN room))))
+    (, ,)
+    (NP (DT the) (NN yuppie) (NNS dealers))
+    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
+    (. .)))
+
+
+
>>> collapsedTree2 = deepcopy(tree)
+>>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True)
+>>> print(collapsedTree2)
+(TOP+S
+  (S+VP
+    (VBN Turned)
+    (ADVP+RB loose)
+    (PP
+      (IN in)
+      (NP
+        (NP (NNP Shane) (NNP Longman) (POS 's))
+        (NN trading)
+        (NN room))))
+  (, ,)
+  (NP (DT the) (NN yuppie) (NNS dealers))
+  (VP (AUX do) (NP (NP+RB little) (ADJP+RB right)))
+  (. .))
+
+

Convert the tree to Chomsky Normal Form, i.e. each subtree has either two subtree children or a single leaf value. This conversion can be performed using either left- or right-factoring.

-
-
->>> cnfTree = deepcopy(collapsedTree)
->>> chomsky_normal_form(cnfTree, factor='left')
->>> print(cnfTree)
-(TOP
-  (S
-    (S|<S+VP-,-NP-VP>
-      (S|<S+VP-,-NP>
-        (S|<S+VP-,>
-          (S+VP
-            (S+VP|<VBN-ADVP> (VBN Turned) (ADVP (RB loose)))
-            (PP
-              (IN in)
-              (NP
-                (NP|<NP-NN>
-                  (NP
-                    (NP|<NNP-NNP> (NNP Shane) (NNP Longman))
-                    (POS 's))
-                  (NN trading))
-                (NN room))))
-          (, ,))
-        (NP (NP|<DT-NN> (DT the) (NN yuppie)) (NNS dealers)))
-      (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))))
-    (. .)))
-
-
->>> cnfTree = deepcopy(collapsedTree)
->>> chomsky_normal_form(cnfTree, factor='right')
->>> print(cnfTree)
-(TOP
-  (S
-    (S+VP
-      (VBN Turned)
-      (S+VP|<ADVP-PP>
-        (ADVP (RB loose))
-        (PP
-          (IN in)
-          (NP
-            (NP (NNP Shane) (NP|<NNP-POS> (NNP Longman) (POS 's)))
-            (NP|<NN-NN> (NN trading) (NN room))))))
-    (S|<,-NP-VP-.>
-      (, ,)
-      (S|<NP-VP-.>
-        (NP (DT the) (NP|<NN-NNS> (NN yuppie) (NNS dealers)))
-        (S|<VP-.>
-          (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
-          (. .))))))
-
-
+
>>> cnfTree = deepcopy(collapsedTree)
+>>> chomsky_normal_form(cnfTree, factor='left')
+>>> print(cnfTree)
+(TOP
+  (S
+    (S|<S+VP-,-NP-VP>
+      (S|<S+VP-,-NP>
+        (S|<S+VP-,>
+          (S+VP
+            (S+VP|<VBN-ADVP> (VBN Turned) (ADVP (RB loose)))
+            (PP
+              (IN in)
+              (NP
+                (NP|<NP-NN>
+                  (NP
+                    (NP|<NNP-NNP> (NNP Shane) (NNP Longman))
+                    (POS 's))
+                  (NN trading))
+                (NN room))))
+          (, ,))
+        (NP (NP|<DT-NN> (DT the) (NN yuppie)) (NNS dealers)))
+      (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))))
+    (. .)))
+
+
+
>>> cnfTree = deepcopy(collapsedTree)
+>>> chomsky_normal_form(cnfTree, factor='right')
+>>> print(cnfTree)
+(TOP
+  (S
+    (S+VP
+      (VBN Turned)
+      (S+VP|<ADVP-PP>
+        (ADVP (RB loose))
+        (PP
+          (IN in)
+          (NP
+            (NP (NNP Shane) (NP|<NNP-POS> (NNP Longman) (POS 's)))
+            (NP|<NN-NN> (NN trading) (NN room))))))
+    (S|<,-NP-VP-.>
+      (, ,)
+      (S|<NP-VP-.>
+        (NP (DT the) (NP|<NN-NNS> (NN yuppie) (NNS dealers)))
+        (S|<VP-.>
+          (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
+          (. .))))))
+
+

Employ some Markov smoothing to make the artificial node labels a bit more readable. See the treetransforms.py documentation for more details.

-
-
->>> markovTree = deepcopy(collapsedTree)
->>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1)
->>> print(markovTree)
-(TOP
-  (S^<TOP>
-    (S+VP^<S>
-      (VBN Turned)
-      (S+VP|<ADVP-PP>^<S>
-        (ADVP^<S+VP> (RB loose))
-        (PP^<S+VP>
-          (IN in)
-          (NP^<PP>
-            (NP^<NP>
-              (NNP Shane)
-              (NP|<NNP-POS>^<NP> (NNP Longman) (POS 's)))
-            (NP|<NN-NN>^<PP> (NN trading) (NN room))))))
-    (S|<,-NP>^<TOP>
-      (, ,)
-      (S|<NP-VP>^<TOP>
-        (NP^<S> (DT the) (NP|<NN-NNS>^<S> (NN yuppie) (NNS dealers)))
-        (S|<VP-.>^<TOP>
-          (VP^<S>
-            (AUX do)
-            (NP^<VP> (NP^<NP> (RB little)) (ADJP^<NP> (RB right))))
-          (. .))))))
-
-
+
>>> markovTree = deepcopy(collapsedTree)
+>>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1)
+>>> print(markovTree)
+(TOP
+  (S^<TOP>
+    (S+VP^<S>
+      (VBN Turned)
+      (S+VP|<ADVP-PP>^<S>
+        (ADVP^<S+VP> (RB loose))
+        (PP^<S+VP>
+          (IN in)
+          (NP^<PP>
+            (NP^<NP>
+              (NNP Shane)
+              (NP|<NNP-POS>^<NP> (NNP Longman) (POS 's)))
+            (NP|<NN-NN>^<PP> (NN trading) (NN room))))))
+    (S|<,-NP>^<TOP>
+      (, ,)
+      (S|<NP-VP>^<TOP>
+        (NP^<S> (DT the) (NP|<NN-NNS>^<S> (NN yuppie) (NNS dealers)))
+        (S|<VP-.>^<TOP>
+          (VP^<S>
+            (AUX do)
+            (NP^<VP> (NP^<NP> (RB little)) (ADJP^<NP> (RB right))))
+          (. .))))))
+
+

Convert the transformed tree back to its original form

-
-
->>> un_chomsky_normal_form(markovTree)
->>> tree == markovTree
-True
-
-
+
>>> un_chomsky_normal_form(markovTree)
+>>> tree == markovTree
+True
+
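The inverse transform also works without re-expanding the collapsed unary nodes. A minimal sketch (the expandUnary parameter name is our reading of the treetransforms API, not part of the original doctests):

>>> cnf_roundtrip = deepcopy(collapsedTree)
+>>> chomsky_normal_form(cnf_roundtrip, factor='right')
+>>> un_chomsky_normal_form(cnf_roundtrip, expandUnary=False)
+>>> cnf_roundtrip == collapsedTree
+True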
+
+
+ + +
+
+ +
+ +
+ +
+ +
+ - + \ No newline at end of file diff --git a/howto/util.html b/howto/util.html index 58224beec..22523cb6b 100644 --- a/howto/util.html +++ b/howto/util.html @@ -1,398 +1,198 @@ - - - + - - -Utility functions - + + + + + + + NLTK :: Sample usage for util + + + + + + + + + + + + + + -
-

Utility functions

+
+
+
+ +

+ NLTK +

+ +
+
+ +

Documentation

+ +
+ + + + +
+ +
+ +
+ + - - -
-
->>> from __future__ import print_function
->>> from nltk.util import *
->>> from nltk.tree import Tree
-
-
->>> print_string("This is a long string, therefore it should break", 25)
-This is a long string,
-therefore it should break
-
-
->>> re_show("[a-z]+", "sdf123")
-{sdf}123
-
-
->>> tree = Tree(5,
-...             [Tree(4, [Tree(2, [1, 3])]),
-...              Tree(8, [Tree(6, [7]), 9])])
->>> for x in breadth_first(tree):
-...     if isinstance(x, int): print(x)
-...     else: print(x.label())
-5
-4
-8
-2
-6
-9
-1
-3
-7
->>> for x in breadth_first(tree, maxdepth=2):
-...     if isinstance(x, int): print(x)
-...     else: print(x.label())
-5
-4
-8
-2
-6
-9
-
-
->>> invert_dict({1: 2})
-defaultdict(<... 'list'>, {2: 1})
-
-
->>> invert_dict({1: [3, 4, 5]})
-defaultdict(<... 'list'>, {3: [1], 4: [1], 5: [1]})
-
-
+ + + +
+ +
+
+ +
+

Sample usage for util

+
+

Utility functions

+
>>> from nltk.util import *
+>>> from nltk.tree import Tree
+
+
+
>>> print_string("This is a long string, therefore it should break", 25)
+This is a long string,
+therefore it should break
+
+
+
>>> re_show("[a-z]+", "sdf123")
+{sdf}123
+
+
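On our reading of nltk.util, re_show() also accepts left and right marker arguments (an assumption, not part of the original doctests):

>>> re_show("[a-z]+", "sdf123", left="<", right=">")
+<sdf>123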
+
>>> tree = Tree(5,
+...             [Tree(4, [Tree(2, [1, 3])]),
+...              Tree(8, [Tree(6, [7]), 9])])
+>>> for x in breadth_first(tree):
+...     if isinstance(x, int): print(x)
+...     else: print(x.label())
+5
+4
+8
+2
+6
+9
+1
+3
+7
+>>> for x in breadth_first(tree, maxdepth=2):
+...     if isinstance(x, int): print(x)
+...     else: print(x.label())
+5
+4
+8
+2
+6
+9
+
+
>>> invert_dict({1: 2})
+defaultdict(<... 'list'>, {2: 1})
+
+
+
>>> invert_dict({1: [3, 4, 5]})
+defaultdict(<... 'list'>, {3: [1], 4: [1], 5: [1]})
+
+
+
+
+ + +
+
+ +
+ +
+ +
+ +
+ - + \ No newline at end of file diff --git a/howto/wordnet.html b/howto/wordnet.html index 9e76d975e..3ad2ac95f 100644 --- a/howto/wordnet.html +++ b/howto/wordnet.html @@ -1,620 +1,369 @@ - - - + - - -WordNet Interface - + + + + + + + NLTK :: Sample usage for wordnet + + + + + + + + + + + + + + -
-

WordNet Interface

+
+
+
+ +

+ NLTK +

+ +
+
+ +

Documentation

+ +
+ + + + +
+ +
+ +
+ + - - -

WordNet is just another NLTK corpus reader, and can be imported like this:

-
-
->>> from nltk.corpus import wordnet
-
-
+ + + +
+ +
+
+ +
+

Sample usage for wordnet

+
+

WordNet Interface

+
+
WordNet is just another NLTK corpus reader, and can be imported like this:
>>> from nltk.corpus import wordnet
+
+
+
+

For more compact code, we recommend:

-
-
->>> from nltk.corpus import wordnet as wn
-
-
-
-

Words

-

Look up a word using synsets(); this function has an optional pos argument +

>>> from nltk.corpus import wordnet as wn
+
+
+
+

Words

+

Look up a word using synsets(); this function has an optional pos argument which lets you constrain the part of speech of the word:

-
-
->>> wn.synsets('dog') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-[Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'),
-Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')]
->>> wn.synsets('dog', pos=wn.VERB)
-[Synset('chase.v.01')]
-
-
-

The other parts of speech are NOUN, ADJ and ADV. +

>>> wn.synsets('dog')
+[Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'),
+Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')]
+>>> wn.synsets('dog', pos=wn.VERB)
+[Synset('chase.v.01')]
+
+
+

The other parts of speech are NOUN, ADJ and ADV. A synset is identified with a 3-part name of the form: word.pos.nn:

-
-
->>> wn.synset('dog.n.01')
-Synset('dog.n.01')
->>> print(wn.synset('dog.n.01').definition())
-a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
->>> len(wn.synset('dog.n.01').examples())
-1
->>> print(wn.synset('dog.n.01').examples()[0])
-the dog barked all night
->>> wn.synset('dog.n.01').lemmas()
-[Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')]
->>> [str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()]
-['dog', 'domestic_dog', 'Canis_familiaris']
->>> wn.lemma('dog.n.01.dog').synset()
-Synset('dog.n.01')
-
-
+
>>> wn.synset('dog.n.01')
+Synset('dog.n.01')
+>>> print(wn.synset('dog.n.01').definition())
+a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
+>>> len(wn.synset('dog.n.01').examples())
+1
+>>> print(wn.synset('dog.n.01').examples()[0])
+the dog barked all night
+>>> wn.synset('dog.n.01').lemmas()
+[Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')]
+>>> [str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()]
+['dog', 'domestic_dog', 'Canis_familiaris']
+>>> wn.lemma('dog.n.01.dog').synset()
+Synset('dog.n.01')
+
+

The WordNet corpus reader gives access to the Open Multilingual WordNet, using ISO-639 language codes.

-
->>> sorted(wn.langs())
-['als', 'arb', 'cat', 'cmn', 'dan', 'eng', 'eus', 'fas',
-'fin', 'fra', 'fre', 'glg', 'heb', 'ind', 'ita', 'jpn', 'nno',
-'nob', 'pol', 'por', 'spa', 'tha', 'zsm']
->>> wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn')
-[Synset('dog.n.01'), Synset('spy.n.01')]
->>> wn.synset('spy.n.01').lemma_names('jpn')
-['\u3044\u306c', '\u307e\u308f\u3057\u8005', '\u30b9\u30d1\u30a4', '\u56de\u3057\u8005',
-'\u56de\u8005', '\u5bc6\u5075', '\u5de5\u4f5c\u54e1', '\u5efb\u3057\u8005',
-'\u5efb\u8005', '\u63a2', '\u63a2\u308a', '\u72ac', '\u79d8\u5bc6\u635c\u67fb\u54e1',
-'\u8adc\u5831\u54e1', '\u8adc\u8005', '\u9593\u8005', '\u9593\u8adc', '\u96a0\u5bc6']
->>> wn.synset('dog.n.01').lemma_names('ita')
-['cane', 'Canis_familiaris']
->>> wn.lemmas('cane', lang='ita')
-[Lemma('dog.n.01.cane'), Lemma('hammer.n.01.cane'), Lemma('cramp.n.02.cane'),
-Lemma('bad_person.n.01.cane'), Lemma('incompetent.n.01.cane')]
->>> sorted(wn.synset('dog.n.01').lemmas('dan'))
-[Lemma('dog.n.01.hund'), Lemma('dog.n.01.k\xf8ter'),
-Lemma('dog.n.01.vovhund'), Lemma('dog.n.01.vovse')]
->>> sorted(wn.synset('dog.n.01').lemmas('por'))
-[Lemma('dog.n.01.cachorro'), Lemma('dog.n.01.c\xe3es'),
-Lemma('dog.n.01.c\xe3o'), Lemma('dog.n.01.c\xe3o')]
->>> dog_lemma = wn.lemma(b'dog.n.01.c\xc3\xa3o'.decode('utf-8'), lang='por')
->>> dog_lemma
-Lemma('dog.n.01.c\xe3o')
->>> dog_lemma.lang()
-'por'
->>> len(wordnet.all_lemma_names(pos='n', lang='jpn'))
-66027
-
-
-
-
-

Synsets

+
>>> sorted(wn.langs())
+['als', 'arb', 'bul', 'cat', 'cmn', 'dan', 'ell', 'eng', 'eus', 'fas',
+'fin', 'fra', 'glg', 'heb', 'hrv', 'ind', 'ita', 'jpn', 'nld', 'nno',
+'nob', 'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha', 'zsm']
+>>> wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn')
+[Synset('dog.n.01'), Synset('spy.n.01')]
+
+
+

>>> wn.synset('spy.n.01').lemma_names('jpn')
+['\u3044\u306c', '\u307e\u308f\u3057\u8005', '\u30b9\u30d1\u30a4', '\u56de\u3057\u8005',
+'\u56de\u8005', '\u5bc6\u5075', '\u5de5\u4f5c\u54e1', '\u5efb\u3057\u8005',
+'\u5efb\u8005', '\u63a2', '\u63a2\u308a', '\u72ac', '\u79d8\u5bc6\u635c\u67fb\u54e1',
+'\u8adc\u5831\u54e1', '\u8adc\u8005', '\u9593\u8005', '\u9593\u8adc', '\u96a0\u5bc6']

+
>>> wn.synset('dog.n.01').lemma_names('ita')
+['cane', 'Canis_familiaris']
+>>> wn.lemmas('cane', lang='ita')
+[Lemma('dog.n.01.cane'), Lemma('cramp.n.02.cane'), Lemma('hammer.n.01.cane'), Lemma('bad_person.n.01.cane'),
+Lemma('incompetent.n.01.cane')]
+>>> sorted(wn.synset('dog.n.01').lemmas('dan'))
+[Lemma('dog.n.01.hund'), Lemma('dog.n.01.k\xf8ter'),
+Lemma('dog.n.01.vovhund'), Lemma('dog.n.01.vovse')]
+
+
+
+
>>> sorted(wn.synset('dog.n.01').lemmas('por'))
+[Lemma('dog.n.01.cachorra'), Lemma('dog.n.01.cachorro'), Lemma('dog.n.01.cadela'), Lemma('dog.n.01.c\xe3o')]
+
+
+
>>> dog_lemma = wn.lemma(b'dog.n.01.c\xc3\xa3o'.decode('utf-8'), lang='por')
+>>> dog_lemma
+Lemma('dog.n.01.c\xe3o')
+>>> dog_lemma.lang()
+'por'
+>>> len(list(wordnet.all_lemma_names(pos='n', lang='jpn')))
+64797
+
+
+
+
+
+

Synsets

Synset: a set of synonyms that share a common meaning.

-
-
->>> dog = wn.synset('dog.n.01')
->>> dog.hypernyms()
-[Synset('canine.n.02'), Synset('domestic_animal.n.01')]
->>> dog.hyponyms()  # doctest: +ELLIPSIS
-[Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), ...]
->>> dog.member_holonyms()
-[Synset('canis.n.01'), Synset('pack.n.06')]
->>> dog.root_hypernyms()
-[Synset('entity.n.01')]
->>> wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01'))
-[Synset('carnivore.n.01')]
-
-
+
>>> dog = wn.synset('dog.n.01')
+>>> dog.hypernyms()
+[Synset('canine.n.02'), Synset('domestic_animal.n.01')]
+>>> dog.hyponyms()
+[Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), ...]
+>>> dog.member_holonyms()
+[Synset('canis.n.01'), Synset('pack.n.06')]
+>>> dog.root_hypernyms()
+[Synset('entity.n.01')]
+>>> wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01'))
+[Synset('carnivore.n.01')]
+
+

Each synset contains one or more lemmas, which represent a specific sense of a specific word.

Note that some relations are defined by WordNet only over Lemmas:

-
-
->>> good = wn.synset('good.a.01')
->>> good.antonyms()
-Traceback (most recent call last):
-  File "<stdin>", line 1, in <module>
-AttributeError: 'Synset' object has no attribute 'antonyms'
->>> good.lemmas()[0].antonyms()
-[Lemma('bad.a.01.bad')]
-
-
+
>>> good = wn.synset('good.a.01')
+>>> good.antonyms()
+Traceback (most recent call last):
+  File "<stdin>", line 1, in <module>
+AttributeError: 'Synset' object has no attribute 'antonyms'
+>>> good.lemmas()[0].antonyms()
+[Lemma('bad.a.01.bad')]
+
+

The relations that are currently defined in this way are antonyms, derivationally_related_forms and pertainyms.

+

If you know the byte offset used to identify a synset in the original +Princeton WordNet data file, you can use that to instantiate the synset +in NLTK:

+
>>> wn.synset_from_pos_and_offset('n', 4543158)
+Synset('wagon.n.01')
+
+
+
+
+

Lemmas

+
>>> eat = wn.lemma('eat.v.03.eat')
+>>> eat
+Lemma('feed.v.06.eat')
+>>> print(eat.key())
+eat%2:34:02::
+>>> eat.count()
+4
+>>> wn.lemma_from_key(eat.key())
+Lemma('feed.v.06.eat')
+>>> wn.lemma_from_key(eat.key()).synset()
+Synset('feed.v.06')
+>>> wn.lemma_from_key('feebleminded%5:00:00:retarded:00')
+Lemma('backward.s.03.feebleminded')
+>>> for lemma in wn.synset('eat.v.03').lemmas():
+...     print(lemma, lemma.count())
+...
+Lemma('feed.v.06.feed') 3
+Lemma('feed.v.06.eat') 4
+>>> for lemma in wn.lemmas('eat', 'v'):
+...     print(lemma, lemma.count())
+...
+Lemma('eat.v.01.eat') 61
+Lemma('eat.v.02.eat') 13
+Lemma('feed.v.06.eat') 4
+Lemma('eat.v.04.eat') 0
+Lemma('consume.v.05.eat') 0
+Lemma('corrode.v.01.eat') 0
+>>> wn.lemma('jump.v.11.jump')
+Lemma('jump.v.11.jump')
+
-
-

Lemmas

-
-
->>> eat = wn.lemma('eat.v.03.eat')
->>> eat
-Lemma('feed.v.06.eat')
->>> print(eat.key())
-eat%2:34:02::
->>> eat.count()
-4
->>> wn.lemma_from_key(eat.key())
-Lemma('feed.v.06.eat')
->>> wn.lemma_from_key(eat.key()).synset()
-Synset('feed.v.06')
->>> wn.lemma_from_key('feebleminded%5:00:00:retarded:00')
-Lemma('backward.s.03.feebleminded')
->>> for lemma in wn.synset('eat.v.03').lemmas():
-...     print(lemma, lemma.count())
-...
-Lemma('feed.v.06.feed') 3
-Lemma('feed.v.06.eat') 4
->>> for lemma in wn.lemmas('eat', 'v'):
-...     print(lemma, lemma.count())
-...
-Lemma('eat.v.01.eat') 61
-Lemma('eat.v.02.eat') 13
-Lemma('feed.v.06.eat') 4
-Lemma('eat.v.04.eat') 0
-Lemma('consume.v.05.eat') 0
-Lemma('corrode.v.01.eat') 0
-
-

Lemmas can also have relations between them:

-
-
->>> vocal = wn.lemma('vocal.a.01.vocal')
->>> vocal.derivationally_related_forms()
-[Lemma('vocalize.v.02.vocalize')]
->>> vocal.pertainyms()
-[Lemma('voice.n.02.voice')]
->>> vocal.antonyms()
-[Lemma('instrumental.a.01.instrumental')]
-
-
+
>>> vocal = wn.lemma('vocal.a.01.vocal')
+>>> vocal.derivationally_related_forms()
+[Lemma('vocalize.v.02.vocalize')]
+>>> vocal.pertainyms()
+[Lemma('voice.n.02.voice')]
+>>> vocal.antonyms()
+[Lemma('instrumental.a.01.instrumental')]
+
+

The three relations above exist only on lemmas, not on synsets.

+
+
+

Verb Frames

+
>>> wn.synset('think.v.01').frame_ids()
+[5, 9]
+>>> for lemma in wn.synset('think.v.01').lemmas():
+...     print(lemma, lemma.frame_ids())
+...     print(" | ".join(lemma.frame_strings()))
+...
+Lemma('think.v.01.think') [5, 9]
+Something think something Adjective/Noun | Somebody think somebody
+Lemma('think.v.01.believe') [5, 9]
+Something believe something Adjective/Noun | Somebody believe somebody
+Lemma('think.v.01.consider') [5, 9]
+Something consider something Adjective/Noun | Somebody consider somebody
+Lemma('think.v.01.conceive') [5, 9]
+Something conceive something Adjective/Noun | Somebody conceive somebody
+>>> wn.synset('stretch.v.02').frame_ids()
+[8]
+>>> for lemma in wn.synset('stretch.v.02').lemmas():
+...     print(lemma, lemma.frame_ids())
+...     print(" | ".join(lemma.frame_strings()))
+...
+Lemma('stretch.v.02.stretch') [8, 2]
+Somebody stretch something | Somebody stretch
+Lemma('stretch.v.02.extend') [8]
+Somebody extend something
+
-
-

Verb Frames

-
-
->>> wn.synset('think.v.01').frame_ids()
-[5, 9]
->>> for lemma in wn.synset('think.v.01').lemmas():
-...     print(lemma, lemma.frame_ids())
-...     print(" | ".join(lemma.frame_strings()))
-...
-Lemma('think.v.01.think') [5, 9]
-Something think something Adjective/Noun | Somebody think somebody
-Lemma('think.v.01.believe') [5, 9]
-Something believe something Adjective/Noun | Somebody believe somebody
-Lemma('think.v.01.consider') [5, 9]
-Something consider something Adjective/Noun | Somebody consider somebody
-Lemma('think.v.01.conceive') [5, 9]
-Something conceive something Adjective/Noun | Somebody conceive somebody
->>> wn.synset('stretch.v.02').frame_ids()
-[8]
->>> for lemma in wn.synset('stretch.v.02').lemmas():
-...     print(lemma, lemma.frame_ids())
-...     print(" | ".join(lemma.frame_strings()))
-...
-Lemma('stretch.v.02.stretch') [8, 2]
-Somebody stretch something | Somebody stretch
-Lemma('stretch.v.02.extend') [8]
-Somebody extend something
-
-
-
-
-

Similarity

-
-
->>> dog = wn.synset('dog.n.01')
->>> cat = wn.synset('cat.n.01')
-
-
->>> hit = wn.synset('hit.v.01')
->>> slap = wn.synset('slap.v.01')
-
-
-

synset1.path_similarity(synset2): +

+
+

Similarity

+
>>> dog = wn.synset('dog.n.01')
+>>> cat = wn.synset('cat.n.01')
+
+
+
>>> hit = wn.synset('hit.v.01')
+>>> slap = wn.synset('slap.v.01')
+
+
+

synset1.path_similarity(synset2): Return a score denoting how similar two word senses are, based on the shortest path that connects the senses in the is-a (hypernym/hyponym) taxonomy. The score is in the range 0 to 1. By default, there is now a fake root node added to verbs so for cases where previously a path -could not be found---and None was returned---it should return a value. +could not be found—and None was returned—it should return a value. The old behavior can be achieved by setting simulate_root to be False. A score of 1 represents identity, i.e. comparing a sense with itself will return 1.

-
-
->>> dog.path_similarity(cat)  # doctest: +ELLIPSIS
-0.2...
-
-
->>> hit.path_similarity(slap)  # doctest: +ELLIPSIS
-0.142...
-
-
->>> wn.path_similarity(hit, slap)  # doctest: +ELLIPSIS
-0.142...
-
-
->>> print(hit.path_similarity(slap, simulate_root=False))
-None
-
-
->>> print(wn.path_similarity(hit, slap, simulate_root=False))
-None
-
-
-

synset1.lch_similarity(synset2): +

>>> dog.path_similarity(cat)
+0.2...
+
+
+
>>> hit.path_similarity(slap)
+0.142...
+
+
+
>>> wn.path_similarity(hit, slap)
+0.142...
+
+
+
>>> print(hit.path_similarity(slap, simulate_root=False))
+None
+
+
+
>>> print(wn.path_similarity(hit, slap, simulate_root=False))
+None
+
+
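As a quick sanity check (a sketch, not part of the original doctests), the 0.2 above is consistent with reading the score as 1/(shortest_path_distance + 1):

>>> dog.shortest_path_distance(cat)
+4
+>>> 1.0 / (dog.shortest_path_distance(cat) + 1)
+0.2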
+

synset1.lch_similarity(synset2): Leacock-Chodorow Similarity: Return a score denoting how similar two word senses are, based on the shortest path that connects the senses (as above) and the maximum depth of the taxonomy in which the senses occur. The relationship is given as -log(p/2d) where p is the shortest path length and d the taxonomy depth.

-
-
->>> dog.lch_similarity(cat)  # doctest: +ELLIPSIS
-2.028...
-
-
->>> hit.lch_similarity(slap)  # doctest: +ELLIPSIS
-1.312...
-
-
->>> wn.lch_similarity(hit, slap)  # doctest: +ELLIPSIS
-1.312...
-
-
->>> print(hit.lch_similarity(slap, simulate_root=False))
-None
-
-
->>> print(wn.lch_similarity(hit, slap, simulate_root=False))
-None
-
-
-

synset1.wup_similarity(synset2): +

>>> dog.lch_similarity(cat)
+2.028...
+
+
+
>>> hit.lch_similarity(slap)
+1.312...
+
+
+
>>> wn.lch_similarity(hit, slap)
+1.312...
+
+
+
>>> print(hit.lch_similarity(slap, simulate_root=False))
+None
+
+
+
>>> print(wn.lch_similarity(hit, slap, simulate_root=False))
+None
+
+
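The formula can be checked by hand. A sketch (the node-counting convention p = shortest_path_distance + 1 and the noun-taxonomy depth d = 19 are assumptions about WordNet 3.0, corroborated by the 2.028 above, not part of the original doctests):

>>> import math
+>>> p = dog.shortest_path_distance(cat) + 1  # assumed path length convention
+>>> d = 19  # assumed maximum depth of the noun taxonomy
+>>> round(-math.log(p / (2.0 * d)), 3)
+2.028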
+

synset1.wup_similarity(synset2): Wu-Palmer Similarity: Return a score denoting how similar two word senses are, based on the depth of the two senses in the taxonomy and that of their Least Common Subsumer (most specific ancestor node). Note that at this time the -scores given do _not_ always agree with those given by Pedersen's Perl +scores given do _not_ always agree with those given by Pedersen’s Perl implementation of Wordnet Similarity.

The LCS does not necessarily feature in the shortest path connecting the two senses, as it is by definition the common ancestor deepest in the @@ -623,47 +372,41 @@

Similarity

shortest path to the root node is the longest will be selected. Where the LCS has multiple paths to the root, the longer path is used for the purposes of the calculation.

-
-
->>> dog.wup_similarity(cat)  # doctest: +ELLIPSIS
-0.857...
-
-
->>> hit.wup_similarity(slap)
-0.25
-
-
->>> wn.wup_similarity(hit, slap)
-0.25
-
-
->>> print(hit.wup_similarity(slap, simulate_root=False))
-None
-
-
->>> print(wn.wup_similarity(hit, slap, simulate_root=False))
-None
-
-
-

wordnet_ic +

>>> dog.wup_similarity(cat)
+0.857...
+
+
+
>>> hit.wup_similarity(slap)
+0.25
+
+
+
>>> wn.wup_similarity(hit, slap)
+0.25
+
+
+
>>> print(hit.wup_similarity(slap, simulate_root=False))
+None
+
+
+
>>> print(wn.wup_similarity(hit, slap, simulate_root=False))
+None
+
+
+

wordnet_ic Information Content: Load an information content file from the wordnet_ic corpus.

-
-
->>> from nltk.corpus import wordnet_ic
->>> brown_ic = wordnet_ic.ic('ic-brown.dat')
->>> semcor_ic = wordnet_ic.ic('ic-semcor.dat')
-
-
+
>>> from nltk.corpus import wordnet_ic
+>>> brown_ic = wordnet_ic.ic('ic-brown.dat')
+>>> semcor_ic = wordnet_ic.ic('ic-semcor.dat')
+
+

Or you can create an information content dictionary from a corpus (or anything that has a words() method).

-
-
->>> from nltk.corpus import genesis
->>> genesis_ic = wn.ic(genesis, False, 0.0)
-
-
-

synset1.res_similarity(synset2, ic): +

>>> from nltk.corpus import genesis
+>>> genesis_ic = wn.ic(genesis, False, 0.0)
+
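On our reading of WordNetCorpusReader.ic(), the two positional arguments above are weight_senses_equally and smoothing (an assumption about the signature, not part of the original doctests), so the call can be spelled out as:

>>> genesis_ic = wn.ic(genesis, weight_senses_equally=False, smoothing=0.0)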
+
+

synset1.res_similarity(synset2, ic): Resnik Similarity: Return a score denoting how similar two word senses are, based on the Information Content (IC) of the Least Common Subsumer (most specific @@ -671,336 +414,498 @@

Similarity

information content, the result is dependent on the corpus used to generate the information content and the specifics of how the information content was created.

-
-
->>> dog.res_similarity(cat, brown_ic)  # doctest: +ELLIPSIS
-7.911...
->>> dog.res_similarity(cat, genesis_ic)  # doctest: +ELLIPSIS
-7.204...
-
-
-

synset1.jcn_similarity(synset2, ic): +

>>> dog.res_similarity(cat, brown_ic)
+7.911...
+>>> dog.res_similarity(cat, genesis_ic)
+7.204...
+
+
+

synset1.jcn_similarity(synset2, ic): Jiang-Conrath Similarity Return a score denoting how similar two word senses are, based on the Information Content (IC) of the Least Common Subsumer (most specific ancestor node) and that of the two input Synsets. The relationship is given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)).

-
-
->>> dog.jcn_similarity(cat, brown_ic)  # doctest: +ELLIPSIS
-0.449...
->>> dog.jcn_similarity(cat, genesis_ic)  # doctest: +ELLIPSIS
-0.285...
-
-
-

synset1.lin_similarity(synset2, ic): +

>>> dog.jcn_similarity(cat, brown_ic)
+0.449...
+>>> dog.jcn_similarity(cat, genesis_ic)
+0.285...
+
+
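Assuming res_similarity() returns the Information Content of the same Least Common Subsumer that jcn_similarity() uses internally (an assumption, not part of the original doctests), the equation can be verified directly:

>>> from nltk.corpus.reader.wordnet import information_content
+>>> ic1, ic2 = information_content(dog, brown_ic), information_content(cat, brown_ic)
+>>> manual = 1 / (ic1 + ic2 - 2 * dog.res_similarity(cat, brown_ic))
+>>> abs(dog.jcn_similarity(cat, brown_ic) - manual) < 1e-9
+True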
+

synset1.lin_similarity(synset2, ic): Lin Similarity: Return a score denoting how similar two word senses are, based on the Information Content (IC) of the Least Common Subsumer (most specific ancestor node) and that of the two input Synsets. The relationship is given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).

-
-
->>> dog.lin_similarity(cat, semcor_ic)  # doctest: +ELLIPSIS
-0.886...
-
-
-
-
-

Access to all Synsets

+
>>> dog.lin_similarity(cat, semcor_ic)
+0.886...
+
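The same style of check works for the Lin equation, under the same assumption about res_similarity() (a sketch, not part of the original doctests):

>>> from nltk.corpus.reader.wordnet import information_content
+>>> ic1, ic2 = information_content(dog, semcor_ic), information_content(cat, semcor_ic)
+>>> manual = 2 * dog.res_similarity(cat, semcor_ic) / (ic1 + ic2)
+>>> abs(dog.lin_similarity(cat, semcor_ic) - manual) < 1e-9
+True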
+
+ +
+

Access to all Synsets

Iterate over all the noun synsets:

-
-
->>> for synset in list(wn.all_synsets('n'))[:10]:
-...     print(synset)
-...
-Synset('entity.n.01')
-Synset('physical_entity.n.01')
-Synset('abstraction.n.06')
-Synset('thing.n.12')
-Synset('object.n.01')
-Synset('whole.n.02')
-Synset('congener.n.03')
-Synset('living_thing.n.01')
-Synset('organism.n.01')
-Synset('benthos.n.02')
-
-
+
>>> for synset in list(wn.all_synsets('n'))[:10]:
+...     print(synset)
+...
+Synset('entity.n.01')
+Synset('physical_entity.n.01')
+Synset('abstraction.n.06')
+Synset('thing.n.12')
+Synset('object.n.01')
+Synset('whole.n.02')
+Synset('congener.n.03')
+Synset('living_thing.n.01')
+Synset('organism.n.01')
+Synset('benthos.n.02')
+
+

Get all synsets for this word, possibly restricted by POS:

-
-
->>> wn.synsets('dog') # doctest: +ELLIPSIS
-[Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), ...]
->>> wn.synsets('dog', pos='v')
-[Synset('chase.v.01')]
-
-
+
>>> wn.synsets('dog')
+[Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), ...]
+>>> wn.synsets('dog', pos='v')
+[Synset('chase.v.01')]
+
+

Walk through the noun synsets looking at their hypernyms:

-
-
->>> from itertools import islice
->>> for synset in islice(wn.all_synsets('n'), 5):
-...     print(synset, synset.hypernyms())
-...
-Synset('entity.n.01') []
-Synset('physical_entity.n.01') [Synset('entity.n.01')]
-Synset('abstraction.n.06') [Synset('entity.n.01')]
-Synset('thing.n.12') [Synset('physical_entity.n.01')]
-Synset('object.n.01') [Synset('physical_entity.n.01')]
-
-
-
-
-

Morphy

+
>>> from itertools import islice
+>>> for synset in islice(wn.all_synsets('n'), 5):
+...     print(synset, synset.hypernyms())
+...
+Synset('entity.n.01') []
+Synset('physical_entity.n.01') [Synset('entity.n.01')]
+Synset('abstraction.n.06') [Synset('entity.n.01')]
+Synset('thing.n.12') [Synset('physical_entity.n.01')]
+Synset('object.n.01') [Synset('physical_entity.n.01')]
+
+
+ +
+

Morphy

Look up forms not in WordNet, with the help of Morphy:

-
-
->>> wn.morphy('denied', wn.NOUN)
->>> print(wn.morphy('denied', wn.VERB))
-deny
->>> wn.synsets('denied', wn.NOUN)
-[]
->>> wn.synsets('denied', wn.VERB) # doctest: +NORMALIZE_WHITESPACE
-[Synset('deny.v.01'), Synset('deny.v.02'), Synset('deny.v.03'), Synset('deny.v.04'),
-Synset('deny.v.05'), Synset('traverse.v.03'), Synset('deny.v.07')]
-
-
+
>>> wn.morphy('denied', wn.NOUN)
+>>> print(wn.morphy('denied', wn.VERB))
+deny
+>>> wn.synsets('denied', wn.NOUN)
+[]
+>>> wn.synsets('denied', wn.VERB)
+[Synset('deny.v.01'), Synset('deny.v.02'), Synset('deny.v.03'), Synset('deny.v.04'),
+Synset('deny.v.05'), Synset('traverse.v.03'), Synset('deny.v.07')]
+
+

Morphy uses a combination of inflectional ending rules and exception lists to handle a variety of different possibilities:

-
-
->>> print(wn.morphy('dogs'))
-dog
->>> print(wn.morphy('churches'))
-church
->>> print(wn.morphy('aardwolves'))
-aardwolf
->>> print(wn.morphy('abaci'))
-abacus
->>> print(wn.morphy('book', wn.NOUN))
-book
->>> wn.morphy('hardrock', wn.ADV)
->>> wn.morphy('book', wn.ADJ)
->>> wn.morphy('his', wn.NOUN)
->>>
-
-
-
-
-

Synset Closures

+
>>> print(wn.morphy('dogs'))
+dog
+>>> print(wn.morphy('churches'))
+church
+>>> print(wn.morphy('aardwolves'))
+aardwolf
+>>> print(wn.morphy('abaci'))
+abacus
+>>> print(wn.morphy('book', wn.NOUN))
+book
+>>> wn.morphy('hardrock', wn.ADV)
+>>> wn.morphy('book', wn.ADJ)
+>>> wn.morphy('his', wn.NOUN)
+>>>
+
+
+ +
+

Synset Closures

Compute transitive closures of synsets

-
-
->>> dog = wn.synset('dog.n.01')
->>> hypo = lambda s: s.hyponyms()
->>> hyper = lambda s: s.hypernyms()
->>> list(dog.closure(hypo, depth=1)) == dog.hyponyms()
-True
->>> list(dog.closure(hyper, depth=1)) == dog.hypernyms()
-True
->>> list(dog.closure(hypo))
-[Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'),
- Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'),
- Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'),
- Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'),
- Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), ...]
->>> list(dog.closure(hyper))
-[Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'),
-Synset('animal.n.01'), Synset('placental.n.01'), Synset('organism.n.01'),
-Synset('mammal.n.01'), Synset('living_thing.n.01'), Synset('vertebrate.n.01'),
-Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'),
-Synset('physical_entity.n.01'), Synset('entity.n.01')]
-
-
-
-
-

Regression Tests

-

Bug 85: morphy returns the base form of a word, if it's input is given +

>>> dog = wn.synset('dog.n.01')
+>>> hypo = lambda s: s.hyponyms()
+>>> hyper = lambda s: s.hypernyms()
+>>> list(dog.closure(hypo, depth=1)) == dog.hyponyms()
+True
+>>> list(dog.closure(hyper, depth=1)) == dog.hypernyms()
+True
+>>> list(dog.closure(hypo))
+[Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'),
+ Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'),
+ Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'),
+ Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'),
+ Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), ...]
+>>> list(dog.closure(hyper))
+[Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'),
+Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'),
+Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'),
+Synset('physical_entity.n.01'), Synset('entity.n.01')]
+
+
+ +
+

Regression Tests

+

Bug 85: morphy returns the base form of a word, if it’s input is given as a base form for a POS for which that word is not defined:

-
-
->>> wn.synsets('book', wn.NOUN)
-[Synset('book.n.01'), Synset('book.n.02'), Synset('record.n.05'), Synset('script.n.01'), Synset('ledger.n.01'), Synset('book.n.06'), Synset('book.n.07'), Synset('koran.n.01'), Synset('bible.n.01'), Synset('book.n.10'), Synset('book.n.11')]
->>> wn.synsets('book', wn.ADJ)
-[]
->>> wn.morphy('book', wn.NOUN)
-'book'
->>> wn.morphy('book', wn.ADJ)
-
-
+
>>> wn.synsets('book', wn.NOUN)
+[Synset('book.n.01'), Synset('book.n.02'), Synset('record.n.05'), Synset('script.n.01'), Synset('ledger.n.01'), Synset('book.n.06'), Synset('book.n.07'), Synset('koran.n.01'), Synset('bible.n.01'), Synset('book.n.10'), Synset('book.n.11')]
+>>> wn.synsets('book', wn.ADJ)
+[]
+>>> wn.morphy('book', wn.NOUN)
+'book'
+>>> wn.morphy('book', wn.ADJ)
+
+

Bug 160: wup_similarity breaks when the two synsets have no common hypernym

-
->>> t = wn.synsets('picasso')[0]
->>> m = wn.synsets('male')[1]
->>> t.wup_similarity(m)  # doctest: +ELLIPSIS
-0.631...
-
-
->>> t = wn.synsets('titan')[1]
->>> s = wn.synsets('say', wn.VERB)[0]
->>> print(t.wup_similarity(s))
-None
-
-
-

Bug 21: "instance of" not included in LCS (very similar to bug 160)

-
-
->>> a = wn.synsets("writings")[0]
->>> b = wn.synsets("scripture")[0]
->>> brown_ic = wordnet_ic.ic('ic-brown.dat')
->>> a.jcn_similarity(b, brown_ic)  # doctest: +ELLIPSIS
-0.175...
-
-
+
>>> t = wn.synsets('picasso')[0]
+>>> m = wn.synsets('male')[1]
+>>> t.wup_similarity(m)
+0.631...
+
+
+

Issue #2278: wup_similarity not commutative when comparing a noun and a verb.
+Patch #2650 resolved this error. As a result, the following use of
+wup_similarity no longer returns None:

>>> t = wn.synsets('titan')[1]
+>>> s = wn.synsets('say', wn.VERB)[0]
+>>> t.wup_similarity(s)
+0.142...

+
+

Bug 21: “instance of” not included in LCS (very similar to bug 160)

+
>>> a = wn.synsets("writings")[0]
+>>> b = wn.synsets("scripture")[0]
+>>> brown_ic = wordnet_ic.ic('ic-brown.dat')
+>>> a.jcn_similarity(b, brown_ic)
+0.175...
+
+

Bug 221: Verb root IC is zero

-
-
->>> from nltk.corpus.reader.wordnet import information_content
->>> s = wn.synsets('say', wn.VERB)[0]
->>> information_content(s, brown_ic)  # doctest: +ELLIPSIS
-4.623...
-
-
+
>>> from nltk.corpus.reader.wordnet import information_content
+>>> s = wn.synsets('say', wn.VERB)[0]
+>>> information_content(s, brown_ic)
+4.623...
+
+

Bug 161: Comparison between WN keys/lemmas should not be case sensitive

-
-
->>> k = wn.synsets("jefferson")[0].lemmas()[0].key()
->>> wn.lemma_from_key(k)
-Lemma('jefferson.n.01.Jefferson')
->>> wn.lemma_from_key(k.upper())
-Lemma('jefferson.n.01.Jefferson')
-
-
+
>>> k = wn.synsets("jefferson")[0].lemmas()[0].key()
+>>> wn.lemma_from_key(k)
+Lemma('jefferson.n.01.Jefferson')
+>>> wn.lemma_from_key(k.upper())
+Lemma('jefferson.n.01.Jefferson')
+
+

Bug 99: WordNet root_hypernyms gives incorrect results

-
-
->>> from nltk.corpus import wordnet as wn
->>> for s in wn.all_synsets(wn.NOUN):
-...     if s.root_hypernyms()[0] != wn.synset('entity.n.01'):
-...         print(s, s.root_hypernyms())
-...
->>>
-
-
+
>>> from nltk.corpus import wordnet as wn
+>>> for s in wn.all_synsets(wn.NOUN):
+...     if s.root_hypernyms()[0] != wn.synset('entity.n.01'):
+...         print(s, s.root_hypernyms())
+...
+>>>
+
+

Bug 382: JCN Division by zero error

-
-
->>> tow = wn.synset('tow.v.01')
->>> shlep = wn.synset('shlep.v.02')
->>> from nltk.corpus import wordnet_ic
->>> brown_ic =  wordnet_ic.ic('ic-brown.dat')
->>> tow.jcn_similarity(shlep, brown_ic)  # doctest: +ELLIPSIS
-1...e+300
-
-
+
>>> tow = wn.synset('tow.v.01')
+>>> shlep = wn.synset('shlep.v.02')
+>>> from nltk.corpus import wordnet_ic
+>>> brown_ic =  wordnet_ic.ic('ic-brown.dat')
+>>> tow.jcn_similarity(shlep, brown_ic)
+1...e+300
+
+

Bug 428: Depth is zero for instance nouns

-
-
->>> s = wn.synset("lincoln.n.01")
->>> s.max_depth() > 0
-True
-
-
+
>>> s = wn.synset("lincoln.n.01")
+>>> s.max_depth() > 0
+True
+
+

Bug 429: Information content smoothing used old reference to all_synsets

-
-
->>> genesis_ic = wn.ic(genesis, True, 1.0)
-
-
+
>>> genesis_ic = wn.ic(genesis, True, 1.0)
+
+

Bug 430: all_synsets used wrong pos lookup when synsets were cached

-
-
->>> for ii in wn.all_synsets(): pass
->>> for ii in wn.all_synsets(): pass
-
-
+
>>> for ii in wn.all_synsets(): pass
+>>> for ii in wn.all_synsets(): pass
+
+

Bug 470: shortest_path_distance ignored instance hypernyms

-
-
->>> google = wordnet.synsets("google")[0]
->>> earth = wordnet.synsets("earth")[0]
->>> google.wup_similarity(earth)  # doctest: +ELLIPSIS
-0.1...
-
-
+
>>> google = wordnet.synsets("google")[0]
+>>> earth = wordnet.synsets("earth")[0]
+>>> google.wup_similarity(earth)
+0.1...
+
+

Bug 484: similarity metrics returned -1 instead of None for no LCS

-
-
->>> t = wn.synsets('fly', wn.VERB)[0]
->>> s = wn.synsets('say', wn.VERB)[0]
->>> print(s.shortest_path_distance(t))
-None
->>> print(s.path_similarity(t, simulate_root=False))
-None
->>> print(s.lch_similarity(t, simulate_root=False))
-None
->>> print(s.wup_similarity(t, simulate_root=False))
-None
-
-
-

Bug 427: "pants" does not return all the senses it should

-
-
->>> from nltk.corpus import wordnet
->>> wordnet.synsets("pants",'n')
-[Synset('bloomers.n.01'), Synset('pant.n.01'), Synset('trouser.n.01'), Synset('gasp.n.01')]
-
-
+
>>> t = wn.synsets('fly', wn.VERB)[0]
+>>> s = wn.synsets('say', wn.VERB)[0]
+>>> print(s.shortest_path_distance(t))
+None
+>>> print(s.path_similarity(t, simulate_root=False))
+None
+>>> print(s.lch_similarity(t, simulate_root=False))
+None
+>>> print(s.wup_similarity(t, simulate_root=False))
+None
+
+
+

Bug 427: “pants” does not return all the senses it should

+
>>> from nltk.corpus import wordnet
+>>> wordnet.synsets("pants",'n')
+[Synset('bloomers.n.01'), Synset('pant.n.01'), Synset('trouser.n.01'), Synset('gasp.n.01')]
+
+

Bug 482: Some nouns not being lemmatised by WordNetLemmatizer().lemmatize

-
-
->>> from nltk.stem.wordnet import WordNetLemmatizer
->>> WordNetLemmatizer().lemmatize("eggs", pos="n")
-'egg'
->>> WordNetLemmatizer().lemmatize("legs", pos="n")
-'leg'
-
-
+
>>> from nltk.stem.wordnet import WordNetLemmatizer
+>>> WordNetLemmatizer().lemmatize("eggs", pos="n")
+'egg'
+>>> WordNetLemmatizer().lemmatize("legs", pos="n")
+'leg'
+
+

Bug 284: instance hypernyms not used in similarity calculations

-
-
->>> wn.synset('john.n.02').lch_similarity(wn.synset('dog.n.01'))  # doctest: +ELLIPSIS
-1.335...
->>> wn.synset('john.n.02').wup_similarity(wn.synset('dog.n.01'))  # doctest: +ELLIPSIS
-0.571...
->>> wn.synset('john.n.02').res_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
-2.224...
->>> wn.synset('john.n.02').jcn_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
-0.075...
->>> wn.synset('john.n.02').lin_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
-0.252...
->>> wn.synset('john.n.02').hypernym_paths()  # doctest: +ELLIPSIS
-[[Synset('entity.n.01'), ..., Synset('john.n.02')]]
-
-
+
>>> wn.synset('john.n.02').lch_similarity(wn.synset('dog.n.01'))
+1.335...
+>>> wn.synset('john.n.02').wup_similarity(wn.synset('dog.n.01'))
+0.571...
+>>> wn.synset('john.n.02').res_similarity(wn.synset('dog.n.01'), brown_ic)
+2.224...
+>>> wn.synset('john.n.02').jcn_similarity(wn.synset('dog.n.01'), brown_ic)
+0.075...
+>>> wn.synset('john.n.02').lin_similarity(wn.synset('dog.n.01'), brown_ic)
+0.252...
+>>> wn.synset('john.n.02').hypernym_paths()
+[[Synset('entity.n.01'), ..., Synset('john.n.02')]]
+
+

Issue 541: add domains to wordnet

-
-
->>> wn.synset('code.n.03').topic_domains()
-[Synset('computer_science.n.01')]
->>> wn.synset('pukka.a.01').region_domains()
-[Synset('india.n.01')]
->>> wn.synset('freaky.a.01').usage_domains()
-[Synset('slang.n.02')]
-
-
+
>>> wn.synset('code.n.03').topic_domains()
+[Synset('computer_science.n.01')]
+>>> wn.synset('pukka.a.01').region_domains()
+[Synset('india.n.01')]
+>>> wn.synset('freaky.a.01').usage_domains()
+[Synset('slang.n.02')]
+
+

Issue 629: wordnet failures when python run with -O optimizations

-
-
->>> # Run the test suite with python -O to check this
->>> wn.synsets("brunch")
-[Synset('brunch.n.01'), Synset('brunch.v.01')]
-
-
+
>>> # Run the test suite with python -O to check this
+>>> wn.synsets("brunch")
+[Synset('brunch.n.01'), Synset('brunch.v.01')]
+
+

Issue 395: wordnet returns incorrect result for lowest_common_hypernyms of chef and policeman

-
-
->>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
-[Synset('person.n.01')]
-
-
+
>>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
+[Synset('person.n.01')]
+
+
+

Bug https://github.com/nltk/nltk/issues/1641: Non-English lemmas containing capital letters cannot be looked up using wordnet.lemmas() or wordnet.synsets()

+
>>> wn.lemmas('Londres', lang='fra')
+[Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
+>>> wn.lemmas('londres', lang='fra')
+[Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
+
+
+
Patch-1 (https://github.com/nltk/nltk/pull/2065): Adding 3 functions (relations) to the WordNet class
>>> wn.synsets("computer_science")[0].in_topic_domains()[2]
+Synset('access_time.n.01')
+>>> wn.synsets("France")[0].in_region_domains()[18]
+Synset('french.n.01')
+>>> wn.synsets("slang")[1].in_usage_domains()[18]
+Synset('can-do.s.01')
+
+
+
+

Issue 2721: WordNetCorpusReader.ic() does not add smoothing to N

+
>>> class FakeCorpus:
+...     def words(self): return ['word']
+...
+>>> fake_ic = wn.ic(FakeCorpus(), False, 1.0)
+>>> word = wn.synset('word.n.01')
+>>> information_content(word, fake_ic) > 0
+True
+
+
+
+
+

Endlessness vs. intractability in relation trees

+
+

1. Endlessness

+

Until NLTK v. 3.5, the tree() function looped forever on symmetric +relations (verb_groups, attributes, and most also_sees). But in +the current version, tree() now detects and discards these cycles:

+
>>> from pprint import pprint
+>>> pprint(wn.synset('bound.a.01').tree(lambda s:s.also_sees()))
+[Synset('bound.a.01'),
+ [Synset('unfree.a.02'),
+  [Synset('confined.a.02'),
+   [Synset('restricted.a.01'), [Synset('classified.a.02')]]],
+  [Synset('dependent.a.01')],
+  [Synset('restricted.a.01'),
+   [Synset('classified.a.02')],
+   [Synset('confined.a.02')]]]]
+
+
+

Specifying the “cut_mark” parameter increases verbosity, so that the cycles +are mentioned in the output, together with the level where they occur:

+
>>> pprint(wn.synset('bound.a.01').tree(lambda s:s.also_sees(),cut_mark='...'))
+[Synset('bound.a.01'),
+ [Synset('unfree.a.02'),
+  "Cycle(Synset('bound.a.01'),-3,...)",
+  [Synset('confined.a.02'),
+   [Synset('restricted.a.01'),
+    [Synset('classified.a.02')],
+    "Cycle(Synset('confined.a.02'),-5,...)",
+    "Cycle(Synset('unfree.a.02'),-5,...)"],
+   "Cycle(Synset('unfree.a.02'),-4,...)"],
+  [Synset('dependent.a.01'), "Cycle(Synset('unfree.a.02'),-4,...)"],
+  [Synset('restricted.a.01'),
+   [Synset('classified.a.02')],
+   [Synset('confined.a.02'),
+    "Cycle(Synset('restricted.a.01'),-5,...)",
+    "Cycle(Synset('unfree.a.02'),-5,...)"],
+   "Cycle(Synset('unfree.a.02'),-4,...)"]]]
+
+
+
+
+

2. Intractability

+

However, even after discarding the infinite cycles, some trees can remain +intractable, due to combinatorial explosion in a relation. This happens in +WordNet, because the also_sees() relation has a big Strongly Connected +Component (_SCC_) consisting of 758 synsets, where any member node is +transitively connected by the same relation to all other members of the +same SCC. This produces intractable relation trees for each of these 758 +synsets, i.e. trees that are too big to compute or display on any computer.

+
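A rough way to see the size of the problem is to count every synset reachable from 'concrete.a.01' through also_sees() with a breadth-first search. A sketch (not part of the original doctests; since the whole SCC is reachable from any of its members, the count is at least 758):

>>> from collections import deque
+>>> def reachable(start, rel):
+...     seen, queue = {start}, deque([start])
+...     while queue:
+...         for nxt in rel(queue.popleft()):
+...             if nxt not in seen:
+...                 seen.add(nxt)
+...                 queue.append(nxt)
+...     return seen
+>>> len(reachable(wn.synset('concrete.a.01'), lambda s: s.also_sees())) >= 758
+True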

For example, the synset ‘concrete.a.01’ is a member of the largest SCC, +so its also_sees() tree is intractable, and can normally only be handled +by limiting the “depth” parameter to display a small number of levels:

+
>>> from pprint import pprint
+>>> pprint(wn.synset('concrete.a.01').tree(lambda s:s.also_sees(),cut_mark='...',depth=2))
+[Synset('concrete.a.01'),
+ [Synset('practical.a.01'),
+  "Cycle(Synset('concrete.a.01'),0,...)",
+  [Synset('possible.a.01'), '...'],
+  [Synset('realistic.a.01'), '...'],
+  [Synset('serviceable.a.01'), '...']],
+ [Synset('real.a.01'),
+  "Cycle(Synset('concrete.a.01'),0,...)",
+  [Synset('genuine.a.01'), '...'],
+  [Synset('realistic.a.01'), '...'],
+  [Synset('sincere.a.01'), '...']],
+ [Synset('tangible.a.01'), "Cycle(Synset('concrete.a.01'),0,...)"]]
+
+
+
+
2.1 First solution: acyclic_tree()
+

On the other hand, the new acyclic_tree() function is also able to handle +the intractable cases. The also_sees() acyclic tree of ‘concrete.a.01’ is +several hundred lines long, so here is a simpler example, concerning a much +smaller SCC: with only five members, the SCC that includes ‘bound.a.01’ +is tractable with the normal tree() function, as seen above.

+

But while tree() only prunes redundancy within local branches, acyclic_tree() +prunes the tree globally, thus discarding any additional redundancy, and +produces a tree that includes all reachable nodes (i.e. a _spanning tree_). +This tree is _minimal_ because it includes the reachable nodes only once, +but it is not necessarily a _Minimum Spanning Tree_ (MST), because the +Depth-first search strategy does not guarantee that nodes are reached +through the lowest number of links (as Breadth-first search would).

+
>>> pprint(wn.synset('bound.a.01').acyclic_tree(lambda s:s.also_sees()))
+[Synset('bound.a.01'),
+ [Synset('unfree.a.02'),
+  [Synset('confined.a.02'),
+   [Synset('restricted.a.01'), [Synset('classified.a.02')]]],
+  [Synset('dependent.a.01')]]]
+
+
+

Again, specifying the “cut_mark” parameter increases verbosity, so that the +cycles are mentioned in the output, together with the level where they occur:

+
>>> pprint(wn.synset('bound.a.01').acyclic_tree(lambda s:s.also_sees(),cut_mark='...'))
+[Synset('bound.a.01'),
+ [Synset('unfree.a.02'),
+  "Cycle(Synset('bound.a.01'),-3,...)",
+  [Synset('confined.a.02'),
+   [Synset('restricted.a.01'),
+    [Synset('classified.a.02')],
+    "Cycle(Synset('confined.a.02'),-5,...)",
+    "Cycle(Synset('unfree.a.02'),-5,...)"],
+   "Cycle(Synset('unfree.a.02'),-4,...)"],
+  [Synset('dependent.a.01'), "Cycle(Synset('unfree.a.02'),-4,...)"],
+  "Cycle(Synset('restricted.a.01'),-3,...)"]]
+
+
+
+
+
2.2 Better solution: mst()
+

A Minimum Spanning Tree (MST) spans all the nodes of a relation subgraph once, +while guaranteeing that each node is reached through the shortest path possible. +In unweighted relation graphs like WordNet, an MST can be computed very efficiently +in linear time, using Breadth-First Search (BFS). Like acyclic_tree(), the new +“unweighted_minimum_spanning_tree()” function (imported in the Wordnet +module as “mst”) handles intractable trees, such as the example discussed above: +“wn.synset(‘concrete.a.01’).mst(lambda s:s.also_sees())”.

+

But, while the also_sees() acyclic_tree of ‘bound.a.01’ reaches +‘classified.a.02’ through four links, using depth-first search as seen above +(bound.a.01 > unfree.a.02 > confined.a.02 > restricted.a.01 > classified.a.02), +in the following MST, the path to ‘classified.a.02’ is the shortest possible, +consisting of only three links (bound.a.01 > unfree.a.02 > restricted.a.01 > +classified.a.02):

+
>>> pprint(wn.synset('bound.a.01').mst(lambda s:s.also_sees()))
+[Synset('bound.a.01'),
+ [Synset('unfree.a.02'),
+  [Synset('confined.a.02')],
+  [Synset('dependent.a.01')],
+  [Synset('restricted.a.01'), [Synset('classified.a.02')]]]]
+
+
+
+
+
+
+

Teardown test

+
>>> from nltk.corpus import wordnet
+>>> wordnet._unload()
+
+
+
+ + + + +
+
+ +
+ +
+ +
+ +
+ - + \ No newline at end of file diff --git a/howto/wordnet_lch.html b/howto/wordnet_lch.html index e0859cd8c..bb3695850 100644 --- a/howto/wordnet_lch.html +++ b/howto/wordnet_lch.html @@ -1,408 +1,199 @@ - - - + - - -WordNet Lowest Common Hypernyms - + + + + + + + NLTK :: Sample usage for wordnet_lch + + + + + + + + + + + + + + -
-

WordNet Lowest Common Hypernyms

+
+
+
+ +

+ NLTK +

+ +
+
+ +

Documentation

+ +
+ + + + +
+ +
+ +
+ + - - -

Wordnet's lowest_common_hypernyms() method is based used to locate the +

+ + +
+ +
+
+ +
+

Sample usage for wordnet_lch

+
+

WordNet Lowest Common Hypernyms

+

Wordnet’s lowest_common_hypernyms() method is used to locate the +lowest single hypernym that is shared by two given words:

-
-
->>> from nltk.corpus import wordnet as wn
->>> wn.synset('kin.n.01').lowest_common_hypernyms(wn.synset('mother.n.01'))
-[Synset('relative.n.01')]
-
-
->>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
-[Synset('person.n.01')]
-
-
+
>>> from nltk.corpus import wordnet as wn
+>>> wn.synset('kin.n.01').lowest_common_hypernyms(wn.synset('mother.n.01'))
+[Synset('relative.n.01')]
+
+
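By definition, the returned synset is a common ancestor of both inputs, which can be confirmed against their hypernym paths (a sketch, not part of the original doctests):

>>> rel = wn.synset('kin.n.01').lowest_common_hypernyms(wn.synset('mother.n.01'))[0]
+>>> any(rel in path for path in wn.synset('kin.n.01').hypernym_paths())
+True
+>>> any(rel in path for path in wn.synset('mother.n.01').hypernym_paths())
+True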
+
>>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
+[Synset('person.n.01')]
+
+

This method generally returns a single result, but in some cases, more than one valid LCH is possible:

-
-
->>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'))
-[Synset('attribute.n.02'), Synset('measure.n.02')]
-
-
+
>>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'))
+[Synset('attribute.n.02'), Synset('measure.n.02')]
+
+

In some cases, lowest_common_hypernyms() can return one of the synsets which was passed to it as an argument:

-
-
->>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'))
-[Synset('woman.n.01')]
-
-
+
>>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'))
+[Synset('woman.n.01')]
+
+

In NLTK 3.0a2 the behavior of lowest_common_hypernyms() was changed to give more accurate results in a small set of cases, generally when dealing with nouns describing social roles or jobs. To emulate the pre v3.0a2 behavior, you can set the use_min_depth=True flag:

-
-
->>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
-[Synset('person.n.01')]
->>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'), use_min_depth=True)
-[Synset('organism.n.01')]
-
-
+
>>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
+[Synset('person.n.01')]
+>>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'), use_min_depth=True)
+[Synset('organism.n.01')]
+
+

In some cases use_min_depth=True may return more or fewer results than the default behavior:

-
-
->>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'))
-[Synset('woman.n.01')]
->>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'), use_min_depth=True)
-[Synset('organism.n.01'), Synset('woman.n.01')]
-
-
+
>>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'))
+[Synset('woman.n.01')]
+>>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'), use_min_depth=True)
+[Synset('organism.n.01'), Synset('woman.n.01')]
+
+

In the general case, however, they tend to return the same results:

-
-
->>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'))
-[Synset('attribute.n.02'), Synset('measure.n.02')]
->>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'), use_min_depth=True)
-[Synset('attribute.n.02'), Synset('measure.n.02')]
-
-
+
>>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'))
+[Synset('attribute.n.02'), Synset('measure.n.02')]
+>>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'), use_min_depth=True)
+[Synset('attribute.n.02'), Synset('measure.n.02')]
+
+
+
+ + +
+
+ +
+ +
+ +
+ +
+ - + \ No newline at end of file diff --git a/howto/wsd.html b/howto/wsd.html index 870995c78..7feeb6060 100644 --- a/howto/wsd.html +++ b/howto/wsd.html @@ -1,414 +1,210 @@ - - - + - - -Word Sense Disambiguation - + + + + + + + NLTK :: Sample usage for wsd + + + + + + + + + + + + + + -
-

Word Sense Disambiguation

-

Lesk Algorithm

+
+
+
+ +

+ NLTK +

+ +
+
+ +

Documentation

+ +
+ + + + +
+ +
+ +
+ + - - - + + + +
+ +
+
+ +
+

Sample usage for wsd

+
+

Word Sense Disambiguation

+
+

Lesk Algorithm

Performs the classic Lesk algorithm for Word Sense Disambiguation (WSD) using the definitions of the ambiguous word.

Given an ambiguous word and the context in which the word occurs, Lesk returns a Synset with the highest number of overlapping words between the context sentence and different definitions from each Synset.

-
-
->>> from nltk.wsd import lesk
->>> sent = ['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.']
-
-
->>> print(lesk(sent, 'bank', 'n'))
-Synset('savings_bank.n.02')
-
-
->>> print(lesk(sent, 'bank'))
-Synset('savings_bank.n.02')
-
-
-

The definitions for "bank" are:

-
-
->>> from nltk.corpus import wordnet as wn
->>> for ss in wn.synsets('bank'):
-...     print(ss, ss.definition())
-...
-Synset('bank.n.01') sloping land (especially the slope beside a body of water)
-Synset('depository_financial_institution.n.01') a financial institution that accepts deposits and channels the money into lending activities
-Synset('bank.n.03') a long ridge or pile
-Synset('bank.n.04') an arrangement of similar objects in a row or in tiers
-Synset('bank.n.05') a supply or stock held in reserve for future use (especially in emergencies)
-Synset('bank.n.06') the funds held by a gambling house or the dealer in some gambling games
-Synset('bank.n.07') a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
-Synset('savings_bank.n.02') a container (usually with a slot in the top) for keeping money at home
-Synset('bank.n.09') a building in which the business of banking transacted
-Synset('bank.n.10') a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning)
-Synset('bank.v.01') tip laterally
-Synset('bank.v.02') enclose with a bank
-Synset('bank.v.03') do business with a bank or keep an account at a bank
-Synset('bank.v.04') act as the banker in a game or in gambling
-Synset('bank.v.05') be in the banking business
-Synset('deposit.v.02') put into a bank account
-Synset('bank.v.07') cover with ashes so to control the rate of burning
-Synset('trust.v.01') have confidence or faith in
-
-
+
>>> from nltk.wsd import lesk
+>>> sent = ['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.']
+
+
+
>>> print(lesk(sent, 'bank', 'n'))
+Synset('savings_bank.n.02')
+
+
+
>>> print(lesk(sent, 'bank'))
+Synset('savings_bank.n.02')
+
+
+

The definitions for “bank” are:

+
>>> from nltk.corpus import wordnet as wn
+>>> for ss in wn.synsets('bank'):
+...     print(ss, ss.definition())
+...
+Synset('bank.n.01') sloping land (especially the slope beside a body of water)
+Synset('depository_financial_institution.n.01') a financial institution that accepts deposits and channels the money into lending activities
+Synset('bank.n.03') a long ridge or pile
+Synset('bank.n.04') an arrangement of similar objects in a row or in tiers
+Synset('bank.n.05') a supply or stock held in reserve for future use (especially in emergencies)
+Synset('bank.n.06') the funds held by a gambling house or the dealer in some gambling games
+Synset('bank.n.07') a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
+Synset('savings_bank.n.02') a container (usually with a slot in the top) for keeping money at home
+Synset('bank.n.09') a building in which the business of banking transacted
+Synset('bank.n.10') a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning)
+Synset('bank.v.01') tip laterally
+Synset('bank.v.02') enclose with a bank
+Synset('bank.v.03') do business with a bank or keep an account at a bank
+Synset('bank.v.04') act as the banker in a game or in gambling
+Synset('bank.v.05') be in the banking business
+Synset('deposit.v.02') put into a bank account
+Synset('bank.v.07') cover with ashes so to control the rate of burning
+Synset('trust.v.01') have confidence or faith in
+
+
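
The overlap criterion is easy to check by hand. Below is a minimal sketch, assuming whitespace-split glosses and leaving tie-breaking aside (the library resolves ties internally, and this is not its exact implementation); lesk scores every candidate this way and returns the highest-scoring one. The savings_bank.n.02 gloss shares two words, "the" and "money", with the context sentence:

+>>> context = set(sent)  # the bank context sentence defined above
+>>> len(context & set(wn.synset('savings_bank.n.02').definition().split()))
+2
+>>> len(context & set(wn.synset('bank.n.03').definition().split()))
+0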

Test disambiguation of the POS-tagged word “able”.

-
-
->>> [(s, s.pos()) for s in wn.synsets('able')]
-[(Synset('able.a.01'), 'a'), (Synset('able.s.02'), 's'), (Synset('able.s.03'), 's'), (Synset('able.s.04'), 's')]
->>> sent = 'people should be able to marry a person of their choice'.split()
->>> lesk(sent, 'able')
-Synset('able.s.04')
->>> lesk(sent, 'able', pos='a')
-Synset('able.a.01')
-
-
+
>>> [(s, s.pos()) for s in wn.synsets('able')]
+[(Synset('able.a.01'), 'a'), (Synset('able.s.02'), 's'), (Synset('able.s.03'), 's'), (Synset('able.s.04'), 's')]
+>>> sent = 'people should be able to marry a person of their choice'.split()
+>>> lesk(sent, 'able')
+Synset('able.s.04')
+>>> lesk(sent, 'able', pos='a')
+Synset('able.a.01')
+
+
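
The pos argument effectively filters the candidate synsets by their pos() value before any overlap scoring, which is why pos='a' leaves only the single true-adjective sense from the listing above (the satellite senses are tagged 's'):

+>>> [ss for ss in wn.synsets('able') if ss.pos() == 'a']
+[Synset('able.a.01')]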

Test behavior when there are no matching senses.

-
-
->>> lesk('John loves Mary'.split(), 'loves', synsets=[])
-
-
+
>>> lesk('John loves Mary'.split(), 'loves', synsets=[])
+
+
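
The call above returns None, which the interactive prompt does not display; printing makes that explicit. The synsets argument can also restrict the candidates to a non-empty subset, in which case scoring proceeds over just those senses. Restoring the bank context sentence first (sent was rebound above), and given the glosses listed earlier, only the depository sense shares two words with the context:

+>>> print(lesk('John loves Mary'.split(), 'loves', synsets=[]))
+None
+>>> sent = ['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.']
+>>> print(lesk(sent, 'bank', synsets=wn.synsets('bank')[:3]))
+Synset('depository_financial_institution.n.01')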
+
+
+ + +
+
+ +
+ +
+ +
+ +
+ - + \ No newline at end of file diff --git a/index.html b/index.html index 321c3a248..a808e9541 100644 --- a/index.html +++ b/index.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/install.html b/install.html index 2fe8d1dc7..19dfe1fbf 100644 --- a/install.html +++ b/install.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/news.html b/news.html index 8820fff1e..b65b59f0d 100644 --- a/news.html +++ b/news.html @@ -69,7 +69,7 @@

Documentation

NLTK Documentation

diff --git a/objects.inv b/objects.inv index caa5eb5ad..b5e4567ff 100644 Binary files a/objects.inv and b/objects.inv differ diff --git a/py-modindex.html b/py-modindex.html index 971b22404..b6db8ec51 100644 --- a/py-modindex.html +++ b/py-modindex.html @@ -71,7 +71,7 @@

Documentation

NLTK Documentation

diff --git a/search.html b/search.html index d9c45797e..e2c0de285 100644 --- a/search.html +++ b/search.html @@ -74,7 +74,7 @@

Documentation

NLTK Documentation

diff --git a/searchindex.js b/searchindex.js index 3e7f8821c..5c12c58d1 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["api/nltk","api/nltk.app","api/nltk.app.chartparser_app","api/nltk.app.chunkparser_app","api/nltk.app.collocations_app","api/nltk.app.concordance_app","api/nltk.app.nemo_app","api/nltk.app.rdparser_app","api/nltk.app.srparser_app","api/nltk.app.wordfreq_app","api/nltk.app.wordnet_app","api/nltk.book","api/nltk.ccg","api/nltk.ccg.api","api/nltk.ccg.chart","api/nltk.ccg.combinator","api/nltk.ccg.lexicon","api/nltk.ccg.logic","api/nltk.chat","api/nltk.chat.eliza","api/nltk.chat.iesha","api/nltk.chat.rude","api/nltk.chat.suntsu","api/nltk.chat.util","api/nltk.chat.zen","api/nltk.chunk","api/nltk.chunk.api","api/nltk.chunk.named_entity","api/nltk.chunk.regexp","api/nltk.chunk.util","api/nltk.classify","api/nltk.classify.api","api/nltk.classify.decisiontree","api/nltk.classify.maxent","api/nltk.classify.megam","api/nltk.classify.naivebayes","api/nltk.classify.positivenaivebayes","api/nltk.classify.rte_classify","api/nltk.classify.scikitlearn","api/nltk.classify.senna","api/nltk.classify.svm","api/nltk.classify.tadm","api/nltk.classify.textcat","api/nltk.classify.util","api/nltk.classify.weka","api/nltk.cli","api/nltk.cluster","api/nltk.cluster.api","api/nltk.cluster.em","api/nltk.cluster.gaac","api/nltk.cluster.kmeans","api/nltk.cluster.util","api/nltk.collections","api/nltk.collocations","api/nltk.compat","api/nltk.corpus","api/nltk.corpus.europarl_raw","api/nltk.corpus.reader","api/nltk.corpus.reader.aligned","api/nltk.corpus.reader.api","api/nltk.corpus.reader.bnc","api/nltk.corpus.reader.bracket_parse","api/nltk.corpus.reader.categorized_sents","api/nltk.corpus.reader.chasen","api/nltk.corpus.reader.childes","api/nltk.corpus.reader.chunked","api/nltk.corpus.reader.cmudict","api/nltk.corpus.reader.comparative_sents","api/nltk.corpus.reader.conll","api/nltk.corpus.reader.crubadan","api/nltk.corpus.reader.dependency","api/nltk.corpus.reader.framenet","api/nltk.corpus.reader.ieer","api/nltk.corpus.reader.indian","api/nltk.corpus.reader.ipipan","api/nltk.corpus.reader.knbc","api/nltk.corpus.reader.lin","api/nltk.corpus.reader.mte","api/nltk.corpus.reader.nkjp","api/nltk.corpus.reader.nombank","api/nltk.corpus.reader.nps_chat","api/nltk.corpus.reader.opinion_lexicon","api/nltk.corpus.reader.panlex_lite","api/nltk.corpus.reader.panlex_swadesh","api/nltk.corpus.reader.pl196x","api/nltk.corpus.reader.plaintext","api/nltk.corpus.reader.ppattach","api/nltk.corpus.reader.propbank","api/nltk.corpus.reader.pros_cons","api/nltk.corpus.reader.reviews","api/nltk.corpus.reader.rte","api/nltk.corpus.reader.semcor","api/nltk.corpus.reader.senseval","api/nltk.corpus.reader.sentiwordnet","api/nltk.corpus.reader.sinica_treebank","api/nltk.corpus.reader.string_category","api/nltk.corpus.reader.switchboard","api/nltk.corpus.reader.tagged","api/nltk.corpus.reader.timit","api/nltk.corpus.reader.toolbox","api/nltk.corpus.reader.twitter","api/nltk.corpus.reader.udhr","api/nltk.corpus.reader.util","api/nltk.corpus.reader.verbnet","api/nltk.corpus.reader.wordlist","api/nltk.corpus.reader.wordnet","api/nltk.corpus.reader.xmldocs","api/nltk.corpus.reader.ycoe","api/nltk.corpus.util","api/nltk.data","api/nltk.decorators","api/nltk.downloader","api/nltk.draw","api/nltk.draw.cfg","api/nltk.draw.dispersion","api/nltk.draw.table","api/nltk.draw.tree","api/nltk.draw.util","api/nltk.featstruct","api/nltk.grammar","api/nltk.help","api/nltk.inference","api/nltk.inference.api"
,"api/nltk.inference.discourse","api/nltk.inference.mace","api/nltk.inference.nonmonotonic","api/nltk.inference.prover9","api/nltk.inference.resolution","api/nltk.inference.tableau","api/nltk.internals","api/nltk.jsontags","api/nltk.lazyimport","api/nltk.lm","api/nltk.lm.api","api/nltk.lm.counter","api/nltk.lm.models","api/nltk.lm.preprocessing","api/nltk.lm.smoothing","api/nltk.lm.util","api/nltk.lm.vocabulary","api/nltk.metrics","api/nltk.metrics.agreement","api/nltk.metrics.aline","api/nltk.metrics.association","api/nltk.metrics.confusionmatrix","api/nltk.metrics.distance","api/nltk.metrics.paice","api/nltk.metrics.scores","api/nltk.metrics.segmentation","api/nltk.metrics.spearman","api/nltk.misc","api/nltk.misc.babelfish","api/nltk.misc.chomsky","api/nltk.misc.minimalset","api/nltk.misc.sort","api/nltk.misc.wordfinder","api/nltk.parse","api/nltk.parse.api","api/nltk.parse.bllip","api/nltk.parse.chart","api/nltk.parse.corenlp","api/nltk.parse.dependencygraph","api/nltk.parse.earleychart","api/nltk.parse.evaluate","api/nltk.parse.featurechart","api/nltk.parse.generate","api/nltk.parse.malt","api/nltk.parse.nonprojectivedependencyparser","api/nltk.parse.pchart","api/nltk.parse.projectivedependencyparser","api/nltk.parse.recursivedescent","api/nltk.parse.shiftreduce","api/nltk.parse.stanford","api/nltk.parse.transitionparser","api/nltk.parse.util","api/nltk.parse.viterbi","api/nltk.probability","api/nltk.sem","api/nltk.sem.boxer","api/nltk.sem.chat80","api/nltk.sem.cooper_storage","api/nltk.sem.drt","api/nltk.sem.drt_glue_demo","api/nltk.sem.evaluate","api/nltk.sem.glue","api/nltk.sem.hole","api/nltk.sem.lfg","api/nltk.sem.linearlogic","api/nltk.sem.logic","api/nltk.sem.relextract","api/nltk.sem.skolemize","api/nltk.sem.util","api/nltk.sentiment","api/nltk.sentiment.sentiment_analyzer","api/nltk.sentiment.util","api/nltk.sentiment.vader","api/nltk.stem","api/nltk.stem.api","api/nltk.stem.arlstem","api/nltk.stem.arlstem2","api/nltk.stem.cistem","api/nltk.stem.isri","api/nltk.stem.lancaster","api/nltk.stem.porter","api/nltk.stem.regexp","api/nltk.stem.rslp","api/nltk.stem.snowball","api/nltk.stem.util","api/nltk.stem.wordnet","api/nltk.tag","api/nltk.tag.api","api/nltk.tag.brill","api/nltk.tag.brill_trainer","api/nltk.tag.crf","api/nltk.tag.hmm","api/nltk.tag.hunpos","api/nltk.tag.mapping","api/nltk.tag.perceptron","api/nltk.tag.senna","api/nltk.tag.sequential","api/nltk.tag.stanford","api/nltk.tag.tnt","api/nltk.tag.util","api/nltk.tbl","api/nltk.tbl.demo","api/nltk.tbl.erroranalysis","api/nltk.tbl.feature","api/nltk.tbl.rule","api/nltk.tbl.template","api/nltk.test","api/nltk.test.all","api/nltk.test.childes_fixt","api/nltk.test.classify_fixt","api/nltk.test.conftest","api/nltk.test.discourse_fixt","api/nltk.test.gensim_fixt","api/nltk.test.gluesemantics_malt_fixt","api/nltk.test.inference_fixt","api/nltk.test.nonmonotonic_fixt","api/nltk.test.portuguese_en_fixt","api/nltk.test.probability_fixt","api/nltk.test.unit","api/nltk.test.unit.lm","api/nltk.test.unit.lm.test_counter","api/nltk.test.unit.lm.test_models","api/nltk.test.unit.lm.test_preprocessing","api/nltk.test.unit.lm.test_vocabulary","api/nltk.test.unit.test_aline","api/nltk.test.unit.test_brill","api/nltk.test.unit.test_cfd_mutation","api/nltk.test.unit.test_cfg2chomsky","api/nltk.test.unit.test_chunk","api/nltk.test.unit.test_classify","api/nltk.test.unit.test_collocations","api/nltk.test.unit.test_concordance","api/nltk.test.unit.test_corenlp","api/nltk.test.unit.test_corpora","api/nltk.test.unit.test_corpus_views","api/nltk.test
.unit.test_data","api/nltk.test.unit.test_disagreement","api/nltk.test.unit.test_distance","api/nltk.test.unit.test_freqdist","api/nltk.test.unit.test_hmm","api/nltk.test.unit.test_json2csv_corpus","api/nltk.test.unit.test_json_serialization","api/nltk.test.unit.test_metrics","api/nltk.test.unit.test_naivebayes","api/nltk.test.unit.test_nombank","api/nltk.test.unit.test_pl196x","api/nltk.test.unit.test_pos_tag","api/nltk.test.unit.test_ribes","api/nltk.test.unit.test_rte_classify","api/nltk.test.unit.test_seekable_unicode_stream_reader","api/nltk.test.unit.test_senna","api/nltk.test.unit.test_stem","api/nltk.test.unit.test_tag","api/nltk.test.unit.test_tgrep","api/nltk.test.unit.test_tokenize","api/nltk.test.unit.test_twitter_auth","api/nltk.test.unit.test_util","api/nltk.test.unit.test_wordnet","api/nltk.test.unit.translate","api/nltk.test.unit.translate.test_bleu","api/nltk.test.unit.translate.test_gdfa","api/nltk.test.unit.translate.test_ibm1","api/nltk.test.unit.translate.test_ibm2","api/nltk.test.unit.translate.test_ibm3","api/nltk.test.unit.translate.test_ibm4","api/nltk.test.unit.translate.test_ibm5","api/nltk.test.unit.translate.test_ibm_model","api/nltk.test.unit.translate.test_meteor","api/nltk.test.unit.translate.test_nist","api/nltk.test.unit.translate.test_stack_decoder","api/nltk.text","api/nltk.tgrep","api/nltk.tokenize","api/nltk.tokenize.api","api/nltk.tokenize.casual","api/nltk.tokenize.destructive","api/nltk.tokenize.legality_principle","api/nltk.tokenize.mwe","api/nltk.tokenize.nist","api/nltk.tokenize.punkt","api/nltk.tokenize.regexp","api/nltk.tokenize.repp","api/nltk.tokenize.sexpr","api/nltk.tokenize.simple","api/nltk.tokenize.sonority_sequencing","api/nltk.tokenize.stanford","api/nltk.tokenize.stanford_segmenter","api/nltk.tokenize.texttiling","api/nltk.tokenize.toktok","api/nltk.tokenize.treebank","api/nltk.tokenize.util","api/nltk.toolbox","api/nltk.translate","api/nltk.translate.api","api/nltk.translate.bleu_score","api/nltk.translate.chrf_score","api/nltk.translate.gale_church","api/nltk.translate.gdfa","api/nltk.translate.gleu_score","api/nltk.translate.ibm1","api/nltk.translate.ibm2","api/nltk.translate.ibm3","api/nltk.translate.ibm4","api/nltk.translate.ibm5","api/nltk.translate.ibm_model","api/nltk.translate.meteor_score","api/nltk.translate.metrics","api/nltk.translate.nist_score","api/nltk.translate.phrase_based","api/nltk.translate.ribes_score","api/nltk.translate.stack_decoder","api/nltk.tree","api/nltk.treeprettyprinter","api/nltk.treetransforms","api/nltk.twitter","api/nltk.twitter.api","api/nltk.twitter.common","api/nltk.twitter.twitter_demo","api/nltk.twitter.twitterclient","api/nltk.twitter.util","api/nltk.util","api/nltk.wsd","contribute","data","index","install","news","py-modindex","team"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.viewcode":1,sphinx:56},filenames:["api/nltk.rst","api/nltk.app.rst","api/nltk.app.chartparser_app.rst","api/nltk.app.chunkparser_app.rst","api/nltk.app.collocations_app.rst","api/nltk.app.concordance_app.rst","api/nltk.app.nemo_app.rst","api/nltk.app.rdparser_app.rst","api/nltk.app.srparser_app.rst","api/nltk.app.wordfreq_app.rst","api/nltk.app.wordnet_app.rst","api/nltk.book.rst","api/nltk.ccg.rst","api/nltk.ccg.api.rst","api/nltk.ccg.chart.rst","api/nltk.ccg.combinator.rst","api/nltk.ccg.
lexicon.rst","api/nltk.ccg.logic.rst","api/nltk.chat.rst","api/nltk.chat.eliza.rst","api/nltk.chat.iesha.rst","api/nltk.chat.rude.rst","api/nltk.chat.suntsu.rst","api/nltk.chat.util.rst","api/nltk.chat.zen.rst","api/nltk.chunk.rst","api/nltk.chunk.api.rst","api/nltk.chunk.named_entity.rst","api/nltk.chunk.regexp.rst","api/nltk.chunk.util.rst","api/nltk.classify.rst","api/nltk.classify.api.rst","api/nltk.classify.decisiontree.rst","api/nltk.classify.maxent.rst","api/nltk.classify.megam.rst","api/nltk.classify.naivebayes.rst","api/nltk.classify.positivenaivebayes.rst","api/nltk.classify.rte_classify.rst","api/nltk.classify.scikitlearn.rst","api/nltk.classify.senna.rst","api/nltk.classify.svm.rst","api/nltk.classify.tadm.rst","api/nltk.classify.textcat.rst","api/nltk.classify.util.rst","api/nltk.classify.weka.rst","api/nltk.cli.rst","api/nltk.cluster.rst","api/nltk.cluster.api.rst","api/nltk.cluster.em.rst","api/nltk.cluster.gaac.rst","api/nltk.cluster.kmeans.rst","api/nltk.cluster.util.rst","api/nltk.collections.rst","api/nltk.collocations.rst","api/nltk.compat.rst","api/nltk.corpus.rst","api/nltk.corpus.europarl_raw.rst","api/nltk.corpus.reader.rst","api/nltk.corpus.reader.aligned.rst","api/nltk.corpus.reader.api.rst","api/nltk.corpus.reader.bnc.rst","api/nltk.corpus.reader.bracket_parse.rst","api/nltk.corpus.reader.categorized_sents.rst","api/nltk.corpus.reader.chasen.rst","api/nltk.corpus.reader.childes.rst","api/nltk.corpus.reader.chunked.rst","api/nltk.corpus.reader.cmudict.rst","api/nltk.corpus.reader.comparative_sents.rst","api/nltk.corpus.reader.conll.rst","api/nltk.corpus.reader.crubadan.rst","api/nltk.corpus.reader.dependency.rst","api/nltk.corpus.reader.framenet.rst","api/nltk.corpus.reader.ieer.rst","api/nltk.corpus.reader.indian.rst","api/nltk.corpus.reader.ipipan.rst","api/nltk.corpus.reader.knbc.rst","api/nltk.corpus.reader.lin.rst","api/nltk.corpus.reader.mte.rst","api/nltk.corpus.reader.nkjp.rst","api/nltk.corpus.reader.nombank.rst","api/nltk.corpus.reader.nps_chat.rst","api/nltk.corpus.reader.opinion_lexicon.rst","api/nltk.corpus.reader.panlex_lite.rst","api/nltk.corpus.reader.panlex_swadesh.rst","api/nltk.corpus.reader.pl196x.rst","api/nltk.corpus.reader.plaintext.rst","api/nltk.corpus.reader.ppattach.rst","api/nltk.corpus.reader.propbank.rst","api/nltk.corpus.reader.pros_cons.rst","api/nltk.corpus.reader.reviews.rst","api/nltk.corpus.reader.rte.rst","api/nltk.corpus.reader.semcor.rst","api/nltk.corpus.reader.senseval.rst","api/nltk.corpus.reader.sentiwordnet.rst","api/nltk.corpus.reader.sinica_treebank.rst","api/nltk.corpus.reader.string_category.rst","api/nltk.corpus.reader.switchboard.rst","api/nltk.corpus.reader.tagged.rst","api/nltk.corpus.reader.timit.rst","api/nltk.corpus.reader.toolbox.rst","api/nltk.corpus.reader.twitter.rst","api/nltk.corpus.reader.udhr.rst","api/nltk.corpus.reader.util.rst","api/nltk.corpus.reader.verbnet.rst","api/nltk.corpus.reader.wordlist.rst","api/nltk.corpus.reader.wordnet.rst","api/nltk.corpus.reader.xmldocs.rst","api/nltk.corpus.reader.ycoe.rst","api/nltk.corpus.util.rst","api/nltk.data.rst","api/nltk.decorators.rst","api/nltk.downloader.rst","api/nltk.draw.rst","api/nltk.draw.cfg.rst","api/nltk.draw.dispersion.rst","api/nltk.draw.table.rst","api/nltk.draw.tree.rst","api/nltk.draw.util.rst","api/nltk.featstruct.rst","api/nltk.grammar.rst","api/nltk.help.rst","api/nltk.inference.rst","api/nltk.inference.api.rst","api/nltk.inference.discourse.rst","api/nltk.inference.mace.rst","api/nltk.inference.nonmonotonic.rst","api/nltk.inference.prover
9.rst","api/nltk.inference.resolution.rst","api/nltk.inference.tableau.rst","api/nltk.internals.rst","api/nltk.jsontags.rst","api/nltk.lazyimport.rst","api/nltk.lm.rst","api/nltk.lm.api.rst","api/nltk.lm.counter.rst","api/nltk.lm.models.rst","api/nltk.lm.preprocessing.rst","api/nltk.lm.smoothing.rst","api/nltk.lm.util.rst","api/nltk.lm.vocabulary.rst","api/nltk.metrics.rst","api/nltk.metrics.agreement.rst","api/nltk.metrics.aline.rst","api/nltk.metrics.association.rst","api/nltk.metrics.confusionmatrix.rst","api/nltk.metrics.distance.rst","api/nltk.metrics.paice.rst","api/nltk.metrics.scores.rst","api/nltk.metrics.segmentation.rst","api/nltk.metrics.spearman.rst","api/nltk.misc.rst","api/nltk.misc.babelfish.rst","api/nltk.misc.chomsky.rst","api/nltk.misc.minimalset.rst","api/nltk.misc.sort.rst","api/nltk.misc.wordfinder.rst","api/nltk.parse.rst","api/nltk.parse.api.rst","api/nltk.parse.bllip.rst","api/nltk.parse.chart.rst","api/nltk.parse.corenlp.rst","api/nltk.parse.dependencygraph.rst","api/nltk.parse.earleychart.rst","api/nltk.parse.evaluate.rst","api/nltk.parse.featurechart.rst","api/nltk.parse.generate.rst","api/nltk.parse.malt.rst","api/nltk.parse.nonprojectivedependencyparser.rst","api/nltk.parse.pchart.rst","api/nltk.parse.projectivedependencyparser.rst","api/nltk.parse.recursivedescent.rst","api/nltk.parse.shiftreduce.rst","api/nltk.parse.stanford.rst","api/nltk.parse.transitionparser.rst","api/nltk.parse.util.rst","api/nltk.parse.viterbi.rst","api/nltk.probability.rst","api/nltk.sem.rst","api/nltk.sem.boxer.rst","api/nltk.sem.chat80.rst","api/nltk.sem.cooper_storage.rst","api/nltk.sem.drt.rst","api/nltk.sem.drt_glue_demo.rst","api/nltk.sem.evaluate.rst","api/nltk.sem.glue.rst","api/nltk.sem.hole.rst","api/nltk.sem.lfg.rst","api/nltk.sem.linearlogic.rst","api/nltk.sem.logic.rst","api/nltk.sem.relextract.rst","api/nltk.sem.skolemize.rst","api/nltk.sem.util.rst","api/nltk.sentiment.rst","api/nltk.sentiment.sentiment_analyzer.rst","api/nltk.sentiment.util.rst","api/nltk.sentiment.vader.rst","api/nltk.stem.rst","api/nltk.stem.api.rst","api/nltk.stem.arlstem.rst","api/nltk.stem.arlstem2.rst","api/nltk.stem.cistem.rst","api/nltk.stem.isri.rst","api/nltk.stem.lancaster.rst","api/nltk.stem.porter.rst","api/nltk.stem.regexp.rst","api/nltk.stem.rslp.rst","api/nltk.stem.snowball.rst","api/nltk.stem.util.rst","api/nltk.stem.wordnet.rst","api/nltk.tag.rst","api/nltk.tag.api.rst","api/nltk.tag.brill.rst","api/nltk.tag.brill_trainer.rst","api/nltk.tag.crf.rst","api/nltk.tag.hmm.rst","api/nltk.tag.hunpos.rst","api/nltk.tag.mapping.rst","api/nltk.tag.perceptron.rst","api/nltk.tag.senna.rst","api/nltk.tag.sequential.rst","api/nltk.tag.stanford.rst","api/nltk.tag.tnt.rst","api/nltk.tag.util.rst","api/nltk.tbl.rst","api/nltk.tbl.demo.rst","api/nltk.tbl.erroranalysis.rst","api/nltk.tbl.feature.rst","api/nltk.tbl.rule.rst","api/nltk.tbl.template.rst","api/nltk.test.rst","api/nltk.test.all.rst","api/nltk.test.childes_fixt.rst","api/nltk.test.classify_fixt.rst","api/nltk.test.conftest.rst","api/nltk.test.discourse_fixt.rst","api/nltk.test.gensim_fixt.rst","api/nltk.test.gluesemantics_malt_fixt.rst","api/nltk.test.inference_fixt.rst","api/nltk.test.nonmonotonic_fixt.rst","api/nltk.test.portuguese_en_fixt.rst","api/nltk.test.probability_fixt.rst","api/nltk.test.unit.rst","api/nltk.test.unit.lm.rst","api/nltk.test.unit.lm.test_counter.rst","api/nltk.test.unit.lm.test_models.rst","api/nltk.test.unit.lm.test_preprocessing.rst","api/nltk.test.unit.lm.test_vocabulary.rst","api/nltk.test.unit.test_aline.rst","api
/nltk.test.unit.test_brill.rst","api/nltk.test.unit.test_cfd_mutation.rst","api/nltk.test.unit.test_cfg2chomsky.rst","api/nltk.test.unit.test_chunk.rst","api/nltk.test.unit.test_classify.rst","api/nltk.test.unit.test_collocations.rst","api/nltk.test.unit.test_concordance.rst","api/nltk.test.unit.test_corenlp.rst","api/nltk.test.unit.test_corpora.rst","api/nltk.test.unit.test_corpus_views.rst","api/nltk.test.unit.test_data.rst","api/nltk.test.unit.test_disagreement.rst","api/nltk.test.unit.test_distance.rst","api/nltk.test.unit.test_freqdist.rst","api/nltk.test.unit.test_hmm.rst","api/nltk.test.unit.test_json2csv_corpus.rst","api/nltk.test.unit.test_json_serialization.rst","api/nltk.test.unit.test_metrics.rst","api/nltk.test.unit.test_naivebayes.rst","api/nltk.test.unit.test_nombank.rst","api/nltk.test.unit.test_pl196x.rst","api/nltk.test.unit.test_pos_tag.rst","api/nltk.test.unit.test_ribes.rst","api/nltk.test.unit.test_rte_classify.rst","api/nltk.test.unit.test_seekable_unicode_stream_reader.rst","api/nltk.test.unit.test_senna.rst","api/nltk.test.unit.test_stem.rst","api/nltk.test.unit.test_tag.rst","api/nltk.test.unit.test_tgrep.rst","api/nltk.test.unit.test_tokenize.rst","api/nltk.test.unit.test_twitter_auth.rst","api/nltk.test.unit.test_util.rst","api/nltk.test.unit.test_wordnet.rst","api/nltk.test.unit.translate.rst","api/nltk.test.unit.translate.test_bleu.rst","api/nltk.test.unit.translate.test_gdfa.rst","api/nltk.test.unit.translate.test_ibm1.rst","api/nltk.test.unit.translate.test_ibm2.rst","api/nltk.test.unit.translate.test_ibm3.rst","api/nltk.test.unit.translate.test_ibm4.rst","api/nltk.test.unit.translate.test_ibm5.rst","api/nltk.test.unit.translate.test_ibm_model.rst","api/nltk.test.unit.translate.test_meteor.rst","api/nltk.test.unit.translate.test_nist.rst","api/nltk.test.unit.translate.test_stack_decoder.rst","api/nltk.text.rst","api/nltk.tgrep.rst","api/nltk.tokenize.rst","api/nltk.tokenize.api.rst","api/nltk.tokenize.casual.rst","api/nltk.tokenize.destructive.rst","api/nltk.tokenize.legality_principle.rst","api/nltk.tokenize.mwe.rst","api/nltk.tokenize.nist.rst","api/nltk.tokenize.punkt.rst","api/nltk.tokenize.regexp.rst","api/nltk.tokenize.repp.rst","api/nltk.tokenize.sexpr.rst","api/nltk.tokenize.simple.rst","api/nltk.tokenize.sonority_sequencing.rst","api/nltk.tokenize.stanford.rst","api/nltk.tokenize.stanford_segmenter.rst","api/nltk.tokenize.texttiling.rst","api/nltk.tokenize.toktok.rst","api/nltk.tokenize.treebank.rst","api/nltk.tokenize.util.rst","api/nltk.toolbox.rst","api/nltk.translate.rst","api/nltk.translate.api.rst","api/nltk.translate.bleu_score.rst","api/nltk.translate.chrf_score.rst","api/nltk.translate.gale_church.rst","api/nltk.translate.gdfa.rst","api/nltk.translate.gleu_score.rst","api/nltk.translate.ibm1.rst","api/nltk.translate.ibm2.rst","api/nltk.translate.ibm3.rst","api/nltk.translate.ibm4.rst","api/nltk.translate.ibm5.rst","api/nltk.translate.ibm_model.rst","api/nltk.translate.meteor_score.rst","api/nltk.translate.metrics.rst","api/nltk.translate.nist_score.rst","api/nltk.translate.phrase_based.rst","api/nltk.translate.ribes_score.rst","api/nltk.translate.stack_decoder.rst","api/nltk.tree.rst","api/nltk.treeprettyprinter.rst","api/nltk.treetransforms.rst","api/nltk.twitter.rst","api/nltk.twitter.api.rst","api/nltk.twitter.common.rst","api/nltk.twitter.twitter_demo.rst","api/nltk.twitter.twitterclient.rst","api/nltk.twitter.util.rst","api/nltk.util.rst","api/nltk.wsd.rst","contribute.rst","data.rst","index.rst","install.rst","news.rst","py-modindex.rs
t","team.rst"],objects:{"":{nltk:[0,0,0,"-"]},"nltk.app":{chartparser_app:[2,0,0,"-"],chunkparser_app:[3,0,0,"-"],collocations_app:[4,0,0,"-"],concordance_app:[5,0,0,"-"],nemo_app:[6,0,0,"-"],rdparser_app:[7,0,0,"-"],srparser_app:[8,0,0,"-"],wordnet_app:[10,0,0,"-"]},"nltk.app.chartparser_app":{app:[2,1,1,""]},"nltk.app.chunkparser_app":{app:[3,1,1,""]},"nltk.app.collocations_app":{app:[4,1,1,""]},"nltk.app.concordance_app":{app:[5,1,1,""]},"nltk.app.nemo_app":{app:[6,1,1,""]},"nltk.app.rdparser_app":{app:[7,1,1,""]},"nltk.app.srparser_app":{app:[8,1,1,""]},"nltk.app.wordnet_app":{app:[10,1,1,""]},"nltk.book":{sents:[11,1,1,""],texts:[11,1,1,""]},"nltk.ccg":{api:[13,0,0,"-"],chart:[14,0,0,"-"],combinator:[15,0,0,"-"],lexicon:[16,0,0,"-"],logic:[17,0,0,"-"]},"nltk.ccg.api":{AbstractCCGCategory:[13,2,1,""],CCGVar:[13,2,1,""],Direction:[13,2,1,""],FunctionalCategory:[13,2,1,""],PrimitiveCategory:[13,2,1,""]},"nltk.ccg.api.AbstractCCGCategory":{can_unify:[13,3,1,""],is_function:[13,3,1,""],is_primitive:[13,3,1,""],is_var:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.api.CCGVar":{__init__:[13,3,1,""],can_unify:[13,3,1,""],id:[13,3,1,""],is_function:[13,3,1,""],is_primitive:[13,3,1,""],is_var:[13,3,1,""],new_id:[13,3,1,""],reset_id:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.api.Direction":{__init__:[13,3,1,""],can_compose:[13,3,1,""],can_cross:[13,3,1,""],can_unify:[13,3,1,""],dir:[13,3,1,""],is_backward:[13,3,1,""],is_forward:[13,3,1,""],is_variable:[13,3,1,""],restrs:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.api.FunctionalCategory":{__init__:[13,3,1,""],arg:[13,3,1,""],can_unify:[13,3,1,""],dir:[13,3,1,""],is_function:[13,3,1,""],is_primitive:[13,3,1,""],is_var:[13,3,1,""],res:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.api.PrimitiveCategory":{__init__:[13,3,1,""],can_unify:[13,3,1,""],categ:[13,3,1,""],is_function:[13,3,1,""],is_primitive:[13,3,1,""],is_var:[13,3,1,""],restrs:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.chart":{BackwardTypeRaiseRule:[14,2,1,""],BinaryCombinatorRule:[14,2,1,""],CCGChart:[14,2,1,""],CCGChartParser:[14,2,1,""],CCGEdge:[14,2,1,""],CCGLeafEdge:[14,2,1,""],ForwardTypeRaiseRule:[14,2,1,""],compute_semantics:[14,1,1,""],demo:[14,1,1,""],printCCGDerivation:[14,1,1,""],printCCGTree:[14,1,1,""]},"nltk.ccg.chart.BackwardTypeRaiseRule":{NUMEDGES:[14,4,1,""],__init__:[14,3,1,""],apply:[14,3,1,""]},"nltk.ccg.chart.BinaryCombinatorRule":{NUMEDGES:[14,4,1,""],__init__:[14,3,1,""],apply:[14,3,1,""]},"nltk.ccg.chart.CCGChart":{__init__:[14,3,1,""]},"nltk.ccg.chart.CCGChartParser":{__init__:[14,3,1,""],lexicon:[14,3,1,""],parse:[14,3,1,""]},"nltk.ccg.chart.CCGEdge":{__init__:[14,3,1,""],categ:[14,3,1,""],dot:[14,3,1,""],end:[14,3,1,""],is_complete:[14,3,1,""],is_incomplete:[14,3,1,""],length:[14,3,1,""],lhs:[14,3,1,""],nextsym:[14,3,1,""],rhs:[14,3,1,""],rule:[14,3,1,""],span:[14,3,1,""],start:[14,3,1,""]},"nltk.ccg.chart.CCGLeafEdge":{__init__:[14,3,1,""],categ:[14,3,1,""],dot:[14,3,1,""],end:[14,3,1,""],is_complete:[14,3,1,""],is_incomplete:[14,3,1,""],leaf:[14,3,1,""],length:[14,3,1,""],lhs:[14,3,1,""],nextsym:[14,3,1,""],rhs:[14,3,1,""],span:[14,3,1,""],start:[14,3,1,""],token:[14,3,1,""]},"nltk.ccg.chart.ForwardTypeRaiseRule":{NUMEDGES:[14,4,1,""],__init__:[14,3,1,""],apply:[14,3,1,""]},"nltk.ccg.combinator":{BackwardCombinator:[15,2,1,""],DirectedBinaryCombinator:[15,2,1,""],ForwardCombinator:[15,2,1,""],UndirectedBinaryCombinator:[15,2,1,""],UndirectedComposition:[15,2,1,""],UndirectedFunctionApplication:[15,2,1,""],UndirectedSubstitution:[15,2,1,""],UndirectedTy
peRaise:[15,2,1,""],backwardBxConstraint:[15,1,1,""],backwardOnly:[15,1,1,""],backwardSxConstraint:[15,1,1,""],backwardTConstraint:[15,1,1,""],bothBackward:[15,1,1,""],bothForward:[15,1,1,""],crossedDirs:[15,1,1,""],forwardOnly:[15,1,1,""],forwardSConstraint:[15,1,1,""],forwardTConstraint:[15,1,1,""],innermostFunction:[15,1,1,""]},"nltk.ccg.combinator.BackwardCombinator":{__init__:[15,3,1,""],can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.DirectedBinaryCombinator":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.ForwardCombinator":{__init__:[15,3,1,""],can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedBinaryCombinator":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedComposition":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedFunctionApplication":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedSubstitution":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedTypeRaise":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.lexicon":{CCGLexicon:[16,2,1,""],Token:[16,2,1,""],augParseCategory:[16,1,1,""],fromstring:[16,1,1,""],matchBrackets:[16,1,1,""],nextCategory:[16,1,1,""],parseApplication:[16,1,1,""],parsePrimitiveCategory:[16,1,1,""],parseSubscripts:[16,1,1,""]},"nltk.ccg.lexicon.CCGLexicon":{__init__:[16,3,1,""],categories:[16,3,1,""],start:[16,3,1,""]},"nltk.ccg.lexicon.Token":{__init__:[16,3,1,""],categ:[16,3,1,""],semantics:[16,3,1,""]},"nltk.ccg.logic":{compute_composition_semantics:[17,1,1,""],compute_function_semantics:[17,1,1,""],compute_substitution_semantics:[17,1,1,""],compute_type_raised_semantics:[17,1,1,""]},"nltk.chat":{chatbots:[18,1,1,""],eliza:[19,0,0,"-"],iesha:[20,0,0,"-"],rude:[21,0,0,"-"],suntsu:[22,0,0,"-"],util:[23,0,0,"-"],zen:[24,0,0,"-"]},"nltk.chat.eliza":{demo:[19,1,1,""],eliza_chat:[19,1,1,""]},"nltk.chat.iesha":{demo:[20,1,1,""],iesha_chat:[20,1,1,""]},"nltk.chat.rude":{demo:[21,1,1,""],rude_chat:[21,1,1,""]},"nltk.chat.suntsu":{demo:[22,1,1,""],suntsu_chat:[22,1,1,""]},"nltk.chat.util":{Chat:[23,2,1,""]},"nltk.chat.util.Chat":{__init__:[23,3,1,""],converse:[23,3,1,""],respond:[23,3,1,""]},"nltk.chat.zen":{demo:[24,1,1,""],zen_chat:[24,1,1,""]},"nltk.chunk":{api:[26,0,0,"-"],named_entity:[27,0,0,"-"],ne_chunk:[25,1,1,""],ne_chunk_sents:[25,1,1,""],regexp:[28,0,0,"-"],util:[29,0,0,"-"]},"nltk.chunk.api":{ChunkParserI:[26,2,1,""]},"nltk.chunk.api.ChunkParserI":{evaluate:[26,3,1,""],parse:[26,3,1,""]},"nltk.chunk.named_entity":{NEChunkParser:[27,2,1,""],NEChunkParserTagger:[27,2,1,""],build_model:[27,1,1,""],cmp_chunks:[27,1,1,""],load_ace_data:[27,1,1,""],load_ace_file:[27,1,1,""],postag_tree:[27,1,1,""],shape:[27,1,1,""],simplify_pos:[27,1,1,""]},"nltk.chunk.named_entity.NEChunkParser":{__init__:[27,3,1,""],parse:[27,3,1,""]},"nltk.chunk.named_entity.NEChunkParserTagger":{__init__:[27,3,1,""]},"nltk.chunk.regexp":{ChunkRule:[28,2,1,""],ChunkRuleWithContext:[28,2,1,""],ChunkString:[28,2,1,""],ExpandLeftRule:[28,2,1,""],ExpandRightRule:[28,2,1,""],MergeRule:[28,2,1,""],RegexpChunkParser:[28,2,1,""],RegexpChunkRule:[28,2,1,""],RegexpParser:[28,2,1,""],SplitRule:[28,2,1,""],StripRule:[28,2,1,""],UnChunkRule:[28,2,1,""],demo:[28,1,1,""],demo_eval:[28,1,1,""],tag_pattern2re_pattern:[28,1,1,""]},"nltk.chunk.regexp.ChunkRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.ChunkRuleWithContext":{__init__:[28,3,1,""]},"nltk.chunk.regexp.ChunkString":{CHUNK_TAG:[28,4,1,""],CHUNK_TAG_CHAR:[28,4,1,""],I
N_CHUNK_PATTERN:[28,4,1,""],IN_STRIP_PATTERN:[28,4,1,""],__init__:[28,3,1,""],to_chunkstruct:[28,3,1,""],xform:[28,3,1,""]},"nltk.chunk.regexp.ExpandLeftRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.ExpandRightRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.MergeRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.RegexpChunkParser":{__init__:[28,3,1,""],parse:[28,3,1,""],rules:[28,3,1,""]},"nltk.chunk.regexp.RegexpChunkRule":{__init__:[28,3,1,""],apply:[28,3,1,""],descr:[28,3,1,""],fromstring:[28,3,1,""]},"nltk.chunk.regexp.RegexpParser":{__init__:[28,3,1,""],parse:[28,3,1,""]},"nltk.chunk.regexp.SplitRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.StripRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.UnChunkRule":{__init__:[28,3,1,""]},"nltk.chunk.util":{ChunkScore:[29,2,1,""],accuracy:[29,1,1,""],conllstr2tree:[29,1,1,""],conlltags2tree:[29,1,1,""],demo:[29,1,1,""],ieerstr2tree:[29,1,1,""],tagstr2tree:[29,1,1,""],tree2conllstr:[29,1,1,""],tree2conlltags:[29,1,1,""]},"nltk.chunk.util.ChunkScore":{__init__:[29,3,1,""],accuracy:[29,3,1,""],correct:[29,3,1,""],f_measure:[29,3,1,""],guessed:[29,3,1,""],incorrect:[29,3,1,""],missed:[29,3,1,""],precision:[29,3,1,""],recall:[29,3,1,""],score:[29,3,1,""]},"nltk.classify":{api:[31,0,0,"-"],decisiontree:[32,0,0,"-"],maxent:[33,0,0,"-"],megam:[34,0,0,"-"],naivebayes:[35,0,0,"-"],positivenaivebayes:[36,0,0,"-"],rte_classify:[37,0,0,"-"],scikitlearn:[38,0,0,"-"],senna:[39,0,0,"-"],svm:[40,0,0,"-"],tadm:[41,0,0,"-"],textcat:[42,0,0,"-"],util:[43,0,0,"-"],weka:[44,0,0,"-"]},"nltk.classify.api":{ClassifierI:[31,2,1,""],MultiClassifierI:[31,2,1,""]},"nltk.classify.api.ClassifierI":{classify:[31,3,1,""],classify_many:[31,3,1,""],labels:[31,3,1,""],prob_classify:[31,3,1,""],prob_classify_many:[31,3,1,""]},"nltk.classify.api.MultiClassifierI":{classify:[31,3,1,""],classify_many:[31,3,1,""],labels:[31,3,1,""],prob_classify:[31,3,1,""],prob_classify_many:[31,3,1,""]},"nltk.classify.decisiontree":{DecisionTreeClassifier:[32,2,1,""],demo:[32,1,1,""],f:[32,1,1,""]},"nltk.classify.decisiontree.DecisionTreeClassifier":{__init__:[32,3,1,""],best_binary_stump:[32,3,1,""],best_stump:[32,3,1,""],binary_stump:[32,3,1,""],classify:[32,3,1,""],error:[32,3,1,""],labels:[32,3,1,""],leaf:[32,3,1,""],pretty_format:[32,3,1,""],pseudocode:[32,3,1,""],refine:[32,3,1,""],stump:[32,3,1,""],train:[32,3,1,""]},"nltk.classify.maxent":{BinaryMaxentFeatureEncoding:[33,2,1,""],ConditionalExponentialClassifier:[33,4,1,""],FunctionBackedMaxentFeatureEncoding:[33,2,1,""],GISEncoding:[33,2,1,""],MaxentClassifier:[33,2,1,""],MaxentFeatureEncodingI:[33,2,1,""],TadmEventMaxentFeatureEncoding:[33,2,1,""],TadmMaxentClassifier:[33,2,1,""],TypedMaxentFeatureEncoding:[33,2,1,""],calculate_deltas:[33,1,1,""],calculate_empirical_fcount:[33,1,1,""],calculate_estimated_fcount:[33,1,1,""],calculate_nfmap:[33,1,1,""],demo:[33,1,1,""],train_maxent_classifier_with_gis:[33,1,1,""],train_maxent_classifier_with_iis:[33,1,1,""],train_maxent_classifier_with_megam:[33,1,1,""]},"nltk.classify.maxent.BinaryMaxentFeatureEncoding":{__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""],train:[33,3,1,""]},"nltk.classify.maxent.FunctionBackedMaxentFeatureEncoding":{__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""]},"nltk.classify.maxent.GISEncoding":{C:[33,5,1,""],__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],length:[33,3,1,""]},"nltk.classify.maxent.MaxentClassifier":{ALGORITHMS:[33,4,1,""],__init__:[33,3,1,""],cla
ssify:[33,3,1,""],explain:[33,3,1,""],labels:[33,3,1,""],most_informative_features:[33,3,1,""],prob_classify:[33,3,1,""],set_weights:[33,3,1,""],show_most_informative_features:[33,3,1,""],train:[33,3,1,""],weights:[33,3,1,""]},"nltk.classify.maxent.MaxentFeatureEncodingI":{describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""],train:[33,3,1,""]},"nltk.classify.maxent.TadmEventMaxentFeatureEncoding":{__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""],train:[33,3,1,""]},"nltk.classify.maxent.TadmMaxentClassifier":{train:[33,3,1,""]},"nltk.classify.maxent.TypedMaxentFeatureEncoding":{__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""],train:[33,3,1,""]},"nltk.classify.megam":{call_megam:[34,1,1,""],config_megam:[34,1,1,""],parse_megam_weights:[34,1,1,""],write_megam_file:[34,1,1,""]},"nltk.classify.naivebayes":{NaiveBayesClassifier:[35,2,1,""],demo:[35,1,1,""]},"nltk.classify.naivebayes.NaiveBayesClassifier":{__init__:[35,3,1,""],classify:[35,3,1,""],labels:[35,3,1,""],most_informative_features:[35,3,1,""],prob_classify:[35,3,1,""],show_most_informative_features:[35,3,1,""],train:[35,3,1,""]},"nltk.classify.positivenaivebayes":{PositiveNaiveBayesClassifier:[36,2,1,""],demo:[36,1,1,""]},"nltk.classify.positivenaivebayes.PositiveNaiveBayesClassifier":{train:[36,3,1,""]},"nltk.classify.rte_classify":{RTEFeatureExtractor:[37,2,1,""],rte_classifier:[37,1,1,""],rte_features:[37,1,1,""],rte_featurize:[37,1,1,""]},"nltk.classify.rte_classify.RTEFeatureExtractor":{__init__:[37,3,1,""],hyp_extra:[37,3,1,""],overlap:[37,3,1,""]},"nltk.classify.scikitlearn":{SklearnClassifier:[38,2,1,""]},"nltk.classify.scikitlearn.SklearnClassifier":{__init__:[38,3,1,""],classify_many:[38,3,1,""],labels:[38,3,1,""],prob_classify_many:[38,3,1,""],train:[38,3,1,""]},"nltk.classify.senna":{Senna:[39,2,1,""]},"nltk.classify.senna.Senna":{SUPPORTED_OPERATIONS:[39,4,1,""],__init__:[39,3,1,""],executable:[39,3,1,""],tag:[39,3,1,""],tag_sents:[39,3,1,""]},"nltk.classify.svm":{SvmClassifier:[40,2,1,""]},"nltk.classify.svm.SvmClassifier":{__init__:[40,3,1,""]},"nltk.classify.tadm":{call_tadm:[41,1,1,""],config_tadm:[41,1,1,""],encoding_demo:[41,1,1,""],names_demo:[41,1,1,""],parse_tadm_weights:[41,1,1,""],write_tadm_file:[41,1,1,""]},"nltk.classify.textcat":{TextCat:[42,2,1,""],demo:[42,1,1,""]},"nltk.classify.textcat.TextCat":{__init__:[42,3,1,""],calc_dist:[42,3,1,""],fingerprints:[42,4,1,""],guess_language:[42,3,1,""],lang_dists:[42,3,1,""],last_distances:[42,4,1,""],profile:[42,3,1,""],remove_punctuation:[42,3,1,""]},"nltk.classify.util":{CutoffChecker:[43,2,1,""],accuracy:[43,1,1,""],apply_features:[43,1,1,""],attested_labels:[43,1,1,""],binary_names_demo_features:[43,1,1,""],check_megam_config:[43,1,1,""],log_likelihood:[43,1,1,""],names_demo:[43,1,1,""],names_demo_features:[43,1,1,""],partial_names_demo:[43,1,1,""],wsd_demo:[43,1,1,""]},"nltk.classify.util.CutoffChecker":{__init__:[43,3,1,""],check:[43,3,1,""]},"nltk.classify.weka":{ARFF_Formatter:[44,2,1,""],WekaClassifier:[44,2,1,""],config_weka:[44,1,1,""]},"nltk.classify.weka.ARFF_Formatter":{__init__:[44,3,1,""],data_section:[44,3,1,""],format:[44,3,1,""],from_train:[44,3,1,""],header_section:[44,3,1,""],labels:[44,3,1,""],write:[44,3,1,""]},"nltk.classify.weka.WekaClassifier":{__init__:[44,3,1,""],classify_many:[44,3,1,""],parse_weka_distribution:[44,3,1,""],parse_weka_output:[44,3,1,""],prob_classify_many:[44,3,1,""],train:[44,3,1,""]},"nltk.cluster":
{api:[47,0,0,"-"],em:[48,0,0,"-"],gaac:[49,0,0,"-"],kmeans:[50,0,0,"-"],util:[51,0,0,"-"]},"nltk.cluster.api":{ClusterI:[47,2,1,""]},"nltk.cluster.api.ClusterI":{classification_probdist:[47,3,1,""],classify:[47,3,1,""],cluster:[47,3,1,""],cluster_name:[47,3,1,""],cluster_names:[47,3,1,""],likelihood:[47,3,1,""],num_clusters:[47,3,1,""]},"nltk.cluster.em":{EMClusterer:[48,2,1,""],demo:[48,1,1,""]},"nltk.cluster.em.EMClusterer":{__init__:[48,3,1,""],classify_vectorspace:[48,3,1,""],cluster_vectorspace:[48,3,1,""],likelihood_vectorspace:[48,3,1,""],num_clusters:[48,3,1,""]},"nltk.cluster.gaac":{GAAClusterer:[49,2,1,""],demo:[49,1,1,""]},"nltk.cluster.gaac.GAAClusterer":{__init__:[49,3,1,""],classify_vectorspace:[49,3,1,""],cluster:[49,3,1,""],cluster_vectorspace:[49,3,1,""],dendrogram:[49,3,1,""],num_clusters:[49,3,1,""],update_clusters:[49,3,1,""]},"nltk.cluster.kmeans":{KMeansClusterer:[50,2,1,""],demo:[50,1,1,""]},"nltk.cluster.kmeans.KMeansClusterer":{__init__:[50,3,1,""],classify_vectorspace:[50,3,1,""],cluster_vectorspace:[50,3,1,""],means:[50,3,1,""],num_clusters:[50,3,1,""]},"nltk.cluster.util":{Dendrogram:[51,2,1,""],VectorSpaceClusterer:[51,2,1,""],cosine_distance:[51,1,1,""],euclidean_distance:[51,1,1,""]},"nltk.cluster.util.Dendrogram":{__init__:[51,3,1,""],groups:[51,3,1,""],merge:[51,3,1,""],show:[51,3,1,""]},"nltk.cluster.util.VectorSpaceClusterer":{__init__:[51,3,1,""],classify:[51,3,1,""],classify_vectorspace:[51,3,1,""],cluster:[51,3,1,""],cluster_vectorspace:[51,3,1,""],likelihood:[51,3,1,""],likelihood_vectorspace:[51,3,1,""],vector:[51,3,1,""]},"nltk.collections":{AbstractLazySequence:[52,2,1,""],LazyConcatenation:[52,2,1,""],LazyEnumerate:[52,2,1,""],LazyIteratorList:[52,2,1,""],LazyMap:[52,2,1,""],LazySubsequence:[52,2,1,""],LazyZip:[52,2,1,""],OrderedDict:[52,2,1,""],Trie:[52,2,1,""]},"nltk.collections.AbstractLazySequence":{count:[52,3,1,""],index:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazyConcatenation":{__init__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazyEnumerate":{__init__:[52,3,1,""]},"nltk.collections.LazyIteratorList":{__init__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazyMap":{__init__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazySubsequence":{MIN_SIZE:[52,4,1,""],__init__:[52,3,1,""],__new__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazyZip":{__init__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.OrderedDict":{__init__:[52,3,1,""],clear:[52,3,1,""],copy:[52,3,1,""],items:[52,3,1,""],keys:[52,3,1,""],popitem:[52,3,1,""],setdefault:[52,3,1,""],update:[52,3,1,""],values:[52,3,1,""]},"nltk.collections.Trie":{LEAF:[52,4,1,""],__init__:[52,3,1,""],insert:[52,3,1,""]},"nltk.collocations":{BigramCollocationFinder:[53,2,1,""],QuadgramCollocationFinder:[53,2,1,""],TrigramCollocationFinder:[53,2,1,""]},"nltk.collocations.BigramCollocationFinder":{__init__:[53,3,1,""],default_ws:[53,4,1,""],from_words:[53,3,1,""],score_ngram:[53,3,1,""]},"nltk.collocations.QuadgramCollocationFinder":{__init__:[53,3,1,""],default_ws:[53,4,1,""],from_words:[53,3,1,""],score_ngram:[53,3,1,""]},"nltk.collocations.TrigramCollocationFinder":{__init__:[53,3,1,""],bigram_finder:[53,3,1,""],default_ws:[53,4,1,""],from_words:[53,3,1,""],score_ngram:[53,3,1,""]},"nltk.compat":{add_py3_data:[54,1,1,""],py3_data:[54,1,1,""]},"nltk.corpus":{demo:[55,1,1,""],europarl_raw:[56,0,0,"-"],reader:[57,0,0,"-"],util:[108,0,0,"-"]},"nltk.corpus.reader":{AlignedCorpusReader:[57,2,1,""],AlpinoCorpusReader:[57,2,1,""],BNCCorpusR
eader:[57,2,1,""],BracketParseCorpusReader:[57,2,1,""],CHILDESCorpusReader:[57,2,1,""],CMUDictCorpusReader:[57,2,1,""],CategorizedBracketParseCorpusReader:[57,2,1,""],CategorizedCorpusReader:[57,2,1,""],CategorizedPlaintextCorpusReader:[57,2,1,""],CategorizedSentencesCorpusReader:[57,2,1,""],CategorizedTaggedCorpusReader:[57,2,1,""],ChasenCorpusReader:[57,2,1,""],ChunkedCorpusReader:[57,2,1,""],ComparativeSentencesCorpusReader:[57,2,1,""],ConllChunkCorpusReader:[57,2,1,""],ConllCorpusReader:[57,2,1,""],CorpusReader:[57,2,1,""],CrubadanCorpusReader:[57,2,1,""],DependencyCorpusReader:[57,2,1,""],EuroparlCorpusReader:[57,2,1,""],FramenetCorpusReader:[57,2,1,""],IEERCorpusReader:[57,2,1,""],IPIPANCorpusReader:[57,2,1,""],IndianCorpusReader:[57,2,1,""],KNBCorpusReader:[57,2,1,""],LinThesaurusCorpusReader:[57,2,1,""],MTECorpusReader:[57,2,1,""],MWAPPDBCorpusReader:[57,2,1,""],MacMorphoCorpusReader:[57,2,1,""],NKJPCorpusReader:[57,2,1,""],NPSChatCorpusReader:[57,2,1,""],NombankCorpusReader:[57,2,1,""],NonbreakingPrefixesCorpusReader:[57,2,1,""],OpinionLexiconCorpusReader:[57,2,1,""],PPAttachmentCorpusReader:[57,2,1,""],PanLexLiteCorpusReader:[57,2,1,""],PanlexSwadeshCorpusReader:[57,2,1,""],Pl196xCorpusReader:[57,2,1,""],PlaintextCorpusReader:[57,2,1,""],PortugueseCategorizedPlaintextCorpusReader:[57,2,1,""],PropbankCorpusReader:[57,2,1,""],ProsConsCorpusReader:[57,2,1,""],RTECorpusReader:[57,2,1,""],ReviewsCorpusReader:[57,2,1,""],SemcorCorpusReader:[57,2,1,""],SensevalCorpusReader:[57,2,1,""],SentiSynset:[57,2,1,""],SentiWordNetCorpusReader:[57,2,1,""],SinicaTreebankCorpusReader:[57,2,1,""],StringCategoryCorpusReader:[57,2,1,""],SwadeshCorpusReader:[57,2,1,""],SwitchboardCorpusReader:[57,2,1,""],SyntaxCorpusReader:[57,2,1,""],TEICorpusView:[57,2,1,""],TaggedCorpusReader:[57,2,1,""],TimitCorpusReader:[57,2,1,""],TimitTaggedCorpusReader:[57,2,1,""],ToolboxCorpusReader:[57,2,1,""],TwitterCorpusReader:[57,2,1,""],UdhrCorpusReader:[57,2,1,""],UnicharsCorpusReader:[57,2,1,""],VerbnetCorpusReader:[57,2,1,""],WordListCorpusReader:[57,2,1,""],WordNetCorpusReader:[57,2,1,""],WordNetICCorpusReader:[57,2,1,""],XMLCorpusReader:[57,2,1,""],YCOECorpusReader:[57,2,1,""],aligned:[58,0,0,"-"],api:[59,0,0,"-"],bnc:[60,0,0,"-"],bracket_parse:[61,0,0,"-"],categorized_sents:[62,0,0,"-"],chasen:[63,0,0,"-"],childes:[64,0,0,"-"],chunked:[65,0,0,"-"],cmudict:[66,0,0,"-"],comparative_sents:[67,0,0,"-"],conll:[68,0,0,"-"],crubadan:[69,0,0,"-"],dependency:[70,0,0,"-"],find_corpus_fileids:[57,1,1,""],framenet:[71,0,0,"-"],ieer:[72,0,0,"-"],indian:[73,0,0,"-"],ipipan:[74,0,0,"-"],knbc:[75,0,0,"-"],lin:[76,0,0,"-"],mte:[77,0,0,"-"],nkjp:[78,0,0,"-"],nombank:[79,0,0,"-"],nps_chat:[80,0,0,"-"],opinion_lexicon:[81,0,0,"-"],panlex_lite:[82,0,0,"-"],panlex_swadesh:[83,0,0,"-"],pl196x:[84,0,0,"-"],plaintext:[85,0,0,"-"],ppattach:[86,0,0,"-"],propbank:[87,0,0,"-"],pros_cons:[88,0,0,"-"],reviews:[89,0,0,"-"],rte:[90,0,0,"-"],semcor:[91,0,0,"-"],senseval:[92,0,0,"-"],sentiwordnet:[93,0,0,"-"],sinica_treebank:[94,0,0,"-"],string_category:[95,0,0,"-"],switchboard:[96,0,0,"-"],tagged:[97,0,0,"-"],tagged_treebank_para_block_reader:[57,1,1,""],timit:[98,0,0,"-"],toolbox:[99,0,0,"-"],twitter:[100,0,0,"-"],udhr:[101,0,0,"-"],util:[102,0,0,"-"],verbnet:[103,0,0,"-"],wordlist:[104,0,0,"-"],wordnet:[105,0,0,"-"],xmldocs:[106,0,0,"-"],ycoe:[107,0,0,"-"]},"nltk.corpus.reader.AlignedCorpusReader":{__init__:[57,3,1,""],aligned_sents:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.AlpinoCorpusReader":{__init__:[57,3,1,""]},"nltk
.corpus.reader.BNCCorpusReader":{__init__:[57,3,1,""],sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.BracketParseCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.CHILDESCorpusReader":{MLU:[57,3,1,""],__init__:[57,3,1,""],age:[57,3,1,""],childes_url_base:[57,4,1,""],convert_age:[57,3,1,""],corpus:[57,3,1,""],participants:[57,3,1,""],sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],webview_file:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CMUDictCorpusReader":{dict:[57,3,1,""],entries:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CategorizedBracketParseCorpusReader":{__init__:[57,3,1,""],parsed_paras:[57,3,1,""],parsed_sents:[57,3,1,""],parsed_words:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""]},"nltk.corpus.reader.CategorizedCorpusReader":{__init__:[57,3,1,""],categories:[57,3,1,""],fileids:[57,3,1,""],paras:[57,3,1,""],raw:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CategorizedPlaintextCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.CategorizedSentencesCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CategorizedTaggedCorpusReader":{__init__:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""]},"nltk.corpus.reader.ChasenCorpusReader":{__init__:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.ChunkedCorpusReader":{__init__:[57,3,1,""],chunked_paras:[57,3,1,""],chunked_sents:[57,3,1,""],chunked_words:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.ComparativeSentencesCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],comparisons:[57,3,1,""],keywords:[57,3,1,""],keywords_readme:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.ConllChunkCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.ConllCorpusReader":{CHUNK:[57,4,1,""],COLUMN_TYPES:[57,4,1,""],IGNORE:[57,4,1,""],NE:[57,4,1,""],POS:[57,4,1,""],SRL:[57,4,1,""],TREE:[57,4,1,""],WORDS:[57,4,1,""],__init__:[57,3,1,""],chunked_sents:[57,3,1,""],chunked_words:[57,3,1,""],iob_sents:[57,3,1,""],iob_words:[57,3,1,""],parsed_sents:[57,3,1,""],sents:[57,3,1,""],srl_instances:[57,3,1,""],srl_spans:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CorpusReader":{__init__:[57,3,1,""],abspath:[57,3,1,""],abspaths:[57,3,1,""],citation:[57,3,1,""],encoding:[57,3,1,""],ensure_loaded:[57,3,1,""],fileids:[57,3,1,""],license:[57,3,1,""],open:[57,3,1,""],raw:[57,3,1,""],readme:[57,3,1,""],root:[57,5,1,""]},"nltk.corpus.reader.CrubadanCorpusReader":{__init__:[57,3,1,""],crubadan_to_iso:[57,3,1,""],iso_to_crubadan:[57,3,1,""],lang_freq:[57,3,1,""],langs:[57,3,1,""]},"nltk.corpus.reader.DependencyCorpusReader":{__init__:[57,3,1,""],parsed_sents:[57,3,1,""],sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.EuroparlCorpusReader":{chapters:[57,3,1,""],paras:[57,3,1,""]},"nltk.corpus.reader.FramenetCorpusReader":{__init__:[57,3,1,""],annotations:[57,3,1,""],buildindexes:[57,3,1,""],doc:[57,3,1,""],docs:[57,3,1,""],docs_metadata:[57,3,1,""],exemplars:[57,3,1,""],fe_relations:[57,3,1,""],fes:[57,3,1,""],frame:[57,3,1,""],frame_by_id:[57,3,1,
""],frame_by_name:[57,3,1,""],frame_ids_and_names:[57,3,1,""],frame_relation_types:[57,3,1,""],frame_relations:[57,3,1,""],frames:[57,3,1,""],frames_by_lemma:[57,3,1,""],ft_sents:[57,3,1,""],help:[57,3,1,""],lu:[57,3,1,""],lu_basic:[57,3,1,""],lu_ids_and_names:[57,3,1,""],lus:[57,3,1,""],propagate_semtypes:[57,3,1,""],semtype:[57,3,1,""],semtype_inherits:[57,3,1,""],semtypes:[57,3,1,""],sents:[57,3,1,""],warnings:[57,3,1,""]},"nltk.corpus.reader.IEERCorpusReader":{docs:[57,3,1,""],parsed_docs:[57,3,1,""]},"nltk.corpus.reader.IPIPANCorpusReader":{__init__:[57,3,1,""],categories:[57,3,1,""],channels:[57,3,1,""],domains:[57,3,1,""],fileids:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.IndianCorpusReader":{sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.KNBCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.LinThesaurusCorpusReader":{__init__:[57,3,1,""],scored_synonyms:[57,3,1,""],similarity:[57,3,1,""],synonyms:[57,3,1,""]},"nltk.corpus.reader.MTECorpusReader":{__init__:[57,3,1,""],lemma_paras:[57,3,1,""],lemma_sents:[57,3,1,""],lemma_words:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.MWAPPDBCorpusReader":{entries:[57,3,1,""],mwa_ppdb_xxxl_file:[57,4,1,""]},"nltk.corpus.reader.MacMorphoCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.NKJPCorpusReader":{HEADER_MODE:[57,4,1,""],RAW_MODE:[57,4,1,""],SENTS_MODE:[57,4,1,""],WORDS_MODE:[57,4,1,""],__init__:[57,3,1,""],add_root:[57,3,1,""],fileids:[57,3,1,""],get_paths:[57,3,1,""],header:[57,3,1,""],raw:[57,3,1,""],sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.NPSChatCorpusReader":{__init__:[57,3,1,""],posts:[57,3,1,""],tagged_posts:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""],xml_posts:[57,3,1,""]},"nltk.corpus.reader.NombankCorpusReader":{__init__:[57,3,1,""],instances:[57,3,1,""],lines:[57,3,1,""],nouns:[57,3,1,""],roleset:[57,3,1,""],rolesets:[57,3,1,""]},"nltk.corpus.reader.NonbreakingPrefixesCorpusReader":{available_langs:[57,4,1,""],words:[57,3,1,""]},"nltk.corpus.reader.OpinionLexiconCorpusReader":{CorpusView:[57,4,1,""],negative:[57,3,1,""],positive:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.PPAttachmentCorpusReader":{attachments:[57,3,1,""],tuples:[57,3,1,""]},"nltk.corpus.reader.PanLexLiteCorpusReader":{MEANING_Q:[57,4,1,""],TRANSLATION_Q:[57,4,1,""],__init__:[57,3,1,""],language_varieties:[57,3,1,""],meanings:[57,3,1,""],translations:[57,3,1,""]},"nltk.corpus.reader.PanlexSwadeshCorpusReader":{__init__:[57,3,1,""],entries:[57,3,1,""],get_languages:[57,3,1,""],get_macrolanguages:[57,3,1,""],language_codes:[57,3,1,""],license:[57,3,1,""],words_by_iso639:[57,3,1,""],words_by_lang:[57,3,1,""]},"nltk.corpus.reader.Pl196xCorpusReader":{__init__:[57,3,1,""],decode_tag:[57,3,1,""],head_len:[57,4,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],textids:[57,3,1,""],words:[57,3,1,""],xml:[57,3,1,""]},"nltk.corpus.reader.PlaintextCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.PortugueseCategorizedPlaintextCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.PropbankCorpusReader":{__init__:[57,3,1,""],instances:[57,3,1,""],lines:[57,3,1,""],r
oleset:[57,3,1,""],rolesets:[57,3,1,""],verbs:[57,3,1,""]},"nltk.corpus.reader.ProsConsCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.RTECorpusReader":{pairs:[57,3,1,""]},"nltk.corpus.reader.ReviewsCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],features:[57,3,1,""],reviews:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.SemcorCorpusReader":{__init__:[57,3,1,""],chunk_sents:[57,3,1,""],chunks:[57,3,1,""],sents:[57,3,1,""],tagged_chunks:[57,3,1,""],tagged_sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.SensevalCorpusReader":{instances:[57,3,1,""]},"nltk.corpus.reader.SentiSynset":{__init__:[57,3,1,""],neg_score:[57,3,1,""],obj_score:[57,3,1,""],pos_score:[57,3,1,""]},"nltk.corpus.reader.SentiWordNetCorpusReader":{__init__:[57,3,1,""],all_senti_synsets:[57,3,1,""],senti_synset:[57,3,1,""],senti_synsets:[57,3,1,""]},"nltk.corpus.reader.StringCategoryCorpusReader":{__init__:[57,3,1,""],tuples:[57,3,1,""]},"nltk.corpus.reader.SwadeshCorpusReader":{entries:[57,3,1,""]},"nltk.corpus.reader.SwitchboardCorpusReader":{__init__:[57,3,1,""],discourses:[57,3,1,""],tagged_discourses:[57,3,1,""],tagged_turns:[57,3,1,""],tagged_words:[57,3,1,""],turns:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.SyntaxCorpusReader":{parsed_sents:[57,3,1,""],sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.TEICorpusView":{__init__:[57,3,1,""],read_block:[57,3,1,""]},"nltk.corpus.reader.TaggedCorpusReader":{__init__:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.TimitCorpusReader":{__init__:[57,3,1,""],audiodata:[57,3,1,""],fileids:[57,3,1,""],phone_times:[57,3,1,""],phone_trees:[57,3,1,""],phones:[57,3,1,""],play:[57,3,1,""],sent_times:[57,3,1,""],sentid:[57,3,1,""],sents:[57,3,1,""],spkrid:[57,3,1,""],spkrinfo:[57,3,1,""],spkrutteranceids:[57,3,1,""],transcription_dict:[57,3,1,""],utterance:[57,3,1,""],utteranceids:[57,3,1,""],wav:[57,3,1,""],word_times:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.TimitTaggedCorpusReader":{__init__:[57,3,1,""],paras:[57,3,1,""],tagged_paras:[57,3,1,""]},"nltk.corpus.reader.ToolboxCorpusReader":{entries:[57,3,1,""],fields:[57,3,1,""],words:[57,3,1,""],xml:[57,3,1,""]},"nltk.corpus.reader.TwitterCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],docs:[57,3,1,""],strings:[57,3,1,""],tokenized:[57,3,1,""]},"nltk.corpus.reader.UdhrCorpusReader":{ENCODINGS:[57,4,1,""],SKIP:[57,4,1,""],__init__:[57,3,1,""]},"nltk.corpus.reader.UnicharsCorpusReader":{available_categories:[57,4,1,""],chars:[57,3,1,""]},"nltk.corpus.reader.VerbnetCorpusReader":{__init__:[57,3,1,""],classids:[57,3,1,""],fileids:[57,3,1,""],frames:[57,3,1,""],lemmas:[57,3,1,""],longid:[57,3,1,""],pprint:[57,3,1,""],pprint_frames:[57,3,1,""],pprint_members:[57,3,1,""],pprint_subclasses:[57,3,1,""],pprint_themroles:[57,3,1,""],shortid:[57,3,1,""],subclasses:[57,3,1,""],themroles:[57,3,1,""],vnclass:[57,3,1,""],wordnetids:[57,3,1,""]},"nltk.corpus.reader.WordListCorpusReader":{words:[57,3,1,""]},"nltk.corpus.reader.WordNetCorpusReader":{ADJ:[57,4,1,""],ADJ_SAT:[57,4,1,""],ADV:[57,4,1,""],MORPHOLOGICAL_SUBSTITUTIONS:[57,4,1,""],NOUN:[57,4,1,""],VERB:[57,4,1,""],__init__:[57,3,1,""],all_lemma_names:[57,3,1,""],all_synsets:[57,3,1,""],citation:[57,3,1,""],custom_lemmas:[57,3,1,""],digraph:[57,3,1,""],get_version:[57,3,1,""],ic:[57,3,1,""],jcn_
similarity:[57,3,1,""],langs:[57,3,1,""],lch_similarity:[57,3,1,""],lemma:[57,3,1,""],lemma_count:[57,3,1,""],lemma_from_key:[57,3,1,""],lemmas:[57,3,1,""],license:[57,3,1,""],lin_similarity:[57,3,1,""],morphy:[57,3,1,""],of2ss:[57,3,1,""],path_similarity:[57,3,1,""],readme:[57,3,1,""],res_similarity:[57,3,1,""],ss2of:[57,3,1,""],synset:[57,3,1,""],synset_from_pos_and_offset:[57,3,1,""],synset_from_sense_key:[57,3,1,""],synsets:[57,3,1,""],words:[57,3,1,""],wup_similarity:[57,3,1,""]},"nltk.corpus.reader.WordNetICCorpusReader":{__init__:[57,3,1,""],ic:[57,3,1,""]},"nltk.corpus.reader.XMLCorpusReader":{__init__:[57,3,1,""],words:[57,3,1,""],xml:[57,3,1,""]},"nltk.corpus.reader.YCOECorpusReader":{__init__:[57,3,1,""],documents:[57,3,1,""],fileids:[57,3,1,""],paras:[57,3,1,""],parsed_sents:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.aligned":{AlignedCorpusReader:[58,2,1,""],AlignedSentCorpusView:[58,2,1,""]},"nltk.corpus.reader.aligned.AlignedCorpusReader":{__init__:[58,3,1,""],aligned_sents:[58,3,1,""],sents:[58,3,1,""],words:[58,3,1,""]},"nltk.corpus.reader.aligned.AlignedSentCorpusView":{__init__:[58,3,1,""],read_block:[58,3,1,""]},"nltk.corpus.reader.api":{CategorizedCorpusReader:[59,2,1,""],CorpusReader:[59,2,1,""],SyntaxCorpusReader:[59,2,1,""]},"nltk.corpus.reader.api.CategorizedCorpusReader":{__init__:[59,3,1,""],categories:[59,3,1,""],fileids:[59,3,1,""],paras:[59,3,1,""],raw:[59,3,1,""],sents:[59,3,1,""],words:[59,3,1,""]},"nltk.corpus.reader.api.CorpusReader":{__init__:[59,3,1,""],abspath:[59,3,1,""],abspaths:[59,3,1,""],citation:[59,3,1,""],encoding:[59,3,1,""],ensure_loaded:[59,3,1,""],fileids:[59,3,1,""],license:[59,3,1,""],open:[59,3,1,""],raw:[59,3,1,""],readme:[59,3,1,""],root:[59,5,1,""]},"nltk.corpus.reader.api.SyntaxCorpusReader":{parsed_sents:[59,3,1,""],sents:[59,3,1,""],tagged_sents:[59,3,1,""],tagged_words:[59,3,1,""],words:[59,3,1,""]},"nltk.corpus.reader.bnc":{BNCCorpusReader:[60,2,1,""],BNCSentence:[60,2,1,""],BNCWordView:[60,2,1,""]},"nltk.corpus.reader.bnc.BNCCorpusReader":{__init__:[60,3,1,""],sents:[60,3,1,""],tagged_sents:[60,3,1,""],tagged_words:[60,3,1,""],words:[60,3,1,""]},"nltk.corpus.reader.bnc.BNCSentence":{__init__:[60,3,1,""]},"nltk.corpus.reader.bnc.BNCWordView":{__init__:[60,3,1,""],author:[60,4,1,""],editor:[60,4,1,""],handle_elt:[60,3,1,""],handle_header:[60,3,1,""],handle_sent:[60,3,1,""],handle_word:[60,3,1,""],resps:[60,4,1,""],tags_to_ignore:[60,4,1,""],title:[60,4,1,""]},"nltk.corpus.reader.bracket_parse":{AlpinoCorpusReader:[61,2,1,""],BracketParseCorpusReader:[61,2,1,""],CategorizedBracketParseCorpusReader:[61,2,1,""]},"nltk.corpus.reader.bracket_parse.AlpinoCorpusReader":{__init__:[61,3,1,""]},"nltk.corpus.reader.bracket_parse.BracketParseCorpusReader":{__init__:[61,3,1,""]},"nltk.corpus.reader.bracket_parse.CategorizedBracketParseCorpusReader":{__init__:[61,3,1,""],parsed_paras:[61,3,1,""],parsed_sents:[61,3,1,""],parsed_words:[61,3,1,""],tagged_paras:[61,3,1,""],tagged_sents:[61,3,1,""],tagged_words:[61,3,1,""]},"nltk.corpus.reader.categorized_sents":{CategorizedSentencesCorpusReader:[62,2,1,""]},"nltk.corpus.reader.categorized_sents.CategorizedSentencesCorpusReader":{CorpusView:[62,4,1,""],__init__:[62,3,1,""],sents:[62,3,1,""],words:[62,3,1,""]},"nltk.corpus.reader.chasen":{ChasenCorpusReader:[63,2,1,""],ChasenCorpusView:[63,2,1,""],demo:[63,1,1,""],test:[63,1,1,""]},"nltk.corpus.reader.chasen.ChasenCorpusReader":{__init__:[63,3,1,""],para
s:[63,3,1,""],sents:[63,3,1,""],tagged_paras:[63,3,1,""],tagged_sents:[63,3,1,""],tagged_words:[63,3,1,""],words:[63,3,1,""]},"nltk.corpus.reader.chasen.ChasenCorpusView":{__init__:[63,3,1,""],read_block:[63,3,1,""]},"nltk.corpus.reader.childes":{CHILDESCorpusReader:[64,2,1,""],demo:[64,1,1,""]},"nltk.corpus.reader.childes.CHILDESCorpusReader":{MLU:[64,3,1,""],__init__:[64,3,1,""],age:[64,3,1,""],childes_url_base:[64,4,1,""],convert_age:[64,3,1,""],corpus:[64,3,1,""],participants:[64,3,1,""],sents:[64,3,1,""],tagged_sents:[64,3,1,""],tagged_words:[64,3,1,""],webview_file:[64,3,1,""],words:[64,3,1,""]},"nltk.corpus.reader.chunked":{ChunkedCorpusReader:[65,2,1,""],ChunkedCorpusView:[65,2,1,""]},"nltk.corpus.reader.chunked.ChunkedCorpusReader":{__init__:[65,3,1,""],chunked_paras:[65,3,1,""],chunked_sents:[65,3,1,""],chunked_words:[65,3,1,""],paras:[65,3,1,""],sents:[65,3,1,""],tagged_paras:[65,3,1,""],tagged_sents:[65,3,1,""],tagged_words:[65,3,1,""],words:[65,3,1,""]},"nltk.corpus.reader.chunked.ChunkedCorpusView":{__init__:[65,3,1,""],read_block:[65,3,1,""]},"nltk.corpus.reader.cmudict":{CMUDictCorpusReader:[66,2,1,""],read_cmudict_block:[66,1,1,""]},"nltk.corpus.reader.cmudict.CMUDictCorpusReader":{dict:[66,3,1,""],entries:[66,3,1,""],words:[66,3,1,""]},"nltk.corpus.reader.comparative_sents":{ComparativeSentencesCorpusReader:[67,2,1,""],Comparison:[67,2,1,""]},"nltk.corpus.reader.comparative_sents.ComparativeSentencesCorpusReader":{CorpusView:[67,4,1,""],__init__:[67,3,1,""],comparisons:[67,3,1,""],keywords:[67,3,1,""],keywords_readme:[67,3,1,""],sents:[67,3,1,""],words:[67,3,1,""]},"nltk.corpus.reader.comparative_sents.Comparison":{__init__:[67,3,1,""]},"nltk.corpus.reader.conll":{ConllChunkCorpusReader:[68,2,1,""],ConllCorpusReader:[68,2,1,""],ConllSRLInstance:[68,2,1,""],ConllSRLInstanceList:[68,2,1,""]},"nltk.corpus.reader.conll.ConllChunkCorpusReader":{__init__:[68,3,1,""]},"nltk.corpus.reader.conll.ConllCorpusReader":{CHUNK:[68,4,1,""],COLUMN_TYPES:[68,4,1,""],IGNORE:[68,4,1,""],NE:[68,4,1,""],POS:[68,4,1,""],SRL:[68,4,1,""],TREE:[68,4,1,""],WORDS:[68,4,1,""],__init__:[68,3,1,""],chunked_sents:[68,3,1,""],chunked_words:[68,3,1,""],iob_sents:[68,3,1,""],iob_words:[68,3,1,""],parsed_sents:[68,3,1,""],sents:[68,3,1,""],srl_instances:[68,3,1,""],srl_spans:[68,3,1,""],tagged_sents:[68,3,1,""],tagged_words:[68,3,1,""],words:[68,3,1,""]},"nltk.corpus.reader.conll.ConllSRLInstance":{__init__:[68,3,1,""],arguments:[68,4,1,""],pprint:[68,3,1,""],tagged_spans:[68,4,1,""],tree:[68,4,1,""],verb:[68,4,1,""],verb_head:[68,4,1,""],words:[68,4,1,""]},"nltk.corpus.reader.conll.ConllSRLInstanceList":{__init__:[68,3,1,""],pprint:[68,3,1,""]},"nltk.corpus.reader.crubadan":{CrubadanCorpusReader:[69,2,1,""]},"nltk.corpus.reader.crubadan.CrubadanCorpusReader":{__init__:[69,3,1,""],crubadan_to_iso:[69,3,1,""],iso_to_crubadan:[69,3,1,""],lang_freq:[69,3,1,""],langs:[69,3,1,""]},"nltk.corpus.reader.dependency":{DependencyCorpusReader:[70,2,1,""],DependencyCorpusView:[70,2,1,""]},"nltk.corpus.reader.dependency.DependencyCorpusReader":{__init__:[70,3,1,""],parsed_sents:[70,3,1,""],sents:[70,3,1,""],tagged_sents:[70,3,1,""],tagged_words:[70,3,1,""],words:[70,3,1,""]},"nltk.corpus.reader.dependency.DependencyCorpusView":{__init__:[70,3,1,""],read_block:[70,3,1,""]},"nltk.corpus.reader.framenet":{AttrDict:[71,2,1,""],FramenetCorpusReader:[71,2,1,""],FramenetError:[71,6,1,""],Future:[71,2,1,""],PrettyDict:[71,2,1,""],PrettyLazyConcatenation:[71,2,1,""],PrettyLazyIteratorList:[71,2,1,""],PrettyLazyMap:[71,2,1,""],Pret
tyList:[71,2,1,""],SpecialList:[71,2,1,""],demo:[71,1,1,""],mimic_wrap:[71,1,1,""]},"nltk.corpus.reader.framenet.AttrDict":{__init__:[71,3,1,""]},"nltk.corpus.reader.framenet.FramenetCorpusReader":{__init__:[71,3,1,""],annotations:[71,3,1,""],buildindexes:[71,3,1,""],doc:[71,3,1,""],docs:[71,3,1,""],docs_metadata:[71,3,1,""],exemplars:[71,3,1,""],fe_relations:[71,3,1,""],fes:[71,3,1,""],frame:[71,3,1,""],frame_by_id:[71,3,1,""],frame_by_name:[71,3,1,""],frame_ids_and_names:[71,3,1,""],frame_relation_types:[71,3,1,""],frame_relations:[71,3,1,""],frames:[71,3,1,""],frames_by_lemma:[71,3,1,""],ft_sents:[71,3,1,""],help:[71,3,1,""],lu:[71,3,1,""],lu_basic:[71,3,1,""],lu_ids_and_names:[71,3,1,""],lus:[71,3,1,""],propagate_semtypes:[71,3,1,""],semtype:[71,3,1,""],semtype_inherits:[71,3,1,""],semtypes:[71,3,1,""],sents:[71,3,1,""],warnings:[71,3,1,""]},"nltk.corpus.reader.framenet.Future":{__init__:[71,3,1,""]},"nltk.corpus.reader.framenet.PrettyDict":{__init__:[71,3,1,""]},"nltk.corpus.reader.framenet.PrettyList":{__init__:[71,3,1,""]},"nltk.corpus.reader.framenet.SpecialList":{__init__:[71,3,1,""]},"nltk.corpus.reader.ieer":{IEERCorpusReader:[72,2,1,""],IEERDocument:[72,2,1,""],documents:[72,7,1,""],titles:[72,7,1,""]},"nltk.corpus.reader.ieer.IEERCorpusReader":{docs:[72,3,1,""],parsed_docs:[72,3,1,""]},"nltk.corpus.reader.ieer.IEERDocument":{__init__:[72,3,1,""]},"nltk.corpus.reader.indian":{IndianCorpusReader:[73,2,1,""],IndianCorpusView:[73,2,1,""]},"nltk.corpus.reader.indian.IndianCorpusReader":{sents:[73,3,1,""],tagged_sents:[73,3,1,""],tagged_words:[73,3,1,""],words:[73,3,1,""]},"nltk.corpus.reader.indian.IndianCorpusView":{__init__:[73,3,1,""],read_block:[73,3,1,""]},"nltk.corpus.reader.ipipan":{IPIPANCorpusReader:[74,2,1,""],IPIPANCorpusView:[74,2,1,""]},"nltk.corpus.reader.ipipan.IPIPANCorpusReader":{__init__:[74,3,1,""],categories:[74,3,1,""],channels:[74,3,1,""],domains:[74,3,1,""],fileids:[74,3,1,""],paras:[74,3,1,""],sents:[74,3,1,""],tagged_paras:[74,3,1,""],tagged_sents:[74,3,1,""],tagged_words:[74,3,1,""],words:[74,3,1,""]},"nltk.corpus.reader.ipipan.IPIPANCorpusView":{PARAS_MODE:[74,4,1,""],SENTS_MODE:[74,4,1,""],WORDS_MODE:[74,4,1,""],__init__:[74,3,1,""],read_block:[74,3,1,""]},"nltk.corpus.reader.knbc":{KNBCorpusReader:[75,2,1,""],demo:[75,1,1,""],test:[75,1,1,""]},"nltk.corpus.reader.knbc.KNBCorpusReader":{__init__:[75,3,1,""]},"nltk.corpus.reader.lin":{LinThesaurusCorpusReader:[76,2,1,""],demo:[76,1,1,""]},"nltk.corpus.reader.lin.LinThesaurusCorpusReader":{__init__:[76,3,1,""],scored_synonyms:[76,3,1,""],similarity:[76,3,1,""],synonyms:[76,3,1,""]},"nltk.corpus.reader.mte":{MTECorpusReader:[77,2,1,""],MTECorpusView:[77,2,1,""],MTEFileReader:[77,2,1,""],MTETagConverter:[77,2,1,""],xpath:[77,1,1,""]},"nltk.corpus.reader.mte.MTECorpusReader":{__init__:[77,3,1,""],lemma_paras:[77,3,1,""],lemma_sents:[77,3,1,""],lemma_words:[77,3,1,""],paras:[77,3,1,""],sents:[77,3,1,""],tagged_paras:[77,3,1,""],tagged_sents:[77,3,1,""],tagged_words:[77,3,1,""],words:[77,3,1,""]},"nltk.corpus.reader.mte.MTECorpusView":{__init__:[77,3,1,""],read_block:[77,3,1,""]},"nltk.corpus.reader.mte.MTEFileReader":{__init__:[77,3,1,""],lemma_paras:[77,3,1,""],lemma_sents:[77,3,1,""],lemma_words:[77,3,1,""],ns:[77,4,1,""],para_path:[77,4,1,""],paras:[77,3,1,""],sent_path:[77,4,1,""],sents:[77,3,1,""],tag_ns:[77,4,1,""],tagged_paras:[77,3,1,""],tagged_sents:[77,3,1,""],tagged_words:[77,3,1,""],word_path:[77,4,1,""],words:[77,3,1,""],xml_ns:[77,4,1,""]},"nltk.corpus.reader.mte.MTETagConverter":{mapping_msd_univ
ersal:[77,4,1,""],msd_to_universal:[77,3,1,""]},"nltk.corpus.reader.nkjp":{NKJPCorpusReader:[78,2,1,""],NKJPCorpus_Header_View:[78,2,1,""],NKJPCorpus_Morph_View:[78,2,1,""],NKJPCorpus_Segmentation_View:[78,2,1,""],NKJPCorpus_Text_View:[78,2,1,""],XML_Tool:[78,2,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpusReader":{HEADER_MODE:[78,4,1,""],RAW_MODE:[78,4,1,""],SENTS_MODE:[78,4,1,""],WORDS_MODE:[78,4,1,""],__init__:[78,3,1,""],add_root:[78,3,1,""],fileids:[78,3,1,""],get_paths:[78,3,1,""],header:[78,3,1,""],raw:[78,3,1,""],sents:[78,3,1,""],tagged_words:[78,3,1,""],words:[78,3,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpus_Header_View":{__init__:[78,3,1,""],handle_elt:[78,3,1,""],handle_query:[78,3,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpus_Morph_View":{__init__:[78,3,1,""],handle_elt:[78,3,1,""],handle_query:[78,3,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpus_Segmentation_View":{__init__:[78,3,1,""],get_segm_id:[78,3,1,""],get_sent_beg:[78,3,1,""],get_sent_end:[78,3,1,""],get_sentences:[78,3,1,""],handle_elt:[78,3,1,""],handle_query:[78,3,1,""],remove_choice:[78,3,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpus_Text_View":{RAW_MODE:[78,4,1,""],SENTS_MODE:[78,4,1,""],__init__:[78,3,1,""],get_segm_id:[78,3,1,""],handle_elt:[78,3,1,""],handle_query:[78,3,1,""],read_block:[78,3,1,""]},"nltk.corpus.reader.nkjp.XML_Tool":{__init__:[78,3,1,""],build_preprocessed_file:[78,3,1,""],remove_preprocessed_file:[78,3,1,""]},"nltk.corpus.reader.nombank":{NombankChainTreePointer:[79,2,1,""],NombankCorpusReader:[79,2,1,""],NombankInstance:[79,2,1,""],NombankPointer:[79,2,1,""],NombankSplitTreePointer:[79,2,1,""],NombankTreePointer:[79,2,1,""]},"nltk.corpus.reader.nombank.NombankChainTreePointer":{__init__:[79,3,1,""],pieces:[79,4,1,""],select:[79,3,1,""]},"nltk.corpus.reader.nombank.NombankCorpusReader":{__init__:[79,3,1,""],instances:[79,3,1,""],lines:[79,3,1,""],nouns:[79,3,1,""],roleset:[79,3,1,""],rolesets:[79,3,1,""]},"nltk.corpus.reader.nombank.NombankInstance":{__init__:[79,3,1,""],arguments:[79,4,1,""],baseform:[79,4,1,""],fileid:[79,4,1,""],parse:[79,3,1,""],parse_corpus:[79,4,1,""],predicate:[79,4,1,""],predid:[79,4,1,""],roleset:[79,5,1,""],sensenumber:[79,4,1,""],sentnum:[79,4,1,""],tree:[79,5,1,""],wordnum:[79,4,1,""]},"nltk.corpus.reader.nombank.NombankPointer":{__init__:[79,3,1,""]},"nltk.corpus.reader.nombank.NombankSplitTreePointer":{__init__:[79,3,1,""],pieces:[79,4,1,""],select:[79,3,1,""]},"nltk.corpus.reader.nombank.NombankTreePointer":{__init__:[79,3,1,""],parse:[79,3,1,""],select:[79,3,1,""],treepos:[79,3,1,""]},"nltk.corpus.reader.nps_chat":{NPSChatCorpusReader:[80,2,1,""]},"nltk.corpus.reader.nps_chat.NPSChatCorpusReader":{__init__:[80,3,1,""],posts:[80,3,1,""],tagged_posts:[80,3,1,""],tagged_words:[80,3,1,""],words:[80,3,1,""],xml_posts:[80,3,1,""]},"nltk.corpus.reader.opinion_lexicon":{IgnoreReadmeCorpusView:[81,2,1,""],OpinionLexiconCorpusReader:[81,2,1,""]},"nltk.corpus.reader.opinion_lexicon.IgnoreReadmeCorpusView":{__init__:[81,3,1,""]},"nltk.corpus.reader.opinion_lexicon.OpinionLexiconCorpusReader":{CorpusView:[81,4,1,""],negative:[81,3,1,""],positive:[81,3,1,""],words:[81,3,1,""]},"nltk.corpus.reader.panlex_lite":{Meaning:[82,2,1,""],PanLexLiteCorpusReader:[82,2,1,""]},"nltk.corpus.reader.panlex_lite.Meaning":{__init__:[82,3,1,""],expressions:[82,3,1,""],id:[82,3,1,""],quality:[82,3,1,""],source:[82,3,1,""],source_group:[82,3,1,""]},"nltk.corpus.reader.panlex_lite.PanLexLiteCorpusReader":{MEANING_Q:[82,4,1,""],TRANSLATION_Q:[82,4,1,""],__init__:[82,3,1,""],language_varieties:[82,3,1,""],meani
ngs:[82,3,1,""],translations:[82,3,1,""]},"nltk.corpus.reader.panlex_swadesh":{PanlexLanguage:[83,2,1,""],PanlexSwadeshCorpusReader:[83,2,1,""]},"nltk.corpus.reader.panlex_swadesh.PanlexLanguage":{__new__:[83,3,1,""],iso639:[83,4,1,""],iso639_type:[83,4,1,""],langvar_uid:[83,4,1,""],name:[83,4,1,""],panlex_uid:[83,4,1,""],script:[83,4,1,""]},"nltk.corpus.reader.panlex_swadesh.PanlexSwadeshCorpusReader":{__init__:[83,3,1,""],entries:[83,3,1,""],get_languages:[83,3,1,""],get_macrolanguages:[83,3,1,""],language_codes:[83,3,1,""],license:[83,3,1,""],words_by_iso639:[83,3,1,""],words_by_lang:[83,3,1,""]},"nltk.corpus.reader.pl196x":{Pl196xCorpusReader:[84,2,1,""],TEICorpusView:[84,2,1,""]},"nltk.corpus.reader.pl196x.Pl196xCorpusReader":{__init__:[84,3,1,""],decode_tag:[84,3,1,""],head_len:[84,4,1,""],paras:[84,3,1,""],sents:[84,3,1,""],tagged_paras:[84,3,1,""],tagged_sents:[84,3,1,""],tagged_words:[84,3,1,""],textids:[84,3,1,""],words:[84,3,1,""],xml:[84,3,1,""]},"nltk.corpus.reader.pl196x.TEICorpusView":{__init__:[84,3,1,""],read_block:[84,3,1,""]},"nltk.corpus.reader.plaintext":{CategorizedPlaintextCorpusReader:[85,2,1,""],EuroparlCorpusReader:[85,2,1,""],PlaintextCorpusReader:[85,2,1,""],PortugueseCategorizedPlaintextCorpusReader:[85,2,1,""]},"nltk.corpus.reader.plaintext.CategorizedPlaintextCorpusReader":{__init__:[85,3,1,""]},"nltk.corpus.reader.plaintext.EuroparlCorpusReader":{chapters:[85,3,1,""],paras:[85,3,1,""]},"nltk.corpus.reader.plaintext.PlaintextCorpusReader":{CorpusView:[85,4,1,""],__init__:[85,3,1,""],paras:[85,3,1,""],sents:[85,3,1,""],words:[85,3,1,""]},"nltk.corpus.reader.plaintext.PortugueseCategorizedPlaintextCorpusReader":{__init__:[85,3,1,""]},"nltk.corpus.reader.ppattach":{PPAttachment:[86,2,1,""],PPAttachmentCorpusReader:[86,2,1,""]},"nltk.corpus.reader.ppattach.PPAttachment":{__init__:[86,3,1,""]},"nltk.corpus.reader.ppattach.PPAttachmentCorpusReader":{attachments:[86,3,1,""],tuples:[86,3,1,""]},"nltk.corpus.reader.propbank":{PropbankChainTreePointer:[87,2,1,""],PropbankCorpusReader:[87,2,1,""],PropbankInflection:[87,2,1,""],PropbankInstance:[87,2,1,""],PropbankPointer:[87,2,1,""],PropbankSplitTreePointer:[87,2,1,""],PropbankTreePointer:[87,2,1,""]},"nltk.corpus.reader.propbank.PropbankChainTreePointer":{__init__:[87,3,1,""],pieces:[87,4,1,""],select:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankCorpusReader":{__init__:[87,3,1,""],instances:[87,3,1,""],lines:[87,3,1,""],roleset:[87,3,1,""],rolesets:[87,3,1,""],verbs:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankInflection":{ACTIVE:[87,4,1,""],FINITE:[87,4,1,""],FUTURE:[87,4,1,""],GERUND:[87,4,1,""],INFINITIVE:[87,4,1,""],NONE:[87,4,1,""],PARTICIPLE:[87,4,1,""],PASSIVE:[87,4,1,""],PAST:[87,4,1,""],PERFECT:[87,4,1,""],PERFECT_AND_PROGRESSIVE:[87,4,1,""],PRESENT:[87,4,1,""],PROGRESSIVE:[87,4,1,""],THIRD_PERSON:[87,4,1,""],__init__:[87,3,1,""],parse:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankInstance":{__init__:[87,3,1,""],arguments:[87,4,1,""],baseform:[87,5,1,""],fileid:[87,4,1,""],inflection:[87,4,1,""],parse:[87,3,1,""],parse_corpus:[87,4,1,""],predicate:[87,4,1,""],predid:[87,5,1,""],roleset:[87,4,1,""],sensenumber:[87,5,1,""],sentnum:[87,4,1,""],tagger:[87,4,1,""],tree:[87,5,1,""],wordnum:[87,4,1,""]},"nltk.corpus.reader.propbank.PropbankPointer":{__init__:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankSplitTreePointer":{__init__:[87,3,1,""],pieces:[87,4,1,""],select:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankTreePointer":{__init__:[87,3,1,""],parse:[87,3,1,""],select:[87,3,1,""],treepos:[87,
3,1,""]},"nltk.corpus.reader.pros_cons":{ProsConsCorpusReader:[88,2,1,""]},"nltk.corpus.reader.pros_cons.ProsConsCorpusReader":{CorpusView:[88,4,1,""],__init__:[88,3,1,""],sents:[88,3,1,""],words:[88,3,1,""]},"nltk.corpus.reader.reviews":{Review:[89,2,1,""],ReviewLine:[89,2,1,""],ReviewsCorpusReader:[89,2,1,""]},"nltk.corpus.reader.reviews.Review":{__init__:[89,3,1,""],add_line:[89,3,1,""],features:[89,3,1,""],sents:[89,3,1,""]},"nltk.corpus.reader.reviews.ReviewLine":{__init__:[89,3,1,""]},"nltk.corpus.reader.reviews.ReviewsCorpusReader":{CorpusView:[89,4,1,""],__init__:[89,3,1,""],features:[89,3,1,""],reviews:[89,3,1,""],sents:[89,3,1,""],words:[89,3,1,""]},"nltk.corpus.reader.rte":{RTECorpusReader:[90,2,1,""],RTEPair:[90,2,1,""],norm:[90,1,1,""]},"nltk.corpus.reader.rte.RTECorpusReader":{pairs:[90,3,1,""]},"nltk.corpus.reader.rte.RTEPair":{__init__:[90,3,1,""]},"nltk.corpus.reader.semcor":{SemcorCorpusReader:[91,2,1,""],SemcorSentence:[91,2,1,""],SemcorWordView:[91,2,1,""]},"nltk.corpus.reader.semcor.SemcorCorpusReader":{__init__:[91,3,1,""],chunk_sents:[91,3,1,""],chunks:[91,3,1,""],sents:[91,3,1,""],tagged_chunks:[91,3,1,""],tagged_sents:[91,3,1,""],words:[91,3,1,""]},"nltk.corpus.reader.semcor.SemcorSentence":{__init__:[91,3,1,""]},"nltk.corpus.reader.semcor.SemcorWordView":{__init__:[91,3,1,""],handle_elt:[91,3,1,""],handle_sent:[91,3,1,""],handle_word:[91,3,1,""]},"nltk.corpus.reader.senseval":{SensevalCorpusReader:[92,2,1,""],SensevalCorpusView:[92,2,1,""],SensevalInstance:[92,2,1,""]},"nltk.corpus.reader.senseval.SensevalCorpusReader":{instances:[92,3,1,""]},"nltk.corpus.reader.senseval.SensevalCorpusView":{__init__:[92,3,1,""],read_block:[92,3,1,""]},"nltk.corpus.reader.senseval.SensevalInstance":{__init__:[92,3,1,""]},"nltk.corpus.reader.sentiwordnet":{SentiSynset:[93,2,1,""],SentiWordNetCorpusReader:[93,2,1,""]},"nltk.corpus.reader.sentiwordnet.SentiSynset":{__init__:[93,3,1,""],neg_score:[93,3,1,""],obj_score:[93,3,1,""],pos_score:[93,3,1,""]},"nltk.corpus.reader.sentiwordnet.SentiWordNetCorpusReader":{__init__:[93,3,1,""],all_senti_synsets:[93,3,1,""],senti_synset:[93,3,1,""],senti_synsets:[93,3,1,""]},"nltk.corpus.reader.sinica_treebank":{SinicaTreebankCorpusReader:[94,2,1,""]},"nltk.corpus.reader.string_category":{StringCategoryCorpusReader:[95,2,1,""]},"nltk.corpus.reader.string_category.StringCategoryCorpusReader":{__init__:[95,3,1,""],tuples:[95,3,1,""]},"nltk.corpus.reader.switchboard":{SwitchboardCorpusReader:[96,2,1,""],SwitchboardTurn:[96,2,1,""]},"nltk.corpus.reader.switchboard.SwitchboardCorpusReader":{__init__:[96,3,1,""],discourses:[96,3,1,""],tagged_discourses:[96,3,1,""],tagged_turns:[96,3,1,""],tagged_words:[96,3,1,""],turns:[96,3,1,""],words:[96,3,1,""]},"nltk.corpus.reader.switchboard.SwitchboardTurn":{__init__:[96,3,1,""]},"nltk.corpus.reader.tagged":{CategorizedTaggedCorpusReader:[97,2,1,""],MacMorphoCorpusReader:[97,2,1,""],TaggedCorpusReader:[97,2,1,""],TaggedCorpusView:[97,2,1,""],TimitTaggedCorpusReader:[97,2,1,""]},"nltk.corpus.reader.tagged.CategorizedTaggedCorpusReader":{__init__:[97,3,1,""],tagged_paras:[97,3,1,""],tagged_sents:[97,3,1,""],tagged_words:[97,3,1,""]},"nltk.corpus.reader.tagged.MacMorphoCorpusReader":{__init__:[97,3,1,""]},"nltk.corpus.reader.tagged.TaggedCorpusReader":{__init__:[97,3,1,""],paras:[97,3,1,""],sents:[97,3,1,""],tagged_paras:[97,3,1,""],tagged_sents:[97,3,1,""],tagged_words:[97,3,1,""],words:[97,3,1,""]},"nltk.corpus.reader.tagged.TaggedCorpusView":{__init__:[97,3,1,""],read_block:[97,3,1,""]},"nltk.corpus.reader.tagged.
TimitTaggedCorpusReader":{__init__:[97,3,1,""],paras:[97,3,1,""],tagged_paras:[97,3,1,""]},"nltk.corpus.reader.timit":{SpeakerInfo:[98,2,1,""],TimitCorpusReader:[98,2,1,""],read_timit_block:[98,1,1,""]},"nltk.corpus.reader.timit.SpeakerInfo":{__init__:[98,3,1,""]},"nltk.corpus.reader.timit.TimitCorpusReader":{__init__:[98,3,1,""],audiodata:[98,3,1,""],fileids:[98,3,1,""],phone_times:[98,3,1,""],phone_trees:[98,3,1,""],phones:[98,3,1,""],play:[98,3,1,""],sent_times:[98,3,1,""],sentid:[98,3,1,""],sents:[98,3,1,""],spkrid:[98,3,1,""],spkrinfo:[98,3,1,""],spkrutteranceids:[98,3,1,""],transcription_dict:[98,3,1,""],utterance:[98,3,1,""],utteranceids:[98,3,1,""],wav:[98,3,1,""],word_times:[98,3,1,""],words:[98,3,1,""]},"nltk.corpus.reader.toolbox":{ToolboxCorpusReader:[99,2,1,""],demo:[99,1,1,""]},"nltk.corpus.reader.toolbox.ToolboxCorpusReader":{entries:[99,3,1,""],fields:[99,3,1,""],words:[99,3,1,""],xml:[99,3,1,""]},"nltk.corpus.reader.twitter":{TwitterCorpusReader:[100,2,1,""]},"nltk.corpus.reader.twitter.TwitterCorpusReader":{CorpusView:[100,4,1,""],__init__:[100,3,1,""],docs:[100,3,1,""],strings:[100,3,1,""],tokenized:[100,3,1,""]},"nltk.corpus.reader.udhr":{UdhrCorpusReader:[101,2,1,""]},"nltk.corpus.reader.udhr.UdhrCorpusReader":{ENCODINGS:[101,4,1,""],SKIP:[101,4,1,""],__init__:[101,3,1,""]},"nltk.corpus.reader.util":{ConcatenatedCorpusView:[102,2,1,""],PickleCorpusView:[102,2,1,""],StreamBackedCorpusView:[102,2,1,""],concat:[102,1,1,""],find_corpus_fileids:[102,1,1,""],read_alignedsent_block:[102,1,1,""],read_blankline_block:[102,1,1,""],read_line_block:[102,1,1,""],read_regexp_block:[102,1,1,""],read_sexpr_block:[102,1,1,""],read_whitespace_block:[102,1,1,""],read_wordpunct_block:[102,1,1,""],tagged_treebank_para_block_reader:[102,1,1,""]},"nltk.corpus.reader.util.ConcatenatedCorpusView":{__init__:[102,3,1,""],close:[102,3,1,""],iterate_from:[102,3,1,""]},"nltk.corpus.reader.util.PickleCorpusView":{BLOCK_SIZE:[102,4,1,""],PROTOCOL:[102,4,1,""],__init__:[102,3,1,""],cache_to_tempfile:[102,3,1,""],read_block:[102,3,1,""],write:[102,3,1,""]},"nltk.corpus.reader.util.StreamBackedCorpusView":{__init__:[102,3,1,""],close:[102,3,1,""],fileid:[102,5,1,""],iterate_from:[102,3,1,""],read_block:[102,3,1,""]},"nltk.corpus.reader.verbnet":{VerbnetCorpusReader:[103,2,1,""]},"nltk.corpus.reader.verbnet.VerbnetCorpusReader":{__init__:[103,3,1,""],classids:[103,3,1,""],fileids:[103,3,1,""],frames:[103,3,1,""],lemmas:[103,3,1,""],longid:[103,3,1,""],pprint:[103,3,1,""],pprint_frames:[103,3,1,""],pprint_members:[103,3,1,""],pprint_subclasses:[103,3,1,""],pprint_themroles:[103,3,1,""],shortid:[103,3,1,""],subclasses:[103,3,1,""],themroles:[103,3,1,""],vnclass:[103,3,1,""],wordnetids:[103,3,1,""]},"nltk.corpus.reader.wordlist":{MWAPPDBCorpusReader:[104,2,1,""],NonbreakingPrefixesCorpusReader:[104,2,1,""],SwadeshCorpusReader:[104,2,1,""],UnicharsCorpusReader:[104,2,1,""],WordListCorpusReader:[104,2,1,""]},"nltk.corpus.reader.wordlist.MWAPPDBCorpusReader":{entries:[104,3,1,""],mwa_ppdb_xxxl_file:[104,4,1,""]},"nltk.corpus.reader.wordlist.NonbreakingPrefixesCorpusReader":{available_langs:[104,4,1,""],words:[104,3,1,""]},"nltk.corpus.reader.wordlist.SwadeshCorpusReader":{entries:[104,3,1,""]},"nltk.corpus.reader.wordlist.UnicharsCorpusReader":{available_categories:[104,4,1,""],chars:[104,3,1,""]},"nltk.corpus.reader.wordlist.WordListCorpusReader":{words:[104,3,1,""]},"nltk.corpus.reader.wordnet":{Lemma:[105,2,1,""],Synset:[105,2,1,""],WordNetCorpusReader:[105,2,1,""],WordNetError:[105,6,1,""],WordNetICCorpusRe
ader:[105,2,1,""],information_content:[105,1,1,""],jcn_similarity:[105,1,1,""],lch_similarity:[105,1,1,""],lin_similarity:[105,1,1,""],path_similarity:[105,1,1,""],res_similarity:[105,1,1,""],wup_similarity:[105,1,1,""]},"nltk.corpus.reader.wordnet.Lemma":{__init__:[105,3,1,""],antonyms:[105,3,1,""],count:[105,3,1,""],derivationally_related_forms:[105,3,1,""],frame_ids:[105,3,1,""],frame_strings:[105,3,1,""],key:[105,3,1,""],lang:[105,3,1,""],name:[105,3,1,""],pertainyms:[105,3,1,""],synset:[105,3,1,""],syntactic_marker:[105,3,1,""]},"nltk.corpus.reader.wordnet.Synset":{__init__:[105,3,1,""],acyclic_tree:[105,3,1,""],closure:[105,3,1,""],common_hypernyms:[105,3,1,""],definition:[105,3,1,""],examples:[105,3,1,""],frame_ids:[105,3,1,""],hypernym_distances:[105,3,1,""],hypernym_paths:[105,3,1,""],jcn_similarity:[105,3,1,""],lch_similarity:[105,3,1,""],lemma_names:[105,3,1,""],lemmas:[105,3,1,""],lexname:[105,3,1,""],lin_similarity:[105,3,1,""],lowest_common_hypernyms:[105,3,1,""],max_depth:[105,3,1,""],min_depth:[105,3,1,""],mst:[105,3,1,""],name:[105,3,1,""],offset:[105,3,1,""],path_similarity:[105,3,1,""],pos:[105,3,1,""],res_similarity:[105,3,1,""],root_hypernyms:[105,3,1,""],shortest_path_distance:[105,3,1,""],tree:[105,3,1,""],wup_similarity:[105,3,1,""]},"nltk.corpus.reader.wordnet.WordNetCorpusReader":{ADJ:[105,4,1,""],ADJ_SAT:[105,4,1,""],ADV:[105,4,1,""],MORPHOLOGICAL_SUBSTITUTIONS:[105,4,1,""],NOUN:[105,4,1,""],VERB:[105,4,1,""],__init__:[105,3,1,""],all_lemma_names:[105,3,1,""],all_synsets:[105,3,1,""],citation:[105,3,1,""],custom_lemmas:[105,3,1,""],digraph:[105,3,1,""],get_version:[105,3,1,""],ic:[105,3,1,""],jcn_similarity:[105,3,1,""],langs:[105,3,1,""],lch_similarity:[105,3,1,""],lemma:[105,3,1,""],lemma_count:[105,3,1,""],lemma_from_key:[105,3,1,""],lemmas:[105,3,1,""],license:[105,3,1,""],lin_similarity:[105,3,1,""],morphy:[105,3,1,""],of2ss:[105,3,1,""],path_similarity:[105,3,1,""],readme:[105,3,1,""],res_similarity:[105,3,1,""],ss2of:[105,3,1,""],synset:[105,3,1,""],synset_from_pos_and_offset:[105,3,1,""],synset_from_sense_key:[105,3,1,""],synsets:[105,3,1,""],words:[105,3,1,""],wup_similarity:[105,3,1,""]},"nltk.corpus.reader.wordnet.WordNetICCorpusReader":{__init__:[105,3,1,""],ic:[105,3,1,""]},"nltk.corpus.reader.xmldocs":{XMLCorpusReader:[106,2,1,""],XMLCorpusView:[106,2,1,""]},"nltk.corpus.reader.xmldocs.XMLCorpusReader":{__init__:[106,3,1,""],words:[106,3,1,""],xml:[106,3,1,""]},"nltk.corpus.reader.xmldocs.XMLCorpusView":{__init__:[106,3,1,""],handle_elt:[106,3,1,""],read_block:[106,3,1,""]},"nltk.corpus.reader.ycoe":{YCOECorpusReader:[107,2,1,""],YCOEParseCorpusReader:[107,2,1,""],YCOETaggedCorpusReader:[107,2,1,""],documents:[107,7,1,""]},"nltk.corpus.reader.ycoe.YCOECorpusReader":{__init__:[107,3,1,""],documents:[107,3,1,""],fileids:[107,3,1,""],paras:[107,3,1,""],parsed_sents:[107,3,1,""],sents:[107,3,1,""],tagged_paras:[107,3,1,""],tagged_sents:[107,3,1,""],tagged_words:[107,3,1,""],words:[107,3,1,""]},"nltk.corpus.reader.ycoe.YCOETaggedCorpusReader":{__init__:[107,3,1,""]},"nltk.corpus.util":{LazyCorpusLoader:[108,2,1,""]},"nltk.corpus.util.LazyCorpusLoader":{__init__:[108,3,1,""]},"nltk.data":{AUTO_FORMATS:[109,7,1,""],BufferedGzipFile:[109,1,1,""],FORMATS:[109,7,1,""],FileSystemPathPointer:[109,2,1,""],GzipFileSystemPathPointer:[109,2,1,""],LazyLoader:[109,2,1,""],OpenOnDemandZipFile:[109,2,1,""],PathPointer:[109,2,1,""],SeekableUnicodeStreamReader:[109,2,1,""],clear_cache:[109,1,1,""],find:[109,1,1,""],load:[109,1,1,""],path:[109,7,1,""],retrieve:[109,1,1,""]
,show_cfg:[109,1,1,""]},"nltk.data.FileSystemPathPointer":{__init__:[109,3,1,""],file_size:[109,3,1,""],join:[109,3,1,""],open:[109,3,1,""],path:[109,5,1,""]},"nltk.data.GzipFileSystemPathPointer":{open:[109,3,1,""]},"nltk.data.LazyLoader":{__init__:[109,3,1,""]},"nltk.data.OpenOnDemandZipFile":{__init__:[109,3,1,""],read:[109,3,1,""],write:[109,3,1,""],writestr:[109,3,1,""]},"nltk.data.PathPointer":{file_size:[109,3,1,""],join:[109,3,1,""],open:[109,3,1,""]},"nltk.data.SeekableUnicodeStreamReader":{DEBUG:[109,4,1,""],__init__:[109,3,1,""],bytebuffer:[109,4,1,""],char_seek_forward:[109,3,1,""],close:[109,3,1,""],closed:[109,5,1,""],decode:[109,4,1,""],discard_line:[109,3,1,""],encoding:[109,4,1,""],errors:[109,4,1,""],linebuffer:[109,4,1,""],mode:[109,5,1,""],name:[109,5,1,""],next:[109,3,1,""],read:[109,3,1,""],readline:[109,3,1,""],readlines:[109,3,1,""],seek:[109,3,1,""],stream:[109,4,1,""],tell:[109,3,1,""],xreadlines:[109,3,1,""]},"nltk.decorators":{decorator:[110,1,1,""],getinfo:[110,1,1,""],new_wrapper:[110,1,1,""]},"nltk.downloader":{Collection:[111,2,1,""],Downloader:[111,2,1,""],DownloaderGUI:[111,2,1,""],DownloaderMessage:[111,2,1,""],DownloaderShell:[111,2,1,""],ErrorMessage:[111,2,1,""],FinishCollectionMessage:[111,2,1,""],FinishDownloadMessage:[111,2,1,""],FinishPackageMessage:[111,2,1,""],FinishUnzipMessage:[111,2,1,""],Package:[111,2,1,""],ProgressMessage:[111,2,1,""],SelectDownloadDirMessage:[111,2,1,""],StaleMessage:[111,2,1,""],StartCollectionMessage:[111,2,1,""],StartDownloadMessage:[111,2,1,""],StartPackageMessage:[111,2,1,""],StartUnzipMessage:[111,2,1,""],UpToDateMessage:[111,2,1,""],build_index:[111,1,1,""],download:[111,1,1,""],download_gui:[111,1,1,""],download_shell:[111,1,1,""],md5_hexdigest:[111,1,1,""],unzip:[111,1,1,""],update:[111,1,1,""]},"nltk.downloader.Collection":{__init__:[111,3,1,""],children:[111,4,1,""],fromxml:[111,3,1,""],id:[111,4,1,""],name:[111,4,1,""],packages:[111,4,1,""]},"nltk.downloader.Downloader":{DEFAULT_URL:[111,4,1,""],INDEX_TIMEOUT:[111,4,1,""],INSTALLED:[111,4,1,""],NOT_INSTALLED:[111,4,1,""],PARTIAL:[111,4,1,""],STALE:[111,4,1,""],__init__:[111,3,1,""],clear_status_cache:[111,3,1,""],collections:[111,3,1,""],corpora:[111,3,1,""],default_download_dir:[111,3,1,""],download:[111,3,1,""],download_dir:[111,5,1,""],incr_download:[111,3,1,""],index:[111,3,1,""],info:[111,3,1,""],is_installed:[111,3,1,""],is_stale:[111,3,1,""],list:[111,3,1,""],models:[111,3,1,""],packages:[111,3,1,""],status:[111,3,1,""],update:[111,3,1,""],url:[111,5,1,""],xmlinfo:[111,3,1,""]},"nltk.downloader.DownloaderGUI":{COLUMNS:[111,4,1,""],COLUMN_WEIGHTS:[111,4,1,""],COLUMN_WIDTHS:[111,4,1,""],DEFAULT_COLUMN_WIDTH:[111,4,1,""],HELP:[111,4,1,""],INITIAL_COLUMNS:[111,4,1,""],__init__:[111,3,1,""],about:[111,3,1,""],c:[111,4,1,""],destroy:[111,3,1,""],help:[111,3,1,""],mainloop:[111,3,1,""]},"nltk.downloader.DownloaderShell":{__init__:[111,3,1,""],run:[111,3,1,""]},"nltk.downloader.ErrorMessage":{__init__:[111,3,1,""]},"nltk.downloader.FinishCollectionMessage":{__init__:[111,3,1,""]},"nltk.downloader.FinishDownloadMessage":{__init__:[111,3,1,""]},"nltk.downloader.FinishPackageMessage":{__init__:[111,3,1,""]},"nltk.downloader.FinishUnzipMessage":{__init__:[111,3,1,""]},"nltk.downloader.Package":{__init__:[111,3,1,""],author:[111,4,1,""],checksum:[111,4,1,""],contact:[111,4,1,""],copyright:[111,4,1,""],filename:[111,4,1,""],fromxml:[111,3,1,""],id:[111,4,1,""],license:[111,4,1,""],name:[111,4,1,""],size:[111,4,1,""],subdir:[111,4,1,""],svn_revision:[111,4,1,""],unzip:[
111,4,1,""],unzipped_size:[111,4,1,""],url:[111,4,1,""]},"nltk.downloader.ProgressMessage":{__init__:[111,3,1,""]},"nltk.downloader.SelectDownloadDirMessage":{__init__:[111,3,1,""]},"nltk.downloader.StaleMessage":{__init__:[111,3,1,""]},"nltk.downloader.StartCollectionMessage":{__init__:[111,3,1,""]},"nltk.downloader.StartDownloadMessage":{__init__:[111,3,1,""]},"nltk.downloader.StartPackageMessage":{__init__:[111,3,1,""]},"nltk.downloader.StartUnzipMessage":{__init__:[111,3,1,""]},"nltk.downloader.UpToDateMessage":{__init__:[111,3,1,""]},"nltk.draw":{cfg:[113,0,0,"-"],dispersion:[114,0,0,"-"],table:[115,0,0,"-"],tree:[116,0,0,"-"],util:[117,0,0,"-"]},"nltk.draw.cfg":{CFGDemo:[113,2,1,""],CFGEditor:[113,2,1,""],ProductionList:[113,2,1,""],demo2:[113,1,1,""],demo3:[113,1,1,""],demo:[113,1,1,""]},"nltk.draw.cfg.CFGDemo":{__init__:[113,3,1,""],destroy:[113,3,1,""],mainloop:[113,3,1,""],reset_workspace:[113,3,1,""],workspace_markprod:[113,3,1,""]},"nltk.draw.cfg.CFGEditor":{ARROW:[113,4,1,""],__init__:[113,3,1,""]},"nltk.draw.cfg.ProductionList":{ARROW:[113,4,1,""]},"nltk.draw.dispersion":{dispersion_plot:[114,1,1,""]},"nltk.draw.table":{MultiListbox:[115,2,1,""],Table:[115,2,1,""],demo:[115,1,1,""]},"nltk.draw.table.MultiListbox":{"delete":[115,3,1,""],FRAME_CONFIG:[115,4,1,""],LABEL_CONFIG:[115,4,1,""],LISTBOX_CONFIG:[115,4,1,""],__init__:[115,3,1,""],activate:[115,3,1,""],bbox:[115,3,1,""],bind_to_columns:[115,3,1,""],bind_to_labels:[115,3,1,""],bind_to_listboxes:[115,3,1,""],column_labels:[115,5,1,""],column_names:[115,5,1,""],columnconfig:[115,3,1,""],columnconfigure:[115,3,1,""],configure:[115,3,1,""],curselection:[115,3,1,""],get:[115,3,1,""],hide_column:[115,3,1,""],index:[115,3,1,""],insert:[115,3,1,""],itemcget:[115,3,1,""],itemconfig:[115,3,1,""],itemconfigure:[115,3,1,""],listboxes:[115,5,1,""],nearest:[115,3,1,""],rowconfig:[115,3,1,""],rowconfigure:[115,3,1,""],scan_dragto:[115,3,1,""],scan_mark:[115,3,1,""],see:[115,3,1,""],select:[115,3,1,""],select_anchor:[115,3,1,""],select_clear:[115,3,1,""],select_includes:[115,3,1,""],select_set:[115,3,1,""],selection_anchor:[115,3,1,""],selection_clear:[115,3,1,""],selection_includes:[115,3,1,""],selection_set:[115,3,1,""],show_column:[115,3,1,""],size:[115,3,1,""],yview:[115,3,1,""],yview_moveto:[115,3,1,""],yview_scroll:[115,3,1,""]},"nltk.draw.table.Table":{__init__:[115,3,1,""],append:[115,3,1,""],bind:[115,3,1,""],bind_to_columns:[115,3,1,""],bind_to_labels:[115,3,1,""],bind_to_listboxes:[115,3,1,""],clear:[115,3,1,""],column_index:[115,3,1,""],column_names:[115,5,1,""],columnconfig:[115,3,1,""],columnconfigure:[115,3,1,""],extend:[115,3,1,""],focus:[115,3,1,""],grid:[115,3,1,""],hide_column:[115,3,1,""],insert:[115,3,1,""],itemconfig:[115,3,1,""],itemconfigure:[115,3,1,""],pack:[115,3,1,""],rowconfig:[115,3,1,""],rowconfigure:[115,3,1,""],select:[115,3,1,""],selected_row:[115,3,1,""],show_column:[115,3,1,""],sort_by:[115,3,1,""]},"nltk.draw.tree":{TreeSegmentWidget:[116,2,1,""],TreeView:[116,2,1,""],TreeWidget:[116,2,1,""],demo:[116,1,1,""],draw_trees:[116,1,1,""],tree_to_treesegment:[116,1,1,""]},"nltk.draw.tree.TreeSegmentWidget":{__init__:[116,3,1,""],insert_child:[116,3,1,""],label:[116,3,1,""],remove_child:[116,3,1,""],replace_child:[116,3,1,""],set_label:[116,3,1,""],subtrees:[116,3,1,""]},"nltk.draw.tree.TreeView":{__init__:[116,3,1,""],destroy:[116,3,1,""],mainloop:[116,3,1,""],resize:[116,3,1,""]},"nltk.draw.tree.TreeWidget":{__init__:[116,3,1,""],bind_click_leaves:[116,3,1,""],bind_click_nodes:[116,3,1,""],bind_click_trees:
[116,3,1,""],bind_drag_leaves:[116,3,1,""],bind_drag_nodes:[116,3,1,""],bind_drag_trees:[116,3,1,""],collapsed_tree:[116,3,1,""],expanded_tree:[116,3,1,""],toggle_collapsed:[116,3,1,""]},"nltk.draw.util":{AbstractContainerWidget:[117,2,1,""],BoxWidget:[117,2,1,""],BracketWidget:[117,2,1,""],CanvasFrame:[117,2,1,""],CanvasWidget:[117,2,1,""],ColorizedList:[117,2,1,""],EntryDialog:[117,2,1,""],MutableOptionMenu:[117,2,1,""],OvalWidget:[117,2,1,""],ParenWidget:[117,2,1,""],ScrollWatcherWidget:[117,2,1,""],SequenceWidget:[117,2,1,""],ShowText:[117,2,1,""],SpaceWidget:[117,2,1,""],StackWidget:[117,2,1,""],SymbolWidget:[117,2,1,""],TextWidget:[117,2,1,""],demo:[117,1,1,""]},"nltk.draw.util.AbstractContainerWidget":{__init__:[117,3,1,""],child:[117,3,1,""],set_child:[117,3,1,""]},"nltk.draw.util.BoxWidget":{__init__:[117,3,1,""]},"nltk.draw.util.BracketWidget":{__init__:[117,3,1,""]},"nltk.draw.util.CanvasFrame":{__init__:[117,3,1,""],add_widget:[117,3,1,""],canvas:[117,3,1,""],destroy:[117,3,1,""],destroy_widget:[117,3,1,""],mainloop:[117,3,1,""],pack:[117,3,1,""],print_to_file:[117,3,1,""],remove_widget:[117,3,1,""],scrollregion:[117,3,1,""]},"nltk.draw.util.CanvasWidget":{__init__:[117,3,1,""],bbox:[117,3,1,""],bind_click:[117,3,1,""],bind_drag:[117,3,1,""],canvas:[117,3,1,""],child_widgets:[117,3,1,""],destroy:[117,3,1,""],height:[117,3,1,""],hidden:[117,3,1,""],hide:[117,3,1,""],manage:[117,3,1,""],move:[117,3,1,""],moveto:[117,3,1,""],parent:[117,3,1,""],show:[117,3,1,""],tags:[117,3,1,""],unbind_click:[117,3,1,""],unbind_drag:[117,3,1,""],update:[117,3,1,""],width:[117,3,1,""]},"nltk.draw.util.ColorizedList":{__init__:[117,3,1,""],add_callback:[117,3,1,""],focus:[117,3,1,""],get:[117,3,1,""],grid:[117,3,1,""],mark:[117,3,1,""],markonly:[117,3,1,""],pack:[117,3,1,""],remove_callback:[117,3,1,""],set:[117,3,1,""],unmark:[117,3,1,""],view:[117,3,1,""]},"nltk.draw.util.EntryDialog":{__init__:[117,3,1,""]},"nltk.draw.util.MutableOptionMenu":{__init__:[117,3,1,""],add:[117,3,1,""],destroy:[117,3,1,""],remove:[117,3,1,""],set:[117,3,1,""]},"nltk.draw.util.OvalWidget":{RATIO:[117,4,1,""],__init__:[117,3,1,""]},"nltk.draw.util.ParenWidget":{__init__:[117,3,1,""]},"nltk.draw.util.ScrollWatcherWidget":{__init__:[117,3,1,""],add_child:[117,3,1,""],remove_child:[117,3,1,""]},"nltk.draw.util.SequenceWidget":{__init__:[117,3,1,""],children:[117,3,1,""],insert_child:[117,3,1,""],remove_child:[117,3,1,""],replace_child:[117,3,1,""]},"nltk.draw.util.ShowText":{__init__:[117,3,1,""],destroy:[117,3,1,""],find_dimentions:[117,3,1,""],mainloop:[117,3,1,""]},"nltk.draw.util.SpaceWidget":{__init__:[117,3,1,""],set_height:[117,3,1,""],set_width:[117,3,1,""]},"nltk.draw.util.StackWidget":{__init__:[117,3,1,""],children:[117,3,1,""],insert_child:[117,3,1,""],remove_child:[117,3,1,""],replace_child:[117,3,1,""]},"nltk.draw.util.SymbolWidget":{SYMBOLS:[117,4,1,""],__init__:[117,3,1,""],set_symbol:[117,3,1,""],symbol:[117,3,1,""],symbolsheet:[117,3,1,""]},"nltk.draw.util.TextWidget":{__init__:[117,3,1,""],set_text:[117,3,1,""],text:[117,3,1,""]},"nltk.featstruct":{FeatDict:[118,2,1,""],FeatList:[118,2,1,""],FeatStruct:[118,2,1,""],FeatStructReader:[118,2,1,""],Feature:[118,2,1,""],RangeFeature:[118,2,1,""],SlashFeature:[118,2,1,""],conflicts:[118,1,1,""],subsumes:[118,1,1,""],unify:[118,1,1,""]},"nltk.featstruct.FeatDict":{__init__:[118,3,1,""],clear:[118,3,1,""],get:[118,3,1,""],has_key:[118,3,1,""],pop:[118,3,1,""],popitem:[118,3,1,""],setdefault:[118,3,1,""],update:[118,3,1,""]},"nltk.featstruct.FeatList":{__init__:[
118,3,1,""],append:[118,3,1,""],extend:[118,3,1,""],insert:[118,3,1,""],pop:[118,3,1,""],remove:[118,3,1,""],reverse:[118,3,1,""],sort:[118,3,1,""]},"nltk.featstruct.FeatStruct":{__new__:[118,3,1,""],copy:[118,3,1,""],cyclic:[118,3,1,""],equal_values:[118,3,1,""],freeze:[118,3,1,""],frozen:[118,3,1,""],remove_variables:[118,3,1,""],rename_variables:[118,3,1,""],retract_bindings:[118,3,1,""],substitute_bindings:[118,3,1,""],subsumes:[118,3,1,""],unify:[118,3,1,""],variables:[118,3,1,""],walk:[118,3,1,""]},"nltk.featstruct.FeatStructReader":{VALUE_HANDLERS:[118,4,1,""],__init__:[118,3,1,""],fromstring:[118,3,1,""],read_app_value:[118,3,1,""],read_fstruct_value:[118,3,1,""],read_int_value:[118,3,1,""],read_logic_value:[118,3,1,""],read_partial:[118,3,1,""],read_set_value:[118,3,1,""],read_str_value:[118,3,1,""],read_sym_value:[118,3,1,""],read_tuple_value:[118,3,1,""],read_value:[118,3,1,""],read_var_value:[118,3,1,""]},"nltk.featstruct.Feature":{"default":[118,5,1,""],__init__:[118,3,1,""],display:[118,5,1,""],name:[118,5,1,""],read_value:[118,3,1,""],unify_base_values:[118,3,1,""]},"nltk.featstruct.RangeFeature":{RANGE_RE:[118,4,1,""],read_value:[118,3,1,""],unify_base_values:[118,3,1,""]},"nltk.featstruct.SlashFeature":{read_value:[118,3,1,""]},"nltk.grammar":{CFG:[119,2,1,""],DependencyGrammar:[119,2,1,""],DependencyProduction:[119,2,1,""],Nonterminal:[119,2,1,""],PCFG:[119,2,1,""],ProbabilisticDependencyGrammar:[119,2,1,""],ProbabilisticProduction:[119,2,1,""],Production:[119,2,1,""],induce_pcfg:[119,1,1,""],nonterminals:[119,1,1,""],read_grammar:[119,1,1,""]},"nltk.grammar.CFG":{__init__:[119,3,1,""],binarize:[119,3,1,""],check_coverage:[119,3,1,""],chomsky_normal_form:[119,3,1,""],eliminate_start:[119,3,1,""],fromstring:[119,3,1,""],is_binarised:[119,3,1,""],is_chomsky_normal_form:[119,3,1,""],is_flexible_chomsky_normal_form:[119,3,1,""],is_leftcorner:[119,3,1,""],is_lexical:[119,3,1,""],is_nonempty:[119,3,1,""],is_nonlexical:[119,3,1,""],leftcorner_parents:[119,3,1,""],leftcorners:[119,3,1,""],max_len:[119,3,1,""],min_len:[119,3,1,""],productions:[119,3,1,""],remove_unitary_rules:[119,3,1,""],start:[119,3,1,""]},"nltk.grammar.DependencyGrammar":{__init__:[119,3,1,""],contains:[119,3,1,""],fromstring:[119,3,1,""]},"nltk.grammar.Nonterminal":{__init__:[119,3,1,""],symbol:[119,3,1,""]},"nltk.grammar.PCFG":{EPSILON:[119,4,1,""],__init__:[119,3,1,""],fromstring:[119,3,1,""]},"nltk.grammar.ProbabilisticDependencyGrammar":{__init__:[119,3,1,""],contains:[119,3,1,""]},"nltk.grammar.ProbabilisticProduction":{__init__:[119,3,1,""]},"nltk.grammar.Production":{__init__:[119,3,1,""],is_lexical:[119,3,1,""],is_nonlexical:[119,3,1,""],lhs:[119,3,1,""],rhs:[119,3,1,""]},"nltk.help":{brown_tagset:[120,1,1,""],claws5_tagset:[120,1,1,""],upenn_tagset:[120,1,1,""]},"nltk.inference":{api:[122,0,0,"-"],discourse:[123,0,0,"-"],mace:[124,0,0,"-"],nonmonotonic:[125,0,0,"-"],prover9:[126,0,0,"-"],resolution:[127,0,0,"-"],tableau:[128,0,0,"-"]},"nltk.inference.api":{BaseModelBuilderCommand:[122,2,1,""],BaseProverCommand:[122,2,1,""],BaseTheoremToolCommand:[122,2,1,""],ModelBuilder:[122,2,1,""],ModelBuilderCommand:[122,2,1,""],ModelBuilderCommandDecorator:[122,2,1,""],ParallelProverBuilder:[122,2,1,""],ParallelProverBuilderCommand:[122,2,1,""],Prover:[122,2,1,""],ProverCommand:[122,2,1,""],ProverCommandDecorator:[122,2,1,""],TheoremToolCommand:[122,2,1,""],TheoremToolCommandDecorator:[122,2,1,""],TheoremToolThread:[122,2,1,""]},"nltk.inference.api.BaseModelBuilderCommand":{__init__:[122,3,1,""],build_model:[122,3
,1,""],get_model_builder:[122,3,1,""],model:[122,3,1,""]},"nltk.inference.api.BaseProverCommand":{__init__:[122,3,1,""],decorate_proof:[122,3,1,""],get_prover:[122,3,1,""],proof:[122,3,1,""],prove:[122,3,1,""]},"nltk.inference.api.BaseTheoremToolCommand":{__init__:[122,3,1,""],add_assumptions:[122,3,1,""],assumptions:[122,3,1,""],goal:[122,3,1,""],print_assumptions:[122,3,1,""],retract_assumptions:[122,3,1,""]},"nltk.inference.api.ModelBuilder":{build_model:[122,3,1,""]},"nltk.inference.api.ModelBuilderCommand":{build_model:[122,3,1,""],get_model_builder:[122,3,1,""],model:[122,3,1,""]},"nltk.inference.api.ModelBuilderCommandDecorator":{__init__:[122,3,1,""],build_model:[122,3,1,""],get_model_builder:[122,3,1,""],model:[122,3,1,""]},"nltk.inference.api.ParallelProverBuilder":{__init__:[122,3,1,""]},"nltk.inference.api.ParallelProverBuilderCommand":{__init__:[122,3,1,""],build_model:[122,3,1,""],prove:[122,3,1,""]},"nltk.inference.api.Prover":{prove:[122,3,1,""]},"nltk.inference.api.ProverCommand":{get_prover:[122,3,1,""],proof:[122,3,1,""],prove:[122,3,1,""]},"nltk.inference.api.ProverCommandDecorator":{__init__:[122,3,1,""],decorate_proof:[122,3,1,""],get_prover:[122,3,1,""],proof:[122,3,1,""],prove:[122,3,1,""]},"nltk.inference.api.TheoremToolCommand":{add_assumptions:[122,3,1,""],assumptions:[122,3,1,""],goal:[122,3,1,""],print_assumptions:[122,3,1,""],retract_assumptions:[122,3,1,""]},"nltk.inference.api.TheoremToolCommandDecorator":{__init__:[122,3,1,""],add_assumptions:[122,3,1,""],assumptions:[122,3,1,""],goal:[122,3,1,""],print_assumptions:[122,3,1,""],retract_assumptions:[122,3,1,""]},"nltk.inference.api.TheoremToolThread":{__init__:[122,3,1,""],result:[122,5,1,""],run:[122,3,1,""]},"nltk.inference.discourse":{CfgReadingCommand:[123,2,1,""],DiscourseTester:[123,2,1,""],DrtGlueReadingCommand:[123,2,1,""],ReadingCommand:[123,2,1,""],demo:[123,1,1,""],discourse_demo:[123,1,1,""],drt_discourse_demo:[123,1,1,""],load_fol:[123,1,1,""],spacer:[123,1,1,""]},"nltk.inference.discourse.CfgReadingCommand":{__init__:[123,3,1,""],combine_readings:[123,3,1,""],parse_to_readings:[123,3,1,""],to_fol:[123,3,1,""]},"nltk.inference.discourse.DiscourseTester":{__init__:[123,3,1,""],add_background:[123,3,1,""],add_sentence:[123,3,1,""],background:[123,3,1,""],expand_threads:[123,3,1,""],grammar:[123,3,1,""],models:[123,3,1,""],multiply:[123,3,1,""],readings:[123,3,1,""],retract_sentence:[123,3,1,""],sentences:[123,3,1,""]},"nltk.inference.discourse.DrtGlueReadingCommand":{__init__:[123,3,1,""],combine_readings:[123,3,1,""],parse_to_readings:[123,3,1,""],process_thread:[123,3,1,""],to_fol:[123,3,1,""]},"nltk.inference.discourse.ReadingCommand":{combine_readings:[123,3,1,""],parse_to_readings:[123,3,1,""],process_thread:[123,3,1,""],to_fol:[123,3,1,""]},"nltk.inference.mace":{Mace:[124,2,1,""],MaceCommand:[124,2,1,""],decode_result:[124,1,1,""],demo:[124,1,1,""],spacer:[124,1,1,""],test_build_model:[124,1,1,""],test_make_relation_set:[124,1,1,""],test_model_found:[124,1,1,""],test_transform_output:[124,1,1,""]},"nltk.inference.mace.Mace":{__init__:[124,3,1,""]},"nltk.inference.mace.MaceCommand":{__init__:[124,3,1,""],valuation:[124,5,1,""]},"nltk.inference.nonmonotonic":{ClosedDomainProver:[125,2,1,""],ClosedWorldProver:[125,2,1,""],PredHolder:[125,2,1,""],ProverParseError:[125,6,1,""],SetHolder:[125,2,1,""],UniqueNamesProver:[125,2,1,""],closed_domain_demo:[125,1,1,""],closed_world_demo:[125,1,1,""],combination_prover_demo:[125,1,1,""],default_reasoning_demo:[125,1,1,""],demo:[125,1,1,""],get_domain:[125
,1,1,""],print_proof:[125,1,1,""],unique_names_demo:[125,1,1,""]},"nltk.inference.nonmonotonic.ClosedDomainProver":{assumptions:[125,3,1,""],goal:[125,3,1,""],replace_quants:[125,3,1,""]},"nltk.inference.nonmonotonic.ClosedWorldProver":{assumptions:[125,3,1,""]},"nltk.inference.nonmonotonic.PredHolder":{__init__:[125,3,1,""],append_prop:[125,3,1,""],append_sig:[125,3,1,""],validate_sig_len:[125,3,1,""]},"nltk.inference.nonmonotonic.UniqueNamesProver":{assumptions:[125,3,1,""]},"nltk.inference.prover9":{Prover9:[126,2,1,""],Prover9Command:[126,2,1,""],Prover9CommandParent:[126,2,1,""],Prover9Exception:[126,6,1,""],Prover9FatalException:[126,6,1,""],Prover9LimitExceededException:[126,6,1,""],Prover9Parent:[126,2,1,""],convert_to_prover9:[126,1,1,""],demo:[126,1,1,""],spacer:[126,1,1,""],test_config:[126,1,1,""],test_convert_to_prover9:[126,1,1,""],test_prove:[126,1,1,""]},"nltk.inference.prover9.Prover9":{__init__:[126,3,1,""],prover9_input:[126,3,1,""]},"nltk.inference.prover9.Prover9Command":{__init__:[126,3,1,""],decorate_proof:[126,3,1,""]},"nltk.inference.prover9.Prover9CommandParent":{print_assumptions:[126,3,1,""]},"nltk.inference.prover9.Prover9Exception":{__init__:[126,3,1,""]},"nltk.inference.prover9.Prover9Parent":{binary_locations:[126,3,1,""],config_prover9:[126,3,1,""],prover9_input:[126,3,1,""]},"nltk.inference.resolution":{BindingDict:[127,2,1,""],BindingException:[127,6,1,""],Clause:[127,2,1,""],DebugObject:[127,2,1,""],ProverParseError:[127,6,1,""],ResolutionProver:[127,2,1,""],ResolutionProverCommand:[127,2,1,""],UnificationException:[127,6,1,""],clausify:[127,1,1,""],demo:[127,1,1,""],most_general_unification:[127,1,1,""],resolution_test:[127,1,1,""],testResolutionProver:[127,1,1,""],test_clausify:[127,1,1,""]},"nltk.inference.resolution.BindingDict":{__init__:[127,3,1,""]},"nltk.inference.resolution.BindingException":{__init__:[127,3,1,""]},"nltk.inference.resolution.Clause":{__init__:[127,3,1,""],free:[127,3,1,""],isSubsetOf:[127,3,1,""],is_tautology:[127,3,1,""],replace:[127,3,1,""],substitute_bindings:[127,3,1,""],subsumes:[127,3,1,""],unify:[127,3,1,""]},"nltk.inference.resolution.DebugObject":{__init__:[127,3,1,""],line:[127,3,1,""]},"nltk.inference.resolution.ResolutionProver":{ANSWER_KEY:[127,4,1,""]},"nltk.inference.resolution.ResolutionProverCommand":{__init__:[127,3,1,""],find_answers:[127,3,1,""],prove:[127,3,1,""]},"nltk.inference.resolution.UnificationException":{__init__:[127,3,1,""]},"nltk.inference.tableau":{Agenda:[128,2,1,""],Categories:[128,2,1,""],Debug:[128,2,1,""],ProverParseError:[128,6,1,""],TableauProver:[128,2,1,""],TableauProverCommand:[128,2,1,""],demo:[128,1,1,""],tableau_test:[128,1,1,""],testHigherOrderTableauProver:[128,1,1,""],testTableauProver:[128,1,1,""]},"nltk.inference.tableau.Agenda":{__init__:[128,3,1,""],clone:[128,3,1,""],mark_alls_fresh:[128,3,1,""],mark_neqs_fresh:[128,3,1,""],pop_first:[128,3,1,""],put:[128,3,1,""],put_all:[128,3,1,""],put_atoms:[128,3,1,""],replace_all:[128,3,1,""]},"nltk.inference.tableau.Categories":{ALL:[128,4,1,""],AND:[128,4,1,""],APP:[128,4,1,""],ATOM:[128,4,1,""],D_NEG:[128,4,1,""],EQ:[128,4,1,""],EXISTS:[128,4,1,""],IFF:[128,4,1,""],IMP:[128,4,1,""],N_ALL:[128,4,1,""],N_AND:[128,4,1,""],N_APP:[128,4,1,""],N_ATOM:[128,4,1,""],N_EQ:[128,4,1,""],N_EXISTS:[128,4,1,""],N_IFF:[128,4,1,""],N_IMP:[128,4,1,""],N_OR:[128,4,1,""],N_PROP:[128,4,1,""],OR:[128,4,1,""],PROP:[128,4,1,""]},"nltk.inference.tableau.Debug":{__init__:[128,3,1,""],line:[128,3,1,""]},"nltk.inference.tableau.TableauProver":{is_atom:[128,3,1,"
"]},"nltk.inference.tableau.TableauProverCommand":{__init__:[128,3,1,""]},"nltk.internals":{Counter:[129,2,1,""],Deprecated:[129,2,1,""],ElementWrapper:[129,2,1,""],ReadError:[129,6,1,""],config_java:[129,1,1,""],deprecated:[129,1,1,""],find_binary:[129,1,1,""],find_binary_iter:[129,1,1,""],find_dir:[129,1,1,""],find_file:[129,1,1,""],find_file_iter:[129,1,1,""],find_jar:[129,1,1,""],find_jar_iter:[129,1,1,""],find_jars_within_path:[129,1,1,""],import_from_stdlib:[129,1,1,""],is_writable:[129,1,1,""],java:[129,1,1,""],overridden:[129,1,1,""],raise_unorderable_types:[129,1,1,""],read_int:[129,1,1,""],read_number:[129,1,1,""],read_str:[129,1,1,""],slice_bounds:[129,1,1,""]},"nltk.internals.Counter":{__init__:[129,3,1,""],get:[129,3,1,""]},"nltk.internals.Deprecated":{__new__:[129,3,1,""]},"nltk.internals.ElementWrapper":{__init__:[129,3,1,""],__new__:[129,3,1,""],find:[129,3,1,""],findall:[129,3,1,""],getchildren:[129,3,1,""],getiterator:[129,3,1,""],makeelement:[129,3,1,""],unwrap:[129,3,1,""]},"nltk.internals.ReadError":{__init__:[129,3,1,""]},"nltk.jsontags":{JSONTaggedDecoder:[130,2,1,""],JSONTaggedEncoder:[130,2,1,""],register_tag:[130,1,1,""]},"nltk.jsontags.JSONTaggedDecoder":{decode:[130,3,1,""],decode_obj:[130,3,1,""]},"nltk.jsontags.JSONTaggedEncoder":{"default":[130,3,1,""]},"nltk.lazyimport":{LazyModule:[131,2,1,""]},"nltk.lazyimport.LazyModule":{__init__:[131,3,1,""]},"nltk.lm":{AbsoluteDiscountingInterpolated:[132,2,1,""],KneserNeyInterpolated:[132,2,1,""],Laplace:[132,2,1,""],Lidstone:[132,2,1,""],MLE:[132,2,1,""],NgramCounter:[132,2,1,""],StupidBackoff:[132,2,1,""],Vocabulary:[132,2,1,""],WittenBellInterpolated:[132,2,1,""],api:[133,0,0,"-"],counter:[134,0,0,"-"],models:[135,0,0,"-"],preprocessing:[136,0,0,"-"],smoothing:[137,0,0,"-"],util:[138,0,0,"-"],vocabulary:[139,0,0,"-"]},"nltk.lm.AbsoluteDiscountingInterpolated":{__init__:[132,3,1,""]},"nltk.lm.KneserNeyInterpolated":{__init__:[132,3,1,""]},"nltk.lm.Laplace":{__init__:[132,3,1,""]},"nltk.lm.Lidstone":{__init__:[132,3,1,""],unmasked_score:[132,3,1,""]},"nltk.lm.MLE":{unmasked_score:[132,3,1,""]},"nltk.lm.NgramCounter":{N:[132,3,1,""],__init__:[132,3,1,""],update:[132,3,1,""]},"nltk.lm.StupidBackoff":{__init__:[132,3,1,""],unmasked_score:[132,3,1,""]},"nltk.lm.Vocabulary":{__init__:[132,3,1,""],cutoff:[132,5,1,""],lookup:[132,3,1,""],update:[132,3,1,""]},"nltk.lm.WittenBellInterpolated":{__init__:[132,3,1,""]},"nltk.lm.api":{LanguageModel:[133,2,1,""],Smoothing:[133,2,1,""]},"nltk.lm.api.LanguageModel":{__init__:[133,3,1,""],context_counts:[133,3,1,""],entropy:[133,3,1,""],fit:[133,3,1,""],generate:[133,3,1,""],logscore:[133,3,1,""],perplexity:[133,3,1,""],score:[133,3,1,""],unmasked_score:[133,3,1,""]},"nltk.lm.api.Smoothing":{__init__:[133,3,1,""],alpha_gamma:[133,3,1,""],unigram_score:[133,3,1,""]},"nltk.lm.counter":{NgramCounter:[134,2,1,""]},"nltk.lm.counter.NgramCounter":{N:[134,3,1,""],__init__:[134,3,1,""],update:[134,3,1,""]},"nltk.lm.models":{AbsoluteDiscountingInterpolated:[135,2,1,""],InterpolatedLanguageModel:[135,2,1,""],KneserNeyInterpolated:[135,2,1,""],Laplace:[135,2,1,""],Lidstone:[135,2,1,""],MLE:[135,2,1,""],StupidBackoff:[135,2,1,""],WittenBellInterpolated:[135,2,1,""]},"nltk.lm.models.AbsoluteDiscountingInterpolated":{__init__:[135,3,1,""]},"nltk.lm.models.InterpolatedLanguageModel":{__init__:[135,3,1,""],unmasked_score:[135,3,1,""]},"nltk.lm.models.KneserNeyInterpolated":{__init__:[135,3,1,""]},"nltk.lm.models.Laplace":{__init__:[135,3,1,""]},"nltk.lm.models.Lidstone":{__init__:[135,3,1,""],unmaske
d_score:[135,3,1,""]},"nltk.lm.models.MLE":{unmasked_score:[135,3,1,""]},"nltk.lm.models.StupidBackoff":{__init__:[135,3,1,""],unmasked_score:[135,3,1,""]},"nltk.lm.models.WittenBellInterpolated":{__init__:[135,3,1,""]},"nltk.lm.preprocessing":{flatten:[136,1,1,""],padded_everygram_pipeline:[136,1,1,""],padded_everygrams:[136,1,1,""]},"nltk.lm.smoothing":{AbsoluteDiscounting:[137,2,1,""],KneserNey:[137,2,1,""],WittenBell:[137,2,1,""]},"nltk.lm.smoothing.AbsoluteDiscounting":{__init__:[137,3,1,""],alpha_gamma:[137,3,1,""],unigram_score:[137,3,1,""]},"nltk.lm.smoothing.KneserNey":{__init__:[137,3,1,""],alpha_gamma:[137,3,1,""],unigram_score:[137,3,1,""]},"nltk.lm.smoothing.WittenBell":{__init__:[137,3,1,""],alpha_gamma:[137,3,1,""],unigram_score:[137,3,1,""]},"nltk.lm.util":{log_base2:[138,1,1,""]},"nltk.lm.vocabulary":{Vocabulary:[139,2,1,""]},"nltk.lm.vocabulary.Vocabulary":{__init__:[139,3,1,""],cutoff:[139,5,1,""],lookup:[139,3,1,""],update:[139,3,1,""]},"nltk.metrics":{agreement:[141,0,0,"-"],aline:[142,0,0,"-"],association:[143,0,0,"-"],confusionmatrix:[144,0,0,"-"],distance:[145,0,0,"-"],paice:[146,0,0,"-"],scores:[147,0,0,"-"],segmentation:[148,0,0,"-"],spearman:[149,0,0,"-"]},"nltk.metrics.agreement":{AnnotationTask:[141,2,1,""]},"nltk.metrics.agreement.AnnotationTask":{Ae_kappa:[141,3,1,""],Ao:[141,3,1,""],Disagreement:[141,3,1,""],Do_Kw:[141,3,1,""],Do_Kw_pairwise:[141,3,1,""],N:[141,3,1,""],Nck:[141,3,1,""],Nik:[141,3,1,""],Nk:[141,3,1,""],S:[141,3,1,""],__init__:[141,3,1,""],agr:[141,3,1,""],alpha:[141,3,1,""],avg_Ao:[141,3,1,""],kappa:[141,3,1,""],kappa_pairwise:[141,3,1,""],load_array:[141,3,1,""],multi_kappa:[141,3,1,""],pi:[141,3,1,""],weighted_kappa:[141,3,1,""],weighted_kappa_pairwise:[141,3,1,""]},"nltk.metrics.aline":{R:[142,1,1,""],V:[142,1,1,""],align:[142,1,1,""],delta:[142,1,1,""],demo:[142,1,1,""],diff:[142,1,1,""],sigma_exp:[142,1,1,""],sigma_skip:[142,1,1,""],sigma_sub:[142,1,1,""]},"nltk.metrics.association":{BigramAssocMeasures:[143,2,1,""],ContingencyMeasures:[143,2,1,""],NGRAM:[143,7,1,""],NgramAssocMeasures:[143,2,1,""],QuadgramAssocMeasures:[143,2,1,""],TOTAL:[143,7,1,""],TrigramAssocMeasures:[143,2,1,""],UNIGRAMS:[143,7,1,""],fisher_exact:[143,1,1,""]},"nltk.metrics.association.BigramAssocMeasures":{chi_sq:[143,3,1,""],dice:[143,3,1,""],fisher:[143,3,1,""],phi_sq:[143,3,1,""]},"nltk.metrics.association.ContingencyMeasures":{__init__:[143,3,1,""]},"nltk.metrics.association.NgramAssocMeasures":{chi_sq:[143,3,1,""],jaccard:[143,3,1,""],likelihood_ratio:[143,3,1,""],mi_like:[143,3,1,""],pmi:[143,3,1,""],poisson_stirling:[143,3,1,""],raw_freq:[143,3,1,""],student_t:[143,3,1,""]},"nltk.metrics.confusionmatrix":{ConfusionMatrix:[144,2,1,""],demo:[144,1,1,""]},"nltk.metrics.confusionmatrix.ConfusionMatrix":{__init__:[144,3,1,""],key:[144,3,1,""],pretty_format:[144,3,1,""]},"nltk.metrics.distance":{binary_distance:[145,1,1,""],custom_distance:[145,1,1,""],demo:[145,1,1,""],edit_distance:[145,1,1,""],edit_distance_align:[145,1,1,""],fractional_presence:[145,1,1,""],interval_distance:[145,1,1,""],jaccard_distance:[145,1,1,""],jaro_similarity:[145,1,1,""],jaro_winkler_similarity:[145,1,1,""],masi_distance:[145,1,1,""],presence:[145,1,1,""]},"nltk.metrics.paice":{Paice:[146,2,1,""],demo:[146,1,1,""],get_words_from_dictionary:[146,1,1,""]},"nltk.metrics.paice.Paice":{__init__:[146,3,1,""],update:[146,3,1,""]},"nltk.metrics.scores":{accuracy:[147,1,1,""],approxrand:[147,1,1,""],demo:[147,1,1,""],f_measure:[147,1,1,""],log_likelihood:[147,1,1,""],precision:[147,1,1,""],reca
ll:[147,1,1,""]},"nltk.metrics.segmentation":{ghd:[148,1,1,""],pk:[148,1,1,""],windowdiff:[148,1,1,""]},"nltk.metrics.spearman":{ranks_from_scores:[149,1,1,""],ranks_from_sequence:[149,1,1,""],spearman_correlation:[149,1,1,""]},"nltk.misc":{babelfish:[151,0,0,"-"],chomsky:[152,0,0,"-"],minimalset:[153,0,0,"-"],sort:[154,0,0,"-"],wordfinder:[155,0,0,"-"]},"nltk.misc.babelfish":{babelize_shell:[151,1,1,""]},"nltk.misc.chomsky":{generate_chomsky:[152,1,1,""]},"nltk.misc.minimalset":{MinimalSet:[153,2,1,""]},"nltk.misc.minimalset.MinimalSet":{__init__:[153,3,1,""],add:[153,3,1,""],contexts:[153,3,1,""],display:[153,3,1,""],display_all:[153,3,1,""],targets:[153,3,1,""]},"nltk.misc.sort":{bubble:[154,1,1,""],demo:[154,1,1,""],merge:[154,1,1,""],quick:[154,1,1,""],selection:[154,1,1,""]},"nltk.misc.wordfinder":{check:[155,1,1,""],revword:[155,1,1,""],step:[155,1,1,""],word_finder:[155,1,1,""],wordfinder:[155,1,1,""]},"nltk.parse":{api:[157,0,0,"-"],bllip:[158,0,0,"-"],chart:[159,0,0,"-"],corenlp:[160,0,0,"-"],dependencygraph:[161,0,0,"-"],earleychart:[162,0,0,"-"],evaluate:[163,0,0,"-"],featurechart:[164,0,0,"-"],generate:[165,0,0,"-"],malt:[166,0,0,"-"],nonprojectivedependencyparser:[167,0,0,"-"],pchart:[168,0,0,"-"],projectivedependencyparser:[169,0,0,"-"],recursivedescent:[170,0,0,"-"],shiftreduce:[171,0,0,"-"],stanford:[172,0,0,"-"],transitionparser:[173,0,0,"-"],util:[174,0,0,"-"],viterbi:[175,0,0,"-"]},"nltk.parse.api":{ParserI:[157,2,1,""]},"nltk.parse.api.ParserI":{grammar:[157,3,1,""],parse:[157,3,1,""],parse_all:[157,3,1,""],parse_one:[157,3,1,""],parse_sents:[157,3,1,""]},"nltk.parse.bllip":{BllipParser:[158,2,1,""]},"nltk.parse.bllip.BllipParser":{__init__:[158,3,1,""],from_unified_model_dir:[158,3,1,""],parse:[158,3,1,""],tagged_parse:[158,3,1,""]},"nltk.parse.chart":{AbstractChartRule:[159,2,1,""],BottomUpChartParser:[159,2,1,""],BottomUpLeftCornerChartParser:[159,2,1,""],BottomUpPredictCombineRule:[159,2,1,""],BottomUpPredictRule:[159,2,1,""],CachedTopDownPredictRule:[159,2,1,""],Chart:[159,2,1,""],ChartParser:[159,2,1,""],ChartRuleI:[159,2,1,""],EdgeI:[159,2,1,""],EmptyPredictRule:[159,2,1,""],FilteredBottomUpPredictCombineRule:[159,2,1,""],FilteredSingleEdgeFundamentalRule:[159,2,1,""],FundamentalRule:[159,2,1,""],LeafEdge:[159,2,1,""],LeafInitRule:[159,2,1,""],LeftCornerChartParser:[159,2,1,""],SingleEdgeFundamentalRule:[159,2,1,""],SteppingChartParser:[159,2,1,""],TopDownChartParser:[159,2,1,""],TopDownInitRule:[159,2,1,""],TopDownPredictRule:[159,2,1,""],TreeEdge:[159,2,1,""],demo:[159,1,1,""],demo_grammar:[159,1,1,""]},"nltk.parse.chart.AbstractChartRule":{apply:[159,3,1,""],apply_everywhere:[159,3,1,""]},"nltk.parse.chart.BottomUpChartParser":{__init__:[159,3,1,""]},"nltk.parse.chart.BottomUpLeftCornerChartParser":{__init__:[159,3,1,""]},"nltk.parse.chart.BottomUpPredictCombineRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.BottomUpPredictRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.CachedTopDownPredictRule":{__init__:[159,3,1,""],apply:[159,3,1,""]},"nltk.parse.chart.Chart":{__init__:[159,3,1,""],child_pointer_lists:[159,3,1,""],dot_digraph:[159,3,1,""],edges:[159,3,1,""],initialize:[159,3,1,""],insert:[159,3,1,""],insert_with_backpointer:[159,3,1,""],iteredges:[159,3,1,""],leaf:[159,3,1,""],leaves:[159,3,1,""],num_edges:[159,3,1,""],num_leaves:[159,3,1,""],parses:[159,3,1,""],pretty_format:[159,3,1,""],pretty_format_edge:[159,3,1,""],pretty_format_leaves:[159,3,1,""],select:[159,3,1,""],trees:[159,3,1,""]},"nltk.parse.chart.ChartPa
rser":{__init__:[159,3,1,""],chart_parse:[159,3,1,""],grammar:[159,3,1,""],parse:[159,3,1,""]},"nltk.parse.chart.ChartRuleI":{apply:[159,3,1,""],apply_everywhere:[159,3,1,""]},"nltk.parse.chart.EdgeI":{__init__:[159,3,1,""],dot:[159,3,1,""],end:[159,3,1,""],is_complete:[159,3,1,""],is_incomplete:[159,3,1,""],length:[159,3,1,""],lhs:[159,3,1,""],nextsym:[159,3,1,""],rhs:[159,3,1,""],span:[159,3,1,""],start:[159,3,1,""]},"nltk.parse.chart.EmptyPredictRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.FilteredBottomUpPredictCombineRule":{apply:[159,3,1,""]},"nltk.parse.chart.FundamentalRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.LeafEdge":{__init__:[159,3,1,""],dot:[159,3,1,""],end:[159,3,1,""],is_complete:[159,3,1,""],is_incomplete:[159,3,1,""],length:[159,3,1,""],lhs:[159,3,1,""],nextsym:[159,3,1,""],rhs:[159,3,1,""],span:[159,3,1,""],start:[159,3,1,""]},"nltk.parse.chart.LeafInitRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.LeftCornerChartParser":{__init__:[159,3,1,""]},"nltk.parse.chart.SingleEdgeFundamentalRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.SteppingChartParser":{__init__:[159,3,1,""],chart:[159,3,1,""],current_chartrule:[159,3,1,""],grammar:[159,3,1,""],initialize:[159,3,1,""],parse:[159,3,1,""],parses:[159,3,1,""],set_chart:[159,3,1,""],set_grammar:[159,3,1,""],set_strategy:[159,3,1,""],step:[159,3,1,""],strategy:[159,3,1,""]},"nltk.parse.chart.TopDownChartParser":{__init__:[159,3,1,""]},"nltk.parse.chart.TopDownInitRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.TopDownPredictRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.TreeEdge":{__init__:[159,3,1,""],dot:[159,3,1,""],end:[159,3,1,""],from_production:[159,3,1,""],is_complete:[159,3,1,""],is_incomplete:[159,3,1,""],length:[159,3,1,""],lhs:[159,3,1,""],move_dot_forward:[159,3,1,""],nextsym:[159,3,1,""],rhs:[159,3,1,""],span:[159,3,1,""],start:[159,3,1,""]},"nltk.parse.corenlp":{CoreNLPDependencyParser:[160,2,1,""],CoreNLPParser:[160,2,1,""],CoreNLPServer:[160,2,1,""],CoreNLPServerError:[160,6,1,""],GenericCoreNLPParser:[160,2,1,""],transform:[160,1,1,""],try_port:[160,1,1,""]},"nltk.parse.corenlp.CoreNLPDependencyParser":{make_tree:[160,3,1,""],parser_annotator:[160,4,1,""]},"nltk.parse.corenlp.CoreNLPParser":{make_tree:[160,3,1,""],parser_annotator:[160,4,1,""]},"nltk.parse.corenlp.CoreNLPServer":{__init__:[160,3,1,""],start:[160,3,1,""],stop:[160,3,1,""]},"nltk.parse.corenlp.GenericCoreNLPParser":{__init__:[160,3,1,""],api_call:[160,3,1,""],parse_sents:[160,3,1,""],parse_text:[160,3,1,""],raw_parse:[160,3,1,""],raw_parse_sents:[160,3,1,""],raw_tag_sents:[160,3,1,""],tag:[160,3,1,""],tag_sents:[160,3,1,""],tokenize:[160,3,1,""]},"nltk.parse.dependencygraph":{DependencyGraph:[161,2,1,""],DependencyGraphError:[161,6,1,""],conll_demo:[161,1,1,""],conll_file_demo:[161,1,1,""],cycle_finding_demo:[161,1,1,""],demo:[161,1,1,""],dot2img:[161,1,1,""],malt_demo:[161,1,1,""]},"nltk.parse.dependencygraph.DependencyGraph":{__init__:[161,3,1,""],add_arc:[161,3,1,""],add_node:[161,3,1,""],connect_graph:[161,3,1,""],contains_address:[161,3,1,""],contains_cycle:[161,3,1,""],get_by_address:[161,3,1,""],get_cycle_path:[161,3,1,""],left_children:[161,3,1,""],load:[161,3,1,""],nx_graph:[161,3,1,""],redirect_arcs:[161,3,1,""],remove_by_address:[161,3,1,""],right_children:[161,3,1,""],to_conll:[161,3,1,""],to_dot:[161,3,1,""],tree:[161,3,1,""],triples:[161,3,1,""]},"nltk.parse.earleychart":{CompleteFundamentalRule:[162,2,1
,""],CompleterRule:[162,2,1,""],EarleyChartParser:[162,2,1,""],FeatureCompleteFundamentalRule:[162,2,1,""],FeatureCompleterRule:[162,2,1,""],FeatureEarleyChartParser:[162,2,1,""],FeatureIncrementalBottomUpChartParser:[162,2,1,""],FeatureIncrementalBottomUpLeftCornerChartParser:[162,2,1,""],FeatureIncrementalChart:[162,2,1,""],FeatureIncrementalChartParser:[162,2,1,""],FeatureIncrementalTopDownChartParser:[162,2,1,""],FeaturePredictorRule:[162,2,1,""],FeatureScannerRule:[162,2,1,""],FilteredCompleteFundamentalRule:[162,2,1,""],IncrementalBottomUpChartParser:[162,2,1,""],IncrementalBottomUpLeftCornerChartParser:[162,2,1,""],IncrementalChart:[162,2,1,""],IncrementalChartParser:[162,2,1,""],IncrementalLeftCornerChartParser:[162,2,1,""],IncrementalTopDownChartParser:[162,2,1,""],PredictorRule:[162,2,1,""],ScannerRule:[162,2,1,""],demo:[162,1,1,""]},"nltk.parse.earleychart.CompleterRule":{apply:[162,3,1,""]},"nltk.parse.earleychart.EarleyChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureEarleyChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalChart":{select:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalTopDownChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FilteredCompleteFundamentalRule":{apply:[162,3,1,""]},"nltk.parse.earleychart.IncrementalBottomUpChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.IncrementalBottomUpLeftCornerChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.IncrementalChart":{edges:[162,3,1,""],initialize:[162,3,1,""],iteredges:[162,3,1,""],select:[162,3,1,""]},"nltk.parse.earleychart.IncrementalChartParser":{__init__:[162,3,1,""],chart_parse:[162,3,1,""]},"nltk.parse.earleychart.IncrementalLeftCornerChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.IncrementalTopDownChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.ScannerRule":{apply:[162,3,1,""]},"nltk.parse.evaluate":{DependencyEvaluator:[163,2,1,""]},"nltk.parse.evaluate.DependencyEvaluator":{__init__:[163,3,1,""],eval:[163,3,1,""]},"nltk.parse.featurechart":{FeatureBottomUpChartParser:[164,2,1,""],FeatureBottomUpLeftCornerChartParser:[164,2,1,""],FeatureBottomUpPredictCombineRule:[164,2,1,""],FeatureBottomUpPredictRule:[164,2,1,""],FeatureChart:[164,2,1,""],FeatureChartParser:[164,2,1,""],FeatureEmptyPredictRule:[164,2,1,""],FeatureFundamentalRule:[164,2,1,""],FeatureSingleEdgeFundamentalRule:[164,2,1,""],FeatureTopDownChartParser:[164,2,1,""],FeatureTopDownInitRule:[164,2,1,""],FeatureTopDownPredictRule:[164,2,1,""],FeatureTreeEdge:[164,2,1,""],InstantiateVarsChart:[164,2,1,""],demo:[164,1,1,""],demo_grammar:[164,1,1,""],run_profile:[164,1,1,""]},"nltk.parse.featurechart.FeatureBottomUpChartParser":{__init__:[164,3,1,""]},"nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser":{__init__:[164,3,1,""]},"nltk.parse.featurechart.FeatureBottomUpPredictCombineRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureBottomUpPredictRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureChart":{parses:[164,3,1,""],select:[164,3,1,""]},"nltk.parse.featurechart.FeatureChartParser":{__init__:[164,3,1,""]},"nltk.parse.featurechart.FeatureEmptyPredictRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureFundamentalRule":{apply:[164,3,1,""]},"nltk
.parse.featurechart.FeatureTopDownChartParser":{__init__:[164,3,1,""]},"nltk.parse.featurechart.FeatureTopDownInitRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureTopDownPredictRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureTreeEdge":{__init__:[164,3,1,""],bindings:[164,3,1,""],from_production:[164,3,1,""],move_dot_forward:[164,3,1,""],next_with_bindings:[164,3,1,""],variables:[164,3,1,""]},"nltk.parse.featurechart.InstantiateVarsChart":{__init__:[164,3,1,""],initialize:[164,3,1,""],insert:[164,3,1,""],inst_vars:[164,3,1,""],instantiate_edge:[164,3,1,""]},"nltk.parse.generate":{demo:[165,1,1,""],generate:[165,1,1,""]},"nltk.parse.malt":{MaltParser:[166,2,1,""],find_malt_model:[166,1,1,""],find_maltparser:[166,1,1,""],malt_regex_tagger:[166,1,1,""]},"nltk.parse.malt.MaltParser":{__init__:[166,3,1,""],generate_malt_command:[166,3,1,""],parse_sents:[166,3,1,""],parse_tagged_sents:[166,3,1,""],train:[166,3,1,""],train_from_file:[166,3,1,""]},"nltk.parse.nonprojectivedependencyparser":{DemoScorer:[167,2,1,""],DependencyScorerI:[167,2,1,""],NaiveBayesDependencyScorer:[167,2,1,""],NonprojectiveDependencyParser:[167,2,1,""],ProbabilisticNonprojectiveParser:[167,2,1,""],demo:[167,1,1,""],hall_demo:[167,1,1,""],nonprojective_conll_parse_demo:[167,1,1,""],rule_based_demo:[167,1,1,""]},"nltk.parse.nonprojectivedependencyparser.DemoScorer":{score:[167,3,1,""],train:[167,3,1,""]},"nltk.parse.nonprojectivedependencyparser.DependencyScorerI":{__init__:[167,3,1,""],score:[167,3,1,""],train:[167,3,1,""]},"nltk.parse.nonprojectivedependencyparser.NaiveBayesDependencyScorer":{__init__:[167,3,1,""],score:[167,3,1,""],train:[167,3,1,""]},"nltk.parse.nonprojectivedependencyparser.NonprojectiveDependencyParser":{__init__:[167,3,1,""],parse:[167,3,1,""]},"nltk.parse.nonprojectivedependencyparser.ProbabilisticNonprojectiveParser":{__init__:[167,3,1,""],best_incoming_arc:[167,3,1,""],collapse_nodes:[167,3,1,""],compute_max_subtract_score:[167,3,1,""],compute_original_indexes:[167,3,1,""],initialize_edge_scores:[167,3,1,""],original_best_arc:[167,3,1,""],parse:[167,3,1,""],train:[167,3,1,""],update_edge_scores:[167,3,1,""]},"nltk.parse.pchart":{BottomUpProbabilisticChartParser:[168,2,1,""],InsideChartParser:[168,2,1,""],LongestChartParser:[168,2,1,""],ProbabilisticBottomUpInitRule:[168,2,1,""],ProbabilisticBottomUpPredictRule:[168,2,1,""],ProbabilisticFundamentalRule:[168,2,1,""],ProbabilisticLeafEdge:[168,2,1,""],ProbabilisticTreeEdge:[168,2,1,""],RandomChartParser:[168,2,1,""],SingleEdgeProbabilisticFundamentalRule:[168,2,1,""],UnsortedChartParser:[168,2,1,""],demo:[168,1,1,""]},"nltk.parse.pchart.BottomUpProbabilisticChartParser":{__init__:[168,3,1,""],grammar:[168,3,1,""],parse:[168,3,1,""],sort_queue:[168,3,1,""],trace:[168,3,1,""]},"nltk.parse.pchart.InsideChartParser":{sort_queue:[168,3,1,""]},"nltk.parse.pchart.LongestChartParser":{sort_queue:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticBottomUpInitRule":{NUM_EDGES:[168,4,1,""],apply:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticBottomUpPredictRule":{NUM_EDGES:[168,4,1,""],apply:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticFundamentalRule":{NUM_EDGES:[168,4,1,""],apply:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticLeafEdge":{prob:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticTreeEdge":{__init__:[168,3,1,""],from_production:[168,3,1,""],prob:[168,3,1,""]},"nltk.parse.pchart.RandomChartParser":{sort_queue:[168,3,1,""]},"nltk.parse.pchart.SingleEdgeProbabilisticFundamentalRule":{NUM_EDGES:[168,4,1,""],apply:[168,3,1,""]},"nltk.parse.pchart
.UnsortedChartParser":{sort_queue:[168,3,1,""]},"nltk.parse.projectivedependencyparser":{ChartCell:[169,2,1,""],DependencySpan:[169,2,1,""],ProbabilisticProjectiveDependencyParser:[169,2,1,""],ProjectiveDependencyParser:[169,2,1,""],arity_parse_demo:[169,1,1,""],demo:[169,1,1,""],projective_prob_parse_demo:[169,1,1,""],projective_rule_parse_demo:[169,1,1,""]},"nltk.parse.projectivedependencyparser.ChartCell":{__init__:[169,3,1,""],add:[169,3,1,""]},"nltk.parse.projectivedependencyparser.DependencySpan":{__init__:[169,3,1,""],head_index:[169,3,1,""]},"nltk.parse.projectivedependencyparser.ProbabilisticProjectiveDependencyParser":{__init__:[169,3,1,""],compute_prob:[169,3,1,""],concatenate:[169,3,1,""],parse:[169,3,1,""],train:[169,3,1,""]},"nltk.parse.projectivedependencyparser.ProjectiveDependencyParser":{__init__:[169,3,1,""],concatenate:[169,3,1,""],parse:[169,3,1,""]},"nltk.parse.recursivedescent":{RecursiveDescentParser:[170,2,1,""],SteppingRecursiveDescentParser:[170,2,1,""],demo:[170,1,1,""]},"nltk.parse.recursivedescent.RecursiveDescentParser":{__init__:[170,3,1,""],grammar:[170,3,1,""],parse:[170,3,1,""],trace:[170,3,1,""]},"nltk.parse.recursivedescent.SteppingRecursiveDescentParser":{__init__:[170,3,1,""],backtrack:[170,3,1,""],currently_complete:[170,3,1,""],expand:[170,3,1,""],expandable_productions:[170,3,1,""],frontier:[170,3,1,""],initialize:[170,3,1,""],match:[170,3,1,""],parse:[170,3,1,""],parses:[170,3,1,""],remaining_text:[170,3,1,""],set_grammar:[170,3,1,""],step:[170,3,1,""],tree:[170,3,1,""],untried_expandable_productions:[170,3,1,""],untried_match:[170,3,1,""]},"nltk.parse.shiftreduce":{ShiftReduceParser:[171,2,1,""],SteppingShiftReduceParser:[171,2,1,""],demo:[171,1,1,""]},"nltk.parse.shiftreduce.ShiftReduceParser":{__init__:[171,3,1,""],grammar:[171,3,1,""],parse:[171,3,1,""],trace:[171,3,1,""]},"nltk.parse.shiftreduce.SteppingShiftReduceParser":{__init__:[171,3,1,""],initialize:[171,3,1,""],parse:[171,3,1,""],parses:[171,3,1,""],reduce:[171,3,1,""],reducible_productions:[171,3,1,""],remaining_text:[171,3,1,""],set_grammar:[171,3,1,""],shift:[171,3,1,""],stack:[171,3,1,""],step:[171,3,1,""],undo:[171,3,1,""]},"nltk.parse.stanford":{GenericStanfordParser:[172,2,1,""],StanfordDependencyParser:[172,2,1,""],StanfordNeuralDependencyParser:[172,2,1,""],StanfordParser:[172,2,1,""]},"nltk.parse.stanford.GenericStanfordParser":{__init__:[172,3,1,""],parse_sents:[172,3,1,""],raw_parse:[172,3,1,""],raw_parse_sents:[172,3,1,""],tagged_parse:[172,3,1,""],tagged_parse_sents:[172,3,1,""]},"nltk.parse.stanford.StanfordDependencyParser":{__init__:[172,3,1,""]},"nltk.parse.stanford.StanfordNeuralDependencyParser":{__init__:[172,3,1,""],tagged_parse_sents:[172,3,1,""]},"nltk.parse.stanford.StanfordParser":{__init__:[172,3,1,""]},"nltk.parse.transitionparser":{Configuration:[173,2,1,""],Transition:[173,2,1,""],TransitionParser:[173,2,1,""],demo:[173,1,1,""]},"nltk.parse.transitionparser.Configuration":{__init__:[173,3,1,""],extract_features:[173,3,1,""]},"nltk.parse.transitionparser.Transition":{LEFT_ARC:[173,4,1,""],REDUCE:[173,4,1,""],RIGHT_ARC:[173,4,1,""],SHIFT:[173,4,1,""],__init__:[173,3,1,""],left_arc:[173,3,1,""],reduce:[173,3,1,""],right_arc:[173,3,1,""],shift:[173,3,1,""]},"nltk.parse.transitionparser.TransitionParser":{ARC_EAGER:[173,4,1,""],ARC_STANDARD:[173,4,1,""],__init__:[173,3,1,""],parse:[173,3,1,""],train:[173,3,1,""]},"nltk.parse.util":{TestGrammar:[174,2,1,""],extract_test_sentences:[174,1,1,""],load_parser:[174,1,1,""],taggedsent_to_conll:[174,1,1,""],taggedsents_t
o_conll:[174,1,1,""]},"nltk.parse.util.TestGrammar":{__init__:[174,3,1,""],run:[174,3,1,""]},"nltk.parse.viterbi":{ViterbiParser:[175,2,1,""],demo:[175,1,1,""]},"nltk.parse.viterbi.ViterbiParser":{__init__:[175,3,1,""],grammar:[175,3,1,""],parse:[175,3,1,""],trace:[175,3,1,""]},"nltk.probability":{ConditionalFreqDist:[176,2,1,""],ConditionalProbDist:[176,2,1,""],ConditionalProbDistI:[176,2,1,""],CrossValidationProbDist:[176,2,1,""],DictionaryConditionalProbDist:[176,2,1,""],DictionaryProbDist:[176,2,1,""],ELEProbDist:[176,2,1,""],FreqDist:[176,2,1,""],HeldoutProbDist:[176,2,1,""],ImmutableProbabilisticMixIn:[176,2,1,""],KneserNeyProbDist:[176,2,1,""],LaplaceProbDist:[176,2,1,""],LidstoneProbDist:[176,2,1,""],MLEProbDist:[176,2,1,""],MutableProbDist:[176,2,1,""],ProbDistI:[176,2,1,""],ProbabilisticMixIn:[176,2,1,""],SimpleGoodTuringProbDist:[176,2,1,""],UniformProbDist:[176,2,1,""],WittenBellProbDist:[176,2,1,""],add_logs:[176,1,1,""],entropy:[176,1,1,""],log_likelihood:[176,1,1,""],sum_logs:[176,1,1,""]},"nltk.probability.ConditionalFreqDist":{N:[176,3,1,""],__init__:[176,3,1,""],conditions:[176,3,1,""],plot:[176,3,1,""],tabulate:[176,3,1,""]},"nltk.probability.ConditionalProbDist":{__init__:[176,3,1,""]},"nltk.probability.ConditionalProbDistI":{__init__:[176,3,1,""],conditions:[176,3,1,""]},"nltk.probability.CrossValidationProbDist":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],discount:[176,3,1,""],freqdists:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.DictionaryConditionalProbDist":{__init__:[176,3,1,""]},"nltk.probability.DictionaryProbDist":{__init__:[176,3,1,""],logprob:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.ELEProbDist":{__init__:[176,3,1,""]},"nltk.probability.FreqDist":{B:[176,3,1,""],N:[176,3,1,""],Nr:[176,3,1,""],__init__:[176,3,1,""],copy:[176,3,1,""],freq:[176,3,1,""],hapaxes:[176,3,1,""],max:[176,3,1,""],pformat:[176,3,1,""],plot:[176,3,1,""],pprint:[176,3,1,""],r_Nr:[176,3,1,""],setdefault:[176,3,1,""],tabulate:[176,3,1,""],update:[176,3,1,""]},"nltk.probability.HeldoutProbDist":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],base_fdist:[176,3,1,""],discount:[176,3,1,""],heldout_fdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.ImmutableProbabilisticMixIn":{set_logprob:[176,3,1,""],set_prob:[176,3,1,""]},"nltk.probability.KneserNeyProbDist":{__init__:[176,3,1,""],discount:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""],set_discount:[176,3,1,""]},"nltk.probability.LaplaceProbDist":{__init__:[176,3,1,""]},"nltk.probability.LidstoneProbDist":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],discount:[176,3,1,""],freqdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.MLEProbDist":{__init__:[176,3,1,""],freqdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.MutableProbDist":{__init__:[176,3,1,""],logprob:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""],update:[176,3,1,""]},"nltk.probability.ProbDistI":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],discount:[176,3,1,""],generate:[176,3,1,""],logprob:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.ProbabilisticMixIn":{__init__:[176,3,1,""],logprob:[176,3,1,""],prob:[176,3,1,""],set_logprob:[176,3,1,""],set_prob:[176,3,1,""]},"nltk.probability.SimpleGoodTuringProbDist":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],check:[176,3,1,""],discount:[176,3,1,""],fi
nd_best_fit:[176,3,1,""],freqdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""],smoothedNr:[176,3,1,""]},"nltk.probability.UniformProbDist":{__init__:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.WittenBellProbDist":{__init__:[176,3,1,""],discount:[176,3,1,""],freqdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.sem":{boxer:[178,0,0,"-"],chat80:[179,0,0,"-"],cooper_storage:[180,0,0,"-"],drt:[181,0,0,"-"],drt_glue_demo:[182,0,0,"-"],evaluate:[183,0,0,"-"],glue:[184,0,0,"-"],hole:[185,0,0,"-"],lfg:[186,0,0,"-"],linearlogic:[187,0,0,"-"],logic:[188,0,0,"-"],relextract:[189,0,0,"-"],skolemize:[190,0,0,"-"],util:[191,0,0,"-"]},"nltk.sem.boxer":{AbstractBoxerDrs:[178,2,1,""],Boxer:[178,2,1,""],BoxerCard:[178,2,1,""],BoxerDrs:[178,2,1,""],BoxerDrsParser:[178,2,1,""],BoxerEq:[178,2,1,""],BoxerIndexed:[178,2,1,""],BoxerNamed:[178,2,1,""],BoxerNot:[178,2,1,""],BoxerOr:[178,2,1,""],BoxerOutputDrsParser:[178,2,1,""],BoxerPred:[178,2,1,""],BoxerProp:[178,2,1,""],BoxerRel:[178,2,1,""],BoxerWhq:[178,2,1,""],NltkDrtBoxerDrsInterpreter:[178,2,1,""],PassthroughBoxerDrsInterpreter:[178,2,1,""],UnparseableInputException:[178,6,1,""]},"nltk.sem.boxer.AbstractBoxerDrs":{atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""],variable_types:[178,3,1,""],variables:[178,3,1,""]},"nltk.sem.boxer.Boxer":{__init__:[178,3,1,""],interpret:[178,3,1,""],interpret_multi:[178,3,1,""],interpret_multi_sents:[178,3,1,""],interpret_sents:[178,3,1,""],set_bin_dir:[178,3,1,""]},"nltk.sem.boxer.BoxerCard":{__init__:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerDrs":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerDrsParser":{__init__:[178,3,1,""],attempt_adjuncts:[178,3,1,""],get_all_symbols:[178,3,1,""],get_next_token_variable:[178,3,1,""],handle:[178,3,1,""],nullableIntToken:[178,3,1,""]},"nltk.sem.boxer.BoxerEq":{__init__:[178,3,1,""],atoms:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerIndexed":{__init__:[178,3,1,""],atoms:[178,3,1,""]},"nltk.sem.boxer.BoxerNamed":{__init__:[178,3,1,""],change_var:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerNot":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerOr":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerOutputDrsParser":{__init__:[178,3,1,""],attempt_adjuncts:[178,3,1,""],get_all_symbols:[178,3,1,""],handle:[178,3,1,""],handle_condition:[178,3,1,""],handle_drs:[178,3,1,""],parse:[178,3,1,""],parse_condition:[178,3,1,""],parse_drs:[178,3,1,""],parse_index:[178,3,1,""],parse_variable:[178,3,1,""]},"nltk.sem.boxer.BoxerPred":{__init__:[178,3,1,""],change_var:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerProp":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],referenced_labels:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerRel":{__init__:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerWhq":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.NltkDrtBoxerDrsInterpreter":{__init__:[178,3,1,""],interpret:[178,3,1,""]},"nltk.sem.boxer.PassthroughBoxerDrsInterpreter":{interpret:[178,3,1,""]},"nltk.sem.chat80":{Concept:[179,2,1,""],binary_concept:
[179,1,1,""],cities2table:[179,1,1,""],clause2concepts:[179,1,1,""],concepts:[179,1,1,""],label_indivs:[179,1,1,""],main:[179,1,1,""],make_lex:[179,1,1,""],make_valuation:[179,1,1,""],process_bundle:[179,1,1,""],sql_demo:[179,1,1,""],sql_query:[179,1,1,""],unary_concept:[179,1,1,""],val_dump:[179,1,1,""],val_load:[179,1,1,""]},"nltk.sem.chat80.Concept":{__init__:[179,3,1,""],augment:[179,3,1,""],close:[179,3,1,""]},"nltk.sem.cooper_storage":{CooperStore:[180,2,1,""],demo:[180,1,1,""],parse_with_bindops:[180,1,1,""]},"nltk.sem.cooper_storage.CooperStore":{__init__:[180,3,1,""],s_retrieve:[180,3,1,""]},"nltk.sem.drt":{AnaphoraResolutionException:[181,6,1,""],DRS:[181,2,1,""],DrsDrawer:[181,2,1,""],DrtAbstractVariableExpression:[181,2,1,""],DrtApplicationExpression:[181,2,1,""],DrtBinaryExpression:[181,2,1,""],DrtBooleanExpression:[181,2,1,""],DrtConcatenation:[181,2,1,""],DrtConstantExpression:[181,2,1,""],DrtEqualityExpression:[181,2,1,""],DrtEventVariableExpression:[181,2,1,""],DrtExpression:[181,2,1,""],DrtFunctionVariableExpression:[181,2,1,""],DrtIndividualVariableExpression:[181,2,1,""],DrtLambdaExpression:[181,2,1,""],DrtNegatedExpression:[181,2,1,""],DrtOrExpression:[181,2,1,""],DrtParser:[181,2,1,""],DrtProposition:[181,2,1,""],DrtTokens:[181,2,1,""],DrtVariableExpression:[181,1,1,""],PossibleAntecedents:[181,2,1,""],demo:[181,1,1,""],resolve_anaphora:[181,1,1,""],test_draw:[181,1,1,""]},"nltk.sem.drt.DRS":{__init__:[181,3,1,""],eliminate_equality:[181,3,1,""],fol:[181,3,1,""],free:[181,3,1,""],get_refs:[181,3,1,""],replace:[181,3,1,""],visit:[181,3,1,""],visit_structured:[181,3,1,""]},"nltk.sem.drt.DrsDrawer":{BUFFER:[181,4,1,""],OUTERSPACE:[181,4,1,""],TOPSPACE:[181,4,1,""],__init__:[181,3,1,""],draw:[181,3,1,""]},"nltk.sem.drt.DrtAbstractVariableExpression":{eliminate_equality:[181,3,1,""],fol:[181,3,1,""],get_refs:[181,3,1,""]},"nltk.sem.drt.DrtApplicationExpression":{fol:[181,3,1,""],get_refs:[181,3,1,""]},"nltk.sem.drt.DrtBinaryExpression":{get_refs:[181,3,1,""]},"nltk.sem.drt.DrtConcatenation":{__init__:[181,3,1,""],eliminate_equality:[181,3,1,""],fol:[181,3,1,""],getOp:[181,3,1,""],get_refs:[181,3,1,""],replace:[181,3,1,""],simplify:[181,3,1,""],visit:[181,3,1,""]},"nltk.sem.drt.DrtEqualityExpression":{fol:[181,3,1,""]},"nltk.sem.drt.DrtExpression":{applyto:[181,3,1,""],draw:[181,3,1,""],eliminate_equality:[181,3,1,""],equiv:[181,3,1,""],fromstring:[181,3,1,""],get_refs:[181,3,1,""],is_pronoun_function:[181,3,1,""],make_EqualityExpression:[181,3,1,""],make_VariableExpression:[181,3,1,""],pretty_format:[181,3,1,""],pretty_print:[181,3,1,""],resolve_anaphora:[181,3,1,""],type:[181,5,1,""],typecheck:[181,3,1,""]},"nltk.sem.drt.DrtLambdaExpression":{alpha_convert:[181,3,1,""],fol:[181,3,1,""],get_refs:[181,3,1,""]},"nltk.sem.drt.DrtNegatedExpression":{fol:[181,3,1,""],get_refs:[181,3,1,""]},"nltk.sem.drt.DrtOrExpression":{fol:[181,3,1,""]},"nltk.sem.drt.DrtParser":{__init__:[181,3,1,""],get_BooleanExpression_factory:[181,3,1,""],get_all_symbols:[181,3,1,""],handle:[181,3,1,""],handle_DRS:[181,3,1,""],handle_conds:[181,3,1,""],handle_prop:[181,3,1,""],handle_refs:[181,3,1,""],isvariable:[181,3,1,""],make_ApplicationExpression:[181,3,1,""],make_BooleanExpression:[181,3,1,""],make_EqualityExpression:[181,3,1,""],make_LambdaExpression:[181,3,1,""],make_NegatedExpression:[181,3,1,""],make_VariableExpression:[181,3,1,""]},"nltk.sem.drt.DrtProposition":{__init__:[181,3,1,""],eliminate_equality:[181,3,1,""],fol:[181,3,1,""],get_refs:[181,3,1,""],replace:[181,3,1,""],visit:[181,3,1,""],vi
sit_structured:[181,3,1,""]},"nltk.sem.drt.DrtTokens":{CLOSE_BRACKET:[181,4,1,""],COLON:[181,4,1,""],DRS:[181,4,1,""],DRS_CONC:[181,4,1,""],OPEN_BRACKET:[181,4,1,""],PRONOUN:[181,4,1,""],PUNCT:[181,4,1,""],SYMBOLS:[181,4,1,""],TOKENS:[181,4,1,""]},"nltk.sem.drt.PossibleAntecedents":{free:[181,3,1,""],replace:[181,3,1,""]},"nltk.sem.drt_glue_demo":{DrsWidget:[182,2,1,""],DrtGlueDemo:[182,2,1,""],demo:[182,1,1,""]},"nltk.sem.drt_glue_demo.DrsWidget":{__init__:[182,3,1,""],clear:[182,3,1,""],draw:[182,3,1,""]},"nltk.sem.drt_glue_demo.DrtGlueDemo":{__init__:[182,3,1,""],about:[182,3,1,""],destroy:[182,3,1,""],mainloop:[182,3,1,""],next:[182,3,1,""],postscript:[182,3,1,""],prev:[182,3,1,""],resize:[182,3,1,""]},"nltk.sem.evaluate":{Assignment:[183,2,1,""],Error:[183,6,1,""],Model:[183,2,1,""],Undefined:[183,6,1,""],Valuation:[183,2,1,""],arity:[183,1,1,""],demo:[183,1,1,""],foldemo:[183,1,1,""],folmodel:[183,1,1,""],is_rel:[183,1,1,""],propdemo:[183,1,1,""],read_valuation:[183,1,1,""],satdemo:[183,1,1,""],set2rel:[183,1,1,""],trace:[183,1,1,""]},"nltk.sem.evaluate.Assignment":{__init__:[183,3,1,""],add:[183,3,1,""],copy:[183,3,1,""],purge:[183,3,1,""]},"nltk.sem.evaluate.Model":{__init__:[183,3,1,""],evaluate:[183,3,1,""],i:[183,3,1,""],satisfiers:[183,3,1,""],satisfy:[183,3,1,""]},"nltk.sem.evaluate.Valuation":{__init__:[183,3,1,""],domain:[183,5,1,""],fromstring:[183,3,1,""],symbols:[183,5,1,""]},"nltk.sem.glue":{DrtGlue:[184,2,1,""],DrtGlueDict:[184,2,1,""],DrtGlueFormula:[184,2,1,""],Glue:[184,2,1,""],GlueDict:[184,2,1,""],GlueFormula:[184,2,1,""],demo:[184,1,1,""]},"nltk.sem.glue.DrtGlue":{__init__:[184,3,1,""],get_glue_dict:[184,3,1,""]},"nltk.sem.glue.DrtGlueDict":{get_GlueFormula_factory:[184,3,1,""]},"nltk.sem.glue.DrtGlueFormula":{__init__:[184,3,1,""],make_LambdaExpression:[184,3,1,""],make_VariableExpression:[184,3,1,""]},"nltk.sem.glue.Glue":{__init__:[184,3,1,""],dep_parse:[184,3,1,""],depgraph_to_glue:[184,3,1,""],get_glue_dict:[184,3,1,""],get_pos_tagger:[184,3,1,""],get_readings:[184,3,1,""],gfl_to_compiled:[184,3,1,""],parse_to_compiled:[184,3,1,""],parse_to_meaning:[184,3,1,""],train_depparser:[184,3,1,""]},"nltk.sem.glue.GlueDict":{__init__:[184,3,1,""],add_missing_dependencies:[184,3,1,""],find_label_name:[184,3,1,""],get_GlueFormula_factory:[184,3,1,""],get_glueformulas_from_semtype_entry:[184,3,1,""],get_label:[184,3,1,""],get_meaning_formula:[184,3,1,""],get_semtypes:[184,3,1,""],initialize_labels:[184,3,1,""],lookup:[184,3,1,""],lookup_unique:[184,3,1,""],read_file:[184,3,1,""],to_glueformula_list:[184,3,1,""]},"nltk.sem.glue.GlueFormula":{__init__:[184,3,1,""],applyto:[184,3,1,""],compile:[184,3,1,""],lambda_abstract:[184,3,1,""],make_LambdaExpression:[184,3,1,""],make_VariableExpression:[184,3,1,""],simplify:[184,3,1,""]},"nltk.sem.hole":{Constants:[185,2,1,""],Constraint:[185,2,1,""],HoleSemantics:[185,2,1,""],hole_readings:[185,1,1,""]},"nltk.sem.hole.Constants":{ALL:[185,4,1,""],AND:[185,4,1,""],EXISTS:[185,4,1,""],HOLE:[185,4,1,""],IFF:[185,4,1,""],IMP:[185,4,1,""],LABEL:[185,4,1,""],LEQ:[185,4,1,""],MAP:[185,4,1,""],NOT:[185,4,1,""],OR:[185,4,1,""],PRED:[185,4,1,""]},"nltk.sem.hole.Constraint":{__init__:[185,3,1,""]},"nltk.sem.hole.HoleSemantics":{__init__:[185,3,1,""],formula_tree:[185,3,1,""],is_node:[185,3,1,""],pluggings:[185,3,1,""]},"nltk.sem.lfg":{FStructure:[186,2,1,""],demo_read_depgraph:[186,1,1,""]},"nltk.sem.lfg.FStructure":{pretty_format:[186,3,1,""],read_depgraph:[186,3,1,""],safeappend:[186,3,1,""],to_depgraph:[186,3,1,""],to_glueformula_list:[186,3
,1,""]},"nltk.sem.linearlogic":{ApplicationExpression:[187,2,1,""],AtomicExpression:[187,2,1,""],BindingDict:[187,2,1,""],ConstantExpression:[187,2,1,""],Expression:[187,2,1,""],ImpExpression:[187,2,1,""],LinearLogicApplicationException:[187,6,1,""],LinearLogicParser:[187,2,1,""],Tokens:[187,2,1,""],UnificationException:[187,6,1,""],VariableBindingException:[187,6,1,""],VariableExpression:[187,2,1,""],demo:[187,1,1,""]},"nltk.sem.linearlogic.ApplicationExpression":{__init__:[187,3,1,""],simplify:[187,3,1,""]},"nltk.sem.linearlogic.AtomicExpression":{__init__:[187,3,1,""],compile_neg:[187,3,1,""],compile_pos:[187,3,1,""],initialize_labels:[187,3,1,""],simplify:[187,3,1,""]},"nltk.sem.linearlogic.BindingDict":{__init__:[187,3,1,""]},"nltk.sem.linearlogic.ConstantExpression":{unify:[187,3,1,""]},"nltk.sem.linearlogic.Expression":{applyto:[187,3,1,""],fromstring:[187,3,1,""]},"nltk.sem.linearlogic.ImpExpression":{__init__:[187,3,1,""],compile_neg:[187,3,1,""],compile_pos:[187,3,1,""],initialize_labels:[187,3,1,""],simplify:[187,3,1,""],unify:[187,3,1,""]},"nltk.sem.linearlogic.LinearLogicParser":{__init__:[187,3,1,""],attempt_ApplicationExpression:[187,3,1,""],get_BooleanExpression_factory:[187,3,1,""],get_all_symbols:[187,3,1,""],handle:[187,3,1,""],make_BooleanExpression:[187,3,1,""],make_VariableExpression:[187,3,1,""]},"nltk.sem.linearlogic.Tokens":{CLOSE:[187,4,1,""],IMP:[187,4,1,""],OPEN:[187,4,1,""],PUNCT:[187,4,1,""],TOKENS:[187,4,1,""]},"nltk.sem.linearlogic.UnificationException":{__init__:[187,3,1,""]},"nltk.sem.linearlogic.VariableExpression":{unify:[187,3,1,""]},"nltk.sem.logic":{AbstractVariableExpression:[188,2,1,""],AllExpression:[188,2,1,""],AndExpression:[188,2,1,""],AnyType:[188,2,1,""],ApplicationExpression:[188,2,1,""],BasicType:[188,2,1,""],BinaryExpression:[188,2,1,""],BooleanExpression:[188,2,1,""],ComplexType:[188,2,1,""],ConstantExpression:[188,2,1,""],EntityType:[188,2,1,""],EqualityExpression:[188,2,1,""],EventType:[188,2,1,""],EventVariableExpression:[188,2,1,""],ExistsExpression:[188,2,1,""],ExpectedMoreTokensException:[188,6,1,""],Expression:[188,2,1,""],FunctionVariableExpression:[188,2,1,""],IffExpression:[188,2,1,""],IllegalTypeException:[188,6,1,""],ImpExpression:[188,2,1,""],InconsistentTypeHierarchyException:[188,6,1,""],IndividualVariableExpression:[188,2,1,""],LambdaExpression:[188,2,1,""],LogicParser:[188,2,1,""],LogicalExpressionException:[188,6,1,""],NegatedExpression:[188,2,1,""],OrExpression:[188,2,1,""],QuantifiedExpression:[188,2,1,""],SubstituteBindingsI:[188,2,1,""],Tokens:[188,2,1,""],TruthValueType:[188,2,1,""],Type:[188,2,1,""],TypeException:[188,6,1,""],TypeResolutionException:[188,6,1,""],UnexpectedTokenException:[188,6,1,""],Variable:[188,2,1,""],VariableBinderExpression:[188,2,1,""],VariableExpression:[188,1,1,""],binding_ops:[188,1,1,""],boolean_ops:[188,1,1,""],demo:[188,1,1,""],demoException:[188,1,1,""],demo_errors:[188,1,1,""],equality_preds:[188,1,1,""],is_eventvar:[188,1,1,""],is_funcvar:[188,1,1,""],is_indvar:[188,1,1,""],printtype:[188,1,1,""],read_logic:[188,1,1,""],read_type:[188,1,1,""],skolem_function:[188,1,1,""],typecheck:[188,1,1,""],unique_variable:[188,1,1,""]},"nltk.sem.logic.AbstractVariableExpression":{__init__:[188,3,1,""],findtype:[188,3,1,""],predicates:[188,3,1,""],replace:[188,3,1,""],simplify:[188,3,1,""]},"nltk.sem.logic.AllExpression":{getQuantifier:[188,3,1,""]},"nltk.sem.logic.AndExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.AnyType":{__init__:[188,3,1,""],first:[188,5,1,""],matches:[188,3,1,""],resolve:[188
,3,1,""],second:[188,5,1,""],str:[188,3,1,""]},"nltk.sem.logic.ApplicationExpression":{__init__:[188,3,1,""],args:[188,5,1,""],constants:[188,3,1,""],findtype:[188,3,1,""],is_atom:[188,3,1,""],pred:[188,5,1,""],predicates:[188,3,1,""],simplify:[188,3,1,""],type:[188,5,1,""],uncurry:[188,3,1,""],visit:[188,3,1,""]},"nltk.sem.logic.BasicType":{matches:[188,3,1,""],resolve:[188,3,1,""]},"nltk.sem.logic.BinaryExpression":{__init__:[188,3,1,""],findtype:[188,3,1,""],type:[188,5,1,""],visit:[188,3,1,""]},"nltk.sem.logic.ComplexType":{__init__:[188,3,1,""],matches:[188,3,1,""],resolve:[188,3,1,""],str:[188,3,1,""]},"nltk.sem.logic.ConstantExpression":{constants:[188,3,1,""],free:[188,3,1,""],type:[188,4,1,""]},"nltk.sem.logic.EntityType":{str:[188,3,1,""]},"nltk.sem.logic.EqualityExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.EventType":{str:[188,3,1,""]},"nltk.sem.logic.EventVariableExpression":{type:[188,4,1,""]},"nltk.sem.logic.ExistsExpression":{getQuantifier:[188,3,1,""]},"nltk.sem.logic.ExpectedMoreTokensException":{__init__:[188,3,1,""]},"nltk.sem.logic.Expression":{applyto:[188,3,1,""],constants:[188,3,1,""],equiv:[188,3,1,""],findtype:[188,3,1,""],free:[188,3,1,""],fromstring:[188,3,1,""],make_VariableExpression:[188,3,1,""],negate:[188,3,1,""],normalize:[188,3,1,""],predicates:[188,3,1,""],replace:[188,3,1,""],simplify:[188,3,1,""],substitute_bindings:[188,3,1,""],typecheck:[188,3,1,""],variables:[188,3,1,""],visit:[188,3,1,""],visit_structured:[188,3,1,""]},"nltk.sem.logic.FunctionVariableExpression":{constants:[188,3,1,""],free:[188,3,1,""],type:[188,4,1,""]},"nltk.sem.logic.IffExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.IllegalTypeException":{__init__:[188,3,1,""]},"nltk.sem.logic.ImpExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.InconsistentTypeHierarchyException":{__init__:[188,3,1,""]},"nltk.sem.logic.IndividualVariableExpression":{constants:[188,3,1,""],free:[188,3,1,""],type:[188,5,1,""]},"nltk.sem.logic.LambdaExpression":{type:[188,5,1,""]},"nltk.sem.logic.LogicParser":{__init__:[188,3,1,""],assertNextToken:[188,3,1,""],assertToken:[188,3,1,""],attempt_ApplicationExpression:[188,3,1,""],attempt_BooleanExpression:[188,3,1,""],attempt_EqualityExpression:[188,3,1,""],attempt_adjuncts:[188,3,1,""],get_BooleanExpression_factory:[188,3,1,""],get_QuantifiedExpression_factory:[188,3,1,""],get_all_symbols:[188,3,1,""],get_next_token_variable:[188,3,1,""],handle:[188,3,1,""],handle_lambda:[188,3,1,""],handle_negation:[188,3,1,""],handle_open:[188,3,1,""],handle_quant:[188,3,1,""],handle_variable:[188,3,1,""],has_priority:[188,3,1,""],inRange:[188,3,1,""],isvariable:[188,3,1,""],make_ApplicationExpression:[188,3,1,""],make_BooleanExpression:[188,3,1,""],make_EqualityExpression:[188,3,1,""],make_LambdaExpression:[188,3,1,""],make_NegatedExpression:[188,3,1,""],make_QuanifiedExpression:[188,3,1,""],make_VariableExpression:[188,3,1,""],parse:[188,3,1,""],process:[188,3,1,""],process_next_expression:[188,3,1,""],process_quoted_token:[188,3,1,""],token:[188,3,1,""],type_check:[188,4,1,""]},"nltk.sem.logic.LogicalExpressionException":{__init__:[188,3,1,""]},"nltk.sem.logic.NegatedExpression":{__init__:[188,3,1,""],findtype:[188,3,1,""],negate:[188,3,1,""],type:[188,5,1,""],visit:[188,3,1,""]},"nltk.sem.logic.OrExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.QuantifiedExpression":{type:[188,5,1,""]},"nltk.sem.logic.SubstituteBindingsI":{substitute_bindings:[188,3,1,""],variables:[188,3,1,""]},"nltk.sem.logic.Tokens":{ALL:[188,4,1,""],ALL_LIST:[188,4,1,""],AND:[188,4,1,""],AND_LIST:[188,
4,1,""],BINOPS:[188,4,1,""],CLOSE:[188,4,1,""],COMMA:[188,4,1,""],DOT:[188,4,1,""],EQ:[188,4,1,""],EQ_LIST:[188,4,1,""],EXISTS:[188,4,1,""],EXISTS_LIST:[188,4,1,""],IFF:[188,4,1,""],IFF_LIST:[188,4,1,""],IMP:[188,4,1,""],IMP_LIST:[188,4,1,""],LAMBDA:[188,4,1,""],LAMBDA_LIST:[188,4,1,""],NEQ:[188,4,1,""],NEQ_LIST:[188,4,1,""],NOT:[188,4,1,""],NOT_LIST:[188,4,1,""],OPEN:[188,4,1,""],OR:[188,4,1,""],OR_LIST:[188,4,1,""],PUNCT:[188,4,1,""],QUANTS:[188,4,1,""],SYMBOLS:[188,4,1,""],TOKENS:[188,4,1,""]},"nltk.sem.logic.TruthValueType":{str:[188,3,1,""]},"nltk.sem.logic.Type":{fromstring:[188,3,1,""]},"nltk.sem.logic.TypeException":{__init__:[188,3,1,""]},"nltk.sem.logic.TypeResolutionException":{__init__:[188,3,1,""]},"nltk.sem.logic.UnexpectedTokenException":{__init__:[188,3,1,""]},"nltk.sem.logic.Variable":{__init__:[188,3,1,""],substitute_bindings:[188,3,1,""]},"nltk.sem.logic.VariableBinderExpression":{__init__:[188,3,1,""],alpha_convert:[188,3,1,""],findtype:[188,3,1,""],free:[188,3,1,""],replace:[188,3,1,""],visit:[188,3,1,""],visit_structured:[188,3,1,""]},"nltk.sem.relextract":{class_abbrev:[189,1,1,""],clause:[189,1,1,""],conllesp:[189,1,1,""],conllned:[189,1,1,""],descape_entity:[189,1,1,""],extract_rels:[189,1,1,""],ieer_headlines:[189,1,1,""],in_demo:[189,1,1,""],list2sym:[189,1,1,""],ne_chunked:[189,1,1,""],roles_demo:[189,1,1,""],rtuple:[189,1,1,""],semi_rel2reldict:[189,1,1,""],tree2semi_rel:[189,1,1,""]},"nltk.sem.skolemize":{skolemize:[190,1,1,""],to_cnf:[190,1,1,""]},"nltk.sem.util":{demo:[191,1,1,""],demo_legacy_grammar:[191,1,1,""],demo_model0:[191,1,1,""],evaluate_sents:[191,1,1,""],interpret_sents:[191,1,1,""],parse_sents:[191,1,1,""],read_sents:[191,1,1,""],root_semrep:[191,1,1,""]},"nltk.sentiment":{sentiment_analyzer:[193,0,0,"-"],util:[194,0,0,"-"],vader:[195,0,0,"-"]},"nltk.sentiment.sentiment_analyzer":{SentimentAnalyzer:[193,2,1,""]},"nltk.sentiment.sentiment_analyzer.SentimentAnalyzer":{__init__:[193,3,1,""],add_feat_extractor:[193,3,1,""],all_words:[193,3,1,""],apply_features:[193,3,1,""],bigram_collocation_feats:[193,3,1,""],classify:[193,3,1,""],evaluate:[193,3,1,""],extract_features:[193,3,1,""],save_file:[193,3,1,""],train:[193,3,1,""],unigram_word_feats:[193,3,1,""]},"nltk.sentiment.util":{demo_liu_hu_lexicon:[194,1,1,""],demo_movie_reviews:[194,1,1,""],demo_sent_subjectivity:[194,1,1,""],demo_subjectivity:[194,1,1,""],demo_tweets:[194,1,1,""],demo_vader_instance:[194,1,1,""],demo_vader_tweets:[194,1,1,""],extract_bigram_feats:[194,1,1,""],extract_unigram_feats:[194,1,1,""],json2csv_preprocess:[194,1,1,""],mark_negation:[194,1,1,""],output_markdown:[194,1,1,""],parse_tweets_set:[194,1,1,""],split_train_test:[194,1,1,""],timer:[194,1,1,""]},"nltk.sentiment.vader":{SentiText:[195,2,1,""],SentimentIntensityAnalyzer:[195,2,1,""],VaderConstants:[195,2,1,""]},"nltk.sentiment.vader.SentiText":{__init__:[195,3,1,""],allcap_differential:[195,3,1,""]},"nltk.sentiment.vader.SentimentIntensityAnalyzer":{__init__:[195,3,1,""],make_lex_dict:[195,3,1,""],polarity_scores:[195,3,1,""],score_valence:[195,3,1,""],sentiment_valence:[195,3,1,""]},"nltk.sentiment.vader.VaderConstants":{BOOSTER_DICT:[195,4,1,""],B_DECR:[195,4,1,""],B_INCR:[195,4,1,""],C_INCR:[195,4,1,""],NEGATE:[195,4,1,""],N_SCALAR:[195,4,1,""],PUNC_LIST:[195,4,1,""],REGEX_REMOVE_PUNCTUATION:[195,4,1,""],SPECIAL_CASE_IDIOMS:[195,4,1,""],__init__:[195,3,1,""],negated:[195,3,1,""],normalize:[195,3,1,""],scalar_inc_dec:[195,3,1,""]},"nltk.stem":{api:[197,0,0,"-"],arlstem2:[199,0,0,"-"],arlstem:[198,0,0,"-"],cistem:[200,
0,0,"-"],isri:[201,0,0,"-"],lancaster:[202,0,0,"-"],porter:[203,0,0,"-"],regexp:[204,0,0,"-"],rslp:[205,0,0,"-"],snowball:[206,0,0,"-"],util:[207,0,0,"-"],wordnet:[208,0,0,"-"]},"nltk.stem.api":{StemmerI:[197,2,1,""]},"nltk.stem.api.StemmerI":{stem:[197,3,1,""]},"nltk.stem.arlstem":{ARLSTem:[198,2,1,""]},"nltk.stem.arlstem.ARLSTem":{__init__:[198,3,1,""],fem2masc:[198,3,1,""],norm:[198,3,1,""],plur2sing:[198,3,1,""],pref:[198,3,1,""],stem:[198,3,1,""],suff:[198,3,1,""],verb:[198,3,1,""],verb_t1:[198,3,1,""],verb_t2:[198,3,1,""],verb_t3:[198,3,1,""],verb_t4:[198,3,1,""],verb_t5:[198,3,1,""],verb_t6:[198,3,1,""]},"nltk.stem.arlstem2":{ARLSTem2:[199,2,1,""]},"nltk.stem.arlstem2.ARLSTem2":{__init__:[199,3,1,""],adjective:[199,3,1,""],fem2masc:[199,3,1,""],norm:[199,3,1,""],plur2sing:[199,3,1,""],pref:[199,3,1,""],stem1:[199,3,1,""],stem:[199,3,1,""],suff:[199,3,1,""],verb:[199,3,1,""],verb_t1:[199,3,1,""],verb_t2:[199,3,1,""],verb_t3:[199,3,1,""],verb_t4:[199,3,1,""],verb_t5:[199,3,1,""],verb_t6:[199,3,1,""]},"nltk.stem.cistem":{Cistem:[200,2,1,""]},"nltk.stem.cistem.Cistem":{__init__:[200,3,1,""],repl_xx:[200,4,1,""],repl_xx_back:[200,4,1,""],replace_back:[200,3,1,""],replace_to:[200,3,1,""],segment:[200,3,1,""],stem:[200,3,1,""],strip_emr:[200,4,1,""],strip_esn:[200,4,1,""],strip_ge:[200,4,1,""],strip_nd:[200,4,1,""],strip_t:[200,4,1,""]},"nltk.stem.isri":{ISRIStemmer:[201,2,1,""]},"nltk.stem.isri.ISRIStemmer":{__init__:[201,3,1,""],end_w5:[201,3,1,""],end_w6:[201,3,1,""],norm:[201,3,1,""],pre1:[201,3,1,""],pre32:[201,3,1,""],pro_w4:[201,3,1,""],pro_w53:[201,3,1,""],pro_w54:[201,3,1,""],pro_w64:[201,3,1,""],pro_w6:[201,3,1,""],stem:[201,3,1,""],suf1:[201,3,1,""],suf32:[201,3,1,""],waw:[201,3,1,""]},"nltk.stem.lancaster":{LancasterStemmer:[202,2,1,""]},"nltk.stem.lancaster.LancasterStemmer":{__init__:[202,3,1,""],default_rule_tuple:[202,4,1,""],parseRules:[202,3,1,""],stem:[202,3,1,""]},"nltk.stem.porter":{PorterStemmer:[203,2,1,""],demo:[203,1,1,""]},"nltk.stem.porter.PorterStemmer":{MARTIN_EXTENSIONS:[203,4,1,""],NLTK_EXTENSIONS:[203,4,1,""],ORIGINAL_ALGORITHM:[203,4,1,""],__init__:[203,3,1,""],stem:[203,3,1,""]},"nltk.stem.regexp":{RegexpStemmer:[204,2,1,""]},"nltk.stem.regexp.RegexpStemmer":{__init__:[204,3,1,""],stem:[204,3,1,""]},"nltk.stem.rslp":{RSLPStemmer:[205,2,1,""]},"nltk.stem.rslp.RSLPStemmer":{__init__:[205,3,1,""],apply_rule:[205,3,1,""],read_rule:[205,3,1,""],stem:[205,3,1,""]},"nltk.stem.snowball":{ArabicStemmer:[206,2,1,""],DanishStemmer:[206,2,1,""],DutchStemmer:[206,2,1,""],EnglishStemmer:[206,2,1,""],FinnishStemmer:[206,2,1,""],FrenchStemmer:[206,2,1,""],GermanStemmer:[206,2,1,""],HungarianStemmer:[206,2,1,""],ItalianStemmer:[206,2,1,""],NorwegianStemmer:[206,2,1,""],PorterStemmer:[206,2,1,""],PortugueseStemmer:[206,2,1,""],RomanianStemmer:[206,2,1,""],RussianStemmer:[206,2,1,""],SnowballStemmer:[206,2,1,""],SpanishStemmer:[206,2,1,""],SwedishStemmer:[206,2,1,""],demo:[206,1,1,""]},"nltk.stem.snowball.ArabicStemmer":{is_defined:[206,4,1,""],is_noun:[206,4,1,""],is_verb:[206,4,1,""],prefix_step2a_success:[206,4,1,""],prefix_step3a_noun_success:[206,4,1,""],prefix_step3b_noun_success:[206,4,1,""],stem:[206,3,1,""],suffix_noun_step1a_success:[206,4,1,""],suffix_noun_step2a_success:[206,4,1,""],suffix_noun_step2b_success:[206,4,1,""],suffix_noun_step2c2_success:[206,4,1,""],suffix_verb_step2a_success:[206,4,1,""],suffix_verb_step2b_success:[206,4,1,""],suffixe_noun_step1b_success:[206,4,1,""],suffixes_verb_step1_success:[206,4,1,""]},"nltk.stem.snowball.DanishStemmer":{stem:[
206,3,1,""]},"nltk.stem.snowball.DutchStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.EnglishStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.FinnishStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.FrenchStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.GermanStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.HungarianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.ItalianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.NorwegianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.PorterStemmer":{__init__:[206,3,1,""]},"nltk.stem.snowball.PortugueseStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.RomanianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.RussianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.SnowballStemmer":{__init__:[206,3,1,""],languages:[206,4,1,""],stem:[206,3,1,""]},"nltk.stem.snowball.SpanishStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.SwedishStemmer":{stem:[206,3,1,""]},"nltk.stem.util":{prefix_replace:[207,1,1,""],suffix_replace:[207,1,1,""]},"nltk.stem.wordnet":{WordNetLemmatizer:[208,2,1,""]},"nltk.stem.wordnet.WordNetLemmatizer":{lemmatize:[208,3,1,""]},"nltk.tag":{api:[210,0,0,"-"],brill:[211,0,0,"-"],brill_trainer:[212,0,0,"-"],crf:[213,0,0,"-"],hmm:[214,0,0,"-"],hunpos:[215,0,0,"-"],mapping:[216,0,0,"-"],perceptron:[217,0,0,"-"],pos_tag:[209,1,1,""],pos_tag_sents:[209,1,1,""],senna:[218,0,0,"-"],sequential:[219,0,0,"-"],stanford:[220,0,0,"-"],tnt:[221,0,0,"-"],util:[222,0,0,"-"]},"nltk.tag.api":{FeaturesetTaggerI:[210,2,1,""],TaggerI:[210,2,1,""]},"nltk.tag.api.TaggerI":{evaluate:[210,3,1,""],tag:[210,3,1,""],tag_sents:[210,3,1,""]},"nltk.tag.brill":{BrillTagger:[211,2,1,""],Pos:[211,2,1,""],Word:[211,2,1,""],brill24:[211,1,1,""],describe_template_sets:[211,1,1,""],fntbl37:[211,1,1,""],nltkdemo18:[211,1,1,""],nltkdemo18plus:[211,1,1,""]},"nltk.tag.brill.BrillTagger":{__init__:[211,3,1,""],batch_tag_incremental:[211,3,1,""],decode_json_obj:[211,3,1,""],encode_json_obj:[211,3,1,""],json_tag:[211,4,1,""],print_template_statistics:[211,3,1,""],rules:[211,3,1,""],tag:[211,3,1,""],train_stats:[211,3,1,""]},"nltk.tag.brill.Pos":{extract_property:[211,3,1,""],json_tag:[211,4,1,""]},"nltk.tag.brill.Word":{extract_property:[211,3,1,""],json_tag:[211,4,1,""]},"nltk.tag.brill_trainer":{BrillTaggerTrainer:[212,2,1,""]},"nltk.tag.brill_trainer.BrillTaggerTrainer":{__init__:[212,3,1,""],train:[212,3,1,""]},"nltk.tag.crf":{CRFTagger:[213,2,1,""]},"nltk.tag.crf.CRFTagger":{__init__:[213,3,1,""],set_model_file:[213,3,1,""],tag:[213,3,1,""],tag_sents:[213,3,1,""],train:[213,3,1,""]},"nltk.tag.hmm":{HiddenMarkovModelTagger:[214,2,1,""],HiddenMarkovModelTrainer:[214,2,1,""],demo:[214,1,1,""],demo_bw:[214,1,1,""],demo_pos:[214,1,1,""],demo_pos_bw:[214,1,1,""],load_pos:[214,1,1,""],logsumexp2:[214,1,1,""]},"nltk.tag.hmm.HiddenMarkovModelTagger":{__init__:[214,3,1,""],best_path:[214,3,1,""],best_path_simple:[214,3,1,""],entropy:[214,3,1,""],log_probability:[214,3,1,""],point_entropy:[214,3,1,""],probability:[214,3,1,""],random_sample:[214,3,1,""],reset_cache:[214,3,1,""],tag:[214,3,1,""],test:[214,3,1,""],train:[214,3,1,""]},"nltk.tag.hmm.HiddenMarkovModelTrainer":{__init__:[214,3,1,""],train:[214,3,1,""],train_supervised:[214,3,1,""],train_unsupervised:[214,3,1,""]},"nltk.tag.hunpos":{HunposTagger:[215,2,1,""]},"nltk.tag.hunpos.HunposTagger":{__init__:[215,3,1,""],close:[215,3,1,""],tag:[215,3,1,""]},"nltk.tag.mapping":{map_tag:[216,1,1,""],tagset_mapping:[216,1,1,""]},"nltk.tag.perceptron":{AveragedPerceptron:[217,2,1,""],PerceptronTagger:[217,2,1,""]},"nltk.tag.perceptron.AveragedP
erceptron":{__init__:[217,3,1,""],average_weights:[217,3,1,""],decode_json_obj:[217,3,1,""],encode_json_obj:[217,3,1,""],json_tag:[217,4,1,""],load:[217,3,1,""],predict:[217,3,1,""],save:[217,3,1,""],update:[217,3,1,""]},"nltk.tag.perceptron.PerceptronTagger":{END:[217,4,1,""],START:[217,4,1,""],__init__:[217,3,1,""],decode_json_obj:[217,3,1,""],encode_json_obj:[217,3,1,""],json_tag:[217,4,1,""],load:[217,3,1,""],normalize:[217,3,1,""],tag:[217,3,1,""],train:[217,3,1,""]},"nltk.tag.senna":{SennaChunkTagger:[218,2,1,""],SennaNERTagger:[218,2,1,""],SennaTagger:[218,2,1,""]},"nltk.tag.senna.SennaChunkTagger":{__init__:[218,3,1,""],bio_to_chunks:[218,3,1,""],tag_sents:[218,3,1,""]},"nltk.tag.senna.SennaNERTagger":{__init__:[218,3,1,""],tag_sents:[218,3,1,""]},"nltk.tag.senna.SennaTagger":{__init__:[218,3,1,""],tag_sents:[218,3,1,""]},"nltk.tag.sequential":{AffixTagger:[219,2,1,""],BigramTagger:[219,2,1,""],ClassifierBasedPOSTagger:[219,2,1,""],ClassifierBasedTagger:[219,2,1,""],ContextTagger:[219,2,1,""],DefaultTagger:[219,2,1,""],NgramTagger:[219,2,1,""],RegexpTagger:[219,2,1,""],SequentialBackoffTagger:[219,2,1,""],TrigramTagger:[219,2,1,""],UnigramTagger:[219,2,1,""]},"nltk.tag.sequential.AffixTagger":{__init__:[219,3,1,""],context:[219,3,1,""],decode_json_obj:[219,3,1,""],encode_json_obj:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.BigramTagger":{__init__:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.ClassifierBasedPOSTagger":{feature_detector:[219,3,1,""]},"nltk.tag.sequential.ClassifierBasedTagger":{__init__:[219,3,1,""],choose_tag:[219,3,1,""],classifier:[219,3,1,""],feature_detector:[219,3,1,""]},"nltk.tag.sequential.ContextTagger":{__init__:[219,3,1,""],choose_tag:[219,3,1,""],context:[219,3,1,""],size:[219,3,1,""]},"nltk.tag.sequential.DefaultTagger":{__init__:[219,3,1,""],choose_tag:[219,3,1,""],decode_json_obj:[219,3,1,""],encode_json_obj:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.NgramTagger":{__init__:[219,3,1,""],context:[219,3,1,""],decode_json_obj:[219,3,1,""],encode_json_obj:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.RegexpTagger":{__init__:[219,3,1,""],choose_tag:[219,3,1,""],decode_json_obj:[219,3,1,""],encode_json_obj:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.SequentialBackoffTagger":{__init__:[219,3,1,""],backoff:[219,5,1,""],choose_tag:[219,3,1,""],tag:[219,3,1,""],tag_one:[219,3,1,""]},"nltk.tag.sequential.TrigramTagger":{__init__:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.UnigramTagger":{__init__:[219,3,1,""],context:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.stanford":{StanfordNERTagger:[220,2,1,""],StanfordPOSTagger:[220,2,1,""],StanfordTagger:[220,2,1,""]},"nltk.tag.stanford.StanfordNERTagger":{__init__:[220,3,1,""],parse_output:[220,3,1,""]},"nltk.tag.stanford.StanfordPOSTagger":{__init__:[220,3,1,""]},"nltk.tag.stanford.StanfordTagger":{__init__:[220,3,1,""],parse_output:[220,3,1,""],tag:[220,3,1,""],tag_sents:[220,3,1,""]},"nltk.tag.tnt":{TnT:[221,2,1,""],basic_sent_chop:[221,1,1,""],demo2:[221,1,1,""],demo3:[221,1,1,""],demo:[221,1,1,""]},"nltk.tag.tnt.TnT":{__init__:[221,3,1,""],tag:[221,3,1,""],tagdata:[221,3,1,""],train:[221,3,1,""]},"nltk.tag.util":{str2tuple:[222,1,1,""],tuple2str:[222,1,1,""],untag:[222,1,1,""]},"nltk.tbl":{demo:[224,0,0,"-"],erroranalysis:[225,0,0,"-"],feature:[226,0,0,"-"],rule:[227,0,0,"-"],template:[228,0,0,"-"]},"nltk.tbl.demo":{corpus_size:[224,1,1,""],demo:[224,1,1,""],demo_error_analysis:[224,1,1,""],demo_generated_templates:[224,1,1,""],demo_high_acc
uracy_rules:[224,1,1,""],demo_learning_curve:[224,1,1,""],demo_multifeature_template:[224,1,1,""],demo_multiposition_feature:[224,1,1,""],demo_repr_rule_format:[224,1,1,""],demo_serialize_tagger:[224,1,1,""],demo_str_rule_format:[224,1,1,""],demo_template_statistics:[224,1,1,""],demo_verbose_rule_format:[224,1,1,""],postag:[224,1,1,""]},"nltk.tbl.erroranalysis":{error_list:[225,1,1,""]},"nltk.tbl.feature":{Feature:[226,2,1,""]},"nltk.tbl.feature.Feature":{PROPERTY_NAME:[226,4,1,""],__init__:[226,3,1,""],decode_json_obj:[226,3,1,""],encode_json_obj:[226,3,1,""],expand:[226,3,1,""],extract_property:[226,3,1,""],intersects:[226,3,1,""],issuperset:[226,3,1,""],json_tag:[226,4,1,""]},"nltk.tbl.rule":{Rule:[227,2,1,""],TagRule:[227,2,1,""]},"nltk.tbl.rule.Rule":{__init__:[227,3,1,""],applies:[227,3,1,""],decode_json_obj:[227,3,1,""],encode_json_obj:[227,3,1,""],format:[227,3,1,""],json_tag:[227,4,1,""]},"nltk.tbl.rule.TagRule":{__init__:[227,3,1,""],applies:[227,3,1,""],apply:[227,3,1,""],original_tag:[227,4,1,""],replacement_tag:[227,4,1,""]},"nltk.tbl.template":{BrillTemplateI:[228,2,1,""],Template:[228,2,1,""]},"nltk.tbl.template.BrillTemplateI":{applicable_rules:[228,3,1,""],get_neighborhood:[228,3,1,""]},"nltk.tbl.template.Template":{ALLTEMPLATES:[228,4,1,""],__init__:[228,3,1,""],applicable_rules:[228,3,1,""],expand:[228,3,1,""],get_neighborhood:[228,3,1,""]},"nltk.test":{all:[230,0,0,"-"],childes_fixt:[231,0,0,"-"],classify_fixt:[232,0,0,"-"],discourse_fixt:[234,0,0,"-"],gensim_fixt:[235,0,0,"-"],gluesemantics_malt_fixt:[236,0,0,"-"],inference_fixt:[237,0,0,"-"],nonmonotonic_fixt:[238,0,0,"-"],portuguese_en_fixt:[239,0,0,"-"],probability_fixt:[240,0,0,"-"],unit:[241,0,0,"-"]},"nltk.test.all":{additional_tests:[230,1,1,""]},"nltk.test.childes_fixt":{setup_module:[231,1,1,""]},"nltk.test.classify_fixt":{setup_module:[232,1,1,""]},"nltk.test.discourse_fixt":{setup_module:[234,1,1,""]},"nltk.test.gensim_fixt":{setup_module:[235,1,1,""]},"nltk.test.gluesemantics_malt_fixt":{setup_module:[236,1,1,""]},"nltk.test.inference_fixt":{setup_module:[237,1,1,""]},"nltk.test.nonmonotonic_fixt":{setup_module:[238,1,1,""]},"nltk.test.portuguese_en_fixt":{setup_module:[239,1,1,""]},"nltk.test.probability_fixt":{setup_module:[240,1,1,""]},"nltk.test.unit":{lm:[242,0,0,"-"],test_aline:[247,0,0,"-"],test_brill:[248,0,0,"-"],test_cfg2chomsky:[250,0,0,"-"],test_chunk:[251,0,0,"-"],test_collocations:[253,0,0,"-"],test_concordance:[254,0,0,"-"],test_corpus_views:[257,0,0,"-"],test_disagreement:[259,0,0,"-"],test_freqdist:[261,0,0,"-"],test_json_serialization:[264,0,0,"-"],test_metrics:[265,0,0,"-"],test_naivebayes:[266,0,0,"-"],test_nombank:[267,0,0,"-"],test_pl196x:[268,0,0,"-"],test_pos_tag:[269,0,0,"-"],test_ribes:[270,0,0,"-"],test_senna:[273,0,0,"-"],test_stem:[274,0,0,"-"],test_tag:[275,0,0,"-"],test_tgrep:[276,0,0,"-"],test_wordnet:[280,0,0,"-"],translate:[281,0,0,"-"]},"nltk.test.unit.lm":{test_preprocessing:[245,0,0,"-"],test_vocabulary:[246,0,0,"-"]},"nltk.test.unit.lm.test_preprocessing":{TestPreprocessing:[245,2,1,""]},"nltk.test.unit.lm.test_preprocessing.TestPreprocessing":{test_padded_everygram_pipeline:[245,3,1,""]},"nltk.test.unit.lm.test_vocabulary":{NgramModelVocabularyTests:[246,2,1,""]},"nltk.test.unit.lm.test_vocabulary.NgramModelVocabularyTests":{setUpClass:[246,3,1,""],test_counts_set_correctly:[246,3,1,""],test_creation_with_counter:[246,3,1,""],test_cutoff_setter_checks_value:[246,3,1,""],test_cutoff_value_set_correctly:[246,3,1,""],test_eqality:[246,3,1,""],test_len_is_constant:[246,3,1,
""],test_lookup:[246,3,1,""],test_lookup_None:[246,3,1,""],test_lookup_empty_iterables:[246,3,1,""],test_lookup_empty_str:[246,3,1,""],test_lookup_int:[246,3,1,""],test_lookup_iterables:[246,3,1,""],test_lookup_recursive:[246,3,1,""],test_membership_check_respects_cutoff:[246,3,1,""],test_str:[246,3,1,""],test_truthiness:[246,3,1,""],test_unable_to_change_cutoff:[246,3,1,""],test_update_empty_vocab:[246,3,1,""],test_vocab_iter_respects_cutoff:[246,3,1,""],test_vocab_len_respects_cutoff:[246,3,1,""]},"nltk.test.unit.test_aline":{test_aline:[247,1,1,""],test_aline_delta:[247,1,1,""]},"nltk.test.unit.test_brill":{TestBrill:[248,2,1,""]},"nltk.test.unit.test_brill.TestBrill":{test_brill_demo:[248,3,1,""],test_pos_template:[248,3,1,""]},"nltk.test.unit.test_cfg2chomsky":{ChomskyNormalFormForCFGTest:[250,2,1,""]},"nltk.test.unit.test_cfg2chomsky.ChomskyNormalFormForCFGTest":{test_complex:[250,3,1,""],test_simple:[250,3,1,""]},"nltk.test.unit.test_chunk":{TestChunkRule:[251,2,1,""]},"nltk.test.unit.test_chunk.TestChunkRule":{test_tag_pattern2re_pattern_quantifier:[251,3,1,""]},"nltk.test.unit.test_collocations":{close_enough:[253,1,1,""],test_bigram2:[253,1,1,""],test_bigram3:[253,1,1,""],test_bigram5:[253,1,1,""]},"nltk.test.unit.test_concordance":{TestConcordance:[254,2,1,""],stdout_redirect:[254,1,1,""]},"nltk.test.unit.test_concordance.TestConcordance":{setUp:[254,3,1,""],setUpClass:[254,3,1,""],tearDown:[254,3,1,""],tearDownClass:[254,3,1,""],test_concordance_lines:[254,3,1,""],test_concordance_list:[254,3,1,""],test_concordance_print:[254,3,1,""],test_concordance_width:[254,3,1,""]},"nltk.test.unit.test_corpus_views":{TestCorpusViews:[257,2,1,""]},"nltk.test.unit.test_corpus_views.TestCorpusViews":{data:[257,3,1,""],linetok:[257,4,1,""],names:[257,4,1,""],test_correct_length:[257,3,1,""],test_correct_values:[257,3,1,""]},"nltk.test.unit.test_disagreement":{TestDisagreement:[259,2,1,""]},"nltk.test.unit.test_disagreement.TestDisagreement":{test_advanced2:[259,3,1,""],test_advanced:[259,3,1,""],test_easy2:[259,3,1,""],test_easy:[259,3,1,""]},"nltk.test.unit.test_freqdist":{test_iterating_returns_an_iterator_ordered_by_frequency:[261,1,1,""]},"nltk.test.unit.test_json_serialization":{TestJSONSerialization:[264,2,1,""]},"nltk.test.unit.test_json_serialization.TestJSONSerialization":{setUp:[264,3,1,""],test_affix_tagger:[264,3,1,""],test_brill_tagger:[264,3,1,""],test_default_tagger:[264,3,1,""],test_ngram_taggers:[264,3,1,""],test_perceptron_tagger:[264,3,1,""],test_regexp_tagger:[264,3,1,""]},"nltk.test.unit.test_metrics":{TestLikelihoodRatio:[265,2,1,""]},"nltk.test.unit.test_metrics.TestLikelihoodRatio":{test_lr_bigram:[265,3,1,""],test_lr_quadgram:[265,3,1,""],test_lr_trigram:[265,3,1,""]},"nltk.test.unit.test_naivebayes":{NaiveBayesClassifierTest:[266,2,1,""]},"nltk.test.unit.test_naivebayes.NaiveBayesClassifierTest":{test_simple:[266,3,1,""]},"nltk.test.unit.test_nombank":{NombankDemo:[267,2,1,""]},"nltk.test.unit.test_nombank.NombankDemo":{test_framefiles_fileids:[267,3,1,""],test_instance:[267,3,1,""],test_numbers:[267,3,1,""]},"nltk.test.unit.test_pl196x":{TestCorpusViews:[268,2,1,""]},"nltk.test.unit.test_pl196x.TestCorpusViews":{test_corpus_reader:[268,3,1,""]},"nltk.test.unit.test_pos_tag":{TestPosTag:[269,2,1,""]},"nltk.test.unit.test_pos_tag.TestPosTag":{test_pos_tag_eng:[269,3,1,""],test_pos_tag_eng_universal:[269,3,1,""],test_pos_tag_rus:[269,3,1,""],test_pos_tag_rus_universal:[269,3,1,""],test_pos_tag_unknown_lang:[269,3,1,""],test_unspecified_lang:[269,3,1,""]},"nltk.test.unit.
test_ribes":{test_no_zero_div:[270,1,1,""],test_ribes:[270,1,1,""],test_ribes_empty_worder:[270,1,1,""],test_ribes_one_worder:[270,1,1,""],test_ribes_two_worder:[270,1,1,""]},"nltk.test.unit.test_senna":{TestSennaPipeline:[273,2,1,""],TestSennaTagger:[273,2,1,""]},"nltk.test.unit.test_senna.TestSennaPipeline":{test_senna_pipeline:[273,3,1,""]},"nltk.test.unit.test_senna.TestSennaTagger":{test_senna_chunk_tagger:[273,3,1,""],test_senna_ner_tagger:[273,3,1,""],test_senna_tagger:[273,3,1,""]},"nltk.test.unit.test_stem":{PorterTest:[274,2,1,""],SnowballTest:[274,2,1,""]},"nltk.test.unit.test_stem.PorterTest":{test_lowercase_option:[274,3,1,""],test_oed_bug:[274,3,1,""],test_vocabulary_martin_mode:[274,3,1,""],test_vocabulary_nltk_mode:[274,3,1,""],test_vocabulary_original_mode:[274,3,1,""]},"nltk.test.unit.test_stem.SnowballTest":{test_arabic:[274,3,1,""],test_german:[274,3,1,""],test_russian:[274,3,1,""],test_short_strings_bug:[274,3,1,""],test_spanish:[274,3,1,""]},"nltk.test.unit.test_tag":{setup_module:[275,1,1,""],test_basic:[275,1,1,""]},"nltk.test.unit.test_tgrep":{TestSequenceFunctions:[276,2,1,""]},"nltk.test.unit.test_tgrep.TestSequenceFunctions":{test_bad_operator:[276,3,1,""],test_comments:[276,3,1,""],test_examples:[276,3,1,""],test_labeled_nodes:[276,3,1,""],test_multiple_conjs:[276,3,1,""],test_node_encoding:[276,3,1,""],test_node_nocase:[276,3,1,""],test_node_noleaves:[276,3,1,""],test_node_printing:[276,3,1,""],test_node_quoted:[276,3,1,""],test_node_regex:[276,3,1,""],test_node_regex_2:[276,3,1,""],test_node_simple:[276,3,1,""],test_node_tree_position:[276,3,1,""],test_rel_precedence:[276,3,1,""],test_rel_sister_nodes:[276,3,1,""],test_tokenize_encoding:[276,3,1,""],test_tokenize_examples:[276,3,1,""],test_tokenize_link_types:[276,3,1,""],test_tokenize_macros:[276,3,1,""],test_tokenize_node_labels:[276,3,1,""],test_tokenize_nodenames:[276,3,1,""],test_tokenize_quoting:[276,3,1,""],test_tokenize_segmented_patterns:[276,3,1,""],test_tokenize_simple:[276,3,1,""],test_trailing_semicolon:[276,3,1,""],test_use_macros:[276,3,1,""],tests_rel_dominance:[276,3,1,""],tests_rel_indexed_children:[276,3,1,""]},"nltk.test.unit.test_wordnet":{WordnNetDemo:[280,2,1,""]},"nltk.test.unit.test_wordnet.WordnNetDemo":{test_antonyms:[280,3,1,""],test_derivationally_related_forms:[280,3,1,""],test_domains:[280,3,1,""],test_hyperhyponyms:[280,3,1,""],test_in_topic_domains:[280,3,1,""],test_iterable_type_for_all_lemma_names:[280,3,1,""],test_lch:[280,3,1,""],test_meronyms_holonyms:[280,3,1,""],test_misc_relations:[280,3,1,""],test_omw_lemma_no_trailing_underscore:[280,3,1,""],test_retrieve_synset:[280,3,1,""],test_retrieve_synsets:[280,3,1,""],test_wordnet_similarities:[280,3,1,""]},"nltk.test.unit.translate":{test_bleu:[282,0,0,"-"],test_gdfa:[283,0,0,"-"],test_ibm1:[284,0,0,"-"],test_ibm2:[285,0,0,"-"],test_ibm3:[286,0,0,"-"],test_ibm4:[287,0,0,"-"],test_ibm5:[288,0,0,"-"],test_ibm_model:[289,0,0,"-"],test_meteor:[290,0,0,"-"],test_nist:[291,0,0,"-"],test_stack_decoder:[292,0,0,"-"]},"nltk.test.unit.translate.test_bleu":{TestBLEU:[282,2,1,""],TestBLEUFringeCases:[282,2,1,""],TestBLEUWithBadSentence:[282,2,1,""],TestBLEUvsMteval13a:[282,2,1,""]},"nltk.test.unit.translate.test_bleu.TestBLEU":{test_brevity_penalty:[282,3,1,""],test_full_matches:[282,3,1,""],test_modified_precision:[282,3,1,""],test_partial_matches_hypothesis_longer_than_reference:[282,3,1,""],test_zero_matches:[282,3,1,""]},"nltk.test.unit.translate.test_bleu.TestBLEUFringeCases":{test_case_where_n_is_bigger_than_hypothesis_length:[282,
3,1,""],test_empty_hypothesis:[282,3,1,""],test_empty_references:[282,3,1,""],test_empty_references_and_hypothesis:[282,3,1,""],test_reference_or_hypothesis_shorter_than_fourgrams:[282,3,1,""]},"nltk.test.unit.translate.test_bleu.TestBLEUWithBadSentence":{test_corpus_bleu_with_bad_sentence:[282,3,1,""]},"nltk.test.unit.translate.test_bleu.TestBLEUvsMteval13a":{test_corpus_bleu:[282,3,1,""]},"nltk.test.unit.translate.test_gdfa":{TestGDFA:[283,2,1,""]},"nltk.test.unit.translate.test_gdfa.TestGDFA":{test_from_eflomal_outputs:[283,3,1,""]},"nltk.test.unit.translate.test_ibm1":{TestIBMModel1:[284,2,1,""]},"nltk.test.unit.translate.test_ibm1.TestIBMModel1":{test_prob_t_a_given_s:[284,3,1,""],test_set_uniform_translation_probabilities:[284,3,1,""],test_set_uniform_translation_probabilities_of_non_domain_values:[284,3,1,""]},"nltk.test.unit.translate.test_ibm2":{TestIBMModel2:[285,2,1,""]},"nltk.test.unit.translate.test_ibm2.TestIBMModel2":{test_prob_t_a_given_s:[285,3,1,""],test_set_uniform_alignment_probabilities:[285,3,1,""],test_set_uniform_alignment_probabilities_of_non_domain_values:[285,3,1,""]},"nltk.test.unit.translate.test_ibm3":{TestIBMModel3:[286,2,1,""]},"nltk.test.unit.translate.test_ibm3.TestIBMModel3":{test_prob_t_a_given_s:[286,3,1,""],test_set_uniform_distortion_probabilities:[286,3,1,""],test_set_uniform_distortion_probabilities_of_non_domain_values:[286,3,1,""]},"nltk.test.unit.translate.test_ibm4":{TestIBMModel4:[287,2,1,""]},"nltk.test.unit.translate.test_ibm4.TestIBMModel4":{test_prob_t_a_given_s:[287,3,1,""],test_set_uniform_distortion_probabilities_of_max_displacements:[287,3,1,""],test_set_uniform_distortion_probabilities_of_non_domain_values:[287,3,1,""]},"nltk.test.unit.translate.test_ibm5":{TestIBMModel5:[288,2,1,""]},"nltk.test.unit.translate.test_ibm5.TestIBMModel5":{test_prob_t_a_given_s:[288,3,1,""],test_prune:[288,3,1,""],test_set_uniform_vacancy_probabilities_of_max_displacements:[288,3,1,""],test_set_uniform_vacancy_probabilities_of_non_domain_values:[288,3,1,""]},"nltk.test.unit.translate.test_ibm_model":{TestIBMModel:[289,2,1,""]},"nltk.test.unit.translate.test_ibm_model.TestIBMModel":{test_best_model2_alignment:[289,3,1,""],test_best_model2_alignment_does_not_change_pegged_alignment:[289,3,1,""],test_best_model2_alignment_handles_empty_src_sentence:[289,3,1,""],test_best_model2_alignment_handles_empty_trg_sentence:[289,3,1,""],test_best_model2_alignment_handles_fertile_words:[289,3,1,""],test_hillclimb:[289,3,1,""],test_neighboring_finds_neighbor_alignments:[289,3,1,""],test_neighboring_returns_neighbors_with_pegged_alignment:[289,3,1,""],test_neighboring_sets_neighbor_alignment_info:[289,3,1,""],test_sample:[289,3,1,""],test_vocabularies_are_initialized:[289,3,1,""],test_vocabularies_are_initialized_even_with_empty_corpora:[289,3,1,""]},"nltk.test.unit.translate.test_meteor":{TestMETEOR:[290,2,1,""]},"nltk.test.unit.translate.test_meteor.TestMETEOR":{candidate:[290,4,1,""],reference:[290,4,1,""],test_candidate_type_check:[290,3,1,""],test_meteor:[290,3,1,""],test_reference_type_check:[290,3,1,""]},"nltk.test.unit.translate.test_nist":{TestNIST:[291,2,1,""]},"nltk.test.unit.translate.test_nist.TestNIST":{test_sentence_nist:[291,3,1,""]},"nltk.test.unit.translate.test_stack_decoder":{TestHypothesis:[292,2,1,""],TestStack:[292,2,1,""],TestStackDecoder:[292,2,1,""]},"nltk.test.unit.translate.test_stack_decoder.TestHypothesis":{setUp:[292,3,1,""],test_total_translated_words:[292,3,1,""],test_translated_positions:[292,3,1,""],test_translation_so_far:[292,3,1,""],te
st_translation_so_far_for_empty_hypothesis:[292,3,1,""],test_untranslated_spans:[292,3,1,""],test_untranslated_spans_for_empty_hypothesis:[292,3,1,""]},"nltk.test.unit.translate.test_stack_decoder.TestStack":{test_best_returns_none_when_stack_is_empty:[292,3,1,""],test_best_returns_the_best_hypothesis:[292,3,1,""],test_push_bumps_off_worst_hypothesis_when_stack_is_full:[292,3,1,""],test_push_does_not_add_hypothesis_that_falls_below_beam_threshold:[292,3,1,""],test_push_removes_hypotheses_that_fall_below_beam_threshold:[292,3,1,""]},"nltk.test.unit.translate.test_stack_decoder.TestStackDecoder":{create_fake_language_model:[292,3,1,""],create_fake_phrase_table:[292,3,1,""],test_compute_future_costs:[292,3,1,""],test_compute_future_costs_for_phrases_not_in_phrase_table:[292,3,1,""],test_distortion_score:[292,3,1,""],test_distortion_score_of_first_expansion:[292,3,1,""],test_find_all_src_phrases:[292,3,1,""],test_future_score:[292,3,1,""],test_valid_phrases:[292,3,1,""]},"nltk.text":{ConcordanceIndex:[293,2,1,""],ContextIndex:[293,2,1,""],Text:[293,2,1,""],TextCollection:[293,2,1,""],TokenSearcher:[293,2,1,""]},"nltk.text.ConcordanceIndex":{__init__:[293,3,1,""],find_concordance:[293,3,1,""],offsets:[293,3,1,""],print_concordance:[293,3,1,""],tokens:[293,3,1,""]},"nltk.text.ContextIndex":{__init__:[293,3,1,""],common_contexts:[293,3,1,""],similar_words:[293,3,1,""],tokens:[293,3,1,""],word_similarity_dict:[293,3,1,""]},"nltk.text.Text":{__init__:[293,3,1,""],collocation_list:[293,3,1,""],collocations:[293,3,1,""],common_contexts:[293,3,1,""],concordance:[293,3,1,""],concordance_list:[293,3,1,""],count:[293,3,1,""],dispersion_plot:[293,3,1,""],findall:[293,3,1,""],generate:[293,3,1,""],index:[293,3,1,""],plot:[293,3,1,""],readability:[293,3,1,""],similar:[293,3,1,""],vocab:[293,3,1,""]},"nltk.text.TextCollection":{__init__:[293,3,1,""],idf:[293,3,1,""],tf:[293,3,1,""],tf_idf:[293,3,1,""]},"nltk.text.TokenSearcher":{__init__:[293,3,1,""],findall:[293,3,1,""]},"nltk.tgrep":{TgrepException:[294,6,1,""],ancestors:[294,1,1,""],tgrep_compile:[294,1,1,""],tgrep_nodes:[294,1,1,""],tgrep_positions:[294,1,1,""],tgrep_tokenize:[294,1,1,""],treepositions_no_leaves:[294,1,1,""],unique_ancestors:[294,1,1,""]},"nltk.tokenize":{api:[296,0,0,"-"],casual:[297,0,0,"-"],destructive:[298,0,0,"-"],legality_principle:[299,0,0,"-"],mwe:[300,0,0,"-"],nist:[301,0,0,"-"],punkt:[302,0,0,"-"],regexp:[303,0,0,"-"],repp:[304,0,0,"-"],sent_tokenize:[295,1,1,""],sexpr:[305,0,0,"-"],simple:[306,0,0,"-"],sonority_sequencing:[307,0,0,"-"],stanford:[308,0,0,"-"],stanford_segmenter:[309,0,0,"-"],texttiling:[310,0,0,"-"],toktok:[311,0,0,"-"],treebank:[312,0,0,"-"],util:[313,0,0,"-"],word_tokenize:[295,1,1,""]},"nltk.tokenize.api":{StringTokenizer:[296,2,1,""],TokenizerI:[296,2,1,""]},"nltk.tokenize.api.StringTokenizer":{span_tokenize:[296,3,1,""],tokenize:[296,3,1,""]},"nltk.tokenize.api.TokenizerI":{span_tokenize:[296,3,1,""],span_tokenize_sents:[296,3,1,""],tokenize:[296,3,1,""],tokenize_sents:[296,3,1,""]},"nltk.tokenize.casual":{TweetTokenizer:[297,2,1,""],casual_tokenize:[297,1,1,""],reduce_lengthening:[297,1,1,""],remove_handles:[297,1,1,""]},"nltk.tokenize.casual.TweetTokenizer":{PHONE_WORD_RE:[297,5,1,""],WORD_RE:[297,5,1,""],__init__:[297,3,1,""],tokenize:[297,3,1,""]},"nltk.tokenize.destructive":{MacIntyreContractions:[298,2,1,""],NLTKWordTokenizer:[298,2,1,""]},"nltk.tokenize.destructive.MacIntyreContractions":{CONTRACTIONS2:[298,4,1,""],CONTRACTIONS3:[298,4,1,""],CONTRACTIONS4:[298,4,1,""]},"nltk.tokenize.destructive.NLTK
WordTokenizer":{CONTRACTIONS2:[298,4,1,""],CONTRACTIONS3:[298,4,1,""],CONVERT_PARENTHESES:[298,4,1,""],DOUBLE_DASHES:[298,4,1,""],ENDING_QUOTES:[298,4,1,""],PARENS_BRACKETS:[298,4,1,""],PUNCTUATION:[298,4,1,""],STARTING_QUOTES:[298,4,1,""],tokenize:[298,3,1,""]},"nltk.tokenize.legality_principle":{LegalitySyllableTokenizer:[299,2,1,""]},"nltk.tokenize.legality_principle.LegalitySyllableTokenizer":{__init__:[299,3,1,""],find_legal_onsets:[299,3,1,""],onset:[299,3,1,""],tokenize:[299,3,1,""]},"nltk.tokenize.mwe":{MWETokenizer:[300,2,1,""]},"nltk.tokenize.mwe.MWETokenizer":{__init__:[300,3,1,""],add_mwe:[300,3,1,""],tokenize:[300,3,1,""]},"nltk.tokenize.nist":{NISTTokenizer:[301,2,1,""]},"nltk.tokenize.nist.NISTTokenizer":{DASH_PRECEED_DIGIT:[301,4,1,""],INTERNATIONAL_REGEXES:[301,4,1,""],LANG_DEPENDENT_REGEXES:[301,4,1,""],NONASCII:[301,4,1,""],PERIOD_COMMA_FOLLOW:[301,4,1,""],PERIOD_COMMA_PRECEED:[301,4,1,""],PUNCT:[301,4,1,""],PUNCT_1:[301,4,1,""],PUNCT_2:[301,4,1,""],STRIP_EOL_HYPHEN:[301,4,1,""],STRIP_SKIP:[301,4,1,""],SYMBOLS:[301,4,1,""],international_tokenize:[301,3,1,""],lang_independent_sub:[301,3,1,""],number_regex:[301,4,1,""],punct_regex:[301,4,1,""],pup_number:[301,4,1,""],pup_punct:[301,4,1,""],pup_symbol:[301,4,1,""],symbol_regex:[301,4,1,""],tokenize:[301,3,1,""]},"nltk.tokenize.punkt":{PunktBaseClass:[302,2,1,""],PunktLanguageVars:[302,2,1,""],PunktParameters:[302,2,1,""],PunktSentenceTokenizer:[302,2,1,""],PunktToken:[302,2,1,""],PunktTrainer:[302,2,1,""],demo:[302,1,1,""],format_debug_decision:[302,1,1,""]},"nltk.tokenize.punkt.PunktBaseClass":{__init__:[302,3,1,""]},"nltk.tokenize.punkt.PunktLanguageVars":{internal_punctuation:[302,4,1,""],period_context_re:[302,3,1,""],re_boundary_realignment:[302,4,1,""],sent_end_chars:[302,4,1,""],word_tokenize:[302,3,1,""]},"nltk.tokenize.punkt.PunktParameters":{__init__:[302,3,1,""],abbrev_types:[302,4,1,""],add_ortho_context:[302,3,1,""],clear_abbrevs:[302,3,1,""],clear_collocations:[302,3,1,""],clear_ortho_context:[302,3,1,""],clear_sent_starters:[302,3,1,""],collocations:[302,4,1,""],ortho_context:[302,4,1,""],sent_starters:[302,4,1,""]},"nltk.tokenize.punkt.PunktSentenceTokenizer":{PUNCTUATION:[302,4,1,""],__init__:[302,3,1,""],debug_decisions:[302,3,1,""],dump:[302,3,1,""],sentences_from_text:[302,3,1,""],sentences_from_text_legacy:[302,3,1,""],sentences_from_tokens:[302,3,1,""],span_tokenize:[302,3,1,""],text_contains_sentbreak:[302,3,1,""],tokenize:[302,3,1,""],train:[302,3,1,""]},"nltk.tokenize.punkt.PunktToken":{__init__:[302,3,1,""],abbr:[302,4,1,""],ellipsis:[302,4,1,""],first_case:[302,5,1,""],first_lower:[302,5,1,""],first_upper:[302,5,1,""],is_alpha:[302,5,1,""],is_ellipsis:[302,5,1,""],is_initial:[302,5,1,""],is_non_punct:[302,5,1,""],is_number:[302,5,1,""],linestart:[302,4,1,""],parastart:[302,4,1,""],period_final:[302,4,1,""],sentbreak:[302,4,1,""],tok:[302,4,1,""],type:[302,4,1,""],type_no_period:[302,5,1,""],type_no_sentperiod:[302,5,1,""]},"nltk.tokenize.punkt.PunktTrainer":{ABBREV:[302,4,1,""],ABBREV_BACKOFF:[302,4,1,""],COLLOCATION:[302,4,1,""],IGNORE_ABBREV_PENALTY:[302,4,1,""],INCLUDE_ABBREV_COLLOCS:[302,4,1,""],INCLUDE_ALL_COLLOCS:[302,4,1,""],MIN_COLLOC_FREQ:[302,4,1,""],SENT_STARTER:[302,4,1,""],__init__:[302,3,1,""],finalize_training:[302,3,1,""],find_abbrev_types:[302,3,1,""],freq_threshold:[302,3,1,""],get_params:[302,3,1,""],train:[302,3,1,""],train_tokens:[302,3,1,""]},"nltk.tokenize.regexp":{BlanklineTokenizer:[303,2,1,""],RegexpTokenizer:[303,2,1,""],WhitespaceTokenizer:[303,2,1,""],WordPunctTokenizer
:[303,2,1,""],blankline_tokenize:[303,1,1,""],regexp_tokenize:[303,1,1,""],wordpunct_tokenize:[303,1,1,""]},"nltk.tokenize.regexp.BlanklineTokenizer":{__init__:[303,3,1,""]},"nltk.tokenize.regexp.RegexpTokenizer":{__init__:[303,3,1,""],span_tokenize:[303,3,1,""],tokenize:[303,3,1,""]},"nltk.tokenize.regexp.WhitespaceTokenizer":{__init__:[303,3,1,""]},"nltk.tokenize.regexp.WordPunctTokenizer":{__init__:[303,3,1,""]},"nltk.tokenize.repp":{ReppTokenizer:[304,2,1,""]},"nltk.tokenize.repp.ReppTokenizer":{__init__:[304,3,1,""],find_repptokenizer:[304,3,1,""],generate_repp_command:[304,3,1,""],parse_repp_outputs:[304,3,1,""],tokenize:[304,3,1,""],tokenize_sents:[304,3,1,""]},"nltk.tokenize.sexpr":{SExprTokenizer:[305,2,1,""],sexpr_tokenize:[305,1,1,""]},"nltk.tokenize.sexpr.SExprTokenizer":{__init__:[305,3,1,""],tokenize:[305,3,1,""]},"nltk.tokenize.simple":{CharTokenizer:[306,2,1,""],LineTokenizer:[306,2,1,""],SpaceTokenizer:[306,2,1,""],TabTokenizer:[306,2,1,""],line_tokenize:[306,1,1,""]},"nltk.tokenize.simple.CharTokenizer":{span_tokenize:[306,3,1,""],tokenize:[306,3,1,""]},"nltk.tokenize.simple.LineTokenizer":{__init__:[306,3,1,""],span_tokenize:[306,3,1,""],tokenize:[306,3,1,""]},"nltk.tokenize.sonority_sequencing":{SyllableTokenizer:[307,2,1,""]},"nltk.tokenize.sonority_sequencing.SyllableTokenizer":{__init__:[307,3,1,""],assign_values:[307,3,1,""],tokenize:[307,3,1,""],validate_syllables:[307,3,1,""]},"nltk.tokenize.stanford":{StanfordTokenizer:[308,2,1,""]},"nltk.tokenize.stanford.StanfordTokenizer":{__init__:[308,3,1,""],tokenize:[308,3,1,""]},"nltk.tokenize.stanford_segmenter":{StanfordSegmenter:[309,2,1,""]},"nltk.tokenize.stanford_segmenter.StanfordSegmenter":{__init__:[309,3,1,""],default_config:[309,3,1,""],segment:[309,3,1,""],segment_file:[309,3,1,""],segment_sents:[309,3,1,""],tokenize:[309,3,1,""]},"nltk.tokenize.texttiling":{TextTilingTokenizer:[310,2,1,""],TokenSequence:[310,2,1,""],TokenTableField:[310,2,1,""],demo:[310,1,1,""],smooth:[310,1,1,""]},"nltk.tokenize.texttiling.TextTilingTokenizer":{__init__:[310,3,1,""],tokenize:[310,3,1,""]},"nltk.tokenize.texttiling.TokenSequence":{__init__:[310,3,1,""]},"nltk.tokenize.texttiling.TokenTableField":{__init__:[310,3,1,""]},"nltk.tokenize.toktok":{ToktokTokenizer:[311,2,1,""]},"nltk.tokenize.toktok.ToktokTokenizer":{AMPERCENT:[311,4,1,""],CLOSE_PUNCT:[311,4,1,""],CLOSE_PUNCT_RE:[311,4,1,""],COMMA_IN_NUM:[311,4,1,""],CURRENCY_SYM:[311,4,1,""],CURRENCY_SYM_RE:[311,4,1,""],EN_EM_DASHES:[311,4,1,""],FINAL_PERIOD_1:[311,4,1,""],FINAL_PERIOD_2:[311,4,1,""],FUNKY_PUNCT_1:[311,4,1,""],FUNKY_PUNCT_2:[311,4,1,""],LSTRIP:[311,4,1,""],MULTI_COMMAS:[311,4,1,""],MULTI_DASHES:[311,4,1,""],MULTI_DOTS:[311,4,1,""],NON_BREAKING:[311,4,1,""],ONE_SPACE:[311,4,1,""],OPEN_PUNCT:[311,4,1,""],OPEN_PUNCT_RE:[311,4,1,""],PIPE:[311,4,1,""],PROB_SINGLE_QUOTES:[311,4,1,""],RSTRIP:[311,4,1,""],STUPID_QUOTES_1:[311,4,1,""],STUPID_QUOTES_2:[311,4,1,""],TAB:[311,4,1,""],TOKTOK_REGEXES:[311,4,1,""],URL_FOE_1:[311,4,1,""],URL_FOE_2:[311,4,1,""],URL_FOE_3:[311,4,1,""],URL_FOE_4:[311,4,1,""],tokenize:[311,3,1,""]},"nltk.tokenize.treebank":{TreebankWordDetokenizer:[312,2,1,""],TreebankWordTokenizer:[312,2,1,""]},"nltk.tokenize.treebank.TreebankWordDetokenizer":{CONTRACTIONS2:[312,4,1,""],CONTRACTIONS3:[312,4,1,""],CONVERT_PARENTHESES:[312,4,1,""],DOUBLE_DASHES:[312,4,1,""],ENDING_QUOTES:[312,4,1,""],PARENS_BRACKETS:[312,4,1,""],PUNCTUATION:[312,4,1,""],STARTING_QUOTES:[312,4,1,""],detokenize:[312,3,1,""],tokenize:[312,3,1,""]},"nltk.tokenize.treebank.TreebankWordToken
izer":{CONTRACTIONS2:[312,4,1,""],CONTRACTIONS3:[312,4,1,""],CONVERT_PARENTHESES:[312,4,1,""],DOUBLE_DASHES:[312,4,1,""],ENDING_QUOTES:[312,4,1,""],PARENS_BRACKETS:[312,4,1,""],PUNCTUATION:[312,4,1,""],STARTING_QUOTES:[312,4,1,""],span_tokenize:[312,3,1,""],tokenize:[312,3,1,""]},"nltk.tokenize.util":{CJKChars:[313,2,1,""],align_tokens:[313,1,1,""],is_cjk:[313,1,1,""],regexp_span_tokenize:[313,1,1,""],spans_to_relative:[313,1,1,""],string_span_tokenize:[313,1,1,""],xml_escape:[313,1,1,""],xml_unescape:[313,1,1,""]},"nltk.tokenize.util.CJKChars":{CJK_Compatibility_Forms:[313,4,1,""],CJK_Compatibility_Ideographs:[313,4,1,""],CJK_Radicals:[313,4,1,""],Hangul_Jamo:[313,4,1,""],Hangul_Syllables:[313,4,1,""],Katakana_Hangul_Halfwidth:[313,4,1,""],Phags_Pa:[313,4,1,""],Supplementary_Ideographic_Plane:[313,4,1,""],ranges:[313,4,1,""]},"nltk.toolbox":{StandardFormat:[314,2,1,""],ToolboxData:[314,2,1,""],ToolboxSettings:[314,2,1,""],add_blank_lines:[314,1,1,""],add_default_fields:[314,1,1,""],demo:[314,1,1,""],remove_blanks:[314,1,1,""],sort_fields:[314,1,1,""],to_settings_string:[314,1,1,""],to_sfm_string:[314,1,1,""]},"nltk.toolbox.StandardFormat":{__init__:[314,3,1,""],close:[314,3,1,""],fields:[314,3,1,""],open:[314,3,1,""],open_string:[314,3,1,""],raw_fields:[314,3,1,""]},"nltk.toolbox.ToolboxData":{parse:[314,3,1,""]},"nltk.toolbox.ToolboxSettings":{__init__:[314,3,1,""],parse:[314,3,1,""]},"nltk.translate":{api:[316,0,0,"-"],bleu_score:[317,0,0,"-"],chrf_score:[318,0,0,"-"],gale_church:[319,0,0,"-"],gdfa:[320,0,0,"-"],gleu_score:[321,0,0,"-"],ibm1:[322,0,0,"-"],ibm2:[323,0,0,"-"],ibm3:[324,0,0,"-"],ibm4:[325,0,0,"-"],ibm5:[326,0,0,"-"],ibm_model:[327,0,0,"-"],meteor_score:[328,0,0,"-"],metrics:[329,0,0,"-"],nist_score:[330,0,0,"-"],phrase_based:[331,0,0,"-"],ribes_score:[332,0,0,"-"],stack_decoder:[333,0,0,"-"]},"nltk.translate.api":{AlignedSent:[316,2,1,""],Alignment:[316,2,1,""],PhraseTable:[316,2,1,""],PhraseTableEntry:[316,2,1,""]},"nltk.translate.api.AlignedSent":{__init__:[316,3,1,""],alignment:[316,5,1,""],invert:[316,3,1,""],mots:[316,5,1,""],words:[316,5,1,""]},"nltk.translate.api.Alignment":{__new__:[316,3,1,""],fromstring:[316,3,1,""],invert:[316,3,1,""],range:[316,3,1,""]},"nltk.translate.api.PhraseTable":{__init__:[316,3,1,""],add:[316,3,1,""],translations_for:[316,3,1,""]},"nltk.translate.api.PhraseTableEntry":{__new__:[316,3,1,""],log_prob:[316,4,1,""],trg_phrase:[316,4,1,""]},"nltk.translate.bleu_score":{SmoothingFunction:[317,2,1,""],brevity_penalty:[317,1,1,""],closest_ref_length:[317,1,1,""],corpus_bleu:[317,1,1,""],modified_precision:[317,1,1,""],sentence_bleu:[317,1,1,""]},"nltk.translate.bleu_score.SmoothingFunction":{__init__:[317,3,1,""],method0:[317,3,1,""],method1:[317,3,1,""],method2:[317,3,1,""],method3:[317,3,1,""],method4:[317,3,1,""],method5:[317,3,1,""],method6:[317,3,1,""],method7:[317,3,1,""]},"nltk.translate.chrf_score":{chrf_precision_recall_fscore_support:[318,1,1,""],corpus_chrf:[318,1,1,""],sentence_chrf:[318,1,1,""]},"nltk.translate.gale_church":{LanguageIndependent:[319,2,1,""],align_blocks:[319,1,1,""],align_log_prob:[319,1,1,""],align_texts:[319,1,1,""],erfcc:[319,1,1,""],norm_cdf:[319,1,1,""],norm_logsf:[319,1,1,""],parse_token_stream:[319,1,1,""],split_at:[319,1,1,""],trace:[319,1,1,""]},"nltk.translate.gale_church.LanguageIndependent":{AVERAGE_CHARACTERS:[319,4,1,""],PRIORS:[319,4,1,""],VARIANCE_CHARACTERS:[319,4,1,""]},"nltk.translate.gdfa":{grow_diag_final_and:[320,1,1,""]},"nltk.translate.gleu_score":{corpus_gleu:[321,1,1,""],sentence_gleu:[321,
1,1,""]},"nltk.translate.ibm1":{IBMModel1:[322,2,1,""]},"nltk.translate.ibm1.IBMModel1":{__init__:[322,3,1,""],align:[322,3,1,""],align_all:[322,3,1,""],prob_alignment_point:[322,3,1,""],prob_all_alignments:[322,3,1,""],prob_t_a_given_s:[322,3,1,""],set_uniform_probabilities:[322,3,1,""],train:[322,3,1,""]},"nltk.translate.ibm2":{IBMModel2:[323,2,1,""],Model2Counts:[323,2,1,""]},"nltk.translate.ibm2.IBMModel2":{__init__:[323,3,1,""],align:[323,3,1,""],align_all:[323,3,1,""],maximize_alignment_probabilities:[323,3,1,""],prob_alignment_point:[323,3,1,""],prob_all_alignments:[323,3,1,""],prob_t_a_given_s:[323,3,1,""],set_uniform_probabilities:[323,3,1,""],train:[323,3,1,""]},"nltk.translate.ibm2.Model2Counts":{__init__:[323,3,1,""],update_alignment:[323,3,1,""],update_lexical_translation:[323,3,1,""]},"nltk.translate.ibm3":{IBMModel3:[324,2,1,""],Model3Counts:[324,2,1,""]},"nltk.translate.ibm3.IBMModel3":{__init__:[324,3,1,""],maximize_distortion_probabilities:[324,3,1,""],prob_t_a_given_s:[324,3,1,""],reset_probabilities:[324,3,1,""],set_uniform_probabilities:[324,3,1,""],train:[324,3,1,""]},"nltk.translate.ibm3.Model3Counts":{__init__:[324,3,1,""],update_distortion:[324,3,1,""]},"nltk.translate.ibm4":{IBMModel4:[325,2,1,""],Model4Counts:[325,2,1,""]},"nltk.translate.ibm4.IBMModel4":{__init__:[325,3,1,""],maximize_distortion_probabilities:[325,3,1,""],model4_prob_t_a_given_s:[325,3,1,""],prob_t_a_given_s:[325,3,1,""],reset_probabilities:[325,3,1,""],set_uniform_probabilities:[325,3,1,""],train:[325,3,1,""]},"nltk.translate.ibm4.Model4Counts":{__init__:[325,3,1,""],update_distortion:[325,3,1,""]},"nltk.translate.ibm5":{IBMModel5:[326,2,1,""],Model5Counts:[326,2,1,""],Slots:[326,2,1,""]},"nltk.translate.ibm5.IBMModel5":{MIN_SCORE_FACTOR:[326,4,1,""],__init__:[326,3,1,""],hillclimb:[326,3,1,""],maximize_vacancy_probabilities:[326,3,1,""],prob_t_a_given_s:[326,3,1,""],prune:[326,3,1,""],reset_probabilities:[326,3,1,""],sample:[326,3,1,""],set_uniform_probabilities:[326,3,1,""],train:[326,3,1,""]},"nltk.translate.ibm5.Model5Counts":{__init__:[326,3,1,""],update_vacancy:[326,3,1,""]},"nltk.translate.ibm5.Slots":{__init__:[326,3,1,""],occupy:[326,3,1,""],vacancies_at:[326,3,1,""]},"nltk.translate.ibm_model":{AlignmentInfo:[327,2,1,""],Counts:[327,2,1,""],IBMModel:[327,2,1,""],longest_target_sentence_length:[327,1,1,""]},"nltk.translate.ibm_model.AlignmentInfo":{__init__:[327,3,1,""],alignment:[327,4,1,""],center_of_cept:[327,3,1,""],cepts:[327,4,1,""],fertility_of_i:[327,3,1,""],is_head_word:[327,3,1,""],previous_cept:[327,3,1,""],previous_in_tablet:[327,3,1,""],score:[327,4,1,""],src_sentence:[327,4,1,""],trg_sentence:[327,4,1,""],zero_indexed_alignment:[327,3,1,""]},"nltk.translate.ibm_model.Counts":{__init__:[327,3,1,""],update_fertility:[327,3,1,""],update_lexical_translation:[327,3,1,""],update_null_generation:[327,3,1,""]},"nltk.translate.ibm_model.IBMModel":{MIN_PROB:[327,4,1,""],__init__:[327,3,1,""],best_model2_alignment:[327,3,1,""],hillclimb:[327,3,1,""],init_vocab:[327,3,1,""],maximize_fertility_probabilities:[327,3,1,""],maximize_lexical_translation_probabilities:[327,3,1,""],maximize_null_generation_probabilities:[327,3,1,""],neighboring:[327,3,1,""],prob_of_alignments:[327,3,1,""],prob_t_a_given_s:[327,3,1,""],reset_probabilities:[327,3,1,""],sample:[327,3,1,""],set_uniform_probabilities:[327,3,1,""]},"nltk.translate.meteor_score":{align_words:[328,1,1,""],exact_match:[328,1,1,""],meteor_score:[328,1,1,""],single_meteor_score:[328,1,1,""],stem_match:[328,1,1,""],wordnetsyn_match:[328,
1,1,""]},"nltk.translate.metrics":{alignment_error_rate:[329,1,1,""]},"nltk.translate.nist_score":{corpus_nist:[330,1,1,""],nist_length_penalty:[330,1,1,""],sentence_nist:[330,1,1,""]},"nltk.translate.phrase_based":{extract:[331,1,1,""],phrase_extraction:[331,1,1,""]},"nltk.translate.ribes_score":{corpus_ribes:[332,1,1,""],find_increasing_sequences:[332,1,1,""],kendall_tau:[332,1,1,""],position_of_ngram:[332,1,1,""],sentence_ribes:[332,1,1,""],spearman_rho:[332,1,1,""],word_rank_alignment:[332,1,1,""]},"nltk.translate.stack_decoder":{StackDecoder:[333,2,1,""]},"nltk.translate.stack_decoder.StackDecoder":{__init__:[333,3,1,""],beam_threshold:[333,4,1,""],compute_future_scores:[333,3,1,""],distortion_factor:[333,5,1,""],distortion_score:[333,3,1,""],expansion_score:[333,3,1,""],find_all_src_phrases:[333,3,1,""],future_score:[333,3,1,""],stack_size:[333,4,1,""],translate:[333,3,1,""],valid_phrases:[333,3,1,""],word_penalty:[333,4,1,""]},"nltk.tree":{ImmutableMultiParentedTree:[334,2,1,""],ImmutableParentedTree:[334,2,1,""],ImmutableProbabilisticTree:[334,2,1,""],ImmutableTree:[334,2,1,""],MultiParentedTree:[334,2,1,""],ParentedTree:[334,2,1,""],ProbabilisticMixIn:[334,2,1,""],ProbabilisticTree:[334,2,1,""],Tree:[334,2,1,""],bracket_parse:[334,1,1,""],sinica_parse:[334,1,1,""]},"nltk.tree.ImmutableProbabilisticTree":{__init__:[334,3,1,""],convert:[334,3,1,""],copy:[334,3,1,""]},"nltk.tree.ImmutableTree":{__init__:[334,3,1,""],append:[334,3,1,""],extend:[334,3,1,""],pop:[334,3,1,""],remove:[334,3,1,""],reverse:[334,3,1,""],set_label:[334,3,1,""],sort:[334,3,1,""]},"nltk.tree.MultiParentedTree":{__init__:[334,3,1,""],left_siblings:[334,3,1,""],parent_indices:[334,3,1,""],parents:[334,3,1,""],right_siblings:[334,3,1,""],roots:[334,3,1,""],treepositions:[334,3,1,""]},"nltk.tree.ParentedTree":{__init__:[334,3,1,""],left_sibling:[334,3,1,""],parent:[334,3,1,""],parent_index:[334,3,1,""],right_sibling:[334,3,1,""],root:[334,3,1,""],treeposition:[334,3,1,""]},"nltk.tree.ProbabilisticMixIn":{__init__:[334,3,1,""],logprob:[334,3,1,""],prob:[334,3,1,""],set_logprob:[334,3,1,""],set_prob:[334,3,1,""]},"nltk.tree.ProbabilisticTree":{__init__:[334,3,1,""],convert:[334,3,1,""],copy:[334,3,1,""]},"nltk.tree.Tree":{__init__:[334,3,1,""],chomsky_normal_form:[334,3,1,""],collapse_unary:[334,3,1,""],convert:[334,3,1,""],copy:[334,3,1,""],draw:[334,3,1,""],flatten:[334,3,1,""],freeze:[334,3,1,""],fromlist:[334,3,1,""],fromstring:[334,3,1,""],height:[334,3,1,""],label:[334,3,1,""],leaf_treeposition:[334,3,1,""],leaves:[334,3,1,""],node:[334,5,1,""],pformat:[334,3,1,""],pformat_latex_qtree:[334,3,1,""],pos:[334,3,1,""],pprint:[334,3,1,""],pretty_print:[334,3,1,""],productions:[334,3,1,""],set_label:[334,3,1,""],subtrees:[334,3,1,""],treeposition_spanning_leaves:[334,3,1,""],treepositions:[334,3,1,""],un_chomsky_normal_form:[334,3,1,""]},"nltk.treeprettyprinter":{TreePrettyPrinter:[335,2,1,""]},"nltk.treeprettyprinter.TreePrettyPrinter":{__init__:[335,3,1,""],nodecoords:[335,3,1,""],svg:[335,3,1,""],text:[335,3,1,""]},"nltk.treetransforms":{chomsky_normal_form:[336,1,1,""],collapse_unary:[336,1,1,""],un_chomsky_normal_form:[336,1,1,""]},"nltk.twitter":{api:[338,0,0,"-"],common:[339,0,0,"-"],twitter_demo:[340,0,0,"-"],twitterclient:[341,0,0,"-"],util:[342,0,0,"-"]},"nltk.twitter.api":{BasicTweetHandler:[338,2,1,""],LocalTimezoneOffsetWithUTC:[338,2,1,""],TweetHandlerI:[338,2,1,""]},"nltk.twitter.api.BasicTweetHandler":{__init__:[338,3,1,""],counter:[338,4,1,""],do_continue:[338,3,1,""],do_stop:[338,4,1,""]},"nltk.twitt
er.api.LocalTimezoneOffsetWithUTC":{DSTOFFSET:[338,4,1,""],STDOFFSET:[338,4,1,""],utcoffset:[338,3,1,""]},"nltk.twitter.api.TweetHandlerI":{__init__:[338,3,1,""],check_date_limit:[338,3,1,""],handle:[338,3,1,""],on_finish:[338,3,1,""]},"nltk.twitter.common":{extract_fields:[339,1,1,""],get_header_field_list:[339,1,1,""],json2csv:[339,1,1,""],json2csv_entities:[339,1,1,""]},"nltk.twitter.twitter_demo":{ALL:[340,7,1,""],corpusreader_demo:[340,1,1,""],expand_tweetids_demo:[340,1,1,""],followtoscreen_demo:[340,1,1,""],limit_by_time_demo:[340,1,1,""],lookup_by_userid_demo:[340,1,1,""],sampletoscreen_demo:[340,1,1,""],search_demo:[340,1,1,""],setup:[340,1,1,""],streamtofile_demo:[340,1,1,""],tracktoscreen_demo:[340,1,1,""],tweets_by_user_demo:[340,1,1,""],twitterclass_demo:[340,1,1,""],verbose:[340,1,1,""],yesterday:[340,1,1,""]},"nltk.twitter.twitterclient":{Query:[341,2,1,""],Streamer:[341,2,1,""],TweetViewer:[341,2,1,""],TweetWriter:[341,2,1,""],Twitter:[341,2,1,""]},"nltk.twitter.twitterclient.Query":{__init__:[341,3,1,""],expand_tweetids:[341,3,1,""],register:[341,3,1,""],search_tweets:[341,3,1,""],user_info_from_id:[341,3,1,""],user_tweets:[341,3,1,""]},"nltk.twitter.twitterclient.Streamer":{__init__:[341,3,1,""],filter:[341,3,1,""],on_error:[341,3,1,""],on_success:[341,3,1,""],register:[341,3,1,""],sample:[341,3,1,""]},"nltk.twitter.twitterclient.TweetViewer":{handle:[341,3,1,""],on_finish:[341,3,1,""]},"nltk.twitter.twitterclient.TweetWriter":{__init__:[341,3,1,""],do_continue:[341,3,1,""],handle:[341,3,1,""],on_finish:[341,3,1,""],timestamped_file:[341,3,1,""]},"nltk.twitter.twitterclient.Twitter":{__init__:[341,3,1,""],tweets:[341,3,1,""]},"nltk.twitter.util":{Authenticate:[342,2,1,""],add_access_token:[342,1,1,""],credsfromfile:[342,1,1,""],guess_path:[342,1,1,""]},"nltk.twitter.util.Authenticate":{__init__:[342,3,1,""],load_creds:[342,3,1,""]},"nltk.util":{Index:[343,2,1,""],acyclic_branches_depth_first:[343,1,1,""],acyclic_breadth_first:[343,1,1,""],acyclic_depth_first:[343,1,1,""],acyclic_dic2tree:[343,1,1,""],bigrams:[343,1,1,""],binary_search_file:[343,1,1,""],breadth_first:[343,1,1,""],choose:[343,1,1,""],clean_html:[343,1,1,""],clean_url:[343,1,1,""],edge_closure:[343,1,1,""],edges2dot:[343,1,1,""],elementtree_indent:[343,1,1,""],everygrams:[343,1,1,""],filestring:[343,1,1,""],flatten:[343,1,1,""],guess_encoding:[343,1,1,""],in_idle:[343,1,1,""],invert_dict:[343,1,1,""],invert_graph:[343,1,1,""],ngrams:[343,1,1,""],pad_sequence:[343,1,1,""],pairwise:[343,1,1,""],parallelize_preprocess:[343,1,1,""],pr:[343,1,1,""],print_string:[343,1,1,""],re_show:[343,1,1,""],set_proxy:[343,1,1,""],skipgrams:[343,1,1,""],tokenwrap:[343,1,1,""],transitive_closure:[343,1,1,""],trigrams:[343,1,1,""],unique_list:[343,1,1,""],unweighted_minimum_spanning_dict:[343,1,1,""],unweighted_minimum_spanning_digraph:[343,1,1,""],unweighted_minimum_spanning_tree:[343,1,1,""]},"nltk.util.Index":{__init__:[343,3,1,""]},"nltk.wsd":{lesk:[344,1,1,""]},nltk:{app:[1,0,0,"-"],book:[11,0,0,"-"],ccg:[12,0,0,"-"],chat:[18,0,0,"-"],chunk:[25,0,0,"-"],classify:[30,0,0,"-"],cluster:[46,0,0,"-"],collections:[52,0,0,"-"],collocations:[53,0,0,"-"],compat:[54,0,0,"-"],corpus:[55,0,0,"-"],data:[109,0,0,"-"],decorators:[110,0,0,"-"],demo:[0,1,1,""],downloader:[111,0,0,"-"],draw:[112,0,0,"-"],featstruct:[118,0,0,"-"],grammar:[119,0,0,"-"],help:[120,0,0,"-"],inference:[121,0,0,"-"],internals:[129,0,0,"-"],jsontags:[130,0,0,"-"],lazyimport:[131,0,0,"-"],lm:[132,0,0,"-"],metrics:[140,0,0,"-"],misc:[150,0,0,"-"],parse:[156,0,0,"-"],pr
obability:[176,0,0,"-"],sem:[177,0,0,"-"],sentiment:[192,0,0,"-"],stem:[196,0,0,"-"],tag:[209,0,0,"-"],tbl:[223,0,0,"-"],test:[229,0,0,"-"],text:[293,0,0,"-"],tgrep:[294,0,0,"-"],tokenize:[295,0,0,"-"],toolbox:[314,0,0,"-"],translate:[315,0,0,"-"],tree:[334,0,0,"-"],treeprettyprinter:[335,0,0,"-"],treetransforms:[336,0,0,"-"],twitter:[337,0,0,"-"],util:[343,0,0,"-"],wsd:[344,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python class"],"3":["py","method","Python method"],"4":["py","attribute","Python attribute"],"5":["py","property","Python property"],"6":["py","exception","Python exception"],"7":["py","data","Python data"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class","3":"py:method","4":"py:attribute","5":"py:property","6":"py:exception","7":"py:data"},terms:{"0":[14,25,28,29,30,32,33,35,36,39,46,48,52,57,66,67,71,74,75,76,77,78,79,81,82,83,84,89,90,93,98,102,104,105,109,111,115,118,119,124,126,127,128,129,132,134,135,137,139,141,142,143,145,147,148,149,151,159,160,161,162,163,164,167,168,170,171,173,174,175,176,177,180,183,189,191,193,195,204,206,209,211,212,213,214,218,219,224,226,227,228,293,294,295,298,299,301,302,304,310,312,313,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,341,343,347,349],"00":148,"000":[25,94,145,148,212,349],"00001":163,"000010000100":148,"000100000010":148,"001":[212,227,299],"006060":116,"0099":319,"01":[57,71,72,93,105,119,160,343],"0100":148,"011":[148,319],"02":[57,71,93,105,160,343,344],"03":[57,93,105,332,336],"033":[325,326],"0370":317,"04":[57,81,89,93,94,105],"05":[32,160],"054":324,"06":[48,50,57,67,105,343],"061":322,"072":322,"07692":317,"08":[57,71,93],"08144":321,"08144v2":321,"089":319,"0909":321,"0a0":349,"0a1":349,"0a2":349,"0a4":349,"0b":349,"0b1":349,"0b2":349,"0b9":349,"0th":332,"1":[23,28,29,33,36,46,48,49,50,51,52,53,57,61,66,67,71,74,77,78,79,83,89,90,98,100,101,102,103,104,105,107,109,110,111,115,116,117,118,119,128,129,131,132,133,134,135,137,139,141,142,143,145,147,148,149,159,160,161,162,163,164,166,167,168,173,174,175,176,179,183,184,189,195,200,201,202,206,212,213,214,215,219,221,224,226,227,228,259,284,294,298,301,302,305,310,311,312,313,316,317,318,319,320,321,322,323,324,325,326,327,329,330,331,332,333,334,335,336,338,341,343,344,349],"10":[28,32,33,35,57,71,81,88,94,98,103,104,111,116,128,129,145,160,161,167,176,181,189,194,212,214,283,301,309,310,312,313,316,319,320,331,332,335,340,349],"100":[32,35,52,102,107,145,148,194,212,293,304,341],"1000":[38,43,57,71,194,221,224],"10000":194,"100000010000":148,"10020":[57,71],"1004":319,"101":304,"1011":[57,71],"1017":[57,104],"1019":[57,71],"102":304,"1024":39,"1029_paper":[57,83],"103":[304,313],"1031":221,"104":[94,313],"1040":[282,317,318],"105":304,"1054":336,"106":304,"107":[57,71,299,307],"109":[57,71,184,187,313],"1092":332,"10k":349,"10th":318,"11":[57,89,104,128,163,167,299,310,313,316,317,320,349],"110":[148,304,313],"1100000001":148,"1100010000":148,"1100100000":148,"1104":216,"111":[148,160,304],"112":304,"113":[304,322],"11370":[57,71],"1181":343,"11829":[57,71],"119":313,"11904":313,"119400":66,"12":[89,111,128,163,212,216,295,304,312,313,316,317,319,320,327,338,341,349],"120":[304,313],"1206":321,"121":304,"122":[304,313],"1221":[57,71],"123":313,"12393":[57,71],"124":[311,313],"125":[93,145,304],"1256":201,"126":304,"1269":212,"127069":66,"1289189":330,"1289273":330,"129":304,"13":[128,129,295,299,304,307,312,313,316,317,320],"130":[98,203,206,304],"13
1":313,"131072":313,"132":[212,313],"134":304,"135":304,"13572":[57,71],"136":[299,307],"1368":179,"137":[203,206],"1376":212,"14":[72,81,88,128,195,203,206,301,304,320,330],"141":[304,335],"14159":129,"142":304,"143":228,"14620633":71,"14642":[57,71],"1468":318,"14733":[57,71],"14743":[57,71],"14744":[57,71],"14751":349,"14894":[57,71],"149":335,"14920":[57,71],"1494":[57,71],"14th":[81,88],"15":[57,72,89,116,128,145,149,163,195,199,304,317,320,341,343],"15082":[57,71],"15384615384615385":132,"157":335,"1581":274,"1597":251,"16":[57,98,101,115,128,163,199,212,304,313,318,320,335],"160":98,"1609":321,"161":212,"16384":102,"1676":[57,71],"16khz":[57,98],"17":[128,198,212,295,304,312,313,317,320],"1740":[57,105],"177":148,"1775":212,"1793":257,"18":[57,67,71,88,105,128,211,212,295,312,313,320],"1829":283,"185":[166,299,307],"1855":212,"189":[57,71],"19":[128,148,199,212,312,320,322,323,324,325,326,327],"1904":[57,71,299,307],"1909":257,"1910":22,"1926":301,"1954":141,"1955":141,"1960":141,"196607":313,"1968":141,"1970":162,"1972":299,"1976":299,"1980":[141,145,203,206],"1982":141,"1984":[299,307],"1985":145,"1986":344,"1988":141,"1989":[145,214],"1990":[53,57,103,145,202],"1993":[319,322,323,324,325,326,327],"1994":[86,146],"1995":[133,135,137,176,211],"1996":[143,169],"1998":[57,66,72,77,103,131,343],"1999":[72,94,131,148],"1a":206,"1b":206,"1e":[48,50,149,318,327],"1gram_1":143,"1gram_j":143,"1h15m":349,"1rc1":349,"1rc2":349,"1rc3":349,"1rc4":349,"1st":[145,318],"2":[14,28,33,46,48,49,52,53,57,66,67,71,74,78,81,83,89,90,92,94,98,101,102,103,105,107,109,110,117,118,128,129,132,133,134,138,139,143,145,148,151,153,159,160,161,162,163,166,167,168,170,171,173,174,175,176,179,183,194,195,201,202,206,212,213,218,219,221,224,226,227,228,285,293,294,298,301,302,304,310,311,312,313,316,317,318,319,320,321,322,323,324,325,326,327,329,330,331,332,333,334,335,336,341,342,343,347,349],"20":[57,71,111,117,128,145,155,160,162,164,214,293,310,312,313,319,320,338,340],"200":[57,71,212,340,341],"2000":[29,94,341,345],"20000410_nyt":[57,71],"2001":[57,71,103,299,349],"2002":[142,148,189,214,302,317,330],"2003":336,"2004":[57,62,81,89,94,317],"2005":[62,81,88,131,167,185,201,320],"2006":[57,67,103,145,302,335],"2007":[141,328],"2008":[67,88,89],"2009":[0,148,299,307,347],"201":212,"2010":[57,71,322,323,324,325,326,327,332,333],"2011":130,"2012":304,"2013":[212,317],"2014":[57,83,195,311,317],"2014a":[57,104],"2014b":[57,104],"2015":[57,104,318,338,341],"2016":[309,318,321],"2017":[198,200,299],"2019":199,"2027":[57,104],"203":[299,307],"2039":[57,104],"205":332,"2070":[57,71],"2086":216,"21":[160,312,313,316,320],"210":148,"21578":349,"217":176,"22":[67,81,88,212,304,320],"227":[57,71],"2271":[57,71],"22nd":[67,88],"23":[57,62,71,165,227,295,304,312,313,320],"233":[57,71],"2341":318,"236":332,"237":176,"24":[57,72,89,132,160,202,211,212,295,304,312,313,320],"2417":212,"2450142":212,"25":[81,93,145,293,304,312,317,320,332],"250":349,"2507":274,"2554":[57,60],"256":[57,71,227],"26":[57,71,212,295,304,312,313,320],"262":[57,71],"263":[322,323,324,325,326,327],"2635971381157267":317,"2656":212,"27":[57,71,90,183,212,295,304,312,313,320,321],"270":349,"274":[57,71],"2770":[57,84],"28":[148,317,320],"280":349,"2857":317,"29":[72,163,198,304,312],"292481250360578":132,"293":195,"2935416":71,"2a":206,"2b":206,"2d":[57,105],"2f":148,"2gram_1":143,"2gram_k":143,"2nd":[94,176],"3":[0,28,29,30,33,42,46,52,53,57,67,69,71,72,78,82,83,87,89,90,98,103,105,107,109,115,117,128,129,132,133,134,139,141,143,144,145,148,159,160,161,
163,167,173,174,176,181,183,186,193,195,198,201,202,203,206,212,218,219,221,224,226,228,276,286,293,295,297,298,301,302,303,304,306,308,312,313,316,317,318,319,320,321,322,323,324,325,326,327,328,330,331,332,333,334,338,339,343,347,348,349,351],"30":[57,71,111,123,124,148,295,302,304,312,313,338,341],"300":[25,212,224,349],"3002":179,"305":212,"306":226,"308":[299,307],"31":[160,173,212,295,304,309,312,313],"311":[299,307,322,323,324,325,326,327],"3128":[343,346],"316":[299,307],"318728":344,"32":[212,302,304,312],"3238":[57,71],"33":[212,304,317],"3330":318,"3333":[57,105],"3346":317,"34":[148,304,313],"340":349,"34200":338,"343":107,"35":[304,313],"352962":[57,71],"353":148,"354":145,"359":145,"3597":332,"36":[148,212,295,304,312,313],"3600":111,"37":[57,103,211,304,312,313],"3709":330,"371":226,"375":148,"38":[295,304,312,313],"382":332,"39":66,"3910":318,"3920":317,"397":212,"3__20000410_nyt":[57,71],"3b":206,"3class":220,"3rd":115,"4":[32,33,46,53,57,67,71,83,90,94,98,103,105,107,117,128,132,134,135,143,145,160,161,167,173,174,176,183,188,200,204,206,212,217,224,287,295,304,312,313,316,317,319,320,321,322,323,324,325,326,327,328,330,331,332,333,334,338,341,343,349],"40":[160,312,338,341,349],"400":148,"4000":310,"406":[145,304],"409":299,"4118":317,"4135":317,"414":145,"414213562373095":117,"42":[129,146,160,293,304],"420":[340,341],"42191":313,"423":176,"42960":86,"43":[304,313],"43072":313,"43135":313,"4352":313,"436":299,"4393":321,"43996":212,"44":[295,299,304,312,313],"44032":313,"44159544":212,"444":115,"4489":317,"449489742783178":132,"45":[57,71,111,126,295,312,313],"46":[160,304,312,313],"4600addf38b8":137,"4607":313,"4619":330,"467":145,"46742":86,"47":[212,304,312,313],"4750":212,"48":[295,312,313,349],"485":302,"49":[295,313],"4905":317,"5":[0,25,29,33,36,52,53,57,71,77,83,89,90,94,98,104,105,107,111,115,128,132,143,145,147,148,152,159,160,162,163,167,173,174,176,189,195,206,209,212,214,217,218,288,295,302,311,312,313,316,317,319,320,322,323,324,325,326,327,328,330,331,332,333,334,335,340,343,349],"50":[57,71,146,148,155,159,162,304,325,347],"500":[57,71,124,160,209,219,224],"5000":[176,194],"5045":317,"51":[142,212,295,304,312,313],"512":129,"52":[142,295,304,312,313],"52266":115,"525":[302,311],"53":312,"54":[142,313,316],"55":[295,304,312,313],"55215":313,"555":160,"557":198,"56":[202,295,304,312,313],"565":94,"5673":321,"57":[304,312],"5714":317,"573":198,"58":[212,295,304,312,313],"5882352941176471":317,"59":[295,304,312,313],"591":332,"5920":317,"59699508":[57,71],"5th":344,"6":[0,52,57,66,71,89,98,105,128,129,148,160,163,173,174,181,206,212,218,312,313,316,317,318,319,320,325,326,331,332,333,343,347,348,349],"60":[126,160,201,312,313],"600":[209,311],"603":212,"60375":[57,101],"6043":[57,71],"61":[163,202,304,313],"6144":321,"62":[304,312],"6223":317,"63":[212,304,312],"6349":318,"63744":313,"639":[42,57,69,82,105,209],"64":[212,295,312,313,348],"64255":313,"65":71,"65072":313,"65103":313,"65381":313,"65500":313,"66":[295,313],"6617":318,"665":[57,71,145],"6666666666666667":329,"67":[212,304,313],"68":[304,312,313],"6830":66,"69":[212,312],"692":[57,71],"7":[33,52,57,71,72,98,105,128,129,160,166,173,206,209,212,218,228,302,312,313,316,317,319,320,326,327,331,332,335,348,349],"70":[32,304,312,334,343],"700":[57,71,212],"700439718141092":132,"705":317,"71":[304,312],"713":228,"72":[152,304,312,313],"722":145,"73":[57,71,295,313],"733":195,"7398":328,"74":[195,336],"75":[28,132,135,137,176,304,312,313,324],"76":[304,312,313],"77":[304,312],"78":[57,71,304,312],"783":145
,"785fb79":141,"79":[293,336],"790":145,"7995322418977615":141,"7b1":349,"8":[39,52,57,93,98,101,109,111,128,145,160,163,167,173,189,194,206,214,218,220,224,309,311,312,314,316,317,319,320,331,332,333,340,348,349],"80":[145,148,293,304],"8000":10,"805":145,"81":[145,304,312],"8199999999999998":141,"82":[304,312],"822":145,"823":29,"83":[312,313],"832":145,"833":145,"839":66,"84":[145,312,313],"845":212,"85":[212,304],"853":[57,67,145],"856":145,"858":145,"86":304,"8668":317,"867":145,"869":145,"87":[94,304],"875":93,"88":[141,145,160,295,301,302,303,304,306,308,312,313],"880":145,"8859":[57,61,101,109,215],"8879":71,"888":115,"889":[145,322],"88n":312,"89":[313,319],"893":145,"894":72,"896":145,"9":[52,57,71,75,82,89,90,103,109,128,132,160,163,173,206,212,219,228,301,311,312,313,316,320,328,331,332,333,348,349],"90":313,"900":145,"9000":160,"905":145,"91":[304,313],"911":145,"913":145,"917":145,"92":304,"9200":317,"921":145,"922":145,"926":145,"93":[304,313],"933":145,"938":323,"93winkler_dist":145,"94":304,"943":145,"944":145,"9444":317,"947":145,"95":304,"951":137,"956":145,"961":145,"967":145,"970":145,"9780596516499":349,"9784873114705":349,"98":313,"982":145,"99":[212,313],"997":160,"999":333,"9_":118,"9b1":349,"9b2":349,"\u00aa":189,"\u00b2":189,"\u00b3":189,"\u00b5":189,"\u00b9":[117,189],"\u00ba":189,"\u00bc":189,"\u00bd":189,"\u00be":189,"\u00df":189,"\u00e0":189,"\u00e0\u1602\u1692":301,"\u00e1":189,"\u00e2":189,"\u00e2\ua527\u15f8":301,"\u00e2\uff4d":301,"\u00e3":189,"\u00e3\u1538\u03f3":301,"\u00e4":[117,189,301],"\u00e5":189,"\u00e6":[117,189],"\u00e6lfric":107,"\u00e6thelr":107,"\u00e7":[117,189],"\u00e8":[117,189,301],"\u00e8\u0433\u0c8b\u0c99":301,"\u00e9":189,"\u00e9\u03e5":301,"\u00e9\ua35f":301,"\u00ea":189,"\u00ea\u1f10":301,"\u00eb":[117,189],"\u00eb\ua395\uaa02\u048d\ua891\u18e9":301,"\u00ec":[117,189],"\u00ed":[117,189,301],"\u00ed\u11d0\u037c\ua38d\u1384\u043e\ua6c1\u026b\u16db\u125c\u1331\u1e07\ua233":301,"\u00ee":189,"\u00ee\u1214":301,"\u00ef":189,"\u00ef\u06af\uab58\u0f5e":301,"\u00f0":189,"\u00f0\u0287\ua5fd\u00f8\u03b9\u1286":301,"\u00f1":189,"\u00f1\u11d2\uabcb\u024f\u06cb\u012b":301,"\u00f2":[189,301],"\u00f3":[189,301],"\u00f4":[189,301],"\u00f5":189,"\u00f6":189,"\u00f8":[117,189,301],"\u00f9":[117,189],"\u00f9\u0ba3":301,"\u00fa":[117,189],"\u00fb":189,"\u00fb\u128a\u0da4\uab81\u1600":301,"\u00fc":[189,301],"\u00fd":[189,301],"\u00fe":[113,117,189],"\u00ff":189,"\u00ff\ua16d\uab3b":301,"\u0109":301,"\u0109\ua283\ua6e0":301,"\u010b":301,"\u010b\u03e7\ua25d":301,"\u010d":301,"\u0117\uabbf\u133f\u17af\u1e69":301,"\u0119\u1d2a\uaa89\ua07c\u1d36":301,"\u011b\uaa1f":301,"\u011d":301,"\u011f":301,"\u0121":301,"\u0123\u0373\u04e7\u1918\u1fb3\u0565\u0442":301,"\u0127\u2c79":301,"\u0129":301,"\u012d\u0d39\uaae0":301,"\u012f\u057e\ua414\ua990":301,"\u013a":301,"\u013e":301,"\u013e\u18a4\u0628":301,"\u014b":301,"\u014d\uaa9f":301,"\u0151\u158e":301,"\u0153":189,"\u0153\u30d5\uab8d\u077f\u11a7":301,"\u0155":301,"\u0159":301,"\u0159\u0275\u2d8a\ua886":301,"\u015b\u06e6":301,"\u015d":301,"\u015d\ua194\u3033":301,"\u0161":189,"\u0161\u0268\u30df\u026a":301,"\u0165\u1c14":301,"\u0167\uff70\u0aa7":301,"\u0169\u1c15":301,"\u016b":301,"\u016f\u14c8\ua141\u047f\u1183":301,"\u0171":301,"\u0173":301,"\u0175":301,"\u0177\u018c\u0521":301,"\u017c\u02a3\ua591\u2c71\ua1b7":301,"\u017c\uffc3":301,"\u017e":301,"\u017e\u06a1":301,"\u017f":301,"\u0180\ua525\ua511":301,"\u0185\u0a20\u11d4\u1498\ua69c":301,"\u0188\u0219\u2c97\ua5bc\u05d3\ufcd1\ua8ae\u151b\u1bbd":301,"\u018c":301,"\u0192":
189,"\u0195":301,"\u0195\u047d":301,"\u019a":301,"\u019e":301,"\u01a1\u1657\u1f90":301,"\u01a1\u1d11":301,"\u01a3":301,"\u01a3\u08a5":301,"\u01ad\ufb4f\uaadc\ua80d":301,"\u01b0\u07a0":301,"\u01b6":301,"\u01b6\u146e":301,"\u01b9\u306f\ufb9e":301,"\u01bb\u30ee":301,"\u01bd":301,"\u01c1\ua4d3\u0d2d\u1ef5\u00fa":301,"\u01c6":301,"\u01c6\u0da2\ua669":301,"\u01c9\u0db7":301,"\u01d2\u114b":301,"\u01d6":301,"\u01dc":301,"\u01dc\u16a0\u1915\u2107\u31f7":301,"\u01dd\u1130\ua201":301,"\u01df":301,"\u01e1":301,"\u01e3\u2cdd":301,"\u01e3\ua814\ua2d5\u044e\ufc88\u01ad\u1f74\u0211\u0b83\u06c7":301,"\u01e7":301,"\u01e9":301,"\u01ed\u03b5":301,"\u01ef\u013a\u0986\u0236\ua0e4":301,"\u01ef\u12d9\uab9d":301,"\u01f3\u198c":301,"\u01f5\ua0bd\ufb23\u0119":301,"\u01fd":301,"\u01ff\u1a3a":301,"\u0205\u1fa3\uff6a\ufc5a":301,"\u0207":301,"\u020d\ua3ee\u021d\ua9a1\ua139\ufc55\u0441\u10e9\u1d79\ua603\u165e\u03cc\u0cde":301,"\u0213":301,"\u0225\ufb48\ua0fc\ufd12\u1965\u1f51":301,"\u0227\u0932\ua3cc\ua8a8\u0576\u12d2\u1525":301,"\u022b\u03b8\ua723\ufb24":301,"\u022d\u1d4c\u068e\u078c":301,"\u022f":301,"\u0231\u164d":301,"\u0231\u30eb":301,"\u0233\ua1c0":301,"\u0234":301,"\u023c\u212c\ua0cc\ua65b":301,"\u0240\u0d9b\ua51a\u0771\u16b7\u07d1\ua97c\u0161\u0ba8\u14be\ua0a0\u11ef\u15a4\u076f\ua923":301,"\u0242\u044f":301,"\u0247\ua5f0":301,"\u024d\ua434\ua98a":301,"\u0250":301,"\u0251\u0d92\u0c06":301,"\u0253":301,"\u0255":301,"\u0256\ua153":301,"\u0259\u12a6":301,"\u0259\ufc7e":301,"\u025b\u15fa\ufcfa":301,"\u025f\ua717\u31f0\ufb90\u2d86\u03c0\ua56f":301,"\u0260\u1843\u1da5\ua2dc":301,"\u0262":301,"\u0264\u1820\u2d20\u16aa\u3041\ua15e\u090e":301,"\u0265\u0cb0":301,"\u026f\u0587\ua325\ufd31":301,"\u0271":301,"\u0271\u1a12":301,"\u0278\uff53\u111e\ua6ab\uaa6e\u212d\u1674\u0d14":301,"\u027a":301,"\u027c\ua3d1\u1d9a\ufd3a\u024d":301,"\u027d":301,"\u027d\u19bc\ua68f\ua872":301,"\u027f\u30b6":301,"\u0280":301,"\u0282\u0203\u1e09":301,"\u0283":301,"\u0284\u1b91":301,"\u0285":301,"\u0288":301,"\u028a\u14e3":301,"\u0290\u02bd":301,"\u0292\u1527":301,"\u0292\uaa73":301,"\u0293\ufca8t\u162b\u1ee9":301,"\u0295":301,"\u0297\u2c65\u0559\u1751\u1969":301,"\u029c\u02a2\u1e9d\u1f60":301,"\u029d":301,"\u029e":301,"\u02a0\u10fd\u0215":301,"\u02a1\u2c78\ua7fe\uaaa2\uff44\ua295\u1c18\u11ec\ua26a\u135a\u0260\u044b\u02e3\ufb3b\u0a25":301,"\u02a5\u1fc7\u01d8":301,"\u02a6":301,"\u02a7":301,"\u02ae":301,"\u02af":301,"\u02b0\u1f57":301,"\u02b2\uff59\u12e8":301,"\u02b5":301,"\u02b6":301,"\u02b7\ua757\ua13a":301,"\u02b8\u1416\u130e\u1c62\ua410\u0c3d":301,"\u02bc\ua8ac\u15cb\u3142\ua37b\ua7f7\u2c3a\ud7e6\ufe9b\u144d\u1a11":301,"\u02be":301,"\u02bf":301,"\u02c0\u16f6":301,"\u02c6":189,"\u02c8\u00e6":301,"\u02c8\u00e6m\u0259z\u0252n":301,"\u02cb\u1635\u0582\u2cb5\u0d2b":301,"\u02cf":301,"\u02d1":301,"\u02ee":301,"\u0377\u1654":301,"\u037a\ua4f1\u1132\ua1e2":301,"\u037b":301,"\u037b\u1509\ua08b\ua93d\u057d":301,"\u037d\u0c19\ua230":301,"\u03ac\u1e91\ua1ed":301,"\u03ad":301,"\u03b1":189,"\u03b1\u056f\u1469\u1534\ua6c5":301,"\u03b2":[189,318],"\u03b2\ua6e5":301,"\u03b3":189,"\u03b4":189,"\u03b4j":325,"\u03b4v":326,"\u03b5":189,"\u03b5\u318c\uff89\u0453\u0240":301,"\u03b6":189,"\u03b7":[189,301],"\u03b7\u0e17\u1ead\u0505\u1840\u1887\ua408\u0620\u1ef5t\u1992\u04bd":301,"\u03b8":[142,189],"\u03b8\u097d\u1119":301,"\u03b8in":142,"\u03b9":189,"\u03b9\u1699\u18e3\u147a\u2c5e":301,"\u03ba":189,"\u03ba\u2cc1\u0465\u134c\u0b93\u2c8f\u095d\u3106":301,"\u03ba\u2dbe":301,"\u03bb":189,"\u03bb\u0461\u0f53":301,"\u03bc":189,"\u03bc\u1a51":301,"\u03bd":189,"\u03be":189,"\u03bf
":189,"\u03bf\u11d9":301,"\u03c0":189,"\u03c0\u2d5d\u1445\u01b4":301,"\u03c1":189,"\u03c2":[189,301],"\u03c3":189,"\u03c4":189,"\u03c4\u0e9c\ua0cf\u2c5c\u16a5\ua445\uab45\ufeef\u09b6\ua1b5\u317c\ua46a":301,"\u03c5":[189,301],"\u03c6":189,"\u03c6\ufd52\ua576":301,"\u03c7":189,"\u03c7\ua2fe":301,"\u03c8":189,"\u03c9":189,"\u03cb\u134b\u1d39\u1e45":301,"\u03cc":301,"\u03d0":301,"\u03d1":189,"\u03d2":189,"\u03d2\ufec0\ua3b0":301,"\u03d5\u0446":301,"\u03d6":189,"\u03d7\u1f82":301,"\u03d9":301,"\u03d9\ua614\u1697\u0586":301,"\u03db\ua976":301,"\u03df":301,"\u03e1":301,"\u03e3\ud7ea":301,"\u03e7":301,"\u03e9":301,"\u03ed\u2c43\ufdbf":301,"\u03ef\u1139\u131a":301,"\u03f1\u1505\u0643\u1a2d\ua317\u08a3":301,"\u03fc\ua4ed\ua206\u2d1d\u095c\ufcac\u0525":301,"\u0430":301,"\u0431\u0443\u043c\u0430\u0436\u043a\u0443":209,"\u0432\u0f69":301,"\u0433\u1825":301,"\u0434\u0432\u0430\u0436\u0434\u044b":209,"\u0435\u1762":301,"\u0436\u1edf":301,"\u0436\ua5a7\ua366\u18cb":301,"\u0438":209,"\u0438\u043b\u044c\u044f":209,"\u0439":301,"\u043a":301,"\u043a\u084d\u0b1f":301,"\u043c":301,"\u043e\u0442\u043e\u0440\u043e\u043f\u0435\u043b":209,"\u043f\u0435\u0440\u0435\u0447\u0438\u0442\u0430\u043b":209,"\u0440":301,"\u0440\uaa46":301,"\u0441\ua5b1":301,"\u0448":301,"\u0449":301,"\u044a\u16a4":301,"\u044d":301,"\u044e":301,"\u0450":301,"\u0450\u0d0c\ua1ba\uffbc\uff99\ufb40\ua362\u15d3":301,"\u0451":301,"\u0451\u052f":301,"\u0452":301,"\u0452\u07f5\u05e3\u0da3\uab79":301,"\u0453\u09a7\u03ae\u10df\u318a":301,"\u0454\u0d21\uab83\u011d\u0163":301,"\u0456\ufb58\u0148":301,"\u0457":301,"\u0458\u313c":301,"\u0459":301,"\u045a\u1f75\u1fa3":301,"\u045a\u2d0a":301,"\u045b\u1189\ufef6":301,"\u045e\u079e\u1907\u1d7c":301,"\u045f\u2cc7\u1412\u1b86\u148f\u1f01":301,"\u046b\u1287\ua935":301,"\u046f\u0f6c":301,"\u0471":301,"\u0475":301,"\u0479\u1664\u0e28":301,"\u048b":301,"\u048f\u2d4e":301,"\u0491":301,"\u0499\u17b0\u1f7a\ufcc3":301,"\u049b":301,"\u04a1":301,"\u04a1\u04b3\ua442\u119f":301,"\u04a5":301,"\u04a7":301,"\u04a7\u11d5\ua46e\u11ff":301,"\u04a9":301,"\u04ad":301,"\u04af\ua2b6":301,"\u04af\ua6af\u2cdf\u1f43\ua531\u1a3c\u1d76\u02e1\u12a8\u04e3\ufb6d\u1fb4\ufc3c\ua971\u08ad\u1270\u2d3a\uaa96\ua17f":301,"\u04b3":301,"\u04b5":301,"\u04b7":301,"\u04b7\u129b\ufdb4":301,"\u04bb":301,"\u04bb\uaa42\u0961":301,"\u04bd":301,"\u04bf\ua4f0\ufc0f":301,"\u04c2":301,"\u04c2\u196b\ua2e1":301,"\u04c6":301,"\u04c8\u1120":301,"\u04ca":301,"\u04d5\ufcea":301,"\u04db":301,"\u04df":301,"\u04e1\ua98b":301,"\u04e5\u1da7\u1748":301,"\u04e7":301,"\u04eb\u148e":301,"\u04ef\u1d78\u0a87":301,"\u04f3\u2d42\u147c\u2dc1\ua90b\u1fa1":301,"\u04f5":301,"\u04f9":301,"\u04fd\ufceb":301,"\u0501":301,"\u0503\uabae":301,"\u0505":301,"\u0507":301,"\u050b":301,"\u050d\u14b7\u1856":301,"\u050f\u1982":301,"\u0513":301,"\u0515\u1116\u1326\u3129\ua111":301,"\u0515\u1554\uff87":301,"\u0519\ud7d4\ua480\uff80\ufd15":301,"\u051f":301,"\u051f\u18df\u1960\u0840\u172b":301,"\u0521":301,"\u0523":301,"\u0529":301,"\u052b\u154e":301,"\u052d\u0672\u31a2\ua0bc\uab5f":301,"\u0562\u1853\uaabb\u1212":301,"\u0563\u151a\u151f\u1f06\ua91f\u1e11\ufe70":301,"\u0564":301,"\u0565\u18e0":301,"\u0566\u16da\ua9a6":301,"\u0567":301,"\u0568\ufd59\u186b":301,"\u056b":301,"\u056c":301,"\u0570":301,"\u0574\uab5a":301,"\u0575\u1564\u1e63":301,"\u0579\u03c9\ua548\ufc66\uff76":301,"\u057a":301,"\u057a\u2c46":301,"\u057b":301,"\u057b\ufdb6":301,"\u057c\u1508\u1bd8":301,"\u057d\ua157\u1662\ua37e\ua4f8\ufc43\u0146":301,"\u057e":301,"\u057f":301,"\u0580\u1d16":301,"\u0581\u0b2b":301,"\u0583":301,"\u0584\ua848\
ua853\u3144":301,"\u0585":301,"\u05d1\u0680":301,"\u05d5\u11f5":301,"\u05d8":301,"\u05dd":301,"\u05df":301,"\u05e2\u3067":301,"\u05e4":301,"\u05ea\ua2e4":301,"\u05f0":301,"\u0622\u11f3\u1639\u071e\u1a4e\u0e95\u079b\u3184\u1bbb":301,"\u0625":301,"\u0627\u0644\u0639\u0631\u0628\u064a":309,"\u0627\u0644\u0643\u0644\u0645\u0627\u062a":309,"\u062a\u0635\u0646\u064a\u0641":309,"\u062a\u0641\u0627\u0639\u064a\u0644":201,"\u062b":301,"\u062d\u1251":301,"\u0630":301,"\u0633\u062a\u0627\u0646\u0641\u0648\u0631\u062f":309,"\u0634":301,"\u0639\u0250":301,"\u063c":301,"\u063d\u314a\u123b\u0a99":301,"\u0640\u2caf\u0d16\u1d7d":301,"\u0641":301,"\u0644":309,"\u0644\u0644\u0643\u0644\u0645\u0627\u062a":309,"\u0646\u2cee\ua01e\u2c40":301,"\u0647\u0630\u0627":309,"\u0647\u0648":309,"\u0648":201,"\u064a\u0639\u0645\u0644":199,"\u066e\u0bb7\u1ec7":301,"\u0675\u1e89":301,"\u0679":301,"\u067c\u0199\u115f\u1d1b\ua54e\ua489\u1112":301,"\u0681\u129d\u1649":301,"\u0682\u1f96\u318d\ufcb1\ufcf7\u14c1\u1f97":301,"\u0683\u0f43\ua423\u1ebb\ua0d1\u1b19":301,"\u0687\u1420":301,"\u0689\u0272\ua5c7":301,"\u068a\u0eae\uaaa8\u03bb":301,"\u0694\ua176\ua190\ua594":301,"\u0695\u0149\ua3f1":301,"\u069f":301,"\u06a0":301,"\u06a2":301,"\u06a3":301,"\u06a4":301,"\u06a5\ua262\uff73\u0907\u0671\u0912\u11d1\u0a5b\u0f5f":301,"\u06a8\u01d4\u30f3":301,"\u06a9":301,"\u06aa":301,"\u06ab\ufc62":301,"\u06b4\u2dd2\uff7a\u1634\ua068\u0a1b\u1e8b\ua024":301,"\u06b6\u044f\u0c93\u19b3":301,"\u06b8\uab04\u1f72":301,"\u06b9\u1f41\u170f\ua6be\ua559\u0131\u1695":301,"\u06ba":301,"\u06be":301,"\u06c0":301,"\u06c3\u0f56\ua3f6\ua1e9":301,"\u06c4\u112d":301,"\u06c5\u02b3":301,"\u06c8":301,"\u06ca\u0266\ufcc5":301,"\u06cd":301,"\u06ce\u3084d\u0430":301,"\u06d0":301,"\u06d1\u0155\ua081":301,"\u06d2\u0632\u1f06":301,"\u06ee\u10d0\ua1f8\u30ce\ua269\u0111\u1585\uaa15\ufb1f\u18b1":301,"\u06fa\ufbac":301,"\u06fb\u0828\ufd7d\u14a9\u0e05":301,"\u06ff\u04cf\u2c89\ua244\u1bdb":301,"\u0715\ua0c1\uaa1d":301,"\u0718":301,"\u0719\u026f":301,"\u071f\ua15c\u1a4f":301,"\u0725\u0138\u11af\u03bd\u015f":301,"\u0727":301,"\u0728":301,"\u072a\ua13b\ufd57\u03eb":301,"\u072d\u311d":301,"\u074f\u2184":301,"\u0755":301,"\u075b\u19a9\u2d4c":301,"\u075c\u078b\ua1ca\u1e85":301,"\u075d":301,"\u075f\ufb4a\uabc2\u1198":301,"\u0762\ua61b\ua68d\u02ca\u14ad\u1108\u170c":301,"\u0766\u10dd\u188b":301,"\u0767\u1e35\u145b\u3076\u0e24":301,"\u076a":301,"\u076d\u0f49":301,"\u0770\u3163":301,"\u0772":301,"\u0774":301,"\u0775\u00ea\u01a8\u11b3\u1222\u15ea":301,"\u0777\u30cd":301,"\u077b":301,"\u077c\ua6c7\u0c1f\ua24c":301,"\u077d\ufcb0\u0568\u1673\u072b\u09aa\u03f8\u05e8\u309d\uff44\uffa9\ud7ed":301,"\u0781\u1a38\ua356\u14ddm":301,"\u0782\ud7b1\uaa99\u2131":301,"\u0784\u0aa5":301,"\u0785":301,"\u0789":301,"\u078e":301,"\u078f\ua970":301,"\u0791\u3122\u12be\u1e89\u0bb9\uff85\ua3d9\u1694\u11da":301,"\u0795\u1415\u0439\u0796":301,"\u0797\ua08e\u020b":301,"\u0799\u18a3\ua30b":301,"\u07a1":301,"\u07b1\u0754":301,"\u07ca":301,"\u07cb\u0765\u0163\ua2ee\uaa6a\u0582\u2cf3\u178c":301,"\u07d4":301,"\u07d7\ua441\ua098":301,"\u07d9\ua114\u09a1\u0c8c":301,"\u07da":301,"\u07dc\u1f32":301,"\u07e2\u2d0c\u120c":301,"\u07e4\u0473\u0576\u0688\u0cbd\u1619\u1895\u084c\u1c64":301,"\u07fa\u1bc6":301,"\u0801\u1e4d\u021f":301,"\u0803":301,"\u0806":301,"\u0807\ua29e":301,"\u080a\u0ca3\u174d\u1ff2\u3171":301,"\u080b\u210c":301,"\u080c\u051d":301,"\u080e\u101d":301,"\u0810\u2cbb":301,"\u0811\u00e3":301,"\u0815\u03dd\u172e":301,"\u0841\u15cf":301,"\u0843":301,"\u0844":301,"\u0845\ua5b3\u028c\u01a5":301,"\u0848":301,"\u084e":301,"\
u0850\u028d\u0479\u072c\u125b":301,"\u0851\u07e0\u15c0":301,"\u0856\u2c36\uab49":301,"\u0857":301,"\u0858\u0d2f\ua89f\ufeac\u1bc0\ua65f\u1e27\u0527\u1144":301,"\u08a6\ua3be\ua0de\u122e":301,"\u08a8":301,"\u08a9\u06cc\u0783":301,"\u08aa\u1632":301,"\u08ae\ufc0d\u1bce\ufc41\u0e84":301,"\u08af\ufedf\u04d3\u1790\ua57a":301,"\u08b1\ua251\u04bf":301,"\u0904\ua470\u155a\u1b09\u30e7\u1192":301,"\u0905\u09ad\u19aa\ua156\ua58a":301,"\u0909":301,"\u090b\ufb02\u2c87":301,"\u0910k\u0714":301,"\u0915\ua66b\u160f\uab7f":301,"\u0916":301,"\u091f\ua44a":301,"\u0923":301,"\u0927\u30d3\ua3a0\u2c47\u010f\u18ef":301,"\u0929\uaa11":301,"\u092a":301,"\u092b\u1169\uab7b\u0c14":301,"\u092d":301,"\u092f\u1c13\u02ad\u138e":301,"\u0931\u1268":301,"\u0939\u2d39\ua0ce\ua042\ua1d7\u118c\u0467\u16ba":301,"\u0958":301,"\u0959\u118b\ua0b9\ua0fb":301,"\u095a":301,"\u095e":301,"\u0971":301,"\u0974\ua382\u1f72\u0b90\u2d85\ufcdb\uaa0d\u03df":301,"\u0975":301,"\u0976":301,"\u0977":301,"\u097a\u14f5\u2c8f":301,"\u097c\u1b07\uab90":301,"\u097e\u2d04\u1f78\u0d06\u143b\ua310\ufc45\u1f79\u0684":301,"\u097f":301,"\u0993\u1f75\u2c4f":301,"\u0996":301,"\u099a":301,"\u09a5":301,"\u09a6":301,"\u09ab\ufbd3\ua93f\ua72b":301,"\u09ac\ufcfe\ufc44\u1d41":301,"\u09b9\ufdad":301,"\u09bd\u09ae":301,"\u09df\u0f54\u063b":301,"\u0a05\ua040":301,"\u0a07":301,"\u0a09\u3043\u1078\u30aa":301,"\u0a0a\u2d10":301,"\u0a0f\u1e2d\u30db\u153f":301,"\u0a13":301,"\u0a15\ua4e6\u04c4\u0979":301,"\u0a16":301,"\u0a18\u107b\u141e\ua93a\u1cee\u0db6\u1c7b":301,"\u0a1a\u1953":301,"\u0a1c\u18b5":301,"\u0a22":301,"\u0a28\u113b":301,"\u0a2d\u1337":301,"\u0a2e":301,"\u0a30\u03c8":301,"\u0a33\u3110":301,"\u0a59":301,"\u0a8b\ua6d3":301,"\u0a8c":301,"\u0a91\ua0ab\ufdf3a":301,"\u0a95\u0b1b\ua9cf\u1916\ufe8f":301,"\u0a98\u16a6\u0dc2\ua182\u0180":301,"\u0a9b":301,"\u0a9c":301,"\u0a9d":301,"\u0a9e\ufb01\u0a36\u1781":301,"\u0aa0":301,"\u0aab\u14a6\ua25b\u14d9\u1eb3\u0c98\u12ba\u1c65\u056e":301,"\u0aaf\uffc2":301,"\u0ab3":301,"\u0ad0":301,"\u0ae0\u174ec\u0113\u0638\u122b":301,"\u0b06":301,"\u0b0a":301,"\u0b10\u044c\u2d58\u0249\u1442\u0475":301,"\u0b13":301,"\u0b16\ua397":301,"\u0b17":301,"\u0b1c":301,"\u0b1ds\u115a\u1a07":301,"\u0b25\ua916":301,"\u0b26\u1f76":301,"\u0b2c\u148d\u1d62":301,"\u0b2f":301,"\u0b33\u06b2\u0f57\u14b4\ua34a":301,"\u0b35\u02a9\u0753\u049d\u1be2":301,"\u0b5f":301,"\u0b85":[57,104,301],"\u0b86":[57,104],"\u0b87":[57,104],"\u0b88":[57,104],"\u0b88\ufcf0\uab30":301,"\u0b89":[57,104],"\u0b8a":301,"\u0b92":301,"\u0b94\uab51\u10f0\u2da4":301,"\u0b95\u1ecf":301,"\u0b9a\u1ef3":301,"\u0b9e\u013c\u3047\ua39b\u30c0":301,"\u0ba4\u1e0d\ua0ef\ufd02":301,"\u0baf\ud7f3":301,"\u0bb0":301,"\u0bb1":301,"\u0bb3\ufdc6\ua870\u15dc\u11eb":301,"\u0c07":301,"\u0c08\uffb6\ua755\u2095":301,"\u0c09\ua36f":301,"\u0c13\u0c26\u1575":301,"\u0c15\u1c1f\u0573\u154c":301,"\u0c17":301,"\u0c1b\u1865":301,"\u0c1c":301,"\u0c20\u142b\u1990":301,"\u0c22":301,"\u0c24\u1656":301,"\u0c28":301,"\u0c2d\u075a":301,"\u0c2e\ufce8\u1d95\ua659\ua1b9\u1679\u0454":301,"\u0c2f\ua9e7":301,"\u0c31\u0f4b":301,"\u0c34\u154f":301,"\u0c35\u3124":301,"\u0c61":301,"\u0c8e\u1edd":301,"\u0c90\u16c8":301,"\u0c92\u14e0":301,"\u0c94":301,"\u0c9e\u0cad\ufc15":301,"\u0c9f":301,"\u0ca4\u2d0f\ua40c":301,"\u0ca7\ua31f":301,"\u0caa\ua400\ua0a8":301,"\u0cae":301,"\u0cb1":301,"\u0cb3\u1027\u2d0a":301,"\u0cb8\u0525\ua529\ufd6a\ua371":301,"\u0cf1":301,"\u0d05\u30b5\ua74b\u0447\uab15\uabc1":301,"\u0d08\ua0f5\ufdc5":301,"\u0d10\u2ca1\ua06f\u1b88":301,"\u0d12":301,"\u0d13\u1f62":301,"\u0d15\u1ee5\u127c\u2c39\u020f\u01aa\u149e\ua3ed":301,"\
u0d18\ua884\u1e9c\u170a\u1ebb\uaa7e":301,"\u0d1a\u1f25\u1fd1\u01f9\u03c6\u1107":301,"\u0d1f\u0137\ua544\u0438\u1794\u024b\ua1d1\u15eb":301,"\u0d20\ufb7d\u2c4c":301,"\u0d25\u16a7\u1b9c\u0b71\ua727\ua219\u159d\u0199\uabb7\u00f4\ufcd8\ufd29\u027b\u1502\u1254v\u2133\ua398":301,"\u0d28\u090f\u124b\ua5df\u1e37\u1512":301,"\u0d30":301,"\u0d31":301,"\u0d32\u0509":301,"\u0d36\u0237\ua568\ua3ea\u1e49":301,"\u0d37\u2d53\u0121":301,"\u0d61\u1830\u1e1b":301,"\u0d7d\u1e41\u3167\uffcb\ufe8b\ua667":301,"\u0d8b":301,"\u0d8f":301,"\u0d90\u1b0c\u31a6":301,"\u0d93\u1913":301,"\u0d96":301,"\u0d9d":301,"\u0d9e":301,"\u0da5\u2cdd":301,"\u0da8\u1e79\u1517\u0c1e\ua89b\u1548":301,"\u0dad":301,"\u0daf\ua5de":301,"\u0db1\ua5e8\u155b\u186f":301,"\u0db5":301,"\u0dba\ua0a4\u14cc":301,"\u0dbb":301,"\u0dc0\u2db2\u0572\ua518\ua613\ua1af":301,"\u0dc5\u08a2\u1e19\u1603\ufbdf\u0148\u1ec9\u1610\u09b8":301,"\u0dc6\ua025":301,"\u0e01":301,"\u0e02":301,"\u0e07\u1b08\u1eff\u01d4":301,"\u0e0d":301,"\u0e12\u1958\u027e\u1b05":301,"\u0e13\u12b5\ua311":301,"\u0e15":301,"\u0e16":301,"\u0e19":301,"\u0e1b\u0233\u18e8":301,"\u0e1d\ua436":301,"\u0e21\u07e7\u0dae\ua3d3":301,"\u0e23\ua09e\u1a46":301,"\u0e25":301,"\u0e29":301,"\u0e2c":301,"\u0e2d":301,"\u0e2e\u2dc6\ua575\u0773":301,"\u0e30\ua873\uabb2":301,"\u0e32\u15b3":301,"\u0e42\uffa7":301,"\u0e43":301,"\u0e44\u0918\ua1a9":301,"\u0e94\u3173\u17dc":301,"\u0e9b\uab86\u0445\u31b2":301,"\u0e9d":301,"\u0e9e\u01be":301,"\u0ea5":301,"\u0ea7\u00f6\u0847\ua73d\ua9e3":301,"\u0eaf\u2c57":301,"\u0eb3\u10e0\u0296\u0906\u1d18\ufc0c\u11cb":301,"\u0ec3\u02d0\u0989":301,"\u0edc\ufdb5":301,"\u0edf\u06a6\u0c1d\u0e8d\u30cf\u15df":301,"\u0f00\ua0e5":301,"\u0f40\u0a5c\ua894\u2090\u138a":301,"\u0f42":301,"\u0f46\u1111\ua571\ua79b\u0d1c":301,"\u0f4c\uff88":301,"\u0f4f":301,"\u0f50\u01d0":301,"\u0f52":301,"\u0f5a\u15dd":301,"\u0f5b\ufc40":301,"\u0f5c\ua0ad\u10e6\u138b":301,"\u0f5d\u30e1\uaaac":301,"\u0f62\uffa0\u01b9\ufecd":301,"\u0f63\ua333\u2135\u12f3\u1e2b\u317e":301,"\u0f68\u0209\u1562\u16e9\ua4e2":301,"\u0f6b":301,"\u0f88\u3049\u049d\u0e27\u0229":301,"\u0f89":301,"\u1000\ud7e8":301,"\u1001\u0a8a\ua52a\u158a":301,"\u1006\ufbe6\ufc00\u1105":301,"\u1007":301,"\u1009\u1919l\ua387":301,"\u100b\ua76d\u1ec1\u160e":301,"\u100c\u1013":301,"\u1010\uabdf\u129f\ua5f8":301,"\u1011\ua35b\ua3a1":301,"\u1016\uab20\u18ec\uffb7\u02c1\ua551":301,"\u101c\u18e6\u1626\u1edb\u0275":301,"\u101e":301,"\u101f\u2d1d\u2c55\u2cb7\u099f\ua342\ua5d0\ufd60":301,"\u1022":301,"\u1025\u1d9c\u02ec\u0b22":301,"\u1026":301,"\u102a\ua751":301,"\u1052\ua083":301,"\u1054\ua1d9\ua9fc\u1c0c":301,"\u1055\u0e0c\u1232\u1e87\ua55b\uff78\ua418\u04fb\u1d31\u1267\u0b0c":301,"\u105a":301,"\u105b":301,"\u105d\u0471":301,"\u1061\u026c":301,"\u1066":301,"\u106f\uaae2\ua369\u11db\u1532\u1494\u00f5":301,"\u1075\ufbf1":301,"\u1077":301,"\u1079":301,"\u108e\u020b\ua31e":301,"\u10d1":301,"\u10d6":301,"\u10d7\u1f78":301,"\u10dc\u31f5\u1ecb\ua023\u1456\u1641":301,"\u10e3\ufca5\u0c39":301,"\u10e4":301,"\u10e7\u2c54\u1696\u2d13\u1a32\u1ee1":301,"\u10eb":301,"\u10ef\u10ed\ufd65\u1917\ua00d\u1487\u04d3":301,"\u10f1\ua47e\u1b31\u1f95\ua058\ua088\ua3cb\ufd5d\u16d6\u0238\ua1be\ua204\ua06e\u2c61\u30ff\u12c0\u0a96":301,"\u10f3\ua1d6\u072f":301,"\u10f4\u092c\ua11b\u0800\u10da":301,"\u10f7\u2c76\ufb85\u05e7\ua61f\u15c7\u1623\uab38\u127d\u168b\ua209\u03f2\u1821\ua2e2":301,"\u10fc\u2d52\uab4b":301,"\u1102\u304c\u01f0\u0572":301,"\u1104":301,"\u1106":301,"\u110a\u1859\u2c4b\u2c68":301,"\u110c":301,"\u110f\u2ca7\ufd95\u01fb":301,"\u1117":301,"\u1124\u3111":301,"\u1126\u1689":301,"\u11
27\u1883\u12c2\uab16":301,"\u112c\u15cc\u1b84":301,"\u112e\u0142\u028b":301,"\u1135\ua539":301,"\u1137\u0215":301,"\u1138":301,"\u113a":301,"\u113d\ua1b1":301,"\u113e\ua921\ufbf8\u18b2":301,"\u1140\u1349\u1f13":301,"\u1146":301,"\u1148\u0698\ua5d7\u145d\ua8a2\uff4b\u0a19":301,"\u114a\u1f30":301,"\u114c\u1f34\ufc08\u1f14\u1796\uab50\u03f5":301,"\u1154\u2cbf\u2c3f":301,"\u1156":301,"\u115c":301,"\u1167\ua2a2\ua506":301,"\u116f":301,"\u1171\u1bbe\u3034":301,"\u1172":301,"\u1173\u2096":301,"\u1174\u147d\u09ce":301,"\u1175\ua152\ua21d":301,"\u1178":301,"\u117a\u098b\u158c\u2c34":301,"\u117b":301,"\u117c":301,"\u117e\u0978\u04f1\uaaa1\u1d10":301,"\u117f\u11e5":301,"\u1182\u1a05":301,"\u1184\u0f4d":301,"\u1185":301,"\u118a":301,"\u1190":301,"\u1193\u3031\u0824\u03b8\u1157":301,"\u119a":301,"\u119c":301,"\u119d\u1278\u2d16\u2c3f\u1b8b":301,"\u119e":301,"\u11a2\ua221\u152b":301,"\u11a4\ufbf4\uaa0e\u1851":301,"\u11a5\u1053":301,"\u11a6":301,"\u11a9\u1115\ufbf0\u03b3":301,"\u11aa\u0ce0":301,"\u11ac\u0455\u168a":301,"\u11ae\u04ed":301,"\u11b0\ud7fa\ufd51\ua097":301,"\u11b2\u1723\u1e37":301,"\u11b4\ua3fc\uab96":301,"\u11b5":301,"\u11b6\u00ee\u0710":301,"\u11b9\u0d17":301,"\u11ba":301,"\u11bc":301,"\u11beg":301,"\u11c0":301,"\u11c1":301,"\u11c2\ua34e\ufc2f\u02ac":301,"\u11c4\u2d5f\u11c8":301,"\u11c5\ufed8\uffb3\u2d18":301,"\u11c7":301,"\u11ca":301,"\u11cc":301,"\u11cf\u12a4":301,"\u11d3":301,"\u11de":301,"\u11e1\u0e20":301,"\u11e4\ua0a5\u1168":301,"\u11f0\u0f6a":301,"\u11f9\u1831":301,"\u1201\u04ab\u04b9\u1d2f\u0455":301,"\u1202\u0b2d\ufe7a\u071a":301,"\u1204\uab23":301,"\u1205":301,"\u1208\u1506\u01e5\u0cb6\u1434\u195a\u0788\u1595\ufea8\uff8e":301,"\u120a\ua168\u0ca5":301,"\u1216\u06b7":301,"\u1217\u2da8":301,"\u1219":301,"\u121a\u30d6":301,"\u121b":301,"\u121e":301,"\u1221":301,"\u1223":301,"\u1226\u14cf":301,"\u1227":301,"\u1229\u044c":301,"\u122a\u3188":301,"\u1231":301,"\u1238\u318e\u0d95\ua945\u0935":301,"\u123c":301,"\u123d\u020d":301,"\u1241\u1b29\u0d1b\u0562\u01ed":301,"\u1242":301,"\u1244\ua02e\uabd6\u14bf":301,"\u1246\ua9e9\u07de":301,"\u124d\u03db\u1efd\u1fa7\u112b":301,"\u1250\ua188\u18c0\u2146\ua5c6\u16f4\u1daf\u189f\u0e81\u1d38":301,"\u1255\u1d64":301,"\u125a\u163d\uab25\u0465":301,"\u125d\u1f63":301,"\u1265\u079c\u0b24\u2c30\u0273\u1ee9\u0928\u11e8\ua85d\ufd3c\u1862\u129a\u1c06":301,"\u1266":301,"\u1269":301,"\u126b\ua351\ufb06\u314e\ua4f6\u1ed9\uabc3\u1db3":301,"\u1275\uab0b\u0113\u0d3a":301,"\u1276\ua5d5\u0431\u08a7\u1ec3":301,"\u1277\u1545":301,"\u127a\ua294\ufc39":301,"\u127f":301,"\u1282\u164e\u0133":301,"\u1283":301,"\u1288\ufcf8\u116d":301,"\u128c\u1823\u062e\u045f":301,"\u128d\u1d3c":301,"\u1290\ufd7a\u11c9\ua30d\ua96a\ua4d9\u1d82":301,"\u1291":301,"\u1293\ua552\u1165\ua192":301,"\u1294\u1008\ua279\u03d7":301,"\u1298":301,"\u1299\u01ba":301,"\u129e":301,"\u12a5":301,"\u12a7\ua5ce\u1fbe\u1131\u2cdf\u01d2\u12e4\ua812\u1076\u2c8d":301,"\u12a9\u14d6\u0b18\ua86b\u1d69\u1197":301,"\u12aa\ua936":301,"\u12ab\u2da0\u1a0d\ua588\ufc0b\ufdab\ufdac":301,"\u12ac\ua5e9":301,"\u12ad\u30dc\u2136\u043f":301,"\u12ae":301,"\u12b0":301,"\u12b2":301,"\u12bb\u0ca0\u31ad\ufc64":301,"\u12bc":301,"\u12bd\ua910\u0c2a":301,"\u12c5":301,"\u12c8":301,"\u12c9\ua31d":301,"\u12cb\u2c42\u0e96":301,"\u12cc\u023c\u04eb\ua2fa\u1b06\u029a\ua008\u0a10":301,"\u12ce\u079d\u19b2":301,"\u12d0\u1248":301,"\u12d1\u1a47":301,"\u12d3":301,"\u12d4\u04cc\u0752\ua173\u166f\uff94\u043d\u2c38":301,"\u12d6\u06c2\u084f\u069d\u310d":301,"\u12d8\u1f15\u02c9":301,"\u12da\ua99c\u1687":301,"\u12db":301,"\u12dc":301,"\u12dd\u0287\u1a26":30
1,"\u12df":301,"\u12e1\uffae\u15de\u07ce":301,"\u12e5\u0b0b\u010d\u1765\u308a\ua006\u30d8\u121f\u132d\ua735":301,"\u12e6\u1ed9\u2d0f\u1324":301,"\u12e9\u1e6f\ua6da":301,"\u12eb\ufce1":301,"\u12ed\u14cb\u1be3":301,"\u12f2\u15b7\u0e99\ua041\u134f\ufd50\u15c9\u12c4\u1e5d\u18e1\ufbd5\u01e1":301,"\u12f4\u0146\u0b9f\ua8a3\ufee0\u0757":301,"\u12f5":301,"\u12f7":301,"\u12f9\u1f42":301,"\u12fa\u1d4e\ua3b9":301,"\u12fc":301,"\u12fd":301,"\u1300\uaa80\u1118":301,"\u1306":301,"\u1307\u119b":301,"\u1308":301,"\u130c\ua88b\ua585\ua36b\ufb33\u2d57\ufb34\u0ea1":301,"\u130d\u1550\ua75d\u17ac":301,"\u1312\u1db2\u1650":301,"\u1319\u044d":301,"\u1320\u3120\u04c8\ufe79\ufce6\u2d38":301,"\u1321":301,"\u1322":301,"\u1323\u2d5b":301,"\u1329\u0175\u30e5\u3134\ufb53":301,"\u132b":301,"\u132c":301,"\u1333":301,"\u1336\u1453":301,"\u133a\ua3ce\u1f65":301,"\u133d\u1849\ua469\u09b7\u2c93":301,"\u1340\u043d\u0481\u0e1f\u03ca":301,"\u1341":301,"\u1342\u056c\u1e8f\u1bd7":301,"\u1347":301,"\u134e":301,"\u1350\ua99e\u1109\u04db\uff41\u04ff\uaaa4\u1d59\u12ee\uaa8d\ua20a\u1a08":301,"\u1351\u1417":301,"\u1356":301,"\u1358\ufd26":301,"\u1380":301,"\u1385":301,"\u1388\u1513\u0b61\u19ab\u1b48\ufb92":301,"\u138c\u0750":301,"\u138f\ufd3d":301,"\u13f8\ufe95\u0171\u04c4\u062a\u2dce\ua7f8\u07d2":301,"\u13fb\u14e6\u2c3b\ufeb1\ufc03\uab64\u100e\u1cf6\u30f9":301,"\u13fc":301,"\u1405\u0b27\ua02f\ud7b4":301,"\u1406\u1b8c\u04cc\u2cc3\ua132":301,"\u1407":301,"\u1408\u00f9\u03f2":301,"\u140b\ua725\u07e5":301,"\u140c":301,"\u1414\u00df\uaa47":301,"\u1418":301,"\u1419\u30e0":301,"\u141a\ua74d":301,"\u141c\u01bf":301,"\u1421\u0456":301,"\u1424\ua0e2":301,"\u1427\ufc76\u1b9a\u07f4\uff4a":301,"\u1428\u17a8\u1f7d":301,"\u142a\u0d2a\u1ebdf":301,"\u142c\ufc2e\u0a86":301,"\u142e\u1463\uab59\u0263\u0da6":301,"\u1430":301,"\u1431":301,"\u1432":301,"\u1436\uff82\u095f\ua943\u156b\ua0b8\u0808\u0a38\ua534":301,"\u1439":301,"\u143a\u11cd\ua027\u0261\u195d\u157d":301,"\u143c\ua844":301,"\u143e\u1798\u131c\u1c7c\u1252":301,"\u143f":301,"\u1440\ufb5d":301,"\u1441\u140e":301,"\u144a\u19a4":301,"\u144b\u0e04\ua0e7":301,"\u144f\u15e7\u1d8d\u0dbd\u183c":301,"\u1450\ua0f7":301,"\u1452\u19c0\ua775\ufc73\u1578\u1012\u1589\u04ab\u1661\uaa85\ua447":301,"\u1454\ua683":301,"\u1457\u1114":301,"\u1460\u31a3\u1d4b":301,"\u146b\uab01":301,"\u1473":301,"\u1477\ua2aa\u04cf":301,"\u1481":301,"\u1483\u0da7":301,"\u1484\u0153\ud7d8":301,"\u148a\u1f03":301,"\u148c\ua440\ufce4":301,"\u1491\u0251\u30b3":301,"\u1492":301,"\u1493":301,"\u1495":301,"\u1497\u1c0e":301,"\u1499\u1533":301,"\u14a0\u0115":301,"\u14a1\u2c45\u04a3\ua014\ua55e":301,"\u14a5":301,"\u14a7":301,"\u14a8\u15a5":301,"\u14aa":301,"\u14b3\uaa9c\ud7df\uffcd\u0921\u11d8\ua6b8\u16dc\u01ce":301,"\u14b6\u1d92\u2cb3\ua6d7":301,"\u14b8":301,"\u14ba\u1a33":301,"\u14bb\u3174":301,"\u14bd\u0c0e\u1263":301,"\u14c0\u30b7\ufecf\u1891":301,"\u14c3\u18bf\u18cc\u15c6\u14c4\ua07e\ua150\ua25a\ua2ac\u0c60":301,"\u14c5":301,"\u14cd\ufecb\u1e7f":301,"\u14ce":301,"\u14d1":301,"\u14d3\u056f":301,"\u14dc\u16be\ua91d\uaba4\u2c33\u076b":301,"\u14de\u069e\ua0df\ua924\ufbe8\u10e8\ua7a5\uab3a\ua91c\u0e9a\ua04a":301,"\u14e4":301,"\u14e5":301,"\u14e7\u111b":301,"\u14ea":301,"\u14eb":301,"\u14ee":301,"\u14f2\u1bcc":301,"\u14f3\ua657":301,"\u14f4\u1839\u1c0b":301,"\u14f9\ua3c3":301,"\u14fa\ua8a1\ua84a\ua44d":301,"\u14fd\ua21f\uab2c\ua237\u03c9\u11fb\u1fb0\ua643":301,"\u14fe\ua55a\u1900":301,"\u14ff\ua7a9":301,"\u1503\ua178":301,"\u1507\u100d\u0265\ufc4e":301,"\u150b":301,"\u150e":301,"\u1511\ufd6c\ua064\ufe76":301,"\u1514":301,"\u1516\ufe89\ufc71\u16ae
":301,"\u1518":301,"\u151c":301,"\u151e\u126c":301,"\u1520\u028a\u1567\ufd67":301,"\u1524\ua41d":301,"\u1526\u00ff\u30a9\ua59c\ua88e":301,"\u1528":301,"\u1529\u1fe5\ufef3":301,"\u152d":301,"\u152e\ua253":301,"\u1530\u1315\uab48":301,"\u1531\ua735\u1d8b\u00ec":301,"\u1535":301,"\u153b":301,"\u153c\u1f31":301,"\u153d":301,"\u153e\ua72f\ua59f\u1b25\u057c":301,"\u1540\u14c7":301,"\u1542":301,"\u1543":301,"\u1544\ua101\ua669":301,"\u1547\ua4f4\ua411\u0493":301,"\u1549":301,"\u154d":301,"\u1551\ua3ae":301,"\u1552n":301,"\u1556l\ua73f\ua120\ua5f9":301,"\u1559":301,"\u155f\u2c77\u0849\u101b\ua689\u11fa":301,"\u1566":301,"\u1568\u10d8":301,"\u156c\ua4dc\u0e1a":301,"\u1574\ua51f":301,"\u1576":301,"\u1577\u1203":301,"\u1579\u0ec2\u1eb5":301,"\u157e\u190a\uaae8\u154a\u0263\u0846\u116e":301,"\u1583":301,"\u1586\ua198\ua91b":301,"\u1588\ufd2d\uaa76\u2db1\ua450\u168f\u0481":301,"\u158d":301,"\u1593\u15a6":301,"\u1594":301,"\u1596\u037d\u1892\u10d5":301,"\u1597\u1e47\u1768":301,"\u159b\ufe7e":301,"\u159c\ua189\u133b":301,"\u159e":301,"\u159f\u2d0d\u185c":301,"\u15a0\u2c76\ua23e\uaa92\ua009":301,"\u15a3\ufcc6\ua3bc":301,"\u15a8\u189a":301,"\u15a9\ua1f4\u2d22":301,"\u15aa":301,"\u15ab\u0d07":301,"\u15ae":301,"\u15b0\ua1f2m\ufb4e":301,"\u15b1\u0ba9\u1599\ua80c\uff8a\u18d3\u141f\u0ab8\u3051\u079a\ufc1e":301,"\u15b9\u026d\ua370\uaa7a":301,"\u15ba":301,"\u15bb\u0185\u126f\u0a94":301,"\u15bc\u0c05":301,"\u15be":301,"\u15c3\u0ca8\uab9e\u167f":301,"\u15c4\u1967":301,"\u15cd\u01c3":301,"\u15d0\u0140":301,"\u15d1":301,"\u15d4\u1832\ua2ae\ua396\u0a5a\u16b6\u1235\uaa8b":301,"\u15d5\u0636\ufcbd\u1f83":301,"\u15d7":301,"\u15d8":301,"\u15d9\ufb2d\ufb65\ufb94":301,"\u15db":301,"\u15e2\u120b\ua03a\u0239\u14ca\u3156\ua25f":301,"\u15e3\u1404\u1f20":301,"\u15e9":301,"\u15ec\ua107\u182e":301,"\u15ed\u18b3\ufbf2":301,"\u15f0\u0c59\u0211":301,"\u15f3":301,"\u15f5\ufd97\ua607\u049f\u099d\u170b\u1e33\u1153\ufb86\u179e":301,"\u15fc\u1b17\u0f64":301,"\u15fe":301,"\u1608":301,"\u1609\u1d3d\uaa22\u13fa\ua32a\u2c31\u1b2a":301,"\u160a\u311f":301,"\u1611":301,"\u1613":301,"\u1614\u0a39\u1a23\ufd0b":301,"\u161c":301,"\u161d":301,"\u161e":301,"\u161f\u118e\u1e95\u2d0e\u0269":301,"\u1620\u1500":301,"\u1621":301,"\u1624":301,"\u1625\ua54c\ua918\u1a41\ua260":301,"\u162e\ua6ce":301,"\u1630\u158f\ua1b6\ua0ec":301,"\u1636":301,"\u1638\uffd3\u1f27":301,"\u163c\ua28f":301,"\u163e\ua231":301,"\u1640\u09dd\uff84":301,"\u1642\ua7a1":301,"\u1643\u2cd3":301,"\u1644\u1fd0":301,"\u1645\u0d7c":301,"\u164b\ua142":301,"\u164c":301,"\u1651":301,"\u1658\ufcc2\u0685\u03d3":301,"\u1659\u0995\u1433":301,"\u165b\u11ed\ua58f":301,"\u165f\u03ca":301,"\u1660":301,"\u1663":301,"\u1669\u30ac\u12ef":301,"\u166a\u199b\u1eb7\u117d":301,"\u166c\u1728\u211a\u03b4":301,"\u1670\u091b\u0447":301,"\u1672\u0247\ua119\u19bf":301,"\u1676\ufda2\ua6a0":301,"\u167a\u31b5\u06d5":301,"\u167b\ufc98\u1720\u0c10\u1a0e\u06d3":301,"\u167e\u2d07\u0b15\u1bc3\u2cf3\ua319\u0f61\ua14b":301,"\u1682\u1c61":301,"\u1683\u2097\u049b":301,"\u168e\ua3f3\ua922":301,"\u1691":301,"\u16a2\u0cb9":301,"\u16a8\u048d\u156f\u2cd7\u3136\ufc80":301,"\u16ab\u1e2f\ua9ac\uab14\u0571":301,"\u16ad":301,"\u16af":301,"\u16b0\ua691":301,"\u16b3":301,"\u16b4\u2ccd":301,"\u16b8\ua0ca\ua3c2\ua422":301,"\u16bc\u1d17\ua8f7":301,"\u16c3":301,"\u16c5\ua0bf":301,"\u16c6\u0107\ua815\u11f4\ua0dd":301,"\u16ce\u1985\uaa6b\ufba3\ua36d":301,"\u16d5":301,"\u16d7\ufccc\ua54f\u0626\u07df":301,"\u16d9\u1ce9\u168d":301,"\u16e3\ua3c1\uffbe\ua882\ua5ae\uff45":301,"\u16e5":301,"\u16e6":301,"\u16e8\u1d14":301,"\u16f3\ufcf4\uab70\ua3db\u1029\u1
303":301,"\u16f7\u1d1d":301,"\u16f8":301,"\u1704\u31fa\ua08f":301,"\u170e":301,"\u1710\ua42e\u01fb":301,"\u1721":301,"\u1722\u160c\u18be":301,"\u172a\ufd9a\u166b\u1726\u11ad\u2c95\ua467\u0444":301,"\u172c\uaac0":301,"\u1731\u183e":301,"\u1740\ua13d":301,"\u1741\u157f":301,"\u1742\ua9fb\u11e6\uaba7\u1536\ufd1a\u12e7\ufc4a\ua039\uabb3":301,"\u1744":301,"\u1745\u1a10":301,"\u174b":301,"\u1760\u044b\u0854\ua428":301,"\u1761":301,"\u1763\u1ea3\u03be":301,"\u1766":301,"\u176e":301,"\u1770\ufd98":301,"\u1782\u11fd\u0561\u15ca\u2c3c\ua171\u1e8f\ufbe0":301,"\u1783\ufbfe\u1c1e":301,"\u1784\u00e7\u049f":301,"\u1789":301,"\u178a":301,"\u1791\u0990":301,"\u1797":301,"\u1799":301,"\u179a\uff6f":301,"\u179b\u050b\u091e\ua645":301,"\u17a5":301,"\u17aa\u1648\ua4d6\ua546\u1f04":301,"\u17ad\u11e9":301,"\u17b2\u0c36\u14ab\ua474":301,"\u17b3\u121d":301,"\u1822":301,"\u1826\uff52\u2c37":301,"\u1828\ufed4\u0793\u0f8a\u2caf":301,"\u182f\u1a35\u09b2":301,"\u1835\u209c\u10e5\u04f5":301,"\u1836\u1d98":301,"\u1838\u11b1\u114d\u1911":301,"\u183b":301,"\u183d":301,"\u183f\u313e\ua392u\ufe9f":301,"\u1841\ufed6\u074e":301,"\u1842":301,"\u1844\ua807":301,"\u1847":301,"\u184a\ufea1":301,"\u184f\u1f7c\uffa8\u30bb":301,"\u1850":301,"\u1857\uaa08\u0c16":301,"\u1858\u0434\u1354":301,"\u185e":301,"\u1861\u0bb6\u11ab":301,"\u1866\u1fe4\ufc8e\ua220":301,"\u1869\uabba\u1253":301,"\u186a\u129c":301,"\u186d":301,"\u1870":301,"\u1874\u190e\u1fc4":301,"\u1876\u1d23":301,"\u1877":301,"\u1880\u2db6":301,"\u1881\ua04c":301,"\u1882\u05d6":301,"\u1889":301,"\u188c\u141d\u00ef\ua388\ufdc2":301,"\u188d":301,"\u188e":301,"\u188f\ua5c9\u2dbc":301,"\u1890\u15bf\u1f64\ua78c":301,"\u1893\uff57":301,"\u1894":301,"\u1898\u0853\u1f66":301,"\u1899\ua1ea\u00eb":301,"\u189b\ua96e\u0d86\ufc90":301,"\u189c":301,"\u189d":301,"\u18a0":301,"\u18a2\u176c\u01f9":301,"\u18a5\ua10d":301,"\u18a7":301,"\u18aa\u0d60":301,"\u18b0":301,"\u18b6\u2d22":301,"\u18bb\ua1e5\ua50f":301,"\u18bc":301,"\u18bd\u047f\u12ea\u1c6c\u018d":301,"\u18c2":301,"\u18c7":301,"\u18ca":301,"\u18ce\ua90c":301,"\u18d1\ua3e8\ua53a\u0a89\u18d4":301,"\u18d2\u1d9f\u2147\uaa60\u18c5":301,"\u18d6\ua321\u3175\u1994\u047b":301,"\u18d7\ua07b\u16df\u195e\u1c23":301,"\u18d8\u3065\u1565":301,"\u18da":301,"\u18dc\u123a":301,"\u18dd":301,"\u18de\u1051":301,"\u18e5\u1884":301,"\u18e7\ua2b9":301,"\u18ea\u2d34\ua7a7\u16ea\ua64b":301,"\u18ee\ufb8d\ufc97\u19a5\u147e":301,"\u18f0\uabc5":301,"\u18f2\u1f32\u3154\ua963\ua1f6\u0571":301,"\u18f4":301,"\u1903":301,"\u1906":301,"\u1908":301,"\u1909":301,"\u190b\u1d0f":301,"\u190c":301,"\u1910\u078d":301,"\u1912":301,"\u191a":301,"\u191c":301,"\u191d":301,"\u1952\ufc49":301,"\u1957":301,"\u1959\u30e9":301,"\u195c\u067d\u0917\u199f\u2d0e":301,"\u1962\ud7d2":301,"\u1963\u029e":301,"\u1964\u1020":301,"\u1968\u1769":301,"\u196a\ua57f":301,"\u1971":301,"\u1972\u0b21\ua761\ufd94\ufcf1":301,"\u1973\ua9ed":301,"\u1981":301,"\u1986\ua53c\ufcdd":301,"\u1987\ua9a3\u0994":301,"\u1988\u1707\u16ca\u1237\ua365\u15c5\ua810":301,"\u198a\u1d79\u0a88\u03fb\ua5c5\u03c4\u0d2c\ua6cb\u0519\u0299":301,"\u198f\u14d2\u0aa3\u3113\ufebb":301,"\u1991\u14f8\u10ec\u067e\ua741\u1d47\u1466\u080f\u06e5":301,"\u1996":301,"\u1997\u183a":301,"\u199c\uff9a\uffc7":301,"\u199e\ua5fc\ufc59":301,"\u19a2\ua17b\u0908\u12a3":301,"\u19a3\ua26d\u0567":301,"\u19a7\ua5d4\u03eb\u1015\u19c5\u1e6b":301,"\u19a8\u1191":301,"\u19b0":301,"\u19b7\u0726\ufbaf\u305e\ua69d":301,"\u19b8\u0b3d":301,"\u19b9\u1c70":301,"\u19ba":301,"\u19be":301,"\u19c2":301,"\u19c4":301,"\u19c7":301,"\u19c8":301,"\u19c9\u068d":301,"\u1a04\u1f96":301
,"\u1a09\ua74f\uff4a\u0c2c":301,"\u1a0a\u132a\ufb93":301,"\u1a13":301,"\u1a14":301,"\u1a21\uaa24\u15bd\u18ba":301,"\u1a22\u0aad":301,"\u1a24\u308e\u14b9\ua822\u0a90\ua51c\ufc51\u1d70\uff9f":301,"\u1a25":301,"\u1a28\ua155\ua5dd":301,"\u1a29":301,"\u1a2a":301,"\u1a2b\ua380":301,"\u1a2c":301,"\u1a2e\u0183\ua655\u1fb7":301,"\u1a34\u1e1f":301,"\u1a39\u063e\u1a3f":301,"\u1a3b":301,"\u1a3e\ua6c8\u0692\u0b99":301,"\u1a40\ua060":301,"\u1a42":301,"\u1a45":301,"\u1a49\ufc2d\u1413":301,"\u1a4a":301,"\u1a4c\u1954\ua9a8\ufd1c":301,"\u1a50\ua3e1\u12ca":301,"\u1a52":301,"\u1a53":301,"\u1a54\u0db9":301,"\u1b0d\u1cef":301,"\u1b0e":301,"\u1b0f":301,"\u1b10\u3055\ua7fa":301,"\u1b11":301,"\u1b12\u02a4\ua1e1\u1c6f\u09a4\u1b1e\u0dc1\u124c\ua667\u1e05":301,"\u1b14":301,"\u1b15":301,"\u1b18\u1e4d\u0c23":301,"\u1b1a\ufcaf\u3141":301,"\u1b1c\ua778\uabe2\ua583\u0280\u1eed\u1ff3":301,"\u1b1d":301,"\u1b1f":301,"\u1b22\ufbdb":301,"\u1b24\u172f\u2cd3\u3087":301,"\u1b27\ufc8f\u1459\u0729\u2c66\u0720":301,"\u1b2c":301,"\u1b2e":301,"\u1b2f\u2d96":301,"\u1b30\u1b13\u11d6":301,"\u1b32\u1dbb\u3090\u1652\u1338\ua38b\u03cd\u1b23\u19bd\u1113\u1558\u1f43\ua785":301,"\u1b4a":301,"\u1b4b\u0b2a":301,"\u1b85":301,"\u1b8a\ufb49\u0d8a":301,"\u1b8e\u1896\ua30c":301,"\u1b8f":301,"\u1b90":301,"\u1b93":301,"\u1b98\uff75":301,"\u1b9b\u31b4\ua86d":301,"\u1b9d":301,"\u1b9f\u0802\ua03b\ufc28":301,"\u1ba0":301,"\u1bae\u1451\ufbed\u12b4":301,"\u1baf\u025c\u04ad\u118d\u15e4\u1271":301,"\u1bba":301,"\u1bc1\ua939\ua93b":301,"\u1bc4\u178d\ufc95\ua4de":301,"\u1bc7":301,"\u1bc9":301,"\u1bcb\u17a0\ua1c6\u1472":301,"\u1bcd\u1335\u1727":301,"\u1bcf\ua5bd\u11c6":301,"\u1bd0\u1e51":301,"\u1bd2":301,"\u1bd3\u1425\u090c\u107e\u3083":301,"\u1bd4\u3089":301,"\u1bd5\uff6d":301,"\u1bd6":301,"\u1bde\u1d7b":301,"\u1be1":301,"\u1be4\u2d24":301,"\u1be5":301,"\u1c02\ua04e":301,"\u1c07\u2124\u1080":301,"\u1c19\u1e4b\u3157\u069b":301,"\u1c1d\u1fa5":301,"\u1c20\ua77f":301,"\u1c22":301,"\u1c4e\ua048\u04ff":301,"\u1c5a\u15e8\ufedc":301,"\u1c5b\u1d0e\ua340\ua9b0":301,"\u1c5d":301,"\u1c5e":301,"\u1c63":301,"\u1c66\u077e":301,"\u1c67\u03b6":301,"\u1c69":301,"\u1c6a\u2091\u2cbd\uff7e\u18f5\u1387\u213e\u043b":301,"\u1c6b\u176b\u14b2":301,"\u1c6d":301,"\u1c71\u03dd":301,"\u1c74":301,"\u1c75\ua43e\ua7a3\u3077":301,"\u1c79\u1136\u1a16":301,"\u1c7a\ufd9d\u2c46\u0b8e":301,"\u1ceb\u2c8b\ufcb4\u124a\ufdc4\ua64d\ua1c2\u156d\u1d60\ufdb8\ufeb3\u30d4\u0b23\u10ea":301,"\u1cec":301,"\u1cf1":301,"\u1cf5\ufc9e\u1c10\ua476\u2d41\u1c08\u01a5":301,"\u1d00":301,"\u1d07":301,"\u1d08":301,"\u1d09":301,"\u1d0b":301,"\u1d12":301,"\u1d13\u30a5\u0101\uffd4\u19c6\u2c40\ufb05\ua73b\u3058":301,"\u1d1a":301,"\u1d1e":301,"\u1d1f":301,"\u1d22\u1d28\ua053\ua2f0\u11b8":301,"\u1d24\ua770":301,"\u1d25\ud7be\u02ce":301,"\u1d26\u1bc5\u146f":301,"\u1d27\u1750\u1162":301,"\u1d2b":301,"\u1d2c\uabc7\u00f6\u178e\ua50e\u1d58\u11a1\ua406\ua517\u0142":301,"\u1d2d":301,"\u1d35\u0c9d":301,"\u1d40\ufced\u084a":301,"\u1d42\u017a\u185b":301,"\u1d43\ufca0\u16f5\ua66d":301,"\u1d44\u1c09\ua106\u01a8":301,"\u1d46":301,"\u1d49\uabb5\ufd27\u012b":301,"\u1d4d\u0d4e":301,"\u1d4f":301,"\u1d50\ufbff":301,"\u1d51\ua663\ufc13\u1f12":301,"\u1d52":301,"\u1d53\u068c\u1980":301,"\u1d56":301,"\u1d57":301,"\u1d5c":301,"\u1d5d":301,"\u1d5f\ua3e5\ua739\u3131":301,"\u1d61\u1295\u0d1d":301,"\u1d65":301,"\u1d66\ua7fd":301,"\u1d67\ud7e1\u0a23\u1f33\ua254":301,"\u1d6a":301,"\u1d6c":301,"\u1d71\ufe71\u1daa\u162f\u046d\ua297":301,"\u1d72\ua09a\u1fe1\u071c\u19a0":301,"\u1d75":301,"\u1d7d\u1163\ua1e7\u06bf\u10d4":301,"\u1d7e\u1d19\ufd14":301,"\u1d7f":301,"
\u1d81\u0371":301,"\u1d83":301,"\u1d84\ua404":301,"\u1d86\u0776":301,"\u1d87\ufc65":301,"\u1d89\uff54\u0289\u03c1\u3079\ua313\u1485\u0144\u1296":301,"\u1d8a":301,"\u1d8c\u1bc8":301,"\u1d8e\ufd19":301,"\u1d8f":301,"\u1d90\u155c\u0268\u01dd":301,"\u1d94":301,"\u1d97":301,"\u1d9b\u0e1e":301,"\u1d9d\u1786\u1122":301,"\u1d9e\u14d7":301,"\u1da0\u11df\u16e4\u1fe3":301,"\u1da1\ua9ea\u2d04":301,"\u1da2\u03ef":301,"\u1da3\u03e5\u0e2b\ua09f\u131e\u1da4\u182a\u1f71":301,"\u1da9":301,"\u1dac\u3161\u00ba":301,"\u1db0\ua657\ud7db\u115e\u076c\u140f":301,"\u1db4":301,"\u1db5\u1ed1\u128b":301,"\u1dba\ua5b6":301,"\u1dbe\u1d34":301,"\u1dbf\ua962":301,"\u1e0d":301,"\u1e0f":301,"\u1e0f\u1c01":301,"\u1e11":301,"\u1e13\u1867\ua797\u1c16\u1d15\ua64d\u1411":301,"\u1e13\ua71c\u2c39":301,"\u1e17\u1eb1":301,"\u1e19":301,"\u1e1d":301,"\u1e21\u3091":301,"\u1e23":301,"\u1e25\ufed9\ua5e5\u2db9\u063a":301,"\u1e27":301,"\u1e29\u0235\u1359":301,"\u1e2d\ufc3e\ua229":301,"\u1e2f\u16d2\u00fd":301,"\u1e31":301,"\u1e33":301,"\u1e39":301,"\u1e39\uab4a\u0d7f":301,"\u1e3b\u0c21\u14df\u100a\ua3d8":301,"\u1e3d\ua1d8":301,"\u1e3f\ua065\u2dbd\u0b20":301,"\u1e41\u0446\ua507\u0936\ua503\ua8b2\ua0f0\u160d":301,"\u1e43":301,"\u1e43\ufb21":301,"\u1e47":301,"\u1e4b":301,"\u1e4f":301,"\u1e53":301,"\u1e55\ufd0a\u1c17":301,"\u1e57":301,"\u1e59\ua3fd\u2d37\u0d2e\u05db\u2c41\u0699\ufc99":301,"\u1e5b":301,"\u1e5f":301,"\u1e5f\u0980":301,"\u1e61":301,"\u1e65":301,"\u1e69":301,"\u1e6b":301,"\u1e6f\u06b3":301,"\u1e71\u0e22\u0566\u15af":301,"\u1e71\u16a9\u0712":301,"\u1e75":301,"\u1e77\u1f85":301,"\u1e79":301,"\u1e7b":301,"\u1e7f\ua43c":301,"\u1e83":301,"\u1e8b\ua508\ua8f6\ua06b":301,"\u1e8d":301,"\u1e93\ufb16\uffd5":301,"\u1e95":301,"\u1e96\ua099":301,"\u1e99":301,"\u1e9b\u1f00":301,"\u1ea1\u1582\ufd6e\u025d\u14e8\u11e7\u18cd\u18d5\u02e4\u0d19\u2cdb\ua862":301,"\u1ea7\ua33e":301,"\u1ea7\uab82\u1510":301,"\u1eab":301,"\u1eaf\u1449":301,"\u1eaf\ua5f2\u06bd\u12f0\uff6b\ufc6d\u15c2":301,"\u1eb3":301,"\u1eb5":301,"\u1eb9\u1004\u01e7":301,"\u1eb9\ufe96\u0cb2\ua9fa\ua5eb\ua43d\ua0c9\u1950":301,"\u1ebf\u01ce\u1627":301,"\u1ec1":301,"\u1ec5":301,"\u1ecb":301,"\u1ecf\ua3f4\u0144\u3168\u2c3e\u106e\u15a7":301,"\u1ed3\u1018\u0d0e":301,"\u1ed5\u2119\u169a\ua02b":301,"\u1ed5\ua116":301,"\u1ed7\uabd2\uabbb\u05e0":301,"\u1edf\ua24f":301,"\u1ee5\u0207\ua227\u02b1":301,"\u1ee7":301,"\u1eeb\ua818\ua44c\ua113\ua54d\u0d1e\u0e09\u1b46\ua212\uaa25\ufb74\u0c0a\u02cc":301,"\u1eed\u18a1\uaa71\u0ab2\ufee3":301,"\u1ef1":301,"\u1ef3":301,"\u1ef7\uffb1\u1d73\ua1a0\ua117\ua54b\uffa1":301,"\u1ef9\u30ea\ua377\ua51d\u028f\u0578":301,"\u1efb\uaa7f\u3071\u140d\u0e18":301,"\u1efd\u207f\u144e":301,"\u1eff\u1f52\ua047\u1e1d":301,"\u1f01":301,"\u1f05\u1785":301,"\u1f07":301,"\u1f11\ua530":301,"\u1f11\uab80\u0db8":301,"\u1f13\u0a1e\ua2c6":301,"\u1f15":301,"\u1f20":301,"\u1f21\u2d4a\ua817":301,"\u1f21\ua185\u1ff7\ua2c9\u046f":301,"\u1f22\ua334\ua358":301,"\u1f23\u18e4\u01c2\u315c\ua12c":301,"\u1f24\uff9e\uabaf":301,"\u1f25":301,"\u1f26\ua6b4\u020f\u1592":301,"\u1f27\ua9b1\ufeb9":301,"\u1f30":301,"\u1f33\ua353\u142d\ufdf0":301,"\u1f34":301,"\u1f37":301,"\u1f37\u0373":301,"\u1f42\ufd2b\u024f\u1a02":301,"\u1f45":301,"\u1f45\ufbe4\u3149\u0db3":301,"\u1f50\ua1f7":301,"\u1f51":301,"\u1f53\u15a1\u1f77":301,"\u1f54":301,"\u1f55":301,"\u1f60\ua964\u2c53\u1c11\ua71a\u028e\ua86e\u06ae":301,"\u1f61":301,"\u1f61\ua330\ua38e\ua781":301,"\u1f62\ua1bd\u1b49\u1256":301,"\u1f65\u1d3b\u1f80\uabcc\ufe8a\u0574\u03ce\u0c37\u103f":301,"\u1f66":301,"\u1f73":301,"\u1f73\ua084\u110e\ua0db\ufe99\u067b\u0223\u04d9\u1872\u
fb71\u00e1\u00b5\u120f\u1d05":301,"\u1f74\u115b\u04d9\u1d0c\ua10a\ua4db\u1ebd\uff6e\u131f":301,"\u1f76\u1833":301,"\u1f7c":301,"\u1f7d\u178b":301,"\u1f81\u2dd1\u0dac":301,"\u1f82\u1e7d\u1646\u1871\u1fa4":301,"\u1f84\u2cb3\ufc06":301,"\u1f84\ufb60\ua865":301,"\u1f85":301,"\u1f86":301,"\u1f87":301,"\u1f87\u0167\ua00f\u2dcd\ua449\ua860\uaaaf":301,"\u1f91":301,"\u1f92\u2d03":301,"\u1f93\ua1ec":301,"\u1f94\u3061\uff71\u133c":301,"\u1fa0\u1e15\u1bd9\uaa8f\u14f6\ufbfb":301,"\u1fa0\ua079\ua076\u186e\ufbae\u2c4b\u03f8\ua541\u1438\u0c2b\u131b\u1e7d":301,"\u1fa1\u116c\u116b\u3145":301,"\u1fa2\u30ae\u2c35":301,"\u1fa5":301,"\u1fa6":301,"\u1fb2\ua769":301,"\u1fb3\u0281\u18c4\u2ca5\u00e7":301,"\u1fc3":301,"\u1fd0\u18b8\ufe73":301,"\u1fd2\u03e1\u03cb":301,"\u1fd3":301,"\u1fd6":301,"\u1fe0\u142f\ua301":301,"\u1fe2\ufbdd\uab05\u16a1\u1d02":301,"\u1fe7":301,"\u2092":301,"\u2093\u2d31":301,"\u2098":301,"\u2102\u1014":301,"\u210a\u2cab\ufc8b\u123e\ua2af":301,"\u210e\ua90f\u00fb\u190f":301,"\u210f":301,"\u2111":[189,301],"\u2113":301,"\u211b":301,"\u211c":[189,301],"\u211d":301,"\u2128\u1e55":301,"\u212f":301,"\u2130":301,"\u2134\u03af\ua115":301,"\u2135":189,"\u2138\u028b\ufc10\ufd1d":301,"\u213c":301,"\u213d\u14d0\u30fe":301,"\u213f":301,"\u2145\u1612\u2c73\u2cc7\u2dbb\u122f\u05dc\ua522\ua615\u1e45":301,"\u214e\ua103\ua3fe":301,"\u2c30\ua145":301,"\u2c31":301,"\u2c32\u14ae\ua90e\u01d0\ua452":301,"\u2c33\u1e2b":301,"\u2c36\u1272\u11c3\u16e0":301,"\u2c38\ua249":301,"\u2c3a":301,"\u2c3d\u013c":301,"\u2c3d\u0805\ua3c5\u1305\ua856":301,"\u2c3e":301,"\u2c41":301,"\u2c42":301,"\u2c43\ua4fa\ufb2b":301,"\u2c45\u0b1a":301,"\u2c47\u1591\ua641\u0219":301,"\u2c48\ufc9d":301,"\u2c49":301,"\u2c4a\ua58e\uabd4\u1447\u1633":301,"\u2c4c\ufe80\ua857":301,"\u2c4d":301,"\u2c4d\u1fe6\ua5a5\u10d3\ua854\u18db":301,"\u2c4f\u1280\u0647":301,"\u2c50":301,"\u2c51\u02a8\ua536\ud7b8\u02c8\u016b\u0794\ua35a\ufdf6\ua17d\u01c9\u31a9\ufda0":301,"\u2c52":301,"\u2c54":301,"\u2c55":301,"\u2c57\u1ef9":301,"\u2c58":301,"\u2c59\ua1ce\ua1cc\ua3b4":301,"\u2c59\ufbaa\u31f6\u17ab":301,"\u2c5d":301,"\u2c5d\u03bd":301,"\u2c65":301,"\u2c68":301,"\u2c6a":301,"\u2c6a\u0509\ua34d\u0722\u0463\u0778\u2d19\ufda5":301,"\u2c6c\uab33\u0973":301,"\u2c74\u307e":301,"\u2c7d\ua6c6\ufb67":301,"\u2c81\u1023":301,"\u2c83\u1537\u312b\u3139":301,"\u2c85":301,"\u2c85\ua226":301,"\u2c87\ua0fd\u0abd\u18f3\u3187":301,"\u2c91\u0eaa\u2c52":301,"\u2c91\u31b0\ua42f":301,"\u2c93\u165c":301,"\u2c97\u1bdc":301,"\u2c9f":301,"\u2ca1":301,"\u2ca3":301,"\u2ca3\u31ac":301,"\u2ca5\u1b83\u1e65\ua725\ua883\u2d12":301,"\u2ca7\u1284\u2c9d":301,"\u2ca9\u1d80":301,"\u2cab\ua35d\u15e1\ua5a9\uaa87\u0686\u2d1f":301,"\u2cad\ua85a":301,"\u2cb1\u0aa4\u0117":301,"\u2cb1\ufd39\u1ea3\u03c3\u01e5":301,"\u2cb9\u045c\u14fc\ua56c\uab72\u0e11\u2cc5":301,"\u2cbd\u067f":301,"\u2cbf\u1ec5\u1e5b\u305a":301,"\u2cc1\u0491":301,"\u2cc3":301,"\u2cc9\u15f6\u30ec\u1382\u1788":301,"\u2ccb\u09f0\ua973\ufd66\ufd7f":301,"\u2ccd\u0a97\u1478\ua02a\u0696":301,"\u2ccf\ua7a7\ua096\u145e\uaa1b\ufc2b\ufd33\u0201\u2099":301,"\u2cd5\u0127":301,"\u2cd9":301,"\u2ce1\u0a2a\u184b":301,"\u2ce1\ua051":301,"\u2ce3":301,"\u2d00":301,"\u2d01\u1123\ud7b3":301,"\u2d01\ua9fd\ufb91":301,"\u2d03\u1c78\u1590\u04b9":301,"\u2d05\uff98":301,"\u2d06\ua443":301,"\u2d06\ufeb7\ua4dd\u1618\u0627\u0717":301,"\u2d08\u1581\ua731":301,"\u2d09":301,"\u2d09\ua858\u0790\u0b07":301,"\u2d0b\u16e1":301,"\u2d0b\u1795":301,"\u2d0c\u161b\u1c03":301,"\u2d0d":301,"\u2d10":301,"\u2d11\u1f7a":301,"\u2d12\u0950":301,"\u2d14\u1efb":301,"\u2d15":301,"\u2d15\ua44e":301,"\u2d1
6e\u0289":301,"\u2d17":301,"\u2d1a\u1749\ua79d":301,"\u2d1b":301,"\u2d1b\ua6a2\u1339":301,"\u2d1c\u15f1\ua2a9\u0842\u1261\ufd7e\u1ed1":301,"\u2d1c\u18b9\u1905":301,"\u2d1e":301,"\u2d1e\ua2a4":301,"\u2d1f":301,"\u2d20":301,"\u2d21\u2cb7":301,"\u2d23":301,"\u2d23\ua020\ua2c3\u1ea9\ua578\ufd5a\u1b8d":301,"\u2d25\u0f45":301,"\u2d25\ua59a\u2c7b\uaaf4\u2d21\u185d":301,"\u2d27\u1024\ufbf3\u198d\u140a\ua34c\ua569\ufe90":301,"\u2d30\u10d2\u2c51\u0c95\ufb66":301,"\u2d33\ud7de\u3044":301,"\u2d35\ua10c\ua655\u0254\u1e6d":301,"\u2d36\uff4f\u1206":301,"\u2d3b":301,"\u2d3c\ua07d\ua199\u1598":301,"\u2d3e":301,"\u2d40":301,"\u2d44\u07a4":301,"\u2d45":301,"\u2d46\ufcf2\ua582":301,"\u2d48":301,"\u2d4d\ua357\u0e87":301,"\u2d51":301,"\u2d54\u2d60":301,"\u2d55":301,"\u2d59":301,"\u2d5a\ua029\u178f":301,"\u2d5c\u0274\ua5b2\u0581\u06b1\uab3d":301,"\u2d5e\u0103\ua2f1\u1fb1":301,"\u2d61\ua930\uabdc\u0203\ufeee\uaab9\ufc1f\ufc9c":301,"\u2d62":301,"\u2d63":301,"\u2d65":301,"\u2d66\u0713\ufd9f":301,"\u2d6f\u15e0\ufeca\ua9b2":301,"\u2d82\ua843\u1b47\u30ab\ua363\uabbc\ua47f\u1f44":301,"\u2d87\u0f59":301,"\u2d88":301,"\u2d89\ufc8a":301,"\u2d8b\u0283\u0374\u1edd\u1262":301,"\u2d8e":301,"\u2d94\u120e\u1c12":301,"\u2da2":301,"\u2da9":301,"\u2daa":301,"\u2dab\u114e\ua567":301,"\u2dac":301,"\u2dae\u0ab6":301,"\u2db4\uff43\uabe0":301,"\u2db8\u1128\ua481\u0aac":301,"\u2dc4\ufc52\u1386\u1a30":301,"\u2dc5\ua938\uaa94\u04d1\u0377":301,"\u2dc9":301,"\u2dca":301,"\u2dcb":301,"\u2dd0\u1160\ufcc9\u021d\u05d7":301,"\u2dd3\u10f2\u1561\u0266\ua2bc":301,"\u2dd5\ufd3b\u1984":301,"\u2dd6":301,"\u2dd8\u12c3\ua304\ufbdc":301,"\u2ddb":301,"\u2ddc":301,"\u2ddd\u0eb0k":301,"\u2e2f\u0b05":301,"\u3006\u1486":301,"\u303c\u0d91\ufb38\u2dc8\u1868":301,"\u3045\ua286\u07a2\ua9ef\uaa41\u019e":301,"\u3048\u1688\u1db1":301,"\u3059\ufd25\ufd69":301,"\u305b\uabc9\u0bb4":301,"\u305d\ua095\ua2fc":301,"\u3063\ua055\u147f":301,"\u3068\u1f71\ufb9f\ufda9\u03af\u0272":301,"\u3069\u2d02\u30cc\u1615\ua535\ua5b7\u2d14\u1746\u05e9\u2da5":301,"\u306d\ua383\uab32\u30ed":301,"\u306e\u3181\ua0b4":301,"\u3070\u1264\u1698\u043b\ufc56\u0b2e\u1a37\u307c":301,"\u3082\ua733\ua239\uaa16":301,"\u3085\ufb6a\ufc87":301,"\u308b\ua5ff\u0201\uab46\u11e0\ua050":301,"\u3092\u14d5\uffaf":301,"\u3093\u0101\u1225\ua8a5\u1e73\ua59d\u2d3f\ua846\u15ad\u1c77":301,"\u3094":301,"\u3095\u2c99\u1310\u1081\u044a\u0aaa\u043e":301,"\u309e":301,"\u30a1\ua021\ua51e\ua57b\u01bf\uabb8\u03c9":301,"\u30a3\u1793\ua202\uab65":301,"\u30a4\ua3e2":301,"\u30ad":301,"\u30af\ufe84\u0103\ua42c\ufd72\u126e":301,"\u30b0":301,"\u30b1\ua29d\u1d91\u2139\ua1b4":301,"\u30b4\ufc0a":301,"\u30bc\u099e\u0f4a":301,"\u30bd":301,"\u30c3\ufbec":301,"\u30c5":301,"\u30c7":301,"\u30c8\u0457":301,"\u30cf\u30ed\u30fc":320,"\u30d1\u0b0f\u120d\ud7f7\u1dad":301,"\u30d7":301,"\u30da\u0ab9":301,"\u30dd\ua893":301,"\u30de\ua90a\u05de\u189e\u0276":301,"\u30e2\u31a8\ua553\ufee2\ufd8a\u15b6\u14a2\u1b0b":301,"\u30e4":301,"\u30e6\ua48b\u15fd\ua073\u2c7c":301,"\u30e8\u1ec7":301,"\u30ef":301,"\u30f2\u3096n\u16b9":301,"\u30f4\uabc4\ua56b\ua0a3":301,"\u30f7\u1557\u30b2\ua41a":301,"\u30fa\u196d":301,"\u30fd\ufc86":301,"\u310a\ua350\u15c8\u14e1\u159a\u00fe\u31ae\ufb46\uaaf2\ua942\ua6dc\u1767\u15f2":301,"\u310b\ua36a\ua47c":301,"\u310c\u16b1\ufc96":301,"\u310e":301,"\u310f\u1d5a\u16dd\ua2be":301,"\u3112\ua067\u014d\u2cd5\u11bb\u0db4\ua054":301,"\u3114\u1e01\u19bb\ua816":301,"\u3115\u00fe\u1e81\uaa07\ufcab":301,"\u3117\ua610":301,"\u3118":301,"\u3119\u00df\ufd99\u1353\u1f67\uffc4":301,"\u311b\u04a3\ua08a\u0f66":301,"\u311e\ud7b2":301,"\u3126\ua013\ua934\
u0751\u014f\u0298\u1ec9":301,"\u3127\u3132\ua314":301,"\u312a\ua465\u0a26\u090d":301,"\u312d":301,"\u3133\u11f2\ud7cb\ua252":301,"\u3137":301,"\u313b\ua1bf\u184c\ua4d1\ufbf5\ud7bc":301,"\u313d\ua16a\u0125\u1d6f":301,"\u313f":301,"\u3140\uaa0f":301,"\u3148\u11ce\u107d\u05d2":301,"\u314c":301,"\u314f":301,"\u3152\ua06a\ua394\u2d2d":301,"\u3153":301,"\u3158\u11dc":301,"\u315a":301,"\u315d\u00e6\u0445\u0578\u1743\ua99f":301,"\u315f":301,"\u3160\u14a4\ua5c4\ud7b0":301,"\u3165\ufef2\u1c5f\ua160":301,"\u3169\ua276":301,"\u316a\u0e0b\u2184":301,"\u316b\ufd8c\u3057\u1ec3":301,"\u316f\u0e03":301,"\u3170\u06c1\ufca3\u1150\u1f7b\u0497":301,"\u3178":301,"\u317d\u1145":301,"\u317f":301,"\u3180":301,"\u3185\u0937":301,"\u318b":301,"\u31a0\ufef0\u14f7\u1a00\ufd89":301,"\u31a4":301,"\u31a5\u1e6d":301,"\u31aa\u1426":301,"\u31ab\u052b\u0911\u09f1\u00f5":301,"\u31af":301,"\u31b1":301,"\u31b3":301,"\u31b9\u31f4\u1848\u10ee\u07e6":301,"\u31bak\u306c":301,"\u31f1\ua783\u09e1":301,"\u31f9\u00e5":301,"\u31fc\u1e09\ufc60\u2c8b\u122c\u0c38":301,"\u31fd":301,"\u31fe":301,"\u31ff":301,"\u4e0d\u9023\u7d9a":320,"\u4e2d\u6587":309,"\u5171":320,"\u5206\u8bcd\u5668":309,"\u5897\u52a0":320,"\u65af\u5766\u798f":309,"\u661f":320,"\u662f":309,"\u6709\u9650\u516c\u53f8":301,"\u671f\u5f85":320,"\u697d\u5929\u682a\u5f0f\u4f1a\u793e":301,"\u6d4b\u8bd5":309,"\u767d\u8272":320,"\u793a\u3057":320,"\u8fd9":309,"\u8fd9\u662f\u65af\u5766\u798f\u4e2d\u6587\u5206\u8bcd\u5668\u6d4b\u8bd5":309,"\u95a2\u6570":320,"\u963f\u91cc\u5df4\u5df4\u96c6\u56e2\u63a7\u80a1":301,"\ua000":301,"\ua002\uff57\ua444\u0115":301,"\ua003\u0aae":301,"\ua004":301,"\ua005":301,"\ua007":301,"\ua00b":301,"\ua00c":301,"\ua00e\u1546\ua2a6\u0e2a\uab9c\ua2c0\ua0e3":301,"\ua012":301,"\ua015\ua558\uaa23":301,"\ua016":301,"\ua017":301,"\ua019":301,"\ua01a\u0d38\u1103":301,"\ua01b\u317a\u0c0c\u1522":301,"\ua01c\u0aa6\u164f":301,"\ua01d":301,"\ua022":301,"\ua026\u1d68\u1d93":301,"\ua028\u10f6\u1999\u1c6e":301,"\ua02c\ua0a2\ua1c1":301,"\ua030\ua1e8":301,"\ua031\ua41f\u097b\u1d0d\ufb89":301,"\ua032\u056a\ua00a\u0563\u17d7":301,"\ua033\u16a3":301,"\ua035\ua36e":301,"\ua037\ua975\ufd22\ufce9":301,"\ua03c\u00fa\u1db7\u3080\u2c32\ufbd9\ufe85\u05f1\ua42a\ua200":301,"\ua03d\ua473\u03b2\u132f\u04b5\u1d30\u1eeb\ua344\u0761\ua763\u05e5\u2149\u174c":301,"\ua03e\uaba2\u1f35\u04dd":301,"\ua043\u1d32\u11bf\u01b0\u0c25\u1860\u0e14\u1a31":301,"\ua044":301,"\ua045":301,"\ua046":301,"\ua049\ua197\u1571\u2d24\u30fc\ua51b\u1951":301,"\ua052\u174f":301,"\ua059\ua842\u1705":301,"\ua05a\u0f8c\u31f8\ua2a5\u1690":301,"\ua05d\u04a5\ua0c7\u113f\u0760\uaa9d\ud7f4\u0249\u0804":301,"\ua05e\u1519":301,"\ua05f\ua138\ua478":301,"\ua061\ufc2a\ua0d6\ua555\ua2fd":301,"\ua063":301,"\ua069":301,"\ua06d":301,"\ua070\ua6db\uffc5":301,"\ua071\ua563\u2da3\ua216\u1d03\ua379":301,"\ua072":301,"\ua074":301,"\ua075\ua1a6":301,"\ua077":301,"\ua078\u315b\ua0eb":301,"\ua07a\u30c1\ua464":301,"\ua080":301,"\ua082\u1685\u0437\u04b1\ufe9d":301,"\ua085\ua6ac\u0758\u0988\u1180\u141b\ua718\u30f6\ua3ba\ua50c":301,"\ua091":301,"\ua093\u0ec0":301,"\ua094\ua77a":301,"\ua09b\ua2bb\ufd0d\u1245":301,"\ua09c\u0e8a\ua6a5":301,"\ua09d\u1187\u1f91":301,"\ua0a7\u1fe0":301,"\ua0a9\u1462":301,"\ua0af\ua75f\u313a\ufc31\u0a1d\ua1dd\u03bf\ua0dc\u2c3c":301,"\ua0b0\u12f1":301,"\ua0b1":301,"\ua0b2\u1677":301,"\ua0b6\ua2ff":301,"\ua0b7\ua8f2":301,"\ua0ba\u0d35\u0435\ua15b\ua616\ua167\u0b5c":301,"\ua0bb\ua290":301,"\ua0c2\ua3ab":301,"\ua0c3":301,"\ua0c8\uff53":301,"\ua0cb\u1f53\u0294\u214e\u30ca\ua010\u04ed\u03b1\u0d8e\ua43f\u3042\uab40\ufcb9\ua460\ua3f
7\u2ce4\ua79d\uff52":301,"\ua0cd":301,"\ua0d0\u074d\u163b\u0c96\u1f70\ua0ac\ua0ed":301,"\ua0d4\u2d2d\uffab\u210b\u1345":301,"\ua0d7\u1eb1\u130f":301,"\ua0da\u1ea1":301,"\ua0e0\ua218\ua468\u1489":301,"\ua0e1\u2cc5\u1f81\u2c61":301,"\ua0e8":301,"\ua0e9\u02c7\u30c4\u2d92":301,"\ua0ea\u1179\u3183\ua84d\u09a3\u2c44\u0d26":301,"\ua0f2":301,"\ua0f4\u1709\u2ccb":301,"\ua0f6\ua41e\u1628":301,"\ua0fa":301,"\ua0fe\ua863\u1956\u152a\ua112\u0477\u1eab":301,"\ua0ff\u10d9\u2d8d\u0e1c":301,"\ua100":301,"\ua102\u0f8b\ua429\u1584":301,"\ua105\u0499\ua8a7":301,"\ua109":301,"\ua10e\u1a44":301,"\ua110\u047b":301,"\ua118\ua5fe":301,"\ua11c\u1e0b":301,"\ua11d":301,"\ua121\ufe86\u0463":301,"\ua122\u06b5":301,"\ua123":301,"\ua125\u09b0":301,"\ua129\u0721":301,"\ua12d\uaa18\u062f\u16bf\u150c":301,"\ua12e":301,"\ua12f\ua3a6":301,"\ua131\ua224":301,"\ua134":301,"\ua135":301,"\ua136":301,"\ua137\u2d08\ua35e":301,"\ua13e\ua619\u1ff6":301,"\ua140\u1827\ud7cd":301,"\ua143\u1228":301,"\ua146\ua04b\u1332\u1224\u1702":301,"\ua147":301,"\ua148\u1110":301,"\ua149":301,"\ua14c\u1d85\u149f":301,"\ua14d\ua19b\ua2c8\u2d1a\u1d1c\ua7ff":301,"\ua14e\uab2b\ua0c4\u2d64\u16cb\ua23b":301,"\ua14f\ua869\uff41\u0477":301,"\ua154":301,"\ua158\ua986\ua30e":301,"\ua159\ua2f4\ua487\ua611\ua98f":301,"\ua15a\u0daa\u1d0a\ua643\u16d8\ua9ae\u0140\ua14a":301,"\ua15d\u3125\ufbef":301,"\ua161\ufb50\uff9d\ua9ee":301,"\ua162\u1622\u052f":301,"\ua163":301,"\ua164":301,"\ua166\u2dde\ua58b\uaa9e\u0523":301,"\ua169":301,"\ua16b\ufc8c":301,"\ua16c\ua849\ufb03":301,"\ua16e\u0637":301,"\ua16f\u2d00":301,"\ua170":301,"\ua175":301,"\ua179":301,"\ua17a":301,"\ua17e":301,"\ua180\u00e5":301,"\ua181\ua61e\ua38c\u17a3\ua1c5\ufdbe":301,"\ua184\ua66e":301,"\ua186\ua0d5\u18b7\ua1dc\u012d\u3135\uffa5\ua08c\u1234\u1297\u1666":301,"\ua18a\u03b0\u30c6\u0286\u15a2":301,"\ua18b\uab29\u1e03\ua925":301,"\ua18d":301,"\ua18e\u3182\uaa01":301,"\ua191\u10e2":301,"\ua196\u0e33\ua595\uff51":301,"\ua19a\ufbf9\u2c89\u110d\u10de\ufce3\u1a36\u1461":301,"\ua19d":301,"\ua19e":301,"\ua19f":301,"\ua1a1\u191b\ua12a\ua23f":301,"\ua1a3\ua5f5\ufb56\u116a\uaa12":301,"\ua1a7\ua961":301,"\ua1a8":301,"\ua1aa\u145f":301,"\ua1ad":301,"\ua1b3\u1ed7\u196c\u156e":301,"\ua1bb\u10db":301,"\ua1bc\u1668\u03be\u11b7\ua432":301,"\ua1c3":301,"\ua1c7\ua967\u0b09\u06cf":301,"\ua1c8\ua55c\u0c87\ua5b0\ua30a\u00e0\u1998\ufdae\u307d\ua3aa\u107f":301,"\ua1c9\u2110\ua57e\u146d\u1213":301,"\ua1cb\u05da\u1e9f\u30f5":301,"\ua1cd":301,"\ua1cf":301,"\ua1d0\u025c":301,"\ua1d2\uff86\u0b30":301,"\ua1d4\u1e59\ua0d3\u15b5\u2c44\u2ca9":301,"\ua1d5\ufd8e":301,"\ua1da\u014b\u0e41\u174a\ufbd4":301,"\ua1db\u1cea\u0759":301,"\ua1de\ua7a3":301,"\ua1df\u17a4\u3176\u162c\u1855":301,"\ua1e0":301,"\ua1e3":301,"\ua1e6":301,"\ua1ee\u1703\ua774\u15fb\ua0f8":301,"\ua1f0":301,"\ua1f1\u050d\u12b9\u0925\u1e51":301,"\ua1f3\u0716\u06b0\u1974":301,"\ua1f5\ua3b3\u1d6b":301,"\ua1f9\ua066\ua57c":301,"\ua1fa\ua407\ua913":301,"\ua1fb\u011b\u12de\u3179\ufd30\ua5ba\u1142\u1465\u07dd\ua1c4\u1247\u03e3\ua266\u16b5\uffbb\u11d7\u0ca6\u0bae\ua0b5":301,"\ua1fc\ua800\u1383\u0213\uab31":301,"\ua1fd\uffa2":301,"\ua1fe\ufea2\ufdbb\u0569":301,"\ua1ff\ua6b7":301,"\ua203\u153a\u1eb7\ufb87":301,"\ua207\u182d\u016d\ua68f":301,"\ua208":301,"\ua20b\u0157":301,"\ua20c\u02ba\ufd53\uff77\ua6aa\u1647\ufc72\ufb9d\u0724\ufb1d\ufb5c":301,"\ua20d\u2c53\u1177":301,"\ua20f\u1d37":301,"\ua211\u190d":301,"\ua213":301,"\ua214":301,"\ua217":301,"\ua21a\u1133\u1355\u2071\u05e6":301,"\ua21bq\u1fc6\u1bbf\ufccb\ua367":301,"\ua21c\u1bca\ua3f0\u2d80\u0bb5\u0db0":301,"\ua223\u0511\u0b32\u3164\u
043f":301,"\ua225\ua2c1\ufd05":301,"\ua228":301,"\ua22c\u064a\u1637":301,"\ua22d\u19b4\u1873\u1e83":301,"\ua22f":301,"\ua232\ua485":301,"\ua234":301,"\ua238\ua3fbv\u07e1\ua995\u056d":301,"\ua23a":301,"\ua23c\u15da\u14b1\u2c9b":301,"\ua23d\ua293":301,"\ua242\u0133\u2dad\u1e35\u1f55\ua897\u05f2":301,"\ua243\ua763":301,"\ua245\u15e5\u05e1":301,"\ua247\ufc11\u2d81\u1845\u0a93\u31b6\ud7ee\ufe98\u1d54":301,"\ua24a":301,"\ua24b\uaa27":301,"\ua24d\u316c\u14db":301,"\ua24e\ua69b\u3032\u1cf3":301,"\ua250\u067a\u0a9f":301,"\ua256\u056d\u095b\u2cbb\uaa0b":301,"\ua257\u1b28\u1f02\u3035\u1435":301,"\ua259\ua1d3\u316d\u0678\u1c1b":301,"\ua25c":301,"\ua25e":301,"\ua261\u045c\u0443\u04fb\ufee5":301,"\ua263":301,"\ua265\u1bbc\u167c\uff51\u1fb0":301,"\ua26b":301,"\ua26e\u145a":301,"\ua26f\ua687\ua996":301,"\ua270\u0b60\ua424":301,"\ua271\u15c1":301,"\ua273\u2cee\u051b\u10f9\u1f93\u046d\ua6d6\ufc68":301,"\ua275":301,"\ua277\u1470":301,"\ua278\ufcef":301,"\ua27a":301,"\ua27b\u0f4e\ua4fd\ua64b\u2cec\ua355\uabd5\u0e26\u07d6\u131d\uabac\u1f92\ua2b2\u2c5b\u1c0f":301,"\ua27c\u1678":301,"\ua27d\u2d47\ua743":301,"\ua27e\u0756":301,"\ua27f":301,"\ua280":301,"\ua281":301,"\ua284":301,"\ua287\ua258\u02e2\ua561\u1f63\ua089\u0258":301,"\ua289\u12a0":301,"\ua28c\ua2b3\ud7c3":301,"\ua28e\ua504\ua73d\u1569\u0442\u162a":301,"\ua291\ua057":301,"\ua292\ufc33":301,"\ua299\u025a\u1fe1\ua33f\u1747\ua2ed":301,"\ua29b":301,"\ua29f\u31f2\ua127\u017a\u12e2":301,"\ua2a0\ufb63":301,"\ua2a1\ufb00\ua972":301,"\ua2a3":301,"\ua2a7\u1653\u07e9\u1389\ua1b8\u1134":301,"\ua2ab":301,"\ua2b0\u1a06\u14a3":301,"\ua2b1\u12cd\ua437\ua151\ua847":301,"\ua2b4\u1a0f":301,"\ua2b5":301,"\ua2ba\u16cd\u17a6\u1ebf":301,"\ua2bd\u0dc4\ua248\uffb5\u1496\u304b\ua72f":301,"\ua2c5\u0b9c\u195b":301,"\ua2c7":301,"\ua2ca":301,"\ua2cb\u210d\ufc42":301,"\ua2cd\u0b38\ua2e3\u1629\ua2ad\u2d11":301,"\ua2ce":301,"\ua2d0":301,"\ua2d1":301,"\ua2d2":301,"\ua2d3\ua759\uaac2":301,"\ua2d6\ufd80\u026b":301,"\ua2d7":301,"\ua2d8\u0517\u0d85\u0469":301,"\ua2d9\u1c1c\u0d0f\ua514":301,"\ua2da":301,"\ua2dd":301,"\ua2de":301,"\ua2df\ua5e0\u1423\u14f1":301,"\ua2e7\u1100\u1c05":301,"\ua2e9\ua3fa\ua31b\ufcb3\ua3cd":301,"\ua2eb\u14ed\u1834\ua475":301,"\ua2f3":301,"\ua2f6":301,"\ua2f8":301,"\ua2f9\ua6e1\uab2d\ufd54\ud7c4":301,"\ua2fb":301,"\ua300":301,"\ua305\ufb54\u0999\ua373\u15d2":301,"\ua306\u1d01\ua5a3\ufc46":301,"\ua312\ua272\u0960\u1c5c\u1475\u014f":301,"\ua316\u0e2f":301,"\ua318\u1455\u0d0b\ua76d\u14c6\ua326":301,"\ua31a":301,"\ua31c":301,"\ua320\ua54a":301,"\ua322\u3177\u1d5b\ua038\u0d8c":301,"\ua323\u111d\ua590\ufd83\u1b0a\u07d8":301,"\ua328":301,"\ua329\uaa75\u0a2f\u0217\u1e98\ud7d1\u04d5":301,"\ua32b\u023f":301,"\ua32c":301,"\ua32e\u1631\u1d5e\u186c":301,"\ua331\u0926\u1ecd":301,"\ua332\u12f8\u1a20\u1c21\ua841":301,"\ua335":301,"\ua336\u14ac\u162d\ua06c\uaaea":301,"\ua337\u0257\ua235\u0c8f":301,"\ua338":301,"\ua33b":301,"\ua33c\ufea0\u0a35\ua984\u158b":301,"\ua33d\u1f86":301,"\ua341":301,"\ua345\u1681\u07ea\u0e0a\ufd74\u1e03":301,"\ua346\u0691":301,"\ua347\ufd10\ua609":301,"\ua34b\u2d4f":301,"\ua34f\ua0d2\u3151":301,"\ua352\u130a\u09a8":301,"\ua364":301,"\ua368":301,"\ua36c":301,"\ua374":301,"\ua375\u03b3\u0443":301,"\ua376":301,"\ua378\u0192\ufb28\ufe93\u0c32":301,"\ua37a":301,"\ua37c\ua3a9\ua608\uab98\ufefc\ua484":301,"\ua381\uffc6\u1159\u12f6":301,"\ua384\ufb26":301,"\ua386":301,"\ua389\ua605\ufd87\u084b":301,"\ua38a":301,"\ua38f\ufd0c\u048b\ua641\u14d8":301,"\ua391":301,"\ua393\u3105\ua2cc\ufeeb\u0d23\ua327":301,"\ua399\ufb95\ufdf1\u1e0b\ua144\u1e67\ua5ee":301,"\ua39a\ua6d9":301
,"\ua39e":301,"\ua39f":301,"\ua3a3\ua799":301,"\ua3a4\ufee6":301,"\ua3a5\uaa4a\u00e5":301,"\ua3a7\ufd09":301,"\ua3ac\u1904\u1a43":301,"\ua3ad\u0644\ua99d":301,"\ua3af\u2dc2":301,"\ua3b1\u01f3\u145c":301,"\ua3b2\ua6c9\ufd96":301,"\ua3b7\ua303":301,"\ua3b8\u2d43":301,"\ua3bb\ua264\u01da\u1309":301,"\ua3bd\ufd04\uff4c\u0ae1\u306b":301,"\ua3bf":301,"\ua3c0":301,"\ua3c4":301,"\ua3c8\u16de":301,"\ua3c9\u1989":301,"\ua3ca":301,"\ua3cf":301,"\ua3d0\uff8f\u1194\u1e1f":301,"\ua3d5":301,"\ua3d6":301,"\ua3d7\uaa05\uff93\u068b\u029f\ua2db":301,"\ua3da\ua1b0":301,"\ua3dc\u1b9e\u1f12":301,"\ua3dd":301,"\ua3de\u30d9":301,"\ua3df\ua03f\u0b1e":301,"\ua3e0\ud7ce":301,"\ua3e3\ua5da\ufc91\ufce0":301,"\ua3e4\u0e88":301,"\ua3e6\u150a":301,"\ua3e7\u091a":301,"\ua3e9":301,"\ua3eb":301,"\ua3ec":301,"\ua3ef\u0c18\ufeda\uff7c\u015b\u309f":301,"\ua3f2":301,"\ua3f8\u16cc\ufef1":301,"\ua401\ufb5e\u1e81\ufbda\ua540\u07cf\u1d4a":301,"\ua402\u022d\u0ca1\u308d\u1fa7":301,"\ua405\u1f31\u015f\u1d2e":301,"\ua409":301,"\ua40a\u04e9":301,"\ua40b\u16e7":301,"\ua40d":301,"\ua40e\u2c99\u15e6\ua215\u2d3d\ua3a2":301,"\ua412\u07a3":301,"\ua415\u0edd\ua665\u1f97":301,"\ua416":301,"\ua417\u1d74":301,"\ua419\u1b21":301,"\ua41c\u146a\u00ec":301,"\ua421":301,"\ua425\ud7e4\ufdc7":301,"\ua42d":301,"\ua430\u156a\u19b5":301,"\ua431\u08a4\ua753":301,"\ua433\ua797\u010f":301,"\ua435\u0b39\u1521":301,"\ua438":301,"\ua439\u18c3":301,"\ua43b\u00f0":301,"\ua446\u1515":301,"\ua448":301,"\ua44b\u037c":301,"\ua44f":301,"\ua451\u050f\u1121\u1e15\ua309\uabe1\u01b4":301,"\ua454\ufd34\u3109":301,"\ua458\u045e\u3146\ud7f6\u0242\u1334\ua871\u11ee\u1db6\ua22a\u0b37":301,"\ua45b\u1b87":301,"\ua45c":301,"\ua45d":301,"\ua45e\u123f\u1e05\ua4f9":301,"\ua461":301,"\ua466\u2c49\ua21e":301,"\ua46b\ua755\ua6e2":301,"\ua46f\u0a24":301,"\ua471\ua602":301,"\ua472\ua59b\u110b":301,"\ua477":301,"\ua479\u2db0\u3060\uaba1\u1cf0":301,"\ua47a\u0254\u18c1\u1995\u2dba\u1488":301,"\ua47b":301,"\ua47d":301,"\ua482\uab39":301,"\ua483":301,"\ua488\u0107":301,"\ua48a\u00f2\u1003\u1147\ua850":301,"\ua48c":301,"\ua4d0":301,"\ua4d2":301,"\ua4d4":301,"\ua4d5":301,"\ua4d7":301,"\ua4d8":301,"\ua4da":301,"\ua4df\u021b":301,"\ua4e0\u3121\u0371\u051d":301,"\ua4e1\uffba":301,"\ua4e3\ufebd":301,"\ua4e4":301,"\ua4e5\ua0c6":301,"\ua4e7\u1343\u03c8\u0d94":301,"\ua4e9\ufe77\u2094":301,"\ua4ea":301,"\ua4eb\u0569\u0da9\u0768":301,"\ua4ec":301,"\ua4ef\u05d4\u0f65\u3186\u1200\ua75b":301,"\ua4f2\u1292\ufb3c\u1701\ua992\u16c0\u199a\u1158\u195f":301,"\ua4f3\ufee1":301,"\ua4f5":301,"\ua4f7\u0438":301,"\ua4fb":301,"\ua4fc":301,"\ua500":301,"\ua501\u1170\u18d9":301,"\ua505":301,"\ua50b\u0527\ufecc":301,"\ua50d\u0693":301,"\ua510":301,"\ua512\ufee4\u14e9":301,"\ua513":301,"\ua515\ua5a8":301,"\ua516\u0cb7\u1260":301,"\ua519\u30c2\uff42\u0690\u10f8\ufc93\u12b8\ua29a\ua19c\u0a85\u043c\u2c3b":301,"\ua520":301,"\ua521\u3107\ufc16":301,"\ua523\ua60a":301,"\ua524":301,"\ua526\u0d0a\ua0a6\uaa14\ua195\ua502\ufcfd\u0d22":301,"\ua528":301,"\ua52c\u17a2":301,"\ua52d\u1005\ua46c\ufc4c":301,"\ua52e\ua76f":301,"\ua532\ua2e5\ua104":301,"\ua537\u0f67":301,"\ua538\u0c9a\u2112\u1616":301,"\ua53b\u1607":301,"\ua53d\u1409\u149b":301,"\ua53f\u1875":301,"\ua542\ua28d\uabda\u315e\u04f7":301,"\ua543\u04f7\u0b87\u2cd7\ua852\uabbe\ua601":301,"\ua545\ua8a9\u1da8\u1186\u0649\ua130":301,"\ua547\u0677":301,"\ua549":301,"\ua550":301,"\ua556\u14af":301,"\ua55f":301,"\ua562\u308c\ua5ed":301,"\ua565":301,"\ua56a\u1f95":301,"\ua56d\ufd5f\u1448":301,"\ua56e":301,"\ua570":301,"\ua573\u0ec4\uaa65\u03c3\u045d\u152c\u1ea5\u1f22":301,"\ua577":301,"\ua
579":301,"\ua57d":301,"\ua581":301,"\ua584\ufed0\ua5ac\u1854\uaa45":301,"\ua586\ua11a\u03ad":301,"\ua58c\u150f\ua589\ufec5\u0a74\u068f\u1d20\ufb8a\u099c\u081a\u0434\ua699":301,"\ua58d\u06bc\ua533\ua791":301,"\ua593":301,"\ua596":301,"\ua598\ua899":301,"\ua599\ua42b\u0d3d\ufd16":301,"\ua59e\u1019\u019a":301,"\ua5a0\ufc5b\u2dda":301,"\ua5a1\u0573":301,"\ua5a2":301,"\ua5a4":301,"\ua5a6\u0633\u0aa1":301,"\ua5aa":301,"\ua5ab\u15b8\u11dd\u1667":301,"\ua5ad":301,"\ua5af":301,"\ua5b4":301,"\ua5b5\u03d4":301,"\ua5b8\ua1a4\uaaf3\u185a\u1693":301,"\ua5b9\u30f0\u0279\u311a\ufd93\u1e75\u1e17\u0ec1\u1fd1":301,"\ua5be\ua288":301,"\ua5bf\u1c73\ua0a1\u0cac\u1f00\ufbd6\uaba0\ua776":301,"\ua5c0\u1563\u1983\ua64f":301,"\ua5c1":301,"\ua5c2\u165a\u18e2":301,"\ua5c3\u02b4":301,"\ua5c8\ua18c\ua2ec\u14ec\u09a2\u1e93\u0a06":301,"\ua5ca":301,"\ua5cb":301,"\ua5cc\ufb97\u02bb\ua1a2\ua2b8\u3062\u2d13":301,"\ua5cd":301,"\ua5cf\ufd11":301,"\ua5d1":301,"\ua5d2\u1313\u14c2\ua056\u06ad\u0157\u157a\ufc4d\u1675":301,"\ua5d6\u0809\ua2a8\u1482\u1d45\u0437\u12ff\u01eb":301,"\ua5d9\u03c7\u1730":301,"\ua5dc\ua3a8":301,"\ua5e1\u04ce\u1f23":301,"\ua5e2":301,"\ua5e4":301,"\ua5e6":301,"\ua5e7":301,"\ua5ea\u023f":301,"\ua5ec\u307a\ua557\u1a4d\u1db9\ufb70":301,"\ua5ef\ud7d6\u0920":301,"\ua5f6":301,"\ua5f7\u0f55\u2dd9\u1f36\u1f44":301,"\ua5fb\ua81c":301,"\ua600\u1304\u1d6d\ua01f\u1f04":301,"\ua604":301,"\ua60c":301,"\ua612\u08b2\ua6bc\u18d0\u2c83":301,"\ua617\u2cb9":301,"\ua618\u1e07":301,"\ua61a\u133e":301,"\ua61c\u0624\u0123\u1f14":301,"\ua61d\uaaa7":301,"\ua62a\u1f94\u06ef\u0b08\u021b\ua22b\ua1e4\ua777\ua172\ufc20\ua0d8":301,"\ua62b\uffa3":301,"\ua645":301,"\ua647\ua3b5":301,"\ua649\u045d":301,"\ua649\ua73f\ua9aa\ua087\u0aa2\u0561\u0e45\u1143":301,"\ua64f\uaaad":301,"\ua651":301,"\ua651\u0177\u1151\u1233\u15ac":301,"\ua653\ua693\u1328\u09e0":301,"\ua653\uab8a\u01ff":301,"\ua659\u14fb":301,"\ua65d\ua307":301,"\ua66b\u2dc0":301,"\ua66d\u2c81\u03c1\ua6ad":301,"\ua67f\u0497":301,"\ua683":301,"\ua685":301,"\ua685\ufd92\ua10b\u1c76":301,"\ua687\u1864\u1539\u2d02\u1e4f\uaab1\u1e63\ua37f":301,"\ua689\u1970\u071b":301,"\ua68b\u0f47":301,"\ua68b\u1ff4\u08a1":301,"\ua68d\u076e\u0e06\u2d84\u00f1":301,"\ua691":301,"\ua693\u2c5c\ua6a9\u1002\u1ee1\ud7d3\u1706\u02cd":301,"\ua695":301,"\ua695\u1d6e\uffaa\u1c68\u15ef":301,"\ua697":301,"\ua697\u0277":301,"\ua699":301,"\ua69b":301,"\ua6a3\u1b26\ufc79\u0511\ua02d\u080d\uab8c\u14b5":301,"\ua6a4":301,"\ua6a6":301,"\ua6a7":301,"\ua6a8\u1ef1":301,"\ua6b0\uab43\uaba8":301,"\ua6b1\ua349\u155e\u16c1\u1b2d":301,"\ua6b2\ua978\u157c":301,"\ua6b5":301,"\ua6ba\u12a2\u0b86\ua566\ua10f":301,"\ua6bb\u14d4\ua246\u311c\u062c\ufcc0":301,"\ua6bd\u2c34":301,"\ua6bf":301,"\ua6c0\u1504\u0814":301,"\ua6c2":301,"\ua6c3\ua3ff\u11f1\u1993":301,"\ua6c4\u1314":301,"\ua6ca\ufe81\u2d07":301,"\ua6cc\u1dbc":301,"\ua6cd\uabd8":301,"\ua6d0":301,"\ua6d1":301,"\ua6d2\u1f64":301,"\ua6d8":301,"\ua6dd\u0ea2\u304f":301,"\ua6de\u1729\u01c6\u107c":301,"\ua6df":301,"\ua6e3\u024b":301,"\ua6e4\u2c56\uff83\u1443\ufc54\u0a2b":301,"\ua719":301,"\ua71d\u2cd1\u11ea":301,"\ua71e\u0227\u1437\ua554\ufe91":301,"\ua71f":301,"\ua727":301,"\ua729":301,"\ua729\u160b\u0c89\u31a1":301,"\ua72b":301,"\ua72d":301,"\ua72d\ua3d2\u1239":301,"\ua730\u1829":301,"\ua733":301,"\ua737\ud7e3":301,"\ua737\ufd32":301,"\ua73b\u1914\ua564\u1b95":301,"\ua741\u105c":301,"\ua743\u0dc3":301,"\ua745":301,"\ua747\u134d":301,"\ua749\u04a9\u1792\ua339":301,"\ua749\ufcf9\u0c9b":301,"\ua74d\u0b36\u2c5e":301,"\ua74f":301,"\ua751":301,"\ua753":301,"\ua757\u0135\u1215":301,"\ua759\uab4e":301,"
\ua75b\ua296\u146c\u1e01":301,"\ua75d":301,"\ua765":301,"\ua767\u16f2\u3108\u051b\ua08d":301,"\ua767\ua65d":301,"\ua76b\u0253\u04e1\uaa8e\u03f3\u0934\u3128\ud7cc\ufc81\u12d5\u18c6":301,"\ua76b\ufb15\u1501":301,"\ua771\u028c":301,"\ua772\uab54":301,"\ua773":301,"\ua77a":301,"\ua77c":301,"\ua783\u2dc3\u114f":301,"\ua785\u17ae\u1be0":301,"\ua787":301,"\ua787\ufc38\u16bd\u179f\u1724":301,"\ua788\u0493\u01c0\u075e":301,"\ua78c\ufb36\ua255\ua1eb":301,"\ua78e\u026c\u02ab":301,"\ua793\u188a":301,"\ua795\ufb32":301,"\ua799":301,"\ua79b":301,"\ua79f":301,"\ua7a1":301,"\ua7a5\u209a":301,"\ua7a9\u16d3":301,"\ua7f9\u17a1\u0577\u1207":301,"\ua7fb\u18c8\ufb75\u18ed":301,"\ua801\ua2c4":301,"\ua803\uab71\u1101\ua6b6\u199d\u0229\u0c30\u0c58":301,"\ua804\uaa72\uaa64p\u10fe\u1410":301,"\ua805\u1fa4\u151d\u1846":301,"\ua808\u1285\ua769":301,"\ua80a\ufdaf":301,"\ua80e\u1852\u2d83\u1bdf\u03d6\u11a8\ua79f":301,"\ua80f\u048f\u18b4":301,"\ua811":301,"\ua813\ua6b9":301,"\ua819":301,"\ua81d":301,"\ua81e":301,"\ua820\ua282\ua661\u16c2\ua665":301,"\ua821\ufec9":301,"\ua840\ua574\u00e6":301,"\ua845\ufcfc\u0d33":301,"\ua84b\u16ac\u0792":301,"\ua84f":301,"\ua851":301,"\ua855\u1da6\ufbd8h":301,"\ua85b\u2cb5\ua560":301,"\ua85c":301,"\ua85e\ufbe1":301,"\ua85f":301,"\ua861":301,"\ua864\u132e\ua7fc":301,"\ua867\u016d\u12a1":301,"\ua868\ufc26\ufba6":301,"\ua86a\u00aa\u2c9b\uab21\u0169\u308f\u0c86\u1863\u1458\u305f\uffbd\u0501":301,"\ua86c\u0c0b":301,"\ua86f":301,"\ua885\ufec1\ua6d4":301,"\ua887\u30f1\uabca\u03bc\u091d":301,"\ua889":301,"\ua88a\u0586\ufd5c\u0763":301,"\ua88c\u01cc\u1686\u15ff\ufefb\u1a0b\u1671":301,"\ua88d":301,"\ua88f":301,"\ua890":301,"\ua892":301,"\ua895\u1166\u0105\uaa8a":301,"\ua896\u149d":301,"\ua898":301,"\ua89a\u31a7":301,"\ua89c":301,"\ua89d\u0c0f\u0922":301,"\ua89e":301,"\ua8a0":301,"\ua8a4\u0a14\ufc67\ua592\u19c1":301,"\ua8a6\u167d\u07db":301,"\ua8aa\ua486":301,"\ua8af\u2d50":301,"\ua8b0\uffa4":301,"\ua8b1\u2d27\ua5e3\u157b\ua3f5\u0d9f\u1725\ud7d7":301,"\ua8b3\ufb31\u2cec":301,"\ua8f3\u11fe\u0a73\u0a08\u1523\u2d91":301,"\ua8f4\ua99a\ufda8":301,"\ua8f5\u03cd\u07cd\ua2c2\u1211":301,"\ua911":301,"\ua912\ua39d\ua413\u0577\u154b\ua3c6":301,"\ua915":301,"\ua917":301,"\ua919\ufc04\uaae9":301,"\ua91e\uff66":301,"\ua920\ufba1":301,"\ua931\u1b97":301,"\ua932\ua572":301,"\ua933\u30a7":301,"\ua937":301,"\ua93c\u14e2":301,"\ua93e\u3123":301,"\ua940\ufcd4\u1780\u08ab":301,"\ua941\u0125":301,"\ua944":301,"\ua946":301,"\ua960":301,"\ua965\ua8fb":301,"\ua966":301,"\ua968\u0c85":301,"\ua969":301,"\ua96b\u15ee\u1cf2":301,"\ua96c\u1c0d":301,"\ua96d\ua4e8\u02b9":301,"\ua96f\ua661\uffa6\u2c4a\ufb7f\u16d1\u1711":301,"\ua974\ua791\ua0b3":301,"\ua979":301,"\ua97b":301,"\ua987\ua5db\u0225\u165d":301,"\ua989":301,"\ua98d":301,"\ua98e":301,"\ua991":301,"\ua993\ufdb2\u1330\u1422\uaaa5\uab52":301,"\ua994":301,"\ua997\u16bb":301,"\ua998\u0997\u1f57\ua781":301,"\ua999\ua9e6\ufd08\u1fb6\u30be":301,"\ua99b\uaa10\ua2e8":301,"\ua9a0\u0da0\ua11f\ua41b\ua274\u0165\u3088\u1b16\u1243\u10e1":301,"\ua9a2\u1606\u0529\u1028":301,"\ua9a4":301,"\ua9a5\u2c5a":301,"\ua9a7\u0985\u3166\u16d4":301,"\ua9a9\u16c4\u02e0":301,"\ua9ab":301,"\ua9ad":301,"\ua9e1\u16cf\ua0f1":301,"\ua9e2\ua6ae\ua6a1":301,"\ua9e4\u100f":301,"\ua9e8\u022f\u147b":301,"\ua9eb\uff4e\u056a":301,"\ua9ec\u1e25\ua205\uab0a\u2d19\ufb79\ua509\u07cc\ufea6\u161a\uaa44\ua37d\u0192\u04ef\u1f7b\u0919":301,"\ua9fe\u314d\u12b3\u1017":301,"\uaa00\u18cf\u1490":301,"\uaa03":301,"\uaa04\uab2a\u2c4e\u1f77\u0d9a\ua33a\u0183":301,"\uaa06":301,"\uaa09":301,"\uaa0a":301,"\uaa13\uaa62":301,"\uaa17\u0209\
u1a4b\uff6c\u01f3\u0288\u2da6\ua2bf":301,"\uaa19\u0998\ua647\u069c":301,"\uaa1e":301,"\uaa20":301,"\uaa21\uab5e\ufb68\u0a21\u04e5":301,"\uaa48\u0448\u14da\u06bb":301,"\uaa49\uab88\u1d7a\u03f0":301,"\uaa4b\u1837\ua0ae\u016f":301,"\uaa61\ufea7":301,"\uaa63\u069a":301,"\uaa67":301,"\uaa68":301,"\uaa69\u00f3\u14b0":301,"\uaa6c\u14ef":301,"\uaa6d\ufb98\u18a8\u1357\ufc5c\u0d89\u1f24\u0b89":301,"\uaa6f":301,"\uaa70\u0205\u1617\u16e2":301,"\uaa81\ud7d9":301,"\uaa82\u1ed3\u0ead\u1e97\ua84c":301,"\uaa83\u06fc":301,"\uaa84\u04d7":301,"\uaa86\u11fc\u30f8\u1ea9\u045b\u0135\u2d8f\ua05c":301,"\uaa88":301,"\uaa8c\u0ec6":301,"\uaa90":301,"\uaa91\u098c\u126a\u07d5\u0e46\u2d8c\ufbfd\u0503\u1fb1":301,"\uaa97\u0e97":301,"\uaa98":301,"\uaa9a":301,"\uaa9b\ua222":301,"\uaaa3\ufba8\ua3b6\ua32d\uff54":301,"\uaaa6":301,"\uaaab\u2d93\ua0e6\u0e40\u209b\ufc4b\ufc2c\u2d49\u30b8\ua5f1\ufd13\ua034\u0676\u03e9\ufd86":301,"\uaaae\ufece":301,"\uaab5\ufeae\ua1b2\ufcb6":301,"\uaab6\u12af":301,"\uaaba":301,"\uaabc\u1e7b\u11e3\u07d3":301,"\uaabd\u12e0\uabcf\ua05b\u072e":301,"\uaadb\u19b6\ua4ee\u1c4f":301,"\uaadd":301,"\uaae1\u01fd\ua1ab":301,"\uaae3\ua71b":301,"\uaae4\uab44\u1471":301,"\uaae5":301,"\uaae7\u163f":301,"\uab02":301,"\uab03\u02c6\ua866\ua81a":301,"\uab06\u30a2\ua385":301,"\uab09\u0e0e\ufc48\ua2ea":301,"\uab0c\ufc7a\u0105\u127b":301,"\uab0d":301,"\uab0e\u0f58\u0d87\ufda4":301,"\uab12\u16c9\u2c37\ua361\u092e\u17a9\u111c\u1f07":301,"\uab13\ua663\u01da\u1d21":301,"\uab22":301,"\uab26\u122d":301,"\uab28":301,"\uab34\u1a2f\u2148":301,"\uab35\u0256\u0bb2":301,"\uab36\u1ee7\u1580\uff8b\u025e\u0585\u1c00":301,"\uab3c\u1ff3\ufcb2\u134a":301,"\uab3e":301,"\uab3f":301,"\uab41":301,"\uab42\u090a\u1b45":301,"\uab47\ua65f":301,"\uab4c\u0d8d\u0c88\u09af":301,"\uab4d\ua07f\u0a2c\u2c6c\u0bb8":301,"\uab4f":301,"\uab53\u1572\u0e82":301,"\uab55\ud7ec\u1961\u30bf\ua39c":301,"\uab57\ufb25\ufb84":301,"\uab5c\u08a0\u2ccf\u1f70\u1787":301,"\uab5d":301,"\uab73":301,"\uab74\u18c9":301,"\uab75":301,"\uab76\u1e85":301,"\uab77\u098f":301,"\uab78\u2cc9\u1125\u0e9f\u1bd1":301,"\uab7a":301,"\uab7c\ua2f7\u0ab0\uff81":301,"\uab7d":301,"\uab84\u1e3d":301,"\uab87\uaba5":301,"\uab89":301,"\uab8b":301,"\uab8e\u2c48\u2c4e\ua5f4":301,"\uab8f":301,"\uab91":301,"\uab92\ua2b7\u2c7a\u1318\ud7b9\u1c0a\u0c27\u0da1\ufb73\u19a1":301,"\uab95\u0d7b":301,"\uab97":301,"\uab99":301,"\uab9a":301,"\uab9b\ua9af\u04e9":301,"\uab9f":301,"\uaba3":301,"\uaba6\u15b2\u2115\u3189\u1f26\ua723\ua888":301,"\uaba9":301,"\uabaa\uab56\u1273\u1f35\u0291":301,"\uabab\u1b2b":301,"\uabb0":301,"\uabb1\u0a9a\u1301":301,"\uabb6\u115d\ua12b":301,"\uabc6":301,"\uabc8":301,"\uabcd\uff48\ua0d9\ua45f\ua5d3\u1bda\u1164":301,"\uabce\u0913\u1605":301,"\uabd0\ufd38\u2d67\uaa74\ua062\u063f":301,"\uabd7":301,"\uabd9":301,"\uabdd\ua11e":301,"\uabde\u179c":301,"\ud7b5\u0c12\ua45a\ua81fd\u118f":301,"\ud7b6":301,"\ud7b7\u1327\u1f41":301,"\ud7ba":301,"\ud7bb":301,"\ud7bd\u1560":301,"\ud7c0":301,"\ud7c2\u0f60\u1ead\ufd70\ufc18":301,"\ud7c5":301,"\ud7c6":301,"\ud7d5":301,"\ud7dc\ufc9a\u0635":301,"\ud7e0\u08b0\ufd6b":301,"\ud7e5":301,"\ud7e9":301,"\ud7eb\u0570\u03ce\u1dab\u0b8f\uabb4":301,"\ud7ef\ua092\ufb7b":301,"\ud7f0":301,"\ud7f1":301,"\ud7f5\u046b":301,"\ud7f8\u1e23\ufd1e":301,"\ud7f9\u1d3a":301,"\ud7fb":301,"\ufb04\ufca4\ua133\ua1ae\u1553":301,"\ufb17":301,"\ufb20h\u01cc\u1480":301,"\ufb30\u1dbd\u0137\ua794":301,"\ufb35":301,"\ufb3a\ufd7c\u1c1a\u0580":301,"\ufb41\ufc12\u127e":301,"\ufb43\u304d":301,"\ufb44":301,"\ufb47":301,"\ufb4c":301,"\ufb4d\ua0f9\ua426\u04dd\u31b7\u1474\u12e3\u071d\ufcd9\ufc7f":301,"\u
fb52\u01cc\u0458\u0e08":301,"\ufb55\u1d88\u05d9\u1444":301,"\ufb57\u3147\u091c":301,"\ufb59\ua193\u2c95\ua455\u0ca2\u0257\u0723\u1d04":301,"\ufb5a\u0461\u077a\u2c5b":301,"\ufb5b\u0467\ufb2a":301,"\ufb5f":301,"\ufb61\u04e3":301,"\ufb64\ua587":301,"\ufb69\uabad":301,"\ufb6b\uff95\u0d34\u03c5\u1381\u1bc2":301,"\ufb6c":301,"\ufb6e\u0623\u1274\ua463":301,"\ufb6f":301,"\ufb72\u06ac\u0a32":301,"\ufb76\u1700\u1a27":301,"\ufb77\u1d29":301,"\ufb78":301,"\ufb7a":301,"\ufb7c\u09a0":301,"\ufb7e":301,"\ufb80":301,"\ufb81":301,"\ufb82\u06c9\u019b\ua308":301,"\ufb83\u1e67":301,"\ufb8c\u0a8d":301,"\ufb8e":301,"\ufb8f\u1a03\u1195":301,"\ufb96\u14bc\u1401\u182b\u2dd4\u0111\ua597":301,"\ufb99\ua1ef\ua77f":301,"\ufb9a":301,"\ufb9b\u101a\ufd9b\u0812\ua343\ua302\ua15f":301,"\ufb9c":301,"\ufba2":301,"\ufba4\ua35c\u11a0\u066f\u12cf":301,"\ufba5\ua453\u1c72\ufeb0\uaa95":301,"\ufba7":301,"\ufba9\u14c9\u148b":301,"\ufbab\u14f0\ua187":301,"\ufbad\ufbf6\ua747\u0b19\u1352":301,"\ufbb0\u1d48\u1325q\ua22e\u026e":301,"\ufbb1":301,"\ufbd7\uaaa9":301,"\ufbde\uff47\u1196\u1d55":301,"\ufbe2\u0a17\ud7cf":301,"\ufbe3\ua5fa\u2cd1\u0b28":301,"\ufbe5\ud7f2":301,"\ufbe9\u3159":301,"\ufbea\u0a72\ua98c\ua739\ua240":301,"\ufbeb\u1e77":301,"\ufbf7\ufede\u1ea5":301,"\ufbfc\u0629":301,"\ufc01\u1346\u1902\ufdfb\u10f5\ua55d":301,"\ufc02\u0813":301,"\ufc05w\ua298":301,"\ufc07":301,"\ufc09z\u12fb":301,"\ufc0e\ua457\u1c04\ufd21\u111f":301,"\ufc17\ua011":301,"\ufc19":301,"\ufc1a\u172d\u1e87\u0a8f\ua0be\u30ba":301,"\ufc1b":301,"\ufc1d":301,"\ufc21\ud7e7":301,"\ufc22\u1955":301,"\ufc23\u1476\u0444\u130b\ua018":301,"\ufc24":301,"\ufc25\u04c6\u0a1f\ufd6d\u0ebd":301,"\ufc27\u10ff":301,"\ufc29\ufc50\u3053":301,"\ufc30\u04df":301,"\ufc32\u04ca\u01df\u113c":301,"\ufc34\ud7c1\ua2f5\u1f80\u13f9\u04f3":301,"\ufc35\u3150":301,"\ufc36\u1897\u05d0\u1e5d\u0852\u03ed\u176f":301,"\ufc37\u1684\u1240\u1d63":301,"\ufc3b\ufb88":301,"\ufc3d\u15f4\u1e53\u3155\u1e73\u316e":301,"\ufc4f\uffdc\u0584\ua13f\ufc57":301,"\ufc53":301,"\ufc58\u1c7d":301,"\ufc5d\ufb22":301,"\ufc5e\u2c66\ufbe7":301,"\ufc5f":301,"\ufc61":301,"\ufc69\ufcb5\ua53e":301,"\ufc6a\u185f":301,"\ufc6b":301,"\ufc6c\ua8ab":301,"\ufc6f":301,"\ufc70\u2c56i":301,"\ufc75":301,"\ufc77\ua267\uffdb\u19a6\ua28b":301,"\ufc78\ua17c":301,"\ufc7b\u025b\u0579":301,"\ufc7c\u01ab\u0914":301,"\ufc7d":301,"\ufc82\u0269\uabbd\ua126":301,"\ufc83":301,"\ufc84\u1e91\u1665\u06a7\u1f05":301,"\ufc85\ua18f":301,"\ufc89\u04d1\u1d99\u312c\u0924\u1eef":301,"\ufc8d":301,"\ufc92":301,"\ufc94\u0cab\u0252\ua354\ua174\ua90d\u0ede\u18eb\u0431\u09dc\ua606\ua60b":301,"\ufc9b\ufd18":301,"\ufc9f\ufcae\u0930\uab85\u198e":301,"\ufca2":301,"\ufca6\u2ce3\u2cdb\u2c35\ua2d4\ufd2f":301,"\ufca7":301,"\ufca9\u1258\u0432":301,"\ufcaa\uaa93\u1065\ua2e0\ua26c":301,"\ufcad":301,"\ufcb7":301,"\ufcb8":301,"\ufcba\u1149\u107a":301,"\ufcbb\ufe9e":301,"\ufcbc\ufb14":301,"\ufcbe\u2db5\u1601\u1176\u04fd":301,"\ufcbf":301,"\ufcc1\u1e1b\uab93\u022b":301,"\ufcc4\u2d05\ufd0f\u30d0\uabd3\u012f\u0261\uff9c":301,"\ufcc8\u1a48\u176a\u03b6\ufe7d\u0495\u2d4b":301,"\ufcca":301,"\ufccd\u03ac":301,"\ufcce":301,"\ufccf":301,"\ufcd0\u19b1\ufc63\u1236\ufb2f":301,"\ufcd2\ua3d4\u0b5d":301,"\ufcd3\u1446\u2d95\ua177\ua2cf":301,"\ufcd5":301,"\ufcd6":301,"\ufcd7\ua04d\u1141\u1e61\u03b4\ua20e\ua241\ua30f\ua52b\uab11\u0d7e\ufdba\u144c\u164a\u30b9":301,"\ufcda":301,"\ufcdc\u052d":301,"\ufcde\u3143":301,"\ufcdf":301,"\ufce2\u163a\u093d\ua315\ua765":301,"\ufce5":301,"\ufce7\u121c\u04f9\u1468\u1479\u1f10\ua76f\u0eab\u1d3e":301,"\ufcec\u0c9c\u1b33\u0d9c\u056e\u149c\u0390\ua40f\u1e9a\u1c60":301
,"\ufcee":301,"\ufcf3\u191e\u0495\ua5d8\ufb2e\u0798":301,"\ufcf5\u155d\u1152\uab7e\u0769\u0c8a":301,"\ufcf6\u1b99":301,"\ufcfb\ua859\ufd8fb\ua6cf":301,"\ufcff\u1555\u2c5a\u1218\u317b\u3138\u04b1":301,"\ufd00\ua001":301,"\ufd01":301,"\ufd06\ua75f\u1f02":301,"\ufd07\ua1a5\u1fd7\ua793\ua52f\ua04f":301,"\ufd0e\u31f3\u17a7\u1402":301,"\ufd17\ua090\u0d09\ufebf":301,"\ufd1b\u0513\u0cf2\u1541":301,"\ufd1f\ua236":301,"\ufd20":301,"\ufd23\u1ef7\u1181\u1281":301,"\ufd24":301,"\ufd28":301,"\ufd2a":301,"\ufd2c":301,"\ufd2e\u31fb\ua81b":301,"\ufd35\uff69\u047d\u12ec\u1d33":301,"\ufd36":301,"\ufd37\u30e3\u2c9d":301,"\ufd55\u1467\ufee9\ua761\uaa28\u1bdd\ua681\ua324":301,"\ufd56\u1f40":301,"\ufd58\u1e49\u1fe5\u1fa2\ufea3\u0621\uabb9\u1d3f\u07d0\u111a":301,"\ufd5b\u0151":301,"\ufd5e\u3172\u0a27\u307b\u182c\ua0ee\u1b20\u1e3b\ua0f3\u15ce\u1824\u1c4d":301,"\ufd61\ufd7b\u029b":301,"\ufd62\u1a3d\u03d1":301,"\ufd63":301,"\ufd64\u1302\u152f\ua97a":301,"\ufd68\u01f5\u0517\ua360\u1344\u1070":301,"\ufd6f\u1e21":301,"\ufd73\u16d0":301,"\ufd75\u1429":301,"\ufd76":301,"\ufd77u\u0223\u3162":301,"\ufd78\ua2f2\ufb27\u1050\u0f51":301,"\ufd79":301,"\ufd81":301,"\ufd82":301,"\ufd84\ufeb2\u079f\ua086":301,"\ufd85\u179d":301,"\ufd88\u1764\ud7da":301,"\ufd8b\u0c97\u1d06\ufb4b\u07e8\u15f9":301,"\ufd8d":301,"\ufd9c":301,"\ufd9e\ua988\u0987\u01eb\ua1ac\u1403\ud7d0\uab24\u07a5\u0221\u01d8\u0ab7\u1a0c\u2dcc\ufba0\u0caf\ufe72\ua2e6\ua0c0":301,"\ufda1\uaaaa\ua681\ufb2c\u150d\ua28a":301,"\ufda3":301,"\ufda7\u0780":301,"\ufdaa\u1209":301,"\ufdb0":301,"\ufdb1\uffad\u01c9\u1a15":301,"\ufdb3\u0575\u0469\u1e57":301,"\ufdb7":301,"\ufdb9":301,"\ufdbc":301,"\ufdbd\ua65b\ufb62":301,"\ufdc0\u0d29\u0673":301,"\ufdc1":301,"\ufdc3\ua0c5":301,"\ufdf2":301,"\ufdf4":301,"\ufdf5\u098a":301,"\ufdf7\ua6b3":301,"\ufdf8":301,"\ufdf9":301,"\ufdfa\u16f1":301,"\ufe74":301,"\ufe78":301,"\ufe7b\u198b":301,"\ufe7c\ud7dd":301,"\ufe7f":301,"\ufe82\uabd1":301,"\ufe83\u1966\u011f":301,"\ufe87\ufcc7\ud7e2\u306a\u30a8\u0648":301,"\ufe88\u19c3":301,"\ufe8c\u2137\u0c33\u0252\ua43a\u18f1\u1dae\ufb51\u0779":301,"\ufe8d":301,"\ufe8e\u31b8\ua5f3\u0938":301,"\ufe92":301,"\ufe94\u143d":301,"\ufe97\u1220\ua427":301,"\ufe9a\u3116\u0a5e":301,"\ufe9c\ua390":301,"\ufea4":301,"\ufea5":301,"\ufea9\ua46d\ufbfa\u1161\u168c\uff4c\u11f6\u1708\u0f44":301,"\ufeaa\u04d7":301,"\ufeab":301,"\ufead":301,"\ufeaf\u126d":301,"\ufeb5\ua285\u1b94\ua13c\ua348\u078a":301,"\ufeb6":301,"\ufeb8\u0786":301,"\ufeba\ua50a":301,"\ufebc\u1604\u0d24\u1279\ua183":301,"\ufebe\u1888\u0e10\u0972":301,"\ufec2":301,"\ufec3\u2d17\u1188\u1fc2\uaa26\u1e3f\ua210":301,"\ufec4\u1f90\ua456\u30cb":301,"\ufec6\u0ab5\u1b92\u00e1\u01e9":301,"\ufec7\u06c6":301,"\ufec8\u0449":301,"\ufed1\u1021\ua914\u1e31\u0645\u1901":301,"\ufed2\uab94":301,"\ufed3":301,"\ufed5\uab2e":301,"\ufed7":301,"\ufedb\u16b2\ufd71":301,"\ufedd\u0c1a\ufc3f\ua459\ua91a\u021f\u1230\ud7bf":301,"\ufee7":301,"\ufee8\ua29c\u0ce1":301,"\ufeea\u2cad\u1b1b\u1db8":301,"\ufeec\u314b\u0173\u12fe\uaae6\u1e29\u0baa\u0270":301,"\ufeed":301,"\ufef4\u1ee3\u03ae\u0267":301,"\ufef5\u0b14":301,"\ufef7":301,"\ufef8\u03fb\u2da1":301,"\ufef9\u07e3\ufc74":301,"\ufefa":301,"\uff42\u30d2\uaa0c\u2c73\u15d6":301,"\uff43\ua108":301,"\uff45\u1eef\ua977":301,"\uff46":301,"\uff46\uff48":301,"\uff47":301,"\uff49\u0855":301,"\uff49\u1f40":301,"\uff4b\ufb3e\u1570":301,"\uff4c":320,"\uff4d\u0473\u18a6":301,"\uff4e\u2c8d\u2c50\ufc14\u0459\u15b4\u2d56\u11f7\u11a3\ufeb4\u1f03\u1587\ua124\u1573\u04f1\u0564\u30a6":301,"\uff4f\ua359\ua809\ua580\u0ea3\u149a\ufda6":301,"\uff50":301,"\uff50\u0d7a":30
1,"\uff55\u1b89":301,"\uff55\u1ee3":301,"\uff56":301,"\uff56\u0bd0\ua165\u1aa7":301,"\uff58":301,"\uff59":301,"\uff5a":301,"\uff5a\ufbee\u0aa8":301,"\uff67\uaa1c\u02aa\u0507\u112f\ua0aa\u30c9":301,"\uff68":301,"\uff72":301,"\uff74":301,"\uff79":301,"\uff7b":301,"\uff7d":301,"\uff7f\u0697":301,"\uff8c\u184d":301,"\uff8d":301,"\uff90\ua462\u0eb2":301,"\uff91\ua128":301,"\uff92\u1edb\uaa40\u2db3\u1155\ufc1c\uaa1a\u1b96s\u1348\ua420\ufca1\ufc47\u0933":301,"\uff96j\ua3c7\u0d27":301,"\uff97\ufd03\uaaa0\u099b\ua403":301,"\uff9b":301,"\uffac\u0631\ua6d5\uab37\ufb8b\u0d88":301,"\uffb0\u1f56\u04ce\ua74b":301,"\uffb2\u0188":301,"\uffb4\u2d90":301,"\uffb8":301,"\uffb9":301,"\uffca\u1fc3":301,"\uffcc\ua372\u0dab\u1f79":301,"\uffce":301,"\uffcf":301,"\uffd2":301,"\uffd6\u184e\u11f8\u16c7\u1d77":301,"\uffd7\u2d32\ua9e0\u1129":301,"\uffda\u0764\u138d\ufb39\u1655":301,"_\u0787":301,"a\ua5bb\u15f7\ua2ef\uaa66\u00e4":301,"abstract":[13,15,46,47,51,52,57,59,79,87,109,117,122,123,129,133,135,143,159,168,176,181,188,197,210,219,226,227,228,296,312,327,338],"b\ufc6e\u1d96\u1464\u10fa\u0674":301,"boolean":[13,30,33,38,44,48,49,50,51,57,71,148,181,187,188,194,200,213,214,221,316,332],"break":[57,62,67,85,88,89,100,101,160,302,307,310,343],"byte":[57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,103,105,106,107,109,111,276,343],"case":[15,33,36,39,43,46,52,55,57,59,64,71,94,97,102,105,110,111,114,118,119,129,130,132,134,139,141,145,153,160,171,172,179,194,200,209,210,214,217,219,245,246,248,250,251,254,257,259,264,265,266,267,268,269,273,274,276,280,282,283,284,285,286,287,288,289,290,291,292,293,297,299,307,317,321,325,326,328,341,346],"catch":[36,105,343],"char":[57,104,174,305,306,313],"class":[13,14,15,16,18,23,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,42,43,44,46,47,48,49,50,51,52,53,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,113,115,116,117,118,119,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,137,139,140,141,143,144,145,146,153,156,157,158,159,160,161,162,163,164,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,193,195,197,198,199,200,201,202,203,204,205,206,208,209,210,211,212,213,214,215,217,218,219,220,221,224,226,227,228,245,246,248,250,251,254,257,259,264,265,266,267,268,269,273,274,276,280,282,283,284,285,286,287,288,289,290,291,292,293,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,316,317,319,322,323,324,325,326,327,333,334,335,337,338,339,340,341,342,343,349],"default":[10,14,28,29,32,33,36,38,39,43,52,57,59,60,61,62,63,64,65,67,68,69,70,71,74,77,80,82,83,85,88,89,91,96,97,100,102,103,105,106,107,109,110,111,115,116,117,118,122,129,130,131,132,133,136,139,141,142,143,145,153,159,162,164,174,176,178,194,200,203,206,212,214,215,217,218,219,220,224,226,228,293,294,297,299,300,302,303,305,307,310,313,314,317,328,329,333,334,335,336,341,342,343],"do":[10,23,24,29,37,38,52,57,59,71,76,89,105,109,110,115,117,118,119,122,129,132,134,135,144,147,169,170,171,173,176,179,188,194,199,203,206,211,219,226,228,293,294,302,306,312,333,334,335,336,339,346,348],"e\u1199\u01bd":301,"enum":114,"final":[25,28,57,65,109,141,159,162,175,188,206,212,302,311,314,317,326,340,349],"float":[29,33,38,47,48,50,51,57,64,76,105,129,132,133,135,142,147,148,163,176,193,195,210,212,214,224,299,316,317,318,321,322,323,327,328,329,332,333,334],"function":[13,15,17,25,28,29,30,33,34,35,39,43,47,50,51,52,53,58,61,64,
65,70,71,75,77,78,79,84,85,87,97,98,102,105,106,109,110,111,115,116,117,118,119,126,129,132,133,135,138,141,142,143,145,166,173,174,176,177,179,181,182,183,185,187,188,191,193,194,198,199,202,206,208,213,214,216,219,221,224,228,293,294,297,301,303,305,306,313,317,318,319,320,321,326,327,328,330,331,332,334,336,339,340,341,342,343,344,345,346,348,349],"g\u11e2\ua84e\ua985":301,"gro\u00df":[322,323,324,325,326],"i\u0307\u0f41":301,"i\u11bd\u2d18":301,"import":[28,30,34,36,38,39,46,52,55,57,62,67,71,75,78,81,88,89,93,100,102,104,105,117,118,129,131,132,133,134,139,141,144,145,163,166,167,169,172,173,174,176,177,183,193,199,200,202,204,205,206,208,209,212,213,215,217,218,219,220,221,222,226,227,228,293,294,295,297,299,300,301,302,303,305,306,307,308,309,310,312,313,316,318,329,333,334,335,343,346,347,348,349],"int":[14,28,31,33,48,49,50,51,57,64,71,82,89,90,105,109,115,117,124,126,129,132,133,134,139,144,145,148,153,155,159,161,162,164,168,169,170,171,174,175,176,178,179,183,184,187,189,204,212,214,219,221,224,226,227,228,293,296,303,306,307,310,313,317,318,319,320,321,322,323,324,325,326,327,328,330,331,332,333,334,338,341,343],"j\ua268":301,"ljungl\u00f6f":351,"long":[24,52,57,103,205,206,304,322,341,343],"m\u00fasica":205,"new":[2,10,13,14,25,28,33,46,52,57,58,61,63,65,70,71,72,73,74,77,78,79,81,83,84,85,87,90,92,93,97,98,100,101,102,105,106,107,109,111,115,116,117,118,119,122,128,129,132,133,134,135,139,144,153,159,160,162,164,167,168,169,170,171,173,175,176,179,181,183,187,188,193,199,206,207,209,213,214,219,227,228,293,295,297,299,301,303,306,308,312,313,316,322,323,324,325,326,327,333,334,336,338,341,343,346,349],"null":[161,166,317,322,323,324,325,326,327],"o\u0642\ua8ad\ufb13":301,"p\u1f83":301,"public":[137,143,340,341,349],"r\u1a01\u1210\ua3f9\ua32f\u1f67\u08ac":301,"return":[7,13,14,16,23,25,26,28,29,30,31,32,33,34,35,36,38,39,41,42,43,44,47,48,49,50,51,52,53,55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,73,74,75,76,77,78,79,80,81,82,83,84,85,87,88,89,90,91,92,96,97,98,100,102,103,104,105,106,107,109,110,111,115,116,117,118,119,122,123,124,125,126,127,129,130,132,133,134,135,136,139,141,142,143,144,146,147,148,149,155,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,178,179,181,183,184,185,187,188,189,191,193,194,195,197,198,199,200,201,204,205,206,207,208,209,210,211,212,213,214,217,218,219,220,221,222,225,226,227,228,230,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,309,310,311,312,313,314,316,317,318,319,320,321,322,323,326,327,328,329,331,332,333,334,335,336,338,339,341,343,344,349],"s\u00e3o":349,"short":[28,57,78,90,103,123,201,211,317,331,336],"speicherbeh\u00e4l":200,"speicherbeh\u00e4lt":200,"speicherbeh\u00e4ltern":200,"static":[28,32,36,44,52,77,79,83,87,111,117,118,123,128,129,143,159,161,164,168,186,200,211,226,292,304,316,325,333,335],"switch":[57,88,321,349],"throw":[37,274],"transient":310,"true":[13,14,27,29,30,32,33,34,36,37,38,43,44,46,49,52,57,58,59,60,62,64,65,67,68,70,71,74,78,85,88,89,91,97,98,99,100,102,104,105,109,111,115,116,117,118,119,122,123,125,126,127,129,132,134,135,139,144,159,160,161,162,163,164,170,171,173,176,178,179,180,181,183,184,185,188,189,193,194,195,200,202,203,206,211,212,214,216,217,219,221,224,226,227,228,274,293,294,297,300,301,302,303,304,305,308,310,311,312,313,314,316,317,318,320,332,334,335,336,339,341,343],"try":[7,57,64,74,122,124,126,130,132,139,155,227,327],"var":[13,16,25,118,178,183],"while":[14,29,33,132,139,143,160,167,171,179,193,200,214,221,340,349],"y\u17b1\u1f36\u0e0f":301,A
:[0,2,3,7,8,10,13,14,16,18,22,23,25,26,28,29,30,31,32,33,34,35,36,38,39,41,42,43,44,52,53,55,57,58,59,60,61,62,63,64,65,67,68,69,70,71,72,73,74,77,78,79,80,82,83,84,85,86,87,89,91,95,96,97,98,100,101,102,103,104,105,106,107,109,110,111,113,114,115,116,117,118,119,122,123,124,125,126,127,129,132,133,141,142,143,144,145,146,147,148,157,158,159,161,162,164,166,167,168,169,170,171,173,174,175,176,177,178,179,180,181,183,184,185,187,188,189,193,194,195,197,198,199,200,201,202,203,204,205,206,209,210,211,212,213,214,215,216,217,218,219,220,221,223,226,227,228,293,294,295,296,299,300,302,303,304,305,306,310,311,312,316,317,319,320,322,323,324,325,326,327,329,331,333,334,336,338,341,343,344,346,349],AND:[57,82,128,185,188,227],AS:[57,82,98],AT:[209,212,219],And:[15,116,132,302,317],As:[28,57,74,129,132,145,167,179,214,317,322],At:[2,28,33,102,147,347],BE:221,BY:[57,82],But:[25,57,97,102,105,118,157,219,304],By:[39,52,57,65,68,74,97,105,106,111,116,117,118,122,132,133,139,176,228,297,303,305,318,334],For:[10,12,23,25,30,33,35,39,40,42,55,57,59,60,64,69,71,78,79,85,86,87,89,91,93,95,103,105,106,109,111,115,116,117,118,119,123,125,130,132,133,145,156,159,162,167,168,170,171,175,176,178,179,183,188,193,200,203,209,210,212,214,220,224,226,227,228,229,294,295,302,303,305,306,307,317,318,321,325,326,327,333,334,336,339,340,341,342,343,346,348,349],IN:[28,118,144,160,163,172,173,174,209,212,215,217,218,219,220,334,347],IS:[57,71,341],If:[0,7,8,10,13,16,25,28,32,33,34,35,36,39,43,44,52,55,57,58,59,60,63,64,65,68,69,70,71,73,74,76,77,78,80,81,82,83,84,89,91,92,96,97,98,100,102,103,105,106,107,108,109,111,115,116,117,118,119,122,123,125,126,129,132,133,134,135,139,141,144,147,153,159,160,161,162,164,166,168,170,171,172,174,175,176,180,181,183,186,187,188,189,193,194,195,201,202,206,211,214,217,218,219,220,221,222,226,227,228,293,295,297,302,305,306,307,309,314,316,317,322,323,324,325,326,327,328,333,334,335,339,340,341,342,343,346,347,348],In:[8,14,25,28,30,33,35,36,39,43,52,57,71,83,84,90,98,102,117,118,119,129,132,135,139,141,142,145,146,147,159,164,167,168,170,171,175,176,179,189,200,214,219,226,228,299,300,303,304,305,307,316,317,318,320,322,323,324,325,326,328,331,332,333,334,336,338,346],Ine:107,Is:[36,117,174,181,188,290,311],It:[7,10,14,15,24,25,28,29,35,37,38,43,46,48,49,50,52,53,57,77,79,87,97,98,100,101,102,103,105,107,110,111,115,117,119,124,126,132,134,139,142,152,156,159,162,166,167,168,169,170,171,175,179,183,185,194,198,199,200,203,209,219,221,226,297,298,302,312,317,318,321,326,328,330,332,336,339,347,349],Its:[2,8,25,159,293,300],NOT:[71,185,188,193,221,330],No:[143,169,198,202,305,317,331,336,343],Not:[57,71],ON:[57,82],OR:[125,128,185,188,224,227,317,336],Of:[24,66,208],On:[8,57,88,94,102,111,299,307,346],One:[91,102,132,133,145,334],Such:302,TO:[37,212],That:[57,71,78],The:[0,2,7,8,10,14,15,16,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,38,39,41,42,43,46,48,49,50,51,52,53,55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,94,95,96,97,98,100,101,102,103,104,105,106,107,108,109,110,111,114,115,116,117,118,119,122,123,124,125,126,127,129,131,132,133,134,135,137,139,141,143,144,145,147,149,153,154,156,157,158,159,160,161,162,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,183,184,185,188,189,193,194,195,197,198,199,200,201,204,205,206,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,224,225,226,227,228,274,293,294,297,298,299,300,301,302,303,305,306,307,308,310,311,312,313,317,318,319,321,322,323,324,
325,326,327,328,330,331,332,333,334,335,336,338,339,340,341,342,343,344,345,346,347,349,351],Then:[36,102,119,310,321,324],There:[8,25,28,57,66,69,85,98,118,119,141,156,159,176,183,184,187,203,206,211,294,295,312,317,326,327,336,349],These:[18,28,30,33,46,55,57,59,60,68,71,77,79,87,90,104,105,109,111,115,117,132,134,142,145,157,167,176,179,206,211,214,229,295,302,306,315,340,348],To:[7,25,36,38,57,74,108,109,111,115,132,134,135,152,159,176,200,214,221,317,318,322,323,333,334,346],WITH:221,Will:[24,132,134,349],With:[129,166,183,214,228,326,327,336,349],_:[13,57,97,98,160,174,195,300,301],__:[57,71],___:160,____:[160,335],_____:160,______:160,_______:160,________:160,_________:160,__________:160,___________:160,_____________:160,______________:160,_______________:160,__________________________:160,__a__:336,__adjectival_suffix:206,__callback:117,__canva:117,__children:117,__class__:108,__conson:206,__derivational_suffix:206,__dict__:[108,110],__digraph:206,__doc__:131,__double_conson:206,__drag_i:117,__drag_x:117,__draggabl:117,__file__:141,__getattr__:131,__getitem__:[115,117],__init__:[13,14,15,16,23,27,28,29,32,33,35,37,38,39,40,42,43,44,48,49,50,51,52,53,57,58,59,60,61,62,63,64,65,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,97,98,100,101,102,103,105,106,107,108,109,111,113,115,116,117,118,119,122,123,124,125,126,127,128,129,131,132,133,134,135,137,139,141,143,144,146,153,158,159,160,161,162,163,164,166,167,168,169,170,171,172,173,174,175,176,178,179,180,181,182,183,184,185,187,188,193,195,198,199,200,201,202,203,204,205,206,211,212,213,214,215,217,218,219,220,221,226,227,228,293,297,299,300,302,303,304,305,306,307,308,309,310,314,316,317,322,323,324,325,326,327,333,334,335,338,341,342,343],__len__:52,__li_end:206,__long_vowel:206,__name__:110,__new__:[52,83,118,129,316],__nonzero__:115,__noun_suffix:206,__parent:117,__perfective_gerund_suffix:206,__press:117,__reflexive_suffix:206,__repr__:[52,129],__restricted_vowel:206,__s_end:206,__setitem__:[115,117],__special_word:206,__st_end:206,__step0_suffix:206,__step1_suffix:206,__step1a_suffix:206,__step1b_suffix:206,__step2_suffix:206,__step2a_suffix:206,__step2b_suffix:206,__step3_suffix:206,__step3b_suffix:206,__step4_suffix:206,__step5_suffix:206,__step6_suffix:206,__step7_suffix:206,__step8_suffix:206,__step9_suffix:206,__str__:[129,159],__superlative_suffix:206,__updat:117,__verb_suffix:206,__vowel:206,_add_child_widget:117,_arg:143,_block_read:102,_cach:102,_cl:[83,316],_cleartempl:212,_cmd:220,_context_to_tag:219,_conting:143,_counter:177,_create_training_examples_arc_eag:173,_create_training_examples_arc_std:173,_debug:28,_delta:33,_dev:90,_edg:159,_edge_to_cpl:159,_elementinterfac:[314,343],_end_index:169,_eofpo:102,_epsilon:253,_estim:176,_evoked_:[57,71],_filepo:102,_fn:29,_fn_num:29,_fp:29,_fp_num:29,_gap:303,_get_featur:213,_grammar:[168,175],_head_index:169,_histori:[170,171],_hypothesi:333,_ident:214,_index:159,_init_colortag:117,_input:123,_intercept:176,_io:111,_item_repr:117,_jar:220,_kwarg:143,_languagespecificstemm:206,_len:102,_lh:119,_lx:221,_manag:117,_max_r:176,_mlb:115,_mwe:300,_n:143,_neg:194,_nfmap:33,_normal:[57,61],_not_:[57,105],_num_leav:159,_package_to_column:111,_pars:[57,59,61,75],_path:109,_piec:28,_read:123,_read_block:[57,59,75],_read_int_r:129,_read_number_valu:129,_regex:297,_restart:159,_result:122,_rh:119,_row:115,_rule:28,_rule_tupl:202,_scandinavianstemm:206,_sentenc:123,_separ:220,_slope:176,_stage:28,_standardstemm:206,_start:28,_start_index:169,_str:28,_
stream:102,_string_end_r:129,_string_start_r:129,_symbol:119,_tag:[57,59,61,75,117],_tagger:219,_tagword:221,_test:90,_tgrep_exprs_act:294,_token:159,_toknum:102,_tp:29,_tp_num:29,_trace:[28,168,175],_train:219,_tried_:170,_tried_m:170,_type:[57,71],_updat:117,_valu:177,_word:[57,59,61,75],_wordnetobject:105,a00:221,a_:214,a_littl:300,a_little_bit:300,a_lot:300,aa:66,aaaaaaaaaaaa:317,aaaaaaaaaaaaaaa:317,aaaaaaaaaaaaaaaaa:317,aaai:[67,89],aachen:137,aacut:189,aardwolf:[57,105,208],aardwolv:[57,105,208],ab:[145,163,216,219],abaci:[57,105,208],abacu:[57,105,208],abainia:[198,199],abbr:[302,335],abbrev:[57,71,302],abbrev_backoff:302,abbrev_typ:302,abbrevi:[57,71,189,216,302,335],abc:[52,109,111,133,296],abcdefghijklmnopqrstuvwxyz:155,abdelkrim:206,abhaya:328,abkh:[57,101],abkhaz:[57,101],abl:[15,109,200,204,212,219,340],abnorm:[57,81],abolish:[57,81],abomin:[57,81],abort:[57,81],abounding_with:[57,71],about:[2,25,30,35,36,42,57,69,71,74,78,79,87,89,93,102,103,105,111,118,123,125,158,159,162,164,170,176,182,189,198,199,200,203,210,211,219,224,302,327,333,334,336,340,341,343,349],abov:[57,90,105,136,179,188,212,221,299,302,327,346],abram:145,abrom:145,absenc:194,absent:168,absolut:[52,57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,103,105,106,107,109,132,135,137,195,342],absolutediscount:137,absolutediscountinginterpol:[132,135],absorb_heat:[57,71],abspath:[57,59],abstractboxerdr:178,abstractcanvascontain:117,abstractccgcategori:13,abstractchartrul:[14,159,168],abstractcollocationfind:53,abstractcontainerwidget:117,abstractexpress:181,abstractlazysequ:[52,102],abstractparentedtre:334,abstractvariableexpress:[127,181,188],ac:[57,60,107,123,189,343],academ:0,academia:94,acc:212,accept:[33,57,59,98,115,119,130,174,335,341],access:[29,52,57,60,64,68,69,71,74,91,102,105,106,108,109,111,115,117,118,119,120,132,134,176,293,334,338,341,342,346,349],access_token:[341,342],accessor:29,accident:[119,229],accommod:[57,84,178],accompani:[324,325,326,342],accomplish:167,accord:[36,53,118,119,137,167,174,179,185,189,211,214,307,317,321,326,327,334,340],accordingli:[322,323,324,327,346],account:[57,71,123,129,132,145,167,317,323,326,346],accur:[105,179,336],accuraci:[26,29,43,52,147,193,210,212,221,224,299,304,307,336],acero:214,achiev:[24,200,304],acirc:189,acknowledg:117,acl2014:[57,104,317],acl:[62,92,221,304,317,328,336,349],aclweb:[57,104,179,282,304,317,318,319,332,336],acm:[67,81,89,202,330,344,349],acoust:30,acquisit:46,across:[57,71,127,141,188,299,307],act:[25,28,52,57,71,79,87,102,115,118,179,183],action:[57,71,202,317,318,321,328,330,332,338,341,349],activ:[33,87,115,122,194,317,318,321,328,330,346,347,349],activest:115,activestyl:115,actual:[29,33,57,71,108,122,127,131,147,152,156,160,176,184,294,341],actual_pdist:176,acut:189,acycl:343,acyclic_branches_depth_first:343,acyclic_breadth_first:343,acyclic_depth_first:[105,343],acyclic_dic2tre:343,acyclic_tre:[105,343],ad:[2,15,36,57,90,102,105,115,117,119,123,159,162,164,168,176,193,201,216,299,300,302,307,312,320,336,343,349],adam:177,adapt:[71,297,298,335],add:[7,14,15,25,28,33,37,52,57,68,71,74,78,89,109,115,116,117,122,123,125,132,135,139,144,153,159,161,162,164,168,169,170,179,183,188,191,193,294,300,307,312,313,314,316,317,320,321,326,327,333,343,349],add_access_token:342,add_arc:161,add_assumpt:122,add_background:123,add_blank_lin:314,add_callback:117,add_child:117,add_default_field:314,add_feat_extractor:193,add_lin:89,add_log:176,add_missing_depend:184,add_mw:300,add_nod:161,add_ortho_context:302,add_py3_data:54,add_root:[57,78],add_sent:123,add
_widget:117,addit:[52,57,62,88,98,105,109,117,118,123,129,132,135,139,166,168,169,176,179,188,193,200,201,214,221,226,293,299,302,312,316,320,326,336,343,349],addition:[55,57,105,129,141,189,203,211,339],additional_java_arg:166,additional_test:230,address:[161,167,346],adj:[28,57,77,105,167,209,213,216],adj_sat:[57,105],adjac:[154,159,169,335],adject:[28,57,71,105,199,208,212,216,219,325,349],adjud:212,adjust:[117,142,201],adjut:87,administr:[86,346],admiss:341,admit:299,adopt:349,adp:[77,216],adposit:216,adrian:107,adv:[57,71,77,105,209,216],advanc:[25,145,188,259,346],advantag:[39,42,131],adverb:[57,71,105,208,212,216,219],advi:204,advic:334,advis:[132,134,204],advmod:160,adwait:86,ae1:66,ae:66,ae_kappa:141,aeioui:299,aelig:189,aer:329,afenegus6:[57,101],affect:[29,117],affirm:194,affix:[57,105,196,197,198,199,204,205,206,219],affix_length:219,affixtagg:219,africa:179,after:[25,28,33,37,51,57,71,102,111,116,117,119,123,129,132,134,139,146,154,159,162,168,175,176,185,194,199,206,211,224,254,302,306,312,313,317,343,348],afternoon:24,ag:[57,64,117],again:[7,118,132,259,341],against:[7,26,29,57,71,144,147,170,181,188,203,210],agarw:328,age_year:[57,64],agenda:[128,159,164,184],agglom:[46,49],aggreg:[188,224,321],agnost:[299,307,318],agr:141,agra:[57,101],agrav:189,agre:[57,105,141,226],agreement:[140,145,259,349],agreestat:259,aguirr:[57,64],ah0:66,ah:66,ahd:107,ahead:[333,349],ai:[202,217],aid:[57,59,152],ailment:[57,71],aim:[173,196],ain:195,aint:195,air:153,airspe:[160,215,218,220],aka:[168,317,320,321,329,330,343],al:[57,71,103,104,214,299,307,317,321,326,332,335],alavi:328,albania:179,albb:301,albert:141,alcuin:107,alefsym:189,alert:[57,62,129],alex:321,alexand:[107,200],alexi:351,alfr:107,alg_opt:173,algasai:349,algeria:[198,199],algier:198,algnsent:316,algorithm:[2,33,35,37,38,42,46,48,50,57,86,104,133,137,142,146,154,159,162,164,167,169,173,175,176,185,196,198,199,201,202,203,206,213,214,221,228,247,274,299,302,307,310,320,322,323,324,325,326,327,331,332,334,335,336,343,344,349],alia:[33,57,62,67,81,83,85,88,89,100,132,134,316],alias:[118,188],alibaba:301,alien:[132,134,139],alif:[198,199],alifmaqsura:[198,199],align:[57,60,104,117,142,145,247,283,316,319,320,322,323,324,325,326,327,328,329,331,332,349],align_al:[322,323],align_block:319,align_log_prob:319,align_text:319,align_token:[312,313],align_word:328,aligned_s:[57,58,316],alignedcorpusread:[57,58],aligneds:[57,58,316,322,323,324,325,326,327,349],alignedsent_block_read:[57,58],alignedsentcorpusview:58,alignment_error_r:329,alignment_info:[322,323,324,325,326,327],alignment_t:[323,324,325,326],alignmentinfo:[326,327],alik:347,alin:[140,247,349],all:[8,15,16,20,22,28,29,32,33,34,35,38,42,43,44,46,51,52,53,57,59,60,62,63,64,66,67,68,69,70,71,72,74,76,77,79,80,81,82,83,84,87,88,89,91,96,98,103,105,106,107,109,111,113,115,116,117,118,119,123,125,127,128,129,131,132,133,134,135,141,142,143,159,160,161,162,164,165,167,169,170,171,175,176,178,179,180,181,183,185,187,188,193,194,195,198,200,201,203,209,211,214,216,217,219,220,221,224,226,227,228,229,274,293,294,299,302,303,305,306,314,317,320,321,322,323,324,325,326,327,328,330,331,333,334,336,340,343,346,347,349],all_express:125,all_inst:194,all_lemma_nam:[57,105],all_list:188,all_phrases_from:333,all_senti_synset:[57,93],all_synset:[57,105],all_word:193,allcap_differenti:195,allexpress:188,alloc:[46,50,176],allot:159,allow:[2,10,13,33,46,49,57,62,68,71,105,108,109,115,118,119,129,131,132,133,139,145,155,159,164,167,168,169,170,171,177,179,188,214,228,302,305,318,321,334,336,339,341,343
,349],allow_step:129,allowed_typ:188,alltempl:228,almost:[38,43,57,105,132,195],alon:328,along:[2,57,71,79,87,176,295,316,347,349],alongsid:347,alph:155,alpha:[29,57,82,132,135,141,147,159,164,181,188,189,195,214,317,328,332,349],alpha_0:214,alpha_convert:[181,188],alpha_gamma:[133,137],alpha_t:214,alphabet:[57,81,117,155,184,214,299,302,303,307],alphanumer:118,alpino:[57,61,111],alpinocorpusread:[57,61],alreadi:[10,28,57,59,103,109,111,115,117,132,141,158,159,164,166,167,172,176,183,188,193,206,219,228,300,312,326,348],also:[10,13,14,25,28,29,30,33,36,37,43,46,53,57,71,74,89,105,109,110,111,117,118,119,123,130,132,134,135,139,141,143,145,156,157,158,159,161,162,171,173,174,176,179,181,183,200,201,206,209,211,214,224,228,280,293,295,301,302,303,305,310,313,316,325,331,332,334,336,341,343,349],also_se:[105,343],alter:200,altern:[66,111,129,136,169,176,206,226,228,305,335,348],although:[102,115,302,336],altlabel:179,alun:205,aluno:205,alva:304,alwai:[7,26,33,57,59,71,102,105,117,122,132,135,176,188,193,203,226,294,303,317,318,321,325,326,328,330,332,334,336],alwayson_featur:33,am:[24,57,89],amahuaca:[57,101],amaz:347,amazingli:195,amazon:[89,301],ambigu:[30,92,123,156,180,196,201,344],ambiguous_word:344,american:[98,145,160,301,308],amhar:[57,101],amod:[160,163,172],among:[98,328,332],amongst:169,amount:[38,111,116,117,141,167,176,224,333,336,341],amp:[57,74,189,311,313],amperc:311,amr:98,amus:214,amz:301,an:[0,7,10,13,14,15,16,24,25,28,33,34,35,36,37,38,39,41,42,44,46,48,51,52,53,55,57,58,59,60,65,66,67,68,69,70,71,73,74,77,78,79,82,84,87,90,91,92,93,95,98,102,103,105,106,107,108,109,110,111,115,117,118,119,123,125,127,129,131,132,133,135,136,137,139,141,142,143,144,145,146,147,148,149,151,152,157,158,159,160,161,162,164,165,166,167,168,169,170,171,173,174,175,176,178,179,181,183,184,185,187,188,189,194,195,199,200,202,203,206,209,211,212,213,214,215,217,218,219,220,221,222,226,227,228,274,293,294,295,299,300,301,302,304,305,310,313,314,316,317,318,319,321,322,323,324,325,326,327,328,329,330,331,333,334,335,336,338,340,341,342,343,344,346,347,349],anaconda:348,analog:43,analys:[145,293],analysi:[62,89,142,145,146,173,192,193,194,195,200,293,310,349],analyt:176,analyz:[57,71,74,81,88,154,177,347],anaphor:178,anaphora:123,anaphoraresolutionexcept:181,anc:[57,71],ancestor:[57,105,117,294,334],ancestri:106,anchor:117,and_list:188,andexpress:[185,188],andr:131,andrea:335,andreasvc:335,ang:189,angl:[28,51,293],anglo:107,angrili:160,anhalt:160,ani:[7,24,25,28,29,31,32,33,34,35,39,51,53,57,58,59,60,63,64,65,68,69,70,71,73,74,80,82,83,84,85,89,91,92,96,98,102,103,105,106,107,108,109,110,111,115,116,117,118,119,122,123,129,132,134,141,143,144,145,148,159,161,162,164,167,175,176,183,187,188,189,198,199,200,204,211,212,214,215,219,224,226,227,228,293,299,303,305,306,307,317,328,333,334,336,343],anim:[8,20,57,105,343],ann:195,ann_morphosyntax:78,ann_segment:78,anni:107,annot:[39,46,57,64,67,68,71,77,79,87,89,90,94,107,141,145,213,302,334,336,349],annotationset:[57,71],annotationtask:141,announc:[334,347],annual:344,anoth:[29,57,71,118,119,145,147,173,179,188,202,203,214,219,227,228,324,325,326,332],ans_typ:178,ansi:335,answer:[24,127,129],answer_kei:127,anteced:187,antholog:[57,104,179,282,304,317,318,319,332,336],antonym:105,anyon:[130,160],anyth:[25,117,122,125,127,187,294],anytyp:188,anywai:132,anywher:106,ao:[66,141,205],ap:[57,82],apart:[127,346],apha:259,api:[12,14,25,27,28,30,32,33,35,38,39,44,46,51,57,58,61,62,63,65,66,67,68,69,70,72,73,74,75,76,79,82,84,85,86,87,88,89,92,93,94,95,96,97,98,99,100
,104,105,106,107,108,121,124,125,126,127,128,132,135,137,156,158,159,160,166,168,170,171,172,173,175,181,188,196,198,199,200,201,202,203,204,205,206,209,211,213,214,215,217,219,220,221,228,295,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,315,328,337,339,340,341,347,349],api_cal:160,api_vers:341,apo:[57,71,313],apolloniu:107,apostroph:42,app:[0,16,118,128,342],app_kei:[341,342],app_secret:[341,342],appeal:162,appear:[8,53,57,61,64,76,89,103,105,106,111,119,128,143,147,153,194,206,211,228,293,302,303,311,312,325,326,332,334,349],append:[57,74,109,115,118,141,169,179,186,194,322,323,324,325,326,333,334,342],append_no_spac:[57,74],append_prop:125,append_sig:125,append_spac:[57,74],appendix:326,appli:[2,7,8,14,15,25,28,30,31,39,43,44,52,53,57,59,71,77,79,84,87,89,106,115,118,125,136,141,145,154,157,159,162,164,168,171,173,175,176,180,187,188,193,194,210,211,214,218,220,226,227,228,296,298,299,302,307,317,327,328,333,334,335],applic:[1,7,10,13,14,15,16,52,69,129,159,170,187,188,199,211,214,227,228,302,324,335,341,349],applicable_rul:228,applicationexpress:[181,185,187,188],apply_everywher:159,apply_featur:[43,193],apply_heat:[57,71],apply_rul:205,applyto:[181,184,187,188],appo:160,appraoch:89,approach:[46,57,100,193,194,302,324,325,326,335],appropri:[30,31,32,33,35,44,48,49,50,51,53,57,60,71,78,91,102,106,109,117,176,181,188,203,210,211,214,219,220,293,319,338,346],approxim:[57,71,147,176,195,213,333,343],approxrand:147,april:[72,338,341,349],apuleiu:107,apurva:321,apw_19980314:72,apw_19980424:72,apw_19980429:72,ar:[2,7,8,10,13,14,15,20,24,25,26,28,29,30,31,33,34,35,36,37,39,42,43,44,46,48,50,52,53,55,57,58,59,60,61,62,64,65,66,68,69,71,72,73,74,77,78,79,80,81,82,84,85,87,89,90,96,97,98,102,103,104,105,106,109,111,115,116,117,118,119,122,123,125,127,129,131,132,133,134,135,139,141,142,143,145,146,147,148,149,153,154,156,157,158,159,160,161,162,164,167,168,169,170,171,173,174,175,176,177,178,179,180,181,183,185,187,188,189,193,194,195,198,199,203,206,208,209,210,211,214,215,217,218,219,221,224,227,228,229,253,274,276,293,294,295,297,299,302,303,304,305,306,309,310,312,313,314,315,316,317,320,321,322,323,324,325,326,327,328,330,331,332,333,334,335,336,339,340,341,343,346,349,351],arab:[57,101,198,199,201,206,274,349],arabicstemm:206,arbitrari:[46,50,130,179],arbitrarili:[46,48],arbor:195,arc:[161,167,169,173],arc_eag:173,arc_standard:173,arceag:173,archiv:[57,60,107,129],arcstd:173,area:[117,319,349],aren:195,arent:195,arff:44,arff_formatt:44,arg0:[79,87],arg:[13,15,34,40,41,57,61,71,81,83,84,85,97,108,109,110,111,113,115,116,117,118,122,127,129,132,135,157,160,168,172,176,182,183,184,188,220,224,226,228,293,317,343],argid:[68,79,87],argloc:[79,87],argm:[79,87],argmax:221,argnam:110,argspan:68,argu:299,argument:[13,15,17,29,33,34,41,52,55,57,59,60,61,68,71,77,78,79,84,85,87,91,97,102,105,106,108,110,111,115,117,119,122,124,126,129,132,133,135,136,141,143,147,149,161,164,166,168,176,179,181,183,187,188,193,202,203,214,219,226,294,302,303,305,314,334,338,341,343],argument_indic:187,argument_pair:124,ari:[183,206],aristid:6,aristotl:107,arithmet:330,ariti:[179,183],arity_parse_demo:169,arlstem2:196,arlstem:[196,199,349],arm:316,armenian:[57,101],armi:[98,317,321,328,330,332],aronoff:[299,307],around:[28,38,52,57,71,90,98,117,129,167,193,293],arpa:86,arr:214,arrai:[33,34,38,41,46,48,141,310,341],arraign:[57,71],arrang:[117,155],arrest:[57,71],arrow:[113,118,297],art:[22,51,57,71,334,335],arthur:347,articl:[57,71,152,212,219,349],artifact:[57,71],artifici:[67,89,125,198,224,334,336],artst
ein:141,artstein_poesio_exampl:141,arxiv:[216,321],ascend:[115,118,327,333,334,341],ascent:317,ascii:[51,57,71,105,334,335],asian:332,ask:24,asleep:[57,71],aspect:[30,87,89,117,199],aspen:95,ass:195,assem:206,assembl:152,assert:[25,145,311],assertnexttoken:188,asserttoken:188,assess:[214,327],assign:[29,32,34,35,36,46,47,49,51,57,66,74,93,118,123,141,145,161,167,176,177,179,183,191,209,210,211,214,219,221,307,310,318,328],assign_clust:[47,49,51],assign_valu:307,assist:349,assoc_measur:193,associ:[53,57,72,94,98,102,105,108,117,119,123,140,145,148,156,159,160,164,168,176,179,183,184,193,221,253,334,349],assum:[28,35,36,44,57,58,78,85,97,100,109,118,122,132,133,139,159,161,164,167,169,176,177,179,183,193,214,299,305,312,317,324,325,326,327,331,338,346,348],assumpt:[15,35,122,123,124,125,126,127,128,214,312,340],astound:334,asymp:189,ate:66,athen:179,atild:189,atlanta:219,atom:[95,127,128,169,178,183,187,188,189],atomicexpress:[127,187],att:173,attach:[57,86,163],attempt:[25,109,117,118,122,127,129,155,158,171,176,187,188,309,313,334,343,346],attempt_adjunct:[178,188],attempt_applicationexpress:[187,188],attempt_booleanexpress:188,attempt_equalityexpress:188,attest:[33,43],attested_label:43,attr:[57,82,105,116,343],attrdict:[57,71],attrib:[116,117,129,182],attribut:[57,60,71,90,91,94,96,105,108,110,115,116,117,131,132,135,159,179,203,299,307,343],attrnam:[57,71],au:331,audio:[57,98],audiodata:[57,98],aug:81,augment:[57,60,79,87,91,115,179],augparsecategori:16,august:[67,88,349],augustin:107,aumann:349,auml:189,ausgefeil:200,ausgefeilt:200,ausgefeilter:200,austin:351,australia:351,auth_endpoint:341,authent:[341,342,343,346],author:[30,57,60,61,62,81,86,111,131,206,341,343],auto:[43,109,129,188],auto_format:109,auto_reweigh:317,autobahn:206,autobahnen:206,automat:[7,8,18,39,57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,102,103,105,106,107,109,111,117,131,132,158,160,166,172,179,181,188,209,215,218,219,221,317,318,328,330,332,334,335,344],autostep:7,autun:107,aux:160,avail:[0,7,8,33,46,57,64,79,87,94,98,103,111,118,151,161,170,171,173,179,200,206,209,211,224,301,303,305,306,312,336,340,347,349],available_categori:[57,104],available_lang:[57,104],averag:[20,33,46,49,141,147,148,176,217,310,317,318,321,325,327,330,332],average_charact:319,average_weight:217,averagedperceptron:[217,349],avg_ao:141,avoid:[25,43,50,52,57,71,105,106,118,132,161,176,181,188,317,348],avoid_empty_clust:50,aw:66,awai:37,awar:[57,71,297],awfulli:195,awkwardli:294,ax:[46,176,349],axelrod:320,ay:66,az:[57,101],azeri_azerbaijani_cyril:[57,101],azeri_azerbaijani_latin:[57,101],b1:[119,164,177],b2:[164,177],b3:164,b:[7,15,33,39,42,52,57,66,71,87,104,107,118,119,123,127,129,132,133,134,139,141,145,147,159,173,176,179,183,187,212,218,227,298,300,305,306,310,312,316,318,326,331,336,343],b_decr:195,b_graph:167,b_i:214,b_incr:195,b_of:179,ba:[98,145],babelfish:[150,349],babelize_shel:151,bach:302,bachelor:98,back:[25,52,60,78,91,102,176,219,320,332,334],background:[57,71,115,123],backlink:319,backoff:[133,137,212,219],backslash:305,backtrac:145,backtrack:[7,109,170,213],backward:[14,15,105,118,214,228,320],backwardbxconstraint:15,backwardcombin:15,backwardonli:15,backwardsxconstraint:15,backwardtconstraint:15,backwardtyperaiserul:14,bad:[145,194,195,209,328],badscor:[57,76],bag:37,bai:349,baikalfinansgroup:90,bake:[57,71],baker:[57,103],balanc:94,baldridg:349,ball:36,ballison:343,baltic:[57,101],bangla:[73,349],bank:344,bar:[46,60,71,78,91,106,129,307,349],bare:[195,199,222,294],bark:[57,61],bartlett:[299,307,310],base:[3,8,13,14,15,1
6,23,24,25,26,27,28,29,31,32,33,34,35,36,37,38,39,40,41,42,43,44,47,48,49,50,51,52,53,55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,111,113,115,116,117,118,119,122,123,124,125,126,127,128,129,130,131,132,133,134,135,137,138,139,141,143,144,145,146,152,153,157,158,159,160,161,162,163,164,166,167,168,169,170,171,172,173,174,175,176,178,179,180,181,182,183,184,185,186,187,188,191,193,195,197,198,199,200,201,202,203,204,205,206,208,209,210,211,212,213,214,215,217,218,219,220,221,223,226,227,228,245,246,248,250,251,254,257,259,264,265,266,267,268,269,273,274,276,280,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,316,317,319,322,323,324,325,326,327,328,330,332,333,334,335,338,339,341,342,343,349],base_fdist:176,base_path:39,base_url:111,baseform:[57,79,87],baselin:[148,212,224],baseline_backoff_tagg:224,baseline_data:212,basemodelbuildercommand:[122,124],basengrammodel:[132,135],baseprovercommand:[122,126,127,128],basetheoremtoolcommand:122,basewidget:117,basi:[32,57,79,87,111],basic:[46,47,57,71,118,119,123,159,173,194,201,206,221,224,276,297,334],basic_multilingual_plan:313,basic_sent_chop:221,basictweethandl:338,basictyp:188,basqu:349,batch:[38,191],batch_eat:129,batch_tag:211,batch_tag_increment:[211,212],batteri:[57,88,348],baum:214,bay:[35,36,38,194],baz:[60,78,91,106],bb1:202,bbox:[115,117],bdfoi:[57,104],bdquo:189,beam:[168,221],beam_siz:[168,174],beam_threshold:333,bearer:341,beauti:217,becaus:[33,57,71,77,78,105,106,109,115,118,122,132,135,139,145,172,173,201,206,219,294,306,312,317,321,324,325,326,327,332,333],becom:[50,95,118,168,336],bede:107,bee:204,beeferman:148,been:[7,8,29,35,57,59,90,100,102,109,115,117,118,123,127,129,132,133,139,141,142,145,146,158,159,166,167,168,170,171,172,175,176,179,189,193,194,201,206,214,221,228,251,300,302,311,312,326,333,338,339,341,346,347,349],befor:[7,25,30,34,48,50,108,109,111,117,118,122,124,125,126,129,132,144,149,159,160,162,166,168,170,171,172,176,194,206,213,219,221,254,264,292,302,306,307,314,317,319,326,335,341,343,348,349],beforehand:[57,71],beg_word:78,began:345,begin:[10,28,57,58,102,109,116,118,129,159,168,170,171,174,198,199,201,214,299,302,310,334],begsten:148,behav:[115,166,183],behavior:[2,28,50,57,102,105,305],behaviour:[203,317,339],behind:[93,117],being:[28,29,46,48,53,57,71,105,109,132,143,167,168,170,176,187,188,193,200,214,219,221,228,299,316,317,318,319,321,326,328,330,332,333,340,343,349],bell:[132,135,137,176],bellingham:349,belong:[31,48,51,57,71,89,105,167,226,327,339],below:[34,57,66,105,110,119,132,139,206,224,317,321,326,332,333,339,341,346],benchmark:[299,307],bender:276,benedictin:107,benefit:148,bennet:141,bennett:141,benzahia:206,berger:148,berkelei:[57,71,299],bernoulli:34,best:[26,38,57,64,82,167,168,193,200,203,217,221,304,322,323,324,326,327,328,332,333,347,349],best_binary_stump:32,best_incoming_arc:167,best_model2_align:327,best_path:214,best_path_simpl:214,best_stump:32,beta:[119,159,164,181,188,189,214,318,328,330,332,349],beta_0:214,beta_t:214,bethard:349,better:[14,24,37,57,68,151,200,212,221,228,293,349],betti:177,between:[23,25,28,37,42,50,51,52,53,57,71,74,85,89,102,105,116,117,118,119,123,141,142,144,145,147,149,164,169,176,189,194,195,212,214,216,224,247,293,300,303,310,316,317,319,321,325,328,329,332,333,334,335,336,341],beyond:[123,141,298,336],bfg:[33,213],bg:115,bhojpuri:[57,101],bi:221,bi
a:[34,48],bias:29,bib:[57,59,105],bicondit:188,bidirect:[220,293,320],big:[25,185,209,294,322,323,324,325,326],biggest:[57,62],bigram:[53,132,134,143,193,194,302,317,343],bigram_collocation_feat:193,bigram_fd:53,bigram_find:53,bigram_measur:53,bigram_score_fn:143,bigramassocmeasur:[143,193,194],bigramcollocationfind:53,bigramtagg:219,bilingu:[317,319,332],bill:[90,125,145],billi:145,billion:[57,71,90,212],bin:[34,41,129,176,178,214,349],bin_dir:178,binar:[119,336],binari:[14,15,25,27,32,33,34,36,39,41,43,119,126,129,141,167,174,179,183,188,215,304,343],binaris:119,binary_concept:179,binary_dist:[141,145],binary_loc:126,binary_nam:129,binary_names_demo_featur:43,binary_search_fil:343,binary_stump:32,binarycombinatorrul:14,binaryexpress:[181,188],binarymaxentfeatureencod:33,bind:[115,116,117,118,127,164,178,180,187,188,294],bind_click:117,bind_click_leav:116,bind_click_nod:116,bind_click_tre:116,bind_drag:117,bind_drag_leav:116,bind_drag_nod:116,bind_drag_tre:116,bind_to_column:115,bind_to_label:115,bind_to_listbox:115,binder:[181,188],binding_list:127,binding_op:188,bindingdict:[127,187],bindingexcept:127,bindop:180,bing:[67,81,88,89],binomi:343,binomial_coeffici:343,binop:188,bio:218,bio_to_chunk:218,biographi:160,birch:320,bird:[0,125,347,349,351],birth:[57,71,98],birthdai:[57,71],birthdat:98,bit:[57,71,98,224,294,300,335],bitext:[322,323,324,325,326],bitstr:86,black:[98,349],blackboard:[159,162],blackburn:[123,185],blackman:310,blanch:[57,71],blank:[57,65,68,73,81,85,97,104,155,303,306,314],blank_befor:314,blanklin:[212,306],blankline_token:303,blanklinetoken:[194,303],blanks_befor:314,blanks_between:314,bleibt:331,bleu:[282,301,317,318,321,330,332,349],bleu_scor:[315,321],blickl:107,blili:145,blk:98,bllip:[156,349],bllippars:158,blob:[206,229,301,311,313,349],block:[57,58,59,61,65,70,73,74,75,81,84,85,89,92,98,101,102,129,183,310,319],block_comparison:310,block_read:[57,58,63,65,70,73,74,81,84,92,97,102],block_siz:102,blog:217,blogspot:349,blue:[7,160,308,335],bn:119,bnc:[57,71,91,349],bnccorpusread:[57,60],bncsentenc:60,bncwordview:60,bo:[62,123,178,180,185],board:163,bob:332,bod:[57,71],bodenstab:349,bodi:77,bodlei:107,body_system:[57,71],boethiu:107,boi:[57,62,177,332],boil:[57,71,132],boisen:349,bold:115,bolli:145,bomb:195,bombai:73,bone:349,book1:123,book3:349,book:[0,57,77,105,151,152,173,209,251,254,293,295,317,321,322,323,324,325,326,332,346,347,349],book_1:347,bookstein:148,bool:[13,14,33,37,57,105,109,114,115,117,118,119,122,124,127,145,148,159,164,170,171,174,176,178,179,181,183,187,188,191,200,206,211,212,214,221,224,226,227,228,293,294,295,297,303,313,314,317,318,334,336,341,343],boolean_op:188,booleanexpress:[181,188],booster_dict:195,border:179,borderwidth:115,bore:93,borel:[42,69],bot:22,both:[25,28,31,36,37,39,53,55,57,68,71,84,90,91,105,109,111,115,117,118,119,122,126,132,133,137,149,159,169,173,176,187,188,194,198,199,201,210,214,221,224,294,296,299,302,310,317,333,334],bothbackward:15,bothforward:15,bothord:334,bottl:160,bottleneck:46,bottom:[2,115,116,117,159,168,169,171,175,335],bottomupchartpars:159,bottomupleftcornerchartpars:159,bottomuppredictcombinerul:[159,162,164],bottomuppredictrul:[159,164],bottomupprobabilisticchartpars:168,bought:90,bound:[52,57,71,105,115,117,118,127,129,145,179,181,187,188,193,219,302,313,339,343],boundari:[28,57,97,148,293,302,310,319],bounding_box:339,box:[115,117,317,339,346],boxer:[123,177],boxer_drs_interpret:178,boxercard:178,boxerdog:123,boxerdr:178,boxerdrspars:178,boxereq:178,boxerindex:178,boxernam:178,boxernot:178,boxeror:178
,boxeroutputdrspars:178,boxerpr:178,boxerprop:178,boxerrel:178,boxerwhq:178,boxwidget:117,boyd:349,bp1:317,bp2:317,bp:332,br:349,brace:[28,343],bracket:[16,28,29,57,60,61,91,107,117,118,132,134,185,251,293,334],bracket_pars:[57,107,334],bracket_s:91,bracketparsecorpusread:[57,61,107],bracketwidget:117,branch:[32,51,105,116,167,169,334,343],brand:89,brant:221,brat:98,brave:293,brazilian:349,breadth:[105,343],breadth_first:343,breakbefor:[57,71],breakdown:[57,61,93],breaklin:[57,71],breviti:[317,330,332],brevity_penalti:317,bridg:321,brief:[57,71],brill24:211,brill:[209,212,224,226,227,228,248],brill_train:209,brillrul:228,brilltagg:[211,212,223],brilltaggertrain:212,brilltemplatei:228,bring:[293,349],british:[57,60],bro:293,broad:[57,103],broader:160,broil:[57,71],broken:[109,185,212,299,307],brook:[145,160,220],brookhaven:145,brown:[55,57,71,105,109,160,172,176,209,217,219,310,322,323,324,325,326,327,346,349],brown_tagset:120,brows:10,browser:[1,10,57,64,349],browserv:10,brrokhaven:145,brvbar:189,bs:98,bsd:110,bubbl:154,buch:[322,323,324,325,326],buf_0_form_econom:173,buf_0_form_market:173,buf_0_ldep_att:173,buf_0_lemma_econom:173,buf_0_lemma_market:173,buf_0_pos_jj:173,buf_0_pos_nn:173,buf_1_form_:173,buf_1_form_new:173,buf_1_pos_:173,buf_1_pos_nn:173,buf_2_pos_vbd:173,buf_3_pos_jj:173,buffer:[25,109,173,181,188,341],bufferedgzipfil:109,bug:[229,251,274,349],bugfix:[345,349],bui:[160,295,303,306,308,312,313],build:[7,8,36,37,38,52,57,71,83,90,117,121,122,123,124,132,161,174,179,183,211,213,228,302,303,306,333,343,347,349],build_index:111,build_model:[27,122],build_preprocessed_fil:78,builder:[122,123,124],buildindex:[57,71],built:[30,33,52,105,116,117,132,139,162,167,173,185,188,208,209,343,349],builtin:[109,314],bull:189,bundl:179,burmese_myanmar:[57,101],busi:39,butterless:293,button:[2,7,8,116,117],buttonpress:117,bypass:293,byrhtferth:107,bytebuff:109,c1:[57,71,141,213,334],c2:[141,213,334],c5:[57,60],c:[25,33,39,46,49,52,57,61,71,76,77,104,107,111,117,119,123,128,131,132,133,134,139,141,145,148,168,169,176,178,179,195,212,216,221,224,226,227,228,300,301,305,306,316,319,320,332,335,336,343,346],c_exp:142,c_graph:167,c_incr:195,c_skip:142,c_sub:142,c_vwl:142,ca:[57,104,141,142,205,307,312],cach:[52,57,68,102,109,111,159,164,176,224,343],cache_baseline_tagg:224,cache_s:52,cache_to_tempfil:102,cachedepth:343,cachedtopdownpredictrul:[159,162,164],caclcul:[57,64],cake:[57,71],calc_dist:42,calcul:[33,35,37,39,42,46,48,52,53,57,64,102,105,111,119,132,133,139,141,143,145,147,149,167,176,185,214,219,221,302,317,318,321,328,330,332,333,343,349],calculate_delta:33,calculate_empirical_fcount:33,calculate_estimated_fcount:33,calculate_leftcorn:119,calculate_nfmap:33,calculu:[181,188,349],california:[89,95],call:[2,7,14,25,33,34,41,51,52,57,59,60,71,77,78,79,87,98,100,102,105,106,110,111,115,116,117,118,119,122,123,129,130,131,132,134,139,156,159,162,166,168,170,171,176,182,183,188,193,198,199,202,206,215,221,226,227,228,294,297,302,305,317,321,334,340,341,343,347,349],call_megam:34,call_tadm:41,callabl:[33,71,110,122,214,328],callback:[115,116,117],caller:[110,215],callison:320,cambridg:[299,307,322,323,324,325,326,327,333],came:187,camera:[57,89],camera_review:[57,89],can:[2,7,8,13,14,15,24,25,28,29,30,31,33,34,36,39,43,44,46,49,52,55,57,58,59,60,61,63,64,65,67,68,69,70,71,73,74,77,78,79,80,81,82,83,84,85,87,89,91,92,96,97,98,100,102,103,104,105,106,107,108,109,110,111,115,116,117,118,119,122,123,127,129,132,133,134,139,141,142,153,155,156,157,158,159,162,164,166,167,168,169,170,171,174,176,177,17
[… generated Sphinx search index (searchindex.js): minified term-to-document-ID map, entries "can_combin" through "herbarium" omitted …]
107,here:[57,71,98,105,110,118,129,132,141,143,145,179,200,206,217,220,230,294,322,326,327,332,333,349],herm:349,hessian:213,heurist:[171,302],hh:66,hi:[57,62,107,167,203,312,349],hidden:[115,117,214],hiddenmarkovmodeltagg:214,hiddenmarkovmodeltrain:214,hide:[7,8,66,115,117],hide_column:115,hideki:332,hideto:321,hierarch:[57,103,116,117,334,336],hierarchi:[57,64,117,178,307,339],higbe:145,high:[98,102,111,198,209,212,224,299,304,317,340],highe:145,higher:[28,33,132,134,145,159,162,164,168,170,171,174,175,317,327,333],highest:[33,35,132,167,212,214,330,344],highlevel:328,highli:195,highlight:[117,334,335],highlightthick:115,higve:145,hill:[25,46,50,324,325,326,327],hillclimb:[326,327],him:[57,62,334],hindi:[73,349],hirao:332,hire:[57,71],histogram:333,histor:142,histori:[107,170,171,219,317,321,326,332],hit:[57,62,332],hllw:145,hlt94:86,hlt:[299,307,330],hmm:[176,209,349],hoc:312,hold:[109,115,122,164,173,185,224,227,301,310,327],holder:111,hole:[177,349],hole_read:185,holesemant:185,holi:107,holist:89,home:[57,78,109,111,166,304,342,349],homepag:[117,123,203,274,343],homili:107,hon:214,honest:343,honnib:217,honoriu:107,hood:[57,66,71],hook:[25,181,187,188,254,264,292,349],hope:[57,64],hor:300,horizont:[116,117,334,335,336],horzmarkov:[334,336],hosam:349,host:[22,109,203,349],hour:[57,71,160],hous:[57,71,316,322,323,324,325,326,331],how:[14,24,28,30,33,57,95,102,105,111,117,118,119,132,133,135,154,159,161,168,175,176,224,226,293,302,306,318,322,323,324,325,326,334,341,344,348],howev:[33,36,57,71,100,102,105,111,115,116,118,119,132,145,156,164,201,203,212,221,294,302,326],howto:340,hs:98,hsi3:202,hsieh:94,hsiug5ct:202,hss:[57,105],ht:[98,202,215],htm:[72,94,107],html:[42,57,60,69,78,92,94,103,104,110,117,123,148,161,166,176,179,206,251,254,334,335,338,339,343,346,348,349],http:[0,10,22,38,42,53,55,57,60,62,64,67,69,71,72,74,77,78,81,83,86,88,89,92,93,94,103,104,105,107,109,110,111,115,117,123,137,142,145,148,160,161,166,176,179,200,203,206,211,213,216,217,220,221,229,251,254,259,274,282,283,301,304,311,312,313,317,318,319,321,328,330,332,334,335,336,338,339,340,341,343,344,346,347,348,349],hu:[57,81,88,89,104,194,349],huang:[94,214],huge:[195,333],hugh:321,hum:95,human:[86,132,134,206,224,225,299,321,328,347,349],humanli:313,hungarian:[57,104,206],hungarian_magyar:[57,101],hungarianstemm:206,hunpo:209,hunpostagg:215,hurt:[66,224],husk:202,huston:349,hut:66,hutto:195,hyderabad:73,hyp1:[317,318,321,332],hyp2:[317,318,321,332],hyp:[90,105,148,318,321,330,332],hyp_extra:37,hyp_len:[317,330],hyper:302,hypergraph:335,hypernym:[57,105,343],hypernym_dist:105,hypernym_path:105,hyperparamet:332,hyperspher:46,hyphen:[57,71],hypnoym:[57,105],hyponym:105,hypothes:[2,14,159,162,168,317,318,321,330,332,333],hypothesi:[2,37,90,143,159,162,317,318,321,328,329,330,332,333],hypothesis1:[317,328,330],hypothesis2:[317,328,330],hypothet:148,hz:[57,101],i1:[116,334],i1i:202,i2:[116,334],i6:137,i:[23,24,25,28,29,30,31,33,34,35,36,37,39,43,44,52,57,58,63,65,67,70,71,73,74,81,84,86,87,89,90,92,97,98,102,104,105,106,107,109,111,115,116,118,119,123,129,141,142,143,144,147,148,159,160,162,164,166,172,174,175,176,180,183,185,194,195,198,199,202,210,212,214,218,219,220,222,227,228,294,296,298,299,302,312,316,317,318,319,320,321,322,323,324,325,326,327,331,332,334,340,341,343,344,349],iN:334,i_peg:327,iacut:189,iad:72,ibm1:[315,331],ibm2:315,ibm3:315,ibm4:315,ibm5:315,ibm:[284,285,286,287,288,289,322,323,324,325,326,327,349],ibm_model:[315,322,323,324,325,326],ibmmodel1:[322,327],ibmmodel2:[323,327],ibmmodel3:[324,327],ibmm
odel4:[325,326,327],ibmmodel5:[326,327],ibmmodel:[322,323,324,325,326,327],ic:[57,105,194,344],icassp:137,iceland:[57,104],icfil:[57,105],ich:[324,325,326],icirc:189,icl:332,icsi:[57,71],ictaac:199,icwsm:195,id:[13,33,57,62,67,68,71,81,82,88,89,90,91,96,98,103,105,107,111,118,123,212,227,325,326,328,330,335,338,339,340,341,344],id_str:339,iddo:[184,187],idea:[43,117,123,125,133,135,209,331],ideal:133,ident:[57,84,105,132,135,145,149,169,179,214],identif:[42,69,167,349],identifi:[13,25,26,28,32,42,47,49,51,53,55,57,59,60,61,62,63,64,67,68,69,70,71,74,78,79,80,82,83,84,85,87,90,91,92,96,97,98,103,105,106,107,109,111,115,118,161,167,178,184,189,195,204,210,294,296,302,303,306,317,331,335,343,347],idf:[38,293],idiomat:[57,71],idl:[18,201,343],ids_f:341,ie:[57,72,80,84,90,106,127,188,221,334,336],ie_er_99:72,ieee:214,ieer:[29,57,189],ieer_headlin:189,ieercorpusread:[57,72],ieerdocu:72,ieerstr2tre:29,iesha:18,iesha_chat:20,iexcl:189,iff:[33,34,119,127,128,159,164,181,185,188,227],iff_list:188,iffexpress:[185,188],ignor:[25,28,33,35,57,60,68,71,73,80,81,84,89,98,104,106,109,114,117,118,132,163,168,174,188,215,259,276,293,314,318,322,326],ignore_abbrev_penalti:302,ignore_cas:114,ignore_lines_startswith:[57,104],ignore_stopword:206,ignore_whitespac:318,ignorecas:[298,312],ignorekei:[57,71],ignorereadmecorpusview:[57,81],igrav:189,ih:66,ii:[33,53,57,71,94,107,320,331,349],iii:[53,107,331],iiit:73,iit:73,iixi:53,ij:[214,216],ijcnlp:94,ill:305,illeg:226,illegaltypeexcept:188,illinoi:[67,81,89],illustr:[57,71,102,123,154,167],im:331,imag:[57,105,161,189,343],immedi:[14,28,119,129,159],immut:[31,32,33,35,43,118,119],immutablemultiparentedtre:334,immutableparentedtre:334,immutableprobabilisticmixin:[119,176],immutableprobabilistictre:334,immutabletre:334,imp:[117,128,185,187,188],imp_list:188,impact:[131,330],imper:199,impexpress:[185,187,188],implement:[13,14,15,25,28,33,34,38,42,43,52,57,59,71,75,77,84,105,117,118,119,122,130,132,133,135,141,142,143,145,148,156,159,162,164,167,168,169,170,171,173,176,185,193,198,199,200,203,211,214,217,221,226,295,299,307,312,317,318,321,322,323,324,327,330,332,333,334,338,344,349],impli:[181,188,328],implic:[187,188],implicitli:[57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,103,105,106,107,117,119],import_from_stdlib:129,importantli:[307,317],impos:[113,118,119],imposing_oblig:[57,71],imposs:25,improp:28,improv:[33,110,148,199,200,201,203,213,274,295,298,323,324,325,336,349],in_chunk_pattern:28,in_demo:189,in_idl:343,in_spite_of:300,in_strip_pattern:28,inaugur:[257,349],inc:[0,301,330,347,349],incept:349,incl:349,includ:[25,28,29,30,33,36,38,46,50,53,57,59,60,71,74,77,78,79,87,91,97,100,105,106,107,110,115,117,118,119,129,132,134,169,173,176,188,189,203,214,293,299,302,305,323,324,325,326,327,330,331,333,334,336,341,348,349],include_abbrev_colloc:302,include_all_colloc:302,include_encod:[57,59],include_fileid:[57,59],include_nt:195,include_rt:341,include_semant:16,include_tre:68,inclus:[60,78,91,106,224,226],incom:167,incompat:[115,118,349],incomplet:[102,159,162,164],inconsist:[118,349],inconsistenttypehierarchyexcept:188,incorpor:[25,28,57,71,179,349],incorporatedf:[57,71],incorrect:[28,29,39,102,144,212,218,228,334,343],incorrectli:200,incr_download:111,increas:[46,48,102,117,129,132,135,149,159,164,176,195,201,221,320,327,332,333],incred:195,increment:[117,123,129,159,162,175,176,224,302,349],incremental_stat:224,incrementalbottomupchartpars:162,incrementalbottomupleftcornerchartpars:162,incrementalchart:162,incrementalchartpars:162,incrementalleftcornerchartpars:16
2,incrementaltopdownchartpars:162,indel:142,indent:[32,57,100,103,127,128,186,334,343],independ:[35,57,103,116,143,147,228,301,317,325,349],index:[14,30,33,42,47,48,49,50,51,52,57,64,68,69,71,79,87,102,111,115,116,117,118,129,132,134,143,146,159,160,161,162,164,167,168,169,175,176,178,179,183,184,188,198,211,213,219,226,227,228,276,293,310,319,322,323,327,331,332,334,343],index_count:187,index_timeout:111,indexerror:[118,334],india:73,indian:[57,98,349],indiancorpusread:[57,73],indiancorpusview:73,indic:[14,25,28,29,30,37,44,51,52,55,57,60,61,68,74,77,78,79,87,91,98,102,106,111,115,116,117,118,119,127,129,132,141,143,147,148,159,170,178,179,183,184,185,187,188,210,214,218,219,221,225,227,228,293,297,302,306,316,322,323,331,332,334,335,338,349],individu:[28,32,52,57,59,100,105,111,115,141,176,178,183,188,202,211,214,219,224,306,317,332,334,346,351],individualvariableexpress:[181,183,188],induc:[119,214,336],induce_pcfg:119,induct:349,industri:347,ineffici:211,inf:[123,343],infam:[318,321],infer:[0,57,71,177,181,185,188,349,351],inference_fixt:229,infil:339,infin:189,infinit:87,infix:[198,199],inflat:317,inflect:87,influenc:[132,139,333],info:[57,64,71,98,106,110,111,115,335],info_or_id:111,inform:[12,13,28,29,30,33,35,37,46,55,57,58,62,63,65,67,70,71,72,73,74,79,81,82,84,86,87,88,89,92,94,97,98,102,105,111,117,118,123,125,131,132,143,148,153,156,158,159,162,164,168,170,173,174,176,189,198,199,201,206,209,210,211,214,219,221,295,325,327,333,334,336,341,343,345,349],informatik:137,information_cont:105,informchk:123,ing:[57,105,204,212,219],inherit:[57,71,85,129,132,135,143,176],ini:[57,71],init_func:54,init_vocab:327,initi:[7,8,13,14,23,25,28,46,50,57,59,61,75,76,81,84,85,97,102,105,109,115,117,118,123,127,129,132,134,135,141,153,159,162,164,167,168,170,171,173,176,177,187,201,211,212,213,214,221,293,299,300,302,309,317,321,322,323,324,326,327,334,340,341,343],initial_column:111,initial_mean:[48,50],initial_tagg:[211,212],initial_valu:129,initialerror:212,initialis:[46,51,57,100,294],initialize_edge_scor:167,initialize_label:[184,187],inner:[57,76,167],innermostfunct:15,input:[16,23,25,27,28,29,30,34,35,38,39,41,44,57,58,59,65,70,73,74,75,84,90,92,102,105,109,110,115,119,122,123,124,126,127,128,129,132,139,158,160,161,166,167,169,172,173,174,178,179,183,188,189,191,194,195,198,199,200,208,212,215,218,219,220,221,297,298,301,304,310,311,313,314,318,341],input_feat:33,input_fil:173,input_file_path:309,input_str:313,input_word:195,inputfilenam:[166,304],inquisit:333,inrang:188,ins_cost:148,insensit:[33,57,71,200,276,293],insert:[52,57,74,115,117,118,145,148,159,164,168,178,300],insert_child:[116,117],insert_with_backpoint:159,insid:[57,71,109,160,168,172,180,251,297,305,336],insidechartpars:168,insight:160,inspect:[129,179,195],inspir:[189,311],inst_var:164,instal:[55,108,109,111,176,178,293,295,339,340,349],install3:348,instanc:[44,53,57,59,62,68,71,79,83,87,89,92,118,119,125,127,130,131,132,133,139,158,159,160,166,167,172,176,181,183,188,193,194,202,212,214,219,221,224,226,227,228,293,294,297,302,316,319,332,338,341],instance_hypernym:105,instance_hyponym:105,instant:6,instanti:[57,60,71,119,132,133,135,164,181,188,206,217,297,302,341],instantiate_edg:164,instantiatevarschart:164,instead:[25,33,43,52,57,59,60,62,64,74,85,109,129,132,133,135,141,159,161,176,203,211,219,221,293,301,303,306,310,317,318,321,326,328,332,334,335,349],institut:[94,107,201],instruct:[108,109,117,229,340,348,349],instrument:[57,71],insur:[317,318,321,328,330],insurg:343,intact:202,integ:[33,67,90,102,115,118,129,141,167
,176,183,194,295,296,303,306,310,319,325,326,335,341,343],integr:[57,71,349],integratedpro:[57,88],intellig:[67,89,125,198],intend:[30,57,64,117,123,178,179,181,187,188,229,293,338],intens:[36,195],intent:20,intentionally_cr:[57,71],inter:[141,349],interact:[1,25,48,49,111,116,117,129,182,293],intercept:176,interest:[57,71,92,105,153,224,316,317,321,332,335,349],interfac:[10,13,25,26,30,31,33,34,39,46,47,57,69,93,94,103,105,106,111,121,122,129,132,133,139,151,156,158,159,160,164,166,167,168,172,176,178,188,196,197,209,210,211,215,216,220,227,228,273,293,296,306,308,309,315,333,338,347,349],interior:117,interject:[57,71],intern:[0,28,38,39,57,67,69,71,81,88,89,102,118,156,168,188,189,195,199,302,322,323,344],internal_punctu:302,international_regex:301,international_token:[301,349],internet:301,interpformat:124,interpol:[132,133,135,137,221,317],interpolatedlanguagemodel:[132,135],interpret:[33,119,177,178,183,346,348,349],interpret_multi:178,interpret_multi_s:178,interpret_s:[178,191],interrog:179,intersect:[117,147,226,228,320],interv:[141,145,226],interval_dist:145,interven:[189,334],intj:[57,71],intro:[57,71],introduc:[28,33,39,119,179,181,188,229,310,323,326,336,347],introduct:[107,145,347],intuit:332,invalid:[28,129,176],invari:214,inventor:[86,203],invers:[119,213],invert:[316,343],invert_dict:343,invert_graph:343,invest:312,investig:219,invoc:122,invok:[57,100,122,206,215,219,221,312],involv:[38,43,46,51,57,71,145,226,294],ioannidi:343,iob:[27,29,57,68],iob_sent:[57,68],iob_word:[57,68],ioerror:109,iona:145,iota:189,iowa:145,ipa:[299,307],ipi:[57,74],ipipan:57,ipipancorpusread:[57,74],ipipancorpusview:74,ipod:89,iquest:189,iraq:160,iraqi:160,iron:[57,71],irregular:[196,219],is_alpha:302,is_atom:[128,188],is_backward:13,is_binaris:119,is_cap_diff:195,is_chomsky_normal_form:119,is_cjk:313,is_complet:[14,159,162],is_defin:206,is_ellipsi:302,is_eventvar:188,is_flexible_chomsky_normal_form:119,is_forward:13,is_funct:13,is_funcvar:188,is_head_word:327,is_incomplet:[14,159,162],is_indvar:188,is_initi:302,is_instal:111,is_leftcorn:119,is_lex:119,is_nod:185,is_non_punct:302,is_nonempti:119,is_nonlex:119,is_noun:206,is_numb:302,is_primit:13,is_pronoun_funct:181,is_regex:129,is_rel:183,is_stal:111,is_tautolog:127,is_unseen:33,is_var:13,is_vari:13,is_verb:206,is_writ:129,isalnum:[57,104],isalpha:[57,104],isbndb:349,isin:189,isinst:[57,71],islow:[57,104],isn:[57,104,169,195,209,312],isnt:195,iso2022_jp:[57,101],iso639:83,iso639_typ:83,iso63_cod:[57,83],iso:[42,57,61,69,82,101,105,109,131,189,209,215],iso_to_crubadan:[57,69],isolated_plac:[57,71],isort:[0,57,209,223,315,349],isozaki:332,isri:[196,349],isristemm:201,issc:[57,104],isso:[57,104],issu:[57,71,251,274,283,301,336,343,349],issubset:316,issubsetof:127,issuperset:226,ist:[145,316,322,323,324,325,326],istanc:[57,62],istest:290,isti:93,isupp:[57,104],isvari:[181,188],italian:[57,104,206,304],italianstemm:206,item:[46,51,52,55,57,60,71,77,78,89,91,98,102,106,107,111,117,118,119,132,133,134,137,139,141,145,148,153,154,176,184,186,189,194,195,259,334,338,341,343],itemcget:115,itemconfig:115,itemconfigur:115,itemgett:[57,71],itemid:98,iter:[14,33,36,38,43,46,49,51,52,57,98,102,105,118,130,132,133,134,136,139,157,158,159,160,162,164,165,166,167,168,169,170,171,172,174,175,180,193,213,214,217,218,227,293,294,296,303,304,305,306,313,314,319,322,323,324,325,326,327,328,334,341,343,344],iterate_from:[52,102],iteredg:[159,162],itl:72,itman:145,its:[7,8,14,25,28,30,33,39,42,52,57,59,64,67,69,71,79,82,87,89,102,105,108,109,110,111,115,116,117,118,119,129
,132,133,135,139,145,154,159,160,161,164,166,167,168,169,170,171,176,179,183,187,188,189,209,210,214,219,226,227,228,302,303,304,305,306,307,310,314,316,321,322,323,324,325,326,327,331,332,333,334,336,343,349],itself:[25,28,43,52,57,71,105,106,108,111,115,118,119,131,132,133,134,141,174,310,334,336],iuml:189,iwslt:320,ixi:53,ixii:53,ixxi:53,iy:66,j1:202,j82:179,j93:319,j:[42,57,104,115,123,125,148,159,162,164,195,201,202,214,316,319,322,323,324,325,326,327,331],j_peg:[326,327],ja:[322,323,324,325,326],jaccard:143,jaccard_dist:145,jacob:349,jai:162,jakobsen:349,jambon:322,jame:107,jan:[145,302],janel:205,janela:205,januari:349,japan:[81,88],japanes:[301,349],japanese_nihongo:[57,101],jar:[129,166,220,309],jaro:145,jaro_scor:145,jaro_sim:145,jaro_similar:145,jaro_winkler_sim:145,jaro_winkler_similar:145,jason:[321,349],java:[129,166],java_class:309,java_opt:[160,172,220,308,309],javascript:10,jbo:123,jcn_similar:[57,105],jd:98,je:322,jean:349,jeff:321,jeffrei:321,jeh1r:202,jenkin:349,jeraldin:145,jeremi:299,jespersen:[299,307],jesperson:307,jessica:160,jetai:198,jgaa:335,jh:66,jhu:211,ji1d:202,ji:[57,101],jiang:[57,105],jiann:94,jindal:[57,67],jing:317,jj:[25,28,39,57,61,144,160,163,172,173,174,209,212,217,219,220,294,347],jn1d:202,jo1d:202,joakim:173,joel:[349,351],johan:178,johann:302,john:[115,125,145,159,160,161,162,164,184,188,209,222,332,334],johnson:[145,321],join:[55,57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,102,103,105,106,107,109,111,141,159,160,163,173,206,224,227,297,302,332,334,336,347],joinchar:[334,336],joint:[34,214],joint_feat:33,jon:[145,311],jonathan:[57,83],jone:145,jonsafari:311,jordan:349,josef:317,joseph:349,journal:[86,145,176,198,335,349],journei:24,jp:[57,75,101,332,349],jpeg:161,jrev1t:202,jscore:145,jsim2t:202,json2csv:339,json2csv_ent:339,json2csv_preprocess:194,json:[57,100,109,130,194,339,340,341],json_fil:194,json_tag:[211,217,219,226,227],jsondecod:130,jsonencod:130,jsontag:0,jsontaggeddecod:130,jsontaggedencod:130,ju1d:202,ju:307,judg:307,judgment:328,juf1:202,juli:[145,349],juliu:145,jump:[160,172,217],june:[195,349],junki:20,junsheng:[81,88],jurafski:176,juri:[55,57,219,346],jussi:349,just:[35,57,68,71,74,76,85,89,90,105,115,119,122,123,132,179,183,195,211,222,302,303,306,313,321,339,340,346],justif:[117,307],justifi:117,juvenil:[57,62],k1:228,k2:228,k:[38,46,48,50,52,57,60,66,111,118,141,148,159,164,167,198,199,201,212,214,228,259,310,317,343],kabushiki:301,kahn:299,kaiser:321,kamholz:[57,83],kappa:[141,189],kappa_pairwis:141,karg:108,katakana_hangul_halfwidth:313,kato:321,katsuhito:332,kaufmann:330,kazawa:321,kbeval:[57,71],kdd:[81,89],ke:307,kecl:332,keep:[52,105,117,118,132,139,145,167,169,195,306,312,326,333,336,343],keep_token_posit:304,keep_whitespac:309,keepend:109,keh:94,kei:[32,35,36,52,57,66,71,72,82,99,105,109,110,115,117,118,119,132,134,139,144,146,149,176,179,183,184,186,189,293,300,319,334,339,340,341,343,349],keith:[167,321],kendal:332,kendall_tau:332,kept:[151,194,228],kevin:332,kexw_5:349,keyboard:[7,8,115],keyerror:[52,115,117,118,183],keyword:[25,29,57,59,67,84,108,115,117,122,143,174,176,179,314,340,341],keywords_readm:[57,67],kharagpur:73,khoja:201,kid:36,kill:343,kilomet:202,kimmo:349,kind:[14,25,57,91,132,135,156,157,159,195,294],kinda:195,kindof:195,kipper:[57,103],kishor:317,kiss:[195,302],kivi:349,klau:321,klein:[0,316,322,323,324,325,326,336,347,349],klingner:321,kmean:46,kmeanscluster:[46,50],kn:[57,75],knbc:57,knbcorpusread:[57,75],knee:66,kneser:[132,135,137,176],kneser_ney_mini_exampl:137,knesernei:137,kneserneyinterpol:[1
32,135],kneserneyprobdist:176,know:[125,129,130,132,206,214,302],knowledg:[46,81,89,94,109,179],known:[29,30,31,33,36,39,57,59,90,102,105,111,119,132,156,157,197,214,302,336,343],known_len:52,koehn:[320,322,323,324,325,326,327,333],kondrak:142,konrad:117,korobov:[349,351],korpu:[57,74],krikun:321,krippendorf:349,krippendorff:[141,145,259,349],krippendorffs_alpha:259,kuang:94,kubler:173,kudo:321,kulyukin:148,kumaran:73,kurian:321,kw:[110,111,115,117,183],kwarg:[29,33,40,52,57,59,61,62,71,74,78,81,83,84,85,88,97,99,108,109,111,113,115,116,117,118,122,129,132,135,137,141,143,147,157,160,168,172,176,182,193,194,214,220,314,317,334,343],l162:301,l1:[213,221],l2:[213,221],l309:313,l3:221,l926:301,l:[10,33,35,57,61,66,107,117,141,145,148,185,212,213,228,294,306,319,322,323,324,325,326,327,334],la2:202,la:[163,201],label1:[35,145],label2:[35,145],label:[29,30,31,32,33,34,35,36,38,41,43,44,47,51,57,59,68,71,79,84,87,90,115,116,132,133,135,139,141,144,145,161,163,167,175,179,185,189,191,193,194,214,217,219,276,294,334,335,336,349],label_:115,label_config:115,label_foreground:115,label_freq:141,label_indiv:179,label_probdist:35,labeled_featureset:[32,35,38],labeled_sequ:214,labelled_sequ:214,lack:[52,118],lacnunga:107,lacura:145,lad:107,laddish:[57,62],lafferti:148,lagu:107,lai3:202,lakhdar:206,lambda:[25,57,75,105,117,181,185,188,189,293,294,333,334,343,349],lambda_abstract:184,lambda_list:188,lambdaexpress:[181,188],lambek:349,lancast:[196,349],lancasterstemm:202,lang22:107,lang:[42,57,69,104,105,189,209,307,309,341],lang_cod:[57,83],lang_dependent_regex:301,lang_dist:42,lang_freq:[57,69],lang_independent_sub:301,lang_var:302,languag:[0,42,46,57,69,73,82,86,94,104,105,133,135,137,138,139,142,167,176,179,185,200,203,206,209,214,293,295,299,301,302,304,307,309,311,316,320,322,323,324,325,326,327,331,332,333,334,336,341,343,345,349],language_cod:[57,83],language_model:333,language_prob:333,language_varieti:[57,82],languageindepend:319,languagemodel:[132,133,135],langvar_uid:83,lao:[57,101],laplac:[132,135,176],laplaceprobdist:176,laquo:189,larg:[14,25,46,102,109,176,214,224,302],larger:[29,39,102,167,175,176,214],largest:[57,103,136,154],larr:189,last:[25,52,102,115,118,145,213,226,227,228,305,314,326,334,335,338,341,349],last_dist:42,last_par:310,last_tok_seq:310,lat0117:[57,101],later:[28,46,49,90,131,211,310],latest:[178,341,349],latex:334,latin1:[57,58,101],latin2:[57,101],latin4:[57,101],latin5:[57,101],latin:[57,101,109,189,215,343],latter:[90,325,326],lattic:167,latvian:[57,104],lau3:202,launch:10,lavi:328,law:[42,107,343],lawrenc:214,layer:[57,71],layout:[57,98],lazi:[43,52,57,60,64,77,91,131,132,160,172,217],lazili:[43,52,71,102,108,136],lazyconcaten:[52,71],lazycorpusload:[57,59,75,108],lazyenumer:52,lazyimport:0,lazyiteratorlist:[52,71],lazyload:109,lazymap:[43,52,71,102,193],lazymodul:131,lazysubsequ:52,lazyzip:52,lba3:202,lbai4i:202,lbaifi6:202,lbfg:213,lbi3:202,lc1:202,lc:[57,82,105,310],lc_all:343,lcb:[298,312],lceil:189,lch_similar:[57,105],lcon:189,ldc:221,ldquo:189,le:[57,101,189,321],leacock:[57,105],lead:[102,167,171,219,317,347],leaf:[7,14,32,51,52,116,119,159,164,170,294,334,336],leaf_:116,leaf_freez:334,leaf_label:51,leaf_pattern:334,leaf_treeposit:334,leafcolor:335,leafedg:[14,159,168],leafinitrul:[159,162,164],leaflet:349,leak:115,learn:[38,40,46,47,49,51,148,193,211,212,213,214,223,224,302,327,343,348],learner:224,learning_curve_output:224,learning_curve_tak:224,learnt:211,least:[14,28,33,51,57,77,102,105,106,115,119,145,157,158,159,168,170,171,175,176,202,212,227,307,328,331]
,leav:[7,14,25,29,32,51,55,57,60,64,65,102,116,119,158,159,164,170,194,196,306,314,334,335],lec:145,led:[214,351],lee:[62,66,194,349],leechdom:107,left:[7,8,14,15,25,28,30,102,111,115,117,119,132,144,154,159,161,164,168,170,171,173,189,200,219,224,312,326,334,335,336,343],left_arc:173,left_children:161,left_context_tag_pattern:28,left_edg:[14,159,164,168],left_pad_symbol:[132,343],left_sibl:334,left_tag_pattern:28,leftarc:173,leftcorn:119,leftcorner_par:119,leftcornerchartpars:159,leftmost:[7,169],leftward:[25,117,169],legaci:191,legal:[185,299,343],legal_frequency_threshold:299,legalcod:[57,83],legality_principl:295,legalitysyllabletoken:299,legitim:343,legomena:176,lehrbuch:[299,307],leipzig:[299,307],lem:[298,312],lemburg:131,lemma:[57,71,75,77,79,87,91,103,105,146,208],lemma_count:[57,105],lemma_from_kei:[57,105],lemma_nam:105,lemma_para:[57,77],lemma_s:[57,77],lemma_word:[57,77],lemmaid:[57,71],lemmat:[37,208],len:[30,33,52,57,67,71,75,89,102,129,132,139,147,160,164,167,175,176,228,310,313,317,320,330,332,334,343],length:[14,25,33,48,49,50,51,52,57,90,102,105,115,119,136,144,145,147,148,159,162,167,168,174,176,189,201,204,214,217,219,226,293,297,299,310,313,317,319,322,323,324,325,326,330,331,333,334,341,343],leofric:107,leoni:200,leq:185,lesk:344,lesk_sens:344,less:[28,33,52,102,132,134,139,143,144,149,168,195,211,219,228,317],let:[7,8,25,36,42,129,130,132,214,341],letter:[107,143,145,153,155,183,200,202,206,349],lev:[184,187],level:[28,33,57,60,71,78,89,91,98,105,106,117,147,159,162,164,168,170,171,174,175,195,212,224,294,295,307,316,317,318,321,328,330,334,335,343,346,349],levenshtein:[145,349],lex_filenum:[57,105],lex_id:[57,105],lex_sens:[57,105],lex_str:16,lexem:[57,71],lexic:[57,61,71,83,93,103,104,105,114,119,310,322,323,324,325,326,327,347],lexicalis:119,lexicograph:[57,71,105],lexicon:[2,12,14,57,66,71,81,89,103,179,194,195,214,300,349],lexicon_fil:195,lexnam:105,lexname_index:105,lexpars:172,lexunit:[57,71],lfg:177,lfilter:310,lfloor:189,lgtm:349,lh:[14,119,159,162,164,168,175,185],li:[206,307,349],lib2l:202,lib:[57,104,109,111],libero:110,librari:[0,30,33,38,57,100,109,129,338,339,341,347,349],licens:[14,57,59,83,94,105,110,111,159,162,164,168,349],lidston:[132,135,176,214],lidstoneprobdist:176,life:160,lifetim:102,lifo:[52,118],light:[118,198,199,206,274],lign256:137,like:[14,23,28,31,32,43,46,52,57,61,71,74,80,84,102,105,106,109,110,115,118,119,130,131,132,153,157,158,159,161,166,168,170,171,175,176,179,183,188,194,219,224,302,307,312,322,325,326,327,333,334,339],like_neg:194,likelihood:[33,43,46,47,48,51,53,119,132,143,147,176,213,302,316,317,333],likelihood_ratio:143,likelihood_vectorspac:[48,51],liken:167,lile:351,lillian:62,limit:[24,25,57,100,105,147,166,194,213,221,224,301,333,336,338,340,341],limit_by_time_demo:340,lin:[57,105,317],lin_similar:[57,105],line:[10,16,18,28,29,32,44,57,58,61,65,66,68,71,73,79,81,85,86,87,89,92,94,97,100,102,103,104,109,116,117,127,128,144,159,162,164,174,176,194,198,205,213,226,227,228,293,303,306,311,312,314,317,334,335,339,340,341,343,348,349],line_color:116,line_length:152,line_separ:[57,104],line_token:306,linear:[38,166,176,187,221],linearlog:177,linearlogicapplicationexcept:187,linearlogicpars:187,linearsvc:38,linebreak:314,linebuff:109,linesearch:213,linestart:302,linetok:257,linetoken:[257,306],ling:334,lingfil:161,linguist:[25,28,67,88,94,117,141,142,148,152,167,176,200,302,322,323,324,325,326,327,347,349],link:[57,71,78,105,274,276,294,319,346,349],linkag:145,linspac:310,linthesauruscorpusread:[57,76],linux:347,linuxfest:349,
lionel:22,lip:307,lippincott:349,lirg:332,lisp:[57,76],list2sym:189,list:[7,8,13,14,16,23,25,26,27,28,29,30,31,32,33,34,35,38,39,41,43,44,47,51,52,55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,87,88,89,90,91,92,93,95,96,97,98,100,101,102,103,104,105,106,107,109,110,111,114,115,116,117,118,119,122,123,124,125,126,127,128,129,130,132,133,134,141,142,144,145,146,147,148,149,153,154,155,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,183,184,186,187,188,189,191,193,194,195,209,210,211,212,213,214,215,217,218,219,220,221,222,224,225,226,227,228,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,309,310,311,312,313,316,317,318,319,321,322,323,324,325,326,327,328,330,331,332,333,334,335,339,340,341,343,345,346,349],list_of_list:52,list_of_refer:[317,321,330,332],listbox:115,listbox_:115,listbox_config:115,listbox_foreground:115,listofkeyword:[57,67],lite:[82,349],liter:129,literatur:33,lithuanian_lietuviskai:[57,101],littl:[57,61,71,90,173,195,224,300,332],liu:[57,67,81,88,89,194,321],liub:[67,81,88,89],lium:301,live:[107,341],lively_plac:[57,71],living_th:[57,105,343],ll1:202,ll:[52,63,158,298,312,348],lld0:349,lm:[0,33,241,293],lmu:200,ln:317,load:[52,55,57,59,71,77,105,108,109,111,123,131,141,158,159,161,174,179,189,217,293,302,343,349],load_ace_data:27,load_ace_fil:27,load_arg:174,load_arrai:141,load_cr:342,load_fol:123,load_pars:174,load_parser_opt:158,load_po:214,load_reranker_model:158,loader:[71,130],loc:[39,95,217,218],loc_:116,loc_attr:116,local:[46,50,57,85,101,102,109,111,131,189,326,327,334,338,343,346],local_test:229,localhost:[10,160],localtimezoneoffsetwithutc:338,locat:[7,29,57,58,59,64,68,75,77,79,85,87,97,100,101,107,108,109,116,117,118,129,168,170,174,188,189,217,220,293,325,331,339,346,349],location_of_sound_sourc:[57,71],lock:25,locura:145,log:[10,33,43,57,105,133,147,176,213,214,221,302,316,319,330,333,334,346],log_base2:138,log_likelihood:[43,147,176],log_prob:[214,316],logarithm:[33,132,138,143,176],logi:176,logic:[12,109,118,122,123,125,126,129,133,135,177,178,181,183,185,187,191,224,227,294,297],logic_pars:[109,118,188],logicalexpressionexcept:188,logicpars:[109,181,187,188],logist:38,logprob:[176,334],logscor:[132,133],logsumexp2:214,logx:176,london:218,longer:[57,102,105,117,151,168,215,321,331,333,335,349],longest:[57,105,119,299,327],longest_target_sentence_length:327,longestchartpars:168,longid:[57,103],look:[28,38,52,57,64,71,79,87,109,130,132,135,139,219,224,226,293,297,326,327,333],lookahead:188,lookback:188,lookup:[57,76,105,132,139,184,325,326,341],lookup_by_userid_demo:340,lookup_uniqu:184,lookuperror:[34,109,129],loop:[28,331],loos:179,loper:[0,347,349],lose:336,loss:[334,336],lost:36,lot:[52,57,71,300,302],love:[160,161,177,194],low:[211,224,349],lowast:189,lower:[36,46,49,57,105,132,135,160,176,212,217,293,317,326,327,328,333,341],lower_date_limit:[338,341],lowercas:[57,66,188,191,200,203,274,301,302],lowest:[35,105,168,299,333,334],lowest_common_hypernym:105,loz:189,lp:299,lrb:[160,172,298,312],lrec2014:[57,83],lrec:[57,83],lrec_skipgram:343,lrm:189,lsaquo:189,lsb:[298,312],lsquo:189,lst:[52,189,301],lstrip:311,lt:[189,313,349],ltext:33,lu256:[57,71],lu2:202,lu:[57,71],lu_bas:[57,71],lu_ids_and_nam:[57,71],lucid:349,lucorpu:[57,71],luf3:202,lufi4i:202,luid:[57,71],lukasz:321,luminos:320,lunam:[57,71],lunamepattern:[57,71],lv:[57,82,104],lwidth:14,lx:[57,99],ly:[212,219],lynch:160,lynx:349,m:[7,22,42,46,48,57,66,77,83,90,98,111,145,148,160,172,177,183,18
9,203,206,226,227,274,276,294,298,301,318,319,320,322,323,324,325,326,335,346,348],ma:[98,204],mac:[346,347],mac_morpho:[57,97],macduff:321,mace4:[123,124,349],mace:[121,123,126],macecommand:[124,126],macherei:321,machin:[38,40,46,57,104,148,193,214,315,316,317,318,321,322,323,324,325,326,327,330,332,333,344,346,348],macintosh:349,macintyr:298,macintyrecontract:298,macmorphi:349,macmorphocorpusread:[57,97],macr:189,macro:[276,294,317,318,321,332],made:[15,28,52,57,62,71,85,89,90,100,111,115,118,129,160,194,201,206,302,312,326,331],madnani:349,magahi:[57,101],mai:[13,15,18,25,28,31,34,38,44,46,50,52,53,57,71,79,81,87,88,89,97,102,105,106,109,111,115,117,118,119,122,129,143,157,158,159,168,176,188,200,206,214,219,221,224,226,227,228,293,295,302,305,317,324,327,334,340,343,349],mail:345,mailto:131,main:[7,8,57,71,84,89,115,117,129,132,162,177,179,201,330,339,349],main_field:339,mainentri:[57,71],mainli:[57,62,118,306,339],mainloop:[111,113,116,117,182,343],maintain:[7,8,13,52,102,109,115,118,126,159,168,171,203,221,299,304,334,351],maja:318,major:[46,299,307],majorli:195,make:[28,29,32,35,44,52,57,59,60,63,64,68,69,70,71,74,78,79,80,82,83,87,91,96,98,103,105,106,107,109,116,117,118,122,124,125,126,129,132,133,134,141,145,156,170,171,179,187,188,212,219,228,293,295,302,321,330,333,340,341,343,349],make_applicationexpress:[181,188],make_booleanexpress:[181,187,188],make_equalityexpress:[181,188],make_lambdaexpress:[181,184,188],make_leaf:116,make_lex:179,make_lex_dict:195,make_negatedexpress:[181,188],make_nod:116,make_nois:[57,71],make_quanifiedexpress:188,make_tre:160,make_valu:179,make_variableexpress:[181,184,187,188],makeel:129,mal:131,male:[36,98],mallet:349,malt:[156,161],malt_demo:161,malt_model:166,malt_pars:166,malt_regex_tagg:166,maltpars:166,maltxml:161,mammal:[57,105,343],man:[53,57,105,143,167,171,180,293,336],manag:[116,117,215,348,349],manchest:[67,88],maneuv:[57,88],mang:322,mangl:48,mani:[38,43,52,57,62,66,71,105,109,115,117,118,123,132,133,154,168,175,176,201,203,211,224,226,294,302,322,323,324,325,326,336,346,349],manipul:[99,314,349],manner:[46,159,178,341],manual:[7,8,44,57,64,71,102,107,158,219,228,276,294,313],map:[16,23,30,32,33,36,38,43,51,52,57,59,64,71,77,84,90,98,102,103,105,109,117,118,119,123,127,132,139,145,159,164,176,178,179,183,185,188,191,209,210,214,219,293,294,302,316,325,326,328],map_tag:216,mapping_msd_univers:77,mar:132,marathi:[57,73,101,349],marc:131,march:[72,349],marek:349,margaret:[107,349],margin:[117,119,143,144,195,322,323,334],marhta:145,mari:[57,71,107,125,160,161,188,222,335],mark:[29,66,77,102,117,118,129,130,132,139,148,194,221,293,300,302,305,310,326,334,343,349],mark_alls_fresh:128,mark_neg:194,mark_neqs_fresh:128,marker:[28,57,74,221,314,343],market:173,markonli:117,markov:[214,221,334,336],martha:145,martin:[176,203,206,274],martin_extens:203,martinez:145,martyrolog:107,marvel:107,masato:349,masculin:[198,199],masi:[141,145],masi_dist:145,mask:[132,133,135],mass:[176,204,228],massei:145,massi:145,master:[98,115,117,152,206,259,301,311,313],mat:[317,318,321],match:[7,16,18,23,25,28,32,57,59,60,61,63,64,68,69,70,71,74,77,78,80,82,83,84,91,96,102,103,105,106,107,111,118,119,127,129,141,145,170,171,188,204,219,276,293,294,303,305,313,317,321,328,331,334,343],match_phone_numb:297,matchbracket:16,materi:[23,25,28,37,303],mathemat:[322,323,324,325,326,327,349],matild:[57,71],matplotlib:[176,224],matric:[33,38,46,48],matrix:[38,46,48,144,213,214],matter:[259,322],matthew:[145,217],matur:293,max:[30,35,57,82,145,176,195,321],max_depth:105,max_dist
:141,max_fn_exampl:29,max_fp_exampl:29,max_id:341,max_it:[33,193],max_iter:[213,214],max_l:145,max_len:[119,132,318,321,343],max_linesearch:213,max_model:124,max_phrase_length:331,max_rul:[212,224],max_sentence_s:39,max_tp_exampl:29,max_v:326,maxdepth:[57,105,343],maxent:[30,34,38,167],maxentclassifi:[33,34,167,193],maxentfeatureencodingi:[33,34],maxim:[33,165,202,299,321,327,331],maxima:327,maximis:[46,48,214],maximize_alignment_prob:323,maximize_distortion_prob:[324,325],maximize_fertility_prob:327,maximize_lexical_translation_prob:327,maximize_null_generation_prob:327,maximize_vacancy_prob:326,maximum:[25,28,29,33,39,46,48,50,57,86,105,109,124,129,132,155,165,167,168,174,176,202,213,214,221,224,293,317,318,321,326,327,333,334,335,341,343,349],maxlen:176,maxreprs:[57,71],maxwidth:335,mayb:[10,24,188],mcdonal:173,mcdonald:[167,216],mcintyr:312,mco:166,md5:111,md5_hexdigest:111,md:[98,111,160,163],mdash:189,me:[24,66,160,295,298,303,306,308,312,313],mean:[24,28,46,48,50,57,71,82,89,105,119,122,129,131,132,147,148,161,176,178,184,188,214,227,299,307,317,321,327,330,341],meaning:149,meaning_q:[57,82],meant:[202,327],measur:[28,29,42,50,53,143,145,147,163,189,193,194,200,211,321,349],meat:213,med:145,media:[0,195,339,347,349],medical_specialti:[57,71],medicina:107,medit:24,medium:137,meet:169,megabyt:129,megam:[30,33,43,349],melbourn:351,mellon:66,melt:194,melvil:293,melvin:321,member:[29,33,34,41,57,103,132,176,179,183,189],member_holonym:105,member_meronym:105,membership:[46,48,49,50,132,139],memoiz:110,memori:[38,43,52,102,115,132,189,213,221,224,228,302,316,343],men:[57,105],mention:[118,189],mentor:349,menu:[117,346],menubutton:117,mercer:[322,323,324,325,326,327],mere:15,merg:[25,28,46,49,51,117,154,178,300],merger:[57,71],mergerul:[25,28],messag:[10,109,111,126,129,151,188,213,341,349],mesur:148,met:[202,320],metadata:[57,74,98,179],meteor:[328,349],meteor_scor:315,method0:317,method1:317,method2:317,method3:317,method4:317,method5:317,method6:317,method7:317,method:[13,25,26,28,29,33,34,39,52,57,59,60,61,63,64,68,69,70,71,74,75,77,78,80,81,82,83,84,85,91,96,102,103,105,106,107,109,110,115,117,118,122,123,124,126,129,130,132,133,134,135,139,140,143,145,146,154,158,159,167,168,169,170,171,173,176,178,181,183,187,188,193,194,200,202,213,214,215,218,219,221,224,226,228,254,264,284,285,286,287,288,289,292,293,294,295,297,302,303,304,306,310,312,314,317,322,323,325,327,328,333,334,336,338,341,342,349],methodolog:[145,299],metric:[0,29,49,57,105,193,259,282,291,301,315,317,321,328,349,351],metz:107,mi:195,mi_lik:143,michael:[125,145,331,344],michel:[57,71,110,145],michelesimionato:110,micro:[189,317,332],microchip:86,microsoft:73,middl:[2,57,71,102,117,173,299],middot:189,midland:98,midwai:[159,171],might:[30,33,108,109,159,160,164,174,316,326,327,348],mightn:195,mightnt:195,mike:321,mikhail:[349,351],mikheev:302,mileston:349,militari:[317,318,321,328,330,332],miller:[57,103],million:[57,71,107],mimic:105,mimic_wrap:71,min:[42,145,204,212,330],min_acc:[212,224],min_colloc_freq:302,min_depth:105,min_freq:193,min_len:[119,318,321,343],min_ll:33,min_lldelta:33,min_prob:327,min_scor:[212,224],min_score_factor:326,min_siz:52,min_stem_length:219,mind:[24,118,336],mine:[67,81,88,89,93],minfreq:213,ming:94,minim:[25,118,145,153,176,302,310,330,331,335,338],minimalset:150,minimum:[52,62,105,145,153,193,204,213,224,302,318,321,341,343,349],minnesota:92,minor:[201,206,349],minq:[81,88,89],minu:[51,118,189],minut:[57,71,313,341],mirror:[57,64],miryanov:71,misalign:39,misc:[0,346,349],miscellan:[57,71],mi
smatch:28,miss:[28,29,52,57,71,141],misss:29,mistak:20,mit:[299,307],mitchel:[209,349],mitr:349,mix:[2,36,57,105,118,176,334],mixin:[57,59],mixtur:[46,48],mk_reldict:189,mlb:115,mlc:175,mle:[132,133,135,214],mleprobdist:176,mlu:[57,64],mm1:202,mm:[298,312],mn:[57,82],mobi:293,moby_dick:293,mod:[119,161],mod_address:161,mode:[10,25,109,111,166,203,216],mode_fil:213,model2count:323,model3count:324,model4_prob_t_a_given_:325,model4count:325,model5count:326,model:[32,33,34,41,46,48,57,67,86,109,110,111,121,122,123,124,133,137,138,139,145,148,158,166,169,172,173,176,177,178,179,183,184,185,191,195,213,214,215,217,219,220,221,284,285,286,287,288,289,293,295,302,304,311,317,322,323,324,325,326,327,333,346,348,349],model_build:124,model_dir:158,model_fil:213,model_filenam:[44,166,220],model_found:124,model_path:172,modelbuild:[122,124],modelbuildercommand:122,modelbuildercommanddecor:122,modelfil:173,moder:25,modern:94,modern_e_wordtyp:94,modesto:95,modif:[52,118,201,203,206,302,336,343],modifi:[25,28,52,57,102,103,105,108,115,117,118,119,122,129,159,164,169,176,179,194,203,214,293,294,302,307,317,326,334,336,343],modified_precis:317,modul:[0,1,12,18,25,30,46,55,57,112,121,140,150,156,177,192,196,209,223,229,241,242,281,295,315,337,349],mohammad:321,moin:349,molina:214,moment:[57,89,318],monei:[29,189,303,344],moni:293,monolingu:[57,104],monoton:[332,333],month:[57,64,349],more:[12,14,25,28,29,30,31,33,37,52,55,57,58,59,60,62,63,65,66,67,70,71,73,74,77,78,79,81,82,84,87,92,97,102,105,106,109,111,115,116,117,119,129,132,134,139,141,143,144,145,153,156,158,159,162,164,168,170,171,174,175,176,179,183,185,188,194,195,201,206,209,210,211,213,214,217,219,220,221,224,226,227,228,259,276,293,295,298,306,307,312,317,318,321,324,325,326,333,334,336,341,347,349],more_prompt:111,morefeatur:118,moreov:[57,71,132],moreovo:110,morethuent:213,morgan:330,morn:347,morphem:156,morphi:[57,105,208],morphlist:[57,75],morpholog:[13,57,74,105,156,157,196,197,204,219,334,349],morphological_substitut:[57,105],morphs2str:[57,75],mose:[57,104,301,313,349],mosesdecod:[301,313],most:[7,8,14,28,30,31,32,33,35,38,46,50,52,57,59,71,102,105,115,117,119,127,132,157,158,159,168,169,170,171,175,176,193,194,195,200,209,210,211,212,214,219,220,226,227,228,293,305,312,324,326,327,334,336,341,349],most_general_unif:127,most_informative_featur:[33,35],mostli:[101,214,224,302],mot:[57,64,316,322,323,324,325,326],mother:[57,64,71],motion:[57,71,153],motiv:[109,211,325],mots_index:316,mouth:195,move:[8,98,109,115,117,171,310,327,349],move_dot_forward:[159,164],movement:117,moveto:117,movi:[62,194,349],movie_neg:194,mp:166,mpalmer:[57,103],mpdbvq:166,mr:[200,302],mrf:317,mrg:347,ms:98,msd:[57,77],msd_to_univers:77,msg:188,msi3:202,msnm:20,mst:[105,167,343],mt:[318,320,328,330,349],mte:57,mtecorpusread:[57,77,349],mtecorpusview:77,mtefileread:77,mtetagconvert:77,mteval:[301,317,330],mtevalbleu:301,mu:[189,202],much:[14,24,46,57,67,71,84,105,111,132,159,224,297,302,336],muffin:[160,295,301,303,306,308,312,313],mui3:202,mult:115,multex:77,multext:[57,77,349],multi:[31,46,57,68,71,115,117,141,144,211,300,332,334,346,349],multi_comma:311,multi_dash:311,multi_dot:311,multi_kappa:141,multiclassifieri:31,multidimension:167,multidisciplinari:349,multilin:[29,57,58,62,65,67,70,85,88,89,97,302,303],multilingu:[57,105,302],multilistbox:115,multinomialnb:38,multiparentedtre:334,multipl:[8,28,29,39,57,68,69,71,102,105,118,124,126,141,145,156,160,166,171,172,193,221,276,304,308,324,328,332,333,334,341],multipli:[123,143,202,317],multiword:[57,71],mung:298,mu
nn:349,murthi:[67,88],must:[15,28,31,33,34,38,46,51,57,71,109,111,113,115,116,117,118,119,122,127,129,141,145,157,158,159,166,167,169,172,176,182,187,188,202,210,214,215,219,220,221,226,227,293,294,296,299,302,303,313,322,323,324,325,326,333,335,336,340,343],mustard:195,mustn:195,mustnt:195,mutabl:[52,118,176],mutableoptionmenu:117,mutableprobdist:176,mutual:[86,143],mwa:[57,104,349],mwa_ppdb_xxxl_fil:[57,104],mwappdbcorpusread:[57,104],mwe:295,mwetoken:300,mx1000m:[220,308],mx2g:309,mx4g:172,mxpost:312,my:[24,36,109,159,160,162,164,166,172,311,312,346],mytext:293,n:[32,33,35,42,43,46,49,51,57,58,60,65,66,69,70,71,77,82,86,87,89,90,91,93,97,104,105,111,117,118,119,122,129,132,134,141,142,143,152,160,161,162,165,167,168,169,171,175,176,183,185,194,208,209,211,212,219,221,227,228,253,293,294,295,298,302,303,306,311,312,313,317,318,321,324,330,334,335,336,343,344,347],n_all:[128,143,321],n_all_output:321,n_all_target:321,n_and:128,n_app:128,n_atom:128,n_eq:128,n_exist:128,n_iff:128,n_ii:143,n_iii:143,n_iiii:143,n_iiix:143,n_iix:143,n_iixi:143,n_iixx:143,n_imp:128,n_instanc:194,n_io:143,n_ix:143,n_ix_xi_tupl:143,n_ixi:143,n_ixii:143,n_ixix:143,n_ixx:143,n_ixxi:143,n_ixxx:143,n_match:321,n_oi:143,n_oo:143,n_or:128,n_prop:128,n_review:[57,89],n_scalar:195,n_xi:143,n_xii:143,n_xiii:143,n_xiix:143,n_xix:143,n_xixi:143,n_xixx:143,n_xx:143,n_xxi:143,n_xxii:143,n_xxix:143,n_xxx:143,n_xxxi:143,n_xxxx:143,na2:202,na:[298,312],naacl:[299,307,317],nabla:189,nai3:202,naiv:[35,36,38,141,194],naive_bay:38,naivebay:[30,36,44,219,349],naivebayesclassifi:[35,36,167,193,219],naivebayesclassifiertest:266,naivebayesdependencyscor:167,name:[25,27,28,29,30,32,33,36,37,39,43,47,55,57,59,60,61,63,64,68,69,70,71,72,74,79,80,82,83,84,87,91,96,98,103,105,106,107,108,109,110,111,115,117,118,119,122,123,125,129,131,141,145,159,161,164,166,173,176,178,179,181,184,187,188,189,191,206,210,211,220,230,257,276,293,294,295,302,312,314,339,340,341,347,349],name_or_path:118,name_pattern:129,named_ent:25,namedtemporaryfil:173,names_demo:[41,43],names_demo_featur:43,namespac:[77,78,131],narad:349,nasiriya:160,natcorp:60,nathan:[57,61,349],nation:[57,60,67,78,89,209,349],natur:[0,46,66,94,179,185,221,336,345,349],natural_language_processing_with_python:349,navaho:[57,101],navaho_din:[57,101],navajo:[57,101],nb:[38,145,209,213,347,349],nbp:[57,104],nbsp:189,nc:[94,176,216],nck:[141,343],ncollect:111,nd:200,ndash:189,ndirectori:111,ndownload:111,ndp:167,ne2:202,ne:[37,57,68,91,117,189],ne_chunk:[25,189,347],ne_chunk_s:25,ne_label:189,nearbi:211,nearest:115,necessari:[13,33,57,78,79,87,111,117,132,167,169,188,294,295,299,307,312,317,346,348],necessarili:[57,105,219,326,334],nechunkpars:27,nechunkparsertagg:27,nee0:202,need:[14,28,30,33,39,43,46,52,57,64,71,78,89,102,105,108,109,111,115,117,119,132,134,141,145,159,167,176,179,187,203,213,214,215,220,226,294,295,302,312,313,319,332,334,336,339,346,348,349],needn:195,neednt:195,neg:[29,33,52,57,62,81,89,93,109,115,117,129,160,167,194,195,219,302,325,326,333],neg_scor:[57,93],negat:[117,122,188,194,195],negatedexpress:[181,185,188],negr:205,negro:205,negscor:93,nei:[132,135,137,176],neighbor:[57,71,320,326,327],neighborhood:228,neither:[57,64,119,188,195],nemo:[1,6],nemo_app:1,neot:107,nep:334,neq:188,neq_list:188,ner:[39,160,218,349],nertagg:218,nervou:293,ness:[202,212,219],nest:[28,32,77,78,106,118,183,202,305,314,334,343],net:[53,176,212,349],netherland:[57,71,351],network:340,networkx:161,neural:[132,172,321,349],neurophysiolog:311,neutral:194,nevada:201,never:[35,43,102,117,159,195,20
3,334,335,343],new_assumpt:122,new_edg:159,new_end:[159,164],new_id:13,new_index:167,new_nod:167,new_p:175,new_prop:125,new_sig:125,new_token_pad:119,new_tre:175,new_var:118,new_weight:33,new_wrapp:110,newchild:[116,117],newclassnam:129,newcom:349,newli:[115,187],newlin:[109,174,215,302,303,306,314],newswir:72,newton:33,newvar:[181,188],next:[7,8,16,50,102,109,111,117,159,160,162,170,179,182,187,188,314,319,332,346],next_src_phrase_span:333,next_with_bind:164,nextcategori:16,nextsym:[14,159,162],nf:33,nfarrai:33,nfmap:33,nftranspos:33,ng:[66,332],ngram1:[57,76],ngram2:[57,76],ngram:[53,57,76,132,133,134,135,136,137,143,193,317,318,330,332,343,349],ngram_count:[132,134],ngram_text:[132,134],ngramassocmeasur:[143,193],ngramcount:[132,133,134,135,137],ngrammodelvocabularytest:246,ngrams_fn:[132,133,135],ngramtagg:219,nhaa:334,ni:189,nice:[14,110,132,227],nicer:129,nichleson:145,nichulson:145,nicodemu:107,niemand:333,nik:141,nil:125,nilsson:125,nin:[111,160,295,303,306,308,312,313],nine:349,nineteeth:89,nishant:321,nist:[72,98,291,295,317,330,349],nist_length_penalti:330,nist_scor:315,nisttoken:301,nitin:[67,349],nivr:[161,173],njindal:67,nk:141,nkeyboard:111,nkjp:[57,349],nkjpcorpus_header_view:78,nkjpcorpus_morph_view:78,nkjpcorpus_segmentation_view:78,nkjpcorpus_text_view:78,nkjpcorpusread:[57,78],nl:[42,57,104],nlg:349,nlp:[38,53,90,137,160,172,199,214,220,347,349],nltk2:105,nltk3:[105,349],nltk:349,nltk_contrib:46,nltk_data:[55,57,64,78,85,101,109,111,328,346],nltk_data_dir:64,nltk_data_subdir:108,nltk_extens:203,nltk_org:340,nltkdemo18:211,nltkdemo18plu:211,nltkdestructivewordtoken:298,nltkdrtboxerdrsinterpret:178,nltkwordtoken:298,nmod:[160,163,172],nmodel:111,nmtpy:301,nn1:[57,71,202],nn:[25,28,39,57,61,144,160,163,172,173,174,209,212,215,216,217,218,219,220,222,227,294,347],nnp:[39,160,163,209,222,334,347],noam:152,nobodi:333,node:[7,28,29,32,51,57,80,84,91,105,106,107,116,119,159,160,161,164,166,167,168,169,170,175,180,184,185,191,276,294,334,335,336,343],node_:116,node_address:161,node_attr:116,node_index:[161,167],node_pattern:334,nodecolor:335,nodecoord:335,nodedist:335,nodelist:161,nodesep:334,noi3:202,nois4j:202,noise_mak:[57,71],noix4ct:202,nombank:[57,267],nombankchaintreepoint:79,nombankcorpusread:[57,79],nombankdemo:267,nombankinst:[57,79],nombankpoint:79,nombanksplittreepoint:79,nombanktreepoint:79,nomfil:[57,79],nomin:44,non:[25,26,28,33,48,49,53,57,58,59,60,61,63,64,65,67,68,69,70,71,73,74,77,78,80,81,82,83,84,85,91,92,96,97,102,103,105,106,107,108,116,117,118,119,129,131,160,161,167,168,174,177,182,183,188,214,226,228,293,301,302,303,305,312,314,325,326,328,334,335,336],non_break:311,non_head_distortion_t:[325,326],non_head_vacancy_t:326,nonascii:301,nonbreak:[57,104],nonbreaking_prefix:[57,104],nonbreakingprefixescorpusread:[57,104],noncommerci:94,none:[13,14,16,23,28,29,32,33,34,35,37,41,43,44,48,49,50,51,52,57,58,59,60,61,62,63,64,65,67,68,69,70,71,72,73,74,76,77,78,79,80,81,82,83,84,85,87,88,89,90,91,92,93,95,96,97,98,99,100,102,103,104,105,106,107,109,111,113,115,116,117,118,119,120,122,123,124,126,127,128,129,131,132,133,134,135,139,141,144,147,148,153,157,158,159,160,161,162,164,165,166,167,168,170,171,172,174,175,176,178,180,181,182,183,184,185,186,187,188,189,190,193,194,195,202,209,211,212,213,214,215,217,219,220,221,222,224,226,227,228,293,294,300,302,303,308,309,310,314,316,317,322,323,324,325,326,327,329,334,335,336,338,341,342,343,344,346],nonequ:118,nonexecut:163,nonlex:[119,209],nonmonoton:121,nonmonotonic_fixt:229,nonneg:[176,343],nonproject:167,nonprojec
tive_conll_parse_demo:167,nonprojectivedependencypars:156,nonterm_pars:119,nontermin:[14,28,113,119,159,164,165,168],nonzero:[33,129,176],nope:195,nor:[188,195],norm:[90,198,199,201,336],norm_cdf:319,norm_logsf:319,normal0117:[57,101],normal:[28,35,57,59,60,61,63,64,68,69,70,71,74,80,82,83,90,91,96,97,103,105,106,107,119,176,188,190,195,198,199,201,217,219,293,310,317,319,321,332,334,335,336,343],normalis:[46,48,49,50,51,214],norouzi:321,north:98,northern:98,northumbra:107,northwest:349,norwegian:206,norwegianstemm:206,not_found:227,not_instal:111,not_list:188,notabl:141,notat:[94,132,134,141,183,322,323,324,325,326,334],note1:212,note:[8,15,25,28,33,39,44,46,53,57,59,71,77,78,81,89,96,98,102,105,106,109,115,116,117,118,119,132,133,134,135,139,141,142,144,152,156,159,163,164,171,173,176,193,203,206,209,211,218,219,221,224,226,228,302,303,307,312,313,321,322,326,330,332,334,341],notequ:117,noth:[25,28,57,103,117,122,170,171,195,227],nothman:[349,351],notic:[57,71,132],notimplementederror:[109,129],notin:189,notion:179,notsubset:117,noun1:[57,86],noun2:[57,86],noun:[25,26,28,57,71,77,79,86,87,105,153,208,209,212,213,216,219,325],nounsfil:[57,79],nov:[163,212],novel:198,novemb:349,now:[57,105,132,228,321,349],nowher:[57,71,195],np:[25,28,29,39,57,61,86,119,160,171,172,209,218,219,294,334,335,349],npp:167,nps_chat:57,npschatcorpusread:[57,80],nr:[176,219],nr_iter:217,ns:77,nschneid:[57,61],nsp:53,nsub:189,nsubj:[160,172],nth:317,nthank:[160,295,303,306,308,312,313],nthat:111,nthe:111,nti:[57,71],ntild:189,ntop:161,ntt:332,ntu:[57,105],ntwo:[160,295,303,306,308,312],nu:189,nullableinttoken:178,nullifi:195,num:[57,60,71,77,91,95,123,124,126,179,183,201,212,216,293],num_clust:[47,48,49,50],num_edg:[14,159,162,164,168],num_increasing_pair:332,num_leav:159,num_mean:50,num_memori:213,num_possible_pair:332,num_sent:[214,224],num_word:133,number:[14,23,25,28,29,30,31,33,38,43,46,47,48,49,50,51,52,53,57,64,71,79,83,87,90,98,102,104,105,109,111,115,117,118,122,123,124,126,129,132,134,135,139,141,143,144,145,147,153,155,159,160,161,162,164,165,167,168,169,170,171,173,174,175,176,179,183,189,193,194,211,212,213,214,216,217,219,221,224,293,297,302,310,311,316,317,320,321,322,323,324,325,326,327,328,331,332,333,334,335,336,338,341,343,349],number_possible_pair:332,number_regex:301,numedg:14,numer:[29,33,35,38,44,137,141,143,295,317],nummod:160,numpars:[159,162],numpi:[34,41,46,48,310,348,349],nw:[117,318],nwhen:111,nwt:72,nx:161,nx_graph:161,ny:[160,218,220],nyt_19980315:72,nyt_19980403:72,nyt_19980407:72,o12:107,o1:107,o23:107,o24:107,o2:107,o34:107,o3:107,o4:107,o:[0,39,87,130,160,184,187,212,214,218,220,224,274,301,347,349],o_k:214,o_t:214,oacut:189,oana:[57,77],oat:66,oauth:[341,342],oauth_token:[341,342],oauth_token_secret:[341,342],oauth_vers:341,obei:[317,318,321,328,330,332],obj:[57,62,130,163,173,179,189,211,217,219,226,227],obj_scor:[57,93],objclass:189,object:[13,15,16,23,26,28,29,31,33,35,37,38,40,42,43,44,46,47,51,52,57,58,59,67,68,70,71,72,77,78,79,82,85,86,87,89,90,92,93,96,97,98,100,102,105,106,108,109,110,111,113,115,116,117,118,119,122,123,125,126,127,128,129,130,131,132,133,134,135,137,139,141,143,144,146,153,157,158,159,160,161,162,163,164,166,167,168,169,173,174,176,178,179,180,181,182,183,184,185,187,188,189,193,194,195,197,208,210,212,213,214,215,217,226,227,228,257,293,297,298,302,310,313,314,316,317,319,321,322,323,324,325,326,327,328,333,334,335,338,339,340,341,342,343,344,349],objsym:189,objtext:189,observ:[81,88,141,176,214,325],obtain:[46,52,57,60,71,78,98,105,118,127,188,194,304
[minified Sphinx search-index data (searchindex.js term-to-document mapping) omitted: auto-generated build artifact, regenerated on every docs build]
09,110,117,118,130,141,145,172,173,174,177,206,215,293,294,295,318,338,349],support_cutoff:32,supported_oper:39,suppos:24,sure:[24,28,57,59,122,132,134,295,316,329,346],surfac:[57,75],surround:[28,293,343],survei:[141,145,304],susan:[57,83,145,299,307],sv:[57,104],svar:16,svd:[46,48,49,50,51],svd_dimens:[48,49,50,51],svg:[161,335],svm:[30,38],svmclassifi:40,svn_revis:111,sw:146,swadesh:[57,83,349],swadeshcorpusread:[57,104],swallow:[160,215,218,220,319],swap:[154,327],swbp:179,sweden:351,swedish:[57,104,206],swedishstemm:206,swift:340,switchboard:[57,349],switchboardcorpusread:[57,96],switchboardturn:96,swn:93,sx206:98,sx:15,sy:[161,341,343],sydnei:351,syllab:299,syllabif:[299,307],syllabifi:[299,307],syllabl:[299,307],syllable_list:[299,307],syllabletoken:307,symbol:[7,28,57,58,60,62,64,65,67,77,80,81,84,85,88,89,91,97,100,104,105,106,117,119,132,159,164,170,175,177,179,181,183,188,189,214,301,311,312,341,343],symbol_regex:301,symbolsheet:117,symbolwidget:117,symmetr:[179,320,321],symmetrisat:320,synchron:142,synonym:[57,76,104,105,328],syns:[57,105],synset1:[57,105],synset2:[57,105],synset:[57,93,105,343,344,349],synset_from_pos_and_offset:[57,105],synset_from_sense_kei:[57,105],syntact:[2,26,57,59,105,107,119,156,159,160,191],syntactic_mark:105,syntax:[2,57,89,103,157,191,211,294,334],syntaxcorpusread:[57,59,61,70,75,94],syntre:191,system:[34,39,109,111,117,129,179,214,215,228,317,320,321,330,332,343,344,346,349],systemat:317,szlig:189,t2:141,t3:141,t61:[57,101],t:[29,33,57,66,71,89,90,102,107,109,111,116,119,125,129,131,132,141,142,143,144,145,148,158,161,169,172,175,176,179,183,188,194,195,200,202,209,212,214,224,229,276,295,298,306,307,310,311,312,313,317,318,321,322,323,324,325,326,330,331,332,334,335,347],t_i:221,t_t:221,ta2:202,ta:[57,104,298,312],tab:[10,57,102,105,111,161,303,306,311,349],tab_fil:[57,105],tabbi:[160,172],tabl:[24,33,36,57,68,98,105,111,112,118,143,145,173,175,179,219,310,322,323,324,325,326,327,333],tableau:121,tableau_test:128,tableauprov:128,tableauprovercommand:128,tablet:[325,326,327],tabtoken:[57,70,306],tabul:[176,332,349],tac:334,tacilp4i:202,tacohn:46,tadm:[30,33],tadmeventmaxentfeatureencod:[33,41],tadmmaxentclassifi:33,taft:257,tag1:[57,97],tag2:[57,97],tag3:[57,97],tag:[0,26,27,28,29,39,52,57,59,60,61,63,64,65,68,69,70,71,73,74,75,77,78,80,82,83,84,87,89,91,92,96,98,103,105,106,107,117,119,129,130,158,160,161,166,167,169,172,174,176,189,208,223,224,225,226,227,228,273,334,336,347,349],tag_1:214,tag_:[57,61],tag_mapping_funct:[73,97],tag_n:[77,214],tag_on:219,tag_pattern2re_pattern:[25,28],tag_pattern:28,tag_sent:[39,160,210,213,215,218,220],tagdata:221,tagged_:[57,59,60,61,63,64,68,69,70,71,74,80,82,83,91,96,103,105,106,107],tagged_chunk:[57,91],tagged_data:224,tagged_discours:[57,96],tagged_para:[55,57,61,63,65,74,77,84,97,107],tagged_pars:[158,172],tagged_parse_s:172,tagged_post:[57,80],tagged_s:[55,57,59,60,61,63,64,65,68,70,73,74,77,84,91,97,107,209,212,218,219],tagged_sent:[25,222],tagged_sequ:211,tagged_span:68,tagged_tok:209,tagged_token:[25,28,222,228],tagged_treebank_para_block_read:[57,102],tagged_turn:[57,96],tagged_word:[55,57,59,60,61,63,64,65,68,70,73,74,75,77,78,80,84,91,96,97,107,176],taggedcorpusread:[57,77,97,107],taggedcorpusview:[63,97],taggedsent_to_conl:174,taggedsents_to_conl:174,taggedtyp:28,tagger1:212,tagger2:212,tagger:[27,87,111,158,160,166,172,209,210,211,212,213,214,215,217,218,219,220,221,224,227,228,248,346,349],taggeri:[39,160,210,211,213,214,215,217,219,220,221],taggertrain:211,taghva:201,tagpattern:120,tagrul:[211,227
],tags_to_ignor:60,tagset:[57,59,60,61,63,64,65,68,69,70,71,73,74,77,80,82,83,84,91,94,96,97,103,105,106,107,209,216,349],tagset_map:216,tagspec:[77,78,106],tagstr2tre:[29,57,65],tagtyp:160,tajik:311,take:[13,14,15,20,30,33,38,42,50,51,52,55,57,59,68,71,75,77,78,90,98,102,105,106,110,117,119,123,129,132,136,141,143,145,158,160,166,167,172,176,183,188,189,193,209,213,214,219,221,224,228,300,303,313,317,318,326,341,343],takefocu:115,taken:[57,71,72,89,90,122,152,159,164,168,176,180,202,203,211,325,335,343,349],taku:321,talbot:320,talk:[24,57,71,349],talkbank:[57,64,349],taln:148,tamil:[57,101,104],tampa:145,tan:351,tanya:145,target:[16,30,33,57,71,82,111,114,122,153,216,226,293,302,316,319,320,321,322,323,324,325,326,327,331,333],target_block:319,target_s:319,target_sentence_length:326,target_sents_len:319,target_tagset:[29,65],target_word_class:[325,326],tartaru:[203,206,274],task:[25,30,46,52,90,117,141,148,156,193,211,214,297,332],tau:[189,332],taught:167,tautolog:127,tax:90,taxonomi:[57,105],taylor:340,tb:306,tbl:[0,211,212],tcp:10,tcud1:202,tcvn:[57,101],tea:66,teach:[193,214,347,349],team:36,tear:176,teardown:254,teardownclass:254,technic:[60,336],techniqu:[102,214,317],technolog:[86,200],ted:[92,105],tediou:[57,62,132],teen:[20,160],teenag:[57,62],tei:[57,77],teicorpusview:[57,84],telecommun:198,telephon:349,televis:[57,62],tell:[34,102,109,129,132,293,344],telugu:[73,349],temp:[57,71,173],temperatur:[57,71],tempfil:173,templat:[211,212,223,224,226,227],template_stat:224,templateid:227,temporari:[25,102,173,224],temporarili:[117,123],temporibu:107,ten:[299,307],tend:325,tendenc:299,tens:[87,196,199,212,216,219],tenwi:142,tere:200,term:[33,35,53,57,94,104,110,127,180,181,184,188,214,224,293,336,341],termin:[14,33,113,119,159,164,165,166,173,189,304,334,335,336,340,341],terminolog:[141,325,326],tern:200,test:[0,25,28,39,63,72,75,86,98,117,119,123,126,129,132,143,144,145,147,173,174,191,193,194,202,211,212,213,214,218,219,221,224,300,302,311,317,329,346,348,349],test_advanc:259,test_advanced2:259,test_affix_tagg:264,test_alin:241,test_aline_delta:247,test_antonym:280,test_arab:274,test_bad_oper:276,test_bas:275,test_best_model2_align:289,test_best_model2_alignment_does_not_change_pegged_align:289,test_best_model2_alignment_handles_empty_src_sent:289,test_best_model2_alignment_handles_empty_trg_sent:289,test_best_model2_alignment_handles_fertile_word:289,test_best_returns_none_when_stack_is_empti:292,test_best_returns_the_best_hypothesi:292,test_bigram2:253,test_bigram3:253,test_bigram5:253,test_bleu:281,test_brevity_penalti:282,test_bril:241,test_brill_demo:248,test_brill_tagg:264,test_build_model:124,test_candidate_type_check:290,test_case_where_n_is_bigger_than_hypothesis_length:282,test_cfd_mut:241,test_cfg2chomski:241,test_chunk:241,test_classifi:241,test_clausifi:127,test_colloc:241,test_com:276,test_complex:250,test_compute_future_cost:292,test_compute_future_costs_for_phrases_not_in_phrase_t:292,test_concord:241,test_concordance_lin:254,test_concordance_list:254,test_concordance_print:254,test_concordance_width:254,test_config:126,test_convert_to_prover9:126,test_corenlp:241,test_corpora:241,test_corpus_bleu:282,test_corpus_bleu_with_bad_sent:282,test_corpus_read:268,test_corpus_view:241,test_correct_length:257,test_correct_valu:257,test_count:242,test_counts_set_correctli:246,test_creation_with_count:246,test_cutoff_setter_checks_valu:246,test_cutoff_value_set_correctli:246,test_data:241,test_default_tagg:264,test_derivationally_related_form:280,test_disagr:241,test_dist:241,test_
distortion_scor:292,test_distortion_score_of_first_expans:292,test_domain:280,test_draw:181,test_easi:259,test_easy2:259,test_empty_hypothesi:282,test_empty_refer:282,test_empty_references_and_hypothesi:282,test_eq:246,test_exampl:276,test_find_all_src_phras:292,test_framefiles_fileid:267,test_freqdist:241,test_from_eflomal_output:283,test_full_match:282,test_future_scor:292,test_gdfa:281,test_german:274,test_hillclimb:289,test_hmm:241,test_hyperhyponym:280,test_ibm1:281,test_ibm2:281,test_ibm3:281,test_ibm4:281,test_ibm5:281,test_ibm_model:281,test_in_topic_domain:280,test_inst:267,test_iterable_type_for_all_lemma_nam:280,test_iterating_returns_an_iterator_ordered_by_frequ:261,test_json2csv_corpu:241,test_json_seri:241,test_labeled_nod:276,test_lch:280,test_len_is_const:246,test_lookup:246,test_lookup_empty_iter:246,test_lookup_empty_str:246,test_lookup_int:246,test_lookup_iter:246,test_lookup_non:246,test_lookup_recurs:246,test_lowercase_opt:274,test_lr_bigram:265,test_lr_quadgram:265,test_lr_trigram:265,test_make_relation_set:124,test_membership_check_respects_cutoff:246,test_meronyms_holonym:280,test_meteor:281,test_metr:241,test_misc_rel:280,test_model:242,test_model_found:124,test_modified_precis:282,test_multiple_conj:276,test_naivebay:241,test_neighboring_finds_neighbor_align:289,test_neighboring_returns_neighbors_with_pegged_align:289,test_neighboring_sets_neighbor_alignment_info:289,test_ngram_tagg:264,test_nist:281,test_no_zero_div:270,test_node_encod:276,test_node_nocas:276,test_node_noleav:276,test_node_print:276,test_node_quot:276,test_node_regex:276,test_node_regex_2:276,test_node_simpl:276,test_node_tree_posit:276,test_nombank:241,test_numb:267,test_oed_bug:274,test_omw_lemma_no_trailing_underscor:280,test_padded_everygram_pipelin:245,test_partial_matches_hypothesis_longer_than_refer:282,test_pdist:176,test_perceptron_tagg:264,test_pl196x:241,test_pos_tag:241,test_pos_tag_eng:269,test_pos_tag_eng_univers:269,test_pos_tag_ru:269,test_pos_tag_rus_univers:269,test_pos_tag_unknown_lang:269,test_pos_templ:248,test_preprocess:242,test_prob_t_a_given_:[284,285,286,287,288],test_prov:126,test_prun:288,test_push_bumps_off_worst_hypothesis_when_stack_is_ful:292,test_push_does_not_add_hypothesis_that_falls_below_beam_threshold:292,test_push_removes_hypotheses_that_fall_below_beam_threshold:292,test_reference_or_hypothesis_shorter_than_fourgram:282,test_reference_type_check:290,test_regexp_tagg:264,test_rel_preced:276,test_rel_sister_nod:276,test_relation_list:[57,64],test_retrieve_synset:280,test_rib:241,test_ribes_empty_word:270,test_ribes_one_word:270,test_ribes_two_word:270,test_rte_classifi:241,test_russian:274,test_sampl:289,test_seekable_unicode_stream_read:241,test_senna:[39,218,241],test_senna_chunk_tagg:273,test_senna_ner_tagg:273,test_senna_pipelin:273,test_senna_tagg:273,test_sent:[219,225,322,323,324,325,326],test_sentence_nist:291,test_sequ:214,test_set:193,test_set_uniform_alignment_prob:285,test_set_uniform_alignment_probabilities_of_non_domain_valu:285,test_set_uniform_distortion_prob:286,test_set_uniform_distortion_probabilities_of_max_displac:287,test_set_uniform_distortion_probabilities_of_non_domain_valu:[286,287],test_set_uniform_translation_prob:284,test_set_uniform_translation_probabilities_of_non_domain_valu:284,test_set_uniform_vacancy_probabilities_of_max_displac:288,test_set_uniform_vacancy_probabilities_of_non_domain_valu:288,test_short_strings_bug:274,test_simpl:[250,266],test_spanish:274,test_stack_decod:281,test_stat:[211,212],test_stem:241,test_str:246,t
est_suit:230,test_tag:241,test_tag_pattern2re_pattern_quantifi:251,test_tgrep:241,test_token:241,test_tokenize_encod:276,test_tokenize_exampl:276,test_tokenize_link_typ:276,test_tokenize_macro:276,test_tokenize_node_label:276,test_tokenize_nodenam:276,test_tokenize_quot:276,test_tokenize_segmented_pattern:276,test_tokenize_simpl:276,test_total_translated_word:292,test_trailing_semicolon:276,test_transform_output:124,test_translated_posit:292,test_translation_so_far:292,test_translation_so_far_for_empty_hypothesi:292,test_truthi:246,test_twitter_auth:241,test_unable_to_change_cutoff:246,test_unspecified_lang:269,test_untranslated_span:292,test_untranslated_spans_for_empty_hypothesi:292,test_update_empty_vocab:246,test_use_macro:276,test_util:241,test_valid_phras:292,test_vocab_iter_respects_cutoff:246,test_vocab_len_respects_cutoff:246,test_vocabulari:242,test_vocabularies_are_initi:289,test_vocabularies_are_initialized_even_with_empty_corpora:289,test_vocabulary_martin_mod:274,test_vocabulary_nltk_mod:274,test_vocabulary_original_mod:274,test_wordnet:241,test_wordnet_similar:280,test_zero_match:282,testbleu:282,testbleufringecas:282,testbleuvsmteval13a:282,testbleuwithbadsent:282,testbril:248,testcas:[245,246,248,250,251,254,257,259,264,265,266,267,268,269,273,274,276,280,282,283,284,285,286,287,288,289,290,291,292],testchunkrul:251,testconcord:254,testcorpusview:[257,268],testdisagr:259,testgdfa:283,testgrammar:174,testhigherordertableauprov:128,testhypothesi:292,testibmmodel1:284,testibmmodel2:285,testibmmodel3:286,testibmmodel4:287,testibmmodel5:288,testibmmodel:289,testing_data:212,testjsonseri:264,testlikelihoodratio:265,testmeteor:290,testnist:291,testpostag:269,testpreprocess:245,testresolutionprov:127,tests_rel_domin:276,tests_rel_indexed_children:276,testsennapipelin:273,testsennatagg:273,testsequencefunct:276,teststack:292,teststackdecod:292,testtableauprov:128,teubner:[299,307],text1:293,text2:293,text3:293,text4:293,text5:293,text9:293,text:[0,1,7,8,11,14,25,26,28,29,31,33,37,38,42,53,57,59,60,64,67,71,72,74,77,78,80,82,84,90,98,100,105,106,107,109,111,113,114,116,117,119,132,133,134,135,136,148,156,159,160,162,164,168,170,171,174,175,176,194,195,200,201,205,206,210,211,214,220,221,226,251,254,295,297,298,299,300,301,302,303,305,306,307,310,311,312,313,314,317,319,335,339,340,341,342,343,347,349],text_bigram:[132,134],text_contains_sentbreak:302,text_ngram:133,text_profil:42,text_se:[132,133,293],text_unigram:[132,134],text_word:299,textbook:[183,349],textbox_opt:117,textcat:30,textcollect:293,textfil:[27,57,104],textid:[57,84],textiowrapp:111,texttil:295,texttilingtoken:310,textual:[57,71,90,349],textwidget:[116,117],textwrap:71,tf:[38,293],tf_idf:293,tfidf:38,tfidftransform:38,tgrep2:[276,294],tgrep:[0,276,349],tgrep_compil:294,tgrep_nod:294,tgrep_posit:294,tgrep_str:294,tgrep_token:294,tgrepexcept:294,th:[33,52,66,115,227,228,293,316,318,334],than:[24,25,28,29,32,33,35,37,46,49,52,53,57,59,62,67,71,84,102,105,109,115,117,118,132,139,143,149,153,156,159,161,164,171,175,176,179,187,188,194,200,201,209,211,212,213,219,224,227,228,293,302,307,309,314,317,324,326,327,334,335,336],thank:[160,295,303,306,308,312,345,347,349],theatr:218,thee:66,thei:[13,15,28,30,33,34,35,36,43,46,52,53,55,57,65,71,103,104,105,108,109,115,117,118,119,127,129,132,141,145,154,164,167,176,188,214,215,226,227,228,293,299,306,307,312,314,316,321,334,336],them:[25,28,30,33,35,51,52,53,102,106,109,117,118,119,126,129,132,139,141,154,159,160,164,167,168,171,176,180,185,189,219,293,295,303,306,308,310,312,313,
316,317,326,328,331,334,336,338,339,340,346],themat:[57,71,103],theme:334,themrol:[57,103],themselv:[57,77,78,79,87,106,303],theo:299,theorem:[121,122,126,127,128,181,188,349],theoremtoolcommand:122,theoremtoolcommanddecor:122,theoremtoolthread:122,theoret:[177,183,198,199,299,325],theori:[57,71,178,299,307],there4:189,therefor:[25,28,33,57,68,302,317,320,321],thesauru:[57,71,76],thesaurus:[57,76],theta:[66,189],thetasym:189,thi:[2,8,10,13,14,20,24,25,26,28,29,31,32,33,34,35,36,37,38,39,43,46,48,49,50,51,52,53,55,57,58,59,60,61,62,63,64,65,67,68,69,70,71,72,73,74,75,77,78,79,80,81,82,83,84,85,87,89,90,91,92,95,96,97,98,100,101,102,103,104,105,106,107,108,109,110,111,115,116,117,118,119,122,123,125,126,127,129,130,131,132,133,134,135,137,139,141,142,143,144,145,151,154,156,157,158,159,160,161,162,164,166,167,168,169,170,171,172,173,174,175,176,177,178,179,181,182,183,185,187,188,189,190,193,194,196,197,198,199,200,201,202,203,206,209,210,211,213,214,215,217,218,219,220,221,222,224,226,227,228,230,251,274,290,293,294,295,297,299,300,301,302,303,304,305,306,307,310,311,312,313,314,317,318,319,320,321,322,323,324,325,326,327,328,331,332,333,334,335,336,337,338,339,340,341,343,344,346,348,349],thick:293,thing:[127,132,293,294,311,343],think:321,thinsp:189,third:[52,179,212,226,341,349],third_person:87,thirteen:[57,62],this_corpu:108,this_neg:194,thoma:349,thorn:189,thoroughli:195,thorsten:221,those:[28,42,57,61,67,71,72,105,108,109,118,119,123,125,129,158,159,167,176,196,202,203,226,293,299,302,316,333,334],though:[57,71,132,139,141,145,169,211,332],thought:[188,293,336],thread:[122,123,293],thread_id:123,threadgroup:122,three:[2,7,28,53,57,66,68,71,79,87,93,98,117,141,143,145,156,159,167,201,203,214,228,294,300,304,317,322,325,326,333,334,340],threshold:[48,142,189,213,299,302,333],thrice:200,through:[7,10,24,28,57,71,109,118,143,148,154,159,160,167,169,170,171,180,183,214,293,326,343,347,348],thrown:37,thu:[24,28,46,57,71,84,102,118,132,143,179,203,211,311,312,322,325,326],thuent:213,thursdai:[347,349],ti:[212,307],tiago:349,tibor:302,tid:123,tigersearch:349,tighten:228,tigrinya_tigrigna:[57,101],tild:189,time:[7,14,25,28,29,33,39,52,57,63,68,71,72,97,101,102,108,111,117,129,131,132,144,147,152,155,159,160,162,164,168,170,171,176,179,189,193,194,211,214,219,224,293,302,317,322,323,324,325,326,333,334,338,340,341,343,349],timedelta:338,timeout:[126,160,340,341],timer:194,timestamp:341,timestamped_fil:341,timezon:338,timi:98,timit:[57,97,349],timitcorpusread:[57,98],timitd:[57,98],timittaggedcorpusread:[57,97],tion:307,titl:[60,72,89,107,114,117,176],tj:144,tk:[117,343],tkinter:[111,115,116,117,182,343,349],tknzr:297,tl:219,tmp:[79,87],tna3:202,tne3:202,tnem4:202,tnt:[209,349],to_chunkstruct:28,to_cnf:190,to_conl:[160,161],to_depgraph:186,to_dot:161,to_fol:123,to_glueformula_list:[184,186],to_lowercas:[203,274],to_screen:341,to_settings_str:314,to_sfm_str:314,to_uid:[57,82],todai:217,todd:317,todo:[57,68,141,144,160,310],togeth:[13,57,71,89,102,136,158,293,300],toggl:[8,115],toggle_collaps:116,toi:[66,109,346,349],tok:[43,178,181,187,188,193,219,302,311,312],tok_cl:302,token:[0,7,8,14,16,25,26,27,28,29,30,31,32,33,34,39,41,43,44,47,51,52,57,58,60,62,63,64,65,67,70,73,74,84,85,88,89,91,92,97,98,100,101,102,104,111,119,129,132,139,157,158,159,160,162,164,165,166,167,168,169,170,171,172,175,176,181,187,188,193,194,197,198,199,201,204,205,206,209,210,211,212,213,214,215,217,219,220,221,222,226,227,228,257,276,293,294,318,319,320,321,327,328,330,331,332,341,342,343,346,347,349,351],token_cl:302,token
_typ:341,tokenis:340,tokenize_s:[296,304],tokenized_source_text:299,tokenizeri:[160,296,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312],tokensearch:293,tokensequ:310,tokentablefield:310,tokenwrap:343,toknum:102,toktok:295,toktok_regex:311,toktoktoken:[311,349],toktyp:37,tokyo:301,tom:349,tomcavag:349,tongu:20,tonight:300,tonya:145,too:[57,71,88,102,297,324,325,326,332],took:[57,71,219,349],tool:[1,2,3,7,8,53,111,113,117,122,149,161,193,195,347,349],toolbox:[0,57,349],toolboxcorpusread:[57,99],toolboxdata:314,toolboxset:314,toolkit:[0,57,94,104,117,304,345,349],top:[2,8,28,33,60,78,91,106,115,116,117,131,159,161,162,164,170,188,194,294,334,335,336,346,349],top_n:193,top_relation_label:[161,166],topdownchartpars:159,topdowninitrul:[159,164],topdownpredictrul:159,topic:[30,36,105,310,347,349],topic_domain:105,topmost:105,topspac:181,toronto:[57,107,142],torsten:349,tot:[57,89],total:[57,71,98,102,111,132,134,143,159,162,164,176,194,195,321,322,323,349],total_count:310,totalannot:[57,71],toward:[29,144,211],tower:[57,71],tp:[334,340],tpec2iv:202,tpeders:92,tpir2b:202,tpl:212,tpmus2:202,tpro2b:202,tr:[176,179],trace:[14,28,33,48,49,50,51,79,87,118,159,162,164,168,170,171,174,175,180,183,189,191,212,224,319,334],trace_chart_width:[159,162,164],traceback:[226,227,228,305,319],track:[52,118,167,169,326,333,340,341],tracktoscreen_demo:340,tractabl:333,trade:189,tradeoff:336,trail:[57,60,64,181,219,314],train:[27,32,33,34,35,36,38,41,44,52,86,98,133,135,153,166,167,169,173,176,189,193,194,209,211,212,213,214,215,217,219,220,221,224,228,284,285,286,287,288,302,317,322,323,324,325,326,327,346,349],train_cl:302,train_data:213,train_deppars:184,train_from_fil:166,train_maxent_classifi:33,train_maxent_classifier_with_gi:33,train_maxent_classifier_with_ii:33,train_maxent_classifier_with_megam:33,train_sent:[212,225],train_stat:[211,212],train_supervis:214,train_text:302,train_tok:[33,34,41,43],train_token:302,train_unsupervis:214,trainer:[43,193,194,212,214,226,228],training_data:212,training_opt:213,training_set:193,training_stat:211,transcript:[57,66,98,349],transcription_dict:[57,98],transfer:[173,176],transform:[25,28,57,77,78,79,87,106,108,124,145,148,160,198,199,211,212,214,223,227,228,313,334,336],transit:[105,119,173,179,213,214,343,349],transition_pars:173,transitionpars:156,transitive_closur:343,translat:[0,22,24,33,57,66,82,83,104,125,151,189,241,349],translation_opt:333,translation_q:[57,82],translation_t:[322,323,324,325,326],translations_for:316,transpos:33,transposit:145,travers:[57,71,105,167,211,319,335,336,343],travi:349,treat:[32,52,57,71,115,122,132,134,141,159,160,172,188,293,303,305,312],tree2conllstr:29,tree2conlltag:29,tree2semi_rel:189,tree:[0,2,7,8,14,25,26,27,28,29,32,38,46,49,51,55,57,61,65,68,75,79,87,91,94,105,106,107,112,117,119,129,156,157,158,159,160,161,162,164,165,166,167,168,169,170,171,172,173,174,175,176,180,185,189,191,276,314,335,336,343,347,349],tree_:116,tree_class:[57,68,159,164],tree_str:161,tree_to_treeseg:116,treebank:[57,61,79,87,94,111,161,203,209,212,216,295,334,336,347,349],treebank_data:161,treebanktoken:312,treebankworddetoken:[298,312],treebankwordtoken:[295,298,312,313],treeedg:[14,159,164,168],treepo:[79,87,334],treeposit:[294,334],treeposition_spanning_leav:334,treepositions_no_leav:294,treeprettyprint:[0,334],treeseg:116,treesegmentwidget:116,treetoken:119,treetransform:0,treeview:116,treewidget:116,tremend:195,trenkl:42,tresoldi:349,trg_class:[325,326],trg_phrase:316,trg_sentenc:[322,323,327],trglen:[320,331],trgtext:[320,331],tri:[109,
122,141,168,170,219,221,227,304,341,343],trial:[50,57,71,213],triangular:116,trie:[52,300],trigger:[57,71,105,117,202,227,343],trigram:[42,53,132,143,176,293,317,343],trigram_fd:53,trigram_measur:53,trigram_score_fn:143,trigramassocmeasur:143,trigramcollocationfind:53,trigramtagg:219,tripl:[141,160,161,172,189,191,304],trippl:170,trn:98,troop:[317,318,321,328,330],troubl:24,trough:307,trq:[57,82],truncat:[52,105,144,146,343],truth:[24,152,177,183,191,217,321],truthi:[57,71],truthvaluetyp:188,try_port:160,ts_occur:310,tsai:94,tsi3:202,tsis0:202,tst:98,tsu:22,tsukada:332,tsutomu:332,tt1:202,tt:[57,82,212,310],tulo2v:202,tune:[142,176],tupl:[14,16,23,26,29,30,33,34,35,41,43,44,52,55,57,59,60,63,64,65,66,68,69,70,71,74,75,76,77,79,80,82,83,86,87,89,91,95,96,97,98,102,103,104,105,106,107,110,115,117,118,122,123,125,127,129,132,133,134,135,139,141,142,143,145,147,149,153,155,158,159,160,163,164,166,168,170,172,174,176,177,179,183,188,189,191,193,194,200,209,210,211,212,213,214,217,218,219,220,221,222,225,227,228,293,294,295,296,297,300,302,303,304,306,307,313,314,316,318,319,320,327,328,331,332,333,334,335,338,340,341,343],tuple2str:222,ture:176,turkish:[57,101],turkish_turkc:[57,101],turn:[25,28,57,65,68,74,77,79,85,87,96,97,132,133,135,136,203,346],turn_on:[57,79,87],tussl:[57,104],tutori:[214,294,336,349],tw:94,twd:312,tweet:[57,100,194,297,337,338,339,340,341,342,349],tweethandl:338,tweethandleri:[338,341],tweeti:125,tweetid:339,tweets_by_user_demo:340,tweets_fil:339,tweettoken:[57,100,194,297,349],tweetview:341,tweetwrit:341,twenti:67,twitter:[0,57,297,349],twitter_demo:337,twitterclass_demo:340,twittercli:[337,339,340,342],twittercorpusread:[57,100,340],twizt:293,two:[8,13,25,28,33,35,36,50,51,52,53,57,66,71,74,76,79,87,96,102,105,109,111,117,118,119,127,129,132,136,139,141,142,145,147,148,149,156,159,160,169,171,174,175,176,177,188,189,194,200,201,219,226,228,247,253,293,295,300,302,303,305,306,308,312,313,316,317,319,321,334,336,339,349],twython:[339,341],twythonstream:341,txt:[57,58,67,82,85,87,88,89,97,98,101,107,109,141,195,257,274,293,342],ty:149,typ:[71,302],type:[13,14,15,18,23,25,26,28,29,31,32,33,35,38,43,44,49,51,55,57,58,59,60,62,64,65,67,68,70,71,73,74,77,80,81,82,84,85,88,89,90,91,92,97,98,100,102,103,105,106,109,116,117,118,119,122,123,129,130,132,133,135,139,141,142,146,147,148,152,155,156,157,158,159,160,161,162,164,166,167,168,169,170,171,172,173,174,175,176,178,179,181,183,184,187,188,189,191,193,200,201,206,208,209,210,211,212,213,214,217,218,219,220,221,224,226,227,228,251,276,293,294,296,297,298,299,300,301,302,303,304,305,306,307,309,310,311,312,313,314,316,317,318,319,320,321,322,323,324,325,326,327,328,329,331,332,333,334,339,341,343,346,348,349],type_check:[178,181,187,188],type_no_period:302,type_no_sentperiod:302,type_str:188,type_thresh:302,typecheck:[181,188],typedmaxentfeatureencod:33,typeerror:[130,132,134,139,226,228,334],typeexcept:188,typeresolutionexcept:188,typic:[26,28,29,30,31,33,34,52,57,58,71,97,105,109,117,118,119,129,156,157,158,159,160,167,171,176,199,214,219,226,293,305,316,327,334],typo:216,tyre:107,tzinfo:338,u1:183,u2002:189,u2003:189,u2009:189,u200:189,u200c:189,u200d:189,u200f:189,u2028:301,u2:183,u3:183,u4:183,u:[51,57,89,104,172,212,293,301,304,309,311,313,348],ua:163,uacut:189,ualberta:142,uarr:189,uber:195,ubuntu:349,uc:299,ucirc:189,ucsd:137,udhr:[57,349],udhrcorpusread:[57,101],ugh:180,ugo1:202,ugrav:189,uh:[66,195],uhuh:195,ui:[57,82,146],uic:[67,81,88,89],uk:[57,60,107,123,343],ulrich:299,um:202,uma:205,uml:189,umn:92,umsd:343,un:[25
,28,57,71,218,305],un_chomsky_normal_form:[334,336],unabl:219,unann:[57,71],unannot:46,unari:[13,119,179,183,334,336],unary_concept:179,unarychar:[334,336],unattest:33,unbeliev:195,unbind_click:117,unbind_drag:117,unbound:118,unchang:[183,200,206,208,326,327],unchunk:[25,29],unchunkrul:[25,28],unclear:60,uncompress:33,unconstrain:158,uncurri:188,undefin:[50,52,102,176,183,276],under:[33,57,59,64,71,94,105,109,110,118,119,131,159,161,176,179,198,199,206,227,297,317,318,319,321,326,327,328,330,332],underflow:[132,176],undergo:349,undergon:349,underli:[43,52,60,91,102,109,188],underscor:[57,71,160],underspecifi:[185,318],understand:[24,132,203,302],understem:146,understood:169,understudi:317,undesir:321,undirect:15,undirectedbinarycombin:15,undirectedcomposit:15,undirectedfunctionappl:15,undirectedsubstitut:15,undirectedtyperais:15,undo:[8,170,171,312],undon:[170,171,312],unemploy:209,unescap:313,unexpand:159,unexpect:188,unexpectedtokenexcept:188,unfortun:[57,61,129],unfre:[105,343],ungrammat:[167,174],uni:[117,137,221],unicharscorpusread:[57,104],unicod:[25,57,58,59,60,62,63,64,65,67,68,69,70,71,73,74,77,78,80,81,82,83,84,85,88,89,91,92,96,97,101,102,103,104,105,106,107,109,189,198,199,201,206,215,295,303,314,334,335,349],unicode_field:[57,99,314],unicodeerror:343,unicodelin:335,unif:[118,127,164,349],unifi:[13,118,127,158,164,178,187],unificationexcept:[127,187],unificationfailur:118,uniform:[57,82,159,176,317,322,323,324,325,326,327,335],uniformli:[317,324,325,326],uniformprobdist:176,unify_base_valu:118,unigram:[53,132,133,134,135,143,194,209,219,224,317,318,332],unigram_scor:[133,137],unigram_tagg:219,unigram_word_feat:193,unigramtagg:[209,212,219],unimelb:46,unimpl:172,unind:[57,61],unindented_paren:[57,61],unintellig:214,union:[117,125,320],uniqu:[13,30,55,57,66,90,96,98,105,111,122,125,164,176,187,188,334],unique_ancestor:294,unique_index:184,unique_list:343,unique_names_demo:125,unique_vari:188,uniquenamesprov:125,uniquepair:[57,104],unit:[39,46,57,71,91,100,174,214,218,229,293],unitari:119,uniti:[132,135],unittest:[245,246,248,250,251,254,257,259,264,265,266,267,268,269,273,274,276,280,282,283,284,285,286,287,288,289,290,291,292],univ_scop:[188,190],univers:[57,66,67,77,81,83,89,92,122,142,160,198,201,206,209,213,216,220,299,307,311,322,323,324,325,326,327,333,349],unix:346,unk:[132,139,221],unk_cutoff:[132,139],unk_label:[132,139],unknown:[36,57,59,77,98,111,117,118,132,139,183,219,221,227],unlabel:[33,36,163,214,219],unlabeled_featureset:36,unlabeled_sequ:214,unladen:[160,215,218,220],unless:[60,78,91,102,106,109],unlexic:336,unlik:[26,169,302,326,349],unmark:117,unmasked_scor:[132,133,135],unmatch:[305,328],unmodifi:[57,64,194,201],unnecessari:[122,127],unord:119,unparseableinputexcept:178,unpermut:147,unprocess:[55,57],unquot:118,unrestrict:[15,25,26],unseen:[33,35,132,139,176,221,224],unseen_featur:33,unseg:221,unsortedchartpars:168,unspecifi:[57,82,118,159],unsuccess:343,unsuit:302,unsupervis:[46,214,302],unsur:348,untag:[212,222],until:[7,25,33,46,48,49,50,57,77,102,105,106,108,118,119,131,155,159,168,175,299,302,334],untouch:[57,61],untrain:[212,221],untransl:333,untri:[7,170],untried_expandable_product:170,untried_match:170,unus:[211,224],unusu:195,unweight:[105,211,343],unweighted_minimum_spanning_dict:343,unweighted_minimum_spanning_digraph:343,unweighted_minimum_spanning_tre:[105,343],unwrap:[57,99,129,314],unzip:[111,346],unzipped_s:111,up:[2,7,8,49,52,57,59,60,63,64,68,69,70,71,74,78,79,80,82,83,87,91,96,97,98,103,105,106,107,109,111,115,116,117,129,130,132,135,136,139
,156,158,159,168,169,171,175,219,221,254,264,292,293,299,307,317,318,321,326,327,331,333,335,347,349],updat:[25,33,46,48,52,110,111,117,118,123,132,134,139,146,167,175,176,183,213,217,347,349],update_align:323,update_clust:49,update_distort:[324,325],update_edge_scor:167,update_fertil:327,update_lexical_transl:[323,327],update_null_gener:327,update_output:214,update_vac:326,update_wrapp:110,upenn:[57,86,104,179,221,312,334],upenn_tagset:120,upon:[102,152,167,176,188,217,298],upper:[57,97,117,200,302,334,341],upper_data_limit:[338,341],upper_date_limit:[338,341],upperbound:145,uppercas:[66,188,302],upsih:189,upsilon:189,uptodatemessag:111,upward:117,uq:[57,82],uqi3:202,urg:60,url:[57,62,64,71,100,109,111,129,160,174,339,343],url_foe_1:311,url_foe_2:311,url_foe_3:311,url_foe_4:311,urlbas:[57,64],us:[0,2,7,8,10,13,14,18,24,25,26,27,28,29,30,31,32,33,34,35,36,38,39,40,41,42,43,44,46,48,49,50,51,52,53,55,57,58,59,60,61,62,63,64,65,67,68,69,70,71,73,74,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,96,97,98,100,101,102,103,104,105,106,107,108,109,111,115,116,117,118,119,122,123,124,125,126,127,128,129,130,131,133,134,135,136,139,141,142,143,145,146,148,149,155,156,157,158,159,161,162,164,165,166,167,168,169,170,171,172,174,175,176,178,179,180,181,183,185,187,188,189,191,193,194,195,196,198,199,200,201,202,203,204,206,208,209,210,211,212,213,214,215,217,219,220,221,222,223,224,228,251,254,276,293,294,295,296,297,299,300,301,302,303,304,305,306,307,308,309,310,312,314,316,317,318,319,321,322,323,324,325,326,327,330,331,332,333,334,335,336,337,338,339,340,341,343,344,346,347,348,349],usa:[57,64,81,89,201,218,311,351],usabl:143,usag:[10,34,46,52,57,58,71,77,85,97,101,107,110,111,129,178,179,221,224,344],usage_domain:105,use_agenda:[159,164],use_lemmat:37,use_min_depth:105,use_tagdict:217,use_thread:111,used_var:118,used_vari:190,user:[10,18,23,33,57,58,59,78,84,97,102,107,109,111,115,116,117,129,166,168,175,176,177,179,213,218,297,303,318,328,332,339,340,341,342,343,346,347,348,349,351],user_info_from_id:341,user_ment:339,user_tweet:341,userid:[340,341],usernam:[297,343,346],userwarn:105,usr:[39,57,85,101,109,111,185,218,346],usthb:198,usu:148,usual:[24,33,57,64,105,118,119,131,145,166,176,183,188,194,211,214,226,293,294,319,325,328,339],utc:338,utcoffset:338,utf8:[57,59,61,62,63,65,67,68,69,70,75,77,79,85,87,88,89,95,97,98,99,100,101,102,107,129,160,172,191,194,220,295,304,308,314,339,343],utf:[39,57,93,101,109,111,189,218,220,309,311,314,340],util:[0,18,25,30,46,48,49,50,55,57,58,62,63,65,67,70,73,74,75,81,84,85,88,89,92,97,100,105,106,112,113,114,116,123,132,134,136,156,169,177,192,193,196,209,295,299,300,307,337,339],utrecht:351,utter:[57,58,60,64,65,77,85,96,97,98,156],utteranceid:[57,98],utterli:195,uu:[118,161],uuml:189,uw:66,v0:[57,71],v13a:317,v14:301,v1:[141,167],v2:[105,167],v3:[39,167,218],v:[28,33,51,52,57,66,71,77,86,87,93,105,107,118,137,142,148,161,167,171,177,181,183,185,188,208,209,216,224,321,326,334,349],v_head:326,v_non_head:326,vacanc:326,vacancies_at:326,vacant:326,vader:[192,194,349],vader_lexicon:195,vaderconst:195,vagu:24,val:[33,57,93,109,118,129,176,177,183,334],val_dump:179,val_load:179,valenc:[57,71,195],valid:[25,28,57,71,78,115,117,129,145,160,173,176,181,187,188,202,208,224,294,299,300,306,322,323,324,325,326,338,341,343],valid_phras:333,validate_sig_len:125,validate_syl:307,valu:[14,28,29,30,32,33,34,35,38,39,43,44,46,51,52,57,59,60,63,64,66,67,68,69,70,71,72,74,77,78,80,82,83,90,91,96,98,100,103,105,106,107,109,110,111,115,116,117,118,119,126,129,132,139,141,142
,143,144,145,146,147,148,153,159,160,164,168,169,174,175,176,177,178,179,180,183,184,188,189,191,195,210,219,221,224,226,227,228,253,293,294,302,306,307,314,317,318,319,321,322,323,324,325,326,333,334,336,339,340,341,343],valuat:[109,122,124,177,179,183],value_handl:118,value_str:90,valueerror:[28,109,117,118,119,129,144,147,176,206,226,227,305,334],values_in_chart:144,van:[178,189,335],vannoord:42,var1:178,var2:178,varex:183,vari:[30,57,68,105,147],variabl:[13,14,16,28,29,33,39,46,55,57,85,100,102,115,117,118,119,125,127,129,141,159,162,164,168,170,171,175,176,178,181,183,184,185,187,188,206,218,219,220,302,309,317,340,346],variable_typ:178,variablebinderexpress:188,variablebindingexcept:187,variableexpress:[183,187,188],varianc:[48,176],variance_charact:319,variant:[36,57,66,105,143,148,183,214],variat:[15,46,50,53,176,189,302,330],varieti:[28,55,57,82,111,117,154,167,176,293,341],variou:[33,36,57,105,111,123,124,167,216,317,323,324,325,326,327,333,349],various_sent:36,varnam:183,vb:[144,160,163,212,215,220,227,347],vbd:[28,57,61,172,173,176,212,219,222,347],vbg:[160,212,219],vbn:28,vbp:[160,172,216],vbz:[39,160,172,174,209,215,216,217,218,220],vc:163,ve:[57,67,105,108,298,312,349],vector:[33,34,40,41,46,47,48,49,50,51,167],vectorspac:46,vectorspacecluster:[48,49,50,51],vee:66,vega:201,vennemann:299,verb:[25,57,68,71,77,86,87,103,105,153,196,198,199,208,209,212,213,216,219],verb_group:105,verb_head:68,verb_stem:68,verb_t1:[198,199],verb_t2:[198,199],verb_t3:[198,199],verb_t4:[198,199],verb_t5:[198,199],verb_t6:[198,199],verbnet:[57,349],verbnetcorpusread:[57,103],verbos:[28,32,33,57,105,109,111,122,123,126,127,128,129,132,134,159,160,162,164,166,168,170,171,172,173,174,175,178,179,183,184,185,193,212,213,214,215,219,220,224,227,302,308,309,338,340,341,342,343],verbsfil:[57,87],vercelli:107,veri:[14,24,25,33,57,102,104,123,132,134,173,195,200,228,317,347],verif:341,verifi:[253,341],verissimo:205,verkhogliad:349,versa:169,version:[0,25,28,29,32,52,57,60,64,74,82,90,92,103,105,107,109,123,129,132,135,159,161,164,176,178,179,181,188,199,203,206,212,222,224,293,295,309,313,332,334,341,343,347,348,349],versu:148,vertebr:[57,105,343],vertex:214,vertic:[116,117,334,335,336],vertmarkov:[334,336],vg2main:[57,101],vg:216,vi2:202,vi:[28,107,216],via:[29,97,105,106,109,118,167,180,189,213,214,215,293,321,341,349],vice:169,video:349,vie0:202,vietnames:[57,101,311],view:[52,57,58,59,60,63,65,68,70,71,73,74,77,78,79,81,84,87,91,92,97,102,106,117,257,326,336,341],vincent:[107,322,323,324,325,326,327],vindicta:107,vinken:163,vinyal:321,viqr:[57,101],virtual:348,virtualenv:348,virtutibu:107,vis3j:202,visibl:[115,117,118,131],visit:[181,188],visit_structur:[181,188],vista:346,visual:[113,117,194,335,336,349],visualis:[57,105],viterbi:[156,214],viterbipars:175,vitii:107,vkulyukin:148,vkweb:148,vl2:334,vmod:163,vn:[57,103],vnclass:[57,103],vnclass_id:[57,103],voc:274,vocab2:[132,139],vocab:[132,133,137,139,293],vocabulari:[132,133,135,136,137,148,246,274,325],vocabulary_introduct:310,vocabulary_text:133,vocal:[57,60,71],voic:87,vol:[176,198],volum:340,voluntari:345,vowel:[66,142,202,206,299,307],vp:[29,39,57,61,86,101,119,160,172,216,218,294,334,335],vs:[153,316,336],vulner:349,w14:317,w16:318,w1:[53,143],w2:[53,143],w3:[53,77,143,179],w4:[53,143],w:[30,36,42,55,57,60,66,77,85,88,89,106,109,111,118,142,183,209,298,301,303,310,317,318,321,330,332],w_i:221,wa:[28,33,36,40,42,57,64,69,71,84,89,90,95,105,109,115,116,117,122,129,132,139,160,167,170,171,176,179,194,198,199,200,201,204,209,214,218,219,293,298,299,301
,302,312,314,317,321,322,323,324,325,326,332,334,349],waaaaayyyi:297,waaayyi:297,waaw:[198,199],wai:[8,24,32,105,108,115,117,132,141,145,156,158,169,176,185,200,206,228,276,294,295,299,324,325,332,333,334,343],wait:[132,134,188,341],walk:[118,123,125,160,184,335],wall:[86,160,308],wan:[298,312],wang:[321,349],want:[36,57,59,71,89,100,102,117,119,132,133,134,135,141,158,176,179,193,194,218,219,346],war:[22,160,324,325,326],ward:317,warm:194,warn:[57,71,102,105,122,129,228,327,343,349],warren:179,washington:[81,257,349],wasn:[195,312],wasnt:195,wat:[57,64,332],watch:[57,64,117,137,349],watcher:117,wav:[57,98],waw:201,wd_01:228,wd_0:228,we:[25,28,33,36,57,64,66,68,71,77,89,102,105,106,117,119,122,132,133,134,135,139,141,153,161,176,177,179,180,183,185,193,194,200,209,214,221,230,294,295,300,304,312,313,317,320,321,326,328,336,343,349],weakest:89,web:[10,57,64,81,88,89,109,145,203,229,349],webdoc:142,weblog:195,websit:[57,64,203,311,349],webtext:[57,85,101],webview_fil:[57,64],weed:326,week:[131,349],weekli:72,wei:[317,321],weierp:189,weight:[29,33,34,38,41,57,105,111,118,141,142,146,147,148,158,167,198,211,214,217,317,318,322,323,324,325,326,328],weight_senses_equ:[57,105],weighted_kappa:141,weighted_kappa_pairwis:141,weird:311,weissweil:200,weka:[30,349],wekaclassifi:44,welch:214,welcom:24,well:[57,68,92,105,117,132,179,227,299,302,313,321,336],went:[36,344],were:[28,29,33,57,71,72,74,80,84,90,105,106,117,127,129,155,159,164,167,169,176,200,201,227,274,312,343],weren:195,werent:195,weshalb:316,west:107,western:98,western_lang:301,wfeat1:228,wfeat2:228,wha:298,whad:298,what:[14,15,28,30,33,36,57,71,77,78,95,106,111,115,116,119,129,130,132,134,135,141,146,159,160,164,167,168,170,209,214,215,218,220,302],whatev:168,when:[2,14,25,28,30,32,33,34,36,38,41,43,46,50,52,53,57,68,71,95,98,102,104,105,109,111,114,115,116,117,118,122,123,126,129,132,133,135,139,145,146,147,157,159,162,164,166,167,168,169,170,171,174,175,176,178,183,188,202,212,213,214,215,219,221,226,228,274,294,295,297,301,302,303,305,306,312,317,321,322,323,330,332,333,334,335,336,338,341,343,346],whenc:109,whenev:[33,102,109,117,131,164,224],where:[14,15,25,28,29,32,33,34,38,44,52,57,59,68,71,74,76,82,90,98,102,105,106,108,109,111,115,116,117,118,119,122,123,129,132,145,146,148,153,159,160,162,166,167,168,172,173,174,175,176,177,179,181,184,185,189,193,194,199,214,217,219,221,224,226,227,228,254,293,294,296,297,300,302,303,306,310,311,312,314,317,319,322,324,325,326,327,330,331,334,335,336,338,339,341,342,343,344,349],wherea:141,whether:[2,13,15,25,37,38,43,44,52,91,111,115,116,117,119,122,127,129,145,159,161,170,176,183,188,195,214,224,294,295,297,302,327,335,338,341,343],which:[2,7,8,10,13,14,15,23,25,28,29,30,31,32,33,34,37,41,43,44,46,49,50,51,52,53,55,57,58,59,61,62,63,65,67,68,70,71,73,74,75,76,77,78,79,81,84,85,87,89,90,91,92,97,98,102,103,105,106,108,109,111,115,117,118,119,122,123,125,126,127,129,132,133,135,139,141,143,149,153,156,159,160,161,162,165,167,168,169,170,171,173,174,175,176,177,178,179,181,183,187,188,189,194,199,200,203,206,209,210,211,212,213,214,215,219,221,224,226,227,228,293,294,295,300,301,302,303,306,312,316,317,318,319,321,323,325,326,327,328,330,332,334,335,338,339,341,343,348,349],whichev:[122,169,318],white:[98,115,293,314,320],whitespac:[25,28,29,57,58,68,85,102,117,118,160,161,172,189,194,295,302,303,305,306,312,314,318,334,343],whitespacetoken:[57,58,62,67,88,97,194,295,303,313],who:[87,95,111,160,345],whole:[36,57,71,78,105,314,343],whose:[7,8,28,30,33,35,36,52,57,59,60,61,62,63,64,66,67,68,69,70,7
1,72,74,77,78,80,81,82,83,85,88,89,91,96,97,103,105,106,107,109,111,115,117,118,119,129,159,164,167,170,171,175,176,179,189,194,206,219,226,227,228,314,317,334,338],wht:98,why:[23,203,302,316],wide:[30,81,88,111,178,304],wider:117,widget:[115,116,117],widgetnam:117,width:[28,32,111,115,116,117,148,159,162,164,175,293,310,335,343],wiki:[145,313,343,348,349],wikipedia:[145,313,343],wild:169,wildcard_fd:53,wilk:[57,78],wilkdom:[57,78],wilkwilczi:[57,78],william:[107,145],win:348,wind:153,window:[2,7,8,18,111,113,116,117,148,189,201,226,293,310,334,340,341,346,347,349],window_len:310,window_s:[53,293],windowdiff:148,winkler:145,winkler_exampl:145,winkler_scor:145,winlen:226,winresearch:[57,101],winter2008:137,wisdom:[24,152],wise:[293,332],wish:[2,102,117,293,336],within:[28,42,53,57,79,87,96,103,105,107,109,117,119,129,145,169,188,189,202,214,253,343],without:[52,57,74,78,91,108,115,118,131,132,139,145,166,176,188,189,195,198,201,202,214,274,302,317,349],witten:[132,135,137,176],wittenbel:137,wittenbellinterpol:[132,135],wittenbellprobdist:176,wlement:106,wmt14:317,wmt15:318,wmt16:318,wmt49:318,wn:[57,105,343],wninput5wn:105,wnl:208,wolf:[160,172,213],wolfgang:321,won:[36,119,195,276,299,349],wonder:[299,347],wont:195,word1:[57,97],word2:[57,97],word3:[57,97],word:[16,24,26,27,29,30,33,36,37,39,43,53,55,57,58,59,60,61,62,63,64,65,66,67,68,70,71,73,74,75,76,77,78,79,80,81,83,84,85,86,87,88,89,91,92,96,97,98,99,100,101,104,105,106,107,111,113,114,118,119,132,133,135,136,137,139,143,145,146,153,155,156,158,159,160,161,166,167,169,172,173,174,176,184,189,193,194,195,196,197,198,199,200,201,202,203,204,205,206,208,209,211,212,214,215,216,217,218,219,221,222,224,226,227,228,274,293,295,299,300,302,303,304,307,309,316,317,320,322,323,324,325,326,327,328,330,331,332,333,334,335,343,344,346,349],word_1:214,word_:[57,61],word_and_tag_pair:158,word_class:326,word_class_:325,word_class_t:[325,326],word_fd:53,word_find:155,word_index_in_sourc:322,word_index_in_target:322,word_indic:178,word_n:214,word_path:77,word_penalti:333,word_po:332,word_r:297,word_rank_align:332,word_similarity_dict:293,word_tim:[57,98],word_token:[57,58,62,67,70,85,88,89,97,100,101,174,176,194,209,295,299,302,307,312,347],worder:332,wordfind:150,wordfreq_app:1,wordlist:[57,81,83,349],wordlistcorpusread:[57,81,83,104,108],wordnet:[1,10,57,91,93,103,196,280,328,343,347,349],wordnet_:[57,105],wordnet_app:1,wordnet_corpus_read:105,wordnetcorpusread:[57,105,328],wordneterror:105,wordneticcorpusread:[57,105],wordnetid:[57,103],wordnetlemmat:208,wordnetsyn_match:328,wordnnetdemo:280,wordnum:[79,87],wordpunct_token:[295,303],wordpuncttoken:[57,85,88,89,194,303],words_by_iso639:[57,83],words_by_lang:[57,83],words_index:316,words_mod:[57,74,78],wordtpl:228,work:[18,25,38,57,67,74,78,100,105,109,110,111,119,126,129,132,133,137,141,176,180,200,221,294,302,316,334,336,343,347,348,349],workaround:[212,224,294],workshop:[94,318,320,332],workspace_markprod:113,world0:179,world1:179,world:[81,88,129,160,179,212,228,317,321,332],worldli:24,worri:102,wors:224,worst:82,would:[34,57,68,71,118,125,132,141,145,153,159,167,188,211,219,227,228,294,317,334,336],wouldn:[132,195],wouldnt:195,wp:[160,215,218,220],wrap:[38,52,71,117,119,129,131,132,139,143,153,176,297,334,335,343],wrap_at:71,wrap_etre:[57,80,103,106],wrapper:[15,38,57,71,76,90,110,119,129,131,193,293,341,347,349],wrd:[57,98],wrdindex_list:310,write:[44,99,102,109,111,115,152,161,194,203,224,293,314,339,340,341,347],write_megam_fil:34,write_tadm_fil:41,writer:194,writestr:109,written:[34,41,152,1
79,228,312,339,341,342,347,349],wrong:[141,317],wscore:145,wsd:[0,30],wsd_demo:43,wsd_featur:30,wsdm:89,wsj:[209,334,336],wsj_0001:347,wu:[57,105,321],wulfgeat:107,wulfsig:107,wulfstan:107,wup_similar:[57,105],www:[22,42,55,57,60,62,67,72,77,81,83,86,88,89,92,94,104,107,110,117,137,145,166,176,179,200,203,211,251,254,259,282,312,317,318,328,332,334,336,348,349],x00:301,x0:183,x2:118,x7f:301,x:[15,16,25,32,33,57,71,77,78,98,105,107,109,110,115,117,118,123,125,129,132,139,141,143,155,166,169,176,180,181,183,184,185,188,212,216,253,294,298,300,301,310,312,319,334,347],x_t:214,xa0:[189,311],xa:107,xad:189,xe:[57,105],xemac:25,xf:155,xform:28,xi:[176,189],xiaob:321,xiaowen:89,xmax:117,xmin:117,xml:[55,57,60,61,64,71,74,77,78,79,80,84,87,90,91,92,99,103,106,111,129,313,349],xml_escap:313,xml_n:77,xml_post:[57,80],xml_tool:78,xml_unescap:313,xmlcorpusread:[57,60,64,71,78,80,84,90,91,103,106],xmlcorpusview:[60,77,78,91,106],xmldoc:[57,60,64,71,77,78,80,84,90,91,103],xmlinfo:111,xmx1024m:166,xmx512m:129,xpath:77,xreadlin:109,xs:[183,343],xspace:116,xtag:[57,103],xx:334,xxxl:[57,104],xxxx:89,xy:15,y332:183,y:[15,16,33,57,66,71,105,110,117,118,125,148,155,169,176,181,183,188,253,301,310,334],ya:298,yaa:[198,199],yacut:189,yahoomesseng:20,yaml:109,yca3:202,ycn2t:202,ycoe:57,ycoecorpusread:[57,107],ycoehom:107,ycoeparsecorpusread:107,ycoetaggedcorpusread:107,ye:[24,90,217,298,312],yeah:195,year:[57,64,163,217,293],yen:189,yes_no_s:216,yesterdai:[332,340],yet:[7,8,42,102,109,129,168,170,171,176,334,336],yevett:145,yew:317,yf:155,yfi3:202,ygo1:202,yhp1:202,yi:[94,176],yield:[14,33,52,66,149,159,162,164,168,174,176,188,212,214,302,343],yl2:202,ylb1:202,yli3i:202,ylp0:202,ymax:117,ymin:117,ymo1:202,yoav:349,yonghui:321,york:[57,72,98,107,160,295,301,303,306,308,312,313,322,323,324,325,326,327,333],yoshikiyo:321,you:[0,2,7,8,14,23,24,25,28,34,35,39,52,57,60,68,71,74,89,98,100,102,105,111,115,117,118,119,122,130,132,134,158,159,170,171,176,194,195,201,203,206,209,211,219,293,295,297,306,317,334,335,336,340,341,346,348],young:321,younger:293,your:[24,28,35,39,57,100,118,132,141,178,228,317,332,340,341,346,348,349],your_app_kei:342,your_app_secret:342,your_path:309,youtub:[137,349],ypo1:202,yra3:202,yro3:202,yrtsi5:202,yspace:116,yte3:202,yti3:202,ytl2:202,yu:[89,94],yuan:321,yuganskneftegaz:90,yuko:90,yuml:189,yvett:145,yview:115,yview_moveto:115,yview_scrol:115,z0:118,z1:180,z:[8,15,57,66,105,107,118,125,176,183,214,216,301,303],z_:118,za:118,zag:[167,169],zc:24,ze:[57,105],zee:66,zen:18,zen_chat:24,zero:[25,28,31,33,52,57,79,83,87,102,105,116,117,141,159,161,168,176,185,188,221,226,227,228,317,322,323,324,325,327,328,333,334],zero_bas:161,zero_indexed_align:327,zeta:189,zh:[66,309],zhao:94,zhifeng:321,zhu:317,zi2:202,zip:[33,52,57,104,109,111,129,145,195,346,349],zipf:42,zipfil:[109,111],zipfilepathpoint:109,zloti:179,zpar:161,zusammen:[324,325,326],zwaaien:[167,169],zwj:189,zwnj:189,zy1:202},titles:["nltk package","nltk.app package","nltk.app.chartparser_app module","nltk.app.chunkparser_app module","nltk.app.collocations_app module","nltk.app.concordance_app module","nltk.app.nemo_app module","nltk.app.rdparser_app module","nltk.app.srparser_app module","nltk.app.wordfreq_app module","nltk.app.wordnet_app module","nltk.book module","nltk.ccg package","nltk.ccg.api module","nltk.ccg.chart module","nltk.ccg.combinator module","nltk.ccg.lexicon module","nltk.ccg.logic module","nltk.chat package","nltk.chat.eliza module","nltk.chat.iesha module","nltk.chat.rude module","nltk.chat.suntsu 
module","nltk.chat.util module","nltk.chat.zen module","nltk.chunk package","nltk.chunk.api module","nltk.chunk.named_entity module","nltk.chunk.regexp module","nltk.chunk.util module","nltk.classify package","nltk.classify.api module","nltk.classify.decisiontree module","nltk.classify.maxent module","nltk.classify.megam module","nltk.classify.naivebayes module","nltk.classify.positivenaivebayes module","nltk.classify.rte_classify module","nltk.classify.scikitlearn module","nltk.classify.senna module","nltk.classify.svm module","nltk.classify.tadm module","nltk.classify.textcat module","nltk.classify.util module","nltk.classify.weka module","nltk.cli module","nltk.cluster package","nltk.cluster.api module","nltk.cluster.em module","nltk.cluster.gaac module","nltk.cluster.kmeans module","nltk.cluster.util module","nltk.collections module","nltk.collocations module","nltk.compat module","nltk.corpus package","nltk.corpus.europarl_raw module","nltk.corpus.reader package","nltk.corpus.reader.aligned module","nltk.corpus.reader.api module","nltk.corpus.reader.bnc module","nltk.corpus.reader.bracket_parse module","nltk.corpus.reader.categorized_sents module","nltk.corpus.reader.chasen module","nltk.corpus.reader.childes module","nltk.corpus.reader.chunked module","nltk.corpus.reader.cmudict module","nltk.corpus.reader.comparative_sents module","nltk.corpus.reader.conll module","nltk.corpus.reader.crubadan module","nltk.corpus.reader.dependency module","nltk.corpus.reader.framenet module","nltk.corpus.reader.ieer module","nltk.corpus.reader.indian module","nltk.corpus.reader.ipipan module","nltk.corpus.reader.knbc module","nltk.corpus.reader.lin module","nltk.corpus.reader.mte module","nltk.corpus.reader.nkjp module","nltk.corpus.reader.nombank module","nltk.corpus.reader.nps_chat module","nltk.corpus.reader.opinion_lexicon module","nltk.corpus.reader.panlex_lite module","nltk.corpus.reader.panlex_swadesh module","nltk.corpus.reader.pl196x module","nltk.corpus.reader.plaintext module","nltk.corpus.reader.ppattach module","nltk.corpus.reader.propbank module","nltk.corpus.reader.pros_cons module","nltk.corpus.reader.reviews module","nltk.corpus.reader.rte module","nltk.corpus.reader.semcor module","nltk.corpus.reader.senseval module","nltk.corpus.reader.sentiwordnet module","nltk.corpus.reader.sinica_treebank module","nltk.corpus.reader.string_category module","nltk.corpus.reader.switchboard module","nltk.corpus.reader.tagged module","nltk.corpus.reader.timit module","nltk.corpus.reader.toolbox module","nltk.corpus.reader.twitter module","nltk.corpus.reader.udhr module","nltk.corpus.reader.util module","nltk.corpus.reader.verbnet module","nltk.corpus.reader.wordlist module","nltk.corpus.reader.wordnet module","nltk.corpus.reader.xmldocs module","nltk.corpus.reader.ycoe module","nltk.corpus.util module","nltk.data module","nltk.decorators module","nltk.downloader module","nltk.draw package","nltk.draw.cfg module","nltk.draw.dispersion module","nltk.draw.table module","nltk.draw.tree module","nltk.draw.util module","nltk.featstruct module","nltk.grammar module","nltk.help module","nltk.inference package","nltk.inference.api module","nltk.inference.discourse module","nltk.inference.mace module","nltk.inference.nonmonotonic module","nltk.inference.prover9 module","nltk.inference.resolution module","nltk.inference.tableau module","nltk.internals module","nltk.jsontags module","nltk.lazyimport module","nltk.lm package","nltk.lm.api module","nltk.lm.counter module","nltk.lm.models 
module","nltk.lm.preprocessing module","nltk.lm.smoothing module","nltk.lm.util module","nltk.lm.vocabulary module","nltk.metrics package","nltk.metrics.agreement module","nltk.metrics.aline module","nltk.metrics.association module","nltk.metrics.confusionmatrix module","nltk.metrics.distance module","nltk.metrics.paice module","nltk.metrics.scores module","nltk.metrics.segmentation module","nltk.metrics.spearman module","nltk.misc package","nltk.misc.babelfish module","nltk.misc.chomsky module","nltk.misc.minimalset module","nltk.misc.sort module","nltk.misc.wordfinder module","nltk.parse package","nltk.parse.api module","nltk.parse.bllip module","nltk.parse.chart module","nltk.parse.corenlp module","nltk.parse.dependencygraph module","nltk.parse.earleychart module","nltk.parse.evaluate module","nltk.parse.featurechart module","nltk.parse.generate module","nltk.parse.malt module","nltk.parse.nonprojectivedependencyparser module","nltk.parse.pchart module","nltk.parse.projectivedependencyparser module","nltk.parse.recursivedescent module","nltk.parse.shiftreduce module","nltk.parse.stanford module","nltk.parse.transitionparser module","nltk.parse.util module","nltk.parse.viterbi module","nltk.probability module","nltk.sem package","nltk.sem.boxer module","nltk.sem.chat80 module","nltk.sem.cooper_storage module","nltk.sem.drt module","nltk.sem.drt_glue_demo module","nltk.sem.evaluate module","nltk.sem.glue module","nltk.sem.hole module","nltk.sem.lfg module","nltk.sem.linearlogic module","nltk.sem.logic module","nltk.sem.relextract module","nltk.sem.skolemize module","nltk.sem.util module","nltk.sentiment package","nltk.sentiment.sentiment_analyzer module","nltk.sentiment.util module","nltk.sentiment.vader module","nltk.stem package","nltk.stem.api module","nltk.stem.arlstem module","nltk.stem.arlstem2 module","nltk.stem.cistem module","nltk.stem.isri module","nltk.stem.lancaster module","nltk.stem.porter module","nltk.stem.regexp module","nltk.stem.rslp module","nltk.stem.snowball module","nltk.stem.util module","nltk.stem.wordnet module","nltk.tag package","nltk.tag.api module","nltk.tag.brill module","nltk.tag.brill_trainer module","nltk.tag.crf module","nltk.tag.hmm module","nltk.tag.hunpos module","nltk.tag.mapping module","nltk.tag.perceptron module","nltk.tag.senna module","nltk.tag.sequential module","nltk.tag.stanford module","nltk.tag.tnt module","nltk.tag.util module","nltk.tbl package","nltk.tbl.demo module","nltk.tbl.erroranalysis module","nltk.tbl.feature module","nltk.tbl.rule module","nltk.tbl.template module","nltk.test package","nltk.test.all module","nltk.test.childes_fixt module","nltk.test.classify_fixt module","nltk.test.conftest module","nltk.test.discourse_fixt module","nltk.test.gensim_fixt module","nltk.test.gluesemantics_malt_fixt module","nltk.test.inference_fixt module","nltk.test.nonmonotonic_fixt module","nltk.test.portuguese_en_fixt module","nltk.test.probability_fixt module","nltk.test.unit package","nltk.test.unit.lm package","nltk.test.unit.lm.test_counter module","nltk.test.unit.lm.test_models module","nltk.test.unit.lm.test_preprocessing module","nltk.test.unit.lm.test_vocabulary module","nltk.test.unit.test_aline module","nltk.test.unit.test_brill module","nltk.test.unit.test_cfd_mutation module","nltk.test.unit.test_cfg2chomsky module","nltk.test.unit.test_chunk module","nltk.test.unit.test_classify module","nltk.test.unit.test_collocations module","nltk.test.unit.test_concordance module","nltk.test.unit.test_corenlp 
module","nltk.test.unit.test_corpora module","nltk.test.unit.test_corpus_views module","nltk.test.unit.test_data module","nltk.test.unit.test_disagreement module","nltk.test.unit.test_distance module","nltk.test.unit.test_freqdist module","nltk.test.unit.test_hmm module","nltk.test.unit.test_json2csv_corpus module","nltk.test.unit.test_json_serialization module","nltk.test.unit.test_metrics module","nltk.test.unit.test_naivebayes module","nltk.test.unit.test_nombank module","nltk.test.unit.test_pl196x module","nltk.test.unit.test_pos_tag module","nltk.test.unit.test_ribes module","nltk.test.unit.test_rte_classify module","nltk.test.unit.test_seekable_unicode_stream_reader module","nltk.test.unit.test_senna module","nltk.test.unit.test_stem module","nltk.test.unit.test_tag module","nltk.test.unit.test_tgrep module","nltk.test.unit.test_tokenize module","nltk.test.unit.test_twitter_auth module","nltk.test.unit.test_util module","nltk.test.unit.test_wordnet module","nltk.test.unit.translate package","nltk.test.unit.translate.test_bleu module","nltk.test.unit.translate.test_gdfa module","nltk.test.unit.translate.test_ibm1 module","nltk.test.unit.translate.test_ibm2 module","nltk.test.unit.translate.test_ibm3 module","nltk.test.unit.translate.test_ibm4 module","nltk.test.unit.translate.test_ibm5 module","nltk.test.unit.translate.test_ibm_model module","nltk.test.unit.translate.test_meteor module","nltk.test.unit.translate.test_nist module","nltk.test.unit.translate.test_stack_decoder module","nltk.text module","nltk.tgrep module","nltk.tokenize package","nltk.tokenize.api module","nltk.tokenize.casual module","nltk.tokenize.destructive module","nltk.tokenize.legality_principle module","nltk.tokenize.mwe module","nltk.tokenize.nist module","nltk.tokenize.punkt module","nltk.tokenize.regexp module","nltk.tokenize.repp module","nltk.tokenize.sexpr module","nltk.tokenize.simple module","nltk.tokenize.sonority_sequencing module","nltk.tokenize.stanford module","nltk.tokenize.stanford_segmenter module","nltk.tokenize.texttiling module","nltk.tokenize.toktok module","nltk.tokenize.treebank module","nltk.tokenize.util module","nltk.toolbox module","nltk.translate package","nltk.translate.api module","nltk.translate.bleu_score module","nltk.translate.chrf_score module","nltk.translate.gale_church module","nltk.translate.gdfa module","nltk.translate.gleu_score module","nltk.translate.ibm1 module","nltk.translate.ibm2 module","nltk.translate.ibm3 module","nltk.translate.ibm4 module","nltk.translate.ibm5 module","nltk.translate.ibm_model module","nltk.translate.meteor_score module","nltk.translate.metrics module","nltk.translate.nist_score module","nltk.translate.phrase_based module","nltk.translate.ribes_score module","nltk.translate.stack_decoder module","nltk.tree module","nltk.treeprettyprinter module","nltk.treetransforms module","nltk.twitter package","nltk.twitter.api module","nltk.twitter.common module","nltk.twitter.twitter_demo module","nltk.twitter.twitterclient module","nltk.twitter.util module","nltk.util module","nltk.wsd module","Contributing to NLTK","Installing NLTK Data","Natural Language Toolkit","Installing NLTK","Release Notes","Index","NLTK 
Team"],titleterms:{"2005":349,"2006":349,"2007":349,"2008":349,"2009":349,"2010":349,"2011":349,"2012":349,"2013":349,"2014":349,"2015":349,"2016":349,"2017":349,"2018":349,"2019":349,"2020":349,"2021":349,"32":348,"80":179,"do":347,"function":[55,57],agreement:141,align:58,alin:142,all:230,api:[13,26,31,47,59,122,133,157,197,210,296,316,338],app:[1,2,3,4,5,6,7,8,9,10],arlstem2:199,arlstem:198,associ:143,avail:55,babelfish:151,binari:348,bit:348,bleu_scor:317,bllip:158,bnc:60,book:11,boxer:178,bracket_pars:61,brill:211,brill_train:212,can:347,casual:297,categorized_s:62,caveat:294,ccg:[12,13,14,15,16,17],cfg:113,chart:[14,159],chartparser_app:2,chasen:63,chat80:179,chat:[18,19,20,21,22,23,24,179],child:64,childes_fixt:231,chomski:152,chrf_score:318,chunk:[25,26,27,28,29,65],chunkparser_app:3,cistem:200,classifi:[30,31,32,33,34,35,36,37,38,39,40,41,42,43,44],classify_fixt:232,cli:45,cluster:[46,47,48,49,50,51],cmudict:66,collect:52,colloc:53,collocations_app:4,combin:15,command:346,common:339,comparative_s:67,compat:54,concept:179,concordance_app:5,conftest:233,confusionmatrix:144,conll:68,content:98,contribut:345,convert:33,cooper_storag:180,corenlp:160,corpora:55,corpu:[55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108],counter:134,crf:213,crubadan:69,data:[109,132,346,348],decisiontre:32,decor:110,demo:224,depend:70,dependencygraph:161,destruct:298,directori:111,discours:123,discourse_fixt:234,dispers:114,distanc:145,download:111,draw:[112,113,114,115,116,117],drt:181,drt_glue_demo:182,earleychart:162,effici:25,eliza:19,em:48,emac:25,environ:348,erroranalysi:225,europarl_raw:56,evalu:[163,183],exampl:142,featstruct:118,featur:[30,33,118,226],featurechart:164,featureset:30,file:179,framenet:71,gaac:49,gale_church:319,gdfa:320,gener:165,gensim_fixt:235,gleu_scor:321,glue:184,gluesemantics_malt_fixt:236,grammar:119,help:120,hmm:214,hole:185,hunpo:215,ibm1:322,ibm2:323,ibm3:324,ibm4:325,ibm5:326,ibm_model:327,ieer:72,iesha:20,implement:294,index:350,indian:73,individu:179,infer:[121,122,123,124,125,126,127,128],inference_fixt:237,input:33,instal:[346,348],interact:346,intern:129,ipipan:74,isri:201,issu:[25,294],item:179,joint:33,jsontag:130,kmean:50,knbc:75,known:294,lancast:202,languag:[132,134,347],lazyimport:131,legality_principl:299,lexic:179,lexicon:16,lfg:186,lightweight:118,lin:76,line:346,linearlog:187,lm:[132,133,134,135,136,137,138,139,242,243,244,245,246],logic:[17,188],mac:348,mace:124,malt:166,manual:346,map:216,maxent:33,megam:34,meteor_scor:328,metric:[140,141,142,143,144,145,146,147,148,149,329],minimalset:153,misc:[150,151,152,153,154,155],model:[132,134,135],modul:[2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,19,20,21,22,23,24,26,27,28,29,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,113,114,115,116,117,118,119,120,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,141,142,143,144,145,146,147,148,149,151,152,153,154,155,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,178,179,180,181,182,183,184,185,186,187,188,189,190,191,193,194,195,197,198,199,200,201,202,203,204,205,206,207,208,210,211,212,213,214,215,216,217,218,219,220,221,222,224,225,226,227,228,230,231,232,233,234,235,236,237,238,239,240,243,244,245,246,247,2
48,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,282,283,284,285,286,287,288,289,290,291,292,293,294,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,338,339,340,341,342,343,344],mte:77,mwe:300,naivebay:35,named_ent:27,natur:347,nemo_app:6,next:347,nist:301,nist_scor:330,nkjp:78,nltk:[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,351],nombank:79,nonmonoton:125,nonmonotonic_fixt:238,nonprojectivedependencypars:167,note:[294,349],nps_chat:80,opinion_lexicon:81,overview:179,packag:[0,1,12,18,25,30,46,55,57,111,112,121,132,140,150,156,177,192,196,209,223,229,241,242,281,295,315,337],paic:146,panlex_lit:82,panlex_swadesh:83,pars:[156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175],parti:348,pattern:25,pchart:168,perceptron:217,persist:179,phrase_bas:331,pl196x:84,plaintext:85,porter:203,portuguese_en_fixt:239,positivenaivebay:36,ppattach:86,prepar:132,preprocess:136,probability_fixt:240,probabl:176,projectivedependencypars:169,propbank:87,pros_con:88,prover9:126,proxi:346,punkt:302,python:348,rdparser_app:7,read:179,reader:[55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107],recursivedesc:170,regexp:[28,204,303],regexpchunkpars:25,regexpchunkrul:25,releas:349,relextract:189,repp:304,resolut:127,review:89,ribes_scor:332,rslp:205,rte:90,rte_classifi:37,rude:21,rule:227,scikitlearn:38,score:147,search:294,segment:148,sem:[177,178,179,180,181,182,183,184,185,186,187,188,189,190,191],semcor:91,senna:[39,218],sensev:92,sentiment:[192,193,194,195],sentiment_analyz:193,sentiwordnet:93,sequenti:219,server:[111,346],set:348,sexpr:305,shiftreduc:171,simpl:[306,347],sinica_treebank:94,skolem:190,smooth:137,snowbal:206,softwar:348,some:347,sonority_sequenc:307,sort:154,spearman:149,srparser_app:8,stack_decod:333,stanford:[172,220,308],stanford_segment:309,stem:[196,197,198,199,200,201,202,203,204,205,206,207,208],step:347,string_categori:95,structur:118,submodul:[0,1,12,18,25,30,46,55,57,112,121,132,140,150,156,177,192,196,209,223,229,241,242,281,295,315,337],subp
ackag:[0,55,229,241],suntsu:22,svm:40,switchboard:96,tabl:115,tableau:128,tadm:41,tag:[25,97,209,210,211,212,213,214,215,216,217,218,219,220,221,222],tbl:[223,224,225,226,227,228],team:351,templat:228,terminolog:33,test:[229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292],test_alin:247,test_bleu:282,test_bril:248,test_cfd_mut:249,test_cfg2chomski:250,test_chunk:251,test_classifi:252,test_colloc:253,test_concord:254,test_corenlp:255,test_corpora:256,test_corpus_view:257,test_count:243,test_data:258,test_disagr:259,test_dist:260,test_freqdist:261,test_gdfa:283,test_hmm:262,test_ibm1:284,test_ibm2:285,test_ibm3:286,test_ibm4:287,test_ibm5:288,test_ibm_model:289,test_json2csv_corpu:263,test_json_seri:264,test_meteor:290,test_metr:265,test_model:244,test_naivebay:266,test_nist:291,test_nombank:267,test_pl196x:268,test_pos_tag:269,test_preprocess:245,test_rib:270,test_rte_classifi:271,test_seekable_unicode_stream_read:272,test_senna:273,test_stack_decod:292,test_stem:274,test_tag:275,test_tgrep:276,test_token:277,test_twitter_auth:278,test_util:279,test_vocabulari:246,test_wordnet:280,text:293,textcat:42,texttil:310,tgrep:294,thing:347,third:348,timit:98,tip:25,tnt:221,token:[295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313],toktok:311,toolbox:[99,314],toolkit:347,train:[30,132],transitionpars:173,translat:[281,282,283,284,285,286,287,288,289,290,291,292,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333],tree:[116,294,334],treebank:312,treeprettyprint:335,treetransform:336,twitter:[100,337,338,339,340,341,342],twitter_demo:340,twittercli:341,udhr:101,unit:[241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292],unix:348,unresolv:25,up:348,us:132,usag:[142,294],util:[23,29,43,51,102,108,117,138,174,191,194,207,222,313,342,343],vader:195,verbnet:103,via:346,viterbi:175,vocabulari:139,web:346,weka:44,window:348,wordfind:155,wordfreq_app:9,wordlist:104,wordnet:[105,208],wordnet_app:10,wsd:344,xmldoc:106,ycoe:107,you:347,zen:24}}) \ No newline at end of file 
+Search.setIndex({docnames:["api/nltk","api/nltk.app","api/nltk.app.chartparser_app","api/nltk.app.chunkparser_app","api/nltk.app.collocations_app","api/nltk.app.concordance_app","api/nltk.app.nemo_app","api/nltk.app.rdparser_app","api/nltk.app.srparser_app","api/nltk.app.wordfreq_app","api/nltk.app.wordnet_app","api/nltk.book","api/nltk.ccg","api/nltk.ccg.api","api/nltk.ccg.chart","api/nltk.ccg.combinator","api/nltk.ccg.lexicon","api/nltk.ccg.logic","api/nltk.chat","api/nltk.chat.eliza","api/nltk.chat.iesha","api/nltk.chat.rude","api/nltk.chat.suntsu","api/nltk.chat.util","api/nltk.chat.zen","api/nltk.chunk","api/nltk.chunk.api","api/nltk.chunk.named_entity","api/nltk.chunk.regexp","api/nltk.chunk.util","api/nltk.classify","api/nltk.classify.api","api/nltk.classify.decisiontree","api/nltk.classify.maxent","api/nltk.classify.megam","api/nltk.classify.naivebayes","api/nltk.classify.positivenaivebayes","api/nltk.classify.rte_classify","api/nltk.classify.scikitlearn","api/nltk.classify.senna","api/nltk.classify.svm","api/nltk.classify.tadm","api/nltk.classify.textcat","api/nltk.classify.util","api/nltk.classify.weka","api/nltk.cli","api/nltk.cluster","api/nltk.cluster.api","api/nltk.cluster.em","api/nltk.cluster.gaac","api/nltk.cluster.kmeans","api/nltk.cluster.util","api/nltk.collections","api/nltk.collocations","api/nltk.compat","api/nltk.corpus","api/nltk.corpus.europarl_raw","api/nltk.corpus.reader","api/nltk.corpus.reader.aligned","api/nltk.corpus.reader.api","api/nltk.corpus.reader.bnc","api/nltk.corpus.reader.bracket_parse","api/nltk.corpus.reader.categorized_sents","api/nltk.corpus.reader.chasen","api/nltk.corpus.reader.childes","api/nltk.corpus.reader.chunked","api/nltk.corpus.reader.cmudict","api/nltk.corpus.reader.comparative_sents","api/nltk.corpus.reader.conll","api/nltk.corpus.reader.crubadan","api/nltk.corpus.reader.dependency","api/nltk.corpus.reader.framenet","api/nltk.corpus.reader.ieer","api/nltk.corpus.reader.indian","api/nltk.corpus.reader.ipipan","api/nltk.corpus.reader.knbc","api/nltk.corpus.reader.lin","api/nltk.corpus.reader.mte","api/nltk.corpus.reader.nkjp","api/nltk.corpus.reader.nombank","api/nltk.corpus.reader.nps_chat","api/nltk.corpus.reader.opinion_lexicon","api/nltk.corpus.reader.panlex_lite","api/nltk.corpus.reader.panlex_swadesh","api/nltk.corpus.reader.pl196x","api/nltk.corpus.reader.plaintext","api/nltk.corpus.reader.ppattach","api/nltk.corpus.reader.propbank","api/nltk.corpus.reader.pros_cons","api/nltk.corpus.reader.reviews","api/nltk.corpus.reader.rte","api/nltk.corpus.reader.semcor","api/nltk.corpus.reader.senseval","api/nltk.corpus.reader.sentiwordnet","api/nltk.corpus.reader.sinica_treebank","api/nltk.corpus.reader.string_category","api/nltk.corpus.reader.switchboard","api/nltk.corpus.reader.tagged","api/nltk.corpus.reader.timit","api/nltk.corpus.reader.toolbox","api/nltk.corpus.reader.twitter","api/nltk.corpus.reader.udhr","api/nltk.corpus.reader.util","api/nltk.corpus.reader.verbnet","api/nltk.corpus.reader.wordlist","api/nltk.corpus.reader.wordnet","api/nltk.corpus.reader.xmldocs","api/nltk.corpus.reader.ycoe","api/nltk.corpus.util","api/nltk.data","api/nltk.decorators","api/nltk.downloader","api/nltk.draw","api/nltk.draw.cfg","api/nltk.draw.dispersion","api/nltk.draw.table","api/nltk.draw.tree","api/nltk.draw.util","api/nltk.featstruct","api/nltk.grammar","api/nltk.help","api/nltk.inference","api/nltk.inference.api","api/nltk.inference.discourse","api/nltk.inference.mace","api/nltk.inference.nonmonotonic","api/nltk.inference.prover9","api/nltk.in
ference.resolution","api/nltk.inference.tableau","api/nltk.internals","api/nltk.jsontags","api/nltk.lazyimport","api/nltk.lm","api/nltk.lm.api","api/nltk.lm.counter","api/nltk.lm.models","api/nltk.lm.preprocessing","api/nltk.lm.smoothing","api/nltk.lm.util","api/nltk.lm.vocabulary","api/nltk.metrics","api/nltk.metrics.agreement","api/nltk.metrics.aline","api/nltk.metrics.association","api/nltk.metrics.confusionmatrix","api/nltk.metrics.distance","api/nltk.metrics.paice","api/nltk.metrics.scores","api/nltk.metrics.segmentation","api/nltk.metrics.spearman","api/nltk.misc","api/nltk.misc.babelfish","api/nltk.misc.chomsky","api/nltk.misc.minimalset","api/nltk.misc.sort","api/nltk.misc.wordfinder","api/nltk.parse","api/nltk.parse.api","api/nltk.parse.bllip","api/nltk.parse.chart","api/nltk.parse.corenlp","api/nltk.parse.dependencygraph","api/nltk.parse.earleychart","api/nltk.parse.evaluate","api/nltk.parse.featurechart","api/nltk.parse.generate","api/nltk.parse.malt","api/nltk.parse.nonprojectivedependencyparser","api/nltk.parse.pchart","api/nltk.parse.projectivedependencyparser","api/nltk.parse.recursivedescent","api/nltk.parse.shiftreduce","api/nltk.parse.stanford","api/nltk.parse.transitionparser","api/nltk.parse.util","api/nltk.parse.viterbi","api/nltk.probability","api/nltk.sem","api/nltk.sem.boxer","api/nltk.sem.chat80","api/nltk.sem.cooper_storage","api/nltk.sem.drt","api/nltk.sem.drt_glue_demo","api/nltk.sem.evaluate","api/nltk.sem.glue","api/nltk.sem.hole","api/nltk.sem.lfg","api/nltk.sem.linearlogic","api/nltk.sem.logic","api/nltk.sem.relextract","api/nltk.sem.skolemize","api/nltk.sem.util","api/nltk.sentiment","api/nltk.sentiment.sentiment_analyzer","api/nltk.sentiment.util","api/nltk.sentiment.vader","api/nltk.stem","api/nltk.stem.api","api/nltk.stem.arlstem","api/nltk.stem.arlstem2","api/nltk.stem.cistem","api/nltk.stem.isri","api/nltk.stem.lancaster","api/nltk.stem.porter","api/nltk.stem.regexp","api/nltk.stem.rslp","api/nltk.stem.snowball","api/nltk.stem.util","api/nltk.stem.wordnet","api/nltk.tag","api/nltk.tag.api","api/nltk.tag.brill","api/nltk.tag.brill_trainer","api/nltk.tag.crf","api/nltk.tag.hmm","api/nltk.tag.hunpos","api/nltk.tag.mapping","api/nltk.tag.perceptron","api/nltk.tag.senna","api/nltk.tag.sequential","api/nltk.tag.stanford","api/nltk.tag.tnt","api/nltk.tag.util","api/nltk.tbl","api/nltk.tbl.demo","api/nltk.tbl.erroranalysis","api/nltk.tbl.feature","api/nltk.tbl.rule","api/nltk.tbl.template","api/nltk.test","api/nltk.test.all","api/nltk.test.childes_fixt","api/nltk.test.classify_fixt","api/nltk.test.conftest","api/nltk.test.discourse_fixt","api/nltk.test.gensim_fixt","api/nltk.test.gluesemantics_malt_fixt","api/nltk.test.inference_fixt","api/nltk.test.nonmonotonic_fixt","api/nltk.test.portuguese_en_fixt","api/nltk.test.probability_fixt","api/nltk.test.unit","api/nltk.test.unit.lm","api/nltk.test.unit.lm.test_counter","api/nltk.test.unit.lm.test_models","api/nltk.test.unit.lm.test_preprocessing","api/nltk.test.unit.lm.test_vocabulary","api/nltk.test.unit.test_aline","api/nltk.test.unit.test_brill","api/nltk.test.unit.test_cfd_mutation","api/nltk.test.unit.test_cfg2chomsky","api/nltk.test.unit.test_chunk","api/nltk.test.unit.test_classify","api/nltk.test.unit.test_collocations","api/nltk.test.unit.test_concordance","api/nltk.test.unit.test_corenlp","api/nltk.test.unit.test_corpora","api/nltk.test.unit.test_corpus_views","api/nltk.test.unit.test_data","api/nltk.test.unit.test_disagreement","api/nltk.test.unit.test_distance","api/nltk.test.unit.test_freqdist","api/nl
tk.test.unit.test_hmm","api/nltk.test.unit.test_json2csv_corpus","api/nltk.test.unit.test_json_serialization","api/nltk.test.unit.test_metrics","api/nltk.test.unit.test_naivebayes","api/nltk.test.unit.test_nombank","api/nltk.test.unit.test_pl196x","api/nltk.test.unit.test_pos_tag","api/nltk.test.unit.test_ribes","api/nltk.test.unit.test_rte_classify","api/nltk.test.unit.test_seekable_unicode_stream_reader","api/nltk.test.unit.test_senna","api/nltk.test.unit.test_stem","api/nltk.test.unit.test_tag","api/nltk.test.unit.test_tgrep","api/nltk.test.unit.test_tokenize","api/nltk.test.unit.test_twitter_auth","api/nltk.test.unit.test_util","api/nltk.test.unit.test_wordnet","api/nltk.test.unit.translate","api/nltk.test.unit.translate.test_bleu","api/nltk.test.unit.translate.test_gdfa","api/nltk.test.unit.translate.test_ibm1","api/nltk.test.unit.translate.test_ibm2","api/nltk.test.unit.translate.test_ibm3","api/nltk.test.unit.translate.test_ibm4","api/nltk.test.unit.translate.test_ibm5","api/nltk.test.unit.translate.test_ibm_model","api/nltk.test.unit.translate.test_meteor","api/nltk.test.unit.translate.test_nist","api/nltk.test.unit.translate.test_stack_decoder","api/nltk.text","api/nltk.tgrep","api/nltk.tokenize","api/nltk.tokenize.api","api/nltk.tokenize.casual","api/nltk.tokenize.destructive","api/nltk.tokenize.legality_principle","api/nltk.tokenize.mwe","api/nltk.tokenize.nist","api/nltk.tokenize.punkt","api/nltk.tokenize.regexp","api/nltk.tokenize.repp","api/nltk.tokenize.sexpr","api/nltk.tokenize.simple","api/nltk.tokenize.sonority_sequencing","api/nltk.tokenize.stanford","api/nltk.tokenize.stanford_segmenter","api/nltk.tokenize.texttiling","api/nltk.tokenize.toktok","api/nltk.tokenize.treebank","api/nltk.tokenize.util","api/nltk.toolbox","api/nltk.translate","api/nltk.translate.api","api/nltk.translate.bleu_score","api/nltk.translate.chrf_score","api/nltk.translate.gale_church","api/nltk.translate.gdfa","api/nltk.translate.gleu_score","api/nltk.translate.ibm1","api/nltk.translate.ibm2","api/nltk.translate.ibm3","api/nltk.translate.ibm4","api/nltk.translate.ibm5","api/nltk.translate.ibm_model","api/nltk.translate.meteor_score","api/nltk.translate.metrics","api/nltk.translate.nist_score","api/nltk.translate.phrase_based","api/nltk.translate.ribes_score","api/nltk.translate.stack_decoder","api/nltk.tree","api/nltk.treeprettyprinter","api/nltk.treetransforms","api/nltk.twitter","api/nltk.twitter.api","api/nltk.twitter.common","api/nltk.twitter.twitter_demo","api/nltk.twitter.twitterclient","api/nltk.twitter.util","api/nltk.util","api/nltk.wsd","contribute","data","howto","howto/bleu","howto/bnc","howto/ccg","howto/ccg_semantics","howto/chat80","howto/childes","howto/chunk","howto/classify","howto/collections","howto/collocations","howto/concordance","howto/corpus","howto/crubadan","howto/data","howto/dependency","howto/discourse","howto/drt","howto/featgram","howto/featstruct","howto/framenet","howto/generate","howto/gensim","howto/gluesemantics","howto/gluesemantics_malt","howto/grammar","howto/grammartestsuites","howto/inference","howto/internals","howto/japanese","howto/lm","howto/logic","howto/meteor","howto/metrics","howto/misc","howto/nonmonotonic","howto/paice","howto/parse","howto/portuguese_en","howto/probability","howto/propbank","howto/relextract","howto/resolution","howto/semantics","howto/sentiment","howto/sentiwordnet","howto/simple","howto/stem","howto/tag","howto/tokenize","howto/toolbox","howto/translate","howto/tree","howto/treeprettyprinter","howto/treetransforms","howto/util",
"howto/wordnet","howto/wordnet_lch","howto/wsd","index","install","news","py-modindex","team"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.viewcode":1,sphinx:56},filenames:["api/nltk.rst","api/nltk.app.rst","api/nltk.app.chartparser_app.rst","api/nltk.app.chunkparser_app.rst","api/nltk.app.collocations_app.rst","api/nltk.app.concordance_app.rst","api/nltk.app.nemo_app.rst","api/nltk.app.rdparser_app.rst","api/nltk.app.srparser_app.rst","api/nltk.app.wordfreq_app.rst","api/nltk.app.wordnet_app.rst","api/nltk.book.rst","api/nltk.ccg.rst","api/nltk.ccg.api.rst","api/nltk.ccg.chart.rst","api/nltk.ccg.combinator.rst","api/nltk.ccg.lexicon.rst","api/nltk.ccg.logic.rst","api/nltk.chat.rst","api/nltk.chat.eliza.rst","api/nltk.chat.iesha.rst","api/nltk.chat.rude.rst","api/nltk.chat.suntsu.rst","api/nltk.chat.util.rst","api/nltk.chat.zen.rst","api/nltk.chunk.rst","api/nltk.chunk.api.rst","api/nltk.chunk.named_entity.rst","api/nltk.chunk.regexp.rst","api/nltk.chunk.util.rst","api/nltk.classify.rst","api/nltk.classify.api.rst","api/nltk.classify.decisiontree.rst","api/nltk.classify.maxent.rst","api/nltk.classify.megam.rst","api/nltk.classify.naivebayes.rst","api/nltk.classify.positivenaivebayes.rst","api/nltk.classify.rte_classify.rst","api/nltk.classify.scikitlearn.rst","api/nltk.classify.senna.rst","api/nltk.classify.svm.rst","api/nltk.classify.tadm.rst","api/nltk.classify.textcat.rst","api/nltk.classify.util.rst","api/nltk.classify.weka.rst","api/nltk.cli.rst","api/nltk.cluster.rst","api/nltk.cluster.api.rst","api/nltk.cluster.em.rst","api/nltk.cluster.gaac.rst","api/nltk.cluster.kmeans.rst","api/nltk.cluster.util.rst","api/nltk.collections.rst","api/nltk.collocations.rst","api/nltk.compat.rst","api/nltk.corpus.rst","api/nltk.corpus.europarl_raw.rst","api/nltk.corpus.reader.rst","api/nltk.corpus.reader.aligned.rst","api/nltk.corpus.reader.api.rst","api/nltk.corpus.reader.bnc.rst","api/nltk.corpus.reader.bracket_parse.rst","api/nltk.corpus.reader.categorized_sents.rst","api/nltk.corpus.reader.chasen.rst","api/nltk.corpus.reader.childes.rst","api/nltk.corpus.reader.chunked.rst","api/nltk.corpus.reader.cmudict.rst","api/nltk.corpus.reader.comparative_sents.rst","api/nltk.corpus.reader.conll.rst","api/nltk.corpus.reader.crubadan.rst","api/nltk.corpus.reader.dependency.rst","api/nltk.corpus.reader.framenet.rst","api/nltk.corpus.reader.ieer.rst","api/nltk.corpus.reader.indian.rst","api/nltk.corpus.reader.ipipan.rst","api/nltk.corpus.reader.knbc.rst","api/nltk.corpus.reader.lin.rst","api/nltk.corpus.reader.mte.rst","api/nltk.corpus.reader.nkjp.rst","api/nltk.corpus.reader.nombank.rst","api/nltk.corpus.reader.nps_chat.rst","api/nltk.corpus.reader.opinion_lexicon.rst","api/nltk.corpus.reader.panlex_lite.rst","api/nltk.corpus.reader.panlex_swadesh.rst","api/nltk.corpus.reader.pl196x.rst","api/nltk.corpus.reader.plaintext.rst","api/nltk.corpus.reader.ppattach.rst","api/nltk.corpus.reader.propbank.rst","api/nltk.corpus.reader.pros_cons.rst","api/nltk.corpus.reader.reviews.rst","api/nltk.corpus.reader.rte.rst","api/nltk.corpus.reader.semcor.rst","api/nltk.corpus.reader.senseval.rst","api/nltk.corpus.reader.sentiwordnet.rst","api/nltk.corpus.reader.sinica_treebank.rst","api/nltk.corpus.reader.string_category.rst","api/nltk.corpus.reader.switchboard.rst","api/nltk.corpus.reader.tagged.r
st","api/nltk.corpus.reader.timit.rst","api/nltk.corpus.reader.toolbox.rst","api/nltk.corpus.reader.twitter.rst","api/nltk.corpus.reader.udhr.rst","api/nltk.corpus.reader.util.rst","api/nltk.corpus.reader.verbnet.rst","api/nltk.corpus.reader.wordlist.rst","api/nltk.corpus.reader.wordnet.rst","api/nltk.corpus.reader.xmldocs.rst","api/nltk.corpus.reader.ycoe.rst","api/nltk.corpus.util.rst","api/nltk.data.rst","api/nltk.decorators.rst","api/nltk.downloader.rst","api/nltk.draw.rst","api/nltk.draw.cfg.rst","api/nltk.draw.dispersion.rst","api/nltk.draw.table.rst","api/nltk.draw.tree.rst","api/nltk.draw.util.rst","api/nltk.featstruct.rst","api/nltk.grammar.rst","api/nltk.help.rst","api/nltk.inference.rst","api/nltk.inference.api.rst","api/nltk.inference.discourse.rst","api/nltk.inference.mace.rst","api/nltk.inference.nonmonotonic.rst","api/nltk.inference.prover9.rst","api/nltk.inference.resolution.rst","api/nltk.inference.tableau.rst","api/nltk.internals.rst","api/nltk.jsontags.rst","api/nltk.lazyimport.rst","api/nltk.lm.rst","api/nltk.lm.api.rst","api/nltk.lm.counter.rst","api/nltk.lm.models.rst","api/nltk.lm.preprocessing.rst","api/nltk.lm.smoothing.rst","api/nltk.lm.util.rst","api/nltk.lm.vocabulary.rst","api/nltk.metrics.rst","api/nltk.metrics.agreement.rst","api/nltk.metrics.aline.rst","api/nltk.metrics.association.rst","api/nltk.metrics.confusionmatrix.rst","api/nltk.metrics.distance.rst","api/nltk.metrics.paice.rst","api/nltk.metrics.scores.rst","api/nltk.metrics.segmentation.rst","api/nltk.metrics.spearman.rst","api/nltk.misc.rst","api/nltk.misc.babelfish.rst","api/nltk.misc.chomsky.rst","api/nltk.misc.minimalset.rst","api/nltk.misc.sort.rst","api/nltk.misc.wordfinder.rst","api/nltk.parse.rst","api/nltk.parse.api.rst","api/nltk.parse.bllip.rst","api/nltk.parse.chart.rst","api/nltk.parse.corenlp.rst","api/nltk.parse.dependencygraph.rst","api/nltk.parse.earleychart.rst","api/nltk.parse.evaluate.rst","api/nltk.parse.featurechart.rst","api/nltk.parse.generate.rst","api/nltk.parse.malt.rst","api/nltk.parse.nonprojectivedependencyparser.rst","api/nltk.parse.pchart.rst","api/nltk.parse.projectivedependencyparser.rst","api/nltk.parse.recursivedescent.rst","api/nltk.parse.shiftreduce.rst","api/nltk.parse.stanford.rst","api/nltk.parse.transitionparser.rst","api/nltk.parse.util.rst","api/nltk.parse.viterbi.rst","api/nltk.probability.rst","api/nltk.sem.rst","api/nltk.sem.boxer.rst","api/nltk.sem.chat80.rst","api/nltk.sem.cooper_storage.rst","api/nltk.sem.drt.rst","api/nltk.sem.drt_glue_demo.rst","api/nltk.sem.evaluate.rst","api/nltk.sem.glue.rst","api/nltk.sem.hole.rst","api/nltk.sem.lfg.rst","api/nltk.sem.linearlogic.rst","api/nltk.sem.logic.rst","api/nltk.sem.relextract.rst","api/nltk.sem.skolemize.rst","api/nltk.sem.util.rst","api/nltk.sentiment.rst","api/nltk.sentiment.sentiment_analyzer.rst","api/nltk.sentiment.util.rst","api/nltk.sentiment.vader.rst","api/nltk.stem.rst","api/nltk.stem.api.rst","api/nltk.stem.arlstem.rst","api/nltk.stem.arlstem2.rst","api/nltk.stem.cistem.rst","api/nltk.stem.isri.rst","api/nltk.stem.lancaster.rst","api/nltk.stem.porter.rst","api/nltk.stem.regexp.rst","api/nltk.stem.rslp.rst","api/nltk.stem.snowball.rst","api/nltk.stem.util.rst","api/nltk.stem.wordnet.rst","api/nltk.tag.rst","api/nltk.tag.api.rst","api/nltk.tag.brill.rst","api/nltk.tag.brill_trainer.rst","api/nltk.tag.crf.rst","api/nltk.tag.hmm.rst","api/nltk.tag.hunpos.rst","api/nltk.tag.mapping.rst","api/nltk.tag.perceptron.rst","api/nltk.tag.senna.rst","api/nltk.tag.sequential.rst","api/nltk.tag.stanford.rst","
api/nltk.tag.tnt.rst","api/nltk.tag.util.rst","api/nltk.tbl.rst","api/nltk.tbl.demo.rst","api/nltk.tbl.erroranalysis.rst","api/nltk.tbl.feature.rst","api/nltk.tbl.rule.rst","api/nltk.tbl.template.rst","api/nltk.test.rst","api/nltk.test.all.rst","api/nltk.test.childes_fixt.rst","api/nltk.test.classify_fixt.rst","api/nltk.test.conftest.rst","api/nltk.test.discourse_fixt.rst","api/nltk.test.gensim_fixt.rst","api/nltk.test.gluesemantics_malt_fixt.rst","api/nltk.test.inference_fixt.rst","api/nltk.test.nonmonotonic_fixt.rst","api/nltk.test.portuguese_en_fixt.rst","api/nltk.test.probability_fixt.rst","api/nltk.test.unit.rst","api/nltk.test.unit.lm.rst","api/nltk.test.unit.lm.test_counter.rst","api/nltk.test.unit.lm.test_models.rst","api/nltk.test.unit.lm.test_preprocessing.rst","api/nltk.test.unit.lm.test_vocabulary.rst","api/nltk.test.unit.test_aline.rst","api/nltk.test.unit.test_brill.rst","api/nltk.test.unit.test_cfd_mutation.rst","api/nltk.test.unit.test_cfg2chomsky.rst","api/nltk.test.unit.test_chunk.rst","api/nltk.test.unit.test_classify.rst","api/nltk.test.unit.test_collocations.rst","api/nltk.test.unit.test_concordance.rst","api/nltk.test.unit.test_corenlp.rst","api/nltk.test.unit.test_corpora.rst","api/nltk.test.unit.test_corpus_views.rst","api/nltk.test.unit.test_data.rst","api/nltk.test.unit.test_disagreement.rst","api/nltk.test.unit.test_distance.rst","api/nltk.test.unit.test_freqdist.rst","api/nltk.test.unit.test_hmm.rst","api/nltk.test.unit.test_json2csv_corpus.rst","api/nltk.test.unit.test_json_serialization.rst","api/nltk.test.unit.test_metrics.rst","api/nltk.test.unit.test_naivebayes.rst","api/nltk.test.unit.test_nombank.rst","api/nltk.test.unit.test_pl196x.rst","api/nltk.test.unit.test_pos_tag.rst","api/nltk.test.unit.test_ribes.rst","api/nltk.test.unit.test_rte_classify.rst","api/nltk.test.unit.test_seekable_unicode_stream_reader.rst","api/nltk.test.unit.test_senna.rst","api/nltk.test.unit.test_stem.rst","api/nltk.test.unit.test_tag.rst","api/nltk.test.unit.test_tgrep.rst","api/nltk.test.unit.test_tokenize.rst","api/nltk.test.unit.test_twitter_auth.rst","api/nltk.test.unit.test_util.rst","api/nltk.test.unit.test_wordnet.rst","api/nltk.test.unit.translate.rst","api/nltk.test.unit.translate.test_bleu.rst","api/nltk.test.unit.translate.test_gdfa.rst","api/nltk.test.unit.translate.test_ibm1.rst","api/nltk.test.unit.translate.test_ibm2.rst","api/nltk.test.unit.translate.test_ibm3.rst","api/nltk.test.unit.translate.test_ibm4.rst","api/nltk.test.unit.translate.test_ibm5.rst","api/nltk.test.unit.translate.test_ibm_model.rst","api/nltk.test.unit.translate.test_meteor.rst","api/nltk.test.unit.translate.test_nist.rst","api/nltk.test.unit.translate.test_stack_decoder.rst","api/nltk.text.rst","api/nltk.tgrep.rst","api/nltk.tokenize.rst","api/nltk.tokenize.api.rst","api/nltk.tokenize.casual.rst","api/nltk.tokenize.destructive.rst","api/nltk.tokenize.legality_principle.rst","api/nltk.tokenize.mwe.rst","api/nltk.tokenize.nist.rst","api/nltk.tokenize.punkt.rst","api/nltk.tokenize.regexp.rst","api/nltk.tokenize.repp.rst","api/nltk.tokenize.sexpr.rst","api/nltk.tokenize.simple.rst","api/nltk.tokenize.sonority_sequencing.rst","api/nltk.tokenize.stanford.rst","api/nltk.tokenize.stanford_segmenter.rst","api/nltk.tokenize.texttiling.rst","api/nltk.tokenize.toktok.rst","api/nltk.tokenize.treebank.rst","api/nltk.tokenize.util.rst","api/nltk.toolbox.rst","api/nltk.translate.rst","api/nltk.translate.api.rst","api/nltk.translate.bleu_score.rst","api/nltk.translate.chrf_score.rst","api/nltk.translate.gale_c
hurch.rst","api/nltk.translate.gdfa.rst","api/nltk.translate.gleu_score.rst","api/nltk.translate.ibm1.rst","api/nltk.translate.ibm2.rst","api/nltk.translate.ibm3.rst","api/nltk.translate.ibm4.rst","api/nltk.translate.ibm5.rst","api/nltk.translate.ibm_model.rst","api/nltk.translate.meteor_score.rst","api/nltk.translate.metrics.rst","api/nltk.translate.nist_score.rst","api/nltk.translate.phrase_based.rst","api/nltk.translate.ribes_score.rst","api/nltk.translate.stack_decoder.rst","api/nltk.tree.rst","api/nltk.treeprettyprinter.rst","api/nltk.treetransforms.rst","api/nltk.twitter.rst","api/nltk.twitter.api.rst","api/nltk.twitter.common.rst","api/nltk.twitter.twitter_demo.rst","api/nltk.twitter.twitterclient.rst","api/nltk.twitter.util.rst","api/nltk.util.rst","api/nltk.wsd.rst","contribute.rst","data.rst","howto.rst","howto/bleu.rst","howto/bnc.rst","howto/ccg.rst","howto/ccg_semantics.rst","howto/chat80.rst","howto/childes.rst","howto/chunk.rst","howto/classify.rst","howto/collections.rst","howto/collocations.rst","howto/concordance.rst","howto/corpus.rst","howto/crubadan.rst","howto/data.rst","howto/dependency.rst","howto/discourse.rst","howto/drt.rst","howto/featgram.rst","howto/featstruct.rst","howto/framenet.rst","howto/generate.rst","howto/gensim.rst","howto/gluesemantics.rst","howto/gluesemantics_malt.rst","howto/grammar.rst","howto/grammartestsuites.rst","howto/inference.rst","howto/internals.rst","howto/japanese.rst","howto/lm.rst","howto/logic.rst","howto/meteor.rst","howto/metrics.rst","howto/misc.rst","howto/nonmonotonic.rst","howto/paice.rst","howto/parse.rst","howto/portuguese_en.rst","howto/probability.rst","howto/propbank.rst","howto/relextract.rst","howto/resolution.rst","howto/semantics.rst","howto/sentiment.rst","howto/sentiwordnet.rst","howto/simple.rst","howto/stem.rst","howto/tag.rst","howto/tokenize.rst","howto/toolbox.rst","howto/translate.rst","howto/tree.rst","howto/treeprettyprinter.rst","howto/treetransforms.rst","howto/util.rst","howto/wordnet.rst","howto/wordnet_lch.rst","howto/wsd.rst","index.rst","install.rst","news.rst","py-modindex.rst","team.rst"],objects:{"":{nltk:[0,0,0,"-"]},"nltk.app":{chartparser_app:[2,0,0,"-"],chunkparser_app:[3,0,0,"-"],collocations_app:[4,0,0,"-"],concordance_app:[5,0,0,"-"],nemo_app:[6,0,0,"-"],rdparser_app:[7,0,0,"-"],srparser_app:[8,0,0,"-"],wordnet_app:[10,0,0,"-"]},"nltk.app.chartparser_app":{app:[2,1,1,""]},"nltk.app.chunkparser_app":{app:[3,1,1,""]},"nltk.app.collocations_app":{app:[4,1,1,""]},"nltk.app.concordance_app":{app:[5,1,1,""]},"nltk.app.nemo_app":{app:[6,1,1,""]},"nltk.app.rdparser_app":{app:[7,1,1,""]},"nltk.app.srparser_app":{app:[8,1,1,""]},"nltk.app.wordnet_app":{app:[10,1,1,""]},"nltk.book":{sents:[11,1,1,""],texts:[11,1,1,""]},"nltk.ccg":{api:[13,0,0,"-"],chart:[14,0,0,"-"],combinator:[15,0,0,"-"],lexicon:[16,0,0,"-"],logic:[17,0,0,"-"]},"nltk.ccg.api":{AbstractCCGCategory:[13,2,1,""],CCGVar:[13,2,1,""],Direction:[13,2,1,""],FunctionalCategory:[13,2,1,""],PrimitiveCategory:[13,2,1,""]},"nltk.ccg.api.AbstractCCGCategory":{can_unify:[13,3,1,""],is_function:[13,3,1,""],is_primitive:[13,3,1,""],is_var:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.api.CCGVar":{__init__:[13,3,1,""],can_unify:[13,3,1,""],id:[13,3,1,""],is_function:[13,3,1,""],is_primitive:[13,3,1,""],is_var:[13,3,1,""],new_id:[13,3,1,""],reset_id:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.api.Direction":{__init__:[13,3,1,""],can_compose:[13,3,1,""],can_cross:[13,3,1,""],can_unify:[13,3,1,""],dir:[13,3,1,""],is_backward:[13,3,1,""],is_forward:[13,3,
1,""],is_variable:[13,3,1,""],restrs:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.api.FunctionalCategory":{__init__:[13,3,1,""],arg:[13,3,1,""],can_unify:[13,3,1,""],dir:[13,3,1,""],is_function:[13,3,1,""],is_primitive:[13,3,1,""],is_var:[13,3,1,""],res:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.api.PrimitiveCategory":{__init__:[13,3,1,""],can_unify:[13,3,1,""],categ:[13,3,1,""],is_function:[13,3,1,""],is_primitive:[13,3,1,""],is_var:[13,3,1,""],restrs:[13,3,1,""],substitute:[13,3,1,""]},"nltk.ccg.chart":{BackwardTypeRaiseRule:[14,2,1,""],BinaryCombinatorRule:[14,2,1,""],CCGChart:[14,2,1,""],CCGChartParser:[14,2,1,""],CCGEdge:[14,2,1,""],CCGLeafEdge:[14,2,1,""],ForwardTypeRaiseRule:[14,2,1,""],compute_semantics:[14,1,1,""],demo:[14,1,1,""],printCCGDerivation:[14,1,1,""],printCCGTree:[14,1,1,""]},"nltk.ccg.chart.BackwardTypeRaiseRule":{NUMEDGES:[14,4,1,""],__init__:[14,3,1,""],apply:[14,3,1,""]},"nltk.ccg.chart.BinaryCombinatorRule":{NUMEDGES:[14,4,1,""],__init__:[14,3,1,""],apply:[14,3,1,""]},"nltk.ccg.chart.CCGChart":{__init__:[14,3,1,""]},"nltk.ccg.chart.CCGChartParser":{__init__:[14,3,1,""],lexicon:[14,3,1,""],parse:[14,3,1,""]},"nltk.ccg.chart.CCGEdge":{__init__:[14,3,1,""],categ:[14,3,1,""],dot:[14,3,1,""],end:[14,3,1,""],is_complete:[14,3,1,""],is_incomplete:[14,3,1,""],length:[14,3,1,""],lhs:[14,3,1,""],nextsym:[14,3,1,""],rhs:[14,3,1,""],rule:[14,3,1,""],span:[14,3,1,""],start:[14,3,1,""]},"nltk.ccg.chart.CCGLeafEdge":{__init__:[14,3,1,""],categ:[14,3,1,""],dot:[14,3,1,""],end:[14,3,1,""],is_complete:[14,3,1,""],is_incomplete:[14,3,1,""],leaf:[14,3,1,""],length:[14,3,1,""],lhs:[14,3,1,""],nextsym:[14,3,1,""],rhs:[14,3,1,""],span:[14,3,1,""],start:[14,3,1,""],token:[14,3,1,""]},"nltk.ccg.chart.ForwardTypeRaiseRule":{NUMEDGES:[14,4,1,""],__init__:[14,3,1,""],apply:[14,3,1,""]},"nltk.ccg.combinator":{BackwardCombinator:[15,2,1,""],DirectedBinaryCombinator:[15,2,1,""],ForwardCombinator:[15,2,1,""],UndirectedBinaryCombinator:[15,2,1,""],UndirectedComposition:[15,2,1,""],UndirectedFunctionApplication:[15,2,1,""],UndirectedSubstitution:[15,2,1,""],UndirectedTypeRaise:[15,2,1,""],backwardBxConstraint:[15,1,1,""],backwardOnly:[15,1,1,""],backwardSxConstraint:[15,1,1,""],backwardTConstraint:[15,1,1,""],bothBackward:[15,1,1,""],bothForward:[15,1,1,""],crossedDirs:[15,1,1,""],forwardOnly:[15,1,1,""],forwardSConstraint:[15,1,1,""],forwardTConstraint:[15,1,1,""],innermostFunction:[15,1,1,""]},"nltk.ccg.combinator.BackwardCombinator":{__init__:[15,3,1,""],can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.DirectedBinaryCombinator":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.ForwardCombinator":{__init__:[15,3,1,""],can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedBinaryCombinator":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedComposition":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedFunctionApplication":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedSubstitution":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.combinator.UndirectedTypeRaise":{can_combine:[15,3,1,""],combine:[15,3,1,""]},"nltk.ccg.lexicon":{CCGLexicon:[16,2,1,""],Token:[16,2,1,""],augParseCategory:[16,1,1,""],fromstring:[16,1,1,""],matchBrackets:[16,1,1,""],nextCategory:[16,1,1,""],parseApplication:[16,1,1,""],parsePrimitiveCategory:[16,1,1,""],parseSubscripts:[16,1,1,""]},"nltk.ccg.lexicon.CCGLexicon":{__init__:[16,3,1,""],categories:[16,3,1,""],start:[16,3,1,""]},
"nltk.ccg.lexicon.Token":{__init__:[16,3,1,""],categ:[16,3,1,""],semantics:[16,3,1,""]},"nltk.ccg.logic":{compute_composition_semantics:[17,1,1,""],compute_function_semantics:[17,1,1,""],compute_substitution_semantics:[17,1,1,""],compute_type_raised_semantics:[17,1,1,""]},"nltk.chat":{chatbots:[18,1,1,""],eliza:[19,0,0,"-"],iesha:[20,0,0,"-"],rude:[21,0,0,"-"],suntsu:[22,0,0,"-"],util:[23,0,0,"-"],zen:[24,0,0,"-"]},"nltk.chat.eliza":{demo:[19,1,1,""],eliza_chat:[19,1,1,""]},"nltk.chat.iesha":{demo:[20,1,1,""],iesha_chat:[20,1,1,""]},"nltk.chat.rude":{demo:[21,1,1,""],rude_chat:[21,1,1,""]},"nltk.chat.suntsu":{demo:[22,1,1,""],suntsu_chat:[22,1,1,""]},"nltk.chat.util":{Chat:[23,2,1,""]},"nltk.chat.util.Chat":{__init__:[23,3,1,""],converse:[23,3,1,""],respond:[23,3,1,""]},"nltk.chat.zen":{demo:[24,1,1,""],zen_chat:[24,1,1,""]},"nltk.chunk":{api:[26,0,0,"-"],named_entity:[27,0,0,"-"],ne_chunk:[25,1,1,""],ne_chunk_sents:[25,1,1,""],regexp:[28,0,0,"-"],util:[29,0,0,"-"]},"nltk.chunk.api":{ChunkParserI:[26,2,1,""]},"nltk.chunk.api.ChunkParserI":{evaluate:[26,3,1,""],parse:[26,3,1,""]},"nltk.chunk.named_entity":{NEChunkParser:[27,2,1,""],NEChunkParserTagger:[27,2,1,""],build_model:[27,1,1,""],cmp_chunks:[27,1,1,""],load_ace_data:[27,1,1,""],load_ace_file:[27,1,1,""],postag_tree:[27,1,1,""],shape:[27,1,1,""],simplify_pos:[27,1,1,""]},"nltk.chunk.named_entity.NEChunkParser":{__init__:[27,3,1,""],parse:[27,3,1,""]},"nltk.chunk.named_entity.NEChunkParserTagger":{__init__:[27,3,1,""]},"nltk.chunk.regexp":{ChunkRule:[28,2,1,""],ChunkRuleWithContext:[28,2,1,""],ChunkString:[28,2,1,""],ExpandLeftRule:[28,2,1,""],ExpandRightRule:[28,2,1,""],MergeRule:[28,2,1,""],RegexpChunkParser:[28,2,1,""],RegexpChunkRule:[28,2,1,""],RegexpParser:[28,2,1,""],SplitRule:[28,2,1,""],StripRule:[28,2,1,""],UnChunkRule:[28,2,1,""],demo:[28,1,1,""],demo_eval:[28,1,1,""],tag_pattern2re_pattern:[28,1,1,""]},"nltk.chunk.regexp.ChunkRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.ChunkRuleWithContext":{__init__:[28,3,1,""]},"nltk.chunk.regexp.ChunkString":{CHUNK_TAG:[28,4,1,""],CHUNK_TAG_CHAR:[28,4,1,""],IN_CHUNK_PATTERN:[28,4,1,""],IN_STRIP_PATTERN:[28,4,1,""],__init__:[28,3,1,""],to_chunkstruct:[28,3,1,""],xform:[28,3,1,""]},"nltk.chunk.regexp.ExpandLeftRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.ExpandRightRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.MergeRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.RegexpChunkParser":{__init__:[28,3,1,""],parse:[28,3,1,""],rules:[28,3,1,""]},"nltk.chunk.regexp.RegexpChunkRule":{__init__:[28,3,1,""],apply:[28,3,1,""],descr:[28,3,1,""],fromstring:[28,3,1,""]},"nltk.chunk.regexp.RegexpParser":{__init__:[28,3,1,""],parse:[28,3,1,""]},"nltk.chunk.regexp.SplitRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.StripRule":{__init__:[28,3,1,""]},"nltk.chunk.regexp.UnChunkRule":{__init__:[28,3,1,""]},"nltk.chunk.util":{ChunkScore:[29,2,1,""],accuracy:[29,1,1,""],conllstr2tree:[29,1,1,""],conlltags2tree:[29,1,1,""],demo:[29,1,1,""],ieerstr2tree:[29,1,1,""],tagstr2tree:[29,1,1,""],tree2conllstr:[29,1,1,""],tree2conlltags:[29,1,1,""]},"nltk.chunk.util.ChunkScore":{__init__:[29,3,1,""],accuracy:[29,3,1,""],correct:[29,3,1,""],f_measure:[29,3,1,""],guessed:[29,3,1,""],incorrect:[29,3,1,""],missed:[29,3,1,""],precision:[29,3,1,""],recall:[29,3,1,""],score:[29,3,1,""]},"nltk.classify":{api:[31,0,0,"-"],decisiontree:[32,0,0,"-"],maxent:[33,0,0,"-"],megam:[34,0,0,"-"],naivebayes:[35,0,0,"-"],positivenaivebayes:[36,0,0,"-"],rte_classify:[37,0,0,"-"],scikitlearn:[38,0,0,"-"],senna:[39,0,0,"-"],svm:[40,0,0,"-
"],tadm:[41,0,0,"-"],textcat:[42,0,0,"-"],util:[43,0,0,"-"],weka:[44,0,0,"-"]},"nltk.classify.api":{ClassifierI:[31,2,1,""],MultiClassifierI:[31,2,1,""]},"nltk.classify.api.ClassifierI":{classify:[31,3,1,""],classify_many:[31,3,1,""],labels:[31,3,1,""],prob_classify:[31,3,1,""],prob_classify_many:[31,3,1,""]},"nltk.classify.api.MultiClassifierI":{classify:[31,3,1,""],classify_many:[31,3,1,""],labels:[31,3,1,""],prob_classify:[31,3,1,""],prob_classify_many:[31,3,1,""]},"nltk.classify.decisiontree":{DecisionTreeClassifier:[32,2,1,""],demo:[32,1,1,""],f:[32,1,1,""]},"nltk.classify.decisiontree.DecisionTreeClassifier":{__init__:[32,3,1,""],best_binary_stump:[32,3,1,""],best_stump:[32,3,1,""],binary_stump:[32,3,1,""],classify:[32,3,1,""],error:[32,3,1,""],labels:[32,3,1,""],leaf:[32,3,1,""],pretty_format:[32,3,1,""],pseudocode:[32,3,1,""],refine:[32,3,1,""],stump:[32,3,1,""],train:[32,3,1,""]},"nltk.classify.maxent":{BinaryMaxentFeatureEncoding:[33,2,1,""],ConditionalExponentialClassifier:[33,4,1,""],FunctionBackedMaxentFeatureEncoding:[33,2,1,""],GISEncoding:[33,2,1,""],MaxentClassifier:[33,2,1,""],MaxentFeatureEncodingI:[33,2,1,""],TadmEventMaxentFeatureEncoding:[33,2,1,""],TadmMaxentClassifier:[33,2,1,""],TypedMaxentFeatureEncoding:[33,2,1,""],calculate_deltas:[33,1,1,""],calculate_empirical_fcount:[33,1,1,""],calculate_estimated_fcount:[33,1,1,""],calculate_nfmap:[33,1,1,""],demo:[33,1,1,""],train_maxent_classifier_with_gis:[33,1,1,""],train_maxent_classifier_with_iis:[33,1,1,""],train_maxent_classifier_with_megam:[33,1,1,""]},"nltk.classify.maxent.BinaryMaxentFeatureEncoding":{__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""],train:[33,3,1,""]},"nltk.classify.maxent.FunctionBackedMaxentFeatureEncoding":{__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""]},"nltk.classify.maxent.GISEncoding":{C:[33,5,1,""],__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],length:[33,3,1,""]},"nltk.classify.maxent.MaxentClassifier":{ALGORITHMS:[33,4,1,""],__init__:[33,3,1,""],classify:[33,3,1,""],explain:[33,3,1,""],labels:[33,3,1,""],most_informative_features:[33,3,1,""],prob_classify:[33,3,1,""],set_weights:[33,3,1,""],show_most_informative_features:[33,3,1,""],train:[33,3,1,""],weights:[33,3,1,""]},"nltk.classify.maxent.MaxentFeatureEncodingI":{describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""],train:[33,3,1,""]},"nltk.classify.maxent.TadmEventMaxentFeatureEncoding":{__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""],train:[33,3,1,""]},"nltk.classify.maxent.TadmMaxentClassifier":{train:[33,3,1,""]},"nltk.classify.maxent.TypedMaxentFeatureEncoding":{__init__:[33,3,1,""],describe:[33,3,1,""],encode:[33,3,1,""],labels:[33,3,1,""],length:[33,3,1,""],train:[33,3,1,""]},"nltk.classify.megam":{call_megam:[34,1,1,""],config_megam:[34,1,1,""],parse_megam_weights:[34,1,1,""],write_megam_file:[34,1,1,""]},"nltk.classify.naivebayes":{NaiveBayesClassifier:[35,2,1,""],demo:[35,1,1,""]},"nltk.classify.naivebayes.NaiveBayesClassifier":{__init__:[35,3,1,""],classify:[35,3,1,""],labels:[35,3,1,""],most_informative_features:[35,3,1,""],prob_classify:[35,3,1,""],show_most_informative_features:[35,3,1,""],train:[35,3,1,""]},"nltk.classify.positivenaivebayes":{PositiveNaiveBayesClassifier:[36,2,1,""],demo:[36,1,1,""]},"nltk.classify.positivenaivebayes.PositiveNaiveBayesClassifier":{train:[36,3,1,""]},"nltk.classify.rte_classify":{RTEFeatureExtractor:
[37,2,1,""],rte_classifier:[37,1,1,""],rte_features:[37,1,1,""],rte_featurize:[37,1,1,""]},"nltk.classify.rte_classify.RTEFeatureExtractor":{__init__:[37,3,1,""],hyp_extra:[37,3,1,""],overlap:[37,3,1,""]},"nltk.classify.scikitlearn":{SklearnClassifier:[38,2,1,""]},"nltk.classify.scikitlearn.SklearnClassifier":{__init__:[38,3,1,""],classify_many:[38,3,1,""],labels:[38,3,1,""],prob_classify_many:[38,3,1,""],train:[38,3,1,""]},"nltk.classify.senna":{Senna:[39,2,1,""]},"nltk.classify.senna.Senna":{SUPPORTED_OPERATIONS:[39,4,1,""],__init__:[39,3,1,""],executable:[39,3,1,""],tag:[39,3,1,""],tag_sents:[39,3,1,""]},"nltk.classify.svm":{SvmClassifier:[40,2,1,""]},"nltk.classify.svm.SvmClassifier":{__init__:[40,3,1,""]},"nltk.classify.tadm":{call_tadm:[41,1,1,""],config_tadm:[41,1,1,""],encoding_demo:[41,1,1,""],names_demo:[41,1,1,""],parse_tadm_weights:[41,1,1,""],write_tadm_file:[41,1,1,""]},"nltk.classify.textcat":{TextCat:[42,2,1,""],demo:[42,1,1,""]},"nltk.classify.textcat.TextCat":{__init__:[42,3,1,""],calc_dist:[42,3,1,""],fingerprints:[42,4,1,""],guess_language:[42,3,1,""],lang_dists:[42,3,1,""],last_distances:[42,4,1,""],profile:[42,3,1,""],remove_punctuation:[42,3,1,""]},"nltk.classify.util":{CutoffChecker:[43,2,1,""],accuracy:[43,1,1,""],apply_features:[43,1,1,""],attested_labels:[43,1,1,""],binary_names_demo_features:[43,1,1,""],check_megam_config:[43,1,1,""],log_likelihood:[43,1,1,""],names_demo:[43,1,1,""],names_demo_features:[43,1,1,""],partial_names_demo:[43,1,1,""],wsd_demo:[43,1,1,""]},"nltk.classify.util.CutoffChecker":{__init__:[43,3,1,""],check:[43,3,1,""]},"nltk.classify.weka":{ARFF_Formatter:[44,2,1,""],WekaClassifier:[44,2,1,""],config_weka:[44,1,1,""]},"nltk.classify.weka.ARFF_Formatter":{__init__:[44,3,1,""],data_section:[44,3,1,""],format:[44,3,1,""],from_train:[44,3,1,""],header_section:[44,3,1,""],labels:[44,3,1,""],write:[44,3,1,""]},"nltk.classify.weka.WekaClassifier":{__init__:[44,3,1,""],classify_many:[44,3,1,""],parse_weka_distribution:[44,3,1,""],parse_weka_output:[44,3,1,""],prob_classify_many:[44,3,1,""],train:[44,3,1,""]},"nltk.cluster":{api:[47,0,0,"-"],em:[48,0,0,"-"],gaac:[49,0,0,"-"],kmeans:[50,0,0,"-"],util:[51,0,0,"-"]},"nltk.cluster.api":{ClusterI:[47,2,1,""]},"nltk.cluster.api.ClusterI":{classification_probdist:[47,3,1,""],classify:[47,3,1,""],cluster:[47,3,1,""],cluster_name:[47,3,1,""],cluster_names:[47,3,1,""],likelihood:[47,3,1,""],num_clusters:[47,3,1,""]},"nltk.cluster.em":{EMClusterer:[48,2,1,""],demo:[48,1,1,""]},"nltk.cluster.em.EMClusterer":{__init__:[48,3,1,""],classify_vectorspace:[48,3,1,""],cluster_vectorspace:[48,3,1,""],likelihood_vectorspace:[48,3,1,""],num_clusters:[48,3,1,""]},"nltk.cluster.gaac":{GAAClusterer:[49,2,1,""],demo:[49,1,1,""]},"nltk.cluster.gaac.GAAClusterer":{__init__:[49,3,1,""],classify_vectorspace:[49,3,1,""],cluster:[49,3,1,""],cluster_vectorspace:[49,3,1,""],dendrogram:[49,3,1,""],num_clusters:[49,3,1,""],update_clusters:[49,3,1,""]},"nltk.cluster.kmeans":{KMeansClusterer:[50,2,1,""],demo:[50,1,1,""]},"nltk.cluster.kmeans.KMeansClusterer":{__init__:[50,3,1,""],classify_vectorspace:[50,3,1,""],cluster_vectorspace:[50,3,1,""],means:[50,3,1,""],num_clusters:[50,3,1,""]},"nltk.cluster.util":{Dendrogram:[51,2,1,""],VectorSpaceClusterer:[51,2,1,""],cosine_distance:[51,1,1,""],euclidean_distance:[51,1,1,""]},"nltk.cluster.util.Dendrogram":{__init__:[51,3,1,""],groups:[51,3,1,""],merge:[51,3,1,""],show:[51,3,1,""]},"nltk.cluster.util.VectorSpaceClusterer":{__init__:[51,3,1,""],classify:[51,3,1,""],classify_vectorspace:[51,3,1
,""],cluster:[51,3,1,""],cluster_vectorspace:[51,3,1,""],likelihood:[51,3,1,""],likelihood_vectorspace:[51,3,1,""],vector:[51,3,1,""]},"nltk.collections":{AbstractLazySequence:[52,2,1,""],LazyConcatenation:[52,2,1,""],LazyEnumerate:[52,2,1,""],LazyIteratorList:[52,2,1,""],LazyMap:[52,2,1,""],LazySubsequence:[52,2,1,""],LazyZip:[52,2,1,""],OrderedDict:[52,2,1,""],Trie:[52,2,1,""]},"nltk.collections.AbstractLazySequence":{count:[52,3,1,""],index:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazyConcatenation":{__init__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazyEnumerate":{__init__:[52,3,1,""]},"nltk.collections.LazyIteratorList":{__init__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazyMap":{__init__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazySubsequence":{MIN_SIZE:[52,4,1,""],__init__:[52,3,1,""],__new__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.LazyZip":{__init__:[52,3,1,""],iterate_from:[52,3,1,""]},"nltk.collections.OrderedDict":{__init__:[52,3,1,""],clear:[52,3,1,""],copy:[52,3,1,""],items:[52,3,1,""],keys:[52,3,1,""],popitem:[52,3,1,""],setdefault:[52,3,1,""],update:[52,3,1,""],values:[52,3,1,""]},"nltk.collections.Trie":{LEAF:[52,4,1,""],__init__:[52,3,1,""],insert:[52,3,1,""]},"nltk.collocations":{BigramCollocationFinder:[53,2,1,""],QuadgramCollocationFinder:[53,2,1,""],TrigramCollocationFinder:[53,2,1,""]},"nltk.collocations.BigramCollocationFinder":{__init__:[53,3,1,""],default_ws:[53,4,1,""],from_words:[53,3,1,""],score_ngram:[53,3,1,""]},"nltk.collocations.QuadgramCollocationFinder":{__init__:[53,3,1,""],default_ws:[53,4,1,""],from_words:[53,3,1,""],score_ngram:[53,3,1,""]},"nltk.collocations.TrigramCollocationFinder":{__init__:[53,3,1,""],bigram_finder:[53,3,1,""],default_ws:[53,4,1,""],from_words:[53,3,1,""],score_ngram:[53,3,1,""]},"nltk.compat":{add_py3_data:[54,1,1,""],py3_data:[54,1,1,""]},"nltk.corpus":{demo:[55,1,1,""],europarl_raw:[56,0,0,"-"],reader:[57,0,0,"-"],util:[108,0,0,"-"]},"nltk.corpus.reader":{AlignedCorpusReader:[57,2,1,""],AlpinoCorpusReader:[57,2,1,""],BNCCorpusReader:[57,2,1,""],BracketParseCorpusReader:[57,2,1,""],CHILDESCorpusReader:[57,2,1,""],CMUDictCorpusReader:[57,2,1,""],CategorizedBracketParseCorpusReader:[57,2,1,""],CategorizedCorpusReader:[57,2,1,""],CategorizedPlaintextCorpusReader:[57,2,1,""],CategorizedSentencesCorpusReader:[57,2,1,""],CategorizedTaggedCorpusReader:[57,2,1,""],ChasenCorpusReader:[57,2,1,""],ChunkedCorpusReader:[57,2,1,""],ComparativeSentencesCorpusReader:[57,2,1,""],ConllChunkCorpusReader:[57,2,1,""],ConllCorpusReader:[57,2,1,""],CorpusReader:[57,2,1,""],CrubadanCorpusReader:[57,2,1,""],DependencyCorpusReader:[57,2,1,""],EuroparlCorpusReader:[57,2,1,""],FramenetCorpusReader:[57,2,1,""],IEERCorpusReader:[57,2,1,""],IPIPANCorpusReader:[57,2,1,""],IndianCorpusReader:[57,2,1,""],KNBCorpusReader:[57,2,1,""],LinThesaurusCorpusReader:[57,2,1,""],MTECorpusReader:[57,2,1,""],MWAPPDBCorpusReader:[57,2,1,""],MacMorphoCorpusReader:[57,2,1,""],NKJPCorpusReader:[57,2,1,""],NPSChatCorpusReader:[57,2,1,""],NombankCorpusReader:[57,2,1,""],NonbreakingPrefixesCorpusReader:[57,2,1,""],OpinionLexiconCorpusReader:[57,2,1,""],PPAttachmentCorpusReader:[57,2,1,""],PanLexLiteCorpusReader:[57,2,1,""],PanlexSwadeshCorpusReader:[57,2,1,""],Pl196xCorpusReader:[57,2,1,""],PlaintextCorpusReader:[57,2,1,""],PortugueseCategorizedPlaintextCorpusReader:[57,2,1,""],PropbankCorpusReader:[57,2,1,""],ProsConsCorpusReader:[57,2,1,""],RTECorpusReader:[57,2,1,""],ReviewsCorpusReader:[57,2,1,""],Semco
rCorpusReader:[57,2,1,""],SensevalCorpusReader:[57,2,1,""],SentiSynset:[57,2,1,""],SentiWordNetCorpusReader:[57,2,1,""],SinicaTreebankCorpusReader:[57,2,1,""],StringCategoryCorpusReader:[57,2,1,""],SwadeshCorpusReader:[57,2,1,""],SwitchboardCorpusReader:[57,2,1,""],SyntaxCorpusReader:[57,2,1,""],TEICorpusView:[57,2,1,""],TaggedCorpusReader:[57,2,1,""],TimitCorpusReader:[57,2,1,""],TimitTaggedCorpusReader:[57,2,1,""],ToolboxCorpusReader:[57,2,1,""],TwitterCorpusReader:[57,2,1,""],UdhrCorpusReader:[57,2,1,""],UnicharsCorpusReader:[57,2,1,""],VerbnetCorpusReader:[57,2,1,""],WordListCorpusReader:[57,2,1,""],WordNetCorpusReader:[57,2,1,""],WordNetICCorpusReader:[57,2,1,""],XMLCorpusReader:[57,2,1,""],YCOECorpusReader:[57,2,1,""],aligned:[58,0,0,"-"],api:[59,0,0,"-"],bnc:[60,0,0,"-"],bracket_parse:[61,0,0,"-"],categorized_sents:[62,0,0,"-"],chasen:[63,0,0,"-"],childes:[64,0,0,"-"],chunked:[65,0,0,"-"],cmudict:[66,0,0,"-"],comparative_sents:[67,0,0,"-"],conll:[68,0,0,"-"],crubadan:[69,0,0,"-"],dependency:[70,0,0,"-"],find_corpus_fileids:[57,1,1,""],framenet:[71,0,0,"-"],ieer:[72,0,0,"-"],indian:[73,0,0,"-"],ipipan:[74,0,0,"-"],knbc:[75,0,0,"-"],lin:[76,0,0,"-"],mte:[77,0,0,"-"],nkjp:[78,0,0,"-"],nombank:[79,0,0,"-"],nps_chat:[80,0,0,"-"],opinion_lexicon:[81,0,0,"-"],panlex_lite:[82,0,0,"-"],panlex_swadesh:[83,0,0,"-"],pl196x:[84,0,0,"-"],plaintext:[85,0,0,"-"],ppattach:[86,0,0,"-"],propbank:[87,0,0,"-"],pros_cons:[88,0,0,"-"],reviews:[89,0,0,"-"],rte:[90,0,0,"-"],semcor:[91,0,0,"-"],senseval:[92,0,0,"-"],sentiwordnet:[93,0,0,"-"],sinica_treebank:[94,0,0,"-"],string_category:[95,0,0,"-"],switchboard:[96,0,0,"-"],tagged:[97,0,0,"-"],tagged_treebank_para_block_reader:[57,1,1,""],timit:[98,0,0,"-"],toolbox:[99,0,0,"-"],twitter:[100,0,0,"-"],udhr:[101,0,0,"-"],util:[102,0,0,"-"],verbnet:[103,0,0,"-"],wordlist:[104,0,0,"-"],wordnet:[105,0,0,"-"],xmldocs:[106,0,0,"-"],ycoe:[107,0,0,"-"]},"nltk.corpus.reader.AlignedCorpusReader":{__init__:[57,3,1,""],aligned_sents:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.AlpinoCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.BNCCorpusReader":{__init__:[57,3,1,""],sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.BracketParseCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.CHILDESCorpusReader":{MLU:[57,3,1,""],__init__:[57,3,1,""],age:[57,3,1,""],childes_url_base:[57,4,1,""],convert_age:[57,3,1,""],corpus:[57,3,1,""],participants:[57,3,1,""],sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],webview_file:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CMUDictCorpusReader":{dict:[57,3,1,""],entries:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CategorizedBracketParseCorpusReader":{__init__:[57,3,1,""],parsed_paras:[57,3,1,""],parsed_sents:[57,3,1,""],parsed_words:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""]},"nltk.corpus.reader.CategorizedCorpusReader":{__init__:[57,3,1,""],categories:[57,3,1,""],fileids:[57,3,1,""],paras:[57,3,1,""],raw:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CategorizedPlaintextCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.CategorizedSentencesCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CategorizedTaggedCorpusReader":{__init__:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""]},"nltk.corpus.reader.ChasenCorpusReader":{__init__:[57,3,1,""],paras:[
57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.ChunkedCorpusReader":{__init__:[57,3,1,""],chunked_paras:[57,3,1,""],chunked_sents:[57,3,1,""],chunked_words:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.ComparativeSentencesCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],comparisons:[57,3,1,""],keywords:[57,3,1,""],keywords_readme:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.ConllChunkCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.ConllCorpusReader":{CHUNK:[57,4,1,""],COLUMN_TYPES:[57,4,1,""],IGNORE:[57,4,1,""],NE:[57,4,1,""],POS:[57,4,1,""],SRL:[57,4,1,""],TREE:[57,4,1,""],WORDS:[57,4,1,""],__init__:[57,3,1,""],chunked_sents:[57,3,1,""],chunked_words:[57,3,1,""],iob_sents:[57,3,1,""],iob_words:[57,3,1,""],parsed_sents:[57,3,1,""],sents:[57,3,1,""],srl_instances:[57,3,1,""],srl_spans:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.CorpusReader":{__init__:[57,3,1,""],abspath:[57,3,1,""],abspaths:[57,3,1,""],citation:[57,3,1,""],encoding:[57,3,1,""],ensure_loaded:[57,3,1,""],fileids:[57,3,1,""],license:[57,3,1,""],open:[57,3,1,""],raw:[57,3,1,""],readme:[57,3,1,""],root:[57,5,1,""]},"nltk.corpus.reader.CrubadanCorpusReader":{__init__:[57,3,1,""],crubadan_to_iso:[57,3,1,""],iso_to_crubadan:[57,3,1,""],lang_freq:[57,3,1,""],langs:[57,3,1,""]},"nltk.corpus.reader.DependencyCorpusReader":{__init__:[57,3,1,""],parsed_sents:[57,3,1,""],sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.EuroparlCorpusReader":{chapters:[57,3,1,""],paras:[57,3,1,""]},"nltk.corpus.reader.FramenetCorpusReader":{__init__:[57,3,1,""],annotations:[57,3,1,""],buildindexes:[57,3,1,""],doc:[57,3,1,""],docs:[57,3,1,""],docs_metadata:[57,3,1,""],exemplars:[57,3,1,""],fe_relations:[57,3,1,""],fes:[57,3,1,""],frame:[57,3,1,""],frame_by_id:[57,3,1,""],frame_by_name:[57,3,1,""],frame_ids_and_names:[57,3,1,""],frame_relation_types:[57,3,1,""],frame_relations:[57,3,1,""],frames:[57,3,1,""],frames_by_lemma:[57,3,1,""],ft_sents:[57,3,1,""],help:[57,3,1,""],lu:[57,3,1,""],lu_basic:[57,3,1,""],lu_ids_and_names:[57,3,1,""],lus:[57,3,1,""],propagate_semtypes:[57,3,1,""],semtype:[57,3,1,""],semtype_inherits:[57,3,1,""],semtypes:[57,3,1,""],sents:[57,3,1,""],warnings:[57,3,1,""]},"nltk.corpus.reader.IEERCorpusReader":{docs:[57,3,1,""],parsed_docs:[57,3,1,""]},"nltk.corpus.reader.IPIPANCorpusReader":{__init__:[57,3,1,""],categories:[57,3,1,""],channels:[57,3,1,""],domains:[57,3,1,""],fileids:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.IndianCorpusReader":{sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.KNBCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.LinThesaurusCorpusReader":{__init__:[57,3,1,""],scored_synonyms:[57,3,1,""],similarity:[57,3,1,""],synonyms:[57,3,1,""]},"nltk.corpus.reader.MTECorpusReader":{__init__:[57,3,1,""],lemma_paras:[57,3,1,""],lemma_sents:[57,3,1,""],lemma_words:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.MWAPPDBCorpusReader":{entries:[57,3,1,""],mwa_ppdb_xxxl_f
ile:[57,4,1,""]},"nltk.corpus.reader.MacMorphoCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.NKJPCorpusReader":{HEADER_MODE:[57,4,1,""],RAW_MODE:[57,4,1,""],SENTS_MODE:[57,4,1,""],WORDS_MODE:[57,4,1,""],__init__:[57,3,1,""],add_root:[57,3,1,""],fileids:[57,3,1,""],get_paths:[57,3,1,""],header:[57,3,1,""],raw:[57,3,1,""],sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.NPSChatCorpusReader":{__init__:[57,3,1,""],posts:[57,3,1,""],tagged_posts:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""],xml_posts:[57,3,1,""]},"nltk.corpus.reader.NombankCorpusReader":{__init__:[57,3,1,""],instances:[57,3,1,""],lines:[57,3,1,""],nouns:[57,3,1,""],roleset:[57,3,1,""],rolesets:[57,3,1,""]},"nltk.corpus.reader.NonbreakingPrefixesCorpusReader":{available_langs:[57,4,1,""],words:[57,3,1,""]},"nltk.corpus.reader.OpinionLexiconCorpusReader":{CorpusView:[57,4,1,""],negative:[57,3,1,""],positive:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.PPAttachmentCorpusReader":{attachments:[57,3,1,""],tuples:[57,3,1,""]},"nltk.corpus.reader.PanLexLiteCorpusReader":{MEANING_Q:[57,4,1,""],TRANSLATION_Q:[57,4,1,""],__init__:[57,3,1,""],language_varieties:[57,3,1,""],meanings:[57,3,1,""],translations:[57,3,1,""]},"nltk.corpus.reader.PanlexSwadeshCorpusReader":{__init__:[57,3,1,""],entries:[57,3,1,""],get_languages:[57,3,1,""],get_macrolanguages:[57,3,1,""],language_codes:[57,3,1,""],license:[57,3,1,""],words_by_iso639:[57,3,1,""],words_by_lang:[57,3,1,""]},"nltk.corpus.reader.Pl196xCorpusReader":{__init__:[57,3,1,""],decode_tag:[57,3,1,""],head_len:[57,4,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],textids:[57,3,1,""],words:[57,3,1,""],xml:[57,3,1,""]},"nltk.corpus.reader.PlaintextCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.PortugueseCategorizedPlaintextCorpusReader":{__init__:[57,3,1,""]},"nltk.corpus.reader.PropbankCorpusReader":{__init__:[57,3,1,""],instances:[57,3,1,""],lines:[57,3,1,""],roleset:[57,3,1,""],rolesets:[57,3,1,""],verbs:[57,3,1,""]},"nltk.corpus.reader.ProsConsCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.RTECorpusReader":{pairs:[57,3,1,""]},"nltk.corpus.reader.ReviewsCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],features:[57,3,1,""],reviews:[57,3,1,""],sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.SemcorCorpusReader":{__init__:[57,3,1,""],chunk_sents:[57,3,1,""],chunks:[57,3,1,""],sents:[57,3,1,""],tagged_chunks:[57,3,1,""],tagged_sents:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.SensevalCorpusReader":{instances:[57,3,1,""]},"nltk.corpus.reader.SentiSynset":{__init__:[57,3,1,""],neg_score:[57,3,1,""],obj_score:[57,3,1,""],pos_score:[57,3,1,""]},"nltk.corpus.reader.SentiWordNetCorpusReader":{__init__:[57,3,1,""],all_senti_synsets:[57,3,1,""],senti_synset:[57,3,1,""],senti_synsets:[57,3,1,""]},"nltk.corpus.reader.StringCategoryCorpusReader":{__init__:[57,3,1,""],tuples:[57,3,1,""]},"nltk.corpus.reader.SwadeshCorpusReader":{entries:[57,3,1,""]},"nltk.corpus.reader.SwitchboardCorpusReader":{__init__:[57,3,1,""],discourses:[57,3,1,""],tagged_discourses:[57,3,1,""],tagged_turns:[57,3,1,""],tagged_words:[57,3,1,""],turns:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.SyntaxCorpusReader":{parsed_sents:[57,3,1,""],sents:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.
corpus.reader.TEICorpusView":{__init__:[57,3,1,""],read_block:[57,3,1,""]},"nltk.corpus.reader.TaggedCorpusReader":{__init__:[57,3,1,""],paras:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.TimitCorpusReader":{__init__:[57,3,1,""],audiodata:[57,3,1,""],fileids:[57,3,1,""],phone_times:[57,3,1,""],phone_trees:[57,3,1,""],phones:[57,3,1,""],play:[57,3,1,""],sent_times:[57,3,1,""],sentid:[57,3,1,""],sents:[57,3,1,""],spkrid:[57,3,1,""],spkrinfo:[57,3,1,""],spkrutteranceids:[57,3,1,""],transcription_dict:[57,3,1,""],utterance:[57,3,1,""],utteranceids:[57,3,1,""],wav:[57,3,1,""],word_times:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.TimitTaggedCorpusReader":{__init__:[57,3,1,""],paras:[57,3,1,""],tagged_paras:[57,3,1,""]},"nltk.corpus.reader.ToolboxCorpusReader":{entries:[57,3,1,""],fields:[57,3,1,""],words:[57,3,1,""],xml:[57,3,1,""]},"nltk.corpus.reader.TwitterCorpusReader":{CorpusView:[57,4,1,""],__init__:[57,3,1,""],docs:[57,3,1,""],strings:[57,3,1,""],tokenized:[57,3,1,""]},"nltk.corpus.reader.UdhrCorpusReader":{ENCODINGS:[57,4,1,""],SKIP:[57,4,1,""],__init__:[57,3,1,""]},"nltk.corpus.reader.UnicharsCorpusReader":{available_categories:[57,4,1,""],chars:[57,3,1,""]},"nltk.corpus.reader.VerbnetCorpusReader":{__init__:[57,3,1,""],classids:[57,3,1,""],fileids:[57,3,1,""],frames:[57,3,1,""],lemmas:[57,3,1,""],longid:[57,3,1,""],pprint:[57,3,1,""],pprint_frames:[57,3,1,""],pprint_members:[57,3,1,""],pprint_subclasses:[57,3,1,""],pprint_themroles:[57,3,1,""],shortid:[57,3,1,""],subclasses:[57,3,1,""],themroles:[57,3,1,""],vnclass:[57,3,1,""],wordnetids:[57,3,1,""]},"nltk.corpus.reader.WordListCorpusReader":{words:[57,3,1,""]},"nltk.corpus.reader.WordNetCorpusReader":{ADJ:[57,4,1,""],ADJ_SAT:[57,4,1,""],ADV:[57,4,1,""],MORPHOLOGICAL_SUBSTITUTIONS:[57,4,1,""],NOUN:[57,4,1,""],VERB:[57,4,1,""],__init__:[57,3,1,""],all_lemma_names:[57,3,1,""],all_synsets:[57,3,1,""],citation:[57,3,1,""],custom_lemmas:[57,3,1,""],digraph:[57,3,1,""],get_version:[57,3,1,""],ic:[57,3,1,""],jcn_similarity:[57,3,1,""],langs:[57,3,1,""],lch_similarity:[57,3,1,""],lemma:[57,3,1,""],lemma_count:[57,3,1,""],lemma_from_key:[57,3,1,""],lemmas:[57,3,1,""],license:[57,3,1,""],lin_similarity:[57,3,1,""],morphy:[57,3,1,""],of2ss:[57,3,1,""],path_similarity:[57,3,1,""],readme:[57,3,1,""],res_similarity:[57,3,1,""],ss2of:[57,3,1,""],synset:[57,3,1,""],synset_from_pos_and_offset:[57,3,1,""],synset_from_sense_key:[57,3,1,""],synsets:[57,3,1,""],words:[57,3,1,""],wup_similarity:[57,3,1,""]},"nltk.corpus.reader.WordNetICCorpusReader":{__init__:[57,3,1,""],ic:[57,3,1,""]},"nltk.corpus.reader.XMLCorpusReader":{__init__:[57,3,1,""],words:[57,3,1,""],xml:[57,3,1,""]},"nltk.corpus.reader.YCOECorpusReader":{__init__:[57,3,1,""],documents:[57,3,1,""],fileids:[57,3,1,""],paras:[57,3,1,""],parsed_sents:[57,3,1,""],sents:[57,3,1,""],tagged_paras:[57,3,1,""],tagged_sents:[57,3,1,""],tagged_words:[57,3,1,""],words:[57,3,1,""]},"nltk.corpus.reader.aligned":{AlignedCorpusReader:[58,2,1,""],AlignedSentCorpusView:[58,2,1,""]},"nltk.corpus.reader.aligned.AlignedCorpusReader":{__init__:[58,3,1,""],aligned_sents:[58,3,1,""],sents:[58,3,1,""],words:[58,3,1,""]},"nltk.corpus.reader.aligned.AlignedSentCorpusView":{__init__:[58,3,1,""],read_block:[58,3,1,""]},"nltk.corpus.reader.api":{CategorizedCorpusReader:[59,2,1,""],CorpusReader:[59,2,1,""],SyntaxCorpusReader:[59,2,1,""]},"nltk.corpus.reader.api.CategorizedCorpusReader":{__init__:[59,3,1,""],categories:[59
,3,1,""],fileids:[59,3,1,""],paras:[59,3,1,""],raw:[59,3,1,""],sents:[59,3,1,""],words:[59,3,1,""]},"nltk.corpus.reader.api.CorpusReader":{__init__:[59,3,1,""],abspath:[59,3,1,""],abspaths:[59,3,1,""],citation:[59,3,1,""],encoding:[59,3,1,""],ensure_loaded:[59,3,1,""],fileids:[59,3,1,""],license:[59,3,1,""],open:[59,3,1,""],raw:[59,3,1,""],readme:[59,3,1,""],root:[59,5,1,""]},"nltk.corpus.reader.api.SyntaxCorpusReader":{parsed_sents:[59,3,1,""],sents:[59,3,1,""],tagged_sents:[59,3,1,""],tagged_words:[59,3,1,""],words:[59,3,1,""]},"nltk.corpus.reader.bnc":{BNCCorpusReader:[60,2,1,""],BNCSentence:[60,2,1,""],BNCWordView:[60,2,1,""]},"nltk.corpus.reader.bnc.BNCCorpusReader":{__init__:[60,3,1,""],sents:[60,3,1,""],tagged_sents:[60,3,1,""],tagged_words:[60,3,1,""],words:[60,3,1,""]},"nltk.corpus.reader.bnc.BNCSentence":{__init__:[60,3,1,""]},"nltk.corpus.reader.bnc.BNCWordView":{__init__:[60,3,1,""],author:[60,4,1,""],editor:[60,4,1,""],handle_elt:[60,3,1,""],handle_header:[60,3,1,""],handle_sent:[60,3,1,""],handle_word:[60,3,1,""],resps:[60,4,1,""],tags_to_ignore:[60,4,1,""],title:[60,4,1,""]},"nltk.corpus.reader.bracket_parse":{AlpinoCorpusReader:[61,2,1,""],BracketParseCorpusReader:[61,2,1,""],CategorizedBracketParseCorpusReader:[61,2,1,""]},"nltk.corpus.reader.bracket_parse.AlpinoCorpusReader":{__init__:[61,3,1,""]},"nltk.corpus.reader.bracket_parse.BracketParseCorpusReader":{__init__:[61,3,1,""]},"nltk.corpus.reader.bracket_parse.CategorizedBracketParseCorpusReader":{__init__:[61,3,1,""],parsed_paras:[61,3,1,""],parsed_sents:[61,3,1,""],parsed_words:[61,3,1,""],tagged_paras:[61,3,1,""],tagged_sents:[61,3,1,""],tagged_words:[61,3,1,""]},"nltk.corpus.reader.categorized_sents":{CategorizedSentencesCorpusReader:[62,2,1,""]},"nltk.corpus.reader.categorized_sents.CategorizedSentencesCorpusReader":{CorpusView:[62,4,1,""],__init__:[62,3,1,""],sents:[62,3,1,""],words:[62,3,1,""]},"nltk.corpus.reader.chasen":{ChasenCorpusReader:[63,2,1,""],ChasenCorpusView:[63,2,1,""],demo:[63,1,1,""],test:[63,1,1,""]},"nltk.corpus.reader.chasen.ChasenCorpusReader":{__init__:[63,3,1,""],paras:[63,3,1,""],sents:[63,3,1,""],tagged_paras:[63,3,1,""],tagged_sents:[63,3,1,""],tagged_words:[63,3,1,""],words:[63,3,1,""]},"nltk.corpus.reader.chasen.ChasenCorpusView":{__init__:[63,3,1,""],read_block:[63,3,1,""]},"nltk.corpus.reader.childes":{CHILDESCorpusReader:[64,2,1,""],demo:[64,1,1,""]},"nltk.corpus.reader.childes.CHILDESCorpusReader":{MLU:[64,3,1,""],__init__:[64,3,1,""],age:[64,3,1,""],childes_url_base:[64,4,1,""],convert_age:[64,3,1,""],corpus:[64,3,1,""],participants:[64,3,1,""],sents:[64,3,1,""],tagged_sents:[64,3,1,""],tagged_words:[64,3,1,""],webview_file:[64,3,1,""],words:[64,3,1,""]},"nltk.corpus.reader.chunked":{ChunkedCorpusReader:[65,2,1,""],ChunkedCorpusView:[65,2,1,""]},"nltk.corpus.reader.chunked.ChunkedCorpusReader":{__init__:[65,3,1,""],chunked_paras:[65,3,1,""],chunked_sents:[65,3,1,""],chunked_words:[65,3,1,""],paras:[65,3,1,""],sents:[65,3,1,""],tagged_paras:[65,3,1,""],tagged_sents:[65,3,1,""],tagged_words:[65,3,1,""],words:[65,3,1,""]},"nltk.corpus.reader.chunked.ChunkedCorpusView":{__init__:[65,3,1,""],read_block:[65,3,1,""]},"nltk.corpus.reader.cmudict":{CMUDictCorpusReader:[66,2,1,""],read_cmudict_block:[66,1,1,""]},"nltk.corpus.reader.cmudict.CMUDictCorpusReader":{dict:[66,3,1,""],entries:[66,3,1,""],words:[66,3,1,""]},"nltk.corpus.reader.comparative_sents":{ComparativeSentencesCorpusReader:[67,2,1,""],Comparison:[67,2,1,""]},"nltk.corpus.reader.comparative_sents.ComparativeSentencesCorpusReader"
:{CorpusView:[67,4,1,""],__init__:[67,3,1,""],comparisons:[67,3,1,""],keywords:[67,3,1,""],keywords_readme:[67,3,1,""],sents:[67,3,1,""],words:[67,3,1,""]},"nltk.corpus.reader.comparative_sents.Comparison":{__init__:[67,3,1,""]},"nltk.corpus.reader.conll":{ConllChunkCorpusReader:[68,2,1,""],ConllCorpusReader:[68,2,1,""],ConllSRLInstance:[68,2,1,""],ConllSRLInstanceList:[68,2,1,""]},"nltk.corpus.reader.conll.ConllChunkCorpusReader":{__init__:[68,3,1,""]},"nltk.corpus.reader.conll.ConllCorpusReader":{CHUNK:[68,4,1,""],COLUMN_TYPES:[68,4,1,""],IGNORE:[68,4,1,""],NE:[68,4,1,""],POS:[68,4,1,""],SRL:[68,4,1,""],TREE:[68,4,1,""],WORDS:[68,4,1,""],__init__:[68,3,1,""],chunked_sents:[68,3,1,""],chunked_words:[68,3,1,""],iob_sents:[68,3,1,""],iob_words:[68,3,1,""],parsed_sents:[68,3,1,""],sents:[68,3,1,""],srl_instances:[68,3,1,""],srl_spans:[68,3,1,""],tagged_sents:[68,3,1,""],tagged_words:[68,3,1,""],words:[68,3,1,""]},"nltk.corpus.reader.conll.ConllSRLInstance":{__init__:[68,3,1,""],arguments:[68,4,1,""],pprint:[68,3,1,""],tagged_spans:[68,4,1,""],tree:[68,4,1,""],verb:[68,4,1,""],verb_head:[68,4,1,""],words:[68,4,1,""]},"nltk.corpus.reader.conll.ConllSRLInstanceList":{__init__:[68,3,1,""],pprint:[68,3,1,""]},"nltk.corpus.reader.crubadan":{CrubadanCorpusReader:[69,2,1,""]},"nltk.corpus.reader.crubadan.CrubadanCorpusReader":{__init__:[69,3,1,""],crubadan_to_iso:[69,3,1,""],iso_to_crubadan:[69,3,1,""],lang_freq:[69,3,1,""],langs:[69,3,1,""]},"nltk.corpus.reader.dependency":{DependencyCorpusReader:[70,2,1,""],DependencyCorpusView:[70,2,1,""]},"nltk.corpus.reader.dependency.DependencyCorpusReader":{__init__:[70,3,1,""],parsed_sents:[70,3,1,""],sents:[70,3,1,""],tagged_sents:[70,3,1,""],tagged_words:[70,3,1,""],words:[70,3,1,""]},"nltk.corpus.reader.dependency.DependencyCorpusView":{__init__:[70,3,1,""],read_block:[70,3,1,""]},"nltk.corpus.reader.framenet":{AttrDict:[71,2,1,""],FramenetCorpusReader:[71,2,1,""],FramenetError:[71,6,1,""],Future:[71,2,1,""],PrettyDict:[71,2,1,""],PrettyLazyConcatenation:[71,2,1,""],PrettyLazyIteratorList:[71,2,1,""],PrettyLazyMap:[71,2,1,""],PrettyList:[71,2,1,""],SpecialList:[71,2,1,""],demo:[71,1,1,""],mimic_wrap:[71,1,1,""]},"nltk.corpus.reader.framenet.AttrDict":{__init__:[71,3,1,""]},"nltk.corpus.reader.framenet.FramenetCorpusReader":{__init__:[71,3,1,""],annotations:[71,3,1,""],buildindexes:[71,3,1,""],doc:[71,3,1,""],docs:[71,3,1,""],docs_metadata:[71,3,1,""],exemplars:[71,3,1,""],fe_relations:[71,3,1,""],fes:[71,3,1,""],frame:[71,3,1,""],frame_by_id:[71,3,1,""],frame_by_name:[71,3,1,""],frame_ids_and_names:[71,3,1,""],frame_relation_types:[71,3,1,""],frame_relations:[71,3,1,""],frames:[71,3,1,""],frames_by_lemma:[71,3,1,""],ft_sents:[71,3,1,""],help:[71,3,1,""],lu:[71,3,1,""],lu_basic:[71,3,1,""],lu_ids_and_names:[71,3,1,""],lus:[71,3,1,""],propagate_semtypes:[71,3,1,""],semtype:[71,3,1,""],semtype_inherits:[71,3,1,""],semtypes:[71,3,1,""],sents:[71,3,1,""],warnings:[71,3,1,""]},"nltk.corpus.reader.framenet.Future":{__init__:[71,3,1,""]},"nltk.corpus.reader.framenet.PrettyDict":{__init__:[71,3,1,""]},"nltk.corpus.reader.framenet.PrettyList":{__init__:[71,3,1,""]},"nltk.corpus.reader.framenet.SpecialList":{__init__:[71,3,1,""]},"nltk.corpus.reader.ieer":{IEERCorpusReader:[72,2,1,""],IEERDocument:[72,2,1,""],documents:[72,7,1,""],titles:[72,7,1,""]},"nltk.corpus.reader.ieer.IEERCorpusReader":{docs:[72,3,1,""],parsed_docs:[72,3,1,""]},"nltk.corpus.reader.ieer.IEERDocument":{__init__:[72,3,1,""]},"nltk.corpus.reader.indian":{IndianCorpusReader:[73,2,1,""],IndianCorpus
View:[73,2,1,""]},"nltk.corpus.reader.indian.IndianCorpusReader":{sents:[73,3,1,""],tagged_sents:[73,3,1,""],tagged_words:[73,3,1,""],words:[73,3,1,""]},"nltk.corpus.reader.indian.IndianCorpusView":{__init__:[73,3,1,""],read_block:[73,3,1,""]},"nltk.corpus.reader.ipipan":{IPIPANCorpusReader:[74,2,1,""],IPIPANCorpusView:[74,2,1,""]},"nltk.corpus.reader.ipipan.IPIPANCorpusReader":{__init__:[74,3,1,""],categories:[74,3,1,""],channels:[74,3,1,""],domains:[74,3,1,""],fileids:[74,3,1,""],paras:[74,3,1,""],sents:[74,3,1,""],tagged_paras:[74,3,1,""],tagged_sents:[74,3,1,""],tagged_words:[74,3,1,""],words:[74,3,1,""]},"nltk.corpus.reader.ipipan.IPIPANCorpusView":{PARAS_MODE:[74,4,1,""],SENTS_MODE:[74,4,1,""],WORDS_MODE:[74,4,1,""],__init__:[74,3,1,""],read_block:[74,3,1,""]},"nltk.corpus.reader.knbc":{KNBCorpusReader:[75,2,1,""],demo:[75,1,1,""],test:[75,1,1,""]},"nltk.corpus.reader.knbc.KNBCorpusReader":{__init__:[75,3,1,""]},"nltk.corpus.reader.lin":{LinThesaurusCorpusReader:[76,2,1,""],demo:[76,1,1,""]},"nltk.corpus.reader.lin.LinThesaurusCorpusReader":{__init__:[76,3,1,""],scored_synonyms:[76,3,1,""],similarity:[76,3,1,""],synonyms:[76,3,1,""]},"nltk.corpus.reader.mte":{MTECorpusReader:[77,2,1,""],MTECorpusView:[77,2,1,""],MTEFileReader:[77,2,1,""],MTETagConverter:[77,2,1,""],xpath:[77,1,1,""]},"nltk.corpus.reader.mte.MTECorpusReader":{__init__:[77,3,1,""],lemma_paras:[77,3,1,""],lemma_sents:[77,3,1,""],lemma_words:[77,3,1,""],paras:[77,3,1,""],sents:[77,3,1,""],tagged_paras:[77,3,1,""],tagged_sents:[77,3,1,""],tagged_words:[77,3,1,""],words:[77,3,1,""]},"nltk.corpus.reader.mte.MTECorpusView":{__init__:[77,3,1,""],read_block:[77,3,1,""]},"nltk.corpus.reader.mte.MTEFileReader":{__init__:[77,3,1,""],lemma_paras:[77,3,1,""],lemma_sents:[77,3,1,""],lemma_words:[77,3,1,""],ns:[77,4,1,""],para_path:[77,4,1,""],paras:[77,3,1,""],sent_path:[77,4,1,""],sents:[77,3,1,""],tag_ns:[77,4,1,""],tagged_paras:[77,3,1,""],tagged_sents:[77,3,1,""],tagged_words:[77,3,1,""],word_path:[77,4,1,""],words:[77,3,1,""],xml_ns:[77,4,1,""]},"nltk.corpus.reader.mte.MTETagConverter":{mapping_msd_universal:[77,4,1,""],msd_to_universal:[77,3,1,""]},"nltk.corpus.reader.nkjp":{NKJPCorpusReader:[78,2,1,""],NKJPCorpus_Header_View:[78,2,1,""],NKJPCorpus_Morph_View:[78,2,1,""],NKJPCorpus_Segmentation_View:[78,2,1,""],NKJPCorpus_Text_View:[78,2,1,""],XML_Tool:[78,2,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpusReader":{HEADER_MODE:[78,4,1,""],RAW_MODE:[78,4,1,""],SENTS_MODE:[78,4,1,""],WORDS_MODE:[78,4,1,""],__init__:[78,3,1,""],add_root:[78,3,1,""],fileids:[78,3,1,""],get_paths:[78,3,1,""],header:[78,3,1,""],raw:[78,3,1,""],sents:[78,3,1,""],tagged_words:[78,3,1,""],words:[78,3,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpus_Header_View":{__init__:[78,3,1,""],handle_elt:[78,3,1,""],handle_query:[78,3,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpus_Morph_View":{__init__:[78,3,1,""],handle_elt:[78,3,1,""],handle_query:[78,3,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpus_Segmentation_View":{__init__:[78,3,1,""],get_segm_id:[78,3,1,""],get_sent_beg:[78,3,1,""],get_sent_end:[78,3,1,""],get_sentences:[78,3,1,""],handle_elt:[78,3,1,""],handle_query:[78,3,1,""],remove_choice:[78,3,1,""]},"nltk.corpus.reader.nkjp.NKJPCorpus_Text_View":{RAW_MODE:[78,4,1,""],SENTS_MODE:[78,4,1,""],__init__:[78,3,1,""],get_segm_id:[78,3,1,""],handle_elt:[78,3,1,""],handle_query:[78,3,1,""],read_block:[78,3,1,""]},"nltk.corpus.reader.nkjp.XML_Tool":{__init__:[78,3,1,""],build_preprocessed_file:[78,3,1,""],remove_preprocessed_file:[78,3,1,""]},"nltk.corpus.reader.nombank":{NombankC
hainTreePointer:[79,2,1,""],NombankCorpusReader:[79,2,1,""],NombankInstance:[79,2,1,""],NombankPointer:[79,2,1,""],NombankSplitTreePointer:[79,2,1,""],NombankTreePointer:[79,2,1,""]},"nltk.corpus.reader.nombank.NombankChainTreePointer":{__init__:[79,3,1,""],pieces:[79,4,1,""],select:[79,3,1,""]},"nltk.corpus.reader.nombank.NombankCorpusReader":{__init__:[79,3,1,""],instances:[79,3,1,""],lines:[79,3,1,""],nouns:[79,3,1,""],roleset:[79,3,1,""],rolesets:[79,3,1,""]},"nltk.corpus.reader.nombank.NombankInstance":{__init__:[79,3,1,""],arguments:[79,4,1,""],baseform:[79,4,1,""],fileid:[79,4,1,""],parse:[79,3,1,""],parse_corpus:[79,4,1,""],predicate:[79,4,1,""],predid:[79,4,1,""],roleset:[79,5,1,""],sensenumber:[79,4,1,""],sentnum:[79,4,1,""],tree:[79,5,1,""],wordnum:[79,4,1,""]},"nltk.corpus.reader.nombank.NombankPointer":{__init__:[79,3,1,""]},"nltk.corpus.reader.nombank.NombankSplitTreePointer":{__init__:[79,3,1,""],pieces:[79,4,1,""],select:[79,3,1,""]},"nltk.corpus.reader.nombank.NombankTreePointer":{__init__:[79,3,1,""],parse:[79,3,1,""],select:[79,3,1,""],treepos:[79,3,1,""]},"nltk.corpus.reader.nps_chat":{NPSChatCorpusReader:[80,2,1,""]},"nltk.corpus.reader.nps_chat.NPSChatCorpusReader":{__init__:[80,3,1,""],posts:[80,3,1,""],tagged_posts:[80,3,1,""],tagged_words:[80,3,1,""],words:[80,3,1,""],xml_posts:[80,3,1,""]},"nltk.corpus.reader.opinion_lexicon":{IgnoreReadmeCorpusView:[81,2,1,""],OpinionLexiconCorpusReader:[81,2,1,""]},"nltk.corpus.reader.opinion_lexicon.IgnoreReadmeCorpusView":{__init__:[81,3,1,""]},"nltk.corpus.reader.opinion_lexicon.OpinionLexiconCorpusReader":{CorpusView:[81,4,1,""],negative:[81,3,1,""],positive:[81,3,1,""],words:[81,3,1,""]},"nltk.corpus.reader.panlex_lite":{Meaning:[82,2,1,""],PanLexLiteCorpusReader:[82,2,1,""]},"nltk.corpus.reader.panlex_lite.Meaning":{__init__:[82,3,1,""],expressions:[82,3,1,""],id:[82,3,1,""],quality:[82,3,1,""],source:[82,3,1,""],source_group:[82,3,1,""]},"nltk.corpus.reader.panlex_lite.PanLexLiteCorpusReader":{MEANING_Q:[82,4,1,""],TRANSLATION_Q:[82,4,1,""],__init__:[82,3,1,""],language_varieties:[82,3,1,""],meanings:[82,3,1,""],translations:[82,3,1,""]},"nltk.corpus.reader.panlex_swadesh":{PanlexLanguage:[83,2,1,""],PanlexSwadeshCorpusReader:[83,2,1,""]},"nltk.corpus.reader.panlex_swadesh.PanlexLanguage":{__new__:[83,3,1,""],iso639:[83,4,1,""],iso639_type:[83,4,1,""],langvar_uid:[83,4,1,""],name:[83,4,1,""],panlex_uid:[83,4,1,""],script:[83,4,1,""]},"nltk.corpus.reader.panlex_swadesh.PanlexSwadeshCorpusReader":{__init__:[83,3,1,""],entries:[83,3,1,""],get_languages:[83,3,1,""],get_macrolanguages:[83,3,1,""],language_codes:[83,3,1,""],license:[83,3,1,""],words_by_iso639:[83,3,1,""],words_by_lang:[83,3,1,""]},"nltk.corpus.reader.pl196x":{Pl196xCorpusReader:[84,2,1,""],TEICorpusView:[84,2,1,""]},"nltk.corpus.reader.pl196x.Pl196xCorpusReader":{__init__:[84,3,1,""],decode_tag:[84,3,1,""],head_len:[84,4,1,""],paras:[84,3,1,""],sents:[84,3,1,""],tagged_paras:[84,3,1,""],tagged_sents:[84,3,1,""],tagged_words:[84,3,1,""],textids:[84,3,1,""],words:[84,3,1,""],xml:[84,3,1,""]},"nltk.corpus.reader.pl196x.TEICorpusView":{__init__:[84,3,1,""],read_block:[84,3,1,""]},"nltk.corpus.reader.plaintext":{CategorizedPlaintextCorpusReader:[85,2,1,""],EuroparlCorpusReader:[85,2,1,""],PlaintextCorpusReader:[85,2,1,""],PortugueseCategorizedPlaintextCorpusReader:[85,2,1,""]},"nltk.corpus.reader.plaintext.CategorizedPlaintextCorpusReader":{__init__:[85,3,1,""]},"nltk.corpus.reader.plaintext.EuroparlCorpusReader":{chapters:[85,3,1,""],paras:[85,3,1,""]},"nltk.corpus.
reader.plaintext.PlaintextCorpusReader":{CorpusView:[85,4,1,""],__init__:[85,3,1,""],paras:[85,3,1,""],sents:[85,3,1,""],words:[85,3,1,""]},"nltk.corpus.reader.plaintext.PortugueseCategorizedPlaintextCorpusReader":{__init__:[85,3,1,""]},"nltk.corpus.reader.ppattach":{PPAttachment:[86,2,1,""],PPAttachmentCorpusReader:[86,2,1,""]},"nltk.corpus.reader.ppattach.PPAttachment":{__init__:[86,3,1,""]},"nltk.corpus.reader.ppattach.PPAttachmentCorpusReader":{attachments:[86,3,1,""],tuples:[86,3,1,""]},"nltk.corpus.reader.propbank":{PropbankChainTreePointer:[87,2,1,""],PropbankCorpusReader:[87,2,1,""],PropbankInflection:[87,2,1,""],PropbankInstance:[87,2,1,""],PropbankPointer:[87,2,1,""],PropbankSplitTreePointer:[87,2,1,""],PropbankTreePointer:[87,2,1,""]},"nltk.corpus.reader.propbank.PropbankChainTreePointer":{__init__:[87,3,1,""],pieces:[87,4,1,""],select:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankCorpusReader":{__init__:[87,3,1,""],instances:[87,3,1,""],lines:[87,3,1,""],roleset:[87,3,1,""],rolesets:[87,3,1,""],verbs:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankInflection":{ACTIVE:[87,4,1,""],FINITE:[87,4,1,""],FUTURE:[87,4,1,""],GERUND:[87,4,1,""],INFINITIVE:[87,4,1,""],NONE:[87,4,1,""],PARTICIPLE:[87,4,1,""],PASSIVE:[87,4,1,""],PAST:[87,4,1,""],PERFECT:[87,4,1,""],PERFECT_AND_PROGRESSIVE:[87,4,1,""],PRESENT:[87,4,1,""],PROGRESSIVE:[87,4,1,""],THIRD_PERSON:[87,4,1,""],__init__:[87,3,1,""],parse:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankInstance":{__init__:[87,3,1,""],arguments:[87,4,1,""],baseform:[87,5,1,""],fileid:[87,4,1,""],inflection:[87,4,1,""],parse:[87,3,1,""],parse_corpus:[87,4,1,""],predicate:[87,4,1,""],predid:[87,5,1,""],roleset:[87,4,1,""],sensenumber:[87,5,1,""],sentnum:[87,4,1,""],tagger:[87,4,1,""],tree:[87,5,1,""],wordnum:[87,4,1,""]},"nltk.corpus.reader.propbank.PropbankPointer":{__init__:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankSplitTreePointer":{__init__:[87,3,1,""],pieces:[87,4,1,""],select:[87,3,1,""]},"nltk.corpus.reader.propbank.PropbankTreePointer":{__init__:[87,3,1,""],parse:[87,3,1,""],select:[87,3,1,""],treepos:[87,3,1,""]},"nltk.corpus.reader.pros_cons":{ProsConsCorpusReader:[88,2,1,""]},"nltk.corpus.reader.pros_cons.ProsConsCorpusReader":{CorpusView:[88,4,1,""],__init__:[88,3,1,""],sents:[88,3,1,""],words:[88,3,1,""]},"nltk.corpus.reader.reviews":{Review:[89,2,1,""],ReviewLine:[89,2,1,""],ReviewsCorpusReader:[89,2,1,""]},"nltk.corpus.reader.reviews.Review":{__init__:[89,3,1,""],add_line:[89,3,1,""],features:[89,3,1,""],sents:[89,3,1,""]},"nltk.corpus.reader.reviews.ReviewLine":{__init__:[89,3,1,""]},"nltk.corpus.reader.reviews.ReviewsCorpusReader":{CorpusView:[89,4,1,""],__init__:[89,3,1,""],features:[89,3,1,""],reviews:[89,3,1,""],sents:[89,3,1,""],words:[89,3,1,""]},"nltk.corpus.reader.rte":{RTECorpusReader:[90,2,1,""],RTEPair:[90,2,1,""],norm:[90,1,1,""]},"nltk.corpus.reader.rte.RTECorpusReader":{pairs:[90,3,1,""]},"nltk.corpus.reader.rte.RTEPair":{__init__:[90,3,1,""]},"nltk.corpus.reader.semcor":{SemcorCorpusReader:[91,2,1,""],SemcorSentence:[91,2,1,""],SemcorWordView:[91,2,1,""]},"nltk.corpus.reader.semcor.SemcorCorpusReader":{__init__:[91,3,1,""],chunk_sents:[91,3,1,""],chunks:[91,3,1,""],sents:[91,3,1,""],tagged_chunks:[91,3,1,""],tagged_sents:[91,3,1,""],words:[91,3,1,""]},"nltk.corpus.reader.semcor.SemcorSentence":{__init__:[91,3,1,""]},"nltk.corpus.reader.semcor.SemcorWordView":{__init__:[91,3,1,""],handle_elt:[91,3,1,""],handle_sent:[91,3,1,""],handle_word:[91,3,1,""]},"nltk.corpus.reader.senseval":{SensevalCorpusReader:[92,2,1
,""],SensevalCorpusView:[92,2,1,""],SensevalInstance:[92,2,1,""]},"nltk.corpus.reader.senseval.SensevalCorpusReader":{instances:[92,3,1,""]},"nltk.corpus.reader.senseval.SensevalCorpusView":{__init__:[92,3,1,""],read_block:[92,3,1,""]},"nltk.corpus.reader.senseval.SensevalInstance":{__init__:[92,3,1,""]},"nltk.corpus.reader.sentiwordnet":{SentiSynset:[93,2,1,""],SentiWordNetCorpusReader:[93,2,1,""]},"nltk.corpus.reader.sentiwordnet.SentiSynset":{__init__:[93,3,1,""],neg_score:[93,3,1,""],obj_score:[93,3,1,""],pos_score:[93,3,1,""]},"nltk.corpus.reader.sentiwordnet.SentiWordNetCorpusReader":{__init__:[93,3,1,""],all_senti_synsets:[93,3,1,""],senti_synset:[93,3,1,""],senti_synsets:[93,3,1,""]},"nltk.corpus.reader.sinica_treebank":{SinicaTreebankCorpusReader:[94,2,1,""]},"nltk.corpus.reader.string_category":{StringCategoryCorpusReader:[95,2,1,""]},"nltk.corpus.reader.string_category.StringCategoryCorpusReader":{__init__:[95,3,1,""],tuples:[95,3,1,""]},"nltk.corpus.reader.switchboard":{SwitchboardCorpusReader:[96,2,1,""],SwitchboardTurn:[96,2,1,""]},"nltk.corpus.reader.switchboard.SwitchboardCorpusReader":{__init__:[96,3,1,""],discourses:[96,3,1,""],tagged_discourses:[96,3,1,""],tagged_turns:[96,3,1,""],tagged_words:[96,3,1,""],turns:[96,3,1,""],words:[96,3,1,""]},"nltk.corpus.reader.switchboard.SwitchboardTurn":{__init__:[96,3,1,""]},"nltk.corpus.reader.tagged":{CategorizedTaggedCorpusReader:[97,2,1,""],MacMorphoCorpusReader:[97,2,1,""],TaggedCorpusReader:[97,2,1,""],TaggedCorpusView:[97,2,1,""],TimitTaggedCorpusReader:[97,2,1,""]},"nltk.corpus.reader.tagged.CategorizedTaggedCorpusReader":{__init__:[97,3,1,""],tagged_paras:[97,3,1,""],tagged_sents:[97,3,1,""],tagged_words:[97,3,1,""]},"nltk.corpus.reader.tagged.MacMorphoCorpusReader":{__init__:[97,3,1,""]},"nltk.corpus.reader.tagged.TaggedCorpusReader":{__init__:[97,3,1,""],paras:[97,3,1,""],sents:[97,3,1,""],tagged_paras:[97,3,1,""],tagged_sents:[97,3,1,""],tagged_words:[97,3,1,""],words:[97,3,1,""]},"nltk.corpus.reader.tagged.TaggedCorpusView":{__init__:[97,3,1,""],read_block:[97,3,1,""]},"nltk.corpus.reader.tagged.TimitTaggedCorpusReader":{__init__:[97,3,1,""],paras:[97,3,1,""],tagged_paras:[97,3,1,""]},"nltk.corpus.reader.timit":{SpeakerInfo:[98,2,1,""],TimitCorpusReader:[98,2,1,""],read_timit_block:[98,1,1,""]},"nltk.corpus.reader.timit.SpeakerInfo":{__init__:[98,3,1,""]},"nltk.corpus.reader.timit.TimitCorpusReader":{__init__:[98,3,1,""],audiodata:[98,3,1,""],fileids:[98,3,1,""],phone_times:[98,3,1,""],phone_trees:[98,3,1,""],phones:[98,3,1,""],play:[98,3,1,""],sent_times:[98,3,1,""],sentid:[98,3,1,""],sents:[98,3,1,""],spkrid:[98,3,1,""],spkrinfo:[98,3,1,""],spkrutteranceids:[98,3,1,""],transcription_dict:[98,3,1,""],utterance:[98,3,1,""],utteranceids:[98,3,1,""],wav:[98,3,1,""],word_times:[98,3,1,""],words:[98,3,1,""]},"nltk.corpus.reader.toolbox":{ToolboxCorpusReader:[99,2,1,""],demo:[99,1,1,""]},"nltk.corpus.reader.toolbox.ToolboxCorpusReader":{entries:[99,3,1,""],fields:[99,3,1,""],words:[99,3,1,""],xml:[99,3,1,""]},"nltk.corpus.reader.twitter":{TwitterCorpusReader:[100,2,1,""]},"nltk.corpus.reader.twitter.TwitterCorpusReader":{CorpusView:[100,4,1,""],__init__:[100,3,1,""],docs:[100,3,1,""],strings:[100,3,1,""],tokenized:[100,3,1,""]},"nltk.corpus.reader.udhr":{UdhrCorpusReader:[101,2,1,""]},"nltk.corpus.reader.udhr.UdhrCorpusReader":{ENCODINGS:[101,4,1,""],SKIP:[101,4,1,""],__init__:[101,3,1,""]},"nltk.corpus.reader.util":{ConcatenatedCorpusView:[102,2,1,""],PickleCorpusView:[102,2,1,""],StreamBackedCorpusView:[102,2,1,""],concat:[1
02,1,1,""],find_corpus_fileids:[102,1,1,""],read_alignedsent_block:[102,1,1,""],read_blankline_block:[102,1,1,""],read_line_block:[102,1,1,""],read_regexp_block:[102,1,1,""],read_sexpr_block:[102,1,1,""],read_whitespace_block:[102,1,1,""],read_wordpunct_block:[102,1,1,""],tagged_treebank_para_block_reader:[102,1,1,""]},"nltk.corpus.reader.util.ConcatenatedCorpusView":{__init__:[102,3,1,""],close:[102,3,1,""],iterate_from:[102,3,1,""]},"nltk.corpus.reader.util.PickleCorpusView":{BLOCK_SIZE:[102,4,1,""],PROTOCOL:[102,4,1,""],__init__:[102,3,1,""],cache_to_tempfile:[102,3,1,""],read_block:[102,3,1,""],write:[102,3,1,""]},"nltk.corpus.reader.util.StreamBackedCorpusView":{__init__:[102,3,1,""],close:[102,3,1,""],fileid:[102,5,1,""],iterate_from:[102,3,1,""],read_block:[102,3,1,""]},"nltk.corpus.reader.verbnet":{VerbnetCorpusReader:[103,2,1,""]},"nltk.corpus.reader.verbnet.VerbnetCorpusReader":{__init__:[103,3,1,""],classids:[103,3,1,""],fileids:[103,3,1,""],frames:[103,3,1,""],lemmas:[103,3,1,""],longid:[103,3,1,""],pprint:[103,3,1,""],pprint_frames:[103,3,1,""],pprint_members:[103,3,1,""],pprint_subclasses:[103,3,1,""],pprint_themroles:[103,3,1,""],shortid:[103,3,1,""],subclasses:[103,3,1,""],themroles:[103,3,1,""],vnclass:[103,3,1,""],wordnetids:[103,3,1,""]},"nltk.corpus.reader.wordlist":{MWAPPDBCorpusReader:[104,2,1,""],NonbreakingPrefixesCorpusReader:[104,2,1,""],SwadeshCorpusReader:[104,2,1,""],UnicharsCorpusReader:[104,2,1,""],WordListCorpusReader:[104,2,1,""]},"nltk.corpus.reader.wordlist.MWAPPDBCorpusReader":{entries:[104,3,1,""],mwa_ppdb_xxxl_file:[104,4,1,""]},"nltk.corpus.reader.wordlist.NonbreakingPrefixesCorpusReader":{available_langs:[104,4,1,""],words:[104,3,1,""]},"nltk.corpus.reader.wordlist.SwadeshCorpusReader":{entries:[104,3,1,""]},"nltk.corpus.reader.wordlist.UnicharsCorpusReader":{available_categories:[104,4,1,""],chars:[104,3,1,""]},"nltk.corpus.reader.wordlist.WordListCorpusReader":{words:[104,3,1,""]},"nltk.corpus.reader.wordnet":{Lemma:[105,2,1,""],Synset:[105,2,1,""],WordNetCorpusReader:[105,2,1,""],WordNetError:[105,6,1,""],WordNetICCorpusReader:[105,2,1,""],information_content:[105,1,1,""],jcn_similarity:[105,1,1,""],lch_similarity:[105,1,1,""],lin_similarity:[105,1,1,""],path_similarity:[105,1,1,""],res_similarity:[105,1,1,""],wup_similarity:[105,1,1,""]},"nltk.corpus.reader.wordnet.Lemma":{__init__:[105,3,1,""],antonyms:[105,3,1,""],count:[105,3,1,""],derivationally_related_forms:[105,3,1,""],frame_ids:[105,3,1,""],frame_strings:[105,3,1,""],key:[105,3,1,""],lang:[105,3,1,""],name:[105,3,1,""],pertainyms:[105,3,1,""],synset:[105,3,1,""],syntactic_marker:[105,3,1,""]},"nltk.corpus.reader.wordnet.Synset":{__init__:[105,3,1,""],acyclic_tree:[105,3,1,""],closure:[105,3,1,""],common_hypernyms:[105,3,1,""],definition:[105,3,1,""],examples:[105,3,1,""],frame_ids:[105,3,1,""],hypernym_distances:[105,3,1,""],hypernym_paths:[105,3,1,""],jcn_similarity:[105,3,1,""],lch_similarity:[105,3,1,""],lemma_names:[105,3,1,""],lemmas:[105,3,1,""],lexname:[105,3,1,""],lin_similarity:[105,3,1,""],lowest_common_hypernyms:[105,3,1,""],max_depth:[105,3,1,""],min_depth:[105,3,1,""],mst:[105,3,1,""],name:[105,3,1,""],offset:[105,3,1,""],path_similarity:[105,3,1,""],pos:[105,3,1,""],res_similarity:[105,3,1,""],root_hypernyms:[105,3,1,""],shortest_path_distance:[105,3,1,""],tree:[105,3,1,""],wup_similarity:[105,3,1,""]},"nltk.corpus.reader.wordnet.WordNetCorpusReader":{ADJ:[105,4,1,""],ADJ_SAT:[105,4,1,""],ADV:[105,4,1,""],MORPHOLOGICAL_SUBSTITUTIONS:[105,4,1,""],NOUN:[105,4,1,""],VERB:[105,4,
1,""],__init__:[105,3,1,""],all_lemma_names:[105,3,1,""],all_synsets:[105,3,1,""],citation:[105,3,1,""],custom_lemmas:[105,3,1,""],digraph:[105,3,1,""],get_version:[105,3,1,""],ic:[105,3,1,""],jcn_similarity:[105,3,1,""],langs:[105,3,1,""],lch_similarity:[105,3,1,""],lemma:[105,3,1,""],lemma_count:[105,3,1,""],lemma_from_key:[105,3,1,""],lemmas:[105,3,1,""],license:[105,3,1,""],lin_similarity:[105,3,1,""],morphy:[105,3,1,""],of2ss:[105,3,1,""],path_similarity:[105,3,1,""],readme:[105,3,1,""],res_similarity:[105,3,1,""],ss2of:[105,3,1,""],synset:[105,3,1,""],synset_from_pos_and_offset:[105,3,1,""],synset_from_sense_key:[105,3,1,""],synsets:[105,3,1,""],words:[105,3,1,""],wup_similarity:[105,3,1,""]},"nltk.corpus.reader.wordnet.WordNetICCorpusReader":{__init__:[105,3,1,""],ic:[105,3,1,""]},"nltk.corpus.reader.xmldocs":{XMLCorpusReader:[106,2,1,""],XMLCorpusView:[106,2,1,""]},"nltk.corpus.reader.xmldocs.XMLCorpusReader":{__init__:[106,3,1,""],words:[106,3,1,""],xml:[106,3,1,""]},"nltk.corpus.reader.xmldocs.XMLCorpusView":{__init__:[106,3,1,""],handle_elt:[106,3,1,""],read_block:[106,3,1,""]},"nltk.corpus.reader.ycoe":{YCOECorpusReader:[107,2,1,""],YCOEParseCorpusReader:[107,2,1,""],YCOETaggedCorpusReader:[107,2,1,""],documents:[107,7,1,""]},"nltk.corpus.reader.ycoe.YCOECorpusReader":{__init__:[107,3,1,""],documents:[107,3,1,""],fileids:[107,3,1,""],paras:[107,3,1,""],parsed_sents:[107,3,1,""],sents:[107,3,1,""],tagged_paras:[107,3,1,""],tagged_sents:[107,3,1,""],tagged_words:[107,3,1,""],words:[107,3,1,""]},"nltk.corpus.reader.ycoe.YCOETaggedCorpusReader":{__init__:[107,3,1,""]},"nltk.corpus.util":{LazyCorpusLoader:[108,2,1,""]},"nltk.corpus.util.LazyCorpusLoader":{__init__:[108,3,1,""]},"nltk.data":{AUTO_FORMATS:[109,7,1,""],BufferedGzipFile:[109,1,1,""],FORMATS:[109,7,1,""],FileSystemPathPointer:[109,2,1,""],GzipFileSystemPathPointer:[109,2,1,""],LazyLoader:[109,2,1,""],OpenOnDemandZipFile:[109,2,1,""],PathPointer:[109,2,1,""],SeekableUnicodeStreamReader:[109,2,1,""],clear_cache:[109,1,1,""],find:[109,1,1,""],load:[109,1,1,""],path:[109,7,1,""],retrieve:[109,1,1,""],show_cfg:[109,1,1,""]},"nltk.data.FileSystemPathPointer":{__init__:[109,3,1,""],file_size:[109,3,1,""],join:[109,3,1,""],open:[109,3,1,""],path:[109,5,1,""]},"nltk.data.GzipFileSystemPathPointer":{open:[109,3,1,""]},"nltk.data.LazyLoader":{__init__:[109,3,1,""]},"nltk.data.OpenOnDemandZipFile":{__init__:[109,3,1,""],read:[109,3,1,""],write:[109,3,1,""],writestr:[109,3,1,""]},"nltk.data.PathPointer":{file_size:[109,3,1,""],join:[109,3,1,""],open:[109,3,1,""]},"nltk.data.SeekableUnicodeStreamReader":{DEBUG:[109,4,1,""],__init__:[109,3,1,""],bytebuffer:[109,4,1,""],char_seek_forward:[109,3,1,""],close:[109,3,1,""],closed:[109,5,1,""],decode:[109,4,1,""],discard_line:[109,3,1,""],encoding:[109,4,1,""],errors:[109,4,1,""],linebuffer:[109,4,1,""],mode:[109,5,1,""],name:[109,5,1,""],next:[109,3,1,""],read:[109,3,1,""],readline:[109,3,1,""],readlines:[109,3,1,""],seek:[109,3,1,""],stream:[109,4,1,""],tell:[109,3,1,""],xreadlines:[109,3,1,""]},"nltk.decorators":{decorator:[110,1,1,""],getinfo:[110,1,1,""],new_wrapper:[110,1,1,""]},"nltk.downloader":{Collection:[111,2,1,""],Downloader:[111,2,1,""],DownloaderGUI:[111,2,1,""],DownloaderMessage:[111,2,1,""],DownloaderShell:[111,2,1,""],ErrorMessage:[111,2,1,""],FinishCollectionMessage:[111,2,1,""],FinishDownloadMessage:[111,2,1,""],FinishPackageMessage:[111,2,1,""],FinishUnzipMessage:[111,2,1,""],Package:[111,2,1,""],ProgressMessage:[111,2,1,""],SelectDownloadDirMessage:[111,2,1,""],StaleMess
age:[111,2,1,""],StartCollectionMessage:[111,2,1,""],StartDownloadMessage:[111,2,1,""],StartPackageMessage:[111,2,1,""],StartUnzipMessage:[111,2,1,""],UpToDateMessage:[111,2,1,""],build_index:[111,1,1,""],download:[111,1,1,""],download_gui:[111,1,1,""],download_shell:[111,1,1,""],md5_hexdigest:[111,1,1,""],unzip:[111,1,1,""],update:[111,1,1,""]},"nltk.downloader.Collection":{__init__:[111,3,1,""],children:[111,4,1,""],fromxml:[111,3,1,""],id:[111,4,1,""],name:[111,4,1,""],packages:[111,4,1,""]},"nltk.downloader.Downloader":{DEFAULT_URL:[111,4,1,""],INDEX_TIMEOUT:[111,4,1,""],INSTALLED:[111,4,1,""],NOT_INSTALLED:[111,4,1,""],PARTIAL:[111,4,1,""],STALE:[111,4,1,""],__init__:[111,3,1,""],clear_status_cache:[111,3,1,""],collections:[111,3,1,""],corpora:[111,3,1,""],default_download_dir:[111,3,1,""],download:[111,3,1,""],download_dir:[111,5,1,""],incr_download:[111,3,1,""],index:[111,3,1,""],info:[111,3,1,""],is_installed:[111,3,1,""],is_stale:[111,3,1,""],list:[111,3,1,""],models:[111,3,1,""],packages:[111,3,1,""],status:[111,3,1,""],update:[111,3,1,""],url:[111,5,1,""],xmlinfo:[111,3,1,""]},"nltk.downloader.DownloaderGUI":{COLUMNS:[111,4,1,""],COLUMN_WEIGHTS:[111,4,1,""],COLUMN_WIDTHS:[111,4,1,""],DEFAULT_COLUMN_WIDTH:[111,4,1,""],HELP:[111,4,1,""],INITIAL_COLUMNS:[111,4,1,""],__init__:[111,3,1,""],about:[111,3,1,""],c:[111,4,1,""],destroy:[111,3,1,""],help:[111,3,1,""],mainloop:[111,3,1,""]},"nltk.downloader.DownloaderShell":{__init__:[111,3,1,""],run:[111,3,1,""]},"nltk.downloader.ErrorMessage":{__init__:[111,3,1,""]},"nltk.downloader.FinishCollectionMessage":{__init__:[111,3,1,""]},"nltk.downloader.FinishDownloadMessage":{__init__:[111,3,1,""]},"nltk.downloader.FinishPackageMessage":{__init__:[111,3,1,""]},"nltk.downloader.FinishUnzipMessage":{__init__:[111,3,1,""]},"nltk.downloader.Package":{__init__:[111,3,1,""],author:[111,4,1,""],checksum:[111,4,1,""],contact:[111,4,1,""],copyright:[111,4,1,""],filename:[111,4,1,""],fromxml:[111,3,1,""],id:[111,4,1,""],license:[111,4,1,""],name:[111,4,1,""],size:[111,4,1,""],subdir:[111,4,1,""],svn_revision:[111,4,1,""],unzip:[111,4,1,""],unzipped_size:[111,4,1,""],url:[111,4,1,""]},"nltk.downloader.ProgressMessage":{__init__:[111,3,1,""]},"nltk.downloader.SelectDownloadDirMessage":{__init__:[111,3,1,""]},"nltk.downloader.StaleMessage":{__init__:[111,3,1,""]},"nltk.downloader.StartCollectionMessage":{__init__:[111,3,1,""]},"nltk.downloader.StartDownloadMessage":{__init__:[111,3,1,""]},"nltk.downloader.StartPackageMessage":{__init__:[111,3,1,""]},"nltk.downloader.StartUnzipMessage":{__init__:[111,3,1,""]},"nltk.downloader.UpToDateMessage":{__init__:[111,3,1,""]},"nltk.draw":{cfg:[113,0,0,"-"],dispersion:[114,0,0,"-"],table:[115,0,0,"-"],tree:[116,0,0,"-"],util:[117,0,0,"-"]},"nltk.draw.cfg":{CFGDemo:[113,2,1,""],CFGEditor:[113,2,1,""],ProductionList:[113,2,1,""],demo2:[113,1,1,""],demo3:[113,1,1,""],demo:[113,1,1,""]},"nltk.draw.cfg.CFGDemo":{__init__:[113,3,1,""],destroy:[113,3,1,""],mainloop:[113,3,1,""],reset_workspace:[113,3,1,""],workspace_markprod:[113,3,1,""]},"nltk.draw.cfg.CFGEditor":{ARROW:[113,4,1,""],__init__:[113,3,1,""]},"nltk.draw.cfg.ProductionList":{ARROW:[113,4,1,""]},"nltk.draw.dispersion":{dispersion_plot:[114,1,1,""]},"nltk.draw.table":{MultiListbox:[115,2,1,""],Table:[115,2,1,""],demo:[115,1,1,""]},"nltk.draw.table.MultiListbox":{"delete":[115,3,1,""],FRAME_CONFIG:[115,4,1,""],LABEL_CONFIG:[115,4,1,""],LISTBOX_CONFIG:[115,4,1,""],__init__:[115,3,1,""],activate:[115,3,1,""],bbox:[115,3,1,""],bind_to_columns:[115,3,1,""],bind_to_label
s:[115,3,1,""],bind_to_listboxes:[115,3,1,""],column_labels:[115,5,1,""],column_names:[115,5,1,""],columnconfig:[115,3,1,""],columnconfigure:[115,3,1,""],configure:[115,3,1,""],curselection:[115,3,1,""],get:[115,3,1,""],hide_column:[115,3,1,""],index:[115,3,1,""],insert:[115,3,1,""],itemcget:[115,3,1,""],itemconfig:[115,3,1,""],itemconfigure:[115,3,1,""],listboxes:[115,5,1,""],nearest:[115,3,1,""],rowconfig:[115,3,1,""],rowconfigure:[115,3,1,""],scan_dragto:[115,3,1,""],scan_mark:[115,3,1,""],see:[115,3,1,""],select:[115,3,1,""],select_anchor:[115,3,1,""],select_clear:[115,3,1,""],select_includes:[115,3,1,""],select_set:[115,3,1,""],selection_anchor:[115,3,1,""],selection_clear:[115,3,1,""],selection_includes:[115,3,1,""],selection_set:[115,3,1,""],show_column:[115,3,1,""],size:[115,3,1,""],yview:[115,3,1,""],yview_moveto:[115,3,1,""],yview_scroll:[115,3,1,""]},"nltk.draw.table.Table":{__init__:[115,3,1,""],append:[115,3,1,""],bind:[115,3,1,""],bind_to_columns:[115,3,1,""],bind_to_labels:[115,3,1,""],bind_to_listboxes:[115,3,1,""],clear:[115,3,1,""],column_index:[115,3,1,""],column_names:[115,5,1,""],columnconfig:[115,3,1,""],columnconfigure:[115,3,1,""],extend:[115,3,1,""],focus:[115,3,1,""],grid:[115,3,1,""],hide_column:[115,3,1,""],insert:[115,3,1,""],itemconfig:[115,3,1,""],itemconfigure:[115,3,1,""],pack:[115,3,1,""],rowconfig:[115,3,1,""],rowconfigure:[115,3,1,""],select:[115,3,1,""],selected_row:[115,3,1,""],show_column:[115,3,1,""],sort_by:[115,3,1,""]},"nltk.draw.tree":{TreeSegmentWidget:[116,2,1,""],TreeView:[116,2,1,""],TreeWidget:[116,2,1,""],demo:[116,1,1,""],draw_trees:[116,1,1,""],tree_to_treesegment:[116,1,1,""]},"nltk.draw.tree.TreeSegmentWidget":{__init__:[116,3,1,""],insert_child:[116,3,1,""],label:[116,3,1,""],remove_child:[116,3,1,""],replace_child:[116,3,1,""],set_label:[116,3,1,""],subtrees:[116,3,1,""]},"nltk.draw.tree.TreeView":{__init__:[116,3,1,""],destroy:[116,3,1,""],mainloop:[116,3,1,""],resize:[116,3,1,""]},"nltk.draw.tree.TreeWidget":{__init__:[116,3,1,""],bind_click_leaves:[116,3,1,""],bind_click_nodes:[116,3,1,""],bind_click_trees:[116,3,1,""],bind_drag_leaves:[116,3,1,""],bind_drag_nodes:[116,3,1,""],bind_drag_trees:[116,3,1,""],collapsed_tree:[116,3,1,""],expanded_tree:[116,3,1,""],toggle_collapsed:[116,3,1,""]},"nltk.draw.util":{AbstractContainerWidget:[117,2,1,""],BoxWidget:[117,2,1,""],BracketWidget:[117,2,1,""],CanvasFrame:[117,2,1,""],CanvasWidget:[117,2,1,""],ColorizedList:[117,2,1,""],EntryDialog:[117,2,1,""],MutableOptionMenu:[117,2,1,""],OvalWidget:[117,2,1,""],ParenWidget:[117,2,1,""],ScrollWatcherWidget:[117,2,1,""],SequenceWidget:[117,2,1,""],ShowText:[117,2,1,""],SpaceWidget:[117,2,1,""],StackWidget:[117,2,1,""],SymbolWidget:[117,2,1,""],TextWidget:[117,2,1,""],demo:[117,1,1,""]},"nltk.draw.util.AbstractContainerWidget":{__init__:[117,3,1,""],child:[117,3,1,""],set_child:[117,3,1,""]},"nltk.draw.util.BoxWidget":{__init__:[117,3,1,""]},"nltk.draw.util.BracketWidget":{__init__:[117,3,1,""]},"nltk.draw.util.CanvasFrame":{__init__:[117,3,1,""],add_widget:[117,3,1,""],canvas:[117,3,1,""],destroy:[117,3,1,""],destroy_widget:[117,3,1,""],mainloop:[117,3,1,""],pack:[117,3,1,""],print_to_file:[117,3,1,""],remove_widget:[117,3,1,""],scrollregion:[117,3,1,""]},"nltk.draw.util.CanvasWidget":{__init__:[117,3,1,""],bbox:[117,3,1,""],bind_click:[117,3,1,""],bind_drag:[117,3,1,""],canvas:[117,3,1,""],child_widgets:[117,3,1,""],destroy:[117,3,1,""],height:[117,3,1,""],hidden:[117,3,1,""],hide:[117,3,1,""],manage:[117,3,1,""],move:[117,3,1,""],moveto:[117,3,1,
""],parent:[117,3,1,""],show:[117,3,1,""],tags:[117,3,1,""],unbind_click:[117,3,1,""],unbind_drag:[117,3,1,""],update:[117,3,1,""],width:[117,3,1,""]},"nltk.draw.util.ColorizedList":{__init__:[117,3,1,""],add_callback:[117,3,1,""],focus:[117,3,1,""],get:[117,3,1,""],grid:[117,3,1,""],mark:[117,3,1,""],markonly:[117,3,1,""],pack:[117,3,1,""],remove_callback:[117,3,1,""],set:[117,3,1,""],unmark:[117,3,1,""],view:[117,3,1,""]},"nltk.draw.util.EntryDialog":{__init__:[117,3,1,""]},"nltk.draw.util.MutableOptionMenu":{__init__:[117,3,1,""],add:[117,3,1,""],destroy:[117,3,1,""],remove:[117,3,1,""],set:[117,3,1,""]},"nltk.draw.util.OvalWidget":{RATIO:[117,4,1,""],__init__:[117,3,1,""]},"nltk.draw.util.ParenWidget":{__init__:[117,3,1,""]},"nltk.draw.util.ScrollWatcherWidget":{__init__:[117,3,1,""],add_child:[117,3,1,""],remove_child:[117,3,1,""]},"nltk.draw.util.SequenceWidget":{__init__:[117,3,1,""],children:[117,3,1,""],insert_child:[117,3,1,""],remove_child:[117,3,1,""],replace_child:[117,3,1,""]},"nltk.draw.util.ShowText":{__init__:[117,3,1,""],destroy:[117,3,1,""],find_dimentions:[117,3,1,""],mainloop:[117,3,1,""]},"nltk.draw.util.SpaceWidget":{__init__:[117,3,1,""],set_height:[117,3,1,""],set_width:[117,3,1,""]},"nltk.draw.util.StackWidget":{__init__:[117,3,1,""],children:[117,3,1,""],insert_child:[117,3,1,""],remove_child:[117,3,1,""],replace_child:[117,3,1,""]},"nltk.draw.util.SymbolWidget":{SYMBOLS:[117,4,1,""],__init__:[117,3,1,""],set_symbol:[117,3,1,""],symbol:[117,3,1,""],symbolsheet:[117,3,1,""]},"nltk.draw.util.TextWidget":{__init__:[117,3,1,""],set_text:[117,3,1,""],text:[117,3,1,""]},"nltk.featstruct":{FeatDict:[118,2,1,""],FeatList:[118,2,1,""],FeatStruct:[118,2,1,""],FeatStructReader:[118,2,1,""],Feature:[118,2,1,""],RangeFeature:[118,2,1,""],SlashFeature:[118,2,1,""],conflicts:[118,1,1,""],subsumes:[118,1,1,""],unify:[118,1,1,""]},"nltk.featstruct.FeatDict":{__init__:[118,3,1,""],clear:[118,3,1,""],get:[118,3,1,""],has_key:[118,3,1,""],pop:[118,3,1,""],popitem:[118,3,1,""],setdefault:[118,3,1,""],update:[118,3,1,""]},"nltk.featstruct.FeatList":{__init__:[118,3,1,""],append:[118,3,1,""],extend:[118,3,1,""],insert:[118,3,1,""],pop:[118,3,1,""],remove:[118,3,1,""],reverse:[118,3,1,""],sort:[118,3,1,""]},"nltk.featstruct.FeatStruct":{__new__:[118,3,1,""],copy:[118,3,1,""],cyclic:[118,3,1,""],equal_values:[118,3,1,""],freeze:[118,3,1,""],frozen:[118,3,1,""],remove_variables:[118,3,1,""],rename_variables:[118,3,1,""],retract_bindings:[118,3,1,""],substitute_bindings:[118,3,1,""],subsumes:[118,3,1,""],unify:[118,3,1,""],variables:[118,3,1,""],walk:[118,3,1,""]},"nltk.featstruct.FeatStructReader":{VALUE_HANDLERS:[118,4,1,""],__init__:[118,3,1,""],fromstring:[118,3,1,""],read_app_value:[118,3,1,""],read_fstruct_value:[118,3,1,""],read_int_value:[118,3,1,""],read_logic_value:[118,3,1,""],read_partial:[118,3,1,""],read_set_value:[118,3,1,""],read_str_value:[118,3,1,""],read_sym_value:[118,3,1,""],read_tuple_value:[118,3,1,""],read_value:[118,3,1,""],read_var_value:[118,3,1,""]},"nltk.featstruct.Feature":{"default":[118,5,1,""],__init__:[118,3,1,""],display:[118,5,1,""],name:[118,5,1,""],read_value:[118,3,1,""],unify_base_values:[118,3,1,""]},"nltk.featstruct.RangeFeature":{RANGE_RE:[118,4,1,""],read_value:[118,3,1,""],unify_base_values:[118,3,1,""]},"nltk.featstruct.SlashFeature":{read_value:[118,3,1,""]},"nltk.grammar":{CFG:[119,2,1,""],DependencyGrammar:[119,2,1,""],DependencyProduction:[119,2,1,""],Nonterminal:[119,2,1,""],PCFG:[119,2,1,""],ProbabilisticDependencyGrammar:[119,2,1,""],Prob
abilisticProduction:[119,2,1,""],Production:[119,2,1,""],induce_pcfg:[119,1,1,""],nonterminals:[119,1,1,""],read_grammar:[119,1,1,""]},"nltk.grammar.CFG":{__init__:[119,3,1,""],binarize:[119,3,1,""],check_coverage:[119,3,1,""],chomsky_normal_form:[119,3,1,""],eliminate_start:[119,3,1,""],fromstring:[119,3,1,""],is_binarised:[119,3,1,""],is_chomsky_normal_form:[119,3,1,""],is_flexible_chomsky_normal_form:[119,3,1,""],is_leftcorner:[119,3,1,""],is_lexical:[119,3,1,""],is_nonempty:[119,3,1,""],is_nonlexical:[119,3,1,""],leftcorner_parents:[119,3,1,""],leftcorners:[119,3,1,""],max_len:[119,3,1,""],min_len:[119,3,1,""],productions:[119,3,1,""],remove_unitary_rules:[119,3,1,""],start:[119,3,1,""]},"nltk.grammar.DependencyGrammar":{__init__:[119,3,1,""],contains:[119,3,1,""],fromstring:[119,3,1,""]},"nltk.grammar.Nonterminal":{__init__:[119,3,1,""],symbol:[119,3,1,""]},"nltk.grammar.PCFG":{EPSILON:[119,4,1,""],__init__:[119,3,1,""],fromstring:[119,3,1,""]},"nltk.grammar.ProbabilisticDependencyGrammar":{__init__:[119,3,1,""],contains:[119,3,1,""]},"nltk.grammar.ProbabilisticProduction":{__init__:[119,3,1,""]},"nltk.grammar.Production":{__init__:[119,3,1,""],is_lexical:[119,3,1,""],is_nonlexical:[119,3,1,""],lhs:[119,3,1,""],rhs:[119,3,1,""]},"nltk.help":{brown_tagset:[120,1,1,""],claws5_tagset:[120,1,1,""],upenn_tagset:[120,1,1,""]},"nltk.inference":{api:[122,0,0,"-"],discourse:[123,0,0,"-"],mace:[124,0,0,"-"],nonmonotonic:[125,0,0,"-"],prover9:[126,0,0,"-"],resolution:[127,0,0,"-"],tableau:[128,0,0,"-"]},"nltk.inference.api":{BaseModelBuilderCommand:[122,2,1,""],BaseProverCommand:[122,2,1,""],BaseTheoremToolCommand:[122,2,1,""],ModelBuilder:[122,2,1,""],ModelBuilderCommand:[122,2,1,""],ModelBuilderCommandDecorator:[122,2,1,""],ParallelProverBuilder:[122,2,1,""],ParallelProverBuilderCommand:[122,2,1,""],Prover:[122,2,1,""],ProverCommand:[122,2,1,""],ProverCommandDecorator:[122,2,1,""],TheoremToolCommand:[122,2,1,""],TheoremToolCommandDecorator:[122,2,1,""],TheoremToolThread:[122,2,1,""]},"nltk.inference.api.BaseModelBuilderCommand":{__init__:[122,3,1,""],build_model:[122,3,1,""],get_model_builder:[122,3,1,""],model:[122,3,1,""]},"nltk.inference.api.BaseProverCommand":{__init__:[122,3,1,""],decorate_proof:[122,3,1,""],get_prover:[122,3,1,""],proof:[122,3,1,""],prove:[122,3,1,""]},"nltk.inference.api.BaseTheoremToolCommand":{__init__:[122,3,1,""],add_assumptions:[122,3,1,""],assumptions:[122,3,1,""],goal:[122,3,1,""],print_assumptions:[122,3,1,""],retract_assumptions:[122,3,1,""]},"nltk.inference.api.ModelBuilder":{build_model:[122,3,1,""]},"nltk.inference.api.ModelBuilderCommand":{build_model:[122,3,1,""],get_model_builder:[122,3,1,""],model:[122,3,1,""]},"nltk.inference.api.ModelBuilderCommandDecorator":{__init__:[122,3,1,""],build_model:[122,3,1,""],get_model_builder:[122,3,1,""],model:[122,3,1,""]},"nltk.inference.api.ParallelProverBuilder":{__init__:[122,3,1,""]},"nltk.inference.api.ParallelProverBuilderCommand":{__init__:[122,3,1,""],build_model:[122,3,1,""],prove:[122,3,1,""]},"nltk.inference.api.Prover":{prove:[122,3,1,""]},"nltk.inference.api.ProverCommand":{get_prover:[122,3,1,""],proof:[122,3,1,""],prove:[122,3,1,""]},"nltk.inference.api.ProverCommandDecorator":{__init__:[122,3,1,""],decorate_proof:[122,3,1,""],get_prover:[122,3,1,""],proof:[122,3,1,""],prove:[122,3,1,""]},"nltk.inference.api.TheoremToolCommand":{add_assumptions:[122,3,1,""],assumptions:[122,3,1,""],goal:[122,3,1,""],print_assumptions:[122,3,1,""],retract_assumptions:[122,3,1,""]},"nltk.inference.api.TheoremToolCommandDeco
rator":{__init__:[122,3,1,""],add_assumptions:[122,3,1,""],assumptions:[122,3,1,""],goal:[122,3,1,""],print_assumptions:[122,3,1,""],retract_assumptions:[122,3,1,""]},"nltk.inference.api.TheoremToolThread":{__init__:[122,3,1,""],result:[122,5,1,""],run:[122,3,1,""]},"nltk.inference.discourse":{CfgReadingCommand:[123,2,1,""],DiscourseTester:[123,2,1,""],DrtGlueReadingCommand:[123,2,1,""],ReadingCommand:[123,2,1,""],demo:[123,1,1,""],discourse_demo:[123,1,1,""],drt_discourse_demo:[123,1,1,""],load_fol:[123,1,1,""],spacer:[123,1,1,""]},"nltk.inference.discourse.CfgReadingCommand":{__init__:[123,3,1,""],combine_readings:[123,3,1,""],parse_to_readings:[123,3,1,""],to_fol:[123,3,1,""]},"nltk.inference.discourse.DiscourseTester":{__init__:[123,3,1,""],add_background:[123,3,1,""],add_sentence:[123,3,1,""],background:[123,3,1,""],expand_threads:[123,3,1,""],grammar:[123,3,1,""],models:[123,3,1,""],multiply:[123,3,1,""],readings:[123,3,1,""],retract_sentence:[123,3,1,""],sentences:[123,3,1,""]},"nltk.inference.discourse.DrtGlueReadingCommand":{__init__:[123,3,1,""],combine_readings:[123,3,1,""],parse_to_readings:[123,3,1,""],process_thread:[123,3,1,""],to_fol:[123,3,1,""]},"nltk.inference.discourse.ReadingCommand":{combine_readings:[123,3,1,""],parse_to_readings:[123,3,1,""],process_thread:[123,3,1,""],to_fol:[123,3,1,""]},"nltk.inference.mace":{Mace:[124,2,1,""],MaceCommand:[124,2,1,""],decode_result:[124,1,1,""],demo:[124,1,1,""],spacer:[124,1,1,""],test_build_model:[124,1,1,""],test_make_relation_set:[124,1,1,""],test_model_found:[124,1,1,""],test_transform_output:[124,1,1,""]},"nltk.inference.mace.Mace":{__init__:[124,3,1,""]},"nltk.inference.mace.MaceCommand":{__init__:[124,3,1,""],valuation:[124,5,1,""]},"nltk.inference.nonmonotonic":{ClosedDomainProver:[125,2,1,""],ClosedWorldProver:[125,2,1,""],PredHolder:[125,2,1,""],ProverParseError:[125,6,1,""],SetHolder:[125,2,1,""],UniqueNamesProver:[125,2,1,""],closed_domain_demo:[125,1,1,""],closed_world_demo:[125,1,1,""],combination_prover_demo:[125,1,1,""],default_reasoning_demo:[125,1,1,""],demo:[125,1,1,""],get_domain:[125,1,1,""],print_proof:[125,1,1,""],unique_names_demo:[125,1,1,""]},"nltk.inference.nonmonotonic.ClosedDomainProver":{assumptions:[125,3,1,""],goal:[125,3,1,""],replace_quants:[125,3,1,""]},"nltk.inference.nonmonotonic.ClosedWorldProver":{assumptions:[125,3,1,""]},"nltk.inference.nonmonotonic.PredHolder":{__init__:[125,3,1,""],append_prop:[125,3,1,""],append_sig:[125,3,1,""],validate_sig_len:[125,3,1,""]},"nltk.inference.nonmonotonic.UniqueNamesProver":{assumptions:[125,3,1,""]},"nltk.inference.prover9":{Prover9:[126,2,1,""],Prover9Command:[126,2,1,""],Prover9CommandParent:[126,2,1,""],Prover9Exception:[126,6,1,""],Prover9FatalException:[126,6,1,""],Prover9LimitExceededException:[126,6,1,""],Prover9Parent:[126,2,1,""],convert_to_prover9:[126,1,1,""],demo:[126,1,1,""],spacer:[126,1,1,""],test_config:[126,1,1,""],test_convert_to_prover9:[126,1,1,""],test_prove:[126,1,1,""]},"nltk.inference.prover9.Prover9":{__init__:[126,3,1,""],prover9_input:[126,3,1,""]},"nltk.inference.prover9.Prover9Command":{__init__:[126,3,1,""],decorate_proof:[126,3,1,""]},"nltk.inference.prover9.Prover9CommandParent":{print_assumptions:[126,3,1,""]},"nltk.inference.prover9.Prover9Exception":{__init__:[126,3,1,""]},"nltk.inference.prover9.Prover9Parent":{binary_locations:[126,3,1,""],config_prover9:[126,3,1,""],prover9_input:[126,3,1,""]},"nltk.inference.resolution":{BindingDict:[127,2,1,""],BindingException:[127,6,1,""],Clause:[127,2,1,""],DebugObject:[127,2,1
,""],ProverParseError:[127,6,1,""],ResolutionProver:[127,2,1,""],ResolutionProverCommand:[127,2,1,""],UnificationException:[127,6,1,""],clausify:[127,1,1,""],demo:[127,1,1,""],most_general_unification:[127,1,1,""],resolution_test:[127,1,1,""],testResolutionProver:[127,1,1,""],test_clausify:[127,1,1,""]},"nltk.inference.resolution.BindingDict":{__init__:[127,3,1,""]},"nltk.inference.resolution.BindingException":{__init__:[127,3,1,""]},"nltk.inference.resolution.Clause":{__init__:[127,3,1,""],free:[127,3,1,""],isSubsetOf:[127,3,1,""],is_tautology:[127,3,1,""],replace:[127,3,1,""],substitute_bindings:[127,3,1,""],subsumes:[127,3,1,""],unify:[127,3,1,""]},"nltk.inference.resolution.DebugObject":{__init__:[127,3,1,""],line:[127,3,1,""]},"nltk.inference.resolution.ResolutionProver":{ANSWER_KEY:[127,4,1,""]},"nltk.inference.resolution.ResolutionProverCommand":{__init__:[127,3,1,""],find_answers:[127,3,1,""],prove:[127,3,1,""]},"nltk.inference.resolution.UnificationException":{__init__:[127,3,1,""]},"nltk.inference.tableau":{Agenda:[128,2,1,""],Categories:[128,2,1,""],Debug:[128,2,1,""],ProverParseError:[128,6,1,""],TableauProver:[128,2,1,""],TableauProverCommand:[128,2,1,""],demo:[128,1,1,""],tableau_test:[128,1,1,""],testHigherOrderTableauProver:[128,1,1,""],testTableauProver:[128,1,1,""]},"nltk.inference.tableau.Agenda":{__init__:[128,3,1,""],clone:[128,3,1,""],mark_alls_fresh:[128,3,1,""],mark_neqs_fresh:[128,3,1,""],pop_first:[128,3,1,""],put:[128,3,1,""],put_all:[128,3,1,""],put_atoms:[128,3,1,""],replace_all:[128,3,1,""]},"nltk.inference.tableau.Categories":{ALL:[128,4,1,""],AND:[128,4,1,""],APP:[128,4,1,""],ATOM:[128,4,1,""],D_NEG:[128,4,1,""],EQ:[128,4,1,""],EXISTS:[128,4,1,""],IFF:[128,4,1,""],IMP:[128,4,1,""],N_ALL:[128,4,1,""],N_AND:[128,4,1,""],N_APP:[128,4,1,""],N_ATOM:[128,4,1,""],N_EQ:[128,4,1,""],N_EXISTS:[128,4,1,""],N_IFF:[128,4,1,""],N_IMP:[128,4,1,""],N_OR:[128,4,1,""],N_PROP:[128,4,1,""],OR:[128,4,1,""],PROP:[128,4,1,""]},"nltk.inference.tableau.Debug":{__init__:[128,3,1,""],line:[128,3,1,""]},"nltk.inference.tableau.TableauProver":{is_atom:[128,3,1,""]},"nltk.inference.tableau.TableauProverCommand":{__init__:[128,3,1,""]},"nltk.internals":{Counter:[129,2,1,""],Deprecated:[129,2,1,""],ElementWrapper:[129,2,1,""],ReadError:[129,6,1,""],config_java:[129,1,1,""],deprecated:[129,1,1,""],find_binary:[129,1,1,""],find_binary_iter:[129,1,1,""],find_dir:[129,1,1,""],find_file:[129,1,1,""],find_file_iter:[129,1,1,""],find_jar:[129,1,1,""],find_jar_iter:[129,1,1,""],find_jars_within_path:[129,1,1,""],import_from_stdlib:[129,1,1,""],is_writable:[129,1,1,""],java:[129,1,1,""],overridden:[129,1,1,""],raise_unorderable_types:[129,1,1,""],read_int:[129,1,1,""],read_number:[129,1,1,""],read_str:[129,1,1,""],slice_bounds:[129,1,1,""]},"nltk.internals.Counter":{__init__:[129,3,1,""],get:[129,3,1,""]},"nltk.internals.Deprecated":{__new__:[129,3,1,""]},"nltk.internals.ElementWrapper":{__init__:[129,3,1,""],__new__:[129,3,1,""],find:[129,3,1,""],findall:[129,3,1,""],getchildren:[129,3,1,""],getiterator:[129,3,1,""],makeelement:[129,3,1,""],unwrap:[129,3,1,""]},"nltk.internals.ReadError":{__init__:[129,3,1,""]},"nltk.jsontags":{JSONTaggedDecoder:[130,2,1,""],JSONTaggedEncoder:[130,2,1,""],register_tag:[130,1,1,""]},"nltk.jsontags.JSONTaggedDecoder":{decode:[130,3,1,""],decode_obj:[130,3,1,""]},"nltk.jsontags.JSONTaggedEncoder":{"default":[130,3,1,""]},"nltk.lazyimport":{LazyModule:[131,2,1,""]},"nltk.lazyimport.LazyModule":{__init__:[131,3,1,""]},"nltk.lm":{AbsoluteDiscountingInterpolated:[132,2,1,
""],KneserNeyInterpolated:[132,2,1,""],Laplace:[132,2,1,""],Lidstone:[132,2,1,""],MLE:[132,2,1,""],NgramCounter:[132,2,1,""],StupidBackoff:[132,2,1,""],Vocabulary:[132,2,1,""],WittenBellInterpolated:[132,2,1,""],api:[133,0,0,"-"],counter:[134,0,0,"-"],models:[135,0,0,"-"],preprocessing:[136,0,0,"-"],smoothing:[137,0,0,"-"],util:[138,0,0,"-"],vocabulary:[139,0,0,"-"]},"nltk.lm.AbsoluteDiscountingInterpolated":{__init__:[132,3,1,""]},"nltk.lm.KneserNeyInterpolated":{__init__:[132,3,1,""]},"nltk.lm.Laplace":{__init__:[132,3,1,""]},"nltk.lm.Lidstone":{__init__:[132,3,1,""],unmasked_score:[132,3,1,""]},"nltk.lm.MLE":{unmasked_score:[132,3,1,""]},"nltk.lm.NgramCounter":{N:[132,3,1,""],__init__:[132,3,1,""],update:[132,3,1,""]},"nltk.lm.StupidBackoff":{__init__:[132,3,1,""],unmasked_score:[132,3,1,""]},"nltk.lm.Vocabulary":{__init__:[132,3,1,""],cutoff:[132,5,1,""],lookup:[132,3,1,""],update:[132,3,1,""]},"nltk.lm.WittenBellInterpolated":{__init__:[132,3,1,""]},"nltk.lm.api":{LanguageModel:[133,2,1,""],Smoothing:[133,2,1,""]},"nltk.lm.api.LanguageModel":{__init__:[133,3,1,""],context_counts:[133,3,1,""],entropy:[133,3,1,""],fit:[133,3,1,""],generate:[133,3,1,""],logscore:[133,3,1,""],perplexity:[133,3,1,""],score:[133,3,1,""],unmasked_score:[133,3,1,""]},"nltk.lm.api.Smoothing":{__init__:[133,3,1,""],alpha_gamma:[133,3,1,""],unigram_score:[133,3,1,""]},"nltk.lm.counter":{NgramCounter:[134,2,1,""]},"nltk.lm.counter.NgramCounter":{N:[134,3,1,""],__init__:[134,3,1,""],update:[134,3,1,""]},"nltk.lm.models":{AbsoluteDiscountingInterpolated:[135,2,1,""],InterpolatedLanguageModel:[135,2,1,""],KneserNeyInterpolated:[135,2,1,""],Laplace:[135,2,1,""],Lidstone:[135,2,1,""],MLE:[135,2,1,""],StupidBackoff:[135,2,1,""],WittenBellInterpolated:[135,2,1,""]},"nltk.lm.models.AbsoluteDiscountingInterpolated":{__init__:[135,3,1,""]},"nltk.lm.models.InterpolatedLanguageModel":{__init__:[135,3,1,""],unmasked_score:[135,3,1,""]},"nltk.lm.models.KneserNeyInterpolated":{__init__:[135,3,1,""]},"nltk.lm.models.Laplace":{__init__:[135,3,1,""]},"nltk.lm.models.Lidstone":{__init__:[135,3,1,""],unmasked_score:[135,3,1,""]},"nltk.lm.models.MLE":{unmasked_score:[135,3,1,""]},"nltk.lm.models.StupidBackoff":{__init__:[135,3,1,""],unmasked_score:[135,3,1,""]},"nltk.lm.models.WittenBellInterpolated":{__init__:[135,3,1,""]},"nltk.lm.preprocessing":{flatten:[136,1,1,""],padded_everygram_pipeline:[136,1,1,""],padded_everygrams:[136,1,1,""]},"nltk.lm.smoothing":{AbsoluteDiscounting:[137,2,1,""],KneserNey:[137,2,1,""],WittenBell:[137,2,1,""]},"nltk.lm.smoothing.AbsoluteDiscounting":{__init__:[137,3,1,""],alpha_gamma:[137,3,1,""],unigram_score:[137,3,1,""]},"nltk.lm.smoothing.KneserNey":{__init__:[137,3,1,""],alpha_gamma:[137,3,1,""],unigram_score:[137,3,1,""]},"nltk.lm.smoothing.WittenBell":{__init__:[137,3,1,""],alpha_gamma:[137,3,1,""],unigram_score:[137,3,1,""]},"nltk.lm.util":{log_base2:[138,1,1,""]},"nltk.lm.vocabulary":{Vocabulary:[139,2,1,""]},"nltk.lm.vocabulary.Vocabulary":{__init__:[139,3,1,""],cutoff:[139,5,1,""],lookup:[139,3,1,""],update:[139,3,1,""]},"nltk.metrics":{agreement:[141,0,0,"-"],aline:[142,0,0,"-"],association:[143,0,0,"-"],confusionmatrix:[144,0,0,"-"],distance:[145,0,0,"-"],paice:[146,0,0,"-"],scores:[147,0,0,"-"],segmentation:[148,0,0,"-"],spearman:[149,0,0,"-"]},"nltk.metrics.agreement":{AnnotationTask:[141,2,1,""]},"nltk.metrics.agreement.AnnotationTask":{Ae_kappa:[141,3,1,""],Ao:[141,3,1,""],Disagreement:[141,3,1,""],Do_Kw:[141,3,1,""],Do_Kw_pairwise:[141,3,1,""],N:[141,3,1,""],Nck:[141,3,1,""],Nik:[141,3,1,
""],Nk:[141,3,1,""],S:[141,3,1,""],__init__:[141,3,1,""],agr:[141,3,1,""],alpha:[141,3,1,""],avg_Ao:[141,3,1,""],kappa:[141,3,1,""],kappa_pairwise:[141,3,1,""],load_array:[141,3,1,""],multi_kappa:[141,3,1,""],pi:[141,3,1,""],weighted_kappa:[141,3,1,""],weighted_kappa_pairwise:[141,3,1,""]},"nltk.metrics.aline":{R:[142,1,1,""],V:[142,1,1,""],align:[142,1,1,""],delta:[142,1,1,""],demo:[142,1,1,""],diff:[142,1,1,""],sigma_exp:[142,1,1,""],sigma_skip:[142,1,1,""],sigma_sub:[142,1,1,""]},"nltk.metrics.association":{BigramAssocMeasures:[143,2,1,""],ContingencyMeasures:[143,2,1,""],NGRAM:[143,7,1,""],NgramAssocMeasures:[143,2,1,""],QuadgramAssocMeasures:[143,2,1,""],TOTAL:[143,7,1,""],TrigramAssocMeasures:[143,2,1,""],UNIGRAMS:[143,7,1,""],fisher_exact:[143,1,1,""]},"nltk.metrics.association.BigramAssocMeasures":{chi_sq:[143,3,1,""],dice:[143,3,1,""],fisher:[143,3,1,""],phi_sq:[143,3,1,""]},"nltk.metrics.association.ContingencyMeasures":{__init__:[143,3,1,""]},"nltk.metrics.association.NgramAssocMeasures":{chi_sq:[143,3,1,""],jaccard:[143,3,1,""],likelihood_ratio:[143,3,1,""],mi_like:[143,3,1,""],pmi:[143,3,1,""],poisson_stirling:[143,3,1,""],raw_freq:[143,3,1,""],student_t:[143,3,1,""]},"nltk.metrics.confusionmatrix":{ConfusionMatrix:[144,2,1,""],demo:[144,1,1,""]},"nltk.metrics.confusionmatrix.ConfusionMatrix":{__init__:[144,3,1,""],key:[144,3,1,""],pretty_format:[144,3,1,""]},"nltk.metrics.distance":{binary_distance:[145,1,1,""],custom_distance:[145,1,1,""],demo:[145,1,1,""],edit_distance:[145,1,1,""],edit_distance_align:[145,1,1,""],fractional_presence:[145,1,1,""],interval_distance:[145,1,1,""],jaccard_distance:[145,1,1,""],jaro_similarity:[145,1,1,""],jaro_winkler_similarity:[145,1,1,""],masi_distance:[145,1,1,""],presence:[145,1,1,""]},"nltk.metrics.paice":{Paice:[146,2,1,""],demo:[146,1,1,""],get_words_from_dictionary:[146,1,1,""]},"nltk.metrics.paice.Paice":{__init__:[146,3,1,""],update:[146,3,1,""]},"nltk.metrics.scores":{accuracy:[147,1,1,""],approxrand:[147,1,1,""],demo:[147,1,1,""],f_measure:[147,1,1,""],log_likelihood:[147,1,1,""],precision:[147,1,1,""],recall:[147,1,1,""]},"nltk.metrics.segmentation":{ghd:[148,1,1,""],pk:[148,1,1,""],windowdiff:[148,1,1,""]},"nltk.metrics.spearman":{ranks_from_scores:[149,1,1,""],ranks_from_sequence:[149,1,1,""],spearman_correlation:[149,1,1,""]},"nltk.misc":{babelfish:[151,0,0,"-"],chomsky:[152,0,0,"-"],minimalset:[153,0,0,"-"],sort:[154,0,0,"-"],wordfinder:[155,0,0,"-"]},"nltk.misc.babelfish":{babelize_shell:[151,1,1,""]},"nltk.misc.chomsky":{generate_chomsky:[152,1,1,""]},"nltk.misc.minimalset":{MinimalSet:[153,2,1,""]},"nltk.misc.minimalset.MinimalSet":{__init__:[153,3,1,""],add:[153,3,1,""],contexts:[153,3,1,""],display:[153,3,1,""],display_all:[153,3,1,""],targets:[153,3,1,""]},"nltk.misc.sort":{bubble:[154,1,1,""],demo:[154,1,1,""],merge:[154,1,1,""],quick:[154,1,1,""],selection:[154,1,1,""]},"nltk.misc.wordfinder":{check:[155,1,1,""],revword:[155,1,1,""],step:[155,1,1,""],word_finder:[155,1,1,""],wordfinder:[155,1,1,""]},"nltk.parse":{api:[157,0,0,"-"],bllip:[158,0,0,"-"],chart:[159,0,0,"-"],corenlp:[160,0,0,"-"],dependencygraph:[161,0,0,"-"],earleychart:[162,0,0,"-"],evaluate:[163,0,0,"-"],featurechart:[164,0,0,"-"],generate:[165,0,0,"-"],malt:[166,0,0,"-"],nonprojectivedependencyparser:[167,0,0,"-"],pchart:[168,0,0,"-"],projectivedependencyparser:[169,0,0,"-"],recursivedescent:[170,0,0,"-"],shiftreduce:[171,0,0,"-"],stanford:[172,0,0,"-"],transitionparser:[173,0,0,"-"],util:[174,0,0,"-"],viterbi:[175,0,0,"-"]},"nltk.parse.api":{ParserI:[15
7,2,1,""]},"nltk.parse.api.ParserI":{grammar:[157,3,1,""],parse:[157,3,1,""],parse_all:[157,3,1,""],parse_one:[157,3,1,""],parse_sents:[157,3,1,""]},"nltk.parse.bllip":{BllipParser:[158,2,1,""]},"nltk.parse.bllip.BllipParser":{__init__:[158,3,1,""],from_unified_model_dir:[158,3,1,""],parse:[158,3,1,""],tagged_parse:[158,3,1,""]},"nltk.parse.chart":{AbstractChartRule:[159,2,1,""],BottomUpChartParser:[159,2,1,""],BottomUpLeftCornerChartParser:[159,2,1,""],BottomUpPredictCombineRule:[159,2,1,""],BottomUpPredictRule:[159,2,1,""],CachedTopDownPredictRule:[159,2,1,""],Chart:[159,2,1,""],ChartParser:[159,2,1,""],ChartRuleI:[159,2,1,""],EdgeI:[159,2,1,""],EmptyPredictRule:[159,2,1,""],FilteredBottomUpPredictCombineRule:[159,2,1,""],FilteredSingleEdgeFundamentalRule:[159,2,1,""],FundamentalRule:[159,2,1,""],LeafEdge:[159,2,1,""],LeafInitRule:[159,2,1,""],LeftCornerChartParser:[159,2,1,""],SingleEdgeFundamentalRule:[159,2,1,""],SteppingChartParser:[159,2,1,""],TopDownChartParser:[159,2,1,""],TopDownInitRule:[159,2,1,""],TopDownPredictRule:[159,2,1,""],TreeEdge:[159,2,1,""],demo:[159,1,1,""],demo_grammar:[159,1,1,""]},"nltk.parse.chart.AbstractChartRule":{apply:[159,3,1,""],apply_everywhere:[159,3,1,""]},"nltk.parse.chart.BottomUpChartParser":{__init__:[159,3,1,""]},"nltk.parse.chart.BottomUpLeftCornerChartParser":{__init__:[159,3,1,""]},"nltk.parse.chart.BottomUpPredictCombineRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.BottomUpPredictRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.CachedTopDownPredictRule":{__init__:[159,3,1,""],apply:[159,3,1,""]},"nltk.parse.chart.Chart":{__init__:[159,3,1,""],child_pointer_lists:[159,3,1,""],dot_digraph:[159,3,1,""],edges:[159,3,1,""],initialize:[159,3,1,""],insert:[159,3,1,""],insert_with_backpointer:[159,3,1,""],iteredges:[159,3,1,""],leaf:[159,3,1,""],leaves:[159,3,1,""],num_edges:[159,3,1,""],num_leaves:[159,3,1,""],parses:[159,3,1,""],pretty_format:[159,3,1,""],pretty_format_edge:[159,3,1,""],pretty_format_leaves:[159,3,1,""],select:[159,3,1,""],trees:[159,3,1,""]},"nltk.parse.chart.ChartParser":{__init__:[159,3,1,""],chart_parse:[159,3,1,""],grammar:[159,3,1,""],parse:[159,3,1,""]},"nltk.parse.chart.ChartRuleI":{apply:[159,3,1,""],apply_everywhere:[159,3,1,""]},"nltk.parse.chart.EdgeI":{__init__:[159,3,1,""],dot:[159,3,1,""],end:[159,3,1,""],is_complete:[159,3,1,""],is_incomplete:[159,3,1,""],length:[159,3,1,""],lhs:[159,3,1,""],nextsym:[159,3,1,""],rhs:[159,3,1,""],span:[159,3,1,""],start:[159,3,1,""]},"nltk.parse.chart.EmptyPredictRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.FilteredBottomUpPredictCombineRule":{apply:[159,3,1,""]},"nltk.parse.chart.FundamentalRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.LeafEdge":{__init__:[159,3,1,""],dot:[159,3,1,""],end:[159,3,1,""],is_complete:[159,3,1,""],is_incomplete:[159,3,1,""],length:[159,3,1,""],lhs:[159,3,1,""],nextsym:[159,3,1,""],rhs:[159,3,1,""],span:[159,3,1,""],start:[159,3,1,""]},"nltk.parse.chart.LeafInitRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.LeftCornerChartParser":{__init__:[159,3,1,""]},"nltk.parse.chart.SingleEdgeFundamentalRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.SteppingChartParser":{__init__:[159,3,1,""],chart:[159,3,1,""],current_chartrule:[159,3,1,""],grammar:[159,3,1,""],initialize:[159,3,1,""],parse:[159,3,1,""],parses:[159,3,1,""],set_chart:[159,3,1,""],set_grammar:[159,3,1,""],set_strategy:[159,3,1,""],step:[159,3,1,""],strategy:[159,3,1,""]},"nltk.
parse.chart.TopDownChartParser":{__init__:[159,3,1,""]},"nltk.parse.chart.TopDownInitRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.TopDownPredictRule":{NUM_EDGES:[159,4,1,""],apply:[159,3,1,""]},"nltk.parse.chart.TreeEdge":{__init__:[159,3,1,""],dot:[159,3,1,""],end:[159,3,1,""],from_production:[159,3,1,""],is_complete:[159,3,1,""],is_incomplete:[159,3,1,""],length:[159,3,1,""],lhs:[159,3,1,""],move_dot_forward:[159,3,1,""],nextsym:[159,3,1,""],rhs:[159,3,1,""],span:[159,3,1,""],start:[159,3,1,""]},"nltk.parse.corenlp":{CoreNLPDependencyParser:[160,2,1,""],CoreNLPParser:[160,2,1,""],CoreNLPServer:[160,2,1,""],CoreNLPServerError:[160,6,1,""],GenericCoreNLPParser:[160,2,1,""],transform:[160,1,1,""],try_port:[160,1,1,""]},"nltk.parse.corenlp.CoreNLPDependencyParser":{make_tree:[160,3,1,""],parser_annotator:[160,4,1,""]},"nltk.parse.corenlp.CoreNLPParser":{make_tree:[160,3,1,""],parser_annotator:[160,4,1,""]},"nltk.parse.corenlp.CoreNLPServer":{__init__:[160,3,1,""],start:[160,3,1,""],stop:[160,3,1,""]},"nltk.parse.corenlp.GenericCoreNLPParser":{__init__:[160,3,1,""],api_call:[160,3,1,""],parse_sents:[160,3,1,""],parse_text:[160,3,1,""],raw_parse:[160,3,1,""],raw_parse_sents:[160,3,1,""],raw_tag_sents:[160,3,1,""],tag:[160,3,1,""],tag_sents:[160,3,1,""],tokenize:[160,3,1,""]},"nltk.parse.dependencygraph":{DependencyGraph:[161,2,1,""],DependencyGraphError:[161,6,1,""],conll_demo:[161,1,1,""],conll_file_demo:[161,1,1,""],cycle_finding_demo:[161,1,1,""],demo:[161,1,1,""],dot2img:[161,1,1,""],malt_demo:[161,1,1,""]},"nltk.parse.dependencygraph.DependencyGraph":{__init__:[161,3,1,""],add_arc:[161,3,1,""],add_node:[161,3,1,""],connect_graph:[161,3,1,""],contains_address:[161,3,1,""],contains_cycle:[161,3,1,""],get_by_address:[161,3,1,""],get_cycle_path:[161,3,1,""],left_children:[161,3,1,""],load:[161,3,1,""],nx_graph:[161,3,1,""],redirect_arcs:[161,3,1,""],remove_by_address:[161,3,1,""],right_children:[161,3,1,""],to_conll:[161,3,1,""],to_dot:[161,3,1,""],tree:[161,3,1,""],triples:[161,3,1,""]},"nltk.parse.earleychart":{CompleteFundamentalRule:[162,2,1,""],CompleterRule:[162,2,1,""],EarleyChartParser:[162,2,1,""],FeatureCompleteFundamentalRule:[162,2,1,""],FeatureCompleterRule:[162,2,1,""],FeatureEarleyChartParser:[162,2,1,""],FeatureIncrementalBottomUpChartParser:[162,2,1,""],FeatureIncrementalBottomUpLeftCornerChartParser:[162,2,1,""],FeatureIncrementalChart:[162,2,1,""],FeatureIncrementalChartParser:[162,2,1,""],FeatureIncrementalTopDownChartParser:[162,2,1,""],FeaturePredictorRule:[162,2,1,""],FeatureScannerRule:[162,2,1,""],FilteredCompleteFundamentalRule:[162,2,1,""],IncrementalBottomUpChartParser:[162,2,1,""],IncrementalBottomUpLeftCornerChartParser:[162,2,1,""],IncrementalChart:[162,2,1,""],IncrementalChartParser:[162,2,1,""],IncrementalLeftCornerChartParser:[162,2,1,""],IncrementalTopDownChartParser:[162,2,1,""],PredictorRule:[162,2,1,""],ScannerRule:[162,2,1,""],demo:[162,1,1,""]},"nltk.parse.earleychart.CompleterRule":{apply:[162,3,1,""]},"nltk.parse.earleychart.EarleyChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureEarleyChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalChart":{select:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.FeatureIncrementalTopDownChartParser":{
__init__:[162,3,1,""]},"nltk.parse.earleychart.FilteredCompleteFundamentalRule":{apply:[162,3,1,""]},"nltk.parse.earleychart.IncrementalBottomUpChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.IncrementalBottomUpLeftCornerChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.IncrementalChart":{edges:[162,3,1,""],initialize:[162,3,1,""],iteredges:[162,3,1,""],select:[162,3,1,""]},"nltk.parse.earleychart.IncrementalChartParser":{__init__:[162,3,1,""],chart_parse:[162,3,1,""]},"nltk.parse.earleychart.IncrementalLeftCornerChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.IncrementalTopDownChartParser":{__init__:[162,3,1,""]},"nltk.parse.earleychart.ScannerRule":{apply:[162,3,1,""]},"nltk.parse.evaluate":{DependencyEvaluator:[163,2,1,""]},"nltk.parse.evaluate.DependencyEvaluator":{__init__:[163,3,1,""],eval:[163,3,1,""]},"nltk.parse.featurechart":{FeatureBottomUpChartParser:[164,2,1,""],FeatureBottomUpLeftCornerChartParser:[164,2,1,""],FeatureBottomUpPredictCombineRule:[164,2,1,""],FeatureBottomUpPredictRule:[164,2,1,""],FeatureChart:[164,2,1,""],FeatureChartParser:[164,2,1,""],FeatureEmptyPredictRule:[164,2,1,""],FeatureFundamentalRule:[164,2,1,""],FeatureSingleEdgeFundamentalRule:[164,2,1,""],FeatureTopDownChartParser:[164,2,1,""],FeatureTopDownInitRule:[164,2,1,""],FeatureTopDownPredictRule:[164,2,1,""],FeatureTreeEdge:[164,2,1,""],InstantiateVarsChart:[164,2,1,""],demo:[164,1,1,""],demo_grammar:[164,1,1,""],run_profile:[164,1,1,""]},"nltk.parse.featurechart.FeatureBottomUpChartParser":{__init__:[164,3,1,""]},"nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser":{__init__:[164,3,1,""]},"nltk.parse.featurechart.FeatureBottomUpPredictCombineRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureBottomUpPredictRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureChart":{parses:[164,3,1,""],select:[164,3,1,""]},"nltk.parse.featurechart.FeatureChartParser":{__init__:[164,3,1,""]},"nltk.parse.featurechart.FeatureEmptyPredictRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureFundamentalRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureTopDownChartParser":{__init__:[164,3,1,""]},"nltk.parse.featurechart.FeatureTopDownInitRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureTopDownPredictRule":{apply:[164,3,1,""]},"nltk.parse.featurechart.FeatureTreeEdge":{__init__:[164,3,1,""],bindings:[164,3,1,""],from_production:[164,3,1,""],move_dot_forward:[164,3,1,""],next_with_bindings:[164,3,1,""],variables:[164,3,1,""]},"nltk.parse.featurechart.InstantiateVarsChart":{__init__:[164,3,1,""],initialize:[164,3,1,""],insert:[164,3,1,""],inst_vars:[164,3,1,""],instantiate_edge:[164,3,1,""]},"nltk.parse.generate":{demo:[165,1,1,""],generate:[165,1,1,""]},"nltk.parse.malt":{MaltParser:[166,2,1,""],find_malt_model:[166,1,1,""],find_maltparser:[166,1,1,""],malt_regex_tagger:[166,1,1,""]},"nltk.parse.malt.MaltParser":{__init__:[166,3,1,""],generate_malt_command:[166,3,1,""],parse_sents:[166,3,1,""],parse_tagged_sents:[166,3,1,""],train:[166,3,1,""],train_from_file:[166,3,1,""]},"nltk.parse.nonprojectivedependencyparser":{DemoScorer:[167,2,1,""],DependencyScorerI:[167,2,1,""],NaiveBayesDependencyScorer:[167,2,1,""],NonprojectiveDependencyParser:[167,2,1,""],ProbabilisticNonprojectiveParser:[167,2,1,""],demo:[167,1,1,""],hall_demo:[167,1,1,""],nonprojective_conll_parse_demo:[167,1,1,""],rule_based_demo:[167,1,1,""]},"nltk.parse.nonprojectivedependencyparser.DemoScorer":{score:[167,3,1,""],train:[167,3,1,""]},"nltk.parse.nonprojectivedependencyparser.D
ependencyScorerI":{__init__:[167,3,1,""],score:[167,3,1,""],train:[167,3,1,""]},"nltk.parse.nonprojectivedependencyparser.NaiveBayesDependencyScorer":{__init__:[167,3,1,""],score:[167,3,1,""],train:[167,3,1,""]},"nltk.parse.nonprojectivedependencyparser.NonprojectiveDependencyParser":{__init__:[167,3,1,""],parse:[167,3,1,""]},"nltk.parse.nonprojectivedependencyparser.ProbabilisticNonprojectiveParser":{__init__:[167,3,1,""],best_incoming_arc:[167,3,1,""],collapse_nodes:[167,3,1,""],compute_max_subtract_score:[167,3,1,""],compute_original_indexes:[167,3,1,""],initialize_edge_scores:[167,3,1,""],original_best_arc:[167,3,1,""],parse:[167,3,1,""],train:[167,3,1,""],update_edge_scores:[167,3,1,""]},"nltk.parse.pchart":{BottomUpProbabilisticChartParser:[168,2,1,""],InsideChartParser:[168,2,1,""],LongestChartParser:[168,2,1,""],ProbabilisticBottomUpInitRule:[168,2,1,""],ProbabilisticBottomUpPredictRule:[168,2,1,""],ProbabilisticFundamentalRule:[168,2,1,""],ProbabilisticLeafEdge:[168,2,1,""],ProbabilisticTreeEdge:[168,2,1,""],RandomChartParser:[168,2,1,""],SingleEdgeProbabilisticFundamentalRule:[168,2,1,""],UnsortedChartParser:[168,2,1,""],demo:[168,1,1,""]},"nltk.parse.pchart.BottomUpProbabilisticChartParser":{__init__:[168,3,1,""],grammar:[168,3,1,""],parse:[168,3,1,""],sort_queue:[168,3,1,""],trace:[168,3,1,""]},"nltk.parse.pchart.InsideChartParser":{sort_queue:[168,3,1,""]},"nltk.parse.pchart.LongestChartParser":{sort_queue:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticBottomUpInitRule":{NUM_EDGES:[168,4,1,""],apply:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticBottomUpPredictRule":{NUM_EDGES:[168,4,1,""],apply:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticFundamentalRule":{NUM_EDGES:[168,4,1,""],apply:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticLeafEdge":{prob:[168,3,1,""]},"nltk.parse.pchart.ProbabilisticTreeEdge":{__init__:[168,3,1,""],from_production:[168,3,1,""],prob:[168,3,1,""]},"nltk.parse.pchart.RandomChartParser":{sort_queue:[168,3,1,""]},"nltk.parse.pchart.SingleEdgeProbabilisticFundamentalRule":{NUM_EDGES:[168,4,1,""],apply:[168,3,1,""]},"nltk.parse.pchart.UnsortedChartParser":{sort_queue:[168,3,1,""]},"nltk.parse.projectivedependencyparser":{ChartCell:[169,2,1,""],DependencySpan:[169,2,1,""],ProbabilisticProjectiveDependencyParser:[169,2,1,""],ProjectiveDependencyParser:[169,2,1,""],arity_parse_demo:[169,1,1,""],demo:[169,1,1,""],projective_prob_parse_demo:[169,1,1,""],projective_rule_parse_demo:[169,1,1,""]},"nltk.parse.projectivedependencyparser.ChartCell":{__init__:[169,3,1,""],add:[169,3,1,""]},"nltk.parse.projectivedependencyparser.DependencySpan":{__init__:[169,3,1,""],head_index:[169,3,1,""]},"nltk.parse.projectivedependencyparser.ProbabilisticProjectiveDependencyParser":{__init__:[169,3,1,""],compute_prob:[169,3,1,""],concatenate:[169,3,1,""],parse:[169,3,1,""],train:[169,3,1,""]},"nltk.parse.projectivedependencyparser.ProjectiveDependencyParser":{__init__:[169,3,1,""],concatenate:[169,3,1,""],parse:[169,3,1,""]},"nltk.parse.recursivedescent":{RecursiveDescentParser:[170,2,1,""],SteppingRecursiveDescentParser:[170,2,1,""],demo:[170,1,1,""]},"nltk.parse.recursivedescent.RecursiveDescentParser":{__init__:[170,3,1,""],grammar:[170,3,1,""],parse:[170,3,1,""],trace:[170,3,1,""]},"nltk.parse.recursivedescent.SteppingRecursiveDescentParser":{__init__:[170,3,1,""],backtrack:[170,3,1,""],currently_complete:[170,3,1,""],expand:[170,3,1,""],expandable_productions:[170,3,1,""],frontier:[170,3,1,""],initialize:[170,3,1,""],match:[170,3,1,""],parse:[170,3,1,""],parses:[170,3,1,""],remai
ning_text:[170,3,1,""],set_grammar:[170,3,1,""],step:[170,3,1,""],tree:[170,3,1,""],untried_expandable_productions:[170,3,1,""],untried_match:[170,3,1,""]},"nltk.parse.shiftreduce":{ShiftReduceParser:[171,2,1,""],SteppingShiftReduceParser:[171,2,1,""],demo:[171,1,1,""]},"nltk.parse.shiftreduce.ShiftReduceParser":{__init__:[171,3,1,""],grammar:[171,3,1,""],parse:[171,3,1,""],trace:[171,3,1,""]},"nltk.parse.shiftreduce.SteppingShiftReduceParser":{__init__:[171,3,1,""],initialize:[171,3,1,""],parse:[171,3,1,""],parses:[171,3,1,""],reduce:[171,3,1,""],reducible_productions:[171,3,1,""],remaining_text:[171,3,1,""],set_grammar:[171,3,1,""],shift:[171,3,1,""],stack:[171,3,1,""],step:[171,3,1,""],undo:[171,3,1,""]},"nltk.parse.stanford":{GenericStanfordParser:[172,2,1,""],StanfordDependencyParser:[172,2,1,""],StanfordNeuralDependencyParser:[172,2,1,""],StanfordParser:[172,2,1,""]},"nltk.parse.stanford.GenericStanfordParser":{__init__:[172,3,1,""],parse_sents:[172,3,1,""],raw_parse:[172,3,1,""],raw_parse_sents:[172,3,1,""],tagged_parse:[172,3,1,""],tagged_parse_sents:[172,3,1,""]},"nltk.parse.stanford.StanfordDependencyParser":{__init__:[172,3,1,""]},"nltk.parse.stanford.StanfordNeuralDependencyParser":{__init__:[172,3,1,""],tagged_parse_sents:[172,3,1,""]},"nltk.parse.stanford.StanfordParser":{__init__:[172,3,1,""]},"nltk.parse.transitionparser":{Configuration:[173,2,1,""],Transition:[173,2,1,""],TransitionParser:[173,2,1,""],demo:[173,1,1,""]},"nltk.parse.transitionparser.Configuration":{__init__:[173,3,1,""],extract_features:[173,3,1,""]},"nltk.parse.transitionparser.Transition":{LEFT_ARC:[173,4,1,""],REDUCE:[173,4,1,""],RIGHT_ARC:[173,4,1,""],SHIFT:[173,4,1,""],__init__:[173,3,1,""],left_arc:[173,3,1,""],reduce:[173,3,1,""],right_arc:[173,3,1,""],shift:[173,3,1,""]},"nltk.parse.transitionparser.TransitionParser":{ARC_EAGER:[173,4,1,""],ARC_STANDARD:[173,4,1,""],__init__:[173,3,1,""],parse:[173,3,1,""],train:[173,3,1,""]},"nltk.parse.util":{TestGrammar:[174,2,1,""],extract_test_sentences:[174,1,1,""],load_parser:[174,1,1,""],taggedsent_to_conll:[174,1,1,""],taggedsents_to_conll:[174,1,1,""]},"nltk.parse.util.TestGrammar":{__init__:[174,3,1,""],run:[174,3,1,""]},"nltk.parse.viterbi":{ViterbiParser:[175,2,1,""],demo:[175,1,1,""]},"nltk.parse.viterbi.ViterbiParser":{__init__:[175,3,1,""],grammar:[175,3,1,""],parse:[175,3,1,""],trace:[175,3,1,""]},"nltk.probability":{ConditionalFreqDist:[176,2,1,""],ConditionalProbDist:[176,2,1,""],ConditionalProbDistI:[176,2,1,""],CrossValidationProbDist:[176,2,1,""],DictionaryConditionalProbDist:[176,2,1,""],DictionaryProbDist:[176,2,1,""],ELEProbDist:[176,2,1,""],FreqDist:[176,2,1,""],HeldoutProbDist:[176,2,1,""],ImmutableProbabilisticMixIn:[176,2,1,""],KneserNeyProbDist:[176,2,1,""],LaplaceProbDist:[176,2,1,""],LidstoneProbDist:[176,2,1,""],MLEProbDist:[176,2,1,""],MutableProbDist:[176,2,1,""],ProbDistI:[176,2,1,""],ProbabilisticMixIn:[176,2,1,""],SimpleGoodTuringProbDist:[176,2,1,""],UniformProbDist:[176,2,1,""],WittenBellProbDist:[176,2,1,""],add_logs:[176,1,1,""],entropy:[176,1,1,""],log_likelihood:[176,1,1,""],sum_logs:[176,1,1,""]},"nltk.probability.ConditionalFreqDist":{N:[176,3,1,""],__init__:[176,3,1,""],conditions:[176,3,1,""],plot:[176,3,1,""],tabulate:[176,3,1,""]},"nltk.probability.ConditionalProbDist":{__init__:[176,3,1,""]},"nltk.probability.ConditionalProbDistI":{__init__:[176,3,1,""],conditions:[176,3,1,""]},"nltk.probability.CrossValidationProbDist":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],discount:[176,3,1,""],freqdists:[176,3,1,""],prob:[
176,3,1,""],samples:[176,3,1,""]},"nltk.probability.DictionaryConditionalProbDist":{__init__:[176,3,1,""]},"nltk.probability.DictionaryProbDist":{__init__:[176,3,1,""],logprob:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.ELEProbDist":{__init__:[176,3,1,""]},"nltk.probability.FreqDist":{B:[176,3,1,""],N:[176,3,1,""],Nr:[176,3,1,""],__init__:[176,3,1,""],copy:[176,3,1,""],freq:[176,3,1,""],hapaxes:[176,3,1,""],max:[176,3,1,""],pformat:[176,3,1,""],plot:[176,3,1,""],pprint:[176,3,1,""],r_Nr:[176,3,1,""],setdefault:[176,3,1,""],tabulate:[176,3,1,""],update:[176,3,1,""]},"nltk.probability.HeldoutProbDist":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],base_fdist:[176,3,1,""],discount:[176,3,1,""],heldout_fdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.ImmutableProbabilisticMixIn":{set_logprob:[176,3,1,""],set_prob:[176,3,1,""]},"nltk.probability.KneserNeyProbDist":{__init__:[176,3,1,""],discount:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""],set_discount:[176,3,1,""]},"nltk.probability.LaplaceProbDist":{__init__:[176,3,1,""]},"nltk.probability.LidstoneProbDist":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],discount:[176,3,1,""],freqdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.MLEProbDist":{__init__:[176,3,1,""],freqdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.MutableProbDist":{__init__:[176,3,1,""],logprob:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""],update:[176,3,1,""]},"nltk.probability.ProbDistI":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],discount:[176,3,1,""],generate:[176,3,1,""],logprob:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.ProbabilisticMixIn":{__init__:[176,3,1,""],logprob:[176,3,1,""],prob:[176,3,1,""],set_logprob:[176,3,1,""],set_prob:[176,3,1,""]},"nltk.probability.SimpleGoodTuringProbDist":{SUM_TO_ONE:[176,4,1,""],__init__:[176,3,1,""],check:[176,3,1,""],discount:[176,3,1,""],find_best_fit:[176,3,1,""],freqdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""],smoothedNr:[176,3,1,""]},"nltk.probability.UniformProbDist":{__init__:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.probability.WittenBellProbDist":{__init__:[176,3,1,""],discount:[176,3,1,""],freqdist:[176,3,1,""],max:[176,3,1,""],prob:[176,3,1,""],samples:[176,3,1,""]},"nltk.sem":{boxer:[178,0,0,"-"],chat80:[179,0,0,"-"],cooper_storage:[180,0,0,"-"],drt:[181,0,0,"-"],drt_glue_demo:[182,0,0,"-"],evaluate:[183,0,0,"-"],glue:[184,0,0,"-"],hole:[185,0,0,"-"],lfg:[186,0,0,"-"],linearlogic:[187,0,0,"-"],logic:[188,0,0,"-"],relextract:[189,0,0,"-"],skolemize:[190,0,0,"-"],util:[191,0,0,"-"]},"nltk.sem.boxer":{AbstractBoxerDrs:[178,2,1,""],Boxer:[178,2,1,""],BoxerCard:[178,2,1,""],BoxerDrs:[178,2,1,""],BoxerDrsParser:[178,2,1,""],BoxerEq:[178,2,1,""],BoxerIndexed:[178,2,1,""],BoxerNamed:[178,2,1,""],BoxerNot:[178,2,1,""],BoxerOr:[178,2,1,""],BoxerOutputDrsParser:[178,2,1,""],BoxerPred:[178,2,1,""],BoxerProp:[178,2,1,""],BoxerRel:[178,2,1,""],BoxerWhq:[178,2,1,""],NltkDrtBoxerDrsInterpreter:[178,2,1,""],PassthroughBoxerDrsInterpreter:[178,2,1,""],UnparseableInputException:[178,6,1,""]},"nltk.sem.boxer.AbstractBoxerDrs":{atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""],variable_types:[178,3,1,""],variables:[178,3,1,""]},"nltk.sem.boxer.Boxer":{__init__:[178,3,1,""],interpret:[178,3,1,""]
,interpret_multi:[178,3,1,""],interpret_multi_sents:[178,3,1,""],interpret_sents:[178,3,1,""],set_bin_dir:[178,3,1,""]},"nltk.sem.boxer.BoxerCard":{__init__:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerDrs":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerDrsParser":{__init__:[178,3,1,""],attempt_adjuncts:[178,3,1,""],get_all_symbols:[178,3,1,""],get_next_token_variable:[178,3,1,""],handle:[178,3,1,""],nullableIntToken:[178,3,1,""]},"nltk.sem.boxer.BoxerEq":{__init__:[178,3,1,""],atoms:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerIndexed":{__init__:[178,3,1,""],atoms:[178,3,1,""]},"nltk.sem.boxer.BoxerNamed":{__init__:[178,3,1,""],change_var:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerNot":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerOr":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerOutputDrsParser":{__init__:[178,3,1,""],attempt_adjuncts:[178,3,1,""],get_all_symbols:[178,3,1,""],handle:[178,3,1,""],handle_condition:[178,3,1,""],handle_drs:[178,3,1,""],parse:[178,3,1,""],parse_condition:[178,3,1,""],parse_drs:[178,3,1,""],parse_index:[178,3,1,""],parse_variable:[178,3,1,""]},"nltk.sem.boxer.BoxerPred":{__init__:[178,3,1,""],change_var:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerProp":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],referenced_labels:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerRel":{__init__:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.BoxerWhq":{__init__:[178,3,1,""],atoms:[178,3,1,""],clean:[178,3,1,""],renumber_sentences:[178,3,1,""]},"nltk.sem.boxer.NltkDrtBoxerDrsInterpreter":{__init__:[178,3,1,""],interpret:[178,3,1,""]},"nltk.sem.boxer.PassthroughBoxerDrsInterpreter":{interpret:[178,3,1,""]},"nltk.sem.chat80":{Concept:[179,2,1,""],binary_concept:[179,1,1,""],cities2table:[179,1,1,""],clause2concepts:[179,1,1,""],concepts:[179,1,1,""],label_indivs:[179,1,1,""],main:[179,1,1,""],make_lex:[179,1,1,""],make_valuation:[179,1,1,""],process_bundle:[179,1,1,""],sql_demo:[179,1,1,""],sql_query:[179,1,1,""],unary_concept:[179,1,1,""],val_dump:[179,1,1,""],val_load:[179,1,1,""]},"nltk.sem.chat80.Concept":{__init__:[179,3,1,""],augment:[179,3,1,""],close:[179,3,1,""]},"nltk.sem.cooper_storage":{CooperStore:[180,2,1,""],demo:[180,1,1,""],parse_with_bindops:[180,1,1,""]},"nltk.sem.cooper_storage.CooperStore":{__init__:[180,3,1,""],s_retrieve:[180,3,1,""]},"nltk.sem.drt":{AnaphoraResolutionException:[181,6,1,""],DRS:[181,2,1,""],DrsDrawer:[181,2,1,""],DrtAbstractVariableExpression:[181,2,1,""],DrtApplicationExpression:[181,2,1,""],DrtBinaryExpression:[181,2,1,""],DrtBooleanExpression:[181,2,1,""],DrtConcatenation:[181,2,1,""],DrtConstantExpression:[181,2,1,""],DrtEqualityExpression:[181,2,1,""],DrtEventVariableExpression:[181,2,1,""],DrtExpression:[181,2,1,""],DrtFunctionVariableExpression:[181,2,1,""],DrtIndividualVariableExpression:[181,2,1,""],DrtLambdaExpression:[181,2,1,""],DrtNegatedExpression:[181,2,1,""],DrtOrExpression:[181,2,1,""],DrtParser:[181,2,1,""],DrtProposition:[181,2,1,""],DrtTokens:[181,2,1,""],DrtVariableExpression:[181,1,1,""],PossibleAntecedents:[181,2,1,""],demo:[181,1,1,""],resolve_anaphora:[181,1,1,""],test_draw:[181,1,1,""]},"nltk.sem.drt.DRS":{__init__:[181,3,
1,""],eliminate_equality:[181,3,1,""],fol:[181,3,1,""],free:[181,3,1,""],get_refs:[181,3,1,""],replace:[181,3,1,""],visit:[181,3,1,""],visit_structured:[181,3,1,""]},"nltk.sem.drt.DrsDrawer":{BUFFER:[181,4,1,""],OUTERSPACE:[181,4,1,""],TOPSPACE:[181,4,1,""],__init__:[181,3,1,""],draw:[181,3,1,""]},"nltk.sem.drt.DrtAbstractVariableExpression":{eliminate_equality:[181,3,1,""],fol:[181,3,1,""],get_refs:[181,3,1,""]},"nltk.sem.drt.DrtApplicationExpression":{fol:[181,3,1,""],get_refs:[181,3,1,""]},"nltk.sem.drt.DrtBinaryExpression":{get_refs:[181,3,1,""]},"nltk.sem.drt.DrtConcatenation":{__init__:[181,3,1,""],eliminate_equality:[181,3,1,""],fol:[181,3,1,""],getOp:[181,3,1,""],get_refs:[181,3,1,""],replace:[181,3,1,""],simplify:[181,3,1,""],visit:[181,3,1,""]},"nltk.sem.drt.DrtEqualityExpression":{fol:[181,3,1,""]},"nltk.sem.drt.DrtExpression":{applyto:[181,3,1,""],draw:[181,3,1,""],eliminate_equality:[181,3,1,""],equiv:[181,3,1,""],fromstring:[181,3,1,""],get_refs:[181,3,1,""],is_pronoun_function:[181,3,1,""],make_EqualityExpression:[181,3,1,""],make_VariableExpression:[181,3,1,""],pretty_format:[181,3,1,""],pretty_print:[181,3,1,""],resolve_anaphora:[181,3,1,""],type:[181,5,1,""],typecheck:[181,3,1,""]},"nltk.sem.drt.DrtLambdaExpression":{alpha_convert:[181,3,1,""],fol:[181,3,1,""],get_refs:[181,3,1,""]},"nltk.sem.drt.DrtNegatedExpression":{fol:[181,3,1,""],get_refs:[181,3,1,""]},"nltk.sem.drt.DrtOrExpression":{fol:[181,3,1,""]},"nltk.sem.drt.DrtParser":{__init__:[181,3,1,""],get_BooleanExpression_factory:[181,3,1,""],get_all_symbols:[181,3,1,""],handle:[181,3,1,""],handle_DRS:[181,3,1,""],handle_conds:[181,3,1,""],handle_prop:[181,3,1,""],handle_refs:[181,3,1,""],isvariable:[181,3,1,""],make_ApplicationExpression:[181,3,1,""],make_BooleanExpression:[181,3,1,""],make_EqualityExpression:[181,3,1,""],make_LambdaExpression:[181,3,1,""],make_NegatedExpression:[181,3,1,""],make_VariableExpression:[181,3,1,""]},"nltk.sem.drt.DrtProposition":{__init__:[181,3,1,""],eliminate_equality:[181,3,1,""],fol:[181,3,1,""],get_refs:[181,3,1,""],replace:[181,3,1,""],visit:[181,3,1,""],visit_structured:[181,3,1,""]},"nltk.sem.drt.DrtTokens":{CLOSE_BRACKET:[181,4,1,""],COLON:[181,4,1,""],DRS:[181,4,1,""],DRS_CONC:[181,4,1,""],OPEN_BRACKET:[181,4,1,""],PRONOUN:[181,4,1,""],PUNCT:[181,4,1,""],SYMBOLS:[181,4,1,""],TOKENS:[181,4,1,""]},"nltk.sem.drt.PossibleAntecedents":{free:[181,3,1,""],replace:[181,3,1,""]},"nltk.sem.drt_glue_demo":{DrsWidget:[182,2,1,""],DrtGlueDemo:[182,2,1,""],demo:[182,1,1,""]},"nltk.sem.drt_glue_demo.DrsWidget":{__init__:[182,3,1,""],clear:[182,3,1,""],draw:[182,3,1,""]},"nltk.sem.drt_glue_demo.DrtGlueDemo":{__init__:[182,3,1,""],about:[182,3,1,""],destroy:[182,3,1,""],mainloop:[182,3,1,""],next:[182,3,1,""],postscript:[182,3,1,""],prev:[182,3,1,""],resize:[182,3,1,""]},"nltk.sem.evaluate":{Assignment:[183,2,1,""],Error:[183,6,1,""],Model:[183,2,1,""],Undefined:[183,6,1,""],Valuation:[183,2,1,""],arity:[183,1,1,""],demo:[183,1,1,""],foldemo:[183,1,1,""],folmodel:[183,1,1,""],is_rel:[183,1,1,""],propdemo:[183,1,1,""],read_valuation:[183,1,1,""],satdemo:[183,1,1,""],set2rel:[183,1,1,""],trace:[183,1,1,""]},"nltk.sem.evaluate.Assignment":{__init__:[183,3,1,""],add:[183,3,1,""],copy:[183,3,1,""],purge:[183,3,1,""]},"nltk.sem.evaluate.Model":{__init__:[183,3,1,""],evaluate:[183,3,1,""],i:[183,3,1,""],satisfiers:[183,3,1,""],satisfy:[183,3,1,""]},"nltk.sem.evaluate.Valuation":{__init__:[183,3,1,""],domain:[183,5,1,""],fromstring:[183,3,1,""],symbols:[183,5,1,""]},"nltk.sem.glue":{DrtGlue:[184,2,1,""]
,DrtGlueDict:[184,2,1,""],DrtGlueFormula:[184,2,1,""],Glue:[184,2,1,""],GlueDict:[184,2,1,""],GlueFormula:[184,2,1,""],demo:[184,1,1,""]},"nltk.sem.glue.DrtGlue":{__init__:[184,3,1,""],get_glue_dict:[184,3,1,""]},"nltk.sem.glue.DrtGlueDict":{get_GlueFormula_factory:[184,3,1,""]},"nltk.sem.glue.DrtGlueFormula":{__init__:[184,3,1,""],make_LambdaExpression:[184,3,1,""],make_VariableExpression:[184,3,1,""]},"nltk.sem.glue.Glue":{__init__:[184,3,1,""],dep_parse:[184,3,1,""],depgraph_to_glue:[184,3,1,""],get_glue_dict:[184,3,1,""],get_pos_tagger:[184,3,1,""],get_readings:[184,3,1,""],gfl_to_compiled:[184,3,1,""],parse_to_compiled:[184,3,1,""],parse_to_meaning:[184,3,1,""],train_depparser:[184,3,1,""]},"nltk.sem.glue.GlueDict":{__init__:[184,3,1,""],add_missing_dependencies:[184,3,1,""],find_label_name:[184,3,1,""],get_GlueFormula_factory:[184,3,1,""],get_glueformulas_from_semtype_entry:[184,3,1,""],get_label:[184,3,1,""],get_meaning_formula:[184,3,1,""],get_semtypes:[184,3,1,""],initialize_labels:[184,3,1,""],lookup:[184,3,1,""],lookup_unique:[184,3,1,""],read_file:[184,3,1,""],to_glueformula_list:[184,3,1,""]},"nltk.sem.glue.GlueFormula":{__init__:[184,3,1,""],applyto:[184,3,1,""],compile:[184,3,1,""],lambda_abstract:[184,3,1,""],make_LambdaExpression:[184,3,1,""],make_VariableExpression:[184,3,1,""],simplify:[184,3,1,""]},"nltk.sem.hole":{Constants:[185,2,1,""],Constraint:[185,2,1,""],HoleSemantics:[185,2,1,""],hole_readings:[185,1,1,""]},"nltk.sem.hole.Constants":{ALL:[185,4,1,""],AND:[185,4,1,""],EXISTS:[185,4,1,""],HOLE:[185,4,1,""],IFF:[185,4,1,""],IMP:[185,4,1,""],LABEL:[185,4,1,""],LEQ:[185,4,1,""],MAP:[185,4,1,""],NOT:[185,4,1,""],OR:[185,4,1,""],PRED:[185,4,1,""]},"nltk.sem.hole.Constraint":{__init__:[185,3,1,""]},"nltk.sem.hole.HoleSemantics":{__init__:[185,3,1,""],formula_tree:[185,3,1,""],is_node:[185,3,1,""],pluggings:[185,3,1,""]},"nltk.sem.lfg":{FStructure:[186,2,1,""],demo_read_depgraph:[186,1,1,""]},"nltk.sem.lfg.FStructure":{pretty_format:[186,3,1,""],read_depgraph:[186,3,1,""],safeappend:[186,3,1,""],to_depgraph:[186,3,1,""],to_glueformula_list:[186,3,1,""]},"nltk.sem.linearlogic":{ApplicationExpression:[187,2,1,""],AtomicExpression:[187,2,1,""],BindingDict:[187,2,1,""],ConstantExpression:[187,2,1,""],Expression:[187,2,1,""],ImpExpression:[187,2,1,""],LinearLogicApplicationException:[187,6,1,""],LinearLogicParser:[187,2,1,""],Tokens:[187,2,1,""],UnificationException:[187,6,1,""],VariableBindingException:[187,6,1,""],VariableExpression:[187,2,1,""],demo:[187,1,1,""]},"nltk.sem.linearlogic.ApplicationExpression":{__init__:[187,3,1,""],simplify:[187,3,1,""]},"nltk.sem.linearlogic.AtomicExpression":{__init__:[187,3,1,""],compile_neg:[187,3,1,""],compile_pos:[187,3,1,""],initialize_labels:[187,3,1,""],simplify:[187,3,1,""]},"nltk.sem.linearlogic.BindingDict":{__init__:[187,3,1,""]},"nltk.sem.linearlogic.ConstantExpression":{unify:[187,3,1,""]},"nltk.sem.linearlogic.Expression":{applyto:[187,3,1,""],fromstring:[187,3,1,""]},"nltk.sem.linearlogic.ImpExpression":{__init__:[187,3,1,""],compile_neg:[187,3,1,""],compile_pos:[187,3,1,""],initialize_labels:[187,3,1,""],simplify:[187,3,1,""],unify:[187,3,1,""]},"nltk.sem.linearlogic.LinearLogicParser":{__init__:[187,3,1,""],attempt_ApplicationExpression:[187,3,1,""],get_BooleanExpression_factory:[187,3,1,""],get_all_symbols:[187,3,1,""],handle:[187,3,1,""],make_BooleanExpression:[187,3,1,""],make_VariableExpression:[187,3,1,""]},"nltk.sem.linearlogic.Tokens":{CLOSE:[187,4,1,""],IMP:[187,4,1,""],OPEN:[187,4,1,""],PUNCT:[187,4,1,""],TOKENS:[1
87,4,1,""]},"nltk.sem.linearlogic.UnificationException":{__init__:[187,3,1,""]},"nltk.sem.linearlogic.VariableExpression":{unify:[187,3,1,""]},"nltk.sem.logic":{AbstractVariableExpression:[188,2,1,""],AllExpression:[188,2,1,""],AndExpression:[188,2,1,""],AnyType:[188,2,1,""],ApplicationExpression:[188,2,1,""],BasicType:[188,2,1,""],BinaryExpression:[188,2,1,""],BooleanExpression:[188,2,1,""],ComplexType:[188,2,1,""],ConstantExpression:[188,2,1,""],EntityType:[188,2,1,""],EqualityExpression:[188,2,1,""],EventType:[188,2,1,""],EventVariableExpression:[188,2,1,""],ExistsExpression:[188,2,1,""],ExpectedMoreTokensException:[188,6,1,""],Expression:[188,2,1,""],FunctionVariableExpression:[188,2,1,""],IffExpression:[188,2,1,""],IllegalTypeException:[188,6,1,""],ImpExpression:[188,2,1,""],InconsistentTypeHierarchyException:[188,6,1,""],IndividualVariableExpression:[188,2,1,""],LambdaExpression:[188,2,1,""],LogicParser:[188,2,1,""],LogicalExpressionException:[188,6,1,""],NegatedExpression:[188,2,1,""],OrExpression:[188,2,1,""],QuantifiedExpression:[188,2,1,""],SubstituteBindingsI:[188,2,1,""],Tokens:[188,2,1,""],TruthValueType:[188,2,1,""],Type:[188,2,1,""],TypeException:[188,6,1,""],TypeResolutionException:[188,6,1,""],UnexpectedTokenException:[188,6,1,""],Variable:[188,2,1,""],VariableBinderExpression:[188,2,1,""],VariableExpression:[188,1,1,""],binding_ops:[188,1,1,""],boolean_ops:[188,1,1,""],demo:[188,1,1,""],demoException:[188,1,1,""],demo_errors:[188,1,1,""],equality_preds:[188,1,1,""],is_eventvar:[188,1,1,""],is_funcvar:[188,1,1,""],is_indvar:[188,1,1,""],printtype:[188,1,1,""],read_logic:[188,1,1,""],read_type:[188,1,1,""],skolem_function:[188,1,1,""],typecheck:[188,1,1,""],unique_variable:[188,1,1,""]},"nltk.sem.logic.AbstractVariableExpression":{__init__:[188,3,1,""],findtype:[188,3,1,""],predicates:[188,3,1,""],replace:[188,3,1,""],simplify:[188,3,1,""]},"nltk.sem.logic.AllExpression":{getQuantifier:[188,3,1,""]},"nltk.sem.logic.AndExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.AnyType":{__init__:[188,3,1,""],first:[188,5,1,""],matches:[188,3,1,""],resolve:[188,3,1,""],second:[188,5,1,""],str:[188,3,1,""]},"nltk.sem.logic.ApplicationExpression":{__init__:[188,3,1,""],args:[188,5,1,""],constants:[188,3,1,""],findtype:[188,3,1,""],is_atom:[188,3,1,""],pred:[188,5,1,""],predicates:[188,3,1,""],simplify:[188,3,1,""],type:[188,5,1,""],uncurry:[188,3,1,""],visit:[188,3,1,""]},"nltk.sem.logic.BasicType":{matches:[188,3,1,""],resolve:[188,3,1,""]},"nltk.sem.logic.BinaryExpression":{__init__:[188,3,1,""],findtype:[188,3,1,""],type:[188,5,1,""],visit:[188,3,1,""]},"nltk.sem.logic.ComplexType":{__init__:[188,3,1,""],matches:[188,3,1,""],resolve:[188,3,1,""],str:[188,3,1,""]},"nltk.sem.logic.ConstantExpression":{constants:[188,3,1,""],free:[188,3,1,""],type:[188,4,1,""]},"nltk.sem.logic.EntityType":{str:[188,3,1,""]},"nltk.sem.logic.EqualityExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.EventType":{str:[188,3,1,""]},"nltk.sem.logic.EventVariableExpression":{type:[188,4,1,""]},"nltk.sem.logic.ExistsExpression":{getQuantifier:[188,3,1,""]},"nltk.sem.logic.ExpectedMoreTokensException":{__init__:[188,3,1,""]},"nltk.sem.logic.Expression":{applyto:[188,3,1,""],constants:[188,3,1,""],equiv:[188,3,1,""],findtype:[188,3,1,""],free:[188,3,1,""],fromstring:[188,3,1,""],make_VariableExpression:[188,3,1,""],negate:[188,3,1,""],normalize:[188,3,1,""],predicates:[188,3,1,""],replace:[188,3,1,""],simplify:[188,3,1,""],substitute_bindings:[188,3,1,""],typecheck:[188,3,1,""],variables:[188,3,1,""],visit:[188,3,1,"
"],visit_structured:[188,3,1,""]},"nltk.sem.logic.FunctionVariableExpression":{constants:[188,3,1,""],free:[188,3,1,""],type:[188,4,1,""]},"nltk.sem.logic.IffExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.IllegalTypeException":{__init__:[188,3,1,""]},"nltk.sem.logic.ImpExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.InconsistentTypeHierarchyException":{__init__:[188,3,1,""]},"nltk.sem.logic.IndividualVariableExpression":{constants:[188,3,1,""],free:[188,3,1,""],type:[188,5,1,""]},"nltk.sem.logic.LambdaExpression":{type:[188,5,1,""]},"nltk.sem.logic.LogicParser":{__init__:[188,3,1,""],assertNextToken:[188,3,1,""],assertToken:[188,3,1,""],attempt_ApplicationExpression:[188,3,1,""],attempt_BooleanExpression:[188,3,1,""],attempt_EqualityExpression:[188,3,1,""],attempt_adjuncts:[188,3,1,""],get_BooleanExpression_factory:[188,3,1,""],get_QuantifiedExpression_factory:[188,3,1,""],get_all_symbols:[188,3,1,""],get_next_token_variable:[188,3,1,""],handle:[188,3,1,""],handle_lambda:[188,3,1,""],handle_negation:[188,3,1,""],handle_open:[188,3,1,""],handle_quant:[188,3,1,""],handle_variable:[188,3,1,""],has_priority:[188,3,1,""],inRange:[188,3,1,""],isvariable:[188,3,1,""],make_ApplicationExpression:[188,3,1,""],make_BooleanExpression:[188,3,1,""],make_EqualityExpression:[188,3,1,""],make_LambdaExpression:[188,3,1,""],make_NegatedExpression:[188,3,1,""],make_QuanifiedExpression:[188,3,1,""],make_VariableExpression:[188,3,1,""],parse:[188,3,1,""],process:[188,3,1,""],process_next_expression:[188,3,1,""],process_quoted_token:[188,3,1,""],token:[188,3,1,""],type_check:[188,4,1,""]},"nltk.sem.logic.LogicalExpressionException":{__init__:[188,3,1,""]},"nltk.sem.logic.NegatedExpression":{__init__:[188,3,1,""],findtype:[188,3,1,""],negate:[188,3,1,""],type:[188,5,1,""],visit:[188,3,1,""]},"nltk.sem.logic.OrExpression":{getOp:[188,3,1,""]},"nltk.sem.logic.QuantifiedExpression":{type:[188,5,1,""]},"nltk.sem.logic.SubstituteBindingsI":{substitute_bindings:[188,3,1,""],variables:[188,3,1,""]},"nltk.sem.logic.Tokens":{ALL:[188,4,1,""],ALL_LIST:[188,4,1,""],AND:[188,4,1,""],AND_LIST:[188,4,1,""],BINOPS:[188,4,1,""],CLOSE:[188,4,1,""],COMMA:[188,4,1,""],DOT:[188,4,1,""],EQ:[188,4,1,""],EQ_LIST:[188,4,1,""],EXISTS:[188,4,1,""],EXISTS_LIST:[188,4,1,""],IFF:[188,4,1,""],IFF_LIST:[188,4,1,""],IMP:[188,4,1,""],IMP_LIST:[188,4,1,""],LAMBDA:[188,4,1,""],LAMBDA_LIST:[188,4,1,""],NEQ:[188,4,1,""],NEQ_LIST:[188,4,1,""],NOT:[188,4,1,""],NOT_LIST:[188,4,1,""],OPEN:[188,4,1,""],OR:[188,4,1,""],OR_LIST:[188,4,1,""],PUNCT:[188,4,1,""],QUANTS:[188,4,1,""],SYMBOLS:[188,4,1,""],TOKENS:[188,4,1,""]},"nltk.sem.logic.TruthValueType":{str:[188,3,1,""]},"nltk.sem.logic.Type":{fromstring:[188,3,1,""]},"nltk.sem.logic.TypeException":{__init__:[188,3,1,""]},"nltk.sem.logic.TypeResolutionException":{__init__:[188,3,1,""]},"nltk.sem.logic.UnexpectedTokenException":{__init__:[188,3,1,""]},"nltk.sem.logic.Variable":{__init__:[188,3,1,""],substitute_bindings:[188,3,1,""]},"nltk.sem.logic.VariableBinderExpression":{__init__:[188,3,1,""],alpha_convert:[188,3,1,""],findtype:[188,3,1,""],free:[188,3,1,""],replace:[188,3,1,""],visit:[188,3,1,""],visit_structured:[188,3,1,""]},"nltk.sem.relextract":{class_abbrev:[189,1,1,""],clause:[189,1,1,""],conllesp:[189,1,1,""],conllned:[189,1,1,""],descape_entity:[189,1,1,""],extract_rels:[189,1,1,""],ieer_headlines:[189,1,1,""],in_demo:[189,1,1,""],list2sym:[189,1,1,""],ne_chunked:[189,1,1,""],roles_demo:[189,1,1,""],rtuple:[189,1,1,""],semi_rel2reldict:[189,1,1,""],tree2semi_rel:[189,1,1,""]},"nltk.sem.skolemi
ze":{skolemize:[190,1,1,""],to_cnf:[190,1,1,""]},"nltk.sem.util":{demo:[191,1,1,""],demo_legacy_grammar:[191,1,1,""],demo_model0:[191,1,1,""],evaluate_sents:[191,1,1,""],interpret_sents:[191,1,1,""],parse_sents:[191,1,1,""],read_sents:[191,1,1,""],root_semrep:[191,1,1,""]},"nltk.sentiment":{sentiment_analyzer:[193,0,0,"-"],util:[194,0,0,"-"],vader:[195,0,0,"-"]},"nltk.sentiment.sentiment_analyzer":{SentimentAnalyzer:[193,2,1,""]},"nltk.sentiment.sentiment_analyzer.SentimentAnalyzer":{__init__:[193,3,1,""],add_feat_extractor:[193,3,1,""],all_words:[193,3,1,""],apply_features:[193,3,1,""],bigram_collocation_feats:[193,3,1,""],classify:[193,3,1,""],evaluate:[193,3,1,""],extract_features:[193,3,1,""],save_file:[193,3,1,""],train:[193,3,1,""],unigram_word_feats:[193,3,1,""]},"nltk.sentiment.util":{demo_liu_hu_lexicon:[194,1,1,""],demo_movie_reviews:[194,1,1,""],demo_sent_subjectivity:[194,1,1,""],demo_subjectivity:[194,1,1,""],demo_tweets:[194,1,1,""],demo_vader_instance:[194,1,1,""],demo_vader_tweets:[194,1,1,""],extract_bigram_feats:[194,1,1,""],extract_unigram_feats:[194,1,1,""],json2csv_preprocess:[194,1,1,""],mark_negation:[194,1,1,""],output_markdown:[194,1,1,""],parse_tweets_set:[194,1,1,""],split_train_test:[194,1,1,""],timer:[194,1,1,""]},"nltk.sentiment.vader":{SentiText:[195,2,1,""],SentimentIntensityAnalyzer:[195,2,1,""],VaderConstants:[195,2,1,""]},"nltk.sentiment.vader.SentiText":{__init__:[195,3,1,""],allcap_differential:[195,3,1,""]},"nltk.sentiment.vader.SentimentIntensityAnalyzer":{__init__:[195,3,1,""],make_lex_dict:[195,3,1,""],polarity_scores:[195,3,1,""],score_valence:[195,3,1,""],sentiment_valence:[195,3,1,""]},"nltk.sentiment.vader.VaderConstants":{BOOSTER_DICT:[195,4,1,""],B_DECR:[195,4,1,""],B_INCR:[195,4,1,""],C_INCR:[195,4,1,""],NEGATE:[195,4,1,""],N_SCALAR:[195,4,1,""],PUNC_LIST:[195,4,1,""],REGEX_REMOVE_PUNCTUATION:[195,4,1,""],SPECIAL_CASE_IDIOMS:[195,4,1,""],__init__:[195,3,1,""],negated:[195,3,1,""],normalize:[195,3,1,""],scalar_inc_dec:[195,3,1,""]},"nltk.stem":{api:[197,0,0,"-"],arlstem2:[199,0,0,"-"],arlstem:[198,0,0,"-"],cistem:[200,0,0,"-"],isri:[201,0,0,"-"],lancaster:[202,0,0,"-"],porter:[203,0,0,"-"],regexp:[204,0,0,"-"],rslp:[205,0,0,"-"],snowball:[206,0,0,"-"],util:[207,0,0,"-"],wordnet:[208,0,0,"-"]},"nltk.stem.api":{StemmerI:[197,2,1,""]},"nltk.stem.api.StemmerI":{stem:[197,3,1,""]},"nltk.stem.arlstem":{ARLSTem:[198,2,1,""]},"nltk.stem.arlstem.ARLSTem":{__init__:[198,3,1,""],fem2masc:[198,3,1,""],norm:[198,3,1,""],plur2sing:[198,3,1,""],pref:[198,3,1,""],stem:[198,3,1,""],suff:[198,3,1,""],verb:[198,3,1,""],verb_t1:[198,3,1,""],verb_t2:[198,3,1,""],verb_t3:[198,3,1,""],verb_t4:[198,3,1,""],verb_t5:[198,3,1,""],verb_t6:[198,3,1,""]},"nltk.stem.arlstem2":{ARLSTem2:[199,2,1,""]},"nltk.stem.arlstem2.ARLSTem2":{__init__:[199,3,1,""],adjective:[199,3,1,""],fem2masc:[199,3,1,""],norm:[199,3,1,""],plur2sing:[199,3,1,""],pref:[199,3,1,""],stem1:[199,3,1,""],stem:[199,3,1,""],suff:[199,3,1,""],verb:[199,3,1,""],verb_t1:[199,3,1,""],verb_t2:[199,3,1,""],verb_t3:[199,3,1,""],verb_t4:[199,3,1,""],verb_t5:[199,3,1,""],verb_t6:[199,3,1,""]},"nltk.stem.cistem":{Cistem:[200,2,1,""]},"nltk.stem.cistem.Cistem":{__init__:[200,3,1,""],repl_xx:[200,4,1,""],repl_xx_back:[200,4,1,""],replace_back:[200,3,1,""],replace_to:[200,3,1,""],segment:[200,3,1,""],stem:[200,3,1,""],strip_emr:[200,4,1,""],strip_esn:[200,4,1,""],strip_ge:[200,4,1,""],strip_nd:[200,4,1,""],strip_t:[200,4,1,""]},"nltk.stem.isri":{ISRIStemmer:[201,2,1,""]},"nltk.stem.isri.ISRIStemmer":{__init__:[201,3,1,""]
,end_w5:[201,3,1,""],end_w6:[201,3,1,""],norm:[201,3,1,""],pre1:[201,3,1,""],pre32:[201,3,1,""],pro_w4:[201,3,1,""],pro_w53:[201,3,1,""],pro_w54:[201,3,1,""],pro_w64:[201,3,1,""],pro_w6:[201,3,1,""],stem:[201,3,1,""],suf1:[201,3,1,""],suf32:[201,3,1,""],waw:[201,3,1,""]},"nltk.stem.lancaster":{LancasterStemmer:[202,2,1,""]},"nltk.stem.lancaster.LancasterStemmer":{__init__:[202,3,1,""],default_rule_tuple:[202,4,1,""],parseRules:[202,3,1,""],stem:[202,3,1,""]},"nltk.stem.porter":{PorterStemmer:[203,2,1,""],demo:[203,1,1,""]},"nltk.stem.porter.PorterStemmer":{MARTIN_EXTENSIONS:[203,4,1,""],NLTK_EXTENSIONS:[203,4,1,""],ORIGINAL_ALGORITHM:[203,4,1,""],__init__:[203,3,1,""],stem:[203,3,1,""]},"nltk.stem.regexp":{RegexpStemmer:[204,2,1,""]},"nltk.stem.regexp.RegexpStemmer":{__init__:[204,3,1,""],stem:[204,3,1,""]},"nltk.stem.rslp":{RSLPStemmer:[205,2,1,""]},"nltk.stem.rslp.RSLPStemmer":{__init__:[205,3,1,""],apply_rule:[205,3,1,""],read_rule:[205,3,1,""],stem:[205,3,1,""]},"nltk.stem.snowball":{ArabicStemmer:[206,2,1,""],DanishStemmer:[206,2,1,""],DutchStemmer:[206,2,1,""],EnglishStemmer:[206,2,1,""],FinnishStemmer:[206,2,1,""],FrenchStemmer:[206,2,1,""],GermanStemmer:[206,2,1,""],HungarianStemmer:[206,2,1,""],ItalianStemmer:[206,2,1,""],NorwegianStemmer:[206,2,1,""],PorterStemmer:[206,2,1,""],PortugueseStemmer:[206,2,1,""],RomanianStemmer:[206,2,1,""],RussianStemmer:[206,2,1,""],SnowballStemmer:[206,2,1,""],SpanishStemmer:[206,2,1,""],SwedishStemmer:[206,2,1,""],demo:[206,1,1,""]},"nltk.stem.snowball.ArabicStemmer":{is_defined:[206,4,1,""],is_noun:[206,4,1,""],is_verb:[206,4,1,""],prefix_step2a_success:[206,4,1,""],prefix_step3a_noun_success:[206,4,1,""],prefix_step3b_noun_success:[206,4,1,""],stem:[206,3,1,""],suffix_noun_step1a_success:[206,4,1,""],suffix_noun_step2a_success:[206,4,1,""],suffix_noun_step2b_success:[206,4,1,""],suffix_noun_step2c2_success:[206,4,1,""],suffix_verb_step2a_success:[206,4,1,""],suffix_verb_step2b_success:[206,4,1,""],suffixe_noun_step1b_success:[206,4,1,""],suffixes_verb_step1_success:[206,4,1,""]},"nltk.stem.snowball.DanishStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.DutchStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.EnglishStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.FinnishStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.FrenchStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.GermanStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.HungarianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.ItalianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.NorwegianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.PorterStemmer":{__init__:[206,3,1,""]},"nltk.stem.snowball.PortugueseStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.RomanianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.RussianStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.SnowballStemmer":{__init__:[206,3,1,""],languages:[206,4,1,""],stem:[206,3,1,""]},"nltk.stem.snowball.SpanishStemmer":{stem:[206,3,1,""]},"nltk.stem.snowball.SwedishStemmer":{stem:[206,3,1,""]},"nltk.stem.util":{prefix_replace:[207,1,1,""],suffix_replace:[207,1,1,""]},"nltk.stem.wordnet":{WordNetLemmatizer:[208,2,1,""]},"nltk.stem.wordnet.WordNetLemmatizer":{lemmatize:[208,3,1,""]},"nltk.tag":{api:[210,0,0,"-"],brill:[211,0,0,"-"],brill_trainer:[212,0,0,"-"],crf:[213,0,0,"-"],hmm:[214,0,0,"-"],hunpos:[215,0,0,"-"],mapping:[216,0,0,"-"],perceptron:[217,0,0,"-"],pos_tag:[209,1,1,""],pos_tag_sents:[209,1,1,""],senna:[218,0,0,"-"],sequential:[219,0,0,"-"],stanford:[220,0,0,"-"],tnt:[221,0,0,"-"],util:[222,0,0,"-"]},"nltk.tag.api":{Featu
resetTaggerI:[210,2,1,""],TaggerI:[210,2,1,""]},"nltk.tag.api.TaggerI":{evaluate:[210,3,1,""],tag:[210,3,1,""],tag_sents:[210,3,1,""]},"nltk.tag.brill":{BrillTagger:[211,2,1,""],Pos:[211,2,1,""],Word:[211,2,1,""],brill24:[211,1,1,""],describe_template_sets:[211,1,1,""],fntbl37:[211,1,1,""],nltkdemo18:[211,1,1,""],nltkdemo18plus:[211,1,1,""]},"nltk.tag.brill.BrillTagger":{__init__:[211,3,1,""],batch_tag_incremental:[211,3,1,""],decode_json_obj:[211,3,1,""],encode_json_obj:[211,3,1,""],json_tag:[211,4,1,""],print_template_statistics:[211,3,1,""],rules:[211,3,1,""],tag:[211,3,1,""],train_stats:[211,3,1,""]},"nltk.tag.brill.Pos":{extract_property:[211,3,1,""],json_tag:[211,4,1,""]},"nltk.tag.brill.Word":{extract_property:[211,3,1,""],json_tag:[211,4,1,""]},"nltk.tag.brill_trainer":{BrillTaggerTrainer:[212,2,1,""]},"nltk.tag.brill_trainer.BrillTaggerTrainer":{__init__:[212,3,1,""],train:[212,3,1,""]},"nltk.tag.crf":{CRFTagger:[213,2,1,""]},"nltk.tag.crf.CRFTagger":{__init__:[213,3,1,""],set_model_file:[213,3,1,""],tag:[213,3,1,""],tag_sents:[213,3,1,""],train:[213,3,1,""]},"nltk.tag.hmm":{HiddenMarkovModelTagger:[214,2,1,""],HiddenMarkovModelTrainer:[214,2,1,""],demo:[214,1,1,""],demo_bw:[214,1,1,""],demo_pos:[214,1,1,""],demo_pos_bw:[214,1,1,""],load_pos:[214,1,1,""],logsumexp2:[214,1,1,""]},"nltk.tag.hmm.HiddenMarkovModelTagger":{__init__:[214,3,1,""],best_path:[214,3,1,""],best_path_simple:[214,3,1,""],entropy:[214,3,1,""],log_probability:[214,3,1,""],point_entropy:[214,3,1,""],probability:[214,3,1,""],random_sample:[214,3,1,""],reset_cache:[214,3,1,""],tag:[214,3,1,""],test:[214,3,1,""],train:[214,3,1,""]},"nltk.tag.hmm.HiddenMarkovModelTrainer":{__init__:[214,3,1,""],train:[214,3,1,""],train_supervised:[214,3,1,""],train_unsupervised:[214,3,1,""]},"nltk.tag.hunpos":{HunposTagger:[215,2,1,""]},"nltk.tag.hunpos.HunposTagger":{__init__:[215,3,1,""],close:[215,3,1,""],tag:[215,3,1,""]},"nltk.tag.mapping":{map_tag:[216,1,1,""],tagset_mapping:[216,1,1,""]},"nltk.tag.perceptron":{AveragedPerceptron:[217,2,1,""],PerceptronTagger:[217,2,1,""]},"nltk.tag.perceptron.AveragedPerceptron":{__init__:[217,3,1,""],average_weights:[217,3,1,""],decode_json_obj:[217,3,1,""],encode_json_obj:[217,3,1,""],json_tag:[217,4,1,""],load:[217,3,1,""],predict:[217,3,1,""],save:[217,3,1,""],update:[217,3,1,""]},"nltk.tag.perceptron.PerceptronTagger":{END:[217,4,1,""],START:[217,4,1,""],__init__:[217,3,1,""],decode_json_obj:[217,3,1,""],encode_json_obj:[217,3,1,""],json_tag:[217,4,1,""],load:[217,3,1,""],normalize:[217,3,1,""],tag:[217,3,1,""],train:[217,3,1,""]},"nltk.tag.senna":{SennaChunkTagger:[218,2,1,""],SennaNERTagger:[218,2,1,""],SennaTagger:[218,2,1,""]},"nltk.tag.senna.SennaChunkTagger":{__init__:[218,3,1,""],bio_to_chunks:[218,3,1,""],tag_sents:[218,3,1,""]},"nltk.tag.senna.SennaNERTagger":{__init__:[218,3,1,""],tag_sents:[218,3,1,""]},"nltk.tag.senna.SennaTagger":{__init__:[218,3,1,""],tag_sents:[218,3,1,""]},"nltk.tag.sequential":{AffixTagger:[219,2,1,""],BigramTagger:[219,2,1,""],ClassifierBasedPOSTagger:[219,2,1,""],ClassifierBasedTagger:[219,2,1,""],ContextTagger:[219,2,1,""],DefaultTagger:[219,2,1,""],NgramTagger:[219,2,1,""],RegexpTagger:[219,2,1,""],SequentialBackoffTagger:[219,2,1,""],TrigramTagger:[219,2,1,""],UnigramTagger:[219,2,1,""]},"nltk.tag.sequential.AffixTagger":{__init__:[219,3,1,""],context:[219,3,1,""],decode_json_obj:[219,3,1,""],encode_json_obj:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.BigramTagger":{__init__:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.Classif
ierBasedPOSTagger":{feature_detector:[219,3,1,""]},"nltk.tag.sequential.ClassifierBasedTagger":{__init__:[219,3,1,""],choose_tag:[219,3,1,""],classifier:[219,3,1,""],feature_detector:[219,3,1,""]},"nltk.tag.sequential.ContextTagger":{__init__:[219,3,1,""],choose_tag:[219,3,1,""],context:[219,3,1,""],size:[219,3,1,""]},"nltk.tag.sequential.DefaultTagger":{__init__:[219,3,1,""],choose_tag:[219,3,1,""],decode_json_obj:[219,3,1,""],encode_json_obj:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.NgramTagger":{__init__:[219,3,1,""],context:[219,3,1,""],decode_json_obj:[219,3,1,""],encode_json_obj:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.RegexpTagger":{__init__:[219,3,1,""],choose_tag:[219,3,1,""],decode_json_obj:[219,3,1,""],encode_json_obj:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.SequentialBackoffTagger":{__init__:[219,3,1,""],backoff:[219,5,1,""],choose_tag:[219,3,1,""],tag:[219,3,1,""],tag_one:[219,3,1,""]},"nltk.tag.sequential.TrigramTagger":{__init__:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.sequential.UnigramTagger":{__init__:[219,3,1,""],context:[219,3,1,""],json_tag:[219,4,1,""]},"nltk.tag.stanford":{StanfordNERTagger:[220,2,1,""],StanfordPOSTagger:[220,2,1,""],StanfordTagger:[220,2,1,""]},"nltk.tag.stanford.StanfordNERTagger":{__init__:[220,3,1,""],parse_output:[220,3,1,""]},"nltk.tag.stanford.StanfordPOSTagger":{__init__:[220,3,1,""]},"nltk.tag.stanford.StanfordTagger":{__init__:[220,3,1,""],parse_output:[220,3,1,""],tag:[220,3,1,""],tag_sents:[220,3,1,""]},"nltk.tag.tnt":{TnT:[221,2,1,""],basic_sent_chop:[221,1,1,""],demo2:[221,1,1,""],demo3:[221,1,1,""],demo:[221,1,1,""]},"nltk.tag.tnt.TnT":{__init__:[221,3,1,""],tag:[221,3,1,""],tagdata:[221,3,1,""],train:[221,3,1,""]},"nltk.tag.util":{str2tuple:[222,1,1,""],tuple2str:[222,1,1,""],untag:[222,1,1,""]},"nltk.tbl":{demo:[224,0,0,"-"],erroranalysis:[225,0,0,"-"],feature:[226,0,0,"-"],rule:[227,0,0,"-"],template:[228,0,0,"-"]},"nltk.tbl.demo":{corpus_size:[224,1,1,""],demo:[224,1,1,""],demo_error_analysis:[224,1,1,""],demo_generated_templates:[224,1,1,""],demo_high_accuracy_rules:[224,1,1,""],demo_learning_curve:[224,1,1,""],demo_multifeature_template:[224,1,1,""],demo_multiposition_feature:[224,1,1,""],demo_repr_rule_format:[224,1,1,""],demo_serialize_tagger:[224,1,1,""],demo_str_rule_format:[224,1,1,""],demo_template_statistics:[224,1,1,""],demo_verbose_rule_format:[224,1,1,""],postag:[224,1,1,""]},"nltk.tbl.erroranalysis":{error_list:[225,1,1,""]},"nltk.tbl.feature":{Feature:[226,2,1,""]},"nltk.tbl.feature.Feature":{PROPERTY_NAME:[226,4,1,""],__init__:[226,3,1,""],decode_json_obj:[226,3,1,""],encode_json_obj:[226,3,1,""],expand:[226,3,1,""],extract_property:[226,3,1,""],intersects:[226,3,1,""],issuperset:[226,3,1,""],json_tag:[226,4,1,""]},"nltk.tbl.rule":{Rule:[227,2,1,""],TagRule:[227,2,1,""]},"nltk.tbl.rule.Rule":{__init__:[227,3,1,""],applies:[227,3,1,""],decode_json_obj:[227,3,1,""],encode_json_obj:[227,3,1,""],format:[227,3,1,""],json_tag:[227,4,1,""]},"nltk.tbl.rule.TagRule":{__init__:[227,3,1,""],applies:[227,3,1,""],apply:[227,3,1,""],original_tag:[227,4,1,""],replacement_tag:[227,4,1,""]},"nltk.tbl.template":{BrillTemplateI:[228,2,1,""],Template:[228,2,1,""]},"nltk.tbl.template.BrillTemplateI":{applicable_rules:[228,3,1,""],get_neighborhood:[228,3,1,""]},"nltk.tbl.template.Template":{ALLTEMPLATES:[228,4,1,""],__init__:[228,3,1,""],applicable_rules:[228,3,1,""],expand:[228,3,1,""],get_neighborhood:[228,3,1,""]},"nltk.test":{all:[230,0,0,"-"],childes_fixt:[231,0,0,"-"],classify_fixt:
[232,0,0,"-"],discourse_fixt:[234,0,0,"-"],gensim_fixt:[235,0,0,"-"],gluesemantics_malt_fixt:[236,0,0,"-"],inference_fixt:[237,0,0,"-"],nonmonotonic_fixt:[238,0,0,"-"],portuguese_en_fixt:[239,0,0,"-"],probability_fixt:[240,0,0,"-"],unit:[241,0,0,"-"]},"nltk.test.all":{additional_tests:[230,1,1,""]},"nltk.test.childes_fixt":{setup_module:[231,1,1,""]},"nltk.test.classify_fixt":{setup_module:[232,1,1,""]},"nltk.test.discourse_fixt":{setup_module:[234,1,1,""]},"nltk.test.gensim_fixt":{setup_module:[235,1,1,""]},"nltk.test.gluesemantics_malt_fixt":{setup_module:[236,1,1,""]},"nltk.test.inference_fixt":{setup_module:[237,1,1,""]},"nltk.test.nonmonotonic_fixt":{setup_module:[238,1,1,""]},"nltk.test.portuguese_en_fixt":{setup_module:[239,1,1,""]},"nltk.test.probability_fixt":{setup_module:[240,1,1,""]},"nltk.test.unit":{lm:[242,0,0,"-"],test_aline:[247,0,0,"-"],test_brill:[248,0,0,"-"],test_cfg2chomsky:[250,0,0,"-"],test_chunk:[251,0,0,"-"],test_collocations:[253,0,0,"-"],test_concordance:[254,0,0,"-"],test_corpus_views:[257,0,0,"-"],test_disagreement:[259,0,0,"-"],test_freqdist:[261,0,0,"-"],test_json_serialization:[264,0,0,"-"],test_metrics:[265,0,0,"-"],test_naivebayes:[266,0,0,"-"],test_nombank:[267,0,0,"-"],test_pl196x:[268,0,0,"-"],test_pos_tag:[269,0,0,"-"],test_ribes:[270,0,0,"-"],test_senna:[273,0,0,"-"],test_stem:[274,0,0,"-"],test_tag:[275,0,0,"-"],test_tgrep:[276,0,0,"-"],test_wordnet:[280,0,0,"-"],translate:[281,0,0,"-"]},"nltk.test.unit.lm":{test_preprocessing:[245,0,0,"-"],test_vocabulary:[246,0,0,"-"]},"nltk.test.unit.lm.test_preprocessing":{TestPreprocessing:[245,2,1,""]},"nltk.test.unit.lm.test_preprocessing.TestPreprocessing":{test_padded_everygram_pipeline:[245,3,1,""]},"nltk.test.unit.lm.test_vocabulary":{NgramModelVocabularyTests:[246,2,1,""]},"nltk.test.unit.lm.test_vocabulary.NgramModelVocabularyTests":{setUpClass:[246,3,1,""],test_counts_set_correctly:[246,3,1,""],test_creation_with_counter:[246,3,1,""],test_cutoff_setter_checks_value:[246,3,1,""],test_cutoff_value_set_correctly:[246,3,1,""],test_eqality:[246,3,1,""],test_len_is_constant:[246,3,1,""],test_lookup:[246,3,1,""],test_lookup_None:[246,3,1,""],test_lookup_empty_iterables:[246,3,1,""],test_lookup_empty_str:[246,3,1,""],test_lookup_int:[246,3,1,""],test_lookup_iterables:[246,3,1,""],test_lookup_recursive:[246,3,1,""],test_membership_check_respects_cutoff:[246,3,1,""],test_str:[246,3,1,""],test_truthiness:[246,3,1,""],test_unable_to_change_cutoff:[246,3,1,""],test_update_empty_vocab:[246,3,1,""],test_vocab_iter_respects_cutoff:[246,3,1,""],test_vocab_len_respects_cutoff:[246,3,1,""]},"nltk.test.unit.test_aline":{test_aline:[247,1,1,""],test_aline_delta:[247,1,1,""]},"nltk.test.unit.test_brill":{TestBrill:[248,2,1,""]},"nltk.test.unit.test_brill.TestBrill":{test_brill_demo:[248,3,1,""],test_pos_template:[248,3,1,""]},"nltk.test.unit.test_cfg2chomsky":{ChomskyNormalFormForCFGTest:[250,2,1,""]},"nltk.test.unit.test_cfg2chomsky.ChomskyNormalFormForCFGTest":{test_complex:[250,3,1,""],test_simple:[250,3,1,""]},"nltk.test.unit.test_chunk":{TestChunkRule:[251,2,1,""]},"nltk.test.unit.test_chunk.TestChunkRule":{test_tag_pattern2re_pattern_quantifier:[251,3,1,""]},"nltk.test.unit.test_collocations":{close_enough:[253,1,1,""],test_bigram2:[253,1,1,""],test_bigram3:[253,1,1,""],test_bigram5:[253,1,1,""]},"nltk.test.unit.test_concordance":{TestConcordance:[254,2,1,""],stdout_redirect:[254,1,1,""]},"nltk.test.unit.test_concordance.TestConcordance":{setUp:[254,3,1,""],setUpClass:[254,3,1,""],tearDown:[254,3,1,""],tearDownClass:[2
54,3,1,""],test_concordance_lines:[254,3,1,""],test_concordance_list:[254,3,1,""],test_concordance_print:[254,3,1,""],test_concordance_width:[254,3,1,""]},"nltk.test.unit.test_corpus_views":{TestCorpusViews:[257,2,1,""]},"nltk.test.unit.test_corpus_views.TestCorpusViews":{data:[257,3,1,""],linetok:[257,4,1,""],names:[257,4,1,""],test_correct_length:[257,3,1,""],test_correct_values:[257,3,1,""]},"nltk.test.unit.test_disagreement":{TestDisagreement:[259,2,1,""]},"nltk.test.unit.test_disagreement.TestDisagreement":{test_advanced2:[259,3,1,""],test_advanced:[259,3,1,""],test_easy2:[259,3,1,""],test_easy:[259,3,1,""]},"nltk.test.unit.test_freqdist":{test_iterating_returns_an_iterator_ordered_by_frequency:[261,1,1,""]},"nltk.test.unit.test_json_serialization":{TestJSONSerialization:[264,2,1,""]},"nltk.test.unit.test_json_serialization.TestJSONSerialization":{setUp:[264,3,1,""],test_affix_tagger:[264,3,1,""],test_brill_tagger:[264,3,1,""],test_default_tagger:[264,3,1,""],test_ngram_taggers:[264,3,1,""],test_perceptron_tagger:[264,3,1,""],test_regexp_tagger:[264,3,1,""]},"nltk.test.unit.test_metrics":{TestLikelihoodRatio:[265,2,1,""]},"nltk.test.unit.test_metrics.TestLikelihoodRatio":{test_lr_bigram:[265,3,1,""],test_lr_quadgram:[265,3,1,""],test_lr_trigram:[265,3,1,""]},"nltk.test.unit.test_naivebayes":{NaiveBayesClassifierTest:[266,2,1,""]},"nltk.test.unit.test_naivebayes.NaiveBayesClassifierTest":{test_simple:[266,3,1,""]},"nltk.test.unit.test_nombank":{NombankDemo:[267,2,1,""]},"nltk.test.unit.test_nombank.NombankDemo":{test_framefiles_fileids:[267,3,1,""],test_instance:[267,3,1,""],test_numbers:[267,3,1,""]},"nltk.test.unit.test_pl196x":{TestCorpusViews:[268,2,1,""]},"nltk.test.unit.test_pl196x.TestCorpusViews":{test_corpus_reader:[268,3,1,""]},"nltk.test.unit.test_pos_tag":{TestPosTag:[269,2,1,""]},"nltk.test.unit.test_pos_tag.TestPosTag":{test_pos_tag_eng:[269,3,1,""],test_pos_tag_eng_universal:[269,3,1,""],test_pos_tag_rus:[269,3,1,""],test_pos_tag_rus_universal:[269,3,1,""],test_pos_tag_unknown_lang:[269,3,1,""],test_unspecified_lang:[269,3,1,""]},"nltk.test.unit.test_ribes":{test_no_zero_div:[270,1,1,""],test_ribes:[270,1,1,""],test_ribes_empty_worder:[270,1,1,""],test_ribes_one_worder:[270,1,1,""],test_ribes_two_worder:[270,1,1,""]},"nltk.test.unit.test_senna":{TestSennaPipeline:[273,2,1,""],TestSennaTagger:[273,2,1,""]},"nltk.test.unit.test_senna.TestSennaPipeline":{test_senna_pipeline:[273,3,1,""]},"nltk.test.unit.test_senna.TestSennaTagger":{test_senna_chunk_tagger:[273,3,1,""],test_senna_ner_tagger:[273,3,1,""],test_senna_tagger:[273,3,1,""]},"nltk.test.unit.test_stem":{PorterTest:[274,2,1,""],SnowballTest:[274,2,1,""]},"nltk.test.unit.test_stem.PorterTest":{test_lowercase_option:[274,3,1,""],test_oed_bug:[274,3,1,""],test_vocabulary_martin_mode:[274,3,1,""],test_vocabulary_nltk_mode:[274,3,1,""],test_vocabulary_original_mode:[274,3,1,""]},"nltk.test.unit.test_stem.SnowballTest":{test_arabic:[274,3,1,""],test_german:[274,3,1,""],test_russian:[274,3,1,""],test_short_strings_bug:[274,3,1,""],test_spanish:[274,3,1,""]},"nltk.test.unit.test_tag":{setup_module:[275,1,1,""],test_basic:[275,1,1,""]},"nltk.test.unit.test_tgrep":{TestSequenceFunctions:[276,2,1,""]},"nltk.test.unit.test_tgrep.TestSequenceFunctions":{test_bad_operator:[276,3,1,""],test_comments:[276,3,1,""],test_examples:[276,3,1,""],test_labeled_nodes:[276,3,1,""],test_multiple_conjs:[276,3,1,""],test_node_encoding:[276,3,1,""],test_node_nocase:[276,3,1,""],test_node_noleaves:[276,3,1,""],test_node_printing:[276,3,1,""],test_n
ode_quoted:[276,3,1,""],test_node_regex:[276,3,1,""],test_node_regex_2:[276,3,1,""],test_node_simple:[276,3,1,""],test_node_tree_position:[276,3,1,""],test_rel_precedence:[276,3,1,""],test_rel_sister_nodes:[276,3,1,""],test_tokenize_encoding:[276,3,1,""],test_tokenize_examples:[276,3,1,""],test_tokenize_link_types:[276,3,1,""],test_tokenize_macros:[276,3,1,""],test_tokenize_node_labels:[276,3,1,""],test_tokenize_nodenames:[276,3,1,""],test_tokenize_quoting:[276,3,1,""],test_tokenize_segmented_patterns:[276,3,1,""],test_tokenize_simple:[276,3,1,""],test_trailing_semicolon:[276,3,1,""],test_use_macros:[276,3,1,""],tests_rel_dominance:[276,3,1,""],tests_rel_indexed_children:[276,3,1,""]},"nltk.test.unit.test_wordnet":{WordnNetDemo:[280,2,1,""]},"nltk.test.unit.test_wordnet.WordnNetDemo":{test_antonyms:[280,3,1,""],test_derivationally_related_forms:[280,3,1,""],test_domains:[280,3,1,""],test_hyperhyponyms:[280,3,1,""],test_in_topic_domains:[280,3,1,""],test_iterable_type_for_all_lemma_names:[280,3,1,""],test_lch:[280,3,1,""],test_meronyms_holonyms:[280,3,1,""],test_misc_relations:[280,3,1,""],test_omw_lemma_no_trailing_underscore:[280,3,1,""],test_retrieve_synset:[280,3,1,""],test_retrieve_synsets:[280,3,1,""],test_wordnet_similarities:[280,3,1,""]},"nltk.test.unit.translate":{test_bleu:[282,0,0,"-"],test_gdfa:[283,0,0,"-"],test_ibm1:[284,0,0,"-"],test_ibm2:[285,0,0,"-"],test_ibm3:[286,0,0,"-"],test_ibm4:[287,0,0,"-"],test_ibm5:[288,0,0,"-"],test_ibm_model:[289,0,0,"-"],test_meteor:[290,0,0,"-"],test_nist:[291,0,0,"-"],test_stack_decoder:[292,0,0,"-"]},"nltk.test.unit.translate.test_bleu":{TestBLEU:[282,2,1,""],TestBLEUFringeCases:[282,2,1,""],TestBLEUWithBadSentence:[282,2,1,""],TestBLEUvsMteval13a:[282,2,1,""]},"nltk.test.unit.translate.test_bleu.TestBLEU":{test_brevity_penalty:[282,3,1,""],test_full_matches:[282,3,1,""],test_modified_precision:[282,3,1,""],test_partial_matches_hypothesis_longer_than_reference:[282,3,1,""],test_zero_matches:[282,3,1,""]},"nltk.test.unit.translate.test_bleu.TestBLEUFringeCases":{test_case_where_n_is_bigger_than_hypothesis_length:[282,3,1,""],test_empty_hypothesis:[282,3,1,""],test_empty_references:[282,3,1,""],test_empty_references_and_hypothesis:[282,3,1,""],test_length_one_hypothesis:[282,3,1,""],test_reference_or_hypothesis_shorter_than_fourgrams:[282,3,1,""]},"nltk.test.unit.translate.test_bleu.TestBLEUWithBadSentence":{test_corpus_bleu_with_bad_sentence:[282,3,1,""]},"nltk.test.unit.translate.test_bleu.TestBLEUvsMteval13a":{test_corpus_bleu:[282,3,1,""]},"nltk.test.unit.translate.test_gdfa":{TestGDFA:[283,2,1,""]},"nltk.test.unit.translate.test_gdfa.TestGDFA":{test_from_eflomal_outputs:[283,3,1,""]},"nltk.test.unit.translate.test_ibm1":{TestIBMModel1:[284,2,1,""]},"nltk.test.unit.translate.test_ibm1.TestIBMModel1":{test_prob_t_a_given_s:[284,3,1,""],test_set_uniform_translation_probabilities:[284,3,1,""],test_set_uniform_translation_probabilities_of_non_domain_values:[284,3,1,""]},"nltk.test.unit.translate.test_ibm2":{TestIBMModel2:[285,2,1,""]},"nltk.test.unit.translate.test_ibm2.TestIBMModel2":{test_prob_t_a_given_s:[285,3,1,""],test_set_uniform_alignment_probabilities:[285,3,1,""],test_set_uniform_alignment_probabilities_of_non_domain_values:[285,3,1,""]},"nltk.test.unit.translate.test_ibm3":{TestIBMModel3:[286,2,1,""]},"nltk.test.unit.translate.test_ibm3.TestIBMModel3":{test_prob_t_a_given_s:[286,3,1,""],test_set_uniform_distortion_probabilities:[286,3,1,""],test_set_uniform_distortion_probabilities_of_non_domain_values:[286,3,1,""]},"nltk.test.unit.t
ranslate.test_ibm4":{TestIBMModel4:[287,2,1,""]},"nltk.test.unit.translate.test_ibm4.TestIBMModel4":{test_prob_t_a_given_s:[287,3,1,""],test_set_uniform_distortion_probabilities_of_max_displacements:[287,3,1,""],test_set_uniform_distortion_probabilities_of_non_domain_values:[287,3,1,""]},"nltk.test.unit.translate.test_ibm5":{TestIBMModel5:[288,2,1,""]},"nltk.test.unit.translate.test_ibm5.TestIBMModel5":{test_prob_t_a_given_s:[288,3,1,""],test_prune:[288,3,1,""],test_set_uniform_vacancy_probabilities_of_max_displacements:[288,3,1,""],test_set_uniform_vacancy_probabilities_of_non_domain_values:[288,3,1,""]},"nltk.test.unit.translate.test_ibm_model":{TestIBMModel:[289,2,1,""]},"nltk.test.unit.translate.test_ibm_model.TestIBMModel":{test_best_model2_alignment:[289,3,1,""],test_best_model2_alignment_does_not_change_pegged_alignment:[289,3,1,""],test_best_model2_alignment_handles_empty_src_sentence:[289,3,1,""],test_best_model2_alignment_handles_empty_trg_sentence:[289,3,1,""],test_best_model2_alignment_handles_fertile_words:[289,3,1,""],test_hillclimb:[289,3,1,""],test_neighboring_finds_neighbor_alignments:[289,3,1,""],test_neighboring_returns_neighbors_with_pegged_alignment:[289,3,1,""],test_neighboring_sets_neighbor_alignment_info:[289,3,1,""],test_sample:[289,3,1,""],test_vocabularies_are_initialized:[289,3,1,""],test_vocabularies_are_initialized_even_with_empty_corpora:[289,3,1,""]},"nltk.test.unit.translate.test_meteor":{TestMETEOR:[290,2,1,""]},"nltk.test.unit.translate.test_meteor.TestMETEOR":{candidate:[290,4,1,""],reference:[290,4,1,""],test_candidate_type_check:[290,3,1,""],test_meteor:[290,3,1,""],test_reference_type_check:[290,3,1,""]},"nltk.test.unit.translate.test_nist":{TestNIST:[291,2,1,""]},"nltk.test.unit.translate.test_nist.TestNIST":{test_sentence_nist:[291,3,1,""]},"nltk.test.unit.translate.test_stack_decoder":{TestHypothesis:[292,2,1,""],TestStack:[292,2,1,""],TestStackDecoder:[292,2,1,""]},"nltk.test.unit.translate.test_stack_decoder.TestHypothesis":{setUp:[292,3,1,""],test_total_translated_words:[292,3,1,""],test_translated_positions:[292,3,1,""],test_translation_so_far:[292,3,1,""],test_translation_so_far_for_empty_hypothesis:[292,3,1,""],test_untranslated_spans:[292,3,1,""],test_untranslated_spans_for_empty_hypothesis:[292,3,1,""]},"nltk.test.unit.translate.test_stack_decoder.TestStack":{test_best_returns_none_when_stack_is_empty:[292,3,1,""],test_best_returns_the_best_hypothesis:[292,3,1,""],test_push_bumps_off_worst_hypothesis_when_stack_is_full:[292,3,1,""],test_push_does_not_add_hypothesis_that_falls_below_beam_threshold:[292,3,1,""],test_push_removes_hypotheses_that_fall_below_beam_threshold:[292,3,1,""]},"nltk.test.unit.translate.test_stack_decoder.TestStackDecoder":{create_fake_language_model:[292,3,1,""],create_fake_phrase_table:[292,3,1,""],test_compute_future_costs:[292,3,1,""],test_compute_future_costs_for_phrases_not_in_phrase_table:[292,3,1,""],test_distortion_score:[292,3,1,""],test_distortion_score_of_first_expansion:[292,3,1,""],test_find_all_src_phrases:[292,3,1,""],test_future_score:[292,3,1,""],test_valid_phrases:[292,3,1,""]},"nltk.text":{ConcordanceIndex:[293,2,1,""],ContextIndex:[293,2,1,""],Text:[293,2,1,""],TextCollection:[293,2,1,""],TokenSearcher:[293,2,1,""]},"nltk.text.ConcordanceIndex":{__init__:[293,3,1,""],find_concordance:[293,3,1,""],offsets:[293,3,1,""],print_concordance:[293,3,1,""],tokens:[293,3,1,""]},"nltk.text.ContextIndex":{__init__:[293,3,1,""],common_contexts:[293,3,1,""],similar_words:[293,3,1,""],tokens:[293,3,1,""],word_simil
arity_dict:[293,3,1,""]},"nltk.text.Text":{__init__:[293,3,1,""],collocation_list:[293,3,1,""],collocations:[293,3,1,""],common_contexts:[293,3,1,""],concordance:[293,3,1,""],concordance_list:[293,3,1,""],count:[293,3,1,""],dispersion_plot:[293,3,1,""],findall:[293,3,1,""],generate:[293,3,1,""],index:[293,3,1,""],plot:[293,3,1,""],readability:[293,3,1,""],similar:[293,3,1,""],vocab:[293,3,1,""]},"nltk.text.TextCollection":{__init__:[293,3,1,""],idf:[293,3,1,""],tf:[293,3,1,""],tf_idf:[293,3,1,""]},"nltk.text.TokenSearcher":{__init__:[293,3,1,""],findall:[293,3,1,""]},"nltk.tgrep":{TgrepException:[294,6,1,""],ancestors:[294,1,1,""],tgrep_compile:[294,1,1,""],tgrep_nodes:[294,1,1,""],tgrep_positions:[294,1,1,""],tgrep_tokenize:[294,1,1,""],treepositions_no_leaves:[294,1,1,""],unique_ancestors:[294,1,1,""]},"nltk.tokenize":{api:[296,0,0,"-"],casual:[297,0,0,"-"],destructive:[298,0,0,"-"],legality_principle:[299,0,0,"-"],mwe:[300,0,0,"-"],nist:[301,0,0,"-"],punkt:[302,0,0,"-"],regexp:[303,0,0,"-"],repp:[304,0,0,"-"],sent_tokenize:[295,1,1,""],sexpr:[305,0,0,"-"],simple:[306,0,0,"-"],sonority_sequencing:[307,0,0,"-"],stanford:[308,0,0,"-"],stanford_segmenter:[309,0,0,"-"],texttiling:[310,0,0,"-"],toktok:[311,0,0,"-"],treebank:[312,0,0,"-"],util:[313,0,0,"-"],word_tokenize:[295,1,1,""]},"nltk.tokenize.api":{StringTokenizer:[296,2,1,""],TokenizerI:[296,2,1,""]},"nltk.tokenize.api.StringTokenizer":{span_tokenize:[296,3,1,""],tokenize:[296,3,1,""]},"nltk.tokenize.api.TokenizerI":{span_tokenize:[296,3,1,""],span_tokenize_sents:[296,3,1,""],tokenize:[296,3,1,""],tokenize_sents:[296,3,1,""]},"nltk.tokenize.casual":{TweetTokenizer:[297,2,1,""],casual_tokenize:[297,1,1,""],reduce_lengthening:[297,1,1,""],remove_handles:[297,1,1,""]},"nltk.tokenize.casual.TweetTokenizer":{PHONE_WORD_RE:[297,5,1,""],WORD_RE:[297,5,1,""],__init__:[297,3,1,""],tokenize:[297,3,1,""]},"nltk.tokenize.destructive":{MacIntyreContractions:[298,2,1,""],NLTKWordTokenizer:[298,2,1,""]},"nltk.tokenize.destructive.MacIntyreContractions":{CONTRACTIONS2:[298,4,1,""],CONTRACTIONS3:[298,4,1,""],CONTRACTIONS4:[298,4,1,""]},"nltk.tokenize.destructive.NLTKWordTokenizer":{CONTRACTIONS2:[298,4,1,""],CONTRACTIONS3:[298,4,1,""],CONVERT_PARENTHESES:[298,4,1,""],DOUBLE_DASHES:[298,4,1,""],ENDING_QUOTES:[298,4,1,""],PARENS_BRACKETS:[298,4,1,""],PUNCTUATION:[298,4,1,""],STARTING_QUOTES:[298,4,1,""],tokenize:[298,3,1,""]},"nltk.tokenize.legality_principle":{LegalitySyllableTokenizer:[299,2,1,""]},"nltk.tokenize.legality_principle.LegalitySyllableTokenizer":{__init__:[299,3,1,""],find_legal_onsets:[299,3,1,""],onset:[299,3,1,""],tokenize:[299,3,1,""]},"nltk.tokenize.mwe":{MWETokenizer:[300,2,1,""]},"nltk.tokenize.mwe.MWETokenizer":{__init__:[300,3,1,""],add_mwe:[300,3,1,""],tokenize:[300,3,1,""]},"nltk.tokenize.nist":{NISTTokenizer:[301,2,1,""]},"nltk.tokenize.nist.NISTTokenizer":{DASH_PRECEED_DIGIT:[301,4,1,""],INTERNATIONAL_REGEXES:[301,4,1,""],LANG_DEPENDENT_REGEXES:[301,4,1,""],NONASCII:[301,4,1,""],PERIOD_COMMA_FOLLOW:[301,4,1,""],PERIOD_COMMA_PRECEED:[301,4,1,""],PUNCT:[301,4,1,""],PUNCT_1:[301,4,1,""],PUNCT_2:[301,4,1,""],STRIP_EOL_HYPHEN:[301,4,1,""],STRIP_SKIP:[301,4,1,""],SYMBOLS:[301,4,1,""],international_tokenize:[301,3,1,""],lang_independent_sub:[301,3,1,""],number_regex:[301,4,1,""],punct_regex:[301,4,1,""],pup_number:[301,4,1,""],pup_punct:[301,4,1,""],pup_symbol:[301,4,1,""],symbol_regex:[301,4,1,""],tokenize:[301,3,1,""]},"nltk.tokenize.punkt":{PunktBaseClass:[302,2,1,""],PunktLanguageVars:[302,2,1,""],PunktParameters:[302,2,1,""],Punk
tSentenceTokenizer:[302,2,1,""],PunktToken:[302,2,1,""],PunktTrainer:[302,2,1,""],demo:[302,1,1,""],format_debug_decision:[302,1,1,""]},"nltk.tokenize.punkt.PunktBaseClass":{__init__:[302,3,1,""]},"nltk.tokenize.punkt.PunktLanguageVars":{internal_punctuation:[302,4,1,""],period_context_re:[302,3,1,""],re_boundary_realignment:[302,4,1,""],sent_end_chars:[302,4,1,""],word_tokenize:[302,3,1,""]},"nltk.tokenize.punkt.PunktParameters":{__init__:[302,3,1,""],abbrev_types:[302,4,1,""],add_ortho_context:[302,3,1,""],clear_abbrevs:[302,3,1,""],clear_collocations:[302,3,1,""],clear_ortho_context:[302,3,1,""],clear_sent_starters:[302,3,1,""],collocations:[302,4,1,""],ortho_context:[302,4,1,""],sent_starters:[302,4,1,""]},"nltk.tokenize.punkt.PunktSentenceTokenizer":{PUNCTUATION:[302,4,1,""],__init__:[302,3,1,""],debug_decisions:[302,3,1,""],dump:[302,3,1,""],sentences_from_text:[302,3,1,""],sentences_from_text_legacy:[302,3,1,""],sentences_from_tokens:[302,3,1,""],span_tokenize:[302,3,1,""],text_contains_sentbreak:[302,3,1,""],tokenize:[302,3,1,""],train:[302,3,1,""]},"nltk.tokenize.punkt.PunktToken":{__init__:[302,3,1,""],abbr:[302,4,1,""],ellipsis:[302,4,1,""],first_case:[302,5,1,""],first_lower:[302,5,1,""],first_upper:[302,5,1,""],is_alpha:[302,5,1,""],is_ellipsis:[302,5,1,""],is_initial:[302,5,1,""],is_non_punct:[302,5,1,""],is_number:[302,5,1,""],linestart:[302,4,1,""],parastart:[302,4,1,""],period_final:[302,4,1,""],sentbreak:[302,4,1,""],tok:[302,4,1,""],type:[302,4,1,""],type_no_period:[302,5,1,""],type_no_sentperiod:[302,5,1,""]},"nltk.tokenize.punkt.PunktTrainer":{ABBREV:[302,4,1,""],ABBREV_BACKOFF:[302,4,1,""],COLLOCATION:[302,4,1,""],IGNORE_ABBREV_PENALTY:[302,4,1,""],INCLUDE_ABBREV_COLLOCS:[302,4,1,""],INCLUDE_ALL_COLLOCS:[302,4,1,""],MIN_COLLOC_FREQ:[302,4,1,""],SENT_STARTER:[302,4,1,""],__init__:[302,3,1,""],finalize_training:[302,3,1,""],find_abbrev_types:[302,3,1,""],freq_threshold:[302,3,1,""],get_params:[302,3,1,""],train:[302,3,1,""],train_tokens:[302,3,1,""]},"nltk.tokenize.regexp":{BlanklineTokenizer:[303,2,1,""],RegexpTokenizer:[303,2,1,""],WhitespaceTokenizer:[303,2,1,""],WordPunctTokenizer:[303,2,1,""],blankline_tokenize:[303,1,1,""],regexp_tokenize:[303,1,1,""],wordpunct_tokenize:[303,1,1,""]},"nltk.tokenize.regexp.BlanklineTokenizer":{__init__:[303,3,1,""]},"nltk.tokenize.regexp.RegexpTokenizer":{__init__:[303,3,1,""],span_tokenize:[303,3,1,""],tokenize:[303,3,1,""]},"nltk.tokenize.regexp.WhitespaceTokenizer":{__init__:[303,3,1,""]},"nltk.tokenize.regexp.WordPunctTokenizer":{__init__:[303,3,1,""]},"nltk.tokenize.repp":{ReppTokenizer:[304,2,1,""]},"nltk.tokenize.repp.ReppTokenizer":{__init__:[304,3,1,""],find_repptokenizer:[304,3,1,""],generate_repp_command:[304,3,1,""],parse_repp_outputs:[304,3,1,""],tokenize:[304,3,1,""],tokenize_sents:[304,3,1,""]},"nltk.tokenize.sexpr":{SExprTokenizer:[305,2,1,""],sexpr_tokenize:[305,1,1,""]},"nltk.tokenize.sexpr.SExprTokenizer":{__init__:[305,3,1,""],tokenize:[305,3,1,""]},"nltk.tokenize.simple":{CharTokenizer:[306,2,1,""],LineTokenizer:[306,2,1,""],SpaceTokenizer:[306,2,1,""],TabTokenizer:[306,2,1,""],line_tokenize:[306,1,1,""]},"nltk.tokenize.simple.CharTokenizer":{span_tokenize:[306,3,1,""],tokenize:[306,3,1,""]},"nltk.tokenize.simple.LineTokenizer":{__init__:[306,3,1,""],span_tokenize:[306,3,1,""],tokenize:[306,3,1,""]},"nltk.tokenize.sonority_sequencing":{SyllableTokenizer:[307,2,1,""]},"nltk.tokenize.sonority_sequencing.SyllableTokenizer":{__init__:[307,3,1,""],assign_values:[307,3,1,""],tokenize:[307,3,1,""],validate_syllables:[3
07,3,1,""]},"nltk.tokenize.stanford":{StanfordTokenizer:[308,2,1,""]},"nltk.tokenize.stanford.StanfordTokenizer":{__init__:[308,3,1,""],tokenize:[308,3,1,""]},"nltk.tokenize.stanford_segmenter":{StanfordSegmenter:[309,2,1,""]},"nltk.tokenize.stanford_segmenter.StanfordSegmenter":{__init__:[309,3,1,""],default_config:[309,3,1,""],segment:[309,3,1,""],segment_file:[309,3,1,""],segment_sents:[309,3,1,""],tokenize:[309,3,1,""]},"nltk.tokenize.texttiling":{TextTilingTokenizer:[310,2,1,""],TokenSequence:[310,2,1,""],TokenTableField:[310,2,1,""],demo:[310,1,1,""],smooth:[310,1,1,""]},"nltk.tokenize.texttiling.TextTilingTokenizer":{__init__:[310,3,1,""],tokenize:[310,3,1,""]},"nltk.tokenize.texttiling.TokenSequence":{__init__:[310,3,1,""]},"nltk.tokenize.texttiling.TokenTableField":{__init__:[310,3,1,""]},"nltk.tokenize.toktok":{ToktokTokenizer:[311,2,1,""]},"nltk.tokenize.toktok.ToktokTokenizer":{AMPERCENT:[311,4,1,""],CLOSE_PUNCT:[311,4,1,""],CLOSE_PUNCT_RE:[311,4,1,""],COMMA_IN_NUM:[311,4,1,""],CURRENCY_SYM:[311,4,1,""],CURRENCY_SYM_RE:[311,4,1,""],EN_EM_DASHES:[311,4,1,""],FINAL_PERIOD_1:[311,4,1,""],FINAL_PERIOD_2:[311,4,1,""],FUNKY_PUNCT_1:[311,4,1,""],FUNKY_PUNCT_2:[311,4,1,""],LSTRIP:[311,4,1,""],MULTI_COMMAS:[311,4,1,""],MULTI_DASHES:[311,4,1,""],MULTI_DOTS:[311,4,1,""],NON_BREAKING:[311,4,1,""],ONE_SPACE:[311,4,1,""],OPEN_PUNCT:[311,4,1,""],OPEN_PUNCT_RE:[311,4,1,""],PIPE:[311,4,1,""],PROB_SINGLE_QUOTES:[311,4,1,""],RSTRIP:[311,4,1,""],STUPID_QUOTES_1:[311,4,1,""],STUPID_QUOTES_2:[311,4,1,""],TAB:[311,4,1,""],TOKTOK_REGEXES:[311,4,1,""],URL_FOE_1:[311,4,1,""],URL_FOE_2:[311,4,1,""],URL_FOE_3:[311,4,1,""],URL_FOE_4:[311,4,1,""],tokenize:[311,3,1,""]},"nltk.tokenize.treebank":{TreebankWordDetokenizer:[312,2,1,""],TreebankWordTokenizer:[312,2,1,""]},"nltk.tokenize.treebank.TreebankWordDetokenizer":{CONTRACTIONS2:[312,4,1,""],CONTRACTIONS3:[312,4,1,""],CONVERT_PARENTHESES:[312,4,1,""],DOUBLE_DASHES:[312,4,1,""],ENDING_QUOTES:[312,4,1,""],PARENS_BRACKETS:[312,4,1,""],PUNCTUATION:[312,4,1,""],STARTING_QUOTES:[312,4,1,""],detokenize:[312,3,1,""],tokenize:[312,3,1,""]},"nltk.tokenize.treebank.TreebankWordTokenizer":{CONTRACTIONS2:[312,4,1,""],CONTRACTIONS3:[312,4,1,""],CONVERT_PARENTHESES:[312,4,1,""],DOUBLE_DASHES:[312,4,1,""],ENDING_QUOTES:[312,4,1,""],PARENS_BRACKETS:[312,4,1,""],PUNCTUATION:[312,4,1,""],STARTING_QUOTES:[312,4,1,""],span_tokenize:[312,3,1,""],tokenize:[312,3,1,""]},"nltk.tokenize.util":{CJKChars:[313,2,1,""],align_tokens:[313,1,1,""],is_cjk:[313,1,1,""],regexp_span_tokenize:[313,1,1,""],spans_to_relative:[313,1,1,""],string_span_tokenize:[313,1,1,""],xml_escape:[313,1,1,""],xml_unescape:[313,1,1,""]},"nltk.tokenize.util.CJKChars":{CJK_Compatibility_Forms:[313,4,1,""],CJK_Compatibility_Ideographs:[313,4,1,""],CJK_Radicals:[313,4,1,""],Hangul_Jamo:[313,4,1,""],Hangul_Syllables:[313,4,1,""],Katakana_Hangul_Halfwidth:[313,4,1,""],Phags_Pa:[313,4,1,""],Supplementary_Ideographic_Plane:[313,4,1,""],ranges:[313,4,1,""]},"nltk.toolbox":{StandardFormat:[314,2,1,""],ToolboxData:[314,2,1,""],ToolboxSettings:[314,2,1,""],add_blank_lines:[314,1,1,""],add_default_fields:[314,1,1,""],demo:[314,1,1,""],remove_blanks:[314,1,1,""],sort_fields:[314,1,1,""],to_settings_string:[314,1,1,""],to_sfm_string:[314,1,1,""]},"nltk.toolbox.StandardFormat":{__init__:[314,3,1,""],close:[314,3,1,""],fields:[314,3,1,""],open:[314,3,1,""],open_string:[314,3,1,""],raw_fields:[314,3,1,""]},"nltk.toolbox.ToolboxData":{parse:[314,3,1,""]},"nltk.toolbox.ToolboxSettings":{__init__:[314,3,1,""],parse:[314,3,1,""]},"nlt
k.translate":{api:[316,0,0,"-"],bleu_score:[317,0,0,"-"],chrf_score:[318,0,0,"-"],gale_church:[319,0,0,"-"],gdfa:[320,0,0,"-"],gleu_score:[321,0,0,"-"],ibm1:[322,0,0,"-"],ibm2:[323,0,0,"-"],ibm3:[324,0,0,"-"],ibm4:[325,0,0,"-"],ibm5:[326,0,0,"-"],ibm_model:[327,0,0,"-"],meteor_score:[328,0,0,"-"],metrics:[329,0,0,"-"],nist_score:[330,0,0,"-"],phrase_based:[331,0,0,"-"],ribes_score:[332,0,0,"-"],stack_decoder:[333,0,0,"-"]},"nltk.translate.api":{AlignedSent:[316,2,1,""],Alignment:[316,2,1,""],PhraseTable:[316,2,1,""],PhraseTableEntry:[316,2,1,""]},"nltk.translate.api.AlignedSent":{__init__:[316,3,1,""],alignment:[316,5,1,""],invert:[316,3,1,""],mots:[316,5,1,""],words:[316,5,1,""]},"nltk.translate.api.Alignment":{__new__:[316,3,1,""],fromstring:[316,3,1,""],invert:[316,3,1,""],range:[316,3,1,""]},"nltk.translate.api.PhraseTable":{__init__:[316,3,1,""],add:[316,3,1,""],translations_for:[316,3,1,""]},"nltk.translate.api.PhraseTableEntry":{__new__:[316,3,1,""],log_prob:[316,4,1,""],trg_phrase:[316,4,1,""]},"nltk.translate.bleu_score":{SmoothingFunction:[317,2,1,""],brevity_penalty:[317,1,1,""],closest_ref_length:[317,1,1,""],corpus_bleu:[317,1,1,""],modified_precision:[317,1,1,""],sentence_bleu:[317,1,1,""]},"nltk.translate.bleu_score.SmoothingFunction":{__init__:[317,3,1,""],method0:[317,3,1,""],method1:[317,3,1,""],method2:[317,3,1,""],method3:[317,3,1,""],method4:[317,3,1,""],method5:[317,3,1,""],method6:[317,3,1,""],method7:[317,3,1,""]},"nltk.translate.chrf_score":{chrf_precision_recall_fscore_support:[318,1,1,""],corpus_chrf:[318,1,1,""],sentence_chrf:[318,1,1,""]},"nltk.translate.gale_church":{LanguageIndependent:[319,2,1,""],align_blocks:[319,1,1,""],align_log_prob:[319,1,1,""],align_texts:[319,1,1,""],erfcc:[319,1,1,""],norm_cdf:[319,1,1,""],norm_logsf:[319,1,1,""],parse_token_stream:[319,1,1,""],split_at:[319,1,1,""],trace:[319,1,1,""]},"nltk.translate.gale_church.LanguageIndependent":{AVERAGE_CHARACTERS:[319,4,1,""],PRIORS:[319,4,1,""],VARIANCE_CHARACTERS:[319,4,1,""]},"nltk.translate.gdfa":{grow_diag_final_and:[320,1,1,""]},"nltk.translate.gleu_score":{corpus_gleu:[321,1,1,""],sentence_gleu:[321,1,1,""]},"nltk.translate.ibm1":{IBMModel1:[322,2,1,""]},"nltk.translate.ibm1.IBMModel1":{__init__:[322,3,1,""],align:[322,3,1,""],align_all:[322,3,1,""],prob_alignment_point:[322,3,1,""],prob_all_alignments:[322,3,1,""],prob_t_a_given_s:[322,3,1,""],set_uniform_probabilities:[322,3,1,""],train:[322,3,1,""]},"nltk.translate.ibm2":{IBMModel2:[323,2,1,""],Model2Counts:[323,2,1,""]},"nltk.translate.ibm2.IBMModel2":{__init__:[323,3,1,""],align:[323,3,1,""],align_all:[323,3,1,""],maximize_alignment_probabilities:[323,3,1,""],prob_alignment_point:[323,3,1,""],prob_all_alignments:[323,3,1,""],prob_t_a_given_s:[323,3,1,""],set_uniform_probabilities:[323,3,1,""],train:[323,3,1,""]},"nltk.translate.ibm2.Model2Counts":{__init__:[323,3,1,""],update_alignment:[323,3,1,""],update_lexical_translation:[323,3,1,""]},"nltk.translate.ibm3":{IBMModel3:[324,2,1,""],Model3Counts:[324,2,1,""]},"nltk.translate.ibm3.IBMModel3":{__init__:[324,3,1,""],maximize_distortion_probabilities:[324,3,1,""],prob_t_a_given_s:[324,3,1,""],reset_probabilities:[324,3,1,""],set_uniform_probabilities:[324,3,1,""],train:[324,3,1,""]},"nltk.translate.ibm3.Model3Counts":{__init__:[324,3,1,""],update_distortion:[324,3,1,""]},"nltk.translate.ibm4":{IBMModel4:[325,2,1,""],Model4Counts:[325,2,1,""]},"nltk.translate.ibm4.IBMModel4":{__init__:[325,3,1,""],maximize_distortion_probabilities:[325,3,1,""],model4_prob_t_a_given_s:[325,3,1,""],prob_
t_a_given_s:[325,3,1,""],reset_probabilities:[325,3,1,""],set_uniform_probabilities:[325,3,1,""],train:[325,3,1,""]},"nltk.translate.ibm4.Model4Counts":{__init__:[325,3,1,""],update_distortion:[325,3,1,""]},"nltk.translate.ibm5":{IBMModel5:[326,2,1,""],Model5Counts:[326,2,1,""],Slots:[326,2,1,""]},"nltk.translate.ibm5.IBMModel5":{MIN_SCORE_FACTOR:[326,4,1,""],__init__:[326,3,1,""],hillclimb:[326,3,1,""],maximize_vacancy_probabilities:[326,3,1,""],prob_t_a_given_s:[326,3,1,""],prune:[326,3,1,""],reset_probabilities:[326,3,1,""],sample:[326,3,1,""],set_uniform_probabilities:[326,3,1,""],train:[326,3,1,""]},"nltk.translate.ibm5.Model5Counts":{__init__:[326,3,1,""],update_vacancy:[326,3,1,""]},"nltk.translate.ibm5.Slots":{__init__:[326,3,1,""],occupy:[326,3,1,""],vacancies_at:[326,3,1,""]},"nltk.translate.ibm_model":{AlignmentInfo:[327,2,1,""],Counts:[327,2,1,""],IBMModel:[327,2,1,""],longest_target_sentence_length:[327,1,1,""]},"nltk.translate.ibm_model.AlignmentInfo":{__init__:[327,3,1,""],alignment:[327,4,1,""],center_of_cept:[327,3,1,""],cepts:[327,4,1,""],fertility_of_i:[327,3,1,""],is_head_word:[327,3,1,""],previous_cept:[327,3,1,""],previous_in_tablet:[327,3,1,""],score:[327,4,1,""],src_sentence:[327,4,1,""],trg_sentence:[327,4,1,""],zero_indexed_alignment:[327,3,1,""]},"nltk.translate.ibm_model.Counts":{__init__:[327,3,1,""],update_fertility:[327,3,1,""],update_lexical_translation:[327,3,1,""],update_null_generation:[327,3,1,""]},"nltk.translate.ibm_model.IBMModel":{MIN_PROB:[327,4,1,""],__init__:[327,3,1,""],best_model2_alignment:[327,3,1,""],hillclimb:[327,3,1,""],init_vocab:[327,3,1,""],maximize_fertility_probabilities:[327,3,1,""],maximize_lexical_translation_probabilities:[327,3,1,""],maximize_null_generation_probabilities:[327,3,1,""],neighboring:[327,3,1,""],prob_of_alignments:[327,3,1,""],prob_t_a_given_s:[327,3,1,""],reset_probabilities:[327,3,1,""],sample:[327,3,1,""],set_uniform_probabilities:[327,3,1,""]},"nltk.translate.meteor_score":{align_words:[328,1,1,""],exact_match:[328,1,1,""],meteor_score:[328,1,1,""],single_meteor_score:[328,1,1,""],stem_match:[328,1,1,""],wordnetsyn_match:[328,1,1,""]},"nltk.translate.metrics":{alignment_error_rate:[329,1,1,""]},"nltk.translate.nist_score":{corpus_nist:[330,1,1,""],nist_length_penalty:[330,1,1,""],sentence_nist:[330,1,1,""]},"nltk.translate.phrase_based":{extract:[331,1,1,""],phrase_extraction:[331,1,1,""]},"nltk.translate.ribes_score":{corpus_ribes:[332,1,1,""],find_increasing_sequences:[332,1,1,""],kendall_tau:[332,1,1,""],position_of_ngram:[332,1,1,""],sentence_ribes:[332,1,1,""],spearman_rho:[332,1,1,""],word_rank_alignment:[332,1,1,""]},"nltk.translate.stack_decoder":{StackDecoder:[333,2,1,""]},"nltk.translate.stack_decoder.StackDecoder":{__init__:[333,3,1,""],beam_threshold:[333,4,1,""],compute_future_scores:[333,3,1,""],distortion_factor:[333,5,1,""],distortion_score:[333,3,1,""],expansion_score:[333,3,1,""],find_all_src_phrases:[333,3,1,""],future_score:[333,3,1,""],stack_size:[333,4,1,""],translate:[333,3,1,""],valid_phrases:[333,3,1,""],word_penalty:[333,4,1,""]},"nltk.tree":{ImmutableMultiParentedTree:[334,2,1,""],ImmutableParentedTree:[334,2,1,""],ImmutableProbabilisticTree:[334,2,1,""],ImmutableTree:[334,2,1,""],MultiParentedTree:[334,2,1,""],ParentedTree:[334,2,1,""],ProbabilisticMixIn:[334,2,1,""],ProbabilisticTree:[334,2,1,""],Tree:[334,2,1,""],bracket_parse:[334,1,1,""],sinica_parse:[334,1,1,""]},"nltk.tree.ImmutableProbabilisticTree":{__init__:[334,3,1,""],convert:[334,3,1,""],copy:[334,3,1,""]},"nltk.tree.Immuta
bleTree":{__init__:[334,3,1,""],append:[334,3,1,""],extend:[334,3,1,""],pop:[334,3,1,""],remove:[334,3,1,""],reverse:[334,3,1,""],set_label:[334,3,1,""],sort:[334,3,1,""]},"nltk.tree.MultiParentedTree":{__init__:[334,3,1,""],left_siblings:[334,3,1,""],parent_indices:[334,3,1,""],parents:[334,3,1,""],right_siblings:[334,3,1,""],roots:[334,3,1,""],treepositions:[334,3,1,""]},"nltk.tree.ParentedTree":{__init__:[334,3,1,""],left_sibling:[334,3,1,""],parent:[334,3,1,""],parent_index:[334,3,1,""],right_sibling:[334,3,1,""],root:[334,3,1,""],treeposition:[334,3,1,""]},"nltk.tree.ProbabilisticMixIn":{__init__:[334,3,1,""],logprob:[334,3,1,""],prob:[334,3,1,""],set_logprob:[334,3,1,""],set_prob:[334,3,1,""]},"nltk.tree.ProbabilisticTree":{__init__:[334,3,1,""],convert:[334,3,1,""],copy:[334,3,1,""]},"nltk.tree.Tree":{__init__:[334,3,1,""],chomsky_normal_form:[334,3,1,""],collapse_unary:[334,3,1,""],convert:[334,3,1,""],copy:[334,3,1,""],draw:[334,3,1,""],flatten:[334,3,1,""],freeze:[334,3,1,""],fromlist:[334,3,1,""],fromstring:[334,3,1,""],height:[334,3,1,""],label:[334,3,1,""],leaf_treeposition:[334,3,1,""],leaves:[334,3,1,""],node:[334,5,1,""],pformat:[334,3,1,""],pformat_latex_qtree:[334,3,1,""],pos:[334,3,1,""],pprint:[334,3,1,""],pretty_print:[334,3,1,""],productions:[334,3,1,""],set_label:[334,3,1,""],subtrees:[334,3,1,""],treeposition_spanning_leaves:[334,3,1,""],treepositions:[334,3,1,""],un_chomsky_normal_form:[334,3,1,""]},"nltk.treeprettyprinter":{TreePrettyPrinter:[335,2,1,""]},"nltk.treeprettyprinter.TreePrettyPrinter":{__init__:[335,3,1,""],nodecoords:[335,3,1,""],svg:[335,3,1,""],text:[335,3,1,""]},"nltk.treetransforms":{chomsky_normal_form:[336,1,1,""],collapse_unary:[336,1,1,""],un_chomsky_normal_form:[336,1,1,""]},"nltk.twitter":{api:[338,0,0,"-"],common:[339,0,0,"-"],twitter_demo:[340,0,0,"-"],twitterclient:[341,0,0,"-"],util:[342,0,0,"-"]},"nltk.twitter.api":{BasicTweetHandler:[338,2,1,""],LocalTimezoneOffsetWithUTC:[338,2,1,""],TweetHandlerI:[338,2,1,""]},"nltk.twitter.api.BasicTweetHandler":{__init__:[338,3,1,""],counter:[338,4,1,""],do_continue:[338,3,1,""],do_stop:[338,4,1,""]},"nltk.twitter.api.LocalTimezoneOffsetWithUTC":{DSTOFFSET:[338,4,1,""],STDOFFSET:[338,4,1,""],utcoffset:[338,3,1,""]},"nltk.twitter.api.TweetHandlerI":{__init__:[338,3,1,""],check_date_limit:[338,3,1,""],handle:[338,3,1,""],on_finish:[338,3,1,""]},"nltk.twitter.common":{extract_fields:[339,1,1,""],get_header_field_list:[339,1,1,""],json2csv:[339,1,1,""],json2csv_entities:[339,1,1,""]},"nltk.twitter.twitter_demo":{ALL:[340,7,1,""],corpusreader_demo:[340,1,1,""],expand_tweetids_demo:[340,1,1,""],followtoscreen_demo:[340,1,1,""],limit_by_time_demo:[340,1,1,""],lookup_by_userid_demo:[340,1,1,""],sampletoscreen_demo:[340,1,1,""],search_demo:[340,1,1,""],setup:[340,1,1,""],streamtofile_demo:[340,1,1,""],tracktoscreen_demo:[340,1,1,""],tweets_by_user_demo:[340,1,1,""],twitterclass_demo:[340,1,1,""],verbose:[340,1,1,""],yesterday:[340,1,1,""]},"nltk.twitter.twitterclient":{Query:[341,2,1,""],Streamer:[341,2,1,""],TweetViewer:[341,2,1,""],TweetWriter:[341,2,1,""],Twitter:[341,2,1,""]},"nltk.twitter.twitterclient.Query":{__init__:[341,3,1,""],expand_tweetids:[341,3,1,""],register:[341,3,1,""],search_tweets:[341,3,1,""],user_info_from_id:[341,3,1,""],user_tweets:[341,3,1,""]},"nltk.twitter.twitterclient.Streamer":{__init__:[341,3,1,""],filter:[341,3,1,""],on_error:[341,3,1,""],on_success:[341,3,1,""],register:[341,3,1,""],sample:[341,3,1,""]},"nltk.twitter.twitterclient.TweetViewer":{handle:[341,3,1,""],on_finish:
[341,3,1,""]},"nltk.twitter.twitterclient.TweetWriter":{__init__:[341,3,1,""],do_continue:[341,3,1,""],handle:[341,3,1,""],on_finish:[341,3,1,""],timestamped_file:[341,3,1,""]},"nltk.twitter.twitterclient.Twitter":{__init__:[341,3,1,""],tweets:[341,3,1,""]},"nltk.twitter.util":{Authenticate:[342,2,1,""],add_access_token:[342,1,1,""],credsfromfile:[342,1,1,""],guess_path:[342,1,1,""]},"nltk.twitter.util.Authenticate":{__init__:[342,3,1,""],load_creds:[342,3,1,""]},"nltk.util":{Index:[343,2,1,""],acyclic_branches_depth_first:[343,1,1,""],acyclic_breadth_first:[343,1,1,""],acyclic_depth_first:[343,1,1,""],acyclic_dic2tree:[343,1,1,""],bigrams:[343,1,1,""],binary_search_file:[343,1,1,""],breadth_first:[343,1,1,""],choose:[343,1,1,""],clean_html:[343,1,1,""],clean_url:[343,1,1,""],edge_closure:[343,1,1,""],edges2dot:[343,1,1,""],elementtree_indent:[343,1,1,""],everygrams:[343,1,1,""],filestring:[343,1,1,""],flatten:[343,1,1,""],guess_encoding:[343,1,1,""],in_idle:[343,1,1,""],invert_dict:[343,1,1,""],invert_graph:[343,1,1,""],ngrams:[343,1,1,""],pad_sequence:[343,1,1,""],pairwise:[343,1,1,""],parallelize_preprocess:[343,1,1,""],pr:[343,1,1,""],print_string:[343,1,1,""],re_show:[343,1,1,""],set_proxy:[343,1,1,""],skipgrams:[343,1,1,""],tokenwrap:[343,1,1,""],transitive_closure:[343,1,1,""],trigrams:[343,1,1,""],unique_list:[343,1,1,""],unweighted_minimum_spanning_dict:[343,1,1,""],unweighted_minimum_spanning_digraph:[343,1,1,""],unweighted_minimum_spanning_tree:[343,1,1,""]},"nltk.util.Index":{__init__:[343,3,1,""]},"nltk.wsd":{lesk:[344,1,1,""]},nltk:{app:[1,0,0,"-"],book:[11,0,0,"-"],ccg:[12,0,0,"-"],chat:[18,0,0,"-"],chunk:[25,0,0,"-"],classify:[30,0,0,"-"],cluster:[46,0,0,"-"],collections:[52,0,0,"-"],collocations:[53,0,0,"-"],compat:[54,0,0,"-"],corpus:[55,0,0,"-"],data:[109,0,0,"-"],decorators:[110,0,0,"-"],demo:[0,1,1,""],downloader:[111,0,0,"-"],draw:[112,0,0,"-"],featstruct:[118,0,0,"-"],grammar:[119,0,0,"-"],help:[120,0,0,"-"],inference:[121,0,0,"-"],internals:[129,0,0,"-"],jsontags:[130,0,0,"-"],lazyimport:[131,0,0,"-"],lm:[132,0,0,"-"],metrics:[140,0,0,"-"],misc:[150,0,0,"-"],parse:[156,0,0,"-"],probability:[176,0,0,"-"],sem:[177,0,0,"-"],sentiment:[192,0,0,"-"],stem:[196,0,0,"-"],tag:[209,0,0,"-"],tbl:[223,0,0,"-"],test:[229,0,0,"-"],text:[293,0,0,"-"],tgrep:[294,0,0,"-"],tokenize:[295,0,0,"-"],toolbox:[314,0,0,"-"],translate:[315,0,0,"-"],tree:[334,0,0,"-"],treeprettyprinter:[335,0,0,"-"],treetransforms:[336,0,0,"-"],twitter:[337,0,0,"-"],util:[343,0,0,"-"],wsd:[344,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python class"],"3":["py","method","Python method"],"4":["py","attribute","Python attribute"],"5":["py","property","Python property"],"6":["py","exception","Python exception"],"7":["py","data","Python 
data"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class","3":"py:method","4":"py:attribute","5":"py:property","6":"py:exception","7":"py:data"},terms:{"0":[14,25,28,29,30,32,33,35,36,39,46,48,52,57,66,67,71,74,75,76,77,78,79,81,82,83,84,89,90,93,98,102,104,105,109,111,115,118,119,124,126,127,128,129,132,134,135,137,139,141,142,143,145,147,148,149,151,159,160,161,162,163,164,167,168,170,171,173,174,175,176,177,180,183,189,191,193,195,204,206,209,211,212,213,214,218,219,224,226,227,228,293,294,295,298,299,301,302,304,310,312,313,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,341,343,348,349,351,353,354,355,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,374,375,376,377,378,379,380,382,383,384,385,386,387,388,389,390,391,392,393,396,397,398,399,400,403,406,408],"00":[148,359,396,403],"000":[25,94,145,148,212,359,383,385,396,408],"000000000000":380,"00001":163,"000010000100":[148,380],"000100000010":[148,380],"001":[212,227,299],"006060":116,"007936507936":353,"0099":319,"01":[57,71,72,93,105,119,160,343,359,365,384,386,387,392,399,403,404,405],"0100":148,"011":[148,319],"0111940298507":353,"011363636":386,"0123456789":397,"017649766667026317":386,"01a":353,"01b":353,"02":[57,71,93,105,160,343,344,392,399,403,404,405],"0222":377,"022727272727272728":386,"028":403,"02a":353,"02b":353,"03":[57,93,105,332,336,353,359,392,403,405],"033":[325,326],"0370":317,"03744e":384,"0391":359,"03a":353,"03b":353,"04":[57,81,89,93,94,105,353,386,387,392,403,405],"05":[32,160,359,377,403,405],"054":324,"0578034682080":353,"06":[48,50,57,67,105,343,384,403,405],"061":322,"064":388,"0666667":384,"07":[384,403,405],"0714286":384,"072":322,"074":391,"075":403,"07692":317,"079":391,"08":[57,71,93,359,392],"08144":321,"08144v2":321,"0842696629213":353,"08433050215340411":386,"089":319,"09":[404,405],"090163934426":353,"0909":321,"090909":377,"092":391,"094":391,"0a0":408,"0a1":408,"0a2":[404,408],"0a4":408,"0b":408,"0b1":408,"0b2":408,"0b9":408,"0f":357,"0gebofduzn":396,"0th":332,"1":[23,28,29,33,36,46,48,49,50,51,52,53,57,61,66,67,71,74,77,78,79,83,89,90,98,100,101,102,103,104,105,107,109,110,111,115,116,117,118,119,128,129,131,132,133,134,135,137,139,141,142,143,145,147,148,149,159,160,161,162,163,164,166,167,168,173,174,175,176,179,183,184,189,195,200,201,202,206,212,213,214,215,219,221,224,226,227,228,259,284,294,298,301,302,305,310,311,312,313,316,317,318,319,320,321,322,323,324,325,326,327,329,330,331,332,333,334,335,336,338,341,343,344,348,351,352,353,354,355,356,357,358,359,361,362,363,365,366,367,369,370,371,372,374,375,376,377,378,379,380,381,383,384,385,386,387,388,389,390,391,393,396,397,399,400,401,402,408],"10":[28,32,33,35,57,71,81,88,94,98,103,104,111,116,128,129,145,160,161,167,176,181,189,194,212,214,283,301,309,310,312,313,316,319,320,331,332,335,340,352,353,354,356,357,359,361,362,368,369,374,377,380,381,385,386,387,388,389,396,398,399,400,403,405,408],"100":[32,35,52,102,107,145,148,194,212,293,304,341,348,359,365,369,377,380,385,386,391,395,396,399],"1000":[38,43,57,71,194,221,224,352,355,369,385,396,399],"10000":[194,361,369,385],"100000010000":[148,380],"10020":[57,71],"1004":319,"1005":385,"101":304,"1011":[57,71],"1017":[57,104],"1019":[57,71],"102":304,"10200":385,"1024":39,"1027":353,"1029_paper":[57,83],"103":[304,313,387,396],"1031":221,"104":[94,313,396],"1040":[282,317,318],"1044":385,"1047":385,"105":304,"1054":336,"106":304,"107":[57,71,299,307],"10848":385,"1086":385,"109":[57,71,184,187,313,396],"1092":332,"10k":408,"10t
h":318,"11":[57,89,104,128,163,167,299,310,313,316,317,320,349,353,355,356,358,359,362,366,367,374,381,384,385,389,396,400,403,408],"110":[148,304,313,357,396],"1100":352,"1100000001":148,"1100010000":148,"1100100000":148,"1104":216,"111":[148,160,304],"111111111111":380,"112":[304,391],"113":[304,322],"11370":[57,71],"114":368,"1178":352,"1181":343,"11829":[57,71],"119":[313,357,396],"11904":313,"119400":66,"12":[89,111,128,163,212,216,295,304,312,313,316,317,319,320,327,338,341,353,359,362,366,374,381,384,386,389,399,400,403,408],"120":[304,313,396],"12000":359,"1206":321,"121":304,"1212":374,"122":[304,313,396],"1221":[57,71],"123":[313,366,380,396,402],"12345":381,"12393":[57,71],"124":[311,313,391],"125":[93,145,304,383],"1256":201,"12593":380,"126":304,"1269":212,"127069":66,"1274":353,"1280":352,"12879030":377,"1289189":330,"1289273":330,"129":304,"1291":380,"13":[128,129,295,299,304,307,312,313,316,317,320,353,357,359,362,368,374,381,384,387,389,399,403],"130":[98,203,206,304],"1305":385,"13085":359,"131":[313,396],"131072":313,"132":[212,313,391,396],"133333":384,"1338":391,"134":304,"1341":386,"1342":384,"13444":385,"135":304,"1354":350,"13572":[57,71],"136":[299,307],"1368":[179,352],"137":[203,206,385],"137404580152":353,"1376":212,"138":[385,391],"1397":353,"14":[72,81,88,128,195,203,206,301,304,320,330,353,357,359,361,362,372,374,375,377,381,384,399],"141":[304,335],"14159":129,"142":[304,403],"142857":384,"14287173":357,"143":228,"14307668":[357,380],"14620633":71,"1464":386,"14642":[57,71],"14646110":377,"1468":318,"147":359,"14733":[57,71,367],"14743":[57,71,367],"14744":[57,71],"14751":408,"14826":359,"14894":[57,71],"149":[335,391],"14920":[57,71],"1494":[57,71],"1496":352,"149797":359,"14th":[81,88],"15":[57,72,89,116,128,145,149,163,195,199,304,317,320,341,343,349,353,355,359,362,365,372,374,381,385,399],"150":[380,396],"15019":357,"15082":[57,71],"151":349,"1531":391,"1538":359,"15384615384615385":132,"154696132596":353,"1551":352,"15618":359,"15625":383,"15629":357,"15649":359,"15676":359,"15686289":377,"157":[335,391],"15728":359,"1581":274,"15820":357,"15828":[357,380],"1583":353,"15871":359,"1597":251,"16":[57,98,101,115,128,163,199,212,304,313,318,320,335,353,355,359,362,381,383,387,391,396],"160":[98,359,403],"1609":321,"161":[212,403],"16384":102,"1641":403,"166":391,"1676":[57,71],"1685":359,"1695":391,"169811320754":353,"16khz":[57,98],"17":[128,198,212,295,304,312,313,317,320,349,353,359,362,367,381,384,387,396],"1740":[57,105],"175":[386,403],"17697228144989338":385,"177":148,"1775":212,"1789":359,"1793":[257,359],"1795":352,"1797":359,"18":[57,67,71,88,105,128,211,212,295,312,313,320,353,357,359,362,374,381,384,388,403],"1816":359,"1829":283,"185":[166,299,307],"1855":212,"186":391,"1881":385,"189":[57,71,367],"1899":385,"19":[128,148,199,212,312,320,322,323,324,325,326,327,353,359,374,381],"1904":[57,71,299,307],"1909":[257,359],"1910":22,"192":391,"1926":301,"1945":359,"1946":359,"1954":141,"1955":141,"1960":141,"1962":357,"196607":313,"1968":141,"1970":162,"1972":299,"1973":353,"1976":299,"1980":[141,145,203,206],"1982":141,"1984":[299,307,359],"1985":145,"1986":[344,353],"1988":141,"1989":[145,214],"1990":[53,57,103,145,202],"1991":353,"1993":[319,322,323,324,325,326,327,357],"1994":[86,146,385],"1995":[133,135,137,176,211],"1996":[143,169],"1998":[57,66,72,77,103,131,343,359],"1999":[72,94,131,148],"1a":206,"1b":206,"1e":[48,50,149,318,327,379,380],"1e5":386,"1f":357,"1gram_1":143,"1gram_j":143,"1h15m":408,"1rc1":408,"1rc2":408,"1rc3":408,"1rc4":408,"1
st":[145,318],"2":[14,28,33,46,48,49,52,53,57,66,67,71,74,78,81,83,89,90,92,94,98,101,102,103,105,107,109,110,117,118,128,129,132,133,134,138,139,143,145,148,151,153,159,160,161,162,163,166,167,168,170,171,173,174,175,176,179,183,194,195,201,202,206,212,213,218,219,221,224,226,227,228,285,293,294,298,301,302,304,310,311,312,313,316,317,318,319,320,321,322,323,324,325,326,327,329,330,331,332,333,334,335,336,341,342,343,351,352,353,354,355,357,358,359,360,361,362,363,365,366,367,369,370,372,374,377,379,380,381,383,384,385,386,387,389,390,396,397,398,399,400,401,402,406,408],"20":[57,71,111,117,128,145,155,160,162,164,214,293,310,312,313,319,320,338,340,353,357,359,367,369,374,380,381,385,387,388,396,398],"200":[57,71,212,340,341,359,367,380,385,395],"2000":[29,94,341,345,353,359,398],"20000410_nyt":[57,71],"2001":[57,71,103,299,353,365,367,370,408],"2002":[142,148,189,214,302,317,330,359],"2003":[336,398],"2004":[57,62,81,89,94,317],"2005":[62,81,88,131,167,185,201,320],"2006":[57,67,103,145,302,335],"2007":[141,328,351,359],"2008":[67,88,89,374],"2009":[0,148,299,307,406],"201":212,"2010":[57,71,322,323,324,325,326,327,332,333,353,398],"2011":130,"2012":304,"2013":[212,317,365,369,370],"2014":[57,83,195,311,317,348],"2014a":[57,104],"2014b":[57,104],"2015":[57,104,318,338,341],"20150430":359,"2016":[309,318,321],"2017":[198,200,299],"2019":199,"201c":396,"201d":396,"202":367,"2027":[57,104],"203":[299,307],"2031":352,"2039":[57,104],"204":403,"205":332,"2065":403,"2067":396,"2070":[57,71],"2086":216,"209":391,"20k":359,"21":[160,312,313,316,320,353,359,374,381,384,396,403],"210":148,"211852":385,"21578":408,"217":176,"2170":400,"2190":359,"22":[67,81,88,212,304,320,353,359,366,381,384,386,387,388],"221":[357,391,403],"222222":398,"223406":359,"224":403,"2263":391,"227":[57,71],"2271":[57,71],"2278":403,"22nd":[67,88],"23":[57,62,71,165,227,295,304,312,313,320,353,359,374,381,384,391,396],"231":385,"2310":385,"233":[57,71,391],"2341":318,"236":332,"2369":385,"237":176,"23701322869105":377,"239":391,"24":[57,72,89,132,160,202,211,212,295,304,312,313,320,355,357,359,381,387,396],"2417":212,"244":391,"2444":377,"2450142":212,"246":391,"247":353,"248":391,"249":385,"25":[81,93,145,293,304,312,317,320,332,353,359,372,378,381,385,387,392,402,403],"250":408,"2500":355,"2507":274,"252":403,"254":391,"2541":391,"2552":357,"2554":[57,60],"256":[57,71,227,367,385],"26":[57,71,212,295,304,312,313,320,359,381],"262":[57,71],"263":[322,323,324,325,326,327],"2635971381157267":317,"264":391,"265":391,"2650":403,"2656":212,"2673":353,"27":[57,71,90,183,212,295,304,312,313,320,321,359,363,365,380,381,387,388],"270":[357,408],"2721":403,"2732":391,"274":[57,71],"2743":353,"2751":379,"276":391,"277":391,"2770":[57,84],"279":385,"28":[148,317,320,359,374,381,384],"280":408,"28352":384,"284":403,"285":403,"2857":317,"286":391,"29":[72,163,198,304,312,357,359,362,381],"292":385,"292481250360578":132,"292682926829":353,"293":195,"2935416":71,"294":391,"29421":385,"2944":391,"297":391,"299":391,"2a":206,"2b":206,"2d":[57,105,403],"2f":[148,355,357,386],"2gram_1":143,"2gram_k":143,"2nd":[94,176,380],"3":[0,28,29,30,33,42,46,52,53,57,67,69,71,72,78,82,83,87,89,90,98,103,105,107,109,115,117,128,129,132,133,134,139,141,143,144,145,148,159,160,161,163,167,173,174,176,181,183,186,193,195,198,201,202,203,206,212,218,219,221,224,226,228,276,286,293,295,297,298,301,302,303,304,306,308,312,313,316,317,318,319,320,321,322,323,324,325,326,327,328,330,331,332,333,334,338,339,343,351,353,354,355,357,359,360,361,362,363,365,366,367
,368,369,370,374,377,380,381,384,385,386,389,390,393,396,397,398,399,400,402,403,404,406,407,408,410],"30":[57,71,111,123,124,148,295,302,304,312,313,338,341,353,359,367,381,385,396],"300":[25,212,224,369,380,396,403,408],"3000":[355,359],"3002":[179,352],"3004":351,"301":[385,391],"305":212,"306":[226,391],"308":[299,307],"309":391,"31":[160,173,212,295,304,309,312,313,359,381,384],"311":[299,307,322,323,324,325,326,327],"312":403,"3128":[343,346],"313":385,"314":391,"3153":391,"316":[299,307],"31607e":384,"31740":357,"31777":357,"318728":344,"32":[212,302,304,312,357,359,374,381],"32154":359,"3238":[57,71],"325":391,"326":391,"327":391,"32k":359,"33":[212,304,317,348,359,365,366,381,384],"3330":318,"3333":[57,105],"333333":398,"33333333":354,"3346":[317,348],"335":403,"34":[148,304,313,359,381,396,403],"340":408,"3412":391,"34200":338,"343":[107,391],"3430":359,"344":391,"345":391,"3481":352,"349":385,"35":[304,313,357,359,372,380,381,396],"351":386,"352962":[57,71],"353":148,"354":145,"3553":391,"3571":355,"35746606334":353,"3574660633484":353,"359":[145,351],"3597":332,"36":[148,212,295,304,312,313,359,381,384,396],"3600":111,"3606":353,"3612":391,"3621":359,"363":391,"366":[367,396],"37":[57,103,211,304,312,313,359,381],"3709":330,"371":226,"375":[148,383,391],"38":[295,304,312,313,357,359],"382":[332,403],"3832":391,"384":391,"39":[66,359,372,384],"3910":318,"3920":317,"394":391,"395":403,"397":212,"3__20000410_nyt":[57,71],"3b":206,"3b6uxpz0l":396,"3class":220,"3f":383,"3p":353,"3point":396,"3rd":115,"3s":353,"4":[32,33,46,53,57,67,71,83,90,94,98,103,105,107,117,128,132,134,135,143,145,160,161,167,173,174,176,183,188,200,204,206,212,217,224,287,295,304,312,313,316,317,319,320,321,322,323,324,325,326,327,328,330,331,332,333,334,338,341,343,348,351,352,353,354,355,357,359,362,365,366,367,368,370,374,377,379,380,381,383,384,385,386,387,389,391,396,398,399,400,402,403,408],"40":[160,312,338,341,357,359,380,396,408],"400":[148,380,397,399],"4000":[310,359,385],"40000":359,"406":[145,304],"409":299,"41":[355,359,380,384,391],"4118":317,"412":391,"4135":317,"414":145,"414213562373095":117,"4167":355,"4199":391,"42":[129,146,160,293,304,357,359,365,368,380,391],"420":[340,341],"4215":391,"42191":313,"423":176,"425":388,"4267":348,"427":403,"428":403,"429":[359,403],"42960":86,"43":[304,313,353,359,396],"430":403,"43072":313,"431":391,"43135":313,"4326":359,"4352":313,"436":299,"4376":353,"4393":321,"43981":369,"43996":212,"44":[295,299,304,312,313,359,396],"44032":313,"4404":391,"4409":353,"44159544":212,"444":[115,391],"4489":317,"449":403,"449489742783178":132,"44k":369,"45":[57,71,111,126,295,312,313,359,367],"450":386,"4543158":403,"456400":357,"456790123456":353,"46":[160,304,312,313,355,359,396],"4600addf38b8":137,"4607":313,"4619":330,"462":391,"466":385,"4667":[357,377],"467":145,"46742":86,"4675":[357,380],"4678":391,"47":[212,304,312,313,359,396],"470":403,"4721":357,"474":372,"4750":212,"4767":391,"477":391,"4792899408284":353,"48":[295,312,313,359,384,408],"482":403,"484":403,"485":302,"487603305785":353,"488372093023":353,"49":[295,313,359,367],"4905":317,"492":391,"492857142857":353,"493":391,"4953":359,"497":391,"4986":384,"4f":[355,357],"4s":352,"5":[0,25,29,33,36,52,53,57,71,77,83,89,90,94,98,104,105,107,111,115,128,132,143,145,147,148,152,159,160,162,163,167,173,174,176,189,195,206,209,212,214,217,218,288,295,302,311,312,313,316,317,319,320,322,323,324,325,326,327,328,330,331,332,333,334,335,340,343,351,353,354,355,357,359,362,365,366,368,370,372,374,379,380,381,383,384,385
[... minified searchindex.js payload omitted: machine-generated Sphinx search-index data mapping indexed tokens (numeric literals and Unicode-escaped terms) to document-ID arrays for the rebuilt NLTK documentation ...]
d\u07f5":301,"\ua2cd":301,"\ua2d3":301,"\ua2d7\u05d6\u12a9\u115a":301,"\ua2db\uff41":301,"\ua2de":301,"\ua2e3\ua54a\u0469\ua568\u12e4\u0a74\u06ab\u3078":301,"\ua2e6\u1835\ua1f0\u0db4\ua026":301,"\ua2e7":301,"\ua2ec\ua855\ua328":301,"\ua2ef\u1c65\u1240":301,"\ua2f0\u2071\ufceb\u19b9\u1857\u0bb3\uaba7\u149a\ufdb3":301,"\ua2f1":301,"\ua2f2\ua130\ua4ed":301,"\ua2f3\ufd1b\u134d":301,"\ua2f5\uabbb\u2d8c\ua869\ua17b\ua1c6\ua571":301,"\ua2f8\u16ac\u0d34":301,"\ua2fc\ua040":301,"\ua2fe\ua080":301,"\ua304":301,"\ua308":301,"\ua30a\ua5a5":301,"\ua310\u0456\ufbec\ufed9\u1a20":301,"\ua312\ua3a5\ufb30\u00e6\u0988":301,"\ua313\u06fa\u052f":301,"\ua314\ua4f8\u16a3\u03ce\u076c":301,"\ua315\ua99c":301,"\ua316\u08aa\u186e\ua913\u084d\ua5e0\ua138":301,"\ua31c\ua081\ufd5b\ua071\ua44e\ua11f\u16d4\u1457":301,"\ua31e\uabe2\u0275\u04d5\u1902":301,"\ua31f\u16b5":301,"\ua324\u16a9\u11d5":301,"\ua325\ua727\u2d42\u2d87":301,"\ua326\u1f44":301,"\ua32b":301,"\ua32e\u185a\u160e":301,"\ua33b":301,"\ua33d\uff5a\u1831\u1d37\u08b0\ua41b\ua6b1\u1fc3":301,"\ua33e\u06ba\ufd68\u0d1d\ua53b\uabd0":301,"\ua33f\ua745":301,"\ua340":301,"\ua342\ufd11":301,"\ua346\ua400\ufc82":301,"\ua348\u1e1b\u0ca5\u011b":301,"\ua34d\ua5c5":301,"\ua34f\ua070":301,"\ua351\u12ad":301,"\ua352\u189d\u0435":301,"\ua353\u0251":301,"\ua356\u0ec1":301,"\ua35c\ua653\ufc77":301,"\ua35d\ua083\u0e13\u2c9f\u0ae1":301,"\ua361\u11ab":301,"\ua363":301,"\ua365":301,"\ua366\u0b28\ua44a":301,"\ua36b\ufbf7\u115e\u209b\u1a30\ua338\u30a6\ua222":301,"\ua372\ua61d\u2d14\u057c\u12fc":301,"\ua374\ua2a1\u0d28":301,"\ua377\u15e0\ufc25\u0ead\u0779\u03d4\u2ca9":301,"\ua378":301,"\ua37b\u2c40\u014b":301,"\ua37d\uff93\uff9f\u044d":301,"\ua37f\u06c5\u1781\u0525\ua435\u0e27":301,"\ua384":301,"\ua387\u19c8\u2dc4":301,"\ua389\u1d0d\u0d17":301,"\ua38c":301,"\ua38d\ufbf3\ua2ee\u1518":301,"\ua38f\ufb60":301,"\ua391\ufbfe\u1e4f\u1f27\ufc2b\u1e85":301,"\ua392\ufcca":301,"\ua393":301,"\ua399":301,"\ua39a":301,"\ua39c":301,"\ua3a0\ua4ee\ua031\u099c":301,"\ua3a2\u1322":301,"\ua3a3":301,"\ua3a8\u3184\uffce\u15c0":301,"\ua3a9\ua248\u0cf2\u1007\u1bcb":301,"\ua3aa":301,"\ua3ad":301,"\ua3ae\ufc9f":301,"\ua3b0\u3171":301,"\ua3b2\u12e9u":301,"\ua3b6\u0765\u07cf\u125a":301,"\ua3b8":301,"\ua3bd":301,"\ua3c2":301,"\ua3c3":301,"\ua3cc":301,"\ua3ce\u02cb\u11a3\u1583\u1727\u1d21":301,"\ua3d1\ufcd0\u0254":301,"\ua3d3\u1765\u2d58\u11b0":301,"\ua3d8":301,"\ua3da":301,"\ua3db":301,"\ua3de\u0250\uaa48\u1695\ufd79":301,"\ua3e2\ufb39\u1837":301,"\ua3f3\ua9b0":301,"\ua3f4":301,"\ua3f5\ua6a6\ufb62\ufcd4\u30ea\ua54d\u2dbb\ua396":301,"\ua3f6\u153c\u1e01":301,"\ua3f7\u0159":301,"\ua3fa\u1fe0\u1521\u30fc":301,"\ua3fc\u0260\uab70\u1573\ufbd4\u071f\ua028\ua4da\ua1d5\u2cdf":301,"\ua3fd\u0980\u15f8\ufef3\uff4d":301,"\ua3ff":301,"\ua401\u0d2d":301,"\ua407":301,"\ua40b\u06cc\u1671\ua47a":301,"\ua40c\ua50c\uaba9\u00df":301,"\ua410\u06bf":301,"\ua412\u1d6a":301,"\ua413\u1d82":301,"\ua416\ufc7c":301,"\ua417":301,"\ua418\ua797\u19b7":301,"\ua419\u1592\ufd86\uab45":301,"\ua41a\u1e95":301,"\ua421\u0c58\u18f5\ua607":301,"\ua423\ua0c8\uff57\uff6b\u3118\u1e15":301,"\ua424":301,"\ua425\uaa11\u1722":301,"\ua426\ua520":301,"\ua427\u1983\u1860\u18dc\u0eb3\u2c5e":301,"\ua428\u043b":301,"\ua429\u2d11\ua148":301,"\ua432":301,"\ua434":301,"\ua438\u3046\u11f3":301,"\ua43a\u1250":301,"\ua440\u1327\uaa27\u10d1":301,"\ua443":301,"\ua445":301,"\ua448\uff92":301,"\ua449":301,"\ua44b":301,"\ua44f\u2c51":301,"\ua450":301,"\ua451\ua56e\ua77c":301,"\ua452\u1e5f\ufb2b\u1961\u1e33\ua892\u02a4\uab8b":301,"\ua453\u2c3d\u31a4\ua07e":301,"\ua454\u0813":301,"
\ua455\u0cb5\u1009":301,"\ua459\u1458\u2c34\uab48\u12d3\ufcc3":301,"\ua45a\u14e3":301,"\ua45f":301,"\ua461\u182a":301,"\ua462\u1990":301,"\ua463":301,"\ua465":301,"\ua46b\uff41":301,"\ua473\ua9af":301,"\ua475\u1b0e":301,"\ua477\uffb4\u1830":301,"\ua478\ua1fb\uaa68\u1f72\u30ad\ua516\u016f\u18cb\u021b\u0abd":301,"\ua479\u1306\u0697\u02a2\uab3e\u1624\u16aa":301,"\ua47b\u1d79\ua0f5\u1ed5":301,"\ua47d":301,"\ua480":301,"\ua481\u1139\u0e09\ua5dd":301,"\ua483":301,"\ua484\u00ec\u14e7":301,"\ua485":301,"\ua487":301,"\ua48b\ua50f\u03c6\u179d":301,"\ua4d0":301,"\ua4d1\u31f0\ua0ac\ua6b3\u14c5\u2c8d":301,"\ua4d5":301,"\ua4d8\u03ae\u0998":301,"\ua4db":301,"\ua4dd":301,"\ua4e0\ua466\u1150\ua3c7":301,"\ua4e2\u0280\u1b18":301,"\ua4e5\uab16":301,"\ua4e6\u1f51\ua13e\u097c\u1fc3\u156f":301,"\ua4ea\u0a93\u0ca7\ua938\u01f9\uff72":301,"\ua4eb\ua261\ufec6\ua842\ud7d3\uabcd\ua80d\u0828\uaaa9\u0d8a\u1bdd":301,"\ua4f0\ua6d7\u1891":301,"\ua4f1\ua687\u12ab\u1424\ua725":301,"\ua4f2\u1512\ua264":301,"\ua4fb":301,"\ua4fd\u1f55":301,"\ua500\u049f":301,"\ua501\u0e2c\u050d":301,"\ua502\uaa49\u0b5c\ufc0a":301,"\ua503\u0257\u062e":301,"\ua505":301,"\ua506\u1854\u2c42\ufc6e\ua895\u14af":301,"\ua50bt\ud7ee\ua467\ua1bf\u1567":301,"\ua510\u1657\ufedf\u0b8f":301,"\ua511":301,"\ua517\ua8ae\ufca0":301,"\ua51a":301,"\ua51d\ua960\ua343":301,"\ua522\ua09a\u146a\ufb7e\ufb64\u1d5e\uaabb\u0c96\uff43\u15d7\u00eb\u2d49":301,"\ua526\u2c46":301,"\ua529\ufdbc":301,"\ua52cb":301,"\ua52f\u2d1e\ud7dd\u1f01":301,"\ua530":301,"\ua531\u1286":301,"\ua536\u04ad\ua189\u1421":301,"\ua537\u16cb":301,"\ua53a\ufc66\ufee7\u0635\u1645\u1c0d":301,"\ua53f\u2d16\uaa45":301,"\ua540\ua40d\u1f7c\ua2b7\ufc76":301,"\ua542\u02bd":301,"\ua545m\u1896":301,"\ua547":301,"\ua549\u123e\u0a90":301,"\ua54b\u090d\u1268":301,"\ua551\u1530\ua15d\ua1be\ud7b5":301,"\ua552\ua507":301,"\ua557\uffb7":301,"\ua55c\u1e07\u1fe6":301,"\ua55d":301,"\ua560":301,"\ua562":301,"\ua564\ua88a\ua337\u210b\u2c71\ua1a4\u159b\u07e3\u1111\u1db1":301,"\ua565\u1d7d\ufb88":301,"\ua566\u1606":301,"\ua569\ua850\u2095\ua414\ufda4":301,"\ua56b":301,"\ua56c\u2cc9\u1126\u1f83\u076f\u19c5\u15c1":301,"\ua576\ufda8\ufc45o":301,"\ua578\u0e25\u306e\u02e1\u1e9b\u1711\u08a0":301,"\ua579\u0515\uab3b\ufbfc\u0e21\u0971\ua165\u1442":301,"\ua57a":301,"\ua57b\u0109\u071e":301,"\ua57c":301,"\ua580\u12d0\ua220\ua13f\u020f\u1e9c":301,"\ua585\ua819":301,"\ua586\u1852\u1984":301,"\ua587":301,"\ua588\u178b\u162b\ua67f":301,"\ua58a\ua606":301,"\ua58d\u04d7\u2c5d\u0da1":301,"\ua58e":301,"\ua58f\uab36\u03db":301,"\ua591\u1884\ua5bf\u12d8":301,"\ua596\u1481\ua403":301,"\ua598\uffc6\u03c3":301,"\ua599":301,"\ua59c":301,"\ua5a1":301,"\ua5a3\ud7d8\ua067\u04d3":301,"\ua5a7\u1499":301,"\ua5aa\u0f45\uab2a\uab4d\u2cf3\ufd5c\u3006\u1244":301,"\ua5ab\u1b95\ufc1b":301,"\ua5ac\u185b\u1144":301,"\ua5b0\ua06c":301,"\ua5b5":301,"\ua5bc":301,"\ua5be\ua10c\u07d3\ua2dd":301,"\ua5c0\ua3cb\u0840\u0f67":301,"\ua5c1":301,"\ua5c8\u10ec":301,"\ua5cc\u18ec":301,"\ua5cd\u1495\ua64f\u0a8b":301,"\ua5d0\u077d":301,"\ua5d6\ua369":301,"\ua5d7\u1404":301,"\ua5db\ua699\u11da\u1fbe":301,"\ua5de\ud7cd\u195c\ua464\u1f67\u199d\u31ba\u11f6\ua39e\u1448":301,"\ua5df":301,"\ua5e1\u1f71\ufd64\ua04c\ua23f":301,"\ua5e3\ua25e":301,"\ua5e4":301,"\ua5e9\ua801\u1127\ua885":301,"\ua5ec\u1e2b":301,"\ua5ee\u19a0\u1e89":301,"\ua5f3\u30d3":301,"\ua5f4":301,"\ua5f6":301,"\ua5f7":301,"\ua5f9":301,"\ua5fc\u1ee7":301,"\ua5fe\ua1f1":301,"\ua5ff":301,"\ua600\u0455\u130c\u1270\u072e\u2184\ufe85\u1c1e\u0223\u0199\ufcf9\u2115\ua5b2\u03c8\ua5ea\u1536\u1e6f":301,"\ua602\u174e\ufef5\u0ab5\ua47
6\u0854\u0f4d\u1e75\u0aa0\u18b7":301,"\ua604\ua03b":301,"\ua605\u1184":301,"\ua608\ua8ac":301,"\ua613":301,"\ua618\u0f4c\u05d2\ua139":301,"\ua61b\u2d05":301,"\ua61c\uaaae\ua759":301,"\ua62a\u0d92":301,"\ua62b":301,"\ua643":301,"\ua645":301,"\ua647":301,"\ua647\u1f25":301,"\ua649":301,"\ua649\u0287":301,"\ua64b\ua171\u31a2\u1d2b\ufb23\ua9a7":301,"\ua64d\u1f80":301,"\ua651\u196a\u30ca\u1170\u1d48\u1791\u1b97\ufc44\u12cd":301,"\ua653\u062a\u027c\u1f31":301,"\ua657":301,"\ua659\u1187\u0447\u210f\u15d3":301,"\ua65b\u0473\u1153\u0153":301,"\ua65f":301,"\ua661\u1741\u161d\u0691":301,"\ua663":301,"\ua665":301,"\ua665\u1511\u056c":301,"\ua667\ua3a7\ufeb5":301,"\ua669\u3094\u186c":301,"\ua66d\uab7a\ufb47":301,"\ua681\u1952\u1bbf\ufc72":301,"\ua683":301,"\ua685\ufe82\u0b39":301,"\ua689\u017c":301,"\ua68b\u050d\ua749":301,"\ua68b\u1d97\ua5ca":301,"\ua68d\u0aa1":301,"\ua68f\ufb2d\ua13b\u1f86\ua911\ua322\ua009\u2d0b":301,"\ua691":301,"\ua693\u056e\ua5c9\u01dd":301,"\ua695\u1175":301,"\ua697":301,"\ua69c":301,"\ua6a0\ua2a5":301,"\ua6a1\u1daa":301,"\ua6a2\u2c32\ua89a\u0b15\ua6b4\ua4fa\uabe1\ufed6":301,"\ua6a5":301,"\ua6a7":301,"\ua6a8\u1f81\u15cd\ua217\ua1ad\u15f1\ua38e\ufc95\u2d10\u1833":301,"\ua6a9\uaae2\u03b4\u03d3\ufc5a":301,"\ua6aa":301,"\ua6ab":301,"\ua6ac\u098f":301,"\ua6b5\u159a":301,"\ua6b9":301,"\ua6c2":301,"\ua6c4":301,"\ua6c9":301,"\ua6ca":301,"\ua6cd\uff79\u1253\u211c\ua2eb":301,"\ua6d1\u158f\ufd0c\u00f1":301,"\ua6d5":301,"\ua6d9\uabac\ua962":301,"\ua6da\ufd6d\u17a5\ua4df\u1980":301,"\ua6db\u2d96":301,"\ua6dc\u1342\u0d9c":301,"\ua6df\u163e":301,"\ua6e1\ua4e3\ud7b4\u0ca4\u2c48\ua4d7\u1a54":301,"\ua717\uab87":301,"\ua71a\u0261":301,"\ua723\u0266\u113d\u1280\ua941\ua13a\u138d":301,"\ua723\ufd00":301,"\ua727\ufb4a\uffb5\ua0bd\u30f1":301,"\ua72d":301,"\ua72f\u03d2":301,"\ua72f\u0c22\u0499":301,"\ua730\ufcc5\uaa80\ua2b2\u1e0f\u212d":301,"\ua735":301,"\ua735\u1f81\u0776\u1618m":301,"\ua73d\u0561\u1318\u128c\u2dce\ua504":301,"\ua73f":301,"\ua743":301,"\ua747":301,"\ua74b\u06ef":301,"\ua74f":301,"\ua74f\u2c31":301,"\ua753\ufb7d\u2cad\u30c5":301,"\ua755":301,"\ua757\ufcb1\u143e\u2cc3\u214e\u308d\ua256":301,"\ua75b\u1643":301,"\ua75d":301,"\ua75d\u14a4\u18f4":301,"\ua75f\u182c\ua19b":301,"\ua761":301,"\ua761\u067b":301,"\ua763\u2dcd\u0762":301,"\ua767\u148f":301,"\ua769":301,"\ua76b\u159c\ua0f6\u04fb\ua200\ua807\u0247\ua3bf":301,"\ua76f\u30ae\ufc6b\u1006\u1589\u0211\u0c07":301,"\ua770":301,"\ua771\u04c4":301,"\ua772\u0b30":301,"\ua773\u0ab7":301,"\ua778":301,"\ua77c":301,"\ua77f\u03c4\ua131\u0167":301,"\ua77f\u0d2f":301,"\ua783\ua865\u0b93\u02ab":301,"\ua787":301,"\ua787\ua2ce":301,"\ua791":301,"\ua793":301,"\ua799\u1020":301,"\ua79b\u025e\u2db2\u0584\u0699":301,"\ua79b\ua54e":301,"\ua79d":301,"\ua7a1":301,"\ua7a3":301,"\ua7a5\u163a\ufd39":301,"\ua7f7":301,"\ua7f8\ua66b\u17a2\u0a88\ua98f":301,"\ua7f9\u1a52\u1aa7\u142b\ua3bc":301,"\ua7fc":301,"\ua7fd\ua066\ufefc\u159d":301,"\ua7ff":301,"\ua804":301,"\ua813\u2c57\ufe96":301,"\ua816\u0527\u0c14":301,"\ua81a\ufbe1":301,"\ua81e":301,"\ua821\ua0aa\u2c53\ufc85\u0249":301,"\ua843\u314e":301,"\ua844\ua319":301,"\ua846\u0672\ua5ba\ua100":301,"\ua849\u11eb\ua097":301,"\ua84a\u1951":301,"\ua856":301,"\ua858\u01a1\u04ab":301,"\ua85a":301,"\ua85b":301,"\ua860\u0690\u129d":301,"\ua861\ua127":301,"\ua862\u16bf\ua3a6\u1483\u127a\u152b\u010b\u1e1d\ua567\u1f60\ud7ba\u03b9\u146d\u14b3\u10e4\u1e03\u304a":301,"\ua863\u1e9f":301,"\ua864\u1115\uaa6a":301,"\ua866\u305e":301,"\ua86b\ua26b\u0da8\ua767\ufdc3\ufd18":301,"\ua86c":301,"\ua86d":301,"\ua86e\u0d08\u0692":301,"\ua873":301
,"\ua882":301,"\ua883\ud7f5":301,"\ua884":301,"\ua886":301,"\ua88c\uaa07\u04ff":301,"\ua88d\ua5c7\u199a":301,"\ua88e":301,"\ua893":301,"\ua894":301,"\ua896\u0461\u0573\u01f0\u0750":301,"\ua89c\u0b19\ufcf8":301,"\ua89d":301,"\ua8a2\u0e42\ua3df\ua6a3":301,"\ua8a4\u1434\u1cec":301,"\ua8ab":301,"\ua8b3\u0676\u02c9\u1f52\u03fc":301,"\ua8fb\u1679\ua469":301,"\ua90c\ua4d2\u0976\ua024\u2db9":301,"\ua90d\u03d5":301,"\ua90e\uff54\u111c\u1129":301,"\ua90f\u167e":301,"\ua914\uab93":301,"\ua916\ua3e1":301,"\ua91a\u1ea7\u079c":301,"\ua91d\u18f3\u2cdf":301,"\ua91e":301,"\ua91f\ufb01\u1b33\ufce5":301,"\ua920\u0157":301,"\ua922":301,"\ua923\ufbfa\u1693\u1963":301,"\ua934\u1897\ua970":301,"\ua936":301,"\ua937\u0719":301,"\ua940\ufd0a\u151f\u1697\u12ae":301,"\ua943\u1e7f":301,"\ua945":301,"\ua961\u12cb":301,"\ua963\u1181\u2c33":301,"\ua966\u1769\ufccd\u0271":301,"\ua967\uaaab\u1287\u044a":301,"\ua968\ua765\u0f49":301,"\ua971\ufc01":301,"\ua974\uaa90\ua2c3\u1670":301,"\ua975\ufcf6\u1731\u1bba":301,"\ua976\ufef1\u1748\u2cb7\u05e8\u1e35\u12fe\u0432":301,"\ua977\u1604":301,"\ua979":301,"\ua984":301,"\ua986":301,"\ua996\u14ec":301,"\ua99f\u2dae\u063b\u183c\ua406\u0586":301,"\ua9a1\ua08d\u02ee\ufc3a":301,"\ua9a2\u161e":301,"\ua9a3":301,"\ua9a5":301,"\ua9ab\u0795\u1333\u12e8":301,"\ua9ac":301,"\ua9ad\u0431\u1441":301,"\ua9ae":301,"\ua9cf":301,"\ua9e6":301,"\ua9e7":301,"\ua9e9\u30b0\u30f9\ua0b5":301,"\ua9ea\u0a87\u11d1":301,"\ua9eb":301,"\ua9ee\ua81d":301,"\ua9fa\uab86":301,"\ua9fe\u06a8":301,"\uaa00":301,"\uaa01\u04f1":301,"\uaa02\u1d94":301,"\uaa03\u069f":301,"\uaa06\uab49\u043d\u17a7":301,"\uaa08":301,"\uaa09\u0e43\u0135":301,"\uaa0a\ua755\ud7f3":301,"\uaa0d":301,"\uaa0f\u0643\u0797\u1595":301,"\uaa15\u016b\u1d5d\ua7fa":301,"\uaa18\uff9e\u00f1\u0b60\u06b0\u211b":301,"\uaa19\u0e9f\uff74\ufc9e":301,"\uaa1b":301,"\uaa1c\u0569\ua5a9\u0681":301,"\uaa1d\u0c97\u0579\u1b30\u03c1":301,"\uaa20":301,"\uaa22\ufc79\ua8f5\u2dcb\ua000\ua3c4\u1470\uffb6\u0751\ua1cd\ua0a1":301,"\uaa26\ufb03\ufec8":301,"\uaa28\u00ec":301,"\uaa40\u051f\u148a":301,"\uaa41\u1d4c\u0459":301,"\uaa61":301,"\uaa63\u0576\u1f78\ua117\u14f7\u1e59\ua815":301,"\uaa65\u19c0\ua8a7\u03bd":301,"\uaa66":301,"\uaa67\u16f1\ua3b9\ua514\ua65d\u163b":301,"\uaa69":301,"\uaa74\u1599\u03be":301,"\uaa7a":301,"\uaa7e":301,"\uaa89\uabb6\ua41e":301,"\uaa8b\u168c":301,"\uaa8c\ufe89\ufead":301,"\uaa8d":301,"\uaa8f":301,"\uaa92\ua293\ufd9b\ua19c\u1055":301,"\uaa98":301,"\uaa9c":301,"\uaa9e\ua5a8":301,"\uaa9f":301,"\uaaa2\ufd5d":301,"\uaaa3\u0989":301,"\uaaa4\u08a3\u024b\ua90a":301,"\uaaa5\u1204":301,"\uaaa7\u00fb\uab37":301,"\uaaaa\ua69d\u1f42":301,"\uaaaf\u04a9\u0922":301,"\uaab1\u1335\u0d96":301,"\uaab5":301,"\uaae3\ufbe5":301,"\uaaf4\u0503\u2c53\u0696":301,"\uab04\uffad\u14b4\u214e\u1614\ufcec\u116c":301,"\uab06":301,"\uab09\ua609":301,"\uab0a\u0517\u01f9\u132f":301,"\uab0d":301,"\uab0e\u09af\u013e\u03cd\u3054\ua42f\ua4f7\uaa72\u0a16":301,"\uab15\ufb20":301,"\uab28\u01e3\u1173\ua457\u0491\u16d1":301,"\uab2b\u14d8\ufcf0":301,"\uab2d\ufd8f\ua404\ua59e\ua2e9":301,"\uab2e\ufc1c\u318a":301,"\uab31\ua527\ua213\u30f6\u1d7b\u1ead":301,"\uab34\ufb48":301,"\uab39":301,"\uab3a\u00f6":301,"\uab3c\u14ab":301,"\uab3f":301,"\uab42\ua3d5\ua3e4\u0756\u16b4":301,"\uab43\ua939\u1114\u3043\ua058":301,"\uab46":301,"\uab47":301,"\uab4a":301,"\uab4b\u11dd\u135a":301,"\uab4e\u1015\u0ea7":301,"\uab50\u0c9c\u1d78":301,"\uab52\ua1f4\u10d0\u1eef\u1d58":301,"\uab53\u1876\u311a":301,"\uab58\u0bb1\u0b37":301,"\uab59\u2d1d\u00f9\u198c\u1b2e\u1616\ua5ed\u1531":301,"\uab5d":301,"\uab74\u1edd":301,"\uab75":301,
"\uab77":301,"\uab7c\u191d":301,"\uab7e":301,"\uab7f\u1c60":301,"\uab81\u067d\ua8b1\u31ad":301,"\uab82\ufd85\u14ae":301,"\uab83\u18b9\ua851\ua04f":301,"\uab84":301,"\uab85":301,"\uab89":301,"\uab8a\u1698\u0625":301,"\uab8d":301,"\uab8e\ua408":301,"\uab91":301,"\uab99":301,"\uab9a":301,"\uab9b":301,"\uab9c":301,"\uab9d":301,"\uab9e\ua42a\uff68\ufcb4":301,"\uab9f\u2c4a\ud7c0\u1b27":301,"\uaba4":301,"\uaba8":301,"\uabab\u106f":301,"\uabad\u14a5\u1bd8":301,"\uabae":301,"\uabaf\ua430\ua053":301,"\uabb0":301,"\uabb1\u1f12":301,"\uabb4\ua309\ua0d2":301,"\uabb5\u15b4":301,"\uabb7":301,"\uabb9":301,"\uabba":301,"\uabbd\u09a2":301,"\uabbf\uabb3\ua0ee\ua840\ufe87\u0e10\u1b92\u16a2":301,"\uabc0":301,"\uabc2":301,"\uabc4\u0c2c\u0f58":301,"\uabc6\u1f01\ua51c\ua049\u178f":301,"\uabc8\u3086":301,"\uabce\ufc7f":301,"\uabd3":301,"\uabd5\u09df\u1491\ua539":301,"\uabd7\u1d45\u013e":301,"\uabd8\u0ab2":301,"\uabd9":301,"\uabdb":301,"\uabdd\u1597\u0129":301,"\uabdf":301,"\uabe0\ufebc\u1586":301,"\ud7bb\ua036\u1fd1\u2c91":301,"\ud7bd":301,"\ud7be\u0e23":301,"\ud7c4\u1702":301,"\ud7c6\uabc9\u015f\u1456\u0219\ufba7":301,"\ud7ce\ua3b7":301,"\ud7d1\ua6d0":301,"\ud7d5\u0273":301,"\ud7d7":301,"\ud7da\uaa97\u1196\ufeda\ufd09":301,"\ud7db":301,"\ud7de\uff9d":301,"\ud7e3\uabbc\u1490\u04e5\u210a":301,"\ud7e5":301,"\ud7e7\u2d46":301,"\ud7e8i":301,"\ud7e9":301,"\ud7eb\u0c8e\u0d1a\ua5e6\uff7a":301,"\ud7ed":301,"\ud7f2\u0e45\ua582":301,"\ud7fa":301,"\ufb00":301,"\ufb02":301,"\ufb1d\ufb4f\u0845":301,"\ufb24":301,"\ufb28\u1079\u1247\u046f\ua1b2\u0caa\u04d9\u1255":301,"\ufb2a\ua474\ua785\u1b48\ua07b":301,"\ufb2e\u126d\uaa60":301,"\ufb41\u0578\ua34b\ua2df":301,"\ufb43\u04e9":301,"\ufb44\u1580\ua4e7\ua40f\u2c93":301,"\ufb46\u1fd1\ua4f5":301,"\ufb49\u0213\ufef7":301,"\ufb4e":301,"\ufb50\u0459\uaa64\u0140":301,"\ufb52":301,"\ufb53\ufc9d\ua5d5\u1328\u1428":301,"\ufb59\uff4a":301,"\ufb5b\u07e2\ufeb0":301,"\ufb5d\ufc0f":301,"\ufb61":301,"\ufb63\u0456\u0219":301,"\ufb65\u0e96":301,"\ufb68\uaae7":301,"\ufb6a":301,"\ufb6e\ua3e6":301,"\ufb70":301,"\ufb71\u0a95\u03ed":301,"\ufb73":301,"\ufb75\ua9fc":301,"\ufb78\u1e47":301,"\ufb79":301,"\ufb7a\u0b86\u1065\ua60b\ua190":301,"\ufb7f\ua695\ufee8":301,"\ufb80":301,"\ufb81\u14ce":301,"\ufb82\u0257\u0f53":301,"\ufb83":301,"\ufb84\ua6b0\uff8f\u1027\u1623\u01ff":301,"\ufb85\u19c3\u1d5f":301,"\ufb89\u0446":301,"\ufb8b":301,"\ufb8c":301,"\ufb8f\ufd5f":301,"\ufb92\ua285\u183b\u0ce0":301,"\ufb93\u1970":301,"\ufb95\ua681":301,"\ufb96\u1503\ufda0\ua45e\u0933\ua30d\ua8a3":301,"\ufb99":301,"\ufb9b\u024d\u0115":301,"\ufb9d\ua11e":301,"\ufb9f\u142c\ufd02":301,"\ufba0\u1793\u0db1":301,"\ufba9\ua2f6\u3138\ufbff\u0507\u2c4f":301,"\ufbaa":301,"\ufbad\u08a2":301,"\ufbb0\u1bcc\u1787\u0648\u2dcc\u1ea7\uaaf3":301,"\ufbd6":301,"\ufbd7\u1d8dv":301,"\ufbd8":301,"\ufbd9\ufd84":301,"\ufbdf":301,"\ufbe2\ua6e2\u00e3\u1f55\u1684\ua317":301,"\ufbe4\u0571":301,"\ufbe8\u1446\u1bdb":301,"\ufbe9":301,"\ufbea":301,"\ufbeb":301,"\ufbee":301,"\ufbef":301,"\ufbf0\u1522\u02b0\u1554":301,"\ufbf1\u0ad0\u141b\u1295\u1011":301,"\ufbf5":301,"\ufbf6\ufc39\u0d61\ufeb2\u16be":301,"\ufbf8i\u0728\u0b21":301,"\ufc00\u016d\u1332":301,"\ufc02":301,"\ufc08\u119c":301,"\ufc0c":301,"\ufc0d\u1553":301,"\ufc0e\ufd63":301,"\ufc11\u1a32\ua973\ua174":301,"\ufc12\ua3e7\ufcac\u1140":301,"\ufc15":301,"\ufc1f":301,"\ufc21":301,"\ufc22\u1d2e\ua985":301,"\ufc27":301,"\ufc2c":301,"\ufc2e":301,"\ufc2f":301,"\ufc31\u00f2":301,"\ufc33\u01c9":301,"\ufc37":301,"\ufc38\u11fa":301,"\ufc3b\u0698":301,"\ufc40":301,"\ufc47":301,"\ufc49\u01ba\u1bd3":301,"\ufc4b":301,"\ufc4f":
301,"\ufc50\u1729\u1fa2\u0d32":301,"\ufc55\ua033\u047f":301,"\ufc57\u2dd1\ua54f":301,"\ufc58\u063c\u1d20":301,"\ufc59\ua85e\u154b\u1974\u1e61\u09a7":301,"\ufc5c\u2d41\u0188\ua98d":301,"\ufc5d\ufdb5\u1692":301,"\ufc61\u172f":301,"\ufc62\u03d9\ua79f\uab05\uabbe":301,"\ufc63\u140b\ua032\u1d63\uab56":301,"\ufc68":301,"\ufc69":301,"\ufc6a\ua320\ua2e5\u1ff2":301,"\ufc70\u15b2":301,"\ufc71\u11ca\ua336\u10f5":301,"\ufc74":301,"\ufc75\u30b3":301,"\ufc78\u0d23\ufe9b":301,"\ufc7b":301,"\ufc7d\u0298\ua733":301,"\ufc81\u0d8b":301,"\ufc88":301,"\ufc8b\u0b26\u0c9e\u1704":301,"\ufc8c\u0444\uabb2\u1e71\ufd3a\u0c31\u1f86":301,"\ufc91\u1344\u04e3\ua02e":301,"\ufc93\uaaad\u0d95":301,"\ufc98\u11dc":301,"\ufc9a\u0804\u010d\u0103":301,"\ufc9b\ufdf5\ufc32\uffb8\u307b":301,"\ufca1":301,"\ufca3\u1efb\ua3cd":301,"\ufca7\u1be0\u047d\u0794\u1f74\u1c0c":301,"\ufca8\ufed0":301,"\ufca9\uffc2\ua96f\u304b":301,"\ufcaa\u3084\u01da\ua508":301,"\ufcae":301,"\ufcaf":301,"\ufcb3e\uffd3\u2c35\u1f22":301,"\ufcb9\u0daa":301,"\ufcc2":301,"\ufcc6\ufcb8\u3060\u1915":301,"\ufccb":301,"\ufccc\u057d\u1d57\u2c77\u116d\ua184\u147a\ua50e\ufd73":301,"\ufccf":301,"\ufcd1":301,"\ufcd2\u1f14\u0c06":301,"\ufcd5\u1e31\u1d65":301,"\ufcd7\u0465\u1ecb":301,"\ufcd9":301,"\ufcdb\u100b":301,"\ufcdf\u0a8a\u1284\u1479":301,"\ufce1":301,"\ufce8":301,"\ufced\ufb3e\u10fal\u122e\u1118":301,"\ufcee\uaaa6\ua525\u01e9":301,"\ufcf1\ua71e\u048b\u1303\ua5b6\u1540\u1f02\u18b6\u1c5f":301,"\ufcf5\u037b\ua306\u02a0":301,"\ufcfb":301,"\ufcfd\u16cd":301,"\ufcff":301,"\ufd01\u2da2\u12b9":301,"\ufd03":301,"\ufd0d":301,"\ufd1d\ua53c\u0628\u14ac\u1b1a\ua69b\ua41c\u1b83":301,"\ufd1e\u0f6a\u04f7\u0d90":301,"\ufd1f":301,"\ufd21\u14cd":301,"\ufd24\u131f\u092f":301,"\ufd25":301,"\ufd27":301,"\ufd29\u1e29\u2d1a\uaa23\u0117\u0501\u03bb":301,"\ufd2d\u160a\u0a1a":301,"\ufd2e\u1d95":301,"\ufd31":301,"\ufd33\u15ae\u021f":301,"\ufd34\ua01b\u0171\u2dbd":301,"\ufd38":301,"\ufd3d\ufe93":301,"\ufd50":301,"\ufd54":301,"\ufd56\ua3c6":301,"\ufd58":301,"\ufd5e\ufb9e":301,"\ufd60":301,"\ufd62\ufe9c":301,"\ufd65\u07e0\u1997\u017c\ufe97":301,"\ufd66\u16a8\ufdf7":301,"\ufd69\uaba5":301,"\ufd6a\u1d2c\u1ee9\u2cd7\uaa76":301,"\ufd6c":301,"\ufd6e\u072a":301,"\ufd6f\u0479":301,"\ufd70\u18de\ua820\u0f47\u2d07\ua379\ud7c5":301,"\ufd77":301,"\ufd78":301,"\ufd7b\u0a38\u10d2\u04fd\u1304\ua570\ufc99\u0916":301,"\ufd7d\u093d\u03c4":301,"\ufd80":301,"\ufd83":301,"\ufd87\u080b\u1435\u0105\u0a20\uff4b\u1fa0\u3121":301,"\ufd89\u176a\ufc5e":301,"\ufd8b":301,"\ufd8e\u0805\u2c43\u0bb9\uab12\u1418":301,"\ufd92":301,"\ufd94\u1829":301,"\ufd96":301,"\ufd99\u30d1\ua13d":301,"\ufd9c\u071b":301,"\ufd9e\u0566":301,"\ufda3\ufea3\u080d\uaa6f\u12f8\u2128\ufdf9\u12b3\u12a1\u1596\u1ed3\u0d8e\u10e2c":301,"\ufda7":301,"\ufdaa":301,"\ufdad\ua0bc\u11c2\ua41d":301,"\ufdae\ua2ba\u2110\u2cc5\u151c":301,"\ufdaf\u16db\ua5a0":301,"\ufdb0\ufc1e":301,"\ufdb2":301,"\ufdb8\u2d01":301,"\ufdb9\ua59f\u1633\ua1ca\ufb3c\u1203\ua747\u01c6\u0ba4\u1666\u1330\uaaba\uab14\u31b7\u09dd\u04f1\ua9aa\ua942\u307c\ua1e0\u1e91\ufb05":301,"\ufdba\ufeea":301,"\ufdc1":301,"\ufdc5\ua6ae\u19c6":301,"\ufdf0\ufe92":301,"\ufdf6\u1e11":301,"\ufdf8\u145a\ufda6\u1db0\u0f65\ua661":301,"\ufdfb\ua6cb":301,"\ufe70":301,"\ufe71\ua207":301,"\ufe74":301,"\ufe76\u0113\u0914":301,"\ufe7c\u1656":301,"\ufe7e\ufd81\u1cf1\u2c4e\ufb58\u15eb\u0144":301,"\ufe80\u15f3\u080f\u116e":301,"\ufe81\uff46":301,"\ufe84\u167a\u1bd2\u1f66\u0b06\u18d1\ua1e1":301,"\ufe88":301,"\ufe8a\u314d\u078e\u07ce":301,"\ufe8d\u0923\ua6b6\ua785\ufe7b\u2c61":301,"\ufe8e\ua47c":301,"\ufe8f":301,"\ufe91":301,"\ufe
9a\u0137\u18ba":301,"\ufe9f":301,"\ufea1\uaa0e":301,"\ufea6\u01cc\u1464":301,"\ufeb9":301,"\ufebe":301,"\ufec0":301,"\ufec1\u1838":301,"\ufec9\ufc56":301,"\ufecd\ua0f2":301,"\ufed4":301,"\ufedba":301,"\ufede":301,"\ufee0\u1207\u1507\u0a2f\ud7dc\u2d02\u30cf\uab76\u03f1\u1407\u0121\u0e15":301,"\ufee2\u01b9":301,"\ufee4":301,"\ufee5\u045b\ua93e\u1ed7\u03f3\u14b9":301,"\ufee9\u1a27\u1f73":301,"\ufeeb\u188f\u1d42\u1bcf\uffd4\ua1d4\u1639\u084c\u1251\u03d1\ua737\u1fa4\u198a":301,"\ufeec\u00fa\u3181\u14d9\ua0e2":301,"\ufef4\u1637":301,"\ufefb":301,"\uff42\ufeef":301,"\uff44":301,"\uff49":301,"\uff4a\u15b9\ufe8b\u16e0\ua9ef\u0582\u0770":301,"\uff4b\uab02":301,"\uff4c":320,"\uff4d":301,"\uff4e\u0223":301,"\uff50":301,"\uff51\ua5fa\u2d2d\ua2ab\uffd5\u2c9b":301,"\uff53\u09b2v\ufb13\u0dc0\uaa8a\u1420\ufc64":301,"\uff55\u012d\u1e45\u0258\u31f9\u1fd3\u1c61\ufb34\u04bb":301,"\uff56\u0824\ufd32\ua420\ua4fc":301,"\uff58":301,"\uff58\u10eb\u1403":301,"\uff59\u11cc\ufb97":301,"\uff67\u16cf":301,"\uff69\u164d":301,"\uff6c\uff8d\u00e8\u0450":301,"\uff6d\u1d74\u04b1\u0a19":301,"\uff6f":301,"\uff71\ua54c\u129a\u195b\ufc41\ufdc4\u312b\ua10f\u2d51":301,"\uff7b\u0f63\ua0fd":301,"\uff7c\u0519":301,"\uff7d":301,"\uff7e\ua2fd":301,"\uff81\u01e1":301,"\uff82\u10d9\ua436\u1968":301,"\uff8e\u16b7":301,"\uff90\u0215":301,"\uff91":301,"\uff95\u1ef1\ua72d\u1555\uff57":301,"\uff9a\u1d7e\ua6c5\ua3e8":301,"\uffa1\ua3c9\u00fe\u12d4\u00fd\ua0b2\u02aa":301,"\uffa2\u122a":301,"\uffa3\ua88b\u2d06\u17d7":301,"\uffa4\u0682\u11f4":301,"\uffa5":301,"\uffa7\u2d84\u03bc\u101f\u0c85\u04ce":301,"\uffaa":301,"\uffab\ua368\ua46e":301,"\uffac\ua574":301,"\uffb1":301,"\uffb3\u2d55\u0242\u1918":301,"\uffba\ua759":301,"\uffbc\ua344":301,"\uffbe\ufeae":301,"\uffc4\ua643\u2145\u09ac\u1e17\ufc6c\u2c3c":301,"\uffc5":301,"\uffcb\u1e19":301,"\uffcc":301,"\uffd6":301,"\uffdb\ua7a7\ud7e0":301,"_\ufba5\ua208\u1ee5":301,"aben\u00e7o":385,"abr\u00e3o":385,"abstract":[13,15,46,47,51,52,57,59,79,87,109,117,122,123,129,133,135,143,159,168,176,181,188,197,210,219,226,227,228,296,312,327,338,354,366,370,375,378,403],"anunci\u00f3":350,"at\u00e9":385,"boolean":[13,30,33,38,44,48,49,50,51,57,71,148,181,187,188,194,200,213,214,221,316,332,366,374],"br\u00e1":385,"break":[57,62,67,85,88,89,100,101,160,302,307,310,343,350,359,369,385,391,399,402,403],"byte":[57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,103,105,106,107,109,111,276,343,359,361,376,403],"c\u00e1":385,"c\u00e9rebro":385,"c\u00e9u":385,"c\u0721":301,"case":[15,33,36,39,43,46,52,55,57,59,64,71,94,97,102,105,110,111,114,118,119,129,130,132,134,139,141,145,153,160,171,172,179,194,200,209,210,214,217,219,245,246,248,250,251,254,257,259,264,265,266,267,268,269,273,274,276,280,282,283,284,285,286,287,288,289,290,291,292,293,297,299,307,317,321,325,326,328,341,346,352,357,359,361,362,363,365,366,367,373,374,375,382,384,387,388,396,399,403,404],"catch":[36,105,343],"ch\u00e3o":385,"char":[57,104,174,305,306,313,359,378],"circunst\u00e2ncia":385,"class":[13,14,15,16,18,23,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,42,43,44,46,47,48,49,50,51,52,53,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,113,115,116,117,118,119,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,137,139,140,141,143,144,145,146,153,156,157,158,159,160,161,162,163,164,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,193,195,197,198,199,200,201,202,203,204,205
,206,208,209,210,211,212,213,214,215,217,218,219,220,221,224,226,227,228,245,246,248,250,251,254,257,259,264,265,266,267,268,269,273,274,276,280,282,283,284,285,286,287,288,289,290,291,292,293,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,316,317,319,322,323,324,325,326,327,333,334,335,337,338,339,340,341,342,343,352,353,355,357,361,365,366,367,373,374,375,388,391,396,398,399,403,408],"come\u00e7":385,"compr\u00f3":350,"conseq\u00fcentement":385,"contr\u00e1rio":385,"cor\u00e1":385,"crian\u00e7a":385,"d\u00e1":385,"d\u1c63\u0972":301,"default":[10,14,28,29,32,33,36,38,39,43,52,57,59,60,61,62,63,64,65,67,68,69,70,71,74,77,80,82,83,85,88,89,91,96,97,100,102,103,105,106,107,109,110,111,115,116,117,118,122,129,130,131,132,133,136,139,141,142,143,145,153,159,162,164,174,176,178,194,200,203,206,212,214,215,217,218,219,220,224,226,228,293,294,297,299,300,302,303,305,307,310,313,314,317,328,329,333,334,335,336,341,342,343,357,359,361,362,363,366,371,374,378,385,386,396,399,403,404],"desminti\u00f3":350,"dicion\u00e1rio":385,"diminu\u00eda":385,"do":[10,23,24,29,37,38,52,57,59,71,76,89,105,109,110,115,117,118,119,122,129,132,134,135,144,147,169,170,171,173,176,179,188,194,199,203,206,211,219,226,228,293,294,302,306,312,333,334,335,336,339,346,352,357,358,359,363,365,366,367,370,374,382,384,385,396,399,401,403,405,407],"ent\u00e3o":385,"enum":[114,353],"esa\u00fa":385,"est\u00e3o":385,"est\u00fapido":385,"esvoa\u00e7":385,"eus\u00e9bia":385,"f\u00falgido":385,"fei\u00e7\u00e3o":385,"final":[25,28,57,65,109,141,159,162,175,188,206,212,302,311,314,317,326,340,357,359,397,408],"float":[29,33,38,47,48,50,51,57,64,76,105,129,132,133,135,142,147,148,163,176,193,195,210,212,214,224,299,316,317,318,321,322,323,327,328,329,332,333,334],"fortaleci\u00f3":359,"fr\u00e1ncfort":359,"function":[13,15,17,25,28,29,30,33,34,35,39,43,47,50,51,52,53,58,61,64,65,70,71,75,77,78,79,84,85,87,97,98,102,105,106,109,110,111,115,116,117,118,119,126,129,132,133,135,138,141,142,143,145,166,173,174,176,177,179,181,182,183,185,187,188,191,193,194,198,199,202,206,208,213,214,216,219,221,224,228,293,294,297,301,303,305,306,313,317,318,319,320,321,326,327,328,330,331,332,334,336,339,340,341,342,343,344,345,346,351,352,357,358,359,361,363,365,366,367,369,374,378,384,385,388,399,403,407,408],"g\u00eanesi":385,"g\u2c31\u176b\u0e26\ua1e8":301,"genu\u00edna":385,"gl\u00f3ria":385,"gra\u00e7a":385,"gro\u00df":[322,323,324,325,326],"h\u00e1":385,"h\u14c8\ua810\ua93a\u14b7\ua46a\ufbaf\u3152\ufb16\uaa75\u120c\u0cb2\ufdb1\ua15c\u12d1":301,"i\u0307":301,"impass\u00edvel":385,"import":[28,30,34,36,38,39,46,52,55,57,62,67,71,75,78,81,88,89,93,100,102,104,105,117,118,129,131,132,133,134,139,141,144,145,163,166,167,169,172,173,174,176,177,183,193,199,200,202,204,205,206,208,209,212,213,215,217,218,219,220,221,222,226,227,228,293,294,295,297,299,300,301,302,303,305,306,307,308,309,310,312,313,316,318,329,333,334,335,343,346,348,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408],"int":[14,28,31,33,48,49,50,51,57,64,71,82,89,90,105,109,115,117,124,126,129,132,133,134,139,144,145,148,153,155,159,161,162,164,168,169,170,171,174,175,176,178,179,183,184,187,189,204,212,214,219,221,224,226,227,228,293,296,303,306,307,310,313,317,318,319,320,321,322,323,324,325,326,327,328,330,331,332,333,334,338,341,343,353,359,366,400,402],"j\u00e1":385,"jal\u
00e3o":385,"je\u00fa":385,"l\u00e1":385,"l\ua112":301,"ljungl\u00f6f":410,"long":[24,52,57,103,205,206,304,322,341,343,359,374,375,385,396,398,400,402,403,405],"m\u00e3e":385,"m\u00e3o":385,"m\u00fasica":205,"ma\u00f1ana":359,"mem\u00f3ria":385,"mo\u00e7a":385,"n\u00e3o":385,"n\u10f3\ua1dbx\u0626":301,"n\ua6b7\u0ca2\ua382":301,"new":[2,10,13,14,25,28,33,46,52,57,58,61,63,65,70,71,72,73,74,77,78,79,81,83,84,85,87,90,92,93,97,98,100,101,102,105,106,107,109,111,115,116,117,118,119,122,128,129,132,133,134,135,139,144,153,159,160,162,164,167,168,169,170,171,173,175,176,179,181,183,187,188,193,199,206,207,209,213,214,219,227,228,293,295,297,299,301,303,306,308,312,313,316,322,323,324,325,326,327,333,334,336,338,341,343,346,352,354,361,363,365,366,369,374,377,385,386,387,393,394,396,397,398,399,403,408],"null":[161,166,317,322,323,324,325,326,327,362],"p\u00e9":385,"p\u00f3stuma":385,"p\u00fablico":385,"p\u3173\u1024\ufc06\ua332\u1ff7":301,"panader\u00eda":350,"pesco\u00e7o":385,"po\u00e7o":385,"princ\u00edpio":385,"public":[137,143,340,341,359,408],"r\ua1b7\ua1e4\u00e3\ua36d":301,"return":[7,13,14,16,23,25,26,28,29,30,31,32,33,34,35,36,38,39,41,42,43,44,47,48,49,50,51,52,53,55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,73,74,75,76,77,78,79,80,81,82,83,84,85,87,88,89,90,91,92,96,97,98,100,102,103,104,105,106,107,109,110,111,115,116,117,118,119,122,123,124,125,126,127,129,130,132,133,134,135,136,139,141,142,143,144,146,147,148,149,155,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,178,179,181,183,184,185,187,188,189,191,193,194,195,197,198,199,200,201,204,205,206,207,208,209,210,211,212,213,214,217,218,219,220,221,222,225,226,227,228,230,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,309,310,311,312,313,314,316,317,318,319,320,321,322,323,326,327,328,329,331,332,333,334,335,336,338,339,341,343,344,352,353,355,357,359,360,361,362,366,367,374,375,381,384,385,386,387,390,395,397,399,403,404,405,408],"s\u00e3o":[385,408],"s\u00faplice":385,"ser\u00e1":385,"short":[28,57,78,90,103,123,201,211,317,331,336,359,384],"sime\u00e3o":385,"speicherbeh\u00e4l":200,"speicherbeh\u00e4lt":200,"speicherbeh\u00e4ltern":200,"static":[28,32,36,44,52,77,79,83,87,111,117,118,123,128,129,143,159,161,164,168,186,200,211,226,292,304,316,325,333,335],"super":[350,391],"switch":[57,88,321,359,384,408],"t\u03bb\u0138\u01c2\u0aa4\u1189\u18b4\ua1fd\u1166":301,"throw":[37,274,356,359,395],"transient":310,"true":[13,14,27,29,30,32,33,34,36,37,38,43,44,46,49,52,57,58,59,60,62,64,65,67,68,70,71,74,78,85,88,89,91,97,98,99,100,102,104,105,109,111,115,116,117,118,119,122,123,125,126,127,129,132,134,135,139,144,159,160,161,162,163,164,170,171,173,176,178,179,180,181,183,184,185,188,189,193,194,195,200,202,203,206,211,212,214,216,217,219,221,224,226,227,228,274,293,294,297,300,301,302,303,304,305,308,310,311,312,313,314,316,317,318,320,332,334,335,336,339,341,343,349,351,352,353,354,355,356,357,359,361,362,363,364,365,366,369,370,371,374,375,376,377,378,379,380,382,384,386,389,390,394,396,399,400,401,403,404],"try":[7,57,64,74,122,124,126,130,132,139,155,227,327,350,355,359,363,364,365,366,370,374,375,378,390],"var":[13,16,25,118,178,183,350,351,366,374],"var\u00e3o":385,"while":[14,29,33,132,139,143,160,167,171,179,193,200,214,221,340,350,352,357,359,366,367,374,388,403,408],"x\u00f5e":385,"y\u1bc2":301,"z\u16a1\u01fd\u3117\u111f\ua152\ua375\u1c08\ua0fc\ufeac\u14b5\uff53":301,A:[0,2,3,7,8,10,13,14,16,18,22,23,25,26,28,29,30,31,32,33,34,35,36,38,39,41,42,43,44,52,53,55,57,58,59,60,61,62,63,
64,65,67,68,69,70,71,72,73,74,77,78,79,80,82,83,84,85,86,87,89,91,95,96,97,98,100,101,102,103,104,105,106,107,109,110,111,113,114,115,116,117,118,119,122,123,124,125,126,127,129,132,133,141,142,143,144,145,146,147,148,157,158,159,161,162,164,166,167,168,169,170,171,173,174,175,176,177,178,179,180,181,183,184,185,187,188,189,193,194,195,197,198,199,200,201,202,203,204,205,206,209,210,211,212,213,214,215,216,217,218,219,220,221,223,226,227,228,293,294,295,296,299,300,302,303,304,305,306,310,311,312,316,317,319,320,322,323,324,325,326,327,329,331,333,334,336,338,341,343,344,346,348,351,352,354,357,358,359,360,361,363,364,365,366,367,371,374,375,378,381,382,384,385,387,388,389,391,396,397,398,399,400,403,408],AND:[57,82,128,185,188,227],AS:[57,82,98],AT:[209,212,219,359,362],And:[15,116,132,302,317,359,386,398],As:[28,57,74,129,132,145,167,179,214,317,322,352,359,363,366,374,375,403],At:[2,28,33,102,147,359,391,406],BE:221,BUT:391,BY:[57,82],But:[25,57,97,102,105,118,157,219,304,358,359,361,366,375,386,391,403],By:[39,52,57,65,68,74,97,105,106,111,116,117,118,122,132,133,139,176,228,297,303,305,318,334,352,354,359,361,374,399,403],For:[10,12,23,25,30,33,35,39,40,42,55,57,59,60,64,69,71,78,79,85,86,87,89,91,93,95,103,105,106,109,111,115,116,117,118,119,123,125,130,132,133,145,156,159,162,167,168,170,171,175,176,178,179,183,188,193,200,203,209,210,212,214,220,224,226,227,228,229,294,295,302,303,305,306,307,317,318,321,325,326,327,333,334,336,339,340,341,342,343,346,352,357,359,360,361,365,366,367,369,371,374,377,380,385,387,388,398,399,403,407,408],IN:[28,118,144,160,163,172,173,174,209,212,215,217,218,219,220,334,354,359,362,380,384,387,388,393,400,401,406],IS:[57,71,341,367],IT:397,If:[0,7,8,10,13,16,25,28,32,33,34,35,36,39,43,44,52,55,57,58,59,60,63,64,65,68,69,70,71,73,74,76,77,78,80,81,82,83,84,89,91,92,96,97,98,100,102,103,105,106,107,108,109,111,115,116,117,118,119,122,123,125,126,129,132,133,134,135,139,141,144,147,153,159,160,161,162,164,166,168,170,171,172,174,175,176,180,181,183,186,187,188,189,193,194,195,201,202,206,211,214,217,218,219,220,221,222,226,227,228,293,295,297,302,305,306,307,309,314,316,317,322,323,324,325,326,327,328,333,334,335,339,340,341,342,343,346,348,357,359,361,363,366,367,373,374,377,379,382,397,399,403,406,407],In:[8,14,25,28,30,33,35,36,39,43,52,57,71,83,84,90,98,102,117,118,119,129,132,135,139,141,142,145,146,147,159,164,167,168,170,171,175,176,179,189,200,214,219,226,228,299,300,303,304,305,307,316,317,318,320,322,323,324,325,326,328,331,332,333,334,336,338,346,348,352,355,358,359,360,361,362,363,364,365,366,367,374,378,385,387,388,390,399,403,404],Ine:107,Is:[36,117,174,181,188,290,311],It:[7,10,14,15,24,25,28,29,35,37,38,43,46,48,49,50,52,53,57,77,79,87,97,98,100,101,102,103,105,107,110,111,115,117,119,124,126,132,134,139,142,152,156,159,162,166,167,168,169,170,171,175,179,183,185,194,198,199,200,203,209,219,221,226,297,298,302,312,317,318,321,326,328,330,332,336,339,348,352,357,359,360,361,363,366,369,374,375,385,388,391,396,398,399,406,408],Its:[2,8,25,159,293,300,378],NOT:[71,185,188,193,221,330],No:[143,169,198,202,305,317,331,336,343,359,363,365,370,382,385,396],Not:[57,71,359,363],OF:358,ON:[57,82,358],OR:[125,128,185,188,224,227,317,336],Of:[24,66,208,358,363],On:[8,57,88,94,102,111,299,307,346,359,360,396,403],One:[91,102,132,133,145,334,366,374,389],Or:[357,396,403],Such:302,THE:358,THERE:358,TO:[37,212,359,384,387],That:[57,71,78,358,365,366,374],The:[0,2,7,8,10,14,15,16,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,38,39,41,42,43,46,48,49,50,51,52
,53,55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,94,95,96,97,98,100,101,102,103,104,105,106,107,108,109,110,111,114,115,116,117,118,119,122,123,124,125,126,127,129,131,132,133,134,135,137,139,141,143,144,145,147,149,153,154,156,157,158,159,160,161,162,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,183,184,185,188,189,193,194,195,197,198,199,200,201,204,205,206,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,224,225,226,227,228,274,293,294,297,298,299,300,301,302,303,305,306,307,308,310,311,312,313,317,318,319,321,322,323,324,325,326,327,328,330,331,332,333,334,335,336,338,339,340,341,342,343,344,345,346,348,351,352,353,354,357,358,359,360,361,363,364,365,366,367,368,369,370,375,377,378,379,380,381,382,384,385,387,388,390,391,394,396,397,398,399,403,405,406,408,410],Their:396,Then:[36,102,119,310,321,324,359,384],There:[8,25,28,57,66,69,85,98,118,119,141,156,159,176,183,184,187,203,206,211,294,295,312,317,326,327,336,359,363,365,366,369,374,391,396,397,398,399,408],These:[18,28,30,33,46,55,57,59,60,68,71,77,79,87,90,104,105,109,111,115,117,132,134,142,145,157,167,176,179,206,211,214,229,295,302,306,315,340,352,354,359,366,367,380,399,407],To:[7,25,36,38,57,74,108,109,111,115,132,134,135,152,159,176,200,214,221,317,318,322,323,333,334,346,358,359,360,363,364,365,366,367,385,404],WITH:[221,359],Will:[24,132,134,408],With:[129,166,183,214,228,326,327,336,350,359,363,399,408],_:[13,57,97,98,160,174,195,300,301,359,362,370,380,388,397],__:[57,71,364],___:[160,364,399,400],____:[160,335,400],_____:[160,399,400],______:[160,400],_______:[160,400],________:[160,364,399,400],_________:[160,364,400],__________:[160,364,400],___________:[160,400],____________:364,_____________:160,______________:160,_______________:160,_________________:399,__________________:[399,400],____________________:400,_____________________:400,__________________________:[160,400],_____________________________:364,_______________________________:[364,400],____________________________________:364,______________________________________________:400,__a__:336,__adjectival_suffix:206,__callback:117,__canva:117,__children:117,__class__:108,__conson:206,__contains__:366,__delitem__:[366,399],__derivational_suffix:206,__dict__:[108,110],__digraph:206,__doc__:131,__double_conson:206,__drag_i:117,__drag_x:117,__draggabl:117,__eq__:[366,378],__file__:[141,385],__getattr__:131,__getitem__:[115,117,366],__init__:[13,14,15,16,23,27,28,29,32,33,35,37,38,39,40,42,43,44,48,49,50,51,52,53,57,58,59,60,61,62,63,64,65,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,97,98,100,101,102,103,105,106,107,108,109,111,113,115,116,117,118,119,122,123,124,125,126,127,128,129,131,132,133,134,135,137,139,141,143,144,146,153,158,159,160,161,162,163,164,166,167,168,169,170,171,172,173,174,175,176,178,179,180,181,182,183,184,185,187,188,193,195,198,199,200,201,202,203,204,205,206,211,212,213,214,215,217,218,219,220,221,226,227,228,293,297,299,300,302,303,304,305,306,307,308,309,310,314,316,317,322,323,324,325,326,327,333,334,335,338,341,342,343,359,361,366],__iter__:366,__len__:[52,366],__li_end:206,__long_vowel:206,__lt__:366,__name__:[110,384],__new__:[52,83,118,129,316],__nonzero__:115,__noun_suffix:206,__parent:117,__perfective_gerund_suffix:206,__press:117,__reflexive_suffix:206,__repr__:[52,129,354,361,366],__restricted_vowel:206,__run:374,__s_end:206,__setitem__:[115,117,366,399],__special_word:206,__st_end:206,__step0_
suffix:206,__step1_suffix:206,__step1a_suffix:206,__step1b_suffix:206,__step2_suffix:206,__step2a_suffix:206,__step2b_suffix:206,__step3_suffix:206,__step3b_suffix:206,__step4_suffix:206,__step5_suffix:206,__step6_suffix:206,__step7_suffix:206,__step8_suffix:206,__step9_suffix:206,__str__:[129,159],__superlative_suffix:206,__updat:117,__verb_suffix:206,__vowel:206,_add_child_widget:117,_anim:399,_arg:143,_block_read:102,_bytes_written:361,_cach:102,_cl:[83,316],_cleartempl:212,_cmd:220,_context_to_tag:219,_conting:143,_counter:[177,363,364,365,371,374,378,382,389,390],_create_training_examples_arc_eag:173,_create_training_examples_arc_std:173,_cumulative_frequ:386,_datestamphasfourdigityear:397,_debug:28,_delta:33,_dev:90,_edg:159,_edge_to_cpl:159,_elementinterfac:[314,343],_end_index:169,_entiti:399,_eofpo:102,_epsilon:253,_estim:176,_evoked_:[57,71,367],_filepo:102,_fn:29,_fn_num:29,_fp:29,_fp_num:29,_gap:303,_get_featur:213,_grammar:[168,175],_head_index:169,_histori:[170,171],_hypothesi:333,_ident:214,_index:159,_init_colortag:117,_input:123,_intercept:176,_io:111,_item_repr:117,_jar:220,_kwarg:143,_lang_var:396,_languagespecificstemm:206,_len:102,_lh:119,_lx:221,_manag:117,_max_r:176,_minimal_:403,_minimum:403,_mlb:115,_mwe:300,_n:143,_neg:194,_nfmap:33,_normal:[57,61],_not_:[57,105,403],_num_leav:159,_package_to_column:111,_param:396,_pars:[57,59,61,75],_path:109,_piec:28,_read:123,_read_block:[57,59,75],_read_int_r:129,_read_number_valu:129,_read_word_block:359,_regex:297,_restart:159,_result:122,_rh:119,_row:115,_rule:28,_rule_tupl:202,_scandinavianstemm:206,_scc_:403,_sentenc:123,_separ:220,_sh:397,_slope:176,_span:403,_stage:28,_standardstemm:206,_start:28,_start_index:169,_str:28,_stream:102,_string_end_r:129,_string_start_r:129,_symbol:119,_tag:[57,59,61,75,117],_tagger:219,_tagword:221,_test:90,_tgrep_exprs_act:294,_token:159,_toknum:102,_tp:29,_tp_num:29,_trace:[28,168,175],_train:219,_tried_:170,_tried_m:170,_type:[57,71],_unload:403,_updat:117,_valu:[177,363,364,365,371,374,378,382,389,390],_var0:[350,351],_verifi:354,_willy65:396,_word:[57,59,61,75],_wordnetobject:105,a00:221,a1:[352,374],a2:[352,374],a3:[352,374],a4:374,a5:374,a6:374,a7:374,a8:374,a_:214,a_and_c:359,a_coru:388,a_dog:370,a_littl:300,a_little_bit:300,a_lot:300,a_man:370,a_man_walk:370,aa0:359,aa:[66,359,360,378],aaa:360,aaaaaaaaaaaa:317,aaaaaaaaaaaaaaa:317,aaaaaaaaaaaaaaaaa:317,aaaababaaccbacb:377,aaai:[67,89],aab:386,aabbbcccc:396,aachen:137,aacut:189,aaddvark:377,aal:359,aalii:359,aam:359,aamir:359,aani:359,aardvark:359,aardwolf:[57,105,208,359,403],aardwolv:[57,105,208,403],aaron:359,ab1:382,ab2:382,ab3:382,ab:[145,163,216,219,359,379,380,396],abaci:[57,105,208,403],abacu:[57,105,208,403],abagael:359,abagail:359,abainia:[198,199],abandon:[359,387],abat:387,abb:359,abbb:386,abbbb:386,abbbc:386,abbei:359,abbi:359,abbot:359,abbott:359,abbr:[302,335],abbrev:[57,71,302],abbrev_backoff:302,abbrev_typ:302,abbrevi:[57,71,189,216,302,335,374,400],abc:[52,109,111,133,296,359,361,366,396],abcabc:366,abcabcabcabcabcabcabcabcabcabca:366,abcabz:366,abcd:366,abcdef:396,abcdefghijklmnopqrstuvwxyz:155,abdelkrim:206,abdic:387,abdomin:349,abet:387,abhaya:328,abid:387,abk:360,abkh:[57,101],abkhaz:[57,101],abl:[15,109,200,204,212,219,340,399,403,405],abn:360,abnorm:[57,81,359,382],abolish:[57,81,359],abomin:[57,81,359],abort:[57,81,359],abounding_with:[57,71],about:[2,25,30,35,36,42,57,69,71,74,78,79,87,89,93,102,103,105,111,118,123,125,158,159,162,164,170,176,182,189,198,199,200,203,210,211,219,224,302,327,333,334,336,340,34
1,343,357,359,363,365,366,367,369,385,387,388,391,396,400,405,408],abov:[57,90,105,136,179,188,212,221,299,302,327,346,352,357,359,360,366,367,385,386,387,398,399,403],above_scor:357,abram:145,abri:385,abrom:145,absenc:194,absent:[168,393],absolut:[52,57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,103,105,106,107,109,132,135,137,195,342,359,361],absolutediscount:137,absolutediscountinginterpol:[132,135],absorb_heat:[57,71,367],abspath:[57,59,359,361],abstractboxerdr:178,abstractcanvascontain:117,abstractccgcategori:13,abstractchartrul:[14,159,168],abstractcollocationfind:53,abstractcontainerwidget:117,abstracteat:375,abstractexpress:181,abstractlazysequ:[52,102],abstractparentedtre:334,abstractvariableexpress:[127,181,188],ac:[57,60,107,123,189,343,385],acabada:385,academ:0,academia:94,acc:[212,365,384],acceler:359,accent:359,accept:[33,57,59,98,115,119,130,174,335,341,359,361,373,405],access:[29,52,57,60,64,68,69,71,74,91,102,105,106,108,109,111,115,117,118,119,120,132,134,176,293,334,338,341,342,346,361,365,374,378,386,387,388,399,408],access_tim:403,access_token:[341,342],accessor:29,accident:[119,229],acclaim:359,accommod:[57,84,178],accompani:[324,325,326,342,359],accomplish:[167,382],accord:[36,53,118,119,137,167,174,179,185,189,211,214,307,317,321,326,327,334,340,352,359,373,383,388],accordingli:[322,323,324,327,346,386],account:[57,71,123,129,132,145,167,317,323,326,346,405],accru:359,accur:[105,179,336,352,404],accuraci:[26,29,43,52,147,193,210,212,221,224,299,304,307,336,380,385,391],acero:214,acet:359,aceto:359,acetochlor:359,aceton:359,achar:385,achiev:[24,200,304,370,403],acirc:189,acknowledg:117,acl2014:[57,104,317,348],acl:[62,92,304,317,328,336,408],aclantholog:[221,352],aclweb:[57,104,179,282,304,317,318,319,332,336,351],acm:[67,81,89,202,330,344,408],acolhuan:381,acontecendo:385,acoust:[30,359],acq:359,acquisit:46,across:[57,71,127,141,188,299,307,388],act:[25,28,52,57,71,79,87,102,115,118,179,183,352,359,366,391,405],action:[57,71,202,317,318,321,328,330,332,338,341,408],activ:[33,87,115,122,194,317,318,321,328,330,346,361,405,406,408],activest:115,activestyl:115,actual:[29,33,57,71,108,122,127,131,147,152,156,160,176,184,294,341,359,366,367,391,399],actual_pdist:176,acudir:385,acut:189,acycl:[343,403],acyclic_branches_depth_first:343,acyclic_breadth_first:343,acyclic_depth_first:[105,343],acyclic_dic2tre:343,acyclic_tre:[105,343],ad:[2,15,36,57,90,102,105,115,117,119,123,159,162,164,168,176,193,201,216,299,300,302,307,312,320,336,343,354,359,365,374,386,403,408],adam:[177,359,374,390],adapt:[71,297,298,335],add:[7,14,15,25,28,33,37,52,57,68,71,74,78,89,109,115,116,117,122,123,125,132,135,139,144,153,159,161,162,164,168,169,170,179,183,188,191,193,294,300,307,312,313,314,316,317,320,321,326,327,333,343,352,354,359,363,369,374,382,384,386,390,395,399,403,408],add_access_token:342,add_arc:161,add_assumpt:[122,374],add_background:[123,363],add_blank_lin:314,add_callback:117,add_child:117,add_default_field:314,add_feat_extractor:[193,391],add_lin:89,add_log:176,add_missing_depend:184,add_mw:300,add_nod:161,add_ortho_context:302,add_py3_data:54,add_root:[57,78],add_sent:[123,363],add_widget:117,addit:[52,57,62,88,98,105,109,117,118,123,129,132,135,139,166,168,169,176,179,188,193,200,201,214,221,226,293,299,302,312,316,320,326,336,343,352,359,361,366,378,388,396,399,403,408],addition:[55,57,105,129,141,189,203,211,339,359],additional_java_arg:166,additional_test:230,address1:365,address2:365,address:[161,167,346,359,362,365],adj:[28,57,77,105,167,209,213,216,349,353,357,359,385,4
03],adj_sat:[57,105],adjac:[154,159,169,335,380],adjd:350,adject:[28,57,71,105,199,208,212,216,219,325,367,403,408],adji:350,adjourn:359,adjp:[400,401],adjud:212,adjunct:374,adjust:[117,142,201,386,391],adjut:87,administr:[86,346],administra:385,admir:359,admiss:[341,365],admit:[299,359],adopt:[353,359,365,388,408],adp:[77,216,357,359],adposit:216,adrian:107,adv:[57,71,77,105,209,216,349,353,357,359,365,367,385,403],advanc:[25,145,188,259,346],advantag:[39,42,131],advd:350,adventur:[359,386],adverb:[57,71,105,208,212,216,219,367],advi:[204,350],advic:334,advis:[132,134,204,359],advmod:160,advp:401,adwait:86,ae1:[66,359],ae:[66,359],ae_kappa:141,aeioui:299,aelig:189,aer:329,aesthetic:381,afecto:385,afenegus6:[57,101],affect:[29,117,377,396],affirm:194,affix:[57,105,196,197,198,199,204,205,206,219,394],affix_length:219,affixtagg:219,afournei:377,africa:[179,352,388],african:388,after:[25,28,33,37,51,57,71,102,111,116,117,119,123,129,132,134,139,146,154,159,162,168,175,176,185,194,199,206,211,224,254,302,306,312,313,317,343,359,360,363,369,396,397,399,403,407],afternoon:24,ag:[57,64,117,353,359,365],again:[7,118,132,259,341,363,366,374,396,403],against:[7,26,29,57,71,144,147,170,181,188,203,210,365,367,390,398],agarw:328,age_year:[57,64],agenda:[128,159,164,184],agent:387,agglom:[46,49],aggreg:[188,224,321],agnew:384,agnost:[299,307,318],ago:353,agor:385,agr:[141,365,366,384,393,394],agra:[57,101],agrav:189,agre:[57,105,141,226,394,403],agreement:[140,145,259,373,408],agreestat:259,aguirr:[57,64],ah0:[66,359],ah:[66,349],ahc:370,ahd:107,ahead:[333,408],ai:[202,217],aid:[57,59,152],ailment:[57,71],aim:[173,196],ain:195,ainda:385,aint:195,air:153,aircraft:405,airspe:[160,215,218,220],aj0:349,aka:[168,317,320,321,329,330,343,387],akin:377,al:[57,71,103,104,214,299,307,317,321,326,332,335,353,359,369,398,403],ala:396,alavi:328,albania:[179,352],albb:301,albert:141,albufeira:385,alcorc:388,alcuin:107,aldjfalskfjaldsf:380,aldjfalssjjlldss:380,alefsym:189,alegava:385,alert:[57,62,129,359,391],alex:[321,365],alexand:[107,200],alexi:410,alfr:107,alg_opt:173,algarv:385,algarvia:385,algarvio:385,algasai:408,algeria:[198,199],algier:198,algnsent:316,algorithm:[2,33,35,37,38,42,46,48,50,57,86,104,133,137,142,146,154,159,162,164,167,169,173,175,176,185,196,198,199,201,202,203,206,213,214,221,228,247,274,299,302,307,310,320,322,323,324,325,326,327,331,332,334,335,336,343,344,355,366,384,396,408],alguma:385,algun:385,alia:[33,57,62,67,81,83,85,88,89,100,132,134,316],alias:[118,188,366],alibaba:301,alien:[132,134,139],alif:[198,199],alifmaqsura:[198,199],align:[57,60,104,117,142,145,247,283,316,319,320,322,323,324,325,326,327,328,329,331,332,348,396,408],align_al:[322,323],align_block:319,align_log_prob:319,align_text:319,align_token:[312,313],align_word:328,aligned_s:[57,58,316,398],alignedcorpusread:[57,58],aligneds:[57,58,316,322,323,324,325,326,327,398,408],alignedsent_block_read:[57,58],alignedsentcorpusview:58,alignment_error_r:[329,398],alignment_info:[322,323,324,325,326,327],alignment_t:[323,324,325,326],alignmentinfo:[326,327],alik:406,alin:[140,247,408],all12:370,all:[8,15,16,20,22,28,29,32,33,34,35,38,42,43,44,46,51,52,53,57,59,60,62,63,64,66,67,68,69,70,71,72,74,76,77,79,80,81,82,83,84,87,88,89,91,96,98,103,105,106,107,109,111,113,115,116,117,118,119,123,125,127,128,129,131,132,133,134,135,141,142,143,159,160,161,162,164,165,167,169,170,171,175,176,178,179,180,181,183,185,187,188,193,194,195,198,200,201,203,209,211,214,216,217,219,220,221,224,226,227,228,229,274,293,294,299,302,303,305,306,314,317
[... searchindex.js: regenerated Sphinx full-text search index for the NLTK documentation build. The file is a single machine-generated mapping from alphabetized tokens (API identifiers, corpus names, and word forms such as allow, alpha_convert, bigramcollocationfind, chunkedcorpusread, ...) to lists of document IDs; it is not human-editable content and the raw data is omitted here ...]
8,173,176,178,181,187,188,200,201,214,219,228,247,303,310,317,321,326,327,330,332,333,334,335,336,341,343,346,350,354,357,359,362,363,365,366,367,368,378,384,390,403,405],differenti:221,difficult:[46,196,214,302,321],difficulti:[333,408],digit:[57,105,148,188,217,312],digo:385,digraph:[57,105,161,206,343],dimens:[46,48,49,50,51,167,310,369],dimension:[46,51,167],dimensionsion:[48,49,50,51],dimitriadi:410,dine:[358,359],ding:89,dinheiro:385,dinner:[358,369],dir:[13,155,161,173,361,387],direct:[13,14,15,57,71,115,117,155,161,167,176,214,317,318,321,322,323,324,325,326,328,330,332,336,341,343,359,387,391],directedbinarycombin:15,direction:316,directli:[33,40,43,53,57,58,89,97,100,109,111,118,133,135,161,176,206,211,293,295,302,306,324,334,336,354,359,364,365,366,384,399,408],director:[163,359,362,388],directori:[39,57,58,59,60,61,62,63,64,65,67,68,69,70,71,74,76,77,79,80,82,83,85,87,88,89,91,95,96,97,98,100,101,103,105,106,107,109,126,129,158,166,178,179,218,341,342,346,352,353,359,361,365,374,387],dirnam:141,disabl:[57,71,105,109,145,302,335,343],disadvantag:302,disagr:[141,259],disallow:[13,57,105],disamb_onli:[57,74],disambigu:[30,33,57,74,92,105,344,359],disappear:[160,359,365],disassembl:359,discard:[33,46,105,169,201,212,224,303,306,343,385,399,403],discard_empti:[57,58,62,65,67,70,85,88,89,97,303],discard_lin:109,disco:335,discontinu:[320,332,335,387,400],discoteca:385,discount:[132,135,137,176],discourag:366,discours:[57,96,121,156,157,177,178,181,183,347,374],discourse_demo:123,discourse_fixt:229,discourse_id:178,discoursetest:[123,363],discov:[46,359],discoveri:[81,89,293],discuss:[33,119,132,134,179,214,302,352,359,365,366,385,403,406,408],disj:117,disjoint:228,disjunct:[188,190,378,388],disk:[52,102,158],dislik:23,dismast:293,dismiss:160,disp:385,dispers:112,dispersion_plot:[114,293],displac:325,displai:[2,10,28,32,52,57,71,108,109,111,114,115,116,117,118,123,153,159,162,164,168,175,176,293,335,340,341,343,358,359,361,366,374,385,403,406,408],display_al:153,diss:[299,385],dissert:[142,184,187,299,311],dist:[95,359],distanc:[42,50,51,57,105,117,140,141,148,317,325,335,343,408],distant:332,distinct:[33,57,105,153,214,359,365,398],distinguish:[37,119,156,325,380],distort:[324,325,326],distortion_factor:333,distortion_scor:333,distortion_t:324,distribut:[31,33,35,36,46,47,55,57,62,67,71,73,76,81,82,86,88,89,92,94,107,110,111,132,134,135,147,176,203,211,214,221,293,319,322,323,324,325,326,327,359,361,380,385,386,387,391,407,408],distsim:220,dita:385,ditto:202,div:77,dive:359,divers:359,divid:[2,29,35,57,61,62,79,85,87,97,101,105,107,174,176,189,222,293,295,296,300,302,305,306,317,341,359,366,373,386,387],divis:[317,328,403],divisa:359,dixon:145,dizendo:385,dizer:385,dizia:385,dj:325,dl:[330,344],dlr:359,dm:[299,341],dnx2:[57,82],dnx:[57,82],do_continu:[338,341],do_kw:141,do_kw_pairwis:141,do_stop:338,dobj:[160,362],doc:[12,28,30,57,60,71,72,100,102,110,132,189,193,338,339,341,359,365,367,370,373,388,391,407],docid:[57,71],docnamepattern:[57,71],docno:[72,359],docs_metadata:[57,71],docstart:[57,68],docstr:[57,90,110,115,129,224],doctest:[129,212,230,280,301,374,377,384],doctor:[98,160,311],doctyp:[72,359],document:[28,30,34,38,55,57,58,60,61,63,65,67,68,70,71,72,73,74,77,81,84,85,89,90,92,94,97,100,101,102,105,106,107,108,110,117,120,130,131,170,174,176,189,193,194,206,214,220,224,293,310,334,337,340,341,343,344,359,388,391,401,406,408],document_featur:30,doddington:330,dodg:[57,71],dodger:391,doe:[24,25,28,29,32,33,34,46,48,52,53,57,71,77,78,79,87,105,106,109,115,117,118,129,132,133,135
,145,156,171,201,211,212,214,221,222,226,293,299,302,307,313,321,327,333,334,346,356,359,361,364,365,366,374,385,395,397,399,403],doesn:[57,71,109,129,172,179,195,214,307,312,317,322,363,374,375,391,396,397],doesnt:195,doesnt_match:369,dog:[25,57,61,105,119,159,160,162,164,171,172,176,177,188,208,213,217,294,334,343,352,354,359,361,362,363,364,365,368,371,372,374,378,382,384,390,393,399,403],dog_lemma:403,doi:385,dolist:384,dom0:390,dom1:390,dom2:390,dom3:390,dom:[177,183,352,385,390],domain:[57,74,103,125,156,177,179,183,297,302,352,363,390,403,408],domest:[399,403],domestic_anim:[57,105,343,403],domestic_dog:403,domin:[36,276,294,334],don:[102,119,125,132,141,183,195,202,229,312,313,354,366,375,382,386,395,396,399],donat:408,done:[28,46,48,57,71,117,131,132,145,164,167,178,187,214,297,305,331,355,359,370,395],dont:195,dooh4:202,door:388,dop:335,dot2img:[57,105,161,343],dot:[14,33,57,71,105,159,161,162,164,168,188,217,343,367],dot_digraph:159,dot_str:[57,105,161,343],dotal:[57,58,62,65,67,70,85,88,89,97,303],doto:385,dotprod:33,doubl:[117,194,206,363,366,396],double_dash:[298,312],double_neg_flip:194,dough:350,dove:382,down:[2,82,111,115,117,132,159,162,164,170,185,359,365,384],downcas:297,download:[0,55,64,109,129,137,220,343,346,359,374,407,408],download_dir:111,download_gui:111,download_shel:111,downloadergui:111,downloadermessag:111,downloadershel:111,downward:[116,117],dozen:[345,408],dp1:399,dp2:399,dp:[361,362,365,374,399],dpd:386,dr1:[98,359],dr:[98,178,181,182,359,364,365,370,371,374,396],drag:[116,117],draggabl:[116,117],drain:153,draw:[0,57,105,147,181,182,293,334,335,385,406],draw_pars:168,draw_tre:116,drawback:321,drawn:[36,90,94,117,181,367],dream:[160,359],dreamteam:396,dress:359,dressing_wel:359,dridan:304,drive:[57,105,179,352,359],driven:[325,406],driver:185,drizzl:359,drop:[10,33,333,359,361,408],drs1:[178,364],drs2:[178,364],drs3:364,drs_conc:181,drsdrawer:181,drss:[178,364,371,374],drswidget:182,drt:[177,178,347,363,365,370,374],drt_discourse_demo:123,drt_glue_demo:177,drtabstractvariableexpress:181,drtapplicationexpress:181,drtbinaryexpress:181,drtbooleanexpress:181,drtconcaten:181,drtconstantexpress:181,drtequalityexpress:181,drteventvariableexpress:181,drtexpress:[178,181,364],drtfunctionvariableexpress:181,drtglue:[184,371],drtgluedemo:182,drtgluedict:184,drtglueformula:184,drtgluereadingcommand:[123,363],drtindividualvariableexpress:181,drtlambdaexpress:181,drtnegatedexpress:181,drtorexpress:181,drtparser:[178,181,363,364,374],drtproposit:181,drtstring:364,drttoken:181,drtvariableexpress:[181,364],dstoffset:338,dt:[25,28,39,57,61,160,163,172,174,209,212,215,217,218,220,227,294,338,354,359,362,363,365,384,387,388,401],dti:[219,396],dtv:350,dtype:38,du:[322,373],dua:385,duan:145,dub:359,dublin:385,duck:312,ductor:400,due:[89,194,196,221,304,317,359,403],duh:332,duke:359,dull:[93,392],dum:205,duma:205,dummi:[161,173,327],dummysmilei:[297,396],dump:[57,100,102,179,302,356,384,386,396],dun:357,dunningham:145,dunt:381,duplic:[105,123,132,134,193,228,317,343,359],durat:[29,189,213],dure:[102,118,119,127,132,167,209,211,213,219,302,312,323,324,325,326,327,328,359],durwood:396,dusseldorf:39,dutch:[57,61,104,189,206,304,359,388,394,408],dutchstemm:206,duti:359,dv:326,dvar:366,dwarf:320,dwayn:145,dx:117,dy:117,dynam:[156,159,162,175,214,336],e01:[364,378],e02:[364,378],e083:378,e092:378,e0:389,e1:[202,378,389],e2:[145,378,389],e2f:320,e:[2,7,14,16,23,24,25,28,29,31,33,34,35,36,37,38,43,44,46,48,52,57,58,59,61,63,64,65,66,68,70,71,73,74,78,81,84,85,86,89,90,92,97,102,104,105,
106,107,108,109,111,115,116,117,118,119,125,127,128,129,132,134,141,142,143,144,145,148,153,159,161,162,164,166,168,174,175,176,180,181,182,183,185,188,193,194,195,198,199,200,204,205,209,210,212,214,218,219,220,222,227,228,293,294,295,296,299,301,302,303,305,312,317,318,321,322,323,324,325,326,327,331,332,334,336,338,339,340,341,343,346,353,355,357,359,361,364,365,366,367,370,374,378,380,381,385,386,388,390,396,399,401,403,408],e_end:331,e_start:331,ea:378,each:[2,14,23,25,27,28,29,30,31,32,33,34,35,36,38,43,44,46,47,48,49,50,51,52,53,55,57,58,59,60,62,63,64,65,66,68,69,70,71,74,76,77,78,79,80,82,83,84,85,87,88,89,90,91,92,93,96,97,98,102,103,105,106,107,108,109,111,115,116,117,118,119,123,129,132,134,139,141,143,144,149,153,154,155,156,157,159,160,162,164,166,167,168,169,170,172,173,175,176,178,179,180,188,191,193,194,209,210,211,212,213,214,217,218,219,220,221,222,224,226,227,228,253,293,296,300,302,307,310,312,313,314,316,317,319,321,322,323,325,326,327,328,331,332,333,334,335,341,352,353,357,359,363,365,366,367,369,374,381,384,385,387,388,391,401,403,405],eacut:189,eager:[173,349],eamt:398,ear:202,earlei:[159,162,384,408],earleychart:[156,384],earleychartpars:[162,384],earli:[179,352,358,408],earlier:[28,46,49,132,134,170,171,179,212,352,363,365,374,385,388],earliest:[7,8,171],earth:[359,403],easi:[57,88,132,185,211,228,297,359,371,398,406,408],easier:[29,117,179,293,327,339,352,359,387],easili:[118,176,214,224,339,359,364,366,369,398],east:[57,77,88,107,357,359,408],east_germani:352,easy_instal:407,eat:[16,57,66,88,129,213,322,350,351,359,365,375,403],eater:359,eateri:[129,375],ebook:408,ecirc:189,econom:[57,88,173,359],economist:388,ed:[57,66,105,123,212,219,299,307,343,380],edg:[2,14,105,144,148,159,161,162,164,167,168,174,214,335,343,357,365,384],edgar:107,edge1:168,edge_closur:343,edgei:[14,159,162,164,168],edges2dot:343,edinburgh:320,edit:[111,113,141,145,176,361,365,380,408],edit_dist:[145,380,408],edit_distance_align:[145,380],editor:[60,388],editori:[359,386,408],edu:[42,53,57,61,62,64,66,67,69,71,81,86,88,89,92,94,98,103,104,105,110,137,145,148,172,179,211,220,312,328,334,352,359,374],educ:[62,98,406],edward:[0,365,370,406,408],eet:378,ef:[195,396],efeito:385,effbot:189,effect:[28,33,173,198,199,302,335,340,388,405],efficacit:148,effici:[46,49,102,109,119,129,132,156,159,162,176,209,211,214,295,306,343,359,363,403,408],effort:[345,408],efg:396,eflom:283,eg:[57,68,103,105,196,221],egeu:359,egg:[230,357,403],egrav:189,egypt:[107,313,396],eh1:359,eh:[66,359],eht:334,eid:[160,220],eight:[228,406],eighth:195,eighti:353,ein:[322,323,324,325,326,398],einen:396,einzukaufen:396,eisner:169,either:[14,31,38,52,57,58,59,60,63,64,65,68,69,70,71,73,74,77,79,80,81,82,83,84,87,91,92,96,97,102,103,105,106,107,111,115,117,118,119,122,129,147,159,162,164,168,185,203,210,213,214,228,294,302,303,305,318,334,335,336,357,359,361,366,367,374,375,391,401],ej:388,ekaterinburg:410,el:[57,104,313,350,359,382,385,386,396],ela:385,elect:[219,359,396],electr:400,electron:[160,301,388],eleg:152,elem:[314,343],element:[7,8,14,31,33,43,44,52,57,60,61,71,77,78,79,82,87,90,91,96,98,106,109,115,117,118,125,129,149,154,157,159,170,171,175,179,183,210,220,222,296,305,307,314,316,318,327,331,334,335,339,343,352,359,365,366,367,387],elementari:385,elementtre:[55,57,60,77,78,91,103,106,129,314,343,359,387,397],elementtree_ind:343,elementwrapp:129,eleph:[166,362,382],eleprobdist:[35,36,176,386],elif:[366,384],elimeq:178,elimin:[119,159,363,370],eliminate_equ:[181,364],eliminate_start:119,elisabeth:[299,307],elisp:25,e
liza:[18,24],eliza_chat:19,elkouri:201,ell:403,ellips:[57,71],ellipsi:302,elogio:385,els:[52,118,119,122,129,130,145,183,219,326,366,375,385,386,390,399,402],elt:[60,77,78,91,106],elt_handl:[60,77,78,91,106],elucidarium:107,elvish:360,elziu:388,em:[46,205,214,322,323,324,325,326,327,385],em_ibm1:398,email:[111,321,396],emb:364,embed:[57,61],emcluster:48,emerg:405,emili:276,emit:214,emma:359,emnlp:332,emot:359,emoticon:[194,297,391,396],emphasi:[359,391],empir:33,emploi:401,employ:[57,71,367],employe:[57,71,367],employment_start:[57,71,367],empr:385,empti:[8,24,50,52,79,87,102,109,115,118,119,132,141,147,159,168,170,171,174,175,176,189,228,294,303,313,318,326,334,354,359,366,373,374,387,388,391,395,397,399],empty_first:184,emptypredictrul:[159,162,164],emptyset:117,emsp:189,emul:[332,404],en:[57,74,77,104,145,148,200,216,307,313,341,343,350,357,359,360,398],en_em_dash:311,en_wsj:215,enabl:[57,71,74,105,127,131,408],encapsul:[117,179,316,352],encarei:385,enclos:[117,365,405],encod:[25,28,29,30,33,34,39,41,52,57,58,59,60,61,62,63,64,65,67,68,69,70,71,73,74,75,77,78,79,80,81,82,83,84,85,87,88,89,91,92,93,95,96,97,98,99,100,101,102,103,105,106,107,109,111,118,119,129,130,159,160,172,174,176,183,184,188,189,191,194,198,209,210,211,215,218,219,220,295,304,308,309,311,314,334,339,340,343,355,359,365,366,387,399],encode_json_obj:[211,217,219,226,227,361],encoding_demo:41,encomenda:385,encontramo:385,encount:[33,35,57,71,109,111,219,366],encourag:[332,359],end1:228,end2:[217,228],end:[14,28,52,57,68,71,97,98,102,109,115,118,129,130,132,159,162,164,169,171,174,175,188,198,199,200,201,202,211,217,224,226,228,276,299,300,302,303,306,310,312,313,331,333,334,343,352,354,355,359,364,366,374,378,390,391,396,397,399,403],end_i:[296,303,306],end_index:169,end_of_list:374,end_posit:129,end_r:102,end_siz:[124,374],end_toknum:102,end_w5:201,end_w6:201,end_word:78,ending_quot:[298,312],endors:203,endpoint:[226,341],endswith:385,enfermo:385,enforc:[226,359],enfraquecida:385,eng:[57,64,105,209,353,360,403],engend:359,engin:[25,199,406],england:[98,359],english:[14,57,72,103,104,105,107,119,179,196,206,209,220,295,299,302,304,307,311,325,331,352,357,359,360,361,367,385,394,403],english_fd:360,englishpcfg:172,englishstemm:206,engmalt:166,engvers:94,enhanc:[145,221,345,408],enjoi:399,enlighten:[14,24],enorm:[195,385],enough:[132,153,156,168,195,361,365,381,391,396],enquanto:385,ense2:[57,78],ensp:189,ensur:[33,46,48,57,71,115,117,159,176,188,215,229,251,274,307,317,318,321,328,330,332,336,346,359,380,395],ensure_load:[57,59,108],ent:385,entail:[37,90,105,359,363,374,408],enter:[116,117,182,214,338,341,358,359,385],enteroscop:381,entertain:[391,408],entir:[14,28,52,57,59,79,87,98,102,115,117,159,164,168,169,171,175,195,211,226,316,326,327,346,359,361,374,387,391,408],entiti:[25,27,29,37,39,57,67,68,72,74,91,105,177,179,183,184,188,189,220,313,339,343,352,359,363,367,382,390,399,403,406,408],entity_1:67,entity_2:[57,67,359],entity_field:339,entity_specific_co:359,entity_specific_modes_b:359,entity_typ:[339,378],entitytyp:188,entr:385,entreaberta:385,entri:[16,57,66,71,83,91,99,102,104,105,111,118,132,139,144,167,169,175,179,219,302,322,323,324,325,326,359,361],entropi:[33,86,132,133,176,214,355,377,408],entropy_cutoff:[32,355],entrydialog:117,enumer:[52,167,313,328,332,361,399],env_var:129,environ:[39,57,100,129,166,178,218,220,309,343,346,374],environment:[57,100],eof:[102,306],epilogu:107,episod:390,eprint:321,epsilon:[117,119,142,189,213,317,318],eq:[128,188,330],eq_list:188,equ:67,equal:[33,35,43,51,52,57,67,77,102,
105,115,117,118,119,132,139,145,147,148,164,170,176,178,181,187,188,194,299,307,322,334,359,371,386,399],equal_valu:[118,366],equality_pr:[188,378],equalityexpress:[181,188],equat:[33,57,105,176,403],equilibrar:385,equip:359,equiv:[181,188,189,371],equival:[15,25,28,51,52,118,132,134,141,176,181,183,188,228,318,322,334,336,350,371,374,378,399],er0:[66,359],er:[57,66,72,105,200,331,358,373,385],er_99:72,era:385,erfcc:319,erico:205,ern:200,err:[57,101],errant:386,errata:398,erron:224,error:[25,32,39,57,71,99,105,109,111,119,129,146,151,161,176,183,194,199,212,224,225,274,276,293,304,314,319,328,329,339,340,341,354,355,359,366,375,383,384,386,403],error_list:225,error_output:224,erroranalysi:223,errormessag:111,errt:[146,383],erwartet:333,es:[57,103,104,105,350,385],escap:[109,129,188,313,335,359,366],eschbach:335,eschbachguentherbecker2006:335,esconjuro:385,escutava:385,esequ:385,esn:200,esp:[359,385,388],especi:[43,52,98,193,195,359,405],esperam:385,esperanto:[57,101],espetado:385,espraiavam:385,ess:385,essa:385,essenti:[52,119],est:[57,105,385,386],esta:359,establish:[167,215],estado:385,estadounidens:359,estar:385,estava:385,estim:[33,35,36,38,46,48,132,176,214,317,322,323,324,325,326,327,359],estivess:385,estonian:359,et:[57,71,103,104,107,214,299,307,317,321,326,332,335,353,359,369,378],eta:189,etc:[57,71,102,105,111,117,118,176,177,226,227,293,302,325,334,336,346,359,367,380],eterno:385,eth:189,etre:[129,359,387,397],eu:[316,359,385,403],euc:[57,75,101],euclidean:51,euclidean_dist:[46,51],euml:189,euphrosyn:107,euro:[189,359],europarl:[57,85],europarl_raw:55,europarlcorpusread:[57,85],european:[301,302],eurythm:388,eustac:107,ev:374,evad:[57,71,367],eval:[129,163,173,320,384],evalu:[25,26,28,29,52,72,92,129,132,133,136,146,148,156,177,179,191,193,194,198,200,209,210,212,213,219,282,291,301,304,317,318,320,321,328,330,332,352,366,380,385,386,391,408],evaluate_s:[191,390],even:[25,57,59,118,132,139,167,224,340,359,365,366,403],event:[57,60,71,115,117,119,143,176,178,188,214,359,367,380,386],eventtyp:188,eventu:[105,343,366,374],eventvariableexpress:[181,188],ever:[23,306,358,374,384],everi:[13,33,39,43,57,61,79,87,102,106,117,118,123,127,132,159,164,167,175,176,179,181,188,194,219,226,227,317,333,334,336,354,358,359,360,363,364,365,366,371,374,385,390,398,399],every_girl:370,every_girl_chas:370,every_girl_chases_a_dog:370,every_girl_chases_yprim:370,everygram:[132,136,343,377,408],everyth:[57,85,109,132,297,354],everywher:[311,326],evid:[57,71,219,302,357,385,396],evok:[57,71,367],ew:357,ewan:[0,365,370,406,408],ex2:[57,82],ex:[57,82,125,161,178,188,214,341,359,385],ex_quant:[363,370,371],exact:[127,143,147,181,221,328,357],exact_match:328,exactli:[31,36,71,118,132,184,188,327,386,399],exahust:[159,162],examin:[30,211,359,385,399],examlpl:161,exampl:[14,25,28,29,30,33,34,36,39,46,52,55,57,58,60,62,66,71,77,78,79,85,86,87,90,91,95,97,98,101,102,103,105,106,107,109,110,117,119,123,129,130,131,132,133,137,141,143,145,148,152,156,159,166,167,171,173,176,178,179,182,183,188,189,193,194,209,210,215,220,251,259,276,282,294,295,297,300,303,305,306,309,310,312,317,318,321,325,326,327,332,333,334,336,339,340,341,343,344,346,351,352,357,359,361,365,366,367,368,369,371,374,377,378,380,387,388,390,391,395,398,399,403,408],example_word:78,exce:[25,189,359],exceed:[25,155],excel:391,except:[31,34,42,57,59,71,84,85,105,108,109,115,119,125,126,127,128,129,130,160,161,173,178,179,181,183,187,188,294,297,303,305,314,334,341,352,355,356,359,360,361,364,366,370,377,378,390,396,397,399,403],exception:195,excer
pt:[57,71,206],excessivament:385,exchang:359,exchequ:359,exclud:[57,64,71,211,219,226,302,305,323,324,325,326,366,395],excludesf:[57,71],excludezero:[226,228],exclus:[38,98,109,224,333],exec_tim:359,execut:[2,28,39,122,126,129,179,194,215,218,220,340,352,359,374,384,388,396],executivo:385,exemplar:[57,71],exemplifi:224,exemplo:385,exercis:[2,92,254,264,292,346,358,363,398,408],exhal:359,exhaust:[159,162,317],exhibit:[124,126,359],exist:[14,46,57,59,83,103,105,109,111,117,118,125,128,129,146,159,160,161,162,164,168,171,172,176,178,181,183,185,186,187,188,189,199,203,324,334,345,346,351,359,361,363,364,365,370,371,374,378,382,390,397,399,403,408],existenti:378,exists_list:188,existsexpress:188,exit:374,exodu:107,exp:[33,330],expand:[7,25,28,111,116,119,164,168,170,224,226,228,333,334,354,365,408],expand_thread:[123,363],expand_tweetid:341,expand_tweetids_demo:340,expandable_product:170,expanded_tre:116,expandleftrul:[25,28,354],expandrightrul:[25,28,354],expandunari:[334,336],expans:[7,142,170,333,365],expansion_scor:333,expect:[27,28,30,46,48,52,57,59,71,90,111,129,132,133,134,135,136,141,159,174,176,188,193,195,221,228,300,301,306,311,312,313,320,322,327,333,334,359,363,364,365,366,367,375,378,386,396,399],expected_albb:301,expected_amz:301,expected_cas:301,expected_detoken:312,expected_low:301,expected_output:313,expected_rkt:301,expected_s:301,expected_token:312,expectedmoretokensexcept:188,expens:[46,102,129,143,326],experi:[2,176,304,317,321,341,359],experienc:[359,367],experiment:[154,198,315,333],expert:398,expir:391,explain:[33,350,365],explan:334,explet:359,explicit:[34,312,359],explicitli:[34,35,57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,103,105,106,107,111,132,134,176,334,359,361,366,382],explod:388,exploit:62,explor:[2,3,7,8,251,293],explos:[217,403],expon:143,exponenti:[33,302,333],exportselect:115,expos:[359,387],expose_01:387,exposi:385,expr:[126,183,184,188,384,390],expr_tt:[57,82],expr_uid:[57,82],expres:305,express:[1,3,6,16,23,25,28,29,32,35,36,52,57,59,60,63,64,67,68,69,70,71,74,80,82,83,84,91,96,102,103,105,106,107,109,122,123,124,125,126,127,128,129,160,177,178,181,183,185,187,188,189,190,191,204,214,219,228,293,294,295,297,300,302,303,305,312,313,322,323,324,334,339,343,351,352,354,357,359,361,363,364,365,367,370,374,378,382,385,388,389,390,399,408],expung:361,ext:[359,361],extend:[46,57,71,104,115,118,122,126,132,170,171,176,181,334,359,363,391,399,403,408],extens:[57,65,109,111,122,137,145,164,166,179,203,214,302,316,352,359,361,374,390],extension:179,extent:28,extern:[33,34,44,55,57,124,126,294,327],extra:[28,33,57,71,129,168,176,334,359,375,394,397,408],extract:[14,37,39,43,57,60,72,94,104,111,159,161,162,173,179,185,189,191,193,194,201,213,218,226,228,305,318,321,331,333,334,339,343,350,351,352,369,374,385,386],extract_bigram_feat:194,extract_featur:[173,193],extract_field:339,extract_properti:[211,226,227],extract_rel:[189,388],extract_test_sent:[174,384],extract_unigram_feat:[194,391],extractor:[30,193,194,362],extramur:381,extran:37,extrem:[57,89,195,359,361],ey2:359,ey:[57,66,71,367],f1:[35,118,351,359,363,365],f2:[118,359,365],f2e:320,f3:359,f4:370,f:[28,29,32,33,46,52,57,66,71,87,98,104,105,110,111,118,132,134,142,147,178,183,184,200,212,305,318,331,332,336,343,351,359,361,364,365,366,367,369,374,375,378,380,381,384,385,386,387,389,390,391,397,398,399],f_align:331,f_end:331,f_id:33,f_measur:[29,147,193,354,380,393],f_start:331,fa:[397,403],fabl:358,fabul:195,face:[25,57,81,359],facil:330,facilit:[189,193,224,408],fact:[89,115,159,179,352,359,363],factor:[5
3,145,176,224,326,330,333,334,336,401],factori:[110,176,181,187,188,219,228],factory_arg:176,factory_kw_arg:176,facugamband:396,faculti:369,fail:[37,109,118,129,183,302,341,350,365,366,374,395],fail_on_unknown:293,failobj:52,failur:[276,388,403],fairi:359,fairli:359,faith:[203,405],fake:[57,105,403],fake_:403,fakecorpu:403,fall:[46,145,219,326,391],fallback:318,fals:[13,16,25,29,32,33,34,36,37,38,43,47,48,49,50,51,57,59,60,64,68,71,74,80,85,88,89,96,98,102,103,105,106,109,111,114,115,116,118,119,122,123,124,126,127,128,129,132,139,144,145,148,159,160,161,162,166,171,172,173,174,176,178,179,180,181,183,184,185,188,189,193,194,200,202,206,212,213,214,215,217,219,220,221,224,226,227,228,276,293,294,295,297,298,301,302,303,304,305,307,308,309,310,311,312,313,317,332,334,335,336,338,339,341,342,343,349,351,355,358,359,361,364,365,366,369,370,374,375,378,380,382,384,389,390,396,398,399,403],falta:385,famili:[16,123],familiarizamo:385,famou:[57,62,317,359],fanci:399,fang:94,fantast:391,far:[14,95,132,170,171,212,333,334,359],faro:385,farsi:359,fartura:385,fass:[324,325,326],fast:[57,66,67,71,200,212,224,228,343,359],faster:[57,71,343,374,385],fat:153,father:[357,359,378],father_of:389,fault:391,fauna:385,favorit:311,favorite_count:339,favour:333,fazer:385,fazia:385,fb:396,fcfg:[109,174,185,191,352,361,363,364,365,373,384,390],fcl:385,fcp1:384,fcp2:384,fd1:[385,386],fd2:386,fd:[385,386,397],fdict_class:[118,364],fdist:176,fdopen:397,fe2:[57,71],fe:[57,71,367],fe_rel:[57,71],feat0:[361,365,384],feat0cfg:365,feat1:365,feat:[57,89,359],feat_val:33,featdict:[118,366],featgram:347,featlist:[118,366],featstruct:[0,180,334,347,364,365,393,399,410],featstructnontermin:[164,364],featstructread:[109,118,364,366],featur:[32,34,35,36,37,38,41,43,44,52,57,67,74,89,102,105,109,132,133,142,158,164,167,173,174,180,191,193,194,201,210,211,213,217,219,223,224,227,228,299,307,315,355,359,361,364,390,391,399,403,408],feature1:228,feature2:228,feature_corpu:102,feature_detector:219,feature_extract:38,feature_func:[43,193,213],feature_nam:32,feature_probdist:35,feature_select:38,feature_valu:32,feature_vector:33,featurebottomupchartpars:[164,384],featurebottomupleftcornerchartpars:[164,384],featurebottomuppredictcombinerul:[162,164],featurebottomuppredictrul:164,featurechart:[156,162,365,384],featurechartpars:[162,164,384],featurecompletefundamentalrul:162,featurecompleterrul:162,featuredict:30,featureearleychartpars:[162,363,365,384],featureemptypredictrul:[162,164],featurefundamentalrul:164,featuregrammar:[174,191,365,384,390],featureincrementalbottomupchartpars:[162,384],featureincrementalbottomupleftcornerchartpars:[162,384],featureincrementalchart:162,featureincrementalchartpars:[162,384],featureincrementaltopdownchartpars:[162,384],featurelist:228,featurepredictorrul:162,features_count:34,featurescannerrul:162,featureset:[31,32,33,34,35,36,38,41,43,44,52,193,210,219,355],featuresettagg:210,featuresettaggeri:[210,219],featuresingleedgefundamentalrul:[162,164],featuretopdownchartpars:[164,384],featuretopdowninitrul:164,featuretopdownpredictrul:[162,164],featuretreeedg:164,featurevalueset:366,featurevaluetupl:366,feb:89,februari:[353,408],fechado:385,fechei:385,fecoreset:[57,71],fed:30,fee:[66,365],feeblemind:403,feed:[132,134,359,365,403],feedback:408,feel:[312,359,400,406],feet:312,fei:385,feid:[57,71],feil1v:202,feito:385,fell:362,fellbaum:[57,103],fellegi:145,fellow:[293,359],fem2masc:[198,199],fem:[365,393],femal:[36,98,353,359],feminin:[198,199],feng:94,ferel:[57,71],ferret:359,fertil:[324,325,326,327],fertilit
y_of_i:327,fertility_t:[324,325,326],fetch:[338,340,341,356],few:[52,201,206,211,311,359,366,385],fewer:[33,50,219,334,341,359,404],ffourth:397,ffreq_empir:33,fg:[115,359],fi2:202,fi:[57,104,307,385],fiction:[359,386],fid:33,fido:[177,188,361,363,390],field:[57,71,83,95,98,99,167,179,194,310,314,316,339,340,352,359,408],field_ord:314,fieldwork:408,fiendish:293,fig:[167,330],fight:343,figur:[359,369],file:[10,34,39,41,42,44,52,55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,96,97,98,100,102,103,104,105,106,107,109,111,117,123,126,129,145,158,161,166,173,174,188,193,194,195,213,215,218,220,224,226,227,228,230,304,314,334,339,340,341,342,343,346,350,353,359,360,363,365,369,374,385,387,390,393,397,403,408],file_id:[57,59,60,63,64,68,69,70,71,74,80,82,83,91,96,103,105,106,107],file_nam:[129,342],file_path:77,file_s:109,fileid:[30,57,58,59,60,61,62,63,64,65,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,97,98,99,100,101,102,103,104,105,106,107,109,349,353,359,384,385,386,387,388],fileid_or_classid:[57,103],filenam:[55,57,71,72,74,78,90,109,111,117,129,161,166,179,184,191,193,194,205,224,314,352,359,361],filepo:[102,359],files:111,filespac:346,filestr:343,filesystempathpoint:[109,359,361],filetyp:[57,98],filho:385,fill:[116,117,132,155,175,224,228,326,359,388],filler:[189,388],fillmor:367,film:[160,391],filter:[53,57,71,74,77,82,119,123,132,139,162,189,228,276,293,294,310,334,341,359,363,384,385,388],filteredbottomuppredictcombinerul:159,filteredcompletefundamentalrul:162,filteredsingleedgefundamentalrul:[159,162],fin:[366,384,385,403],final_period_1:311,final_period_2:311,finalerror:212,finalize_train:302,financi:[173,405],financiada:385,find:[1,6,26,28,33,34,35,42,46,48,49,50,51,53,57,59,61,62,77,78,84,105,106,109,117,119,122,123,127,129,132,145,153,154,166,167,170,171,173,175,176,181,185,188,189,191,194,212,214,219,227,276,293,294,295,302,303,304,305,313,317,320,324,327,331,333,334,353,357,359,363,369,374,385,387,391,399],find_abbrev_typ:302,find_all_src_phras:333,find_answ:[127,389],find_best_fit:176,find_binari:129,find_binary_it:129,find_concord:293,find_corpus_fileid:[57,102],find_diment:117,find_dir:129,find_fil:129,find_file_it:129,find_increasing_sequ:332,find_jar:129,find_jar_it:129,find_jars_within_path:129,find_label_nam:184,find_legal_onset:299,find_malt_model:166,find_maltpars:166,find_repptoken:304,find_vari:118,findal:[129,293,297,359,385,387],finder:[53,381],finder_4gram:357,finding_dir:129,findtext:359,findtyp:[188,378],fine:[2,38,141,224,366],finer:[57,84],fingerprint:42,finish:[111,122],finishcollectionmessag:111,finishdownloadmessag:111,finishpackagemessag:111,finishunzipmessag:111,finit:[31,87,214,408],finnish:[57,104,206,359,394],finnishstemm:206,fire:34,firefox:359,fireman:105,firm:388,first:[7,8,10,16,23,25,28,33,34,35,36,41,44,46,51,52,53,57,59,60,63,64,67,68,69,70,71,74,76,80,82,83,84,89,91,96,98,102,103,105,106,107,108,109,111,115,117,118,119,122,123,125,127,128,129,132,134,145,154,159,161,168,170,176,177,178,179,181,183,185,187,188,189,190,191,193,194,199,201,221,222,226,283,293,295,299,302,303,307,317,324,325,326,327,331,332,334,343,346,352,353,359,361,363,365,366,368,369,370,374,377,378,384,388,390,397,398,399,408],first_cas:302,first_low:302,first_po:310,first_upp:302,fish:386,fisher:[143,380],fisher_exact:143,fit:[38,132,133,155,359,367,377],fit_transform:369,fitando:385,fito:385,fitzenreit:145,fitzrureit:145,five:[143,201,403],fix:[31,33,39,63,159,212,219,227
,293,310,326,327,359,361,386,399,408],fixm:212,fixtur:[254,264,292],flag:[57,58,62,65,67,70,85,88,89,97,105,111,114,118,176,214,221,276,295,297,302,303,332,334,338,341,346,404],flat:[106,132,136,310,314,334,359],flatten:[57,68,132,136,334,343,354],flaw:20,fledg:[118,366],fleiss:141,fleuri:385,flexibl:[2,119,132,134,159,297,351],fli:394,flight:[382,384,405],flinch:359,flip:[195,214,325],flippin:195,flist_class:118,flood:358,floresta:408,florida:145,floss:359,flush:159,fly:[209,222,382,403],flying_ostrich:382,fmla:390,fmt:[27,227],fn1_sent:[57,71],fn:[35,57,71,118,367],fn_docid:[57,71],fn_fid:[57,71],fn_fid_or_fnam:[57,71],fn_fname:[57,71],fn_luid:[57,71],fname:[33,35,44,397],fnof:189,fnreport:[57,71],fntbl37:211,fntbl:211,fo:358,focu:[115,117,359,374],fog:359,foi:385,fol:[109,123,177,179,181,352,361,363,374,378],foldemo:183,folder:[57,64,346,360],folg:373,folgar:385,folgen:373,folgst:373,folgt:373,folha:385,follow:[7,8,14,25,28,30,33,36,38,52,53,57,59,60,63,64,68,69,70,71,72,74,77,80,82,83,84,86,90,91,96,98,100,102,103,105,106,107,109,111,113,115,117,118,119,129,132,136,141,143,145,151,159,167,168,170,171,174,176,178,179,183,185,188,191,203,206,209,214,216,219,222,228,293,295,302,303,305,306,307,312,317,318,322,323,324,325,326,332,334,336,340,341,346,352,355,358,359,360,361,363,365,366,367,369,374,378,380,381,385,387,388,396,398,399,403,406,408,410],followers_count:339,followtoscreen_demo:340,folmodel:183,fom:161,font:[25,57,101,115,117,335,384],foo:[60,71,78,91,106,129,301,307,354,361,366,377,386,390,396,408],foobar:[174,307,386],food:[57,71,129,367,375],foolswood:259,fora:385,foral:[117,181,188,189],forbid:359,forc:[111,213,317,318,321,328,330,332,359,388,405],foreground:115,foreign:[212,216,331],foreman:388,forese:[57,71,367],forev:[317,318,321,328,330,332,403],forg:359,forget:350,form:[15,24,28,29,33,43,52,57,71,87,90,91,92,97,102,105,109,111,117,119,122,123,126,153,159,161,164,168,169,173,175,178,179,180,181,185,188,189,190,198,199,200,201,206,212,214,219,221,222,224,226,228,300,303,305,313,316,334,336,352,359,363,366,367,370,372,374,385,387,388,398,401,403,408],forma:385,formal:[176,214,227],format:[28,29,33,34,44,55,57,59,64,66,76,77,85,86,90,98,105,109,119,122,124,126,129,132,161,166,174,179,183,212,219,224,227,304,313,314,316,320,331,333,335,338,339,340,341,352,353,359,361,362,363,369,374,378,383,391,408],format_debug_decis:302,formatt:44,former:[160,358,384],formul:162,formula:[109,122,123,145,177,183,184,185,187,188,317,332,361,364,374],formula_tre:185,forth:[332,359],fortun:132,forum:[202,388,406],forw:320,forward:[14,15,57,60,67,78,91,106,109,161,214,320,359,366],forwardcombin:15,forwardonli:15,forwardsconstraint:15,forwardtconstraint:15,forwardtyperaiserul:14,fought:396,found:[34,39,46,48,49,57,61,64,77,102,104,105,106,108,109,118,122,123,124,129,159,168,170,171,175,176,201,203,206,208,212,218,224,293,299,302,317,326,327,334,346,359,360,363,364,374,378,382,386,387,394,396,398,399,403],foundat:[125,388,408],four:[28,117,143,201,293,305,353,363,364,378,403],fourgram:[317,357,380],fourgram_measur:357,fourth:[353,397,408],fox:[160,172,217],foxu:115,fp:339,fpa:359,fpath1:366,fpath2:366,fprefix:341,fpt:359,fr:[57,104,360,370,398],fra:[360,403],frac12:189,frac14:189,frac34:189,fraction:[147,224,317],fractional_pres:145,fragment:[185,221,328,359],frame2:[57,71],frame:[57,71,103,115,117,359],frame_by_id:[57,71],frame_by_nam:[57,71],frame_config:115,frame_id:[105,403],frame_ids_and_nam:[57,71],frame_rel:[57,71],frame_relation_typ:[57,71],frame_str:[105,403],framefil:[57,79,87],frameid:[5
7,71],framenam:[57,71],framenet2:[57,71],framenet:[57,103,347,408],framenetcorpusread:[57,71],frameneterror:71,framerel:[57,71,367],framerelationtyp:[57,71],frames_by_lemma:[57,71,367],frameset:[57,79,87,387],framework:[33,109,179,352,408],franc:[179,369,403],frank:403,franz:317,fraser:200,frasl:189,frauenfeld:299,fraze:408,freaki:403,free:[0,109,113,119,125,127,181,183,188,215,359,361,364,406,408],freedom:388,freez:[118,334,359,366,384],frel:[57,71],french:[57,104,206,311,325,359,394,403],frenchstemm:206,frente_a:359,freq:176,freq_threshold:302,freqdist:[42,53,57,69,176,214,293,357,359,360,380,385,408],frequenc:[33,36,42,53,57,105,132,134,139,143,176,193,213,214,221,293,299,302,317,357,385,386,408],frequent:[20,132,139,144,176,194,209,219,293,302,385],fresh:118,fri:[57,71,367],frick:195,frickin:195,fridai:[219,396],friend:[160,172],friendli:[132,134,341],friends_count:339,frig:195,friggin:195,fring:[7,170],from:[7,8,14,16,22,24,28,29,30,31,32,33,34,36,37,38,39,43,44,46,47,48,49,51,52,53,55,57,58,59,60,62,64,65,67,68,69,70,71,72,73,74,75,77,78,79,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,98,100,102,103,104,105,106,109,111,115,116,117,118,119,122,123,125,126,127,129,131,132,133,134,135,139,141,142,143,144,145,146,147,149,152,157,158,159,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,187,188,189,191,193,194,196,197,198,199,200,202,203,204,205,206,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,224,226,227,228,274,276,282,283,293,294,295,297,298,299,300,301,302,303,305,306,307,308,309,310,311,312,313,314,316,317,318,319,320,321,322,323,324,325,326,327,329,330,331,332,333,334,335,336,339,340,341,342,343,344,346,348,350,352,353,354,355,356,357,358,359,360,362,364,365,366,367,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,408],from_product:[159,164,168],from_train:44,from_tt:[57,82],from_uid:[57,82],from_unified_model_dir:158,from_word:[53,357],fromkei:366,fromlist:334,fromstr:[14,16,28,109,118,119,129,167,181,183,187,188,294,316,334,335,350,351,361,362,364,365,366,368,370,372,374,378,382,384,389,390,393,398,399,400,401],fromxml:111,frontier:[7,170,388],frozen:[118,203,366],frozenset:316,frt:[57,71],frump:403,fry:367,fs0:365,fs1:[365,366,393],fs1_name:366,fs2:[365,366,393],fs2_name:366,fs3:[365,366],fs4:366,fs5:366,fs:[31,33,44,365],fs_class:[118,366],fscore:318,fsnlp:53,fsrl:[365,370],fstruct1:118,fstruct2:118,fstruct:[118,187,366,370],fstruct_read:[109,364],fstructur:[186,370],ft_sent:[57,71],ftp:[66,361],ftype:44,fuck:195,fuel:46,fueron:350,fugir:385,ful:299,fulfil:[227,294,359],full:[28,34,57,60,71,74,100,104,105,110,118,129,132,134,333,339,340,341,359,366,367,369,385,408],full_text:[57,71],fuller:367,fulli:[14,130,159,161,167,195,319,365],fullsignatur:110,fulltextannot:[57,71],fulton:[55,57,219,346,359,396],func:[33,110,115,117,200,340,343],funccolor:335,functionalcategori:13,functionbackedmaxentfeatureencod:33,functionvariableexpress:[181,188],functool:[110,366],functor:15,fund:[405,408],fundament:[164,168,365,406],fundamentalrul:[159,164],fundo:385,funky_punct_1:311,funky_punct_2:311,funnel:359,funni:[57,62,359,391],fur:396,furiou:293,further:[28,131,167,169,179,183,203,221,295,299,302,332,352,358,359,369,396,408],furthermor:28,futur:[71,87,122,164,198,199,333,341,359,399,405],future_hav:359,future_scor:333,future_score_t:333,fv:370,fval1:118,fval2:118,fval:[33,34,35],fvmh0:[98,359],fx8:349,fx:359,g0:390,g12345:370,g134:370,g14
:370,g1:[177,370,390],g21:390,g25:370,g2:[177,370,374,390],g3:[57,89,183,359,370,390],g4:[183,370],g5:370,g:[2,7,8,16,23,25,28,36,38,57,59,61,64,66,68,71,74,78,85,87,89,104,105,108,109,111,115,116,117,118,119,122,141,142,144,145,148,153,161,166,182,183,193,194,209,218,219,293,295,298,301,302,303,305,312,317,332,334,338,339,340,341,343,346,352,353,359,361,362,364,365,366,367,370,372,374,378,380,381,385,386,390,396,399,408],g_graph:167,ga2:202,gaac:46,gaacluster:49,gaan:400,gag:397,gai3i:202,gai:359,gain:[221,336],gaisha:301,gale:[176,319],gale_church:315,galileo:95,galopava:385,gambl:405,game:[36,398,405],gamma:[132,135,159,164,176,189,214,328,377,386],ganapathibhotla:[67,88],gang:408,gao:[94,317,321],gap:[57,58,60,62,65,67,70,85,88,89,97,303,310,321,396],garbag:[102,214],garner:176,garrett:[408,410],gasp:403,gasta:385,gat:385,gate:343,gather:[69,299,302,343],gaussian:[33,46,48],gaussian_prior_sigma:33,gawron:408,gaze:358,gb2312:[57,101],gb:369,gdfa:[283,315],gdmt:383,gdnt:383,ge:[189,200,397],gee:66,geht:331,gelo:385,gem:[24,391],gender:[57,98,359,366,384],gener:[13,14,18,23,25,26,28,33,34,35,39,41,44,50,52,102,105,106,110,114,118,122,126,127,129,132,133,134,139,141,143,147,148,152,156,157,158,159,162,164,166,167,168,170,171,174,175,176,179,183,184,188,189,210,213,214,219,223,224,226,228,293,295,299,301,302,303,304,306,311,316,321,324,325,326,327,332,334,336,338,341,343,347,352,354,355,357,359,360,361,363,365,366,371,374,385,394,395,403,404,408],generate_chomski:152,generate_malt_command:166,generate_repp_command:304,genericcorenlppars:160,genericstanfordpars:172,genesereth:125,genesi:[107,357,359,403],genesis_:403,genesis_dir:359,genr:[359,408],gensim:347,gensim_fixt:[229,369],genu:[385,403],genuin:403,geographi:[179,352],geoloc:339,geometr:[317,330],georg:[321,330,359],geraldin:145,gerefa:107,german:[57,104,200,206,299,311,365,373,394,396,408],germani:[57,71,369],germanstemm:206,germantest1:373,germantest2:373,germantest3:373,germantestsuit:373,gerund:[87,212,219],get:[38,42,57,71,74,90,98,102,105,115,117,118,129,132,134,139,142,146,173,188,198,199,203,212,221,224,316,317,340,341,343,359,361,366,367,369,370,374,375,377,385,386,390,391,395,396,399,403,408],get_a_job:[57,71,367],get_all_symbol:[178,181,187,188],get_booleanexpression_factori:[181,187,188],get_by_address:161,get_cycle_path:161,get_domain:125,get_glue_dict:184,get_glueformula_factori:184,get_glueformulas_from_semtype_entri:184,get_header_field_list:339,get_label:184,get_languag:[57,83],get_macrolanguag:[57,83],get_meaning_formula:184,get_model_build:122,get_neighborhood:228,get_next_token_vari:[178,188],get_param:302,get_path:[57,78],get_pos_tagg:184,get_prov:122,get_quantifiedexpression_factori:188,get_read:[184,370],get_ref:[181,364],get_segm_id:78,get_semtyp:184,get_sent:78,get_sent_beg:78,get_sent_end:78,get_unified_model_paramet:158,get_vers:[57,105],get_words_from_dictionari:146,getchildren:129,getinfo:110,getiter:129,getop:[181,188],getquantifi:188,gettempdir:173,gf:[57,71,367],gfl:[184,370],gfl_to_compil:[184,370],gg1:202,gh:[111,346,396],ghd:148,ght:358,gi:[33,355],gid:[90,359],gilbert:195,gile:22,gim:[298,312],gio:385,girl:[123,177,363,364,365,371,382,390,396],girlfriend:404,gisencod:33,gist:71,github:[71,206,229,251,259,274,283,301,311,313,318,335,343,345,350,358,369,377,396,403,407,408],githubusercont:[111,346,361],give:[28,35,46,49,57,71,74,76,84,86,98,105,109,111,122,132,134,160,174,178,188,195,214,224,226,311,317,334,340,350,351,354,357,359,361,363,364,365,366,367,374,378,398,399,403,404,408],given:[2,10,14,23,2
5,26,28,29,30,31,32,33,34,35,36,41,43,44,46,48,49,50,51,52,53,55,57,58,59,60,62,64,65,67,68,69,71,77,78,79,80,81,84,85,87,88,89,91,96,97,98,100,101,102,103,104,105,106,107,109,111,115,116,117,118,119,122,123,127,129,131,132,133,134,135,139,141,143,145,146,147,149,159,161,162,164,168,169,170,171,175,176,179,181,185,187,188,189,193,201,208,209,210,211,214,219,220,221,222,224,225,226,227,228,276,293,294,302,305,307,313,316,319,320,322,323,324,325,326,327,328,332,333,334,335,338,340,341,343,352,357,358,359,361,363,365,367,370,374,380,383,384,385,388,397,399,403,404,405],giz:205,giza:[316,326],gl:[166,385],gleu:[321,408],gleu_scor:315,glg:403,glint:[57,71],glob:374,global:[57,90,105,131,194,340,343,403],glue:[177,187],glue_dict:186,gluedict:[184,370],glueformula:[184,187,370],glueformulafactori:187,gluesemant:347,gluesemantics_malt:347,gluesemantics_malt_fixt:[229,371],gmt:359,gnd:[365,393],gni3:202,go:[57,71,160,359,366,387,407,408],goal:[69,122,124,125,126,127,128,334,374,382],goal_node_index:161,goalkeep:36,gobbl:359,god:[334,359],godwin:388,goe:[129,386],gold:[26,29,43,52,87,90,193,200,210,211,329,359,387,398],gold_chunked_text:354,gold_data:212,gold_relation_list:[57,64],gold_sent:[163,173,213],goldberg:408,goldstein:141,golf:[167,362,388],gon:[298,312],gone:363,goo:166,good:[24,43,46,57,61,160,176,213,217,221,293,295,299,301,302,303,306,307,308,311,312,313,324,325,326,333,359,386,391,393,396,400,403,406],goodeater1:375,goodeater2:375,goodeater3:375,goodeater4:375,goodman:[133,135,137],googl:[216,321,365,369,370,403,408],googlecod:408,googlenew:369,gooo:396,gorg:359,gorki:388,goslin:299,gospel:107,got:[298,312,366,399],gothenburg:410,gouw:321,gov:[72,145],govern:[359,373,386],governar:385,governo:385,governor:[160,357,388],gp:397,gr:370,gra:385,grabar:396,graber:408,gradabl:67,gradient:317,gradual:326,graem:408,grai:357,grail:359,grain:[2,57,79,87,359,387],gram:[42,57,69,176,219,253,317,318,321,330,343,360],gramfil:[123,363,390],grammar:[0,2,7,8,12,13,14,16,20,28,94,109,111,113,123,157,159,162,164,165,167,168,169,170,171,174,175,180,185,191,314,336,346,347,352,359,361,363,364,366,370,374,390,393,408],grammar_filenam:185,grammar_url:174,grammartestsuit:347,grammat:[167,174,196,373],grand:[55,57,132,134,219,346,359,385,396],grandmoth:[57,71],grandpar:[60,78,91,106,336],grang:6,grant:408,granular:[57,84],graph:[57,105,117,161,166,167,169,173,176,184,214,335,343,403,408],grapher:117,graphic:[2,3,7,8,57,105,111,116,117,334,408],graphviz:[57,105,161,343],grate:408,great:[38,131,152,293,353,391,396,408],great_pyrene:403,greater:[52,57,105,115,117,132,139,145,175,176,213,297,334,396],greatest:[115,176,335],greatli:195,greec:[179,352],greedi:217,greedili:214,greek:[57,101,104],green:[7,66,335,357],greenland:381,greet:359,greg:[321,408],gregori:107,grei:[160,172],grenzpo:200,grenzpost:200,grenzposten:200,grid:[57,68,115,117,155,335],griffon:403,grito:385,groom:359,ground:[127,214,321],group:[25,26,28,46,49,51,57,59,71,82,84,94,103,111,116,122,146,189,217,301,325,332,334,353,359,367,385,387,388,395,396,408],group_by_para:[57,63,65,84,97],group_by_s:[57,58,63,65,70,73,84,97],groupbi:386,grow:359,grow_diag_final_and:320,grsampson:176,grzegorz:142,gscl:200,gt:[189,313,386],guarante:[26,129,159,162,171,298,317,318,321,326,327,328,330,332,359,403],guardar:385,guerrilla:388,guess:[27,29,33,57,71,217,219,342,397],guess_encod:343,guess_languag:42,guess_path:342,gui:[18,359,408],guid:[179,317,318,321,328,330,332,406,407],guidebook:[317,318,321,328,330],guidelin:94,gujarati:[57,101],gulp:[57,88,359],gumt:383,gu
st:359,gutenberg:[22,30,293,358,359],gv:370,gwmt:383,gz:[172,220,361,369],gzip:[109,194,339,341],gzip_compress:[194,339,341],gzipfil:109,gzipfilesystempathpoint:109,h0:332,h1:332,h2:332,h5:332,h:[7,8,10,57,90,104,107,198,199,212,214,359,362,364,366,370,378,380,381,385,386,396,398,399],ha:[7,8,13,28,32,33,35,36,39,43,52,57,59,61,65,71,90,102,105,106,109,111,115,116,117,118,119,123,129,131,132,133,139,142,145,156,159,167,168,170,173,175,176,177,179,185,194,203,209,211,212,214,221,228,251,294,298,300,302,307,311,312,317,321,325,326,327,328,330,331,333,334,335,336,338,339,341,343,346,348,351,352,358,359,361,363,365,366,367,374,379,385,388,391,396,397,398,399,401,403,406,408],haar:400,habitava:385,hack:[180,293,378],had:[36,57,67,176,358,359,396,400,408],hadn:195,hadnt:195,hagiwara:408,hail:359,hair:357,hajim:332,half:[145,148,152,154],hall:[167,359],hall_demo:167,hallow:145,halo:320,halt:33,halt_on_error:111,halv:154,ham:[148,310,322,355,357],hamlet:359,hammer:403,hamza:201,hamzat:[198,199],han:310,hand:[8,14,30,102,117,119,159,164,168,170,175,195,211,336,359,365,403,406],handl:[34,44,102,109,117,123,129,131,132,141,164,178,180,181,187,188,194,219,228,276,297,306,314,338,341,350,359,377,378,391,396,403,408],handle_cond:181,handle_condit:178,handle_dr:[178,181],handle_elt:[60,77,78,91,106],handle_head:60,handle_lambda:188,handle_neg:[188,194],handle_open:188,handle_prop:181,handle_qu:188,handle_queri:78,handle_ref:181,handle_s:[60,91],handle_vari:188,handle_word:[60,91],handler:[77,78,106,118,183,341],handsom:391,hang:396,hangul_jamo:313,hangul_syl:313,hank:53,hapax:176,happen:[57,62,227,359,403],happi:[93,194,392],happy0:93,happyyi:396,har:230,harbin:352,hard1:359,hard:[92,304,359,396],hard_delimit:319,hardcopi:408,hardi:408,hardin:145,hardli:195,hardrock:[57,105,208,403],harmon:[15,147],harr:189,harri:359,harsh:317,has_kei:[118,366],has_prior:188,hash:[118,176,334,384],hashabl:[119,141,219,227],hashtag:[57,100,339],hasn:195,hasnt:195,hat:[57,71,367],hate:391,hau:[316,322,323,324,325,326,331,398],hav:358,have:[7,24,25,28,29,33,34,36,39,46,47,49,51,52,57,62,66,67,71,81,86,88,89,91,97,98,100,102,105,109,111,115,117,118,119,127,129,131,132,133,139,141,142,144,145,146,147,153,158,159,160,166,167,168,169,170,171,172,174,175,176,179,181,187,188,189,193,194,201,203,206,214,224,226,228,293,295,302,307,313,317,321,324,326,327,333,334,336,339,340,341,345,351,352,353,358,359,360,361,363,365,366,367,374,375,377,380,382,385,387,388,394,396,398,399,403,405,407,408],haven:[195,355],havent:195,hawaii:95,hc:310,he:[57,66,71,203,312,317,321,331,332,359,363,396],head:[57,68,105,119,161,167,169,173,325,326,327,334,336,362,374,387,388],head_address:161,head_distortion_t:[325,326],head_id:[57,105],head_index:169,head_len:[57,84],head_vacancy_t:326,head_word:[57,105],header:[44,57,78,111,159,194,341,397],header_mod:[57,78],header_sect:44,headlin:[72,359],headquart:218,headword:[57,71],health:[30,355],heap:129,heapsiz:166,hear:[317,318,321,328,330],hearst:[148,396],heart:[189,359],heat:[57,71,367],heathenish:358,heating_instru:[57,71,367],heaven:359,heavi:[57,71,359,367],heb:403,hebrew:[57,101],heed:[317,318,321,328,330,332],heidelberg:137,height:[79,87,98,115,117,160,334,387,399],held:[176,405],heldout:176,heldout_fdist:176,heldoutprobdist:[176,386],helf:373,hella:195,hellip:189,hello:[115,129,312,359,361,366,386,396],help:[0,7,8,10,24,57,59,71,78,105,108,111,117,129,132,293,302,312,346,359,366,385,388,390,391,403,408],helper:[17,43,78,118,131,133,136,327,359,384,399],helpfulli:132,helsinki:[57,107],helvetica:115,hen
:[167,169],henc:[46,50,57,71,327,336,363,367,381],hench:299,heptateuch:107,her:[57,71,160,359,367],herbarium:107,herd:359,here:[57,71,98,105,110,118,129,132,141,143,145,179,200,206,217,220,230,294,322,326,327,332,333,352,358,359,363,366,369,374,375,377,385,388,390,396,398,403,408],herm:408,hermia:359,hernia:359,hessian:213,het:388,heurist:[171,302,384],hh:[66,359],hi:[57,62,107,167,203,312,359,362,391,398,403,408],hiccup:359,hidden:[115,117,214],hiddenmarkovmodeltagg:214,hiddenmarkovmodeltrain:[214,386],hide:[7,8,66,115,117],hide_column:115,hideki:332,hideto:321,hierarch:[57,103,116,117,334,336],hierarchi:[57,64,117,178,307,339],higbe:145,high:[98,102,111,198,209,212,224,299,304,317,340,357,359,366],highe:145,higher:[28,33,132,134,145,159,162,164,168,170,171,174,175,317,327,333,378,405],highest:[33,35,132,167,212,214,330,344,405],highlevel:328,highli:[195,357,374],highlight:[117,334,335],highlightthick:115,higve:145,hike:359,hilfst:373,hilft:373,hill:[25,46,50,324,325,326,327,384],hillclimb:[326,327],him:[57,62,334,359,396],himmal:358,hindi:[73,408],hing:399,hip:385,hirao:332,hire:[57,71,367],histogram:333,histor:142,histori:[107,170,171,219,317,321,326,332],hit:[57,62,332,359,385,403],hllw:145,hlt94:86,hlt:[299,307,330,398],hmm:[176,209,408],hobbi:[359,386],hoc:[312,396],hockenmai:351,hof:388,hoi:359,hold:[109,115,122,164,173,185,224,227,301,310,327,359,367,374],holder:[111,374],hole:[177,408],hole_read:185,holesemant:185,holi:107,holidai:396,holist:89,home:[57,78,109,111,166,304,342,405,408],homem:385,homepag:[117,123,203,274,343],homili:107,hon:214,honest:343,honey_bunni:390,honnib:217,honoriu:107,hood:[57,66,71],hook:[25,181,187,188,254,264,292,408],hope:[57,64,367],hor:[300,396],horizont:[116,117,334,335,336],horribl:391,horzmarkov:[334,336,401],hosam:408,host:[22,109,203,408],hour:[57,71,160,358,367],hous:[57,71,316,322,323,324,325,326,331,353,388,398,405],how:[14,24,28,30,33,57,95,102,105,111,117,118,119,132,133,135,154,159,161,168,175,176,224,226,293,302,306,318,322,323,324,325,326,334,341,344,359,361,363,365,366,367,374,385,388,396,403,407],howev:[33,36,57,71,100,102,105,111,115,116,118,119,132,145,156,164,201,203,212,221,294,302,326,359,361,363,365,366,367,371,374,385,387,391,403,404],howl:359,howto:[340,359,365,370,385],hr:370,hrv:403,hs:98,hsi3:202,hsieh:94,hsiug5ct:202,hss:[57,105],ht:[98,202,215,359],htm:[72,94,107],html:[42,57,60,69,78,92,94,103,104,110,117,123,148,161,166,176,179,206,251,254,334,335,338,339,343,346,352,365,370,407,408],http:[0,10,22,38,42,53,55,57,60,62,64,67,69,71,72,74,77,78,81,83,86,88,89,92,93,94,103,104,105,107,109,110,111,115,117,123,137,142,145,148,160,161,166,176,179,200,203,206,211,213,216,217,220,221,229,251,254,259,274,282,283,301,304,311,312,313,317,318,319,321,328,330,332,334,335,336,338,339,340,341,343,344,346,348,350,351,352,353,358,359,361,365,369,370,374,377,381,385,396,403,406,407,408],hu:[57,81,88,89,104,194,408],huang:[94,214],huge:[195,333,388],hugh:321,hui:396,hum:95,human:[86,132,134,206,224,225,299,321,328,363,367,385,398,406,408],humana:385,humanli:313,humbl:394,humor:[359,386,391],hund:[365,373,403],hundr:[357,388,403],hung:388,hungarian:[57,104,206,359,394],hungarian_magyar:[57,101],hungarianstemm:206,hunpo:209,hunpostagg:215,hunt:359,hunting_dog:403,hurt:[66,224,359],husband:359,husk:202,huston:408,hut:66,hutto:195,hv:370,hvd:396,hyderabad:[73,352],hyp1:[317,318,321,332],hyp2:[317,318,321,332],hyp:[90,105,148,318,321,330,332,359],hyp_extra:37,hyp_len:[317,330],hyper:[302,403],hypergraph:335,hypernym:[57,105,343,399,403],hypernym_d
[Sphinx-generated search index data (minified term-to-document-ID mapping) omitted]
01,403,406,408],original_algorithm:203,original_best_arc:167,original_length:310,original_tag:227,original_text:117,oriol:321,ork:358,ormiston:408,orn:98,orosiu:107,orphan:359,orth:335,ortho_context:302,ortho_thresh:302,orthograph:302,orwel:359,os:[57,100,141,173,205,359,361,382,385,397,406],osborn:320,oserror:[129,160],oslash:189,ostara:381,ostrich:[125,382],ota:[57,60,107],other:[7,10,13,15,33,36,51,53,57,61,67,71,75,76,79,87,91,98,102,103,105,108,111,118,122,127,129,132,139,145,156,157,158,159,161,164,167,169,176,179,181,183,184,187,188,198,200,203,212,214,216,217,219,226,227,228,253,293,294,295,299,301,302,303,305,307,311,317,320,322,323,324,325,326,327,334,339,350,354,357,366,367,374,375,380,385,391,403,408],other_co:359,other_indic:187,other_typ:188,otherwai:396,otherwis:[7,33,34,39,52,57,60,64,76,102,109,111,115,116,117,118,122,129,132,134,139,161,170,171,176,182,187,188,214,226,227,302,314,341,346,357,359,384,386,399],othr:141,otild:189,otim:189,otto:[299,307],ou:385,ouamour:198,ought:66,oughtn:195,oughtnt:195,ouml:189,ounc:160,our:[132,193,230,304,321,354,357,359,365,370,374,385,386,391,396,398,399,408],ouro:385,out:[42,51,52,57,61,102,106,107,109,111,118,123,132,133,145,154,160,176,179,180,185,189,200,206,228,276,294,303,306,310,316,326,327,334,340,352,358,359,361,365,369,385,388,391,399],out_fil:369,outcom:[176,385],outdat:334,outdoor:358,outer:167,outerspac:181,outfil:[44,194,339],outlin:[116,117,167],output:[28,29,33,34,41,44,46,50,57,71,105,118,124,129,132,142,145,159,160,162,163,164,166,168,170,171,174,175,178,179,189,193,194,212,213,214,221,224,226,228,274,283,294,298,302,304,310,313,318,320,321,325,326,327,330,331,332,333,335,339,340,341,343,350,352,354,357,359,361,362,366,369,388,391,403],output_fil:102,output_format:[126,374],output_markdown:194,outputfilenam:166,outra:385,outro:385,outsid:[327,331,336,398,405],oval:117,ovalwidget:117,over:[2,25,28,31,33,35,36,38,39,46,47,52,57,58,59,60,63,64,65,68,69,70,71,73,74,80,81,82,83,84,91,92,96,97,102,103,105,106,107,110,116,132,134,136,139,141,147,148,159,160,162,164,169,172,179,188,198,213,214,217,218,221,293,295,318,322,323,328,331,354,358,359,366,384,385,396,397,398,403,406],overal:[2,29,317],overflow:176,overgedaan:388,overhead:43,overheard:359,overlap:[25,26,28,37,118,226,317,318,330,335,344,366,405],overload:378,overrid:[28,57,59,85,111,122,129,131,176,302,327,334,359,361,386],overridden:[57,60,61,78,91,106,111,117,129,178,181,187,188,219,302],overs:357,overstem:146,overt:[57,71],overtli:[57,71],overview:[339,340],overweigh:145,overwhelm:359,overwrit:361,ow0:359,ow2:359,ow:[66,202],own:[35,57,71,90,100,108,109,117,118,228,305,334,359,363,367,387,388,394,399],ox2:107,ox:[57,60],oxford:107,oy:66,p02:[282,317,318],p03:336,p0:[324,325,326],p103:176,p108:[184,187],p12:304,p1:[324,325,326,332,374,382,384,389],p1y9m21d:353,p1y9m28d:353,p2:[374,382,384,389],p2y1m12d:353,p2y1m23d:353,p2y1m3d:353,p3:[382,389],p4:[382,389],p5:[57,77,382],p:[7,8,10,35,36,57,66,77,87,89,105,107,109,118,119,125,127,132,139,142,145,147,163,168,175,176,179,180,188,194,214,216,221,227,297,299,304,320,327,334,351,352,354,355,359,361,362,364,365,366,368,369,370,372,374,377,378,381,382,383,384,385,386,387,389,390,393,396,398,399,403],p_factor:145,p_n:317,pa:397,pablo:350,pabo:62,pack:[115,117,403],packag:[34,38,44,53,105,106,108,109,124,126,161,174,302,334,343,346,355,357,359,374,378,380,407,408,410],package_id:111,packt:408,pad:[119,132,133,135,136,227,312,343,377],pad_both_end:[132,136],pad_fn:[132,133,135],pad_left:[132,343],pad_right:[132,343],pad_sequ:[1
32,343],paddan:357,padded_bigram:132,padded_everygram:136,padded_everygram_pipelin:[132,136,377],pagam:385,pagamento:385,page:[111,137,173,203,304,346,351,408],pagin:338,pai:[90,205,359,385,391],paic:[140,198,202,347],pain:359,pair:[23,32,33,34,35,41,46,49,52,57,71,74,90,104,105,117,118,119,123,125,141,145,148,158,159,171,176,177,179,183,189,191,219,227,228,302,316,317,319,322,323,326,327,328,331,332,343,352,354,359,366,367,376,380,388],pair_count:380,pairwis:343,paisagem:[205,385],pajama:[166,362],palavra:385,pale:293,palmellacea:381,palmer:[57,105,403],pan:[57,74,385],pang:[62,194,408],panlex:[57,82,83,408],panlex_lit:57,panlex_swadesh:57,panlex_uid:83,panlexlanguag:83,panlexlitecorpusread:[57,82],panlexswadeshcorpusread:[57,83],panlingu:[57,83],pant:403,pap:385,paper:[62,67,81,86,88,89,141,145,152,167,200,203,211,282,317,318,332,357,408],papineni:317,par_count:310,para:[55,57,59,63,65,74,77,84,85,97,107,189,359,385],para_block_read:[57,65,70,85,97,101],para_path:77,paradigm:408,paragno:[57,71],paragraph:[57,62,63,65,67,71,77,85,88,89,97,101,301,310,319,359,391],parallel:[57,68,122,322,323,324,325,326,327,374],parallel_corpu:[322,323,324,325,326],parallelize_preprocess:343,parallelproverbuild:122,parallelproverbuildercommand:122,param:[33,51,57,71,98,105,119,122,123,127,129,132,133,135,146,148,158,161,166,167,173,178,179,181,183,184,188,189,193,213,217,221,224,226,227,293,302,317,319,341,396],paramater:35,paramet:[13,14,23,26,28,29,30,32,33,34,35,36,37,38,41,43,44,46,47,48,49,50,51,52,57,58,59,60,61,62,63,64,65,67,68,69,70,71,73,74,76,77,78,79,80,81,82,83,84,85,87,88,89,90,91,92,95,96,97,98,100,101,102,103,105,106,107,108,109,114,115,116,117,118,119,122,123,124,125,126,127,128,129,132,133,134,135,136,137,139,142,144,145,146,147,148,153,155,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,183,184,187,188,189,191,193,194,195,197,198,199,200,203,204,205,206,207,208,209,210,211,212,213,214,215,217,218,219,221,222,224,225,226,227,228,293,294,295,297,299,300,302,303,304,305,306,307,310,312,313,314,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,338,339,341,342,343,344,359,361,363,364,390,396,399,403],parameter:[33,176,228],paramfil:41,paraphras:[57,104],paras_mod:74,parastart:302,parecia:385,paren:[28,187,188,305,334,359],parens_bracket:[298,312],parent:[57,60,71,78,91,106,113,115,116,117,119,129,169,176,276,294,334,335,336,353,367],parent_index:[334,399],parent_indic:[334,399],parent_of:389,parentchar:[334,336],parentedtre:[294,334],parenthas:117,parenthasi:117,parenthes:[23,25,28,118,293,303,305,312,358,365,366,396],parenthesi:[25,57,61,302,305,359],parenwidget:117,paresd:118,pari:[179,365,369],park:368,parliament:359,pars:[0,2,7,8,14,16,25,26,27,28,29,57,59,61,65,68,75,77,79,87,94,97,105,107,109,118,119,123,126,178,179,180,183,184,185,188,191,193,194,276,294,304,314,319,334,336,343,347,350,351,352,354,361,363,368,371,373,374,385,387,390,406,408,410],parse_al:157,parse_condit:178,parse_corpu:[57,79,87],parse_dog:160,parse_dr:178,parse_error:364,parse_fileid_xform:[57,79,87],parse_fox:160,parse_friend:160,parse_index:178,parse_john:160,parse_mari:160,parse_megam_weight:34,parse_on:[157,166],parse_output:220,parse_repp_output:304,parse_s:[157,160,166,172,191,390],parse_sexpr_block:359,parse_tadm_weight:41,parse_tagged_s:166,parse_text:160,parse_to_compil:184,parse_to_mean:[184,371],parse_to_read:123,parse_token_stream:319,parse_tweets_set:194,parse_vari:178,parse_weka_distribut:44,parse_weka_output:44,parse_w
ith_bindop:[180,390],parse_wolf:160,parseappl:16,parsed_doc:[57,72,359,388],parsed_docu:[57,68],parsed_expr:390,parsed_para:[55,57,61,359],parsed_s:[55,57,59,61,68,70,107,163,359,362,384,385,387,406],parsed_word:[57,61],parseprimitivecategori:16,parser:[1,2,3,7,8,14,16,25,26,27,28,29,109,118,119,123,156,157,158,159,160,162,163,164,166,167,168,169,170,171,172,173,174,175,178,181,187,188,214,304,350,351,354,363,365,366,370,408],parser_annot:160,parser_arg:[159,162,164],parser_dirnam:166,parser_eag:173,parser_model:158,parser_opt:158,parser_std:173,parseri:[14,26,156,157,158,159,160,166,168,170,171,172,173,175],parserul:202,parsesubscript:16,parsimoni:195,parsnip:350,part:[1,28,55,57,64,68,71,74,77,79,87,91,97,103,105,115,117,132,133,139,153,159,162,169,179,189,196,206,208,209,210,214,217,219,221,293,294,302,310,325,333,334,336,344,353,359,366,367,369,386,391,397,403,408],part_holonym:105,part_meronym:105,parti:[317,318,321,328,330,332,341,374,408],partial:[2,14,36,57,71,102,111,118,145,159,164,168,170,173,214,305,333,359,366],partial_names_demo:43,particip:[57,64,71,103,353,367,385],participant_property_kei:[57,64],participl:87,particl:216,particular:[14,25,28,33,43,52,57,71,78,90,102,117,118,119,143,147,159,164,167,168,169,170,175,176,179,189,219,228,294,295,305,306,323,324,325,326,334,338,352,359,363,366,367,371,374,385,399,408],particularli:[14,52,57,71,195],partli:195,partner:388,pascal:365,pasim:397,pass:[14,34,43,57,61,62,64,71,85,88,97,105,110,116,122,129,130,132,139,154,159,162,164,167,168,172,176,181,183,188,193,203,214,215,293,294,302,310,314,322,323,324,325,326,334,335,336,338,344,358,359,364,366,367,373,374,375,390,399,403,404,408],passar:385,passeio:385,passiv:[87,159],passonneau:145,passthroughboxerdrsinterpret:178,password:[343,346],past:[87,115,212,219,340,341,353,365,366],pastorali:107,pat:[57,71,166],patch:[301,403],path:[34,39,54,57,58,59,60,63,64,65,68,69,70,71,73,74,77,80,81,82,83,84,91,92,96,97,100,102,103,105,106,107,108,109,111,116,118,127,129,141,158,166,167,170,178,214,215,217,218,220,294,304,334,339,342,343,359,361,365,374,385,387,403,408],path_point:[57,59],path_similar:[57,105,403],path_to_bin:[129,215],path_to_dict:309,path_to_jar:[129,160,172,220,308,309],path_to_model:[215,309],path_to_models_jar:[160,172],path_to_sihan_corpora_dict:309,path_to_slf4j:309,path_to_tre:116,pathnam:342,pathpoint:[57,58,59,60,63,64,65,68,69,70,71,73,74,80,81,82,83,84,91,92,96,97,102,103,105,106,107,109],patient:408,patil:321,pattern:[18,23,28,57,58,59,62,65,67,70,71,84,85,88,89,97,102,152,188,189,201,276,293,294,297,303,310,334,354,359,367,388,395,396],paul:408,paulo:[359,385,408],paus:60,pawl:403,payload:359,payment:396,pb:60,pb_instanc:387,pbb737gold:396,pbbabscbn:396,pbc:396,pc:173,pca:369,pcfg:[109,119,168,174,175,361,372,384,408],pchart:[156,384],pcheck:399,pcl:359,pcm:98,pcp:385,pcv:102,pd:386,pdf:[12,42,53,57,83,104,137,145,179,221,259,282,304,317,318,319,321,328,332,335,343,348,351,352,408],pdist1:380,pdist2:380,pdist:[176,355],pdt:209,pe08:408,pea:375,peacekeep:388,peak:310,pearson:143,peaseblossom:359,peculiar:89,pedersen:[57,92,105,143,398,403],pedi:385,pee:66,peer:359,peg:[326,327],pek:388,peke:352,pela:385,pelo:385,pelt:359,pen:396,penalti:[145,302,317,328,330,332,333,398],pencil:396,penetr:[358,385],penn:[57,61,79,87,161,203,209,312,334,336,351,359,362,367,387,408],pensamento:385,peopl:[62,105,141,151,336,405,410],pep8:408,per:[28,38,57,65,71,73,79,87,98,102,104,105,174,218,224,311,321,328,339,359,365,367,385,387,388,393],perak:388,percent:[29,160,176,189,396],perceptr
on:209,perceptrontagg:[217,395],perder:385,pereira:[179,352],perf:353,perf_count:359,perfect:[87,329,398],perfect_and_progress:87,perfectli:219,perform:[7,8,14,15,18,26,28,30,33,36,39,51,52,57,65,102,117,118,122,125,127,144,146,169,170,171,178,181,187,188,193,194,198,200,211,214,219,221,224,227,293,294,299,301,302,307,312,359,361,366,369,375,380,399,401,405],perhap:365,perif:385,period:[213,302,311,312,358,359],period_comma_follow:301,period_comma_prece:301,period_context_r:302,period_fin:302,periodcategori:334,peripher:[57,71],perkin:408,perl:[53,57,104,105,313,403],perldoc:[57,104],perluniprop:[57,104],permil:189,permiss:[62,67,73,81,86,88,89,92,111],permit:[228,357,359,385,399],permut:[13,15,147,180,390],pero:350,perp:189,perplex:[132,133],persian:311,persist:52,person01:365,person:[23,29,87,111,160,189,220,357,361,363,365,366,367,373,374,387,388,403,404,405,406],persona:359,perspect:[57,71,367,398],perspectiv:[57,71,367],perspective_on:[57,71,367],persuad:350,pertain:359,pertainym:[105,403],pessoa:385,pessoal:385,peter:[322,323,324,325,326,327,408,410],petersburg:[313,396],petro:408,petrograph:381,petrov:216,pevzner:148,pfeat:228,pformat:[176,334,387],pformat_latex_qtre:[334,399],pg:[299,307],phags_pa:313,pharaoh:320,phd:[98,142,184,187,299],phenomena:167,phenomenon:324,phi:[143,189,324,325,326],phi_sq:[143,380],philadelphia:388,philip:89,philipp:[322,323,324,325,326,327,333],philosophi:107,phn:[57,98,359],phone:[57,98,160,297,359,408],phone_tim:[57,98,359],phone_tre:[57,98,359],phone_word_r:297,phonem:[30,66,98,299,307,359],phonet:[57,98,142,247,359],phonetik:[299,307],phonolog:[142,299,307],photo:396,php:[57,64],phr:400,phrasal:[119,335,359],phrase:[25,26,28,57,67,71,86,152,293,316,317,331,333,334,358,359,367,399,400],phrase_bas:315,phrase_extract:331,phrase_t:333,phraset:[316,333],phrasetableentri:[316,333],phyast:110,physic:399,physical_ent:[57,105,343,403],pi0:317,pi:[94,129,141,189],picasso:403,pick:[184,328,359],pickl:[102,109,194,217,224,302,356,361,384,385,386,396],picklecorpusview:102,pictur:[57,89,358,359],piec:[52,57,68,71,79,87,119,156,160,359,387],pierr:[163,359,362],pietra:[322,323,324,325,326,327],pihs4:202,pile:405,pili:381,pine:344,ping:66,pinhal:385,piou:293,pip:407,pipe:[129,160,161,215,311,384],pipelin:[38,39,172,273],pit:359,pitt:110,piv:189,pivot:169,pixel:117,pk:148,pl196x:57,pl196xcorpusread:[57,84],pl:[16,57,74,78,104,109,179,301,311,317,330,352,353,365,366,384,393],place:[25,42,57,74,116,117,118,131,155,161,164,168,194,213,219,326,333,334,335,339,348,358,359,365,367,378,381,384,396,408],placehold:[57,71,370],placement:[326,335],placent:[57,105,343,403],placenta:399,plai:[57,98,167,359,362,363,366,406],plain:[57,67,335],plaintext:[57,101,161,302],plaintextcorpusread:[57,85,101,108,359],plan:[118,366,388],plane:[313,396],planeamento:385,plastika:388,platform:[109,111,359,406,408],plausibl:[184,341],player:353,pleas:[0,55,160,173,195,209,214,221,229,295,303,306,308,312,313,332,359,360,385,393,396,398,406,407,408],plot:[114,176,194,224,293,310,369,391,394,408],plt:369,plu:[57,91,118,122,188,189,294,334,343,359,378,385,406,408],plug:185,plur2s:[198,199],plural:[198,199,212,219,394,400],pluralis:394,plusmn:189,ply:202,pm:[57,104,338,341,359,361,396],pmi:[143,193,194,357,380],pmod:[163,362],pn:317,png:161,pnp:349,po:[14,27,33,39,57,59,60,61,62,63,64,68,69,70,71,73,74,80,82,83,91,93,96,103,105,106,107,118,160,166,172,174,178,208,209,211,212,213,214,215,216,217,218,220,221,224,226,227,228,334,344,353,359,365,366,367,384,385,388,391,393,401,403,405,408],pobj:353,pocket:
359,pode:385,poder:385,poderemo:385,poesio:141,poi:385,point:[28,57,64,71,77,79,87,108,109,160,176,211,214,224,226,313,319,320,326,327,331,346,350,363,367,369,398],point_entropi:214,pointer:[57,59,60,63,64,68,69,70,71,74,79,80,82,83,87,91,96,103,105,106,107,109,159,334,361,366,387,399],pointwis:[143,214,357],poison:359,poisson:143,poisson_stirl:[143,380],poke:359,pol:[385,403],polar:[57,62,194,359],polarity_scor:[195,391],polic:194,policeman:[403,404],polici:310,poliqarp:[57,78],polish:[57,78,104,359],polit:388,politi:107,polonian:381,polysem:[57,71,367],pooch:403,poodl:403,pool:[57,83],poor:[160,173,361,391],pop:[118,128,334,366,386,399],pop_first:128,popen:129,popitem:[52,118,366,386],popov:318,popul:[132,139,179,193,194,352],popular:[336,407],population_of:[179,352],popup:8,por:[385,396,403],porpois:386,porqu:385,port:[10,57,104,142,160,205,206,301,311,312,313,319,408],porta:205,porter:[196,206,274,408],porterstemm:[203,206,274,328,394,408],portertest:274,portim:385,portion:[7,8,14,16,28,57,59,98,159,168,170,171,175,221,302,359],portugues:[57,104,205,206,359,394,408],portuguesa:385,portuguese_en:347,portuguese_en_fixt:[229,385],portuguesecategorizedplaintextcorpusread:[57,85],portuguesestemm:206,pos1:[57,75],pos2:[57,75],pos3:[57,75],pos_concord:359,pos_in_tre:[57,68],pos_m2:228,pos_m33:228,pos_scor:[57,93,392],pos_tag:[91,166,174,209,269,359,395,406],pos_tag_s:209,pos_tagset:[57,71,367],pose:[57,71],posid1:[57,75],posid2:[57,75],posid3:[57,75],posit:[14,23,28,29,33,36,57,58,63,65,70,71,73,74,79,81,84,87,89,92,93,97,102,105,109,115,117,118,119,129,148,154,155,159,164,167,168,179,194,195,213,214,224,226,227,228,276,293,294,316,318,322,323,324,325,326,327,332,333,334,343,359,369,385,391,397,399],position_of_ngram:332,positive_featureset:36,positive_prob_prior:36,positive_tweet:359,positivenaivebay:30,positivenaivebayesclassifi:36,posix:109,poss:[160,353,367],posscor:[93,392],possibl:[13,14,16,23,28,33,34,36,46,53,57,62,64,68,71,74,105,109,111,116,117,118,119,132,139,145,147,153,157,159,162,164,167,168,169,170,171,174,175,176,180,183,189,211,213,214,221,224,226,228,294,298,302,312,316,322,323,324,325,326,329,331,334,341,343,344,358,359,361,363,366,367,374,384,388,391,396,398,403,404],possible_st:213,possible_transit:213,possibleanteced:181,post:[57,80,312,341,346,353,359,388,396,408],postag:[211,224],postag_tre:27,posthum:385,postord:334,postpl:228,postposit:216,postscript:[117,182],postura:385,potato:[57,71,367],potenti:[69,118,119,141,302,359,363,366],pouco:385,pound:[189,359],pour:[148,359],power:[143,219,359],powershot:[57,89,359],pp1:202,pp3n:359,pp560:94,pp:[29,86,94,148,160,172,176,198,218,299,307,350,352,354,359,361,368,372,384,385,387,393,400,401],ppattach:57,ppattachmentcorpusread:[57,86],ppdb:[57,104,408],ppdp:169,pprint:[57,68,71,103,105,176,334,343,359,362,366,367,370,403],pprint_fram:[57,103],pprint_memb:[57,103],pprint_subclass:[57,103],pprint_themrol:[57,103],pr:[214,343],practic:[57,71,317,321,328,330,332,336,359,403,406],pragmat:145,praguej:385,prais:396,prasa:[57,74],prd:400,pre1:201,pre32:201,pre:[25,57,85,111,132,139,166,172,173,213,217,302,303,328,334,365,390,404,408],prec_count:317,preced:[28,33,57,71,98,102,132,133,145,195,201,219,276,293,302,312,363,367],precipit:359,precis:[28,29,57,77,147,193,221,226,317,318,321,328,332,354,380,391,393],precomput:176,precondit:[33,173],pred:[185,188,294,353,370,378],predhold:125,predic:[15,57,79,87,125,178,179,185,188,294,352,359,366,378,382,387,390],predicate_valu:359,predict:[34,38,52,57,71,164,176,217,323,324,325,365,367,396],p
redictor:[159,365],predictorrul:162,predid:[79,87],predominatement:385,pref:[198,199],prefac:[57,58,63,65,70,73,74,81,84,85,92,97,102,107],prefer:[176,179,299,301,333,350],prefix:[32,57,104,111,116,118,130,145,159,168,173,198,199,201,202,207,274,341,365,366,399],prefix_replac:207,prefix_step2a_success:206,prefix_step3a_noun_success:206,prefix_step3b_noun_success:206,preflabel:[179,352],prehistor:403,prejudic:[57,71],preliminari:25,premis:[125,370,382],preorder:334,preosta:107,prep:[57,71,86,353,359,367,384,385,388,399,400],prepar:[92,310,359,408],prepend:109,prepnadjn:350,prepnadjnp:350,prepnpcompl:350,preposit:[57,71,86,189,216,325,354,359,367],preprocess:[38,119,132,194,328,377],presenc:[145,194],present:[33,42,52,57,87,105,118,122,129,149,167,168,169,193,198,199,203,317,322,323,324,325,326,334,348,357,366,369,388,396,408],preserv:[46,105,115,118,132,139,178,297,314,366,397],preserve_cas:[297,396],preserve_lin:295,presid:[36,359,388],president:350,presidi:381,press:[72,299,307,322,323,324,325,326,327,333,398],presum:202,presuppos:[57,71,312,367],preta:385,pretermin:119,pretrain:217,pretti:[32,57,103,159,181,189,334,335,343,359,385,396,400],pretty_format:[32,144,159,181,186,364,380],pretty_format_edg:159,pretty_format_leav:159,pretty_print:[160,181,334,364,399],prettydict:[57,71],prettylazyconcaten:71,prettylazyiteratorlist:71,prettylazymap:71,prettylist:[57,71,367],prev:182,prevent:[28,33,115,119,122,127,194,359],previou:[30,57,71,111,149,159,170,171,199,219,302,325,326,327,335,341,346,395],previous:[57,105,151,193,314,326,334,403],previous_cept:327,previous_edg:159,previous_in_tablet:327,prevword:[30,33],price:[359,362,408],pricktimb:381,prim_onli:13,primari:[15,43,66,179,219,352,359,396],primarili:[117,359],prime:[189,361],primeiro:385,primit:[13,16,52],primitivecategori:13,primou:385,princeton:[57,105,403],princip:359,principl:[57,71,299,307,317,318,321,328,330,332,363],print:[7,8,14,28,29,30,32,33,46,51,55,57,71,89,93,100,103,105,109,110,115,117,122,123,124,126,127,129,132,144,159,160,161,173,174,176,179,181,183,188,189,193,199,205,206,208,209,211,214,219,224,226,276,293,302,304,309,311,316,317,322,323,324,325,326,331,334,335,340,341,343,351,352,353,354,355,357,359,360,361,362,363,365,366,368,369,370,371,374,378,380,382,384,385,386,387,388,389,390,391,392,393,394,398,399,400,401,402,403,405,408],print_assumpt:[122,124,126,374],print_concord:293,print_error_to:111,print_grammar:[159,162,164,384],print_maxent_test_head:355,print_mptre:399,print_pars:168,print_proof:125,print_ptre:399,print_sent:164,print_str:[343,402],print_template_statist:[211,212],print_tim:[159,162,164,384],print_to_fil:117,print_tre:[159,162,164],printccgderiv:[14,350,351],printccgtre:14,printtyp:188,printunus:[211,212],prior:[33,36,46,48,117,214,302,317,319,322,323,324,325,326,332],prison:[160,357],privileg:346,prn:[160,172],pro:[57,88,181,350,353,359,363,364,385],pro_w4:201,pro_w53:201,pro_w54:201,pro_w64:201,pro_w6:201,proadj:385,prob:[33,35,119,168,176,293,334,355,384,386,399],prob_alignment_point:[322,323],prob_all_align:[322,323],prob_classifi:[31,33,35,44,355],prob_classify_mani:[31,38,44,355],prob_dict:176,prob_dist:176,prob_kwarg:[176,334],prob_of_align:327,prob_single_quot:311,prob_t_a_given_:[322,323,324,325,326,327],probabilist:[109,119,156,167,168,169,174,175,176,214,361,372,408],probabilistica:[176,334],probabilisticbottomupinitrul:168,probabilisticbottomuppredictrul:168,probabilisticdependencygrammar:[119,169],probabilisticfundamentalrul:168,probabilisticleafedg:168,probabilisticmixin:[176,334],proba
bilisticnonprojectivepars:167,probabilisticproduct:119,probabilisticprojectivedependencypars:169,probabilistictre:334,probabilistictreeedg:168,probability_chang:333,probability_fixt:[229,386],probability_t:[322,323,324,325,326],probabl:[0,14,28,31,33,35,36,38,46,47,48,119,132,135,147,156,168,169,175,214,219,221,316,319,322,323,324,325,326,327,333,334,336,341,347,378,380,398,403,408],probdist:176,probdist_dict:176,probdist_factori:176,probdisti:[31,33,35,38,44,147,176,214],problem:[25,38,46,57,71,109,154,196,304,317,350,359,366,399,408],problemat:[354,384],proc:384,proce:310,procedur:[119,167,170],proceed:[57,62,67,81,83,86,88,89,94,145,146,168,173,200,317,318,328,330,332,344],process:[0,2,14,25,26,31,46,48,49,50,57,59,60,63,64,68,69,70,71,74,80,82,83,85,91,94,96,102,103,105,106,107,109,117,118,123,140,157,159,167,168,170,171,176,179,188,191,197,198,201,210,214,217,221,224,230,296,300,305,310,314,326,327,333,334,338,339,341,343,352,359,361,374,378,384,388,406,408],process_bundl:179,process_next_express:188,process_quoted_token:188,process_thread:123,procura:385,prod1:384,prod2:384,prod:[119,175,189,365,370,384],produc:[14,29,33,46,48,52,57,105,118,119,136,141,156,159,161,162,164,167,168,170,171,174,175,176,178,185,198,212,219,221,224,293,295,302,310,319,320,324,325,326,332,334,335,340,359,366,376,388,396,398,399,403],product:[7,8,33,57,89,113,119,123,145,159,164,168,169,170,171,175,214,217,228,334,336,359,361,362,365,368,372,384,391],product_reviews_1:[57,89],product_reviews_2:89,productionlist:113,professor:388,profil:42,program:[57,105,116,117,123,129,156,159,161,162,175,178,182,203,206,214,293,319,336,343,353,406,408],programm:408,progress:[7,87,111],progress_bar:343,progressmessag:111,proj:221,project:[22,42,57,66,103,167,169,173,221,230,335,345,359,360,367,406,408,410],projectista:385,projective_prob_parse_demo:169,projective_rule_parse_demo:169,projectivedependencypars:[156,362],projecto:385,projet:396,prolog:[178,179,352],prologu:359,promin:131,promis:198,promo:53,prompt:[117,168,175,359,385],pron:[77,167,216,349,357,359,384,385,388],prone:315,pronoun:[89,181,216,357],pronounc:66,pronunci:[57,66,359,408],proof:[122,124,126,127,128,374],proof_str:[122,126],prooftran:374,prop:[57,71,128,183,189,353,367,385],propag:[57,71,117,183],propagate_semtyp:[57,71],propbank:[57,71,347,367,408],propbank_ptb:[359,387],propbankchaintreepoint:[87,387],propbankcorpusread:[57,87],propbankinflect:[87,387],propbankinst:[57,87,359,387],propbankpoint:87,propbanksplittreepoint:[87,387],propbanktreepoint:[87,387],propdemo:183,proper:[179,216,359],properli:[108,276,384],properti:[33,57,59,71,79,87,102,104,109,111,115,116,118,122,123,124,125,132,139,143,160,174,176,179,181,183,188,195,209,210,214,219,220,226,227,228,294,297,302,316,321,333,334,346,352,353,359,363,374,382],property_nam:226,propfil:[57,87],propn:[179,352,363,365,390],proport:398,proportion:317,propos:[200,203,299,307,333],proposit:[178,183,374],propriament:385,pros_con:57,prosconscorpusread:[57,88],prose:[57,107],prosodi:299,protest:359,protocol:[102,109,174,361],prototyp:408,provabl:374,prove:[121,122,123,124,125,126,127,128,145,363,374,382],proven:[125,382],prover9:[121,124,408],prover9_input:126,prover9_search:374,prover9command:[126,374,382],prover9commandpar:[124,126],prover9except:126,prover9fatalexcept:126,prover9hom:374,prover9limitexceededexcept:126,prover9par:[124,126],prover:[122,123,125,126,127,128,181,188,371,382,408],provercommand:[122,126],provercommanddecor:[122,125],proverparseerror:[125,127,128],provid:[14,42,52,53,55,57,59,68,7
1,79,81,84,87,89,90,96,98,106,109,111,117,118,120,126,129,132,133,135,142,143,145,151,154,158,159,161,166,167,168,173,176,177,179,183,185,202,206,214,221,274,293,295,338,341,343,352,357,359,360,361,366,367,374,380,387,388,390,398,399,406,408],provied:309,provis:202,proxi:[71,108,341,343],prp:[160,172,217,359,363,385,400],prt:[77,209,216,357],prtree:399,prune:[326,333,403],ps:[86,128,397],psalm:358,psent1:385,psent2:385,psent:385,pseudo:[107,147,175],pseudocod:32,pseudosent:310,psi:189,psoe:388,pst2:396,pst:[57,71,396],psy:[57,64],pt:[57,71,104,367,385,396],ptb:[216,359,387],ptbtoken:308,ptcl:216,ptext1:385,ptext2:385,ptext3:385,ptext4:385,pth:342,ptree:[334,399],pu:173,publicdomain:[57,83],publicli:200,publicystyczni:[57,74],publish:[142,203,330,357,406,408],pukka:403,pull:[160,189,350,396,403],pumpkin:390,pun:349,punc:[167,359,388],punc_list:195,punct:[160,172,181,187,188,301,400],punct_1:301,punct_2:301,punct_regex:301,punctuat:[42,57,58,60,61,62,64,65,67,77,80,81,84,85,88,89,91,97,100,104,106,163,167,194,216,221,295,298,302,312,357,385,391,396],punkt:[57,85,295,361,385,396,408,410],punktbaseclass:[302,396],punktlanguagevar:[302,396],punktparamet:[302,396],punktsentencetoken:[57,85,295,302,361,385],punkttoken:302,punkttrain:[302,396],pup:[57,104],pup_numb:301,pup_punct:301,pup_symbol:301,purchas:[57,89,359,408],pure:195,purg:[183,390],purpos:[28,35,43,57,105,110,115,117,129,132,169,193,214,223,322,323,324,325,326,336,338,359,365,367,403],push:359,put:[57,61,103,118,128,132,155,297,359,365,366,387,405],put_al:128,put_atom:128,put_direct:359,put_spati:359,py3_data:54,py:[10,14,39,111,218,226,227,228,230,301,359,361,366,384,401,408],pye:396,pyfound:408,pylab:293,pypi:[213,407],pyplot:369,pyramu:359,pytest:380,python25:111,python2:350,python38:407,python:[0,10,14,25,38,52,57,98,102,104,108,109,110,111,115,118,129,130,132,134,141,178,179,200,201,203,212,213,217,224,294,301,303,311,313,338,339,341,343,346,352,359,361,365,366,370,374,378,381,403,406,408,410],pythonhom:111,pyupgrad:408,q14:[57,104],q:[7,8,77,111,142,188,351,352,364,365,366,370,374,378,381,386,389,390,399],qam:380,qc:359,qcn:403,qin:321,qtree:334,quadgram:[53,143],quadgram_fd:53,quadgramassocmeasur:[143,357,380,408],quadgramcollocationfind:[53,357],quadro:[205,385],quadrupedibu:107,qualifi:[130,391],qualiti:[46,57,82,89,307,330,332,359],quando:385,quant:[188,353],quanti:[57,71,367],quantif:370,quantifi:[122,180,188,251,363,364,378],quantifiedexpress:188,quantit:176,quantiti:[46,334],quarter:160,que:[205,350,359,385],queen:369,queer:293,quel:148,quer:385,querem:385,queri:[22,117,179,189,293,334,340,341,359,388],question:[23,24,95,111,143,178,340,358,388,408],queu:384,queue:[168,174],quick:[154,160,172,217,381],quicker:374,quiet:[44,111,183,348],quieta:385,quinca:385,quit:[7,8,23,25,57,77,89,173,195,321,366],quoc:321,quot:[22,57,74,118,129,188,189,276,312,313,334,365,366,375,378,385,396],quotat:396,quote_char:378,r0:[123,332,363],r1:[123,332,354,363,370],r2:[332,354,370],r3:354,r4:354,r5:[332,354],r6:354,r7209:386,r7:354,r8:354,r9:354,r:[8,23,25,57,60,66,71,75,77,93,105,109,110,123,125,132,139,142,144,147,176,188,201,208,212,219,227,312,313,317,318,321,330,332,343,349,351,352,354,355,358,359,362,363,364,365,366,367,370,371,374,378,380,381,382,385,386,388,389,390,392,396,397,398,399],r_nr:176,ra2:202,rabin:214,race:[98,359],radic:189,radiu:117,radnei:358,rae0:202,rag:359,rai:396,rain:[145,358,359,380,390,396],rais:[13,14,15,28,34,52,57,59,84,108,109,115,117,118,119,127,129,130,132,134,139,144,147,176,183,187,188,206,226,227,228,293,3
05,328,334,341,343,351,360,361,366,374,375,377,390,396,399],raise_on_error:111,raise_unorderable_typ:129,raita:148,rakuten:301,ramakrishnan:343,rami:[160,220],ran:[384,399],randn:310,random:[46,50,102,133,147,168,214,224,293,359,380,381,385],random_sampl:214,random_se:[132,133,293],random_st:369,randomchartpars:[168,384],randomis:50,randomli:[147,176,194,214],rang:[29,30,33,57,105,118,149,159,176,189,214,224,226,313,316,321,334,354,355,356,359,361,366,383,386,395,398,399,403],range_r:118,rangefeatur:[118,366],rank:[33,53,57,71,149,211,332,380],rank_gap:[149,357],ranks1:149,ranks2:149,ranks_from_scor:[149,357],ranks_from_sequ:[149,357],rape:160,rapid:153,raquo:189,rare:[33,195,302],rarr:189,rate:[25,62,146,209,259,304,329,340,341,359,383,405],rater:408,rather:[32,35,53,57,71,105,109,118,143,161,164,176,179,201,211,224,302,314,352,357,359,360,365,366,367],ratio:[53,117,143,176,317,321,357,398],ratnaparkhi:86,raw:[55,57,59,78,100,109,111,166,221,259,310,340,346,359,361,377,385,396,398],raw_field:314,raw_freq:[143,357,380],raw_mod:[57,78],raw_pars:[160,172],raw_parse_s:[160,172],raw_tag_s:160,raw_text:385,rb:[160,209,212,219,359,361,385,400,401,406],rbr:[359,396],rc:363,rcb:[298,312],rceil:189,rcon:[189,388],rd:393,rdb:352,rdparser:1,rdparser_app:1,rdquo:189,re2:202,re:[13,25,28,33,43,57,58,62,65,67,70,85,88,89,97,102,108,109,111,117,118,122,127,132,179,195,200,214,298,300,301,302,303,311,312,317,325,332,352,354,359,374,388,407,408],re_boundary_realign:302,re_show:[343,402],reach:[7,32,57,89,102,105,159,194,334,338,341,359,374,403],reachabl:[51,403],read:[39,42,52,55,57,58,59,61,62,63,65,66,67,68,70,73,74,75,77,81,83,84,86,88,89,92,95,97,98,99,102,104,105,106,109,115,118,123,129,161,183,185,200,224,314,316,317,321,327,332,334,340,342,350,351,353,365,370,371,374,378,384,390],read_:129,read_alignedsent_block:[57,58,102],read_app_valu:118,read_blankline_block:[57,65,70,85,97,102],read_block:[57,58,63,65,70,73,74,77,78,84,92,97,102,106,359],read_cmudict_block:66,read_depgraph:[186,370],read_expr:[351,370,374,378,382,389],read_fil:184,read_fstruct_valu:118,read_grammar:119,read_int:129,read_int_valu:118,read_leaf:[334,399,400],read_line_block:102,read_log:188,read_logic_valu:118,read_nod:[334,399],read_numb:129,read_parti:118,read_regexp_block:102,read_rul:205,read_sent:191,read_set_valu:118,read_sexpr_block:[102,359],read_str:129,read_str_valu:118,read_sym_valu:118,read_timit_block:98,read_tuple_valu:118,read_typ:188,read_valu:[118,183],read_var_valu:118,read_whitespace_block:[102,359],read_wordpunct_block:102,readabl:[109,141,225,293,302,313,344,363,367,385,401,408],reader:[42,43,52,109,188,328,361,366,367,370,403,406,408],reader_cl:108,readerror:[129,375],readi:[132,365],readili:357,reading_command:123,readingcommand:123,readlin:[102,109,359],readm:[57,59,81,82,105,257,359,360,374,385],real:[132,146,176,189,228,324,383,403],realign:302,realign_boundari:[302,396],realist:403,realiti:[57,71,367],realiz:[57,71],realli:[24,188,195,312,365,366,391,396],reason:[57,85,123,125,132,135,188,214,311,340,343,366,374,398,406],rebbani:199,rebecca:304,rebuilt:39,recal:[28,29,147,193,318,321,328,354,365,380,391,393],recalcul:[46,50,132,139,302],recdat:[98,359],recebido:385,receiv:[35,123,209,340,363,374],recent:[7,8,57,89,102,115,159,170,171,219,226,227,228,305,341,351,354,355,359,360,361,366,374,375,386,387,390,396,397,398,399,403],rechunk:[26,29],recip:[115,154,408],recogn:[33,90,162,183,359,366,388,396],recognis:408,recognit:[72,214,388,408],recommend:[25,28,57,89,100,118,148,203,209,219,295,304,346,366,369,4
03,407],reconstruct:[52,142,159,164,359],record:[2,7,8,29,30,60,91,98,105,111,119,145,159,168,170,171,175,176,179,312,314,321,352,359,397,403],recov:[294,341],rectangl:117,recurs:[1,7,25,57,105,111,170,177,181,183,188,221,343,375,390],recursivedesc:[156,393],recursivedescentpars:[156,170,384,393],red:[115,117,217,335],redefin:[176,334,386],redesign:408,redirect:[129,160,161],redirect_arc:161,redo:[359,408],reduc:[1,8,46,48,49,50,51,52,109,171,173,199,212,302,359,369,405],reduce_len:[297,396],reduce_lengthen:297,reducible_product:171,reduct:[8,43,51,57,71,171,178,367],redund:[13,105,363,403],redupl:359,reed:358,reentran:366,reentranc:[118,365],reentrant:[118,366],ref1:[318,321],ref1a:[317,321,332],ref1b:[317,321,332],ref1c:[317,321,332],ref2:318,ref2a:[317,321,332],ref:[60,72,144,148,178,181,318,321,329,330,332],ref_len:330,refactor:408,refer:[25,28,33,52,60,78,94,115,118,143,144,146,147,148,161,164,167,170,178,179,181,226,290,299,307,311,317,318,320,321,322,323,324,325,326,327,328,329,330,332,333,334,335,336,338,341,343,348,352,359,360,361,364,365,379,380,385,393,394,396,403,408],referenc:[319,333],reference1:[317,328,330],reference2:[317,328,330],reference3:[317,328,330],reference_set:[380,393],referenced_label:178,referringentri:[57,71],refin:[32,224,336],reflect:[23,26,167,194,310,334,343,359],reflex:[119,179,343],reflexive_appear:359,refrain:359,refrescant:385,refus:366,reg:189,regard:[42,214,304],regardless:[109,159,359,398],regex:[57,71,129,276,297,298,312],regex_remove_punctu:195,regexflag:303,regexp:[25,57,58,59,60,61,62,63,64,65,67,68,69,70,71,74,77,79,80,81,82,83,85,87,88,89,91,95,96,97,100,101,102,103,105,106,107,118,196,219,293,295,297,313,343,354,359,395],regexp_pattern:354,regexp_span_token:313,regexp_tagg:219,regexp_token:[303,396],regexpchunkpars:[3,28],regexpchunkrul:[28,354],regexppars:28,regexpstemm:204,regexptagg:[212,219,363,371,395],regexptoken:[57,58,65,70,97,303,359],regim:214,region:[57,98,117,179],region_domain:[105,403],regist:[116,117,130,131,341,359],register_tag:[130,361],regress:[38,176,257],regular:[1,3,6,23,25,28,29,33,57,59,60,63,64,68,69,70,71,74,80,82,83,84,85,90,91,96,103,105,106,107,129,189,204,213,219,293,294,295,297,302,303,312,313,334,343,354,359,367,369,385,388],rei3i:202,rei:385,reilli:[0,406,408],reivindicar:385,reject:[174,359,363,373],rel:[29,53,57,59,79,87,102,105,109,115,132,146,161,173,178,179,183,184,186,211,224,226,228,313,325,328,334,359,362,363,383,385,387,388,399,404],rel_nam:179,relat:[57,62,64,67,71,81,88,89,90,105,111,118,119,129,148,161,167,169,173,178,179,183,184,188,189,276,294,325,352,353,359,362,363,366,367,385],relation_list:[57,64],relationship:[52,57,62,105,119,169,353,403],relax:396,reldict:[189,388],releas:[109,359,406,410],relev:[30,57,100,142,179,183,195,338,352,390],relextract:[177,347],reli:[132,178,359],reliabl:408,relief:115,religion:[359,386],reload:224,relsym:[189,388],remain:[8,33,57,61,71,85,97,109,117,129,132,170,171,173,176,214,326,327,333,359,361,363,386,403],remaind:[153,154,159,162,164],remaining_text:[170,171],remark:195,rememb:[7,170],remi:[297,396],remind:363,remov:[26,28,51,52,57,85,109,116,117,118,119,123,149,161,167,170,173,178,183,188,194,196,197,198,199,200,201,202,204,228,259,297,302,312,314,326,333,334,336,357,359,361,374,377,394,396,399,408],remove_blank:314,remove_by_address:161,remove_callback:117,remove_child:[116,117],remove_choic:78,remove_dupl:[123,184,194],remove_empty_top_bracket:[334,399],remove_handl:297,remove_preprocessed_fil:78,remove_punctu:42,remove_unitary_rul:119,remove_vari:[118,36
6],remove_widget:117,ren:94,renam:[118,181,188,366,384],rename_var:[118,366],rename_vari:[118,366],render:[117,189,363],renumber_sent:178,reorder:[325,333,408],reparameteris:46,repars:[178,180],repeat:[46,50,102,118,119,297,302,317,326,327,341,396],repeatedli:[170,171,176],repetit:[25,28,185,391],repl:28,repl_xx:200,repl_xx_back:200,replac:[1,6,13,16,28,51,57,64,71,74,109,115,116,117,118,119,125,127,131,159,164,167,181,184,188,194,198,199,202,207,227,297,324,333,339,340,353,359,361,382,396,399],replace_al:128,replace_back:200,replace_bound:[181,188],replace_child:[116,117],replace_qu:125,replace_to:200,replace_xmlent:[57,74],replacement_tag:227,replic:142,report:[28,86,123,153,194,212,318,359,396,408],repp:295,repp_dir:304,repp_dirnam:304,repp_output:304,repptoken:304,repr:[71,118,189,224,227,354,359,399],repres:[2,13,14,15,16,25,30,33,34,41,49,51,57,62,67,71,82,89,91,98,100,105,107,118,119,122,123,141,156,157,159,162,167,168,169,170,173,176,177,183,185,188,193,194,214,217,220,221,294,295,300,302,310,316,322,323,325,326,327,331,333,334,343,355,359,365,366,367,369,380,382,385,388,391,397,403],represent:[13,14,16,28,32,44,57,75,103,105,117,118,122,130,144,159,161,166,167,169,173,175,176,178,179,180,181,185,188,191,194,222,226,227,299,314,316,334,335,343,359,365,366,369,374,378,385,388,390,391,399],reprfunc:115,repris:398,reproduc:[132,133,293,377,381],request:[10,102,117,131,152,310,341,359,408],requir:[10,25,33,46,52,53,57,64,71,84,102,109,117,126,132,135,139,141,143,145,146,166,168,169,176,179,183,193,196,198,201,210,214,224,293,294,295,302,306,317,321,324,325,327,334,336,339,341,344,346,352,355,357,359,360,361,366,370,374,381,396,407],requiresf:[57,71],rerank:158,reranker_featur:158,reranker_opt:158,reranker_weight:158,rerankingpars:158,res_similar:[57,105,403],rescal:145,research:[0,57,64,73,103,145,161,201,388,406],research_pap:259,reserv:[35,122,131,159,162,164,176,327,405],reset:[7,8,386],reset_cach:214,reset_id:13,reset_prob:[324,325,326,327],reset_workspac:113,resid:109,resin:357,resiz:[111,116,182],resnik:[57,105,403],resolut:[89,121,178,347,388,408],resolution_test:127,resolutionprov:[127,374],resolutionprovercommand:[127,374,389],resolv:[57,79,87,123,178,188,364,374,403,408],resolve_anaphora:181,reson:299,resourc:[57,83,93,103,109,137,215,359,406],resource_nam:109,resource_url:109,resp:[57,60,71],respect:[38,57,62,71,89,117,122,129,132,143,167,179,214,317,321,325,329,330,334,352,359,363,367,374,388,390],respond:[18,22,23,24],respons:[23,24,60,115,117,126,340,341,359,374,408],responsabilidad:385,rest:[16,33,57,61,119,200,340,341,386],restart:[108,159],restr:13,restrict:[13,15,57,59,71,105,113,118,132,159,162,164,189,334,341,343,366,403],result:[13,15,28,29,52,57,71,74,77,86,98,102,105,106,109,117,118,119,122,123,124,125,126,127,132,139,141,142,143,145,149,156,159,160,161,168,173,174,175,176,179,188,193,194,198,199,203,209,212,221,293,302,311,313,317,318,332,333,334,339,340,343,352,355,357,358,359,363,365,366,369,374,375,377,384,386,390,391,399,403,404],resultado:385,results_list2:357,results_list:357,results_scor:357,resum:[14,159,162,164,168],resumpt:398,retag:210,retain:[53,105,302],retard:403,retir:341,retoken:300,retract:[122,363,370,374],retract_assumpt:[122,374],retract_bind:[118,366],retract_sent:[123,363],retrain:385,retri:341,retries_after_twython_except:341,retriev:[57,62,67,68,71,81,96,105,109,111,133,148,176,180,189,199,216,319,321,337,341,342,359,363,380,408],retry_count:341,retry_in:341,return_conf:217,return_str:[298,301,311,312],returncod:126,retweet:[194,341],retwe
et_count:339,reus:[224,334,365],reuter:[359,377,408],rev:366,reveng:[57,71,367],revers:[115,118,145,312,316,317,334,357,360],revert:298,review:[57,62,81,194,359,386,391,408],review_lin:89,reviewlin:89,reviewscorpusread:[57,89],revis:[111,365,366,408],revivalismo:385,revword:155,reward:321,rewards_and_punish:[57,71,367],rewind:[57,67,359],rewrit:408,rewritten:[57,71],rfloor:189,rflorian:211,rgoodtur:176,rh:[14,119,159,162,164,168,175,185,384],rho:[189,332],ri:144,ria:385,ribe:[332,408],ribes_scor:315,rica:385,rico_amorim:385,rid:[42,123,363,370],ridden:396,ridg:405,ridzuan:388,riedel:408,riesa:321,rifir:380,right:[7,8,14,15,24,25,28,30,57,64,110,111,117,119,131,132,144,154,159,161,164,168,170,171,173,174,175,189,195,206,215,219,224,312,325,326,334,336,343,346,349,354,359,365,366,385,388,396,399,401,408],right_arc:173,right_children:161,right_context_tag_pattern:28,right_edg:[14,159,164,168],right_pad_symbol:[132,343],right_sibl:[334,399],right_tag_pattern:28,rightarc:173,rightarrow:117,rightmost:[8,154,169,171,222],rightward:[116,117,169],ring:383,ringer:367,riot:194,risc:205,risca:205,rise:[46,49,374,387],risk:359,ritheu:107,ritualiza:385,river:[179,352],rkt:301,rl:321,rlevi:137,rlm:189,rmdir:[359,361],rnc:216,rng:[50,214],ro2:202,ro:[57,104],road:405,roar:359,robber:390,robert:[298,312,322,323,324,325,326,327,408],robin:408,robinson:408,robust:[132,198],rocl:94,rogberi:388,roger:391,roi:357,role:[57,68,71,79,87,103,196,353,359,363,367,387,388,404],roles_demo:189,roleset:[57,68,79,87,359,387],roleset_id:[57,79,87],roll:359,romanc:[57,64,359,385,386],romanian:[57,104,206,359,394],romanianstemm:206,rood:107,roof:116,roof_color:116,roof_fil:116,room:[359,401],root:[27,28,29,57,58,59,60,61,62,63,64,65,67,68,69,70,71,74,75,76,77,78,79,80,82,83,85,87,88,89,90,91,93,95,96,97,98,100,101,102,103,105,106,107,111,116,117,119,159,160,161,163,164,169,170,172,173,178,191,201,334,335,336,343,349,353,359,362,370,399,403],root_hypernym:[105,403],root_label:[28,29,57,68],root_semrep:[191,390],roottyp:[57,71],rose:387,rosneft:90,rot:[57,71],roth:408,rotoka:[359,397],roughli:359,rouko:317,round:[145,317,323,324,325,326,328,332,338,341,377,379,398],rover:382,row:[57,62,115,155,179,194,335,352,380,405],row_index:115,rowconfig:115,rowconfigur:115,rowvalu:115,royalti:408,rr1:202,rr93:145,rr94:145,rr:118,rrb:[160,172,298,312],rrc:359,rsaquo:189,rsb:[298,312],rslp:[196,385],rslpstemmer:[205,385],rsquo:189,rst:229,rstrip:311,rt1:202,rt:[359,396],rte1:[90,359],rte1_dev:359,rte1_test:359,rte2:[90,359],rte2_dev:359,rte2_test:359,rte3:[90,359],rte3_dev:359,rte3_test:359,rte:[37,57,408],rte_classifi:30,rte_featur:37,rte_pair:37,rtecorpusread:[57,90],rtefeatureextractor:37,rtepair:[37,57,90,359],rtext:170,rthird:397,rtupl:[189,388],rtype:[47,57,71,100,122,132,133,134,135,145,146,148,153,157,158,167,183,189,213,317,333],ru2:202,ru:[57,104,209,216],rude:18,rude_chat:21,rudnick:321,rue:365,rug:[42,384,393],rule:[2,7,8,13,14,15,25,28,35,57,71,107,119,145,159,162,164,167,168,169,176,179,195,196,202,211,212,223,224,226,228,251,293,304,334,352,359,363,365,370,384,403],rule_based_demo:167,rule_dictionari:202,rule_index:205,rule_tupl:202,rulebas:304,ruleformat:[212,224],rulescor:212,ruleset:14,rummag:[358,359],run:[10,14,25,28,52,57,71,102,108,111,122,129,160,161,168,174,175,176,183,199,224,230,318,322,323,324,325,326,334,340,343,346,359,363,364,367,373,378,384,385,393,394,399,403,407,408],run_profil:164,run_var:378,rung:383,runtim:206,runtimeerror:360,ruppenhof:[57,71],rural:[109,359,361],rush:359,russia:410,russian:[57,90,104,206
,209,311,394,408],russian_russki:[57,101],russianstemm:206,rutilant:385,rwth:137,ryan:173,s0:[119,123,202,214,297,343,363,396],s0_sigma:119,s10:396,s11:396,s12:396,s14:[57,104],s15:[57,104],s1:[57,105,119,123,145,148,200,297,316,343,363,374,380,390,396,403],s1t:202,s2:[57,105,145,148,200,316,343,363,374,380,390,396,403],s3:[148,200,343,363,380,390,396],s4:[380,390,396],s5:[380,396],s6:396,s7:396,s8:396,s9:396,s:[7,8,10,14,22,23,25,27,28,29,30,33,34,35,36,38,39,42,43,44,47,51,52,53,55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,73,74,76,77,78,79,80,81,82,83,84,85,87,88,89,90,91,92,93,96,97,98,100,102,103,104,105,106,107,109,111,115,116,117,118,119,122,123,125,129,130,131,132,133,139,141,142,143,145,146,149,156,158,159,160,162,164,166,167,168,169,170,171,172,174,175,176,178,179,180,181,183,184,187,188,191,198,199,201,202,203,204,205,206,208,209,211,212,213,214,219,222,224,227,228,230,274,293,294,295,296,297,298,299,301,302,303,305,306,308,309,310,311,312,313,314,316,317,318,321,322,323,324,325,326,327,330,331,332,334,335,340,341,342,343,346,349,350,351,352,353,354,355,356,357,359,361,362,363,364,365,366,367,368,370,372,374,375,377,378,380,381,384,385,387,388,390,391,392,393,395,396,397,398,399,400,401,403,404,405,406,408],s_0:214,s_1:145,s_2:145,s_:214,s_i:[145,214],s_j:214,s_retriev:[180,390],s_t:214,sa1:[98,359],sa2:[98,359],sa:94,sad:194,saem:385,safe:[115,132,312,328],safeappend:186,saga:353,sahadutha:357,saharan:388,sai:[22,36,57,71,132,162,174,181,188,202,359,363,367,391,403],said:[55,57,160,219,312,346,359,377,387,388,396],sain:145,saint:107,sale:[90,408],salienc:142,salienta:385,salim:317,salin:105,salmela:408,salt:[105,357],salti:105,salvatori:107,sam:[125,357,382,408],same:[14,24,25,28,29,57,68,71,85,89,98,105,110,111,115,118,119,127,129,132,133,135,139,141,144,147,148,154,159,168,169,170,173,175,176,188,193,203,219,221,224,226,259,276,293,295,302,306,313,314,317,324,325,326,327,333,334,335,336,343,354,355,357,359,365,366,371,374,380,382,384,386,390,396,397,399,403,404],same_r:379,samp:176,sampl:[24,33,35,38,57,94,98,109,133,147,176,203,214,274,293,325,326,327,340,341,347,408],sample_grammar:[361,365,370,374,390],sample_n:37,sampletoscreen_demo:340,sampson:176,san:388,san_sebasti:388,sandra:173,sandt:178,sao:[359,385],sara:36,sarrail:388,sat:[354,372,379,384],satdemo:183,satellit:[57,105,208],satisfact:183,satisfi:[57,89,122,132,139,145,173,183,227,359],saturdai:[313,359,396],saturn:107,savag:358,save:[64,109,111,173,193,213,217,224,293,312,353,361,363,365,369],save_analyz:194,save_classifi:193,save_fil:193,save_loc:217,savings_bank:[344,405],saw:[25,159,162,164,171,222,334,365,368,372,384,385,399],sax:313,saxon:107,saxutil:313,sayoud:198,sb:117,sbar:[359,365,387],sbird1:[109,328],sbj:[173,359,387,400],sbl:206,sbquo:189,sc:385,scalar:226,scalar_inc_dec:195,scale:[33,53,62,123,132,135,145,176,310,317,363,379],scan:154,scan_dragto:115,scan_mark:115,scanner:365,scannerrul:162,scarc:195,scaron:189,scatter:369,scc:403,scenario:375,scene:358,schema:[179,352],schemaloc:353,scheme:[57,77,314,408],schneider:[57,61],school:[98,369,408],schuler:[57,103],schuster:321,schutz:[53,143,357,380],scienc:[94,199,201,359],science_fict:[359,386],scikit:[38,40,355],scikitlearn:[30,40],scipi:[143,310,343,407],scipylib:407,scitaminea:381,scon:[57,71,367],scope:[28,180,194,363],score1:[317,321],score2:[317,321],score:[25,26,28,29,53,57,76,89,93,105,132,133,135,138,140,142,143,148,149,163,167,169,193,194,195,210,211,212,224,293,310,317,318,321,326,327,328,330,332,333,348,354,357,359,369,377,379,391,398,403,
408],score_fn:[53,143],score_ngram:[53,357],score_val:195,scored_4gram:357,scored_synonym:[57,76],scorer:167,scott:141,scotti:359,scout:358,scratch:[52,158,359],screen:[57,62,152,340,341,359],screen_nam:341,screenanam:[57,100],screw:359,scribbl:359,scribe6:137,script:[57,71,83,116,117,131,182,301,312,313,330,332,367,391,403],scriptur:403,scroll:117,scrollbar:[115,117],scrollregion:117,scrollwatcherwidget:117,sd500:89,sdf123:402,sdf:402,sdot:189,se:[57,105,161,359,385],sea:[66,179,357],sean:[145,408],search:[24,28,34,39,57,71,76,89,102,104,105,109,114,126,129,167,168,174,199,213,218,221,276,293,320,326,327,332,333,340,341,343,346,358,359,361,367,374,403,408],search_demo:340,search_leav:[276,294],search_tweet:341,searchpath:129,seat:359,seattl:81,sebastian:408,sec:341,second:[23,25,33,34,41,52,57,67,71,76,82,98,109,125,126,127,132,179,181,187,188,189,190,221,226,303,307,331,338,352,359,361,363,370,374,378,408],second_on:399,secondari:[66,297,359],secret:[341,385],secript:[116,117,182],sect:189,section:[2,23,44,57,58,61,63,65,70,73,74,81,84,85,92,97,102,105,145,179,310,326,341,346,352,359,365,385,396,408],secur:[388,408],sed:312,see:[12,14,28,33,34,38,42,46,53,55,57,58,62,63,65,69,70,71,73,74,78,81,82,84,90,92,93,97,102,103,104,105,106,108,109,110,111,115,117,118,119,122,123,125,126,129,131,132,133,135,143,145,158,159,162,164,166,170,171,174,176,179,181,188,189,201,203,206,210,212,213,214,216,217,219,220,221,224,229,280,293,295,302,303,307,310,322,323,324,325,326,327,330,334,336,338,339,340,341,343,346,352,353,359,363,364,365,366,367,370,371,374,378,381,382,385,389,390,394,395,396,401,403,407,408],see_also:[57,71],seealso:[33,293],seed:[132,133,139,293,327,381],seek:[24,102,109,160,214],seekabl:109,seekableunicodestreamread:109,seen:[35,132,139,176,209,219,355,361,365,391,403],seg1:148,seg2:148,seg:309,segm:78,segment:[116,140,142,200,247,276,309,310,312,317,318,333,348,359,380,408],segment_fil:309,segment_s:309,seguint:385,segundo:385,sehe:373,sehen:373,sei3i:202,sei:385,seizur:66,seja:385,seldom:195,select:[2,8,13,32,38,57,59,64,71,79,82,87,98,105,106,111,115,117,152,154,159,162,164,168,175,176,189,203,214,228,276,294,303,333,339,340,341,346,352,357,359,366,381,384,387,403],select_anchor:115,select_clear:115,select_includ:115,select_set:115,selectbackground:115,selectborderwidth:115,selectdownloaddirmessag:111,selected_row:115,selectforeground:115,selection_anchor:115,selection_clear:115,selection_includ:115,selection_set:115,selectkbest:38,selector:[294,359],self:[14,31,33,44,52,57,77,78,97,102,105,106,109,110,111,115,117,118,122,123,127,129,130,132,139,157,159,167,176,181,183,184,187,188,202,210,219,220,296,327,330,333,334,355,359,361,366,375,388,403],selkirk:[299,307],seller:408,selrestr:359,sem2:390,sem4:363,sem:[0,57,91,109,118,122,123,124,126,127,128,351,352,361,363,364,365,366,370,371,374,378,382,385,388,389,390,410],sem_tag:91,semant:[16,17,28,57,68,71,91,103,109,123,145,177,178,179,180,183,185,191,194,228,295,347,350,359,361,365,366,367,369,406,408,410],semanticb:408,semcor:[57,403],semcor_:403,semcorcorpusread:[57,91],semcorsent:91,semcorwordview:91,semi:189,semi_rel2reldict:[189,388],semicolon:276,semin:211,semkei:[191,390],sempr:385,semrel:390,semrep:390,semtyp:[57,71,184,370],semtype_fil:[123,184],semtype_inherit:[57,71],semvalu:390,sen:[299,307],senat:359,send:[340,341,359],senna:[30,209,273,408],senna_main:39,senna_path:39,sennachunktagg:218,sennanertagg:218,sennatagg:218,sens:[30,33,57,71,79,87,92,105,122,132,141,145,169,178,196,210,321,344,359,363,367,387,403,408],sensat:394
[Machine-generated Sphinx search index (searchindex.js): a minified mapping from stemmed search terms (sentenc, snowballstemm, treebanktoken, ...) to the numeric indices of the documentation pages that contain them. The payload is regenerated wholesale on every docs build and contains no hand-authored content, so it is elided here.]
tter:[57,58,60,64,65,77,85,96,97,98,156,359],utteranceid:[57,98,359],utterli:195,uu:[118,161],uuml:189,uw1:359,uw:[66,391],ux:359,v0:[57,71,390],v13a:317,v14:301,v1:[141,167,370,378,390],v2:[105,167,370,378,390],v3:[39,167,218,378,390,397,404],v4:378,v5:378,v:[28,33,51,52,57,66,71,77,86,87,93,105,107,118,137,142,148,161,167,171,177,181,183,185,188,208,209,216,224,321,326,334,352,353,354,359,361,365,366,367,370,372,377,378,381,384,385,387,388,390,392,393,397,399,400,403,405,408],v_head:326,v_kei:377,v_non_head:326,va11:359,va4:359,va:385,vacanc:326,vacancies_at:326,vacant:326,vacuous:363,vader:[192,194,408],vader_lexicon:195,vaderconst:195,vagu:24,vai:385,val0:390,val1:390,val2:390,val3:390,val:[33,57,93,109,118,129,176,177,183,334,352,361,366,390,397,399],val_dump:[179,352],val_load:[179,352],valenc:[57,71,195],valian:353,valid:[25,28,57,71,78,115,117,129,145,160,173,176,181,187,188,202,208,224,294,299,300,306,322,323,324,325,326,338,341,343,375,404],valid_phras:333,validate_sig_len:125,validate_syl:307,valu:[14,28,29,30,32,33,34,35,38,39,43,44,46,51,52,57,59,60,63,64,66,67,68,69,70,71,72,74,77,78,80,82,83,90,91,96,98,100,103,105,106,107,109,110,111,115,116,117,118,119,126,129,132,139,141,142,143,144,145,146,147,148,153,159,160,164,168,169,174,175,176,177,178,179,180,183,184,188,189,191,195,210,219,221,224,226,227,228,253,293,294,302,306,307,314,317,318,319,321,322,323,324,325,326,333,334,336,339,340,341,343,354,359,365,369,370,373,374,377,380,388,390,391,397,399,401,403],valuat:[109,122,124,177,179,183,361,374],valuation1:390,value_handl:118,value_str:90,valueerror:[28,109,117,118,119,129,144,147,176,206,226,227,305,334,354,359,361,366,386,387,390,396,399],values_in_chart:[144,380],vamo:385,vamoo:396,van:[178,189,335,388],vannoord:42,var1:178,var2:178,varanda:385,varex:183,vari:[30,57,68,105,147,359],variabl:[13,14,16,28,29,33,39,46,55,57,85,100,102,115,117,118,119,125,127,129,141,159,162,164,168,170,171,175,176,178,181,183,184,185,187,188,206,218,219,220,302,309,317,340,346,359,361,364,365,370,374,384,386,387,390,399],variable_typ:178,variablebinderexpress:188,variablebindingexcept:[187,370],variableexpress:[183,187,188,366,370,378],varianc:[48,176,359],variance_charact:319,variant:[36,57,66,105,143,148,183,214],variat:[15,46,50,53,176,189,302,330,391],varieti:[28,55,57,82,111,117,154,167,176,293,341,359,380,385,388,399,403],variou:[33,36,57,105,111,123,124,167,216,317,323,324,325,326,327,333,359,363,394,399,408],various_sent:36,varnam:183,vb:[144,160,163,212,215,220,227,359,362,363,370,371,380,387,393,396,406],vbb:349,vbd:[28,57,61,172,173,176,212,219,222,354,359,387,396,406],vbg:[160,212,219],vbn:[28,359,396,401],vbp:[160,172,216,372,400],vbz:[39,160,172,174,209,215,216,217,218,220,349,359,396],vc:[163,362],ve:[57,67,105,108,298,312,359,391,408],vector:[33,34,40,41,46,47,48,49,50,51,167,369],vectorspac:46,vectorspacecluster:[48,49,50,51],vee:66,vega:201,vegetoalkali:381,vehicl:359,veio:385,vel:385,venda:385,vender:385,vennemann:299,ventura:385,ver:385,verb:[25,57,68,71,77,86,87,103,105,153,196,198,199,208,209,212,213,216,219,349,354,357,359,367,384,387,399,400],verb_group:[105,403],verb_head:68,verb_stem:68,verb_t1:[198,199],verb_t2:[198,199],verb_t3:[198,199],verb_t4:[198,199],verb_t5:[198,199],verb_t6:[198,199],verbnet:[57,408],verbnetcorpusread:[57,103],verbos:[28,32,33,57,105,109,111,122,123,126,127,128,129,132,134,159,160,162,164,166,168,170,171,172,173,174,175,178,179,183,184,185,193,212,213,214,215,219,220,224,227,302,308,309,338,340,341,342,343,361,363,365,374,388,403],verbrak:3
88,verbsfil:[57,87],verbspecif:359,vercelli:107,verd:385,verdad:385,verdadeirament:385,veri:[14,24,25,33,57,102,104,123,132,134,173,195,200,228,317,355,357,359,365,366,371,381,385,391,398,403,406],verif:341,verifi:[253,341,363],verissimo:205,verkhogliad:408,versa:169,version:[0,25,28,29,32,52,57,60,64,74,82,90,92,103,105,107,109,123,129,132,135,159,161,164,176,178,179,181,188,199,203,206,212,222,224,293,295,309,313,332,334,341,343,352,353,359,362,363,374,385,387,388,395,403,406,407,408],versu:148,vertebr:[57,105,343,403],vertebra:399,vertex:214,vertic:[116,117,334,335,336],vertmarkov:[334,336,401],vez:385,vg2main:[57,101],vg:[216,400],vi2:202,vi:[28,107,216,385],via:[29,97,105,106,109,118,167,180,189,213,214,215,293,321,341,359,366,385,408],viagem:385,vice:[169,385],vicissitud:359,victoria:388,vida:385,video:[359,408],vie0:202,vierem:385,vietnames:[57,101,311],view:[52,57,58,59,60,63,65,68,70,71,73,74,77,78,79,81,84,87,91,92,97,102,106,117,257,326,336,341,358,367,385],vigil:396,villanueva_de_la_serena:388,vincent:[107,322,323,324,325,326,327,361,363,378,390],vindicta:107,vindo:385,vinken:[163,359,362],vinyal:321,viqr:[57,101],vir:385,virg:385,virgin:385,virtual:[379,407],virtualenv:407,virtutibu:107,vis3j:202,visibl:[115,117,118,131,366],visit:[181,188],visit_structur:[181,188],visitatori:381,vista:346,visual:[113,117,194,335,336,364,369,408],visualis:[57,105],viterbi:[156,214],viterbipars:[175,384],vitii:107,viveu:385,vkulyukin:148,vkweb:148,vl2:334,vm:379,vmh0:359,vmi:359,vmis3:359,vmod:[163,362],vn:[57,103],vn_31_2:359,vncl:359,vnclass:[57,103,359],vnclass_id:[57,103],vnv:388,voar:385,voc:[274,353],vocab2:[132,139],vocab:[132,133,137,139,293,369,377],vocab_data:377,vocabulari:[132,133,135,136,137,148,246,274,325,377,385],vocabulary_introduct:310,vocabulary_text:133,vocal:[57,60,71,403],voic:[87,387,403],vol:[176,198],volta:385,volum:340,voluntari:345,vote:357,vovhund:403,vovs:403,vowel:[66,142,202,206,299,307],vp:[29,39,57,61,86,101,119,160,172,216,218,294,334,335,350,352,354,359,361,363,365,366,368,372,378,384,387,390,393,399,400,401],vpnp:350,vppp:350,vpser:350,vs:[153,316,336,366,384],vulner:[359,408],vvn:349,w0:354,w14:[317,348],w16:318,w1:[53,143,357],w2:[53,143,357],w3:[53,77,143,179,353,357],w4:[53,143],w:[30,36,42,55,57,60,66,77,85,88,89,106,109,111,118,142,183,209,298,301,303,310,317,318,321,330,332,354,357,359,361,364,365,366,377,378,381,384,385,397],w_i:221,wa:[28,33,36,40,42,57,64,69,71,84,89,90,95,105,109,115,116,117,122,129,132,139,160,167,170,171,176,179,194,198,199,200,201,204,209,214,218,219,293,298,299,301,302,312,314,317,321,322,323,324,325,326,332,334,348,352,353,358,359,361,366,374,378,379,384,388,391,396,403,404,408],waaaaayyyi:[297,396],waaayyi:[297,396],waaw:[198,199],wagon:403,wai:[8,24,32,105,108,115,117,132,141,145,156,158,169,176,185,200,206,228,276,294,295,299,324,325,332,333,334,343,359,361,363,365,370,374,375,385,391,398,403],wait:[132,134,188,341],walk:[118,123,125,160,184,335,358,363,364,365,366,368,374,378,382,389,390,403],walk_x:364,wall:[86,160,308,359],waltz:359,wan:[298,312],wander:358,wang:[321,408],wanna:396,want:[36,57,59,71,89,100,102,117,119,132,133,134,135,141,158,176,179,193,194,218,219,346,352,357,359,361,363,364,366,374,384,385,388,395,398],war:[22,160,324,325,326],ward:317,warm:194,warn:[57,71,102,105,122,129,228,327,343,354,374,408],warranti:391,warren:[179,352,391],washington:[81,257,359,388,408],wasn:[195,312,396],wasnt:195,wast:374,wat:[57,64,332],watch:[57,64,117,137,391,408],watcher:117,water:405,waterloo:388,wav:[57,98,359],wave:359,
waw:201,wb:[361,369],wd_01:228,wd_0:228,wdt:396,we:[25,28,33,36,57,64,66,68,71,77,89,102,105,106,117,119,122,132,133,134,135,139,141,153,161,176,177,179,180,183,185,193,194,200,209,214,221,230,294,295,300,304,312,313,317,320,321,326,328,336,343,349,350,352,355,357,358,359,360,361,362,363,364,365,366,367,369,370,371,374,377,380,382,384,385,386,387,388,391,395,396,398,399,403,408],weak:361,weakest:89,weakref:361,weather:359,weather_typ:359,web:[10,57,64,81,88,89,109,145,203,229,357,359,360,408],webdoc:142,weblog:195,websit:[57,64,203,311,408],webtex:359,webtext:[57,85,101,359],webview_fil:[57,64],wee:349,weed:326,week:[131,388,408],weekend:359,weekli:72,wei:[317,321],weierp:189,weight:[29,33,34,38,41,57,105,111,118,141,142,146,147,148,158,167,198,211,214,217,317,318,322,323,324,325,326,328,359,374,383],weight_senses_equ:[57,105],weighted_kappa:141,weighted_kappa_pairwis:141,weird:311,weissweil:200,weka:[30,408],wekaclassifi:[44,355],welch:214,welcom:24,well:[57,68,92,105,117,132,179,227,299,302,313,321,336,352,359,366,374,377,385,387,396,398],went:[36,344,405],werd:388,were:[28,29,33,57,71,72,74,80,84,90,105,106,117,127,129,155,159,164,167,169,176,200,201,227,274,312,343,358,359,367,396,403],weren:195,werent:195,weshalb:316,west:107,western:98,western_lang:301,wfeat1:228,wfeat2:228,wfst:[365,370],wh:[353,365],wha:298,whad:298,whale:358,what:[14,15,28,30,33,36,57,71,77,78,95,106,111,115,116,119,129,130,132,134,135,141,146,159,160,164,167,168,170,209,214,215,218,220,302,352,358,359,361,363,374,384,385,395,397],whatev:168,whatwasthat:384,wheat:359,when:[2,14,25,28,30,32,33,34,36,38,41,43,46,50,52,53,57,68,71,95,98,102,104,105,109,111,114,115,116,117,118,122,123,126,129,132,133,135,139,145,146,147,157,159,162,164,166,167,168,169,170,171,174,175,176,178,183,188,202,212,213,214,215,219,221,226,228,274,294,295,297,301,302,303,305,306,312,317,321,322,323,330,332,333,334,335,336,338,341,343,346,353,354,357,358,359,361,363,365,366,367,371,374,375,384,393,395,396,399,403,404],whenc:109,whenev:[33,102,109,117,131,164,224,359,366,399],where:[14,15,25,28,29,32,33,34,38,44,52,57,59,68,71,74,76,82,90,98,102,105,106,108,109,111,115,116,117,118,119,122,123,129,132,145,146,148,153,159,160,162,166,167,168,172,173,174,175,176,177,179,181,184,185,189,193,194,199,214,217,219,221,224,226,227,228,254,293,294,296,297,300,302,303,306,310,311,312,314,317,319,322,324,325,326,327,330,331,334,335,336,338,339,341,342,343,344,352,359,365,366,367,374,376,379,388,390,403,408],wherea:141,wherev:359,whether:[2,13,15,25,37,38,43,44,52,91,111,115,116,117,119,122,127,129,145,159,161,170,176,183,188,195,214,224,294,295,297,302,327,335,338,341,343,359,363,365,366,374,380],which:[2,7,8,10,13,14,15,23,25,28,29,30,31,32,33,34,37,41,43,44,46,49,50,51,52,53,55,57,58,59,61,62,63,65,67,68,70,71,73,74,75,76,77,78,79,81,84,85,87,89,90,91,92,97,98,102,103,105,106,108,109,111,115,117,118,119,122,123,125,126,127,129,132,133,135,139,141,143,149,153,156,159,160,161,162,165,167,168,169,170,171,173,174,175,176,177,178,179,181,183,187,188,189,194,199,200,203,206,209,210,211,212,213,214,215,219,221,224,226,227,228,293,294,295,300,301,302,303,306,312,316,317,318,319,321,323,325,326,327,328,330,332,334,335,338,339,341,343,350,352,354,355,357,358,359,361,363,364,365,366,367,369,374,377,380,385,387,388,390,394,396,399,403,404,405,407,408],whichev:[122,169,318],white:[98,115,293,314,320,388,397],white_hous:388,whitespac:[25,28,29,57,58,68,85,102,117,118,160,161,172,189,194,295,302,303,305,306,312,314,318,334,343,359,396,397,399],whitespacetoken:[57,58,62,6
7,88,97,194,295,303,313],who:[87,95,111,160,345,359,365,367,391],whoever:357,whole:[36,57,71,78,105,314,343,357,359,386,399,403],whose:[7,8,28,30,33,35,36,52,57,59,60,61,62,63,64,66,67,68,69,70,71,72,74,77,78,80,81,82,83,85,88,89,91,96,97,103,105,106,107,109,111,115,117,118,119,129,159,164,167,170,171,175,176,179,189,194,206,219,226,227,228,314,317,334,338,352,359,361,365,366,367,387,399,403],whquestion:359,wht:[98,359],why:[23,203,302,316],whyi:388,wide:[30,81,88,111,178,304,359,380],wider:[117,359],widget:[115,116,117],widgetnam:117,width:[28,32,111,115,116,117,148,159,162,164,175,293,310,335,343,366],wife:359,wiki:[145,313,343,407,408],wikipedia:[145,313,343],wild:169,wildcard_fd:53,wilk:[57,78],wilkdom:[57,78],wilkwilczi:[57,78],william:[107,145,382],win:407,wind:[153,357,358],window:[2,7,8,18,111,113,116,117,148,189,201,226,293,310,334,340,341,346,380,406,408],window_len:310,window_s:[53,293,357],windowdiff:[148,380],wink:359,winkelen:400,winkler:145,winkler_exampl:145,winkler_scor:145,winlen:226,winresearch:[57,101],winter2008:137,winter:358,wipe_instr:359,wipe_mann:359,wir:373,wisdom:[24,152],wise:[293,332,359],wish:[2,102,117,293,336,359,361,366,396],wit:391,withdraw:359,within:[28,42,53,57,79,87,96,103,105,107,109,117,119,129,145,169,188,189,202,214,253,343,359,360,361,365,374,388,403],without:[52,57,74,78,91,108,115,118,131,132,139,145,166,176,188,189,195,198,201,202,214,274,302,317,350,351,355,359,366,374,377,386,391,397,399,408],witten:[132,135,137,176,377],wittenbel:137,wittenbellinterpol:[132,135,377],wittenbellprobdist:[176,386],witti:391,wlement:106,wmt14:[317,348],wmt15:318,wmt16:318,wmt49:318,wn:[57,105,343,399,403,404,405],wninput5wn:105,wnl:208,wo:388,wolf:[160,172,213,403],wolfgang:321,woman:[364,369,374,378,390,404],won:[36,119,195,276,299,359,391,396,408],wonder:[299,358,406],wont:195,word1:[57,97],word2:[57,97],word2vec:369,word2vec_sampl:369,word3:[57,97],word:[16,24,26,27,29,30,33,36,37,39,43,53,55,57,58,59,60,61,62,63,64,65,66,67,68,70,71,73,74,75,76,77,78,79,80,81,83,84,85,86,87,88,89,91,92,96,97,98,99,100,101,104,105,106,107,111,113,114,118,119,132,133,135,136,137,139,143,145,146,153,155,156,158,159,160,161,166,167,169,172,173,174,176,184,189,193,194,195,196,197,198,199,200,201,202,203,204,205,206,208,209,211,212,214,215,216,217,218,219,221,222,224,226,227,228,274,293,295,299,300,302,303,304,307,309,316,317,320,322,323,324,325,326,327,328,330,331,332,333,334,335,343,344,346,350,353,357,358,362,363,365,366,367,370,376,377,380,381,383,386,387,391,394,398,399,404,408],word_1:214,word_:[57,61],word_and_tag_pair:158,word_class:326,word_class_:325,word_class_t:[325,326],word_fd:[53,357],word_find:[155,381],word_index_in_sourc:322,word_index_in_target:322,word_indic:178,word_n:214,word_path:77,word_penalti:333,word_po:332,word_pres:369,word_r:297,word_rank_align:332,word_seq:377,word_similarity_dict:293,word_tim:[57,98,359],word_token:[57,58,62,67,70,85,88,89,97,100,101,174,176,194,209,295,299,302,307,312,358,379,396,406],worden:388,worder:332,wordfind:150,wordfreq_app:1,wordlist:[57,81,83,359,408],wordlistcorpusread:[57,81,83,104,108,359],wordlistcorpusview:359,wordnet:[1,10,57,91,93,103,196,280,328,343,347,399,405,406,408],wordnet_:[57,105,403],wordnet_app:1,wordnet_corpus_read:105,wordnet_lch:347,wordnetcorpusread:[57,105,328,403],wordneterror:105,wordneticcorpusread:[57,105],wordnetid:[57,103,359],wordnetlemmat:[208,403],wordnetsyn_match:328,wordnnetdemo:280,wordnum:[79,87,387],wordpunct_token:[295,303,357,393],wordpuncttoken:[57,85,88,89,194,303],words_by_iso639:[
57,83],words_by_lang:[57,83],words_index:316,words_mod:[57,74,78],wordt:388,wordtpl:228,work:[18,25,38,57,67,74,78,100,105,109,110,111,119,126,129,132,133,137,141,176,180,200,221,294,302,316,334,336,343,359,360,361,365,366,367,370,375,377,380,385,387,390,395,396,397,398,399,406,407,408],workaround:[212,224,294],worker:387,workshop:[94,318,320,332,398],workspace_markprod:113,world0:[179,352],world1:[179,352],world:[81,88,129,160,179,212,228,317,321,332,352,359,396],worldli:24,worri:102,wors:[224,358],worst:[82,391],would:[34,57,68,71,118,125,132,141,145,153,159,167,188,211,219,227,228,294,317,334,336,359,363,366,396,399,403],wouldn:[132,195,366,391],wouldnt:195,wound:349,wp:[160,215,218,220],wrap:[38,52,71,117,119,129,131,132,139,143,153,176,297,334,335,343,357,397],wrap_at:71,wrap_etre:[57,80,103,106],wrapper:[15,38,57,71,76,90,110,119,129,131,193,293,341,357,361,406,408],wrd:[57,98,359],wrdindex_list:310,wrinkleproof:381,write:[44,99,102,109,111,115,152,161,194,203,224,293,314,339,340,341,360,366,369,397,403,406],write_megam_fil:34,write_tadm_fil:41,writer:[194,388],writestr:109,written:[34,41,152,179,228,312,339,341,342,352,359,366,374,399,406,408],wrong:[141,317,359,366,384,403],wscore:145,wsd:[0,30,347],wsd_demo:43,wsd_featur:30,wsdm:89,wsj:[209,334,336,359],wsj_0001:[359,387,406],wsj_0002:359,wsj_0003:359,wsj_0004:[359,387],wsj_0199:387,wsj_0200:387,wu:[57,105,321,403],wulfgeat:107,wulfsig:107,wulfstan:107,wup_similar:[57,105,403],wv:369,www:[0,22,42,55,57,60,62,67,72,77,81,83,86,88,89,92,94,104,107,110,117,137,145,166,176,179,200,203,211,251,254,259,282,312,317,318,328,332,334,336,346,351,352,353,359,365,370,374,385,406,407,408],x00:301,x0:[183,390],x1:[370,378,390,399],x2:[118,366,370,378,399],x3:366,x6f:366,x7f:301,x80:359,x81:359,x83:359,x84:359,x85:359,x89:359,x8a:403,x8b:359,x8d:359,x8f:359,x93:359,x97:385,x98:359,x9a:359,x9c:359,x:[15,16,25,32,33,57,71,77,78,98,105,107,109,110,115,117,118,123,125,129,132,139,141,143,155,166,169,176,180,181,183,184,185,188,212,216,253,294,298,300,310,312,319,334,351,354,355,361,363,364,365,366,367,369,370,374,375,378,381,382,384,386,388,389,390,399,402,406],x_50:369,x_t:214,xa0:[189,311],xa3170:359,xa3:359,xa3o:403,xa6:359,xa7:359,xa8:359,xa:107,xab:[385,396],xabencomendem:385,xabespecializarem:385,xabgeracionista:385,xabn:385,xabnow:396,xabo:385,xaboutro:385,xabvamo:385,xabveri:385,xac:403,xad:[189,359],xae:359,xb0:359,xb3:359,xb8:359,xb9:359,xbb:[385,396,397],xbf:[359,397],xc1rio:385,xc3:403,xc3o:385,xc9:385,xcdtulo:385,xcomp:353,xe0:[359,385],xe1:385,xe1n:388,xe1rio:385,xe2:359,xe2mara:385,xe3o:[385,403],xe4:359,xe5:359,xe6:359,xe7:[359,385,403],xe7a:385,xe7o:385,xe9:385,xe9ci:385,xe9cnico:385,xe9dia:385,xe9i:385,xe9nero:385,xe9rcito:388,xe9rica:385,xe9rio:385,xe:[57,105],xeam:385,xedbano:388,xedlia:385,xedn:388,xedna:385,xedpio:385,xef:397,xemac:25,xf1a:388,xf3:385,xf3n:388,xf3n_europea:388,xf3n_n:388,xf3ria:385,xf3rio:385,xf3stuma:385,xf3tese:385,xf5e:385,xf5em:385,xf8ter:403,xf:155,xfamero_1:388,xfamero_2:388,xfasica:385,xfcentement:385,xform:[28,354],xi:[176,189],xiaob:321,xiaowen:89,xmax:117,xmin:117,xml:[55,57,60,61,64,71,74,77,78,79,80,84,87,90,91,92,99,103,106,111,129,313,349,353,359,365,370,387,397,408],xml_escap:313,xml_n:77,xml_post:[57,80,359],xml_tool:78,xml_unescap:313,xmlcorpusread:[57,60,64,71,78,80,84,90,91,103,106],xmlcorpusview:[60,77,78,91,106],xmldoc:[57,60,64,71,77,78,80,84,90,91,103],xmlinfo:111,xmlschema:353,xmltree:359,xmx1024m:166,xmx512m:129,xpath:77,xprime:370,xprime_chas:370,xprime_chases_a_dog:370,xprime_chases_yp
rim:370,xreadlin:109,xs:[183,343],xsd:353,xspace:116,xtag:[57,103],xx:334,xxxl:[57,104],xxxx:89,xxxxyi:386,xxxyyyzz:386,xy:[15,369],xytext:369,y332:183,y:[15,16,33,57,66,71,105,110,117,118,125,148,155,169,176,181,183,188,253,310,334,350,351,355,359,361,363,364,365,366,369,370,374,378,381,382,386,388,389,390,399],ya:[298,396],yaa:[198,199],yacut:189,yahoomesseng:20,yaml:[109,361],yanswer:359,yca3:202,ycn2t:202,ycoe:57,ycoecorpusread:[57,107],ycoehom:107,ycoeparsecorpusread:107,ycoetaggedcorpusread:107,ye:[24,90,217,298,312],yeah:195,year:[57,64,163,217,293,353,357,359,362,396],yellow:388,yen:189,yes_no_s:216,yesterdai:[332,340],yet:[7,8,42,102,109,129,168,170,171,176,334,336,355,359,361,366],yevett:145,yew:317,yf:155,yfi3:202,ygo1:202,yhp1:202,yi:[94,176],yield:[14,33,52,66,149,159,162,164,168,174,176,188,212,214,302,343,387],yl2:202,ylb1:202,yli3i:202,ylp0:202,ymax:117,ymin:117,ymo1:202,ynquestion:359,yoav:408,yolanda:390,yonghui:321,york:[57,72,98,107,160,295,301,303,306,308,312,313,322,323,324,325,326,327,333,393,396],yoshikiyo:321,you:[0,2,7,8,14,23,24,25,28,34,35,39,52,57,60,68,71,74,89,98,100,102,105,111,115,117,118,119,122,130,132,134,158,159,170,171,176,194,195,201,203,206,209,211,219,293,295,297,306,317,334,335,336,340,341,346,350,353,358,359,360,361,363,365,366,367,374,385,387,396,398,399,403,404,407],young:321,younger:293,your:[24,28,35,39,57,100,118,132,141,178,228,317,332,340,341,346,359,363,365,366,370,387,407,408],your_app_kei:342,your_app_secret:342,your_path:309,yourself:369,youtub:[137,408],ypo1:202,yprime:370,yra3:202,yro3:202,yrtsi5:202,ys:386,yspace:116,yte3:202,yti3:202,ytl2:202,yu:[89,94],yuan:321,yuganskneftegaz:90,yuko:90,yum:375,yuml:189,yuppi:401,yvett:145,yview:115,yview_moveto:115,yview_scrol:115,z0:118,z101:365,z102:365,z103:365,z104:365,z105:365,z106:365,z107:365,z10:[364,389],z1:[180,363,364,370,371,378,382,390],z22:390,z2:[363,364,365,370,371,374,382,389,390],z3:[363,364,365,378,382,389,390],z472:378,z4:[364,374,378,382,389,390],z5:[363,364,382],z6:[364,374,382,389],z7:[364,382,389],z8:[364,382,389],z938:378,z9:[364,382],z:[8,15,57,66,105,107,118,125,176,183,214,216,303,351,364,366,378,380,381,386,389,390,399,402],z_:118,za:118,zag:[167,169],zc:24,zdg:362,ze:[57,105,400],zeb:385,zee:66,zen:18,zen_chat:24,zero:[25,28,31,33,52,57,79,83,87,102,105,116,117,141,159,161,168,176,185,188,221,226,227,228,317,322,323,324,325,327,328,333,334,359,361,362,369,378,380,386,403],zero_bas:[161,362],zero_indexed_align:327,zerodivisionerror:386,zeta:189,zh:[66,309],zhao:94,zhifeng:321,zhu:317,zi2:202,zip:[33,52,57,104,109,111,129,145,195,346,369,390,408],zipf:42,zipfil:[109,111,361],zipfilepathpoint:[109,361],zloti:[179,352],zodat:388,zpa:360,zpar:[161,362],zpar_data:362,zsm:403,zul:360,zusammen:[324,325,326],zwaaien:[167,169],zwemmen:400,zwj:189,zwnj:189,zy1:202},titles:["nltk package","nltk.app package","nltk.app.chartparser_app module","nltk.app.chunkparser_app module","nltk.app.collocations_app module","nltk.app.concordance_app module","nltk.app.nemo_app module","nltk.app.rdparser_app module","nltk.app.srparser_app module","nltk.app.wordfreq_app module","nltk.app.wordnet_app module","nltk.book module","nltk.ccg package","nltk.ccg.api module","nltk.ccg.chart module","nltk.ccg.combinator module","nltk.ccg.lexicon module","nltk.ccg.logic module","nltk.chat package","nltk.chat.eliza module","nltk.chat.iesha module","nltk.chat.rude module","nltk.chat.suntsu module","nltk.chat.util module","nltk.chat.zen module","nltk.chunk package","nltk.chunk.api 
module","nltk.chunk.named_entity module","nltk.chunk.regexp module","nltk.chunk.util module","nltk.classify package","nltk.classify.api module","nltk.classify.decisiontree module","nltk.classify.maxent module","nltk.classify.megam module","nltk.classify.naivebayes module","nltk.classify.positivenaivebayes module","nltk.classify.rte_classify module","nltk.classify.scikitlearn module","nltk.classify.senna module","nltk.classify.svm module","nltk.classify.tadm module","nltk.classify.textcat module","nltk.classify.util module","nltk.classify.weka module","nltk.cli module","nltk.cluster package","nltk.cluster.api module","nltk.cluster.em module","nltk.cluster.gaac module","nltk.cluster.kmeans module","nltk.cluster.util module","nltk.collections module","nltk.collocations module","nltk.compat module","nltk.corpus package","nltk.corpus.europarl_raw module","nltk.corpus.reader package","nltk.corpus.reader.aligned module","nltk.corpus.reader.api module","nltk.corpus.reader.bnc module","nltk.corpus.reader.bracket_parse module","nltk.corpus.reader.categorized_sents module","nltk.corpus.reader.chasen module","nltk.corpus.reader.childes module","nltk.corpus.reader.chunked module","nltk.corpus.reader.cmudict module","nltk.corpus.reader.comparative_sents module","nltk.corpus.reader.conll module","nltk.corpus.reader.crubadan module","nltk.corpus.reader.dependency module","nltk.corpus.reader.framenet module","nltk.corpus.reader.ieer module","nltk.corpus.reader.indian module","nltk.corpus.reader.ipipan module","nltk.corpus.reader.knbc module","nltk.corpus.reader.lin module","nltk.corpus.reader.mte module","nltk.corpus.reader.nkjp module","nltk.corpus.reader.nombank module","nltk.corpus.reader.nps_chat module","nltk.corpus.reader.opinion_lexicon module","nltk.corpus.reader.panlex_lite module","nltk.corpus.reader.panlex_swadesh module","nltk.corpus.reader.pl196x module","nltk.corpus.reader.plaintext module","nltk.corpus.reader.ppattach module","nltk.corpus.reader.propbank module","nltk.corpus.reader.pros_cons module","nltk.corpus.reader.reviews module","nltk.corpus.reader.rte module","nltk.corpus.reader.semcor module","nltk.corpus.reader.senseval module","nltk.corpus.reader.sentiwordnet module","nltk.corpus.reader.sinica_treebank module","nltk.corpus.reader.string_category module","nltk.corpus.reader.switchboard module","nltk.corpus.reader.tagged module","nltk.corpus.reader.timit module","nltk.corpus.reader.toolbox module","nltk.corpus.reader.twitter module","nltk.corpus.reader.udhr module","nltk.corpus.reader.util module","nltk.corpus.reader.verbnet module","nltk.corpus.reader.wordlist module","nltk.corpus.reader.wordnet module","nltk.corpus.reader.xmldocs module","nltk.corpus.reader.ycoe module","nltk.corpus.util module","nltk.data module","nltk.decorators module","nltk.downloader module","nltk.draw package","nltk.draw.cfg module","nltk.draw.dispersion module","nltk.draw.table module","nltk.draw.tree module","nltk.draw.util module","nltk.featstruct module","nltk.grammar module","nltk.help module","nltk.inference package","nltk.inference.api module","nltk.inference.discourse module","nltk.inference.mace module","nltk.inference.nonmonotonic module","nltk.inference.prover9 module","nltk.inference.resolution module","nltk.inference.tableau module","nltk.internals module","nltk.jsontags module","nltk.lazyimport module","nltk.lm package","nltk.lm.api module","nltk.lm.counter module","nltk.lm.models module","nltk.lm.preprocessing module","nltk.lm.smoothing module","nltk.lm.util module","nltk.lm.vocabulary 
module","nltk.metrics package","nltk.metrics.agreement module","nltk.metrics.aline module","nltk.metrics.association module","nltk.metrics.confusionmatrix module","nltk.metrics.distance module","nltk.metrics.paice module","nltk.metrics.scores module","nltk.metrics.segmentation module","nltk.metrics.spearman module","nltk.misc package","nltk.misc.babelfish module","nltk.misc.chomsky module","nltk.misc.minimalset module","nltk.misc.sort module","nltk.misc.wordfinder module","nltk.parse package","nltk.parse.api module","nltk.parse.bllip module","nltk.parse.chart module","nltk.parse.corenlp module","nltk.parse.dependencygraph module","nltk.parse.earleychart module","nltk.parse.evaluate module","nltk.parse.featurechart module","nltk.parse.generate module","nltk.parse.malt module","nltk.parse.nonprojectivedependencyparser module","nltk.parse.pchart module","nltk.parse.projectivedependencyparser module","nltk.parse.recursivedescent module","nltk.parse.shiftreduce module","nltk.parse.stanford module","nltk.parse.transitionparser module","nltk.parse.util module","nltk.parse.viterbi module","nltk.probability module","nltk.sem package","nltk.sem.boxer module","nltk.sem.chat80 module","nltk.sem.cooper_storage module","nltk.sem.drt module","nltk.sem.drt_glue_demo module","nltk.sem.evaluate module","nltk.sem.glue module","nltk.sem.hole module","nltk.sem.lfg module","nltk.sem.linearlogic module","nltk.sem.logic module","nltk.sem.relextract module","nltk.sem.skolemize module","nltk.sem.util module","nltk.sentiment package","nltk.sentiment.sentiment_analyzer module","nltk.sentiment.util module","nltk.sentiment.vader module","nltk.stem package","nltk.stem.api module","nltk.stem.arlstem module","nltk.stem.arlstem2 module","nltk.stem.cistem module","nltk.stem.isri module","nltk.stem.lancaster module","nltk.stem.porter module","nltk.stem.regexp module","nltk.stem.rslp module","nltk.stem.snowball module","nltk.stem.util module","nltk.stem.wordnet module","nltk.tag package","nltk.tag.api module","nltk.tag.brill module","nltk.tag.brill_trainer module","nltk.tag.crf module","nltk.tag.hmm module","nltk.tag.hunpos module","nltk.tag.mapping module","nltk.tag.perceptron module","nltk.tag.senna module","nltk.tag.sequential module","nltk.tag.stanford module","nltk.tag.tnt module","nltk.tag.util module","nltk.tbl package","nltk.tbl.demo module","nltk.tbl.erroranalysis module","nltk.tbl.feature module","nltk.tbl.rule module","nltk.tbl.template module","nltk.test package","nltk.test.all module","nltk.test.childes_fixt module","nltk.test.classify_fixt module","nltk.test.conftest module","nltk.test.discourse_fixt module","nltk.test.gensim_fixt module","nltk.test.gluesemantics_malt_fixt module","nltk.test.inference_fixt module","nltk.test.nonmonotonic_fixt module","nltk.test.portuguese_en_fixt module","nltk.test.probability_fixt module","nltk.test.unit package","nltk.test.unit.lm package","nltk.test.unit.lm.test_counter module","nltk.test.unit.lm.test_models module","nltk.test.unit.lm.test_preprocessing module","nltk.test.unit.lm.test_vocabulary module","nltk.test.unit.test_aline module","nltk.test.unit.test_brill module","nltk.test.unit.test_cfd_mutation module","nltk.test.unit.test_cfg2chomsky module","nltk.test.unit.test_chunk module","nltk.test.unit.test_classify module","nltk.test.unit.test_collocations module","nltk.test.unit.test_concordance module","nltk.test.unit.test_corenlp module","nltk.test.unit.test_corpora module","nltk.test.unit.test_corpus_views module","nltk.test.unit.test_data 
module","nltk.test.unit.test_disagreement module","nltk.test.unit.test_distance module","nltk.test.unit.test_freqdist module","nltk.test.unit.test_hmm module","nltk.test.unit.test_json2csv_corpus module","nltk.test.unit.test_json_serialization module","nltk.test.unit.test_metrics module","nltk.test.unit.test_naivebayes module","nltk.test.unit.test_nombank module","nltk.test.unit.test_pl196x module","nltk.test.unit.test_pos_tag module","nltk.test.unit.test_ribes module","nltk.test.unit.test_rte_classify module","nltk.test.unit.test_seekable_unicode_stream_reader module","nltk.test.unit.test_senna module","nltk.test.unit.test_stem module","nltk.test.unit.test_tag module","nltk.test.unit.test_tgrep module","nltk.test.unit.test_tokenize module","nltk.test.unit.test_twitter_auth module","nltk.test.unit.test_util module","nltk.test.unit.test_wordnet module","nltk.test.unit.translate package","nltk.test.unit.translate.test_bleu module","nltk.test.unit.translate.test_gdfa module","nltk.test.unit.translate.test_ibm1 module","nltk.test.unit.translate.test_ibm2 module","nltk.test.unit.translate.test_ibm3 module","nltk.test.unit.translate.test_ibm4 module","nltk.test.unit.translate.test_ibm5 module","nltk.test.unit.translate.test_ibm_model module","nltk.test.unit.translate.test_meteor module","nltk.test.unit.translate.test_nist module","nltk.test.unit.translate.test_stack_decoder module","nltk.text module","nltk.tgrep module","nltk.tokenize package","nltk.tokenize.api module","nltk.tokenize.casual module","nltk.tokenize.destructive module","nltk.tokenize.legality_principle module","nltk.tokenize.mwe module","nltk.tokenize.nist module","nltk.tokenize.punkt module","nltk.tokenize.regexp module","nltk.tokenize.repp module","nltk.tokenize.sexpr module","nltk.tokenize.simple module","nltk.tokenize.sonority_sequencing module","nltk.tokenize.stanford module","nltk.tokenize.stanford_segmenter module","nltk.tokenize.texttiling module","nltk.tokenize.toktok module","nltk.tokenize.treebank module","nltk.tokenize.util module","nltk.toolbox module","nltk.translate package","nltk.translate.api module","nltk.translate.bleu_score module","nltk.translate.chrf_score module","nltk.translate.gale_church module","nltk.translate.gdfa module","nltk.translate.gleu_score module","nltk.translate.ibm1 module","nltk.translate.ibm2 module","nltk.translate.ibm3 module","nltk.translate.ibm4 module","nltk.translate.ibm5 module","nltk.translate.ibm_model module","nltk.translate.meteor_score module","nltk.translate.metrics module","nltk.translate.nist_score module","nltk.translate.phrase_based module","nltk.translate.ribes_score module","nltk.translate.stack_decoder module","nltk.tree module","nltk.treeprettyprinter module","nltk.treetransforms module","nltk.twitter package","nltk.twitter.api module","nltk.twitter.common module","nltk.twitter.twitter_demo module","nltk.twitter.twitterclient module","nltk.twitter.util module","nltk.util module","nltk.wsd module","Contributing to NLTK","Installing NLTK Data","Example usage of NLTK modules","Sample usage for bleu","Sample usage for bnc","Sample usage for ccg","Sample usage for ccg_semantics","Sample usage for chat80","Sample usage for childes","Sample usage for chunk","Sample usage for classify","Sample usage for collections","Sample usage for collocations","Sample usage for concordance","Sample usage for corpus","Sample usage for crubadan","Sample usage for data","Sample usage for dependency","Sample usage for discourse","Sample usage for drt","Sample usage for featgram","Sample usage 
for featstruct","Sample usage for framenet","Sample usage for generate","Sample usage for gensim","Sample usage for gluesemantics","Sample usage for gluesemantics_malt","Sample usage for grammar","Sample usage for grammartestsuites","Sample usage for inference","Sample usage for internals","Sample usage for japanese","Sample usage for lm","Sample usage for logic","Sample usage for meteor","Sample usage for metrics","Sample usage for misc","Sample usage for nonmonotonic","Sample usage for paice","Sample usage for parse","Sample usage for portuguese_en","Sample usage for probability","Sample usage for propbank","Sample usage for relextract","Sample usage for resolution","Sample usage for semantics","Sample usage for sentiment","Sample usage for sentiwordnet","Sample usage for simple","Sample usage for stem","Sample usage for tag","Sample usage for tokenize","Sample usage for toolbox","Sample usage for translate","Sample usage for tree","Sample usage for treeprettyprinter","Sample usage for treetransforms","Sample usage for util","Sample usage for wordnet","Sample usage for wordnet_lch","Sample usage for wsd","Natural Language Toolkit","Installing NLTK","Release Notes","Index","NLTK Team"],titleterms:{"1":[398,403],"1025":395,"167":377,"2":403,"2005":408,"2006":408,"2007":408,"2008":408,"2009":408,"2010":408,"2011":408,"2012":408,"2013":408,"2014":408,"2015":408,"2016":408,"2017":408,"2018":408,"2019":408,"2020":408,"2021":408,"2088":358,"2483":395,"32":407,"367":377,"380":377,"80":[179,352],"case":397,"class":[359,381,384,401],"default":382,"do":406,"function":[55,57,381,390,397,402],"new":359,A:[349,370],No:[351,379],The:374,With:371,access:[349,359,360,366,376,385,403],acyclic_tre:403,ad:363,aer:398,agreement:141,algorithm:[383,398,405],align:[58,379,398],align_token:396,alin:142,all:[230,403],alpha:378,analysi:391,annot:367,answer:389,api:[13,26,31,47,59,122,133,157,197,210,296,316,338],app:[1,2,3,4,5,6,7,8,9,10],applic:390,argument:[359,390],arlstem2:[199,394],arlstem:[198,394],associ:[143,357,380],assumpt:382,autom:371,automat:359,avail:55,babelfish:151,back:359,background:363,base:[379,390],basic:353,batch:390,bell:386,betaconversiontestsuit:378,better:403,binari:[369,407],bind:[365,366],bit:407,blackburn:390,bleu:348,bleu_scor:317,bllip:158,bnc:[60,349],bo:390,book:[11,363],bound:364,boxer:178,bracket_pars:61,brill:211,brill_train:212,buffer:361,bug:[359,366,386,399],build:374,builder:374,cach:361,calculu:378,can:406,candid:357,casual:297,categor:359,categori:[350,351],categorized_s:62,caveat:294,ccg:[12,13,14,15,16,17,350,351],ccg_semant:351,cfg:[113,384],charact:385,chart:[14,159,351,384],chartparser_app:2,chase:370,chasen:63,chat80:[179,352],chat:[18,19,20,21,22,23,24,179,352],check:[349,363],child:[64,353],childes_fixt:231,childescorpusread:353,chomski:152,chrf_score:318,chunk:[25,26,27,28,29,65,354,359],chunkparser_app:3,chunkparseri:354,chunkstr:354,cistem:200,classifi:[30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,355],classify_fixt:232,claus:350,cli:45,close:[382,397],closur:403,cluster:[46,47,48,49,50,51],cmudict:66,code:360,collect:[52,356],colloc:[53,357],collocations_app:4,combin:[15,351],combinatori:[350,351],command:346,common:[339,359,404],comparative_s:67,comparative_sent:359,compat:54,compil:370,complex:351,comput:351,concaten:359,concept:[179,352],concord:358,concordanc:385,concordance_app:5,conditionalfreqdist:386,conflict:366,conftest:233,confus:380,confusionmatrix:144,conjunct:[350,351],conll:[68,362],consist:363,constructor:359,content:[98,359],context:[368,384],co
nting:357,contribut:345,convers:378,convert:33,cooper:390,cooper_storag:180,copi:361,corenlp:160,corpora:[55,359,385],corpu:[55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,353,359,360,376,385,398],correl:357,counter:134,creat:359,crf:213,crubadan:[69,360],custom:366,data:[109,132,346,359,361,362,407],decisiontre:32,decod:361,decor:[110,382],demo:[224,370],demonstr:369,depend:[70,362,370,371],dependencygraph:161,deriv:371,descent:384,design:359,destruct:298,dict:366,dictionari:366,directori:111,disambigu:405,discours:[123,363,364],discourse_fixt:234,dispers:114,distanc:[145,380],document:367,dog:370,domain:382,don:359,doubl:364,download:111,draw:[112,113,114,115,116,117],drt:[181,364],drt_glue_demo:182,dure:378,earleychart:162,easyinstal:393,effici:25,eliza:19,em:[48,398],emac:25,embed:369,encod:[361,385],endless:403,entiti:388,environ:407,equal:[366,378],equiv:[364,374],error:[364,378,398],erroranalysi:225,estim:386,europarl_raw:56,evalu:[163,183,383,390,398],everi:370,exampl:[142,347,358,379,382,385],express:366,extract:388,f:370,featgram:365,featstruct:[118,366],featstructnontermin:384,featur:[30,33,118,226,365,366,384,393],featurechart:164,featureset:30,field:397,file:[179,352,361,384],filter:357,find:[361,366],finder:357,first:403,floresta:385,fol:364,formula:[370,390],frame:[367,403],framenet:[71,359,367],free:[368,378,384],freqdist:386,frequenc:360,from:[351,361,363,368,390],fromlist:399,gaac:49,gale_church:319,gdfa:320,gener:[165,368],gensim:369,gensim_fixt:235,girl:370,gleu_scor:321,glue:[184,370,371],gluesemant:370,gluesemantics_malt:371,gluesemantics_malt_fixt:236,grammar:[119,350,351,362,365,368,372,373,384],grammartestsuit:373,graph:[365,370],gzip:361,hash:366,help:120,helper:360,hmm:[214,386],hole:185,how:353,hunpo:215,hypernym:404,i:390,ibm1:322,ibm2:323,ibm3:324,ibm4:325,ibm5:326,ibm:398,ibm_model:327,ieer:72,iesha:20,immutablemultiparentedtre:399,immutableparentedtre:399,implement:[294,359],increment:384,index:409,indian:73,individu:[179,352],infer:[121,122,123,124,125,126,127,128,371,374],inference_fixt:237,inform:[363,388],initi:371,input:33,instal:[346,374,407],instanc:359,integr:362,interact:346,interfac:[374,392,403],intern:[129,375],intract:403,introduct:[363,374],ipipan:74,ir:380,is_tautolog:389,isri:201,issu:[25,294,358,377,395],item:[179,352],iter:359,japanes:376,jeita:376,john:370,joint:33,json:361,jsontag:130,kmean:50,knb:376,knbc:75,kneser:386,knowledg:363,known:294,lambda:378,lancast:202,languag:[132,134,360,376,406],larg:384,lazi:[349,361],lazyimport:131,lazyiteratorlist:356,legality_principl:299,lemma:403,lesk:405,lexic:[179,352,367],lexicon:[16,351,359],lfg:[186,370],light:366,lightweight:118,likelihood:386,lin:76,line:346,line_num:397,linear:370,linearlog:187,list:[358,359,366,385],lm:[132,133,134,135,136,137,138,139,242,243,244,245,246,377],load:[361,384],loader:[349,361],local:361,logic:[17,188,370,374,378],lookup:392,lowest:404,mac:407,mace4:374,mace:124,machado:385,macmorpho:385,make:361,malt:[166,362],man:370,manual:346,map:[216,360,390],marker:397,matrix:380,maxent:33,maximum:386,measur:[357,380],megam:34,meteor:379,meteor_scor:328,method:[359,360,366,374,399],metric:[140,141,142,143,144,145,146,147,148,149,329,380,393],minimalset:153,misc:[150,151,152,153,154,155,381],miscellan:[380,381],model:[132,134,135,369,374,390,398],modul:[2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,19,20,21,22,23,24,26,27,28,29,31,32,33,34,35,36,37,38,39,40,41,4
2,43,44,45,47,48,49,50,51,52,53,54,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,113,114,115,116,117,118,119,120,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,141,142,143,144,145,146,147,148,149,151,152,153,154,155,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,178,179,180,181,182,183,184,185,186,187,188,189,190,191,193,194,195,197,198,199,200,201,202,203,204,205,206,207,208,210,211,212,213,214,215,216,217,218,219,220,221,222,224,225,226,227,228,230,231,232,233,234,235,236,237,238,239,240,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,282,283,284,285,286,287,288,289,290,291,292,293,294,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,338,339,340,341,342,343,344,347,375],morphi:403,most_general_unif:389,mst:403,mte:77,multext_east:359,multi:382,multiparentedtre:399,multipl:359,mutat:366,mwe:300,mwetoken:396,naivebay:35,name:[382,388],named_ent:27,natur:406,nei:386,nemo_app:6,next:406,ngram:360,nist:301,nist_scor:330,nkjp:78,nltk:[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,361,374,375,399,400,406,407,410],nombank:79,non:[362,366],nonmonoton:[125,382],nonmonotonic_fixt:238,nonprojectivedependencypars:167,note:[294,408],nps_chat:[80,359],numer:378,object:[359,398],open:397,open_str:397,oper:365,opinion_lexicon:[81,359],other:[359,361,364],overridden:375,overstem:383,overview:[179,357,359,361,364,366,378,394],packag:[0,1,12,18,25,30,46,55,57,111,112,121,132,140,150,156,177,192,196,209,223,229,241,242,281,295,315,337,361],paic:[146,383],panlex_lit:82,panlex_swadesh:83,paper:351,parent:399,parentedtre:399,pars:[156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,359,362,364,365,366,372,378,384,393,397,399],parser:[362,364,371,378,384],part:385,parti:407,patch:358,path:366,pattern:25,pchart:168,perceptron:217,persist:[179,352],phrase_bas:331,pl196x:84,pl:378,plaintext:[85,359],porter:[203,394],portugues:385,portuguese_en:385,portuguese_en_fixt:239,positivena
ivebay:36,ppattach:[86,359],pre:369,precis:398,prepar:132,preprocess:136,pretti:364,print:364,probabilist:384,probabilistictre:399,probability_fixt:240,probabl:[176,386],process:[376,385,390],product_reviews_1:359,product_reviews_2:359,program:385,project:362,projectivedependencypars:169,proof:389,propbank:[87,359,387],proposit:390,pros_con:[88,359],prove:389,prover9:[126,374],prover:[374,389],provercommand:374,proxi:346,prune:369,publish:351,punkt:302,punktsentencetoken:396,python:[385,407],queri:352,question:389,rank:357,rate:398,raw_field:397,rd:384,rdparser_app:7,read:[179,352,359,361,363,366,385],read_str:375,reader:[55,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,353,359,360,398],reason:382,recal:398,recurs:384,recursivedesc:170,reduc:384,reentranc:366,regexp:[28,204,303,396],regexpchunkpars:[25,354],regexpchunkrul:25,regexppars:354,regress:[354,355,359,361,363,364,366,377,378,395,396,399,403],reinvent:359,rel:350,relat:[388,390,403],releas:408,relextract:[189,388],remov:366,replac:[364,378],repp:304,represent:364,resolut:[127,389],resolve_anaphora:364,resourc:361,retract:366,retriev:361,review:89,ribes_scor:332,rslp:205,rte:[90,359],rte_classifi:37,rude:21,rule:[227,354],s:383,same:364,sampl:[348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405],satisfi:390,scikitlearn:38,score:[147,380],search:[294,385],seek:359,seekableunicodestreamread:359,segment:[148,385],sem:[177,178,179,180,181,182,183,184,185,186,187,188,189,190,191],semant:[351,364,370,371,390],semcor:[91,359],senna:[39,218],sens:405,sensev:[92,359],sentenc:[349,368,385],sentence_polar:359,sentiment:[192,193,194,195,391],sentiment_analyz:193,sentisynset:392,sentiwordnet:[93,359,392],sequenti:[219,395],server:[111,346],set:[366,407],setup:[353,363,380],sexpr:305,shakespear:359,shift:384,shiftreduc:171,side:364,similar:403,simpl:[306,351,385,393,406],simplifi:[364,378],sinica_treebank:94,size:359,skolem:190,slice:359,smooth:137,snowbal:[206,394],softwar:407,solut:403,some:[386,406],sonority_sequenc:307,sort:[154,381],sourc:361,spearman:149,speech:385,sql:352,squash:[359,366,386,399],sr:384,srparser_app:8,stack_decod:333,standard:380,standardformat:397,stanford:[172,220,308],stanford_segment:309,statist:383,stem:[196,197,198,199,200,201,202,203,204,205,206,207,208,383,385,394],stemmer:394,step:406,stopword:385,storag:390,stream:359,string:366,string_categori:95,structur:[118,365,366,370,393],stuff:353,subject:359,submodul:[0,1,12,18,25,30,46,55,57,112,121,132,140,150,156,177,192,196,209,223,229,241,242,281,295,315,337],subpackag:[0,55,229,241],subsum:389,suit:373,suntsu:22,support:350,svm:40,switchboard:96,synset:403,syntax:390,t:359,tabl:[115,357,359],tableau:128,tadm:41,tag:[25,97,209,210,211,212,213,214,215,216,217,218,219,220,221,222,359,385,395],tagger:395,task:385,tbl:[223,224,225,226,227,228],team:410,teardown:403,tell:359,templat:228,terminolog:33,test:[229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,348,349,351,354,355,359,361,363,364,366,373,375,377,378,379,381,384,386,389,390,393,394,395,396,397,399,400,401,403],test_alin:2
47,test_bleu:282,test_bril:248,test_cfd_mut:249,test_cfg2chomski:250,test_chunk:251,test_classifi:252,test_colloc:253,test_concord:254,test_corenlp:255,test_corpora:256,test_corpus_view:257,test_count:243,test_data:258,test_disagr:259,test_dist:260,test_freqdist:261,test_gdfa:283,test_hmm:262,test_ibm1:284,test_ibm2:285,test_ibm3:286,test_ibm4:287,test_ibm5:288,test_ibm_model:289,test_json2csv_corpu:263,test_json_seri:264,test_meteor:290,test_metr:265,test_model:244,test_naivebay:266,test_nist:291,test_nombank:267,test_pl196x:268,test_pos_tag:269,test_preprocess:245,test_rib:270,test_rte_classifi:271,test_seekable_unicode_stream_read:272,test_senna:273,test_stack_decod:292,test_stem:274,test_tag:275,test_tgrep:276,test_token:277,test_twitter_auth:278,test_util:279,test_vocabulari:246,test_wordnet:280,testsuit:390,text:[293,385],textcat:42,texttil:310,texttilingtoken:396,tgrep:294,theorem:[374,389],theori:364,thing:406,third:407,thread:363,timit:[98,359],tip:25,tnt:221,to_sfm_str:397,token:[295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,393,396],toktok:311,toolbox:[99,314,359,397],toolboxdata:397,toolkit:406,trace:[366,384],train:[30,132,369],transitionpars:173,translat:[281,282,283,284,285,286,287,288,289,290,291,292,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,398],tree:[116,294,334,399,403],treebank:[312,385,396],treeprettyprint:[335,400],treetransform:[336,401],trie:356,tupl:366,tweettoken:396,twitter:[100,337,338,339,340,341,342],twitter_demo:340,twitter_sampl:359,twittercli:341,type:[359,366,378],typedmaxentfeatureencod:355,udhr:101,unbound:364,under:378,understem:383,unicod:350,unif:366,unifi:389,uniqu:382,unit:[241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,364,367,375,381,384,390,394,397,399,400,401],unix:407,unresolv:25,untyp:378,up:407,us:[132,351,352,353,357,369,370,374],usag:[142,294,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405],util:[23,29,43,51,102,108,117,138,174,191,194,207,222,313,342,343,375,402],vader:[195,391],valu:[357,366,383],valuat:[352,390],variabl:[366,378],verb:403,verbnet:[103,359],via:346,view:359,viterbi:[175,384],vocabulari:139,vs:403,walk:370,web:346,weight:366,weka:44,wheel:359,wikipedia:379,window:407,witten:386,word:[349,359,369,385,403,405],wordfind:[155,381],wordfreq_app:9,wordlist:104,wordnet:[105,208,359,403,404],wordnet_app:10,wordnet_lch:404,world:382,write:[359,361],wsd:[344,405],xmldoc:106,ycoe:107,you:406,zen:24}}) \ No newline at end of file diff --git a/sphinx/doctrees/api/nltk.app.doctree b/sphinx/doctrees/api/nltk.app.doctree new file mode 100644 index 000000000..372e30035 Binary files /dev/null and b/sphinx/doctrees/api/nltk.app.doctree differ diff --git a/sphinx/doctrees/api/nltk.ccg.doctree b/sphinx/doctrees/api/nltk.ccg.doctree new file mode 100644 index 000000000..288366195 Binary files /dev/null and b/sphinx/doctrees/api/nltk.ccg.doctree differ diff --git a/sphinx/doctrees/api/nltk.chat.doctree b/sphinx/doctrees/api/nltk.chat.doctree new file mode 100644 index 000000000..f81e96acd Binary files /dev/null and b/sphinx/doctrees/api/nltk.chat.doctree differ diff --git a/sphinx/doctrees/api/nltk.chunk.doctree 
b/sphinx/doctrees/api/nltk.chunk.doctree
new file mode 100644
index 000000000..5921f94fe
Binary files /dev/null and b/sphinx/doctrees/api/nltk.chunk.doctree differ
diff --git a/sphinx/doctrees/api/nltk.classify.doctree b/sphinx/doctrees/api/nltk.classify.doctree
new file mode 100644
index 000000000..b3ef35cd9
Binary files /dev/null and b/sphinx/doctrees/api/nltk.classify.doctree differ
diff --git a/sphinx/doctrees/api/nltk.cluster.doctree b/sphinx/doctrees/api/nltk.cluster.doctree
new file mode 100644
index 000000000..938c56702
Binary files /dev/null and b/sphinx/doctrees/api/nltk.cluster.doctree differ
diff --git a/sphinx/doctrees/api/nltk.corpus.doctree b/sphinx/doctrees/api/nltk.corpus.doctree
new file mode 100644
index 000000000..77c27dd3d
Binary files /dev/null and b/sphinx/doctrees/api/nltk.corpus.doctree differ
diff --git a/sphinx/doctrees/api/nltk.corpus.reader.doctree b/sphinx/doctrees/api/nltk.corpus.reader.doctree
new file mode 100644
index 000000000..f94088d87
Binary files /dev/null and b/sphinx/doctrees/api/nltk.corpus.reader.doctree differ
diff --git a/sphinx/doctrees/api/nltk.doctree b/sphinx/doctrees/api/nltk.doctree
new file mode 100644
index 000000000..ffd5b1500
Binary files /dev/null and b/sphinx/doctrees/api/nltk.doctree differ
diff --git a/sphinx/doctrees/api/nltk.draw.doctree b/sphinx/doctrees/api/nltk.draw.doctree
new file mode 100644
index 000000000..c73138e15
Binary files /dev/null and b/sphinx/doctrees/api/nltk.draw.doctree differ
diff --git a/sphinx/doctrees/api/nltk.inference.doctree b/sphinx/doctrees/api/nltk.inference.doctree
new file mode 100644
index 000000000..43ac73ee7
Binary files /dev/null and b/sphinx/doctrees/api/nltk.inference.doctree differ
diff --git a/sphinx/doctrees/api/nltk.lm.doctree b/sphinx/doctrees/api/nltk.lm.doctree
new file mode 100644
index 000000000..60e5a736e
Binary files /dev/null and b/sphinx/doctrees/api/nltk.lm.doctree differ
diff --git a/sphinx/doctrees/api/nltk.metrics.doctree b/sphinx/doctrees/api/nltk.metrics.doctree
new file mode 100644
index 000000000..89f15f323
Binary files /dev/null and b/sphinx/doctrees/api/nltk.metrics.doctree differ
diff --git a/sphinx/doctrees/api/nltk.misc.doctree b/sphinx/doctrees/api/nltk.misc.doctree
new file mode 100644
index 000000000..a2a8bd2e7
Binary files /dev/null and b/sphinx/doctrees/api/nltk.misc.doctree differ
diff --git a/sphinx/doctrees/api/nltk.parse.doctree b/sphinx/doctrees/api/nltk.parse.doctree
new file mode 100644
index 000000000..bc31cd5c1
Binary files /dev/null and b/sphinx/doctrees/api/nltk.parse.doctree differ
diff --git a/sphinx/doctrees/api/nltk.sem.doctree b/sphinx/doctrees/api/nltk.sem.doctree
new file mode 100644
index 000000000..3d4229a44
Binary files /dev/null and b/sphinx/doctrees/api/nltk.sem.doctree differ
diff --git a/sphinx/doctrees/api/nltk.sentiment.doctree b/sphinx/doctrees/api/nltk.sentiment.doctree
new file mode 100644
index 000000000..aa8174a88
Binary files /dev/null and b/sphinx/doctrees/api/nltk.sentiment.doctree differ
diff --git a/sphinx/doctrees/api/nltk.stem.doctree b/sphinx/doctrees/api/nltk.stem.doctree
new file mode 100644
index 000000000..48663ae24
Binary files /dev/null and b/sphinx/doctrees/api/nltk.stem.doctree differ
diff --git a/sphinx/doctrees/api/nltk.tag.doctree b/sphinx/doctrees/api/nltk.tag.doctree
new file mode 100644
index 000000000..6a56fc924
Binary files /dev/null and b/sphinx/doctrees/api/nltk.tag.doctree differ
diff --git a/sphinx/doctrees/api/nltk.tbl.doctree b/sphinx/doctrees/api/nltk.tbl.doctree
new file mode 100644
index 000000000..ed9d64e24
Binary files /dev/null and b/sphinx/doctrees/api/nltk.tbl.doctree differ
diff --git a/sphinx/doctrees/api/nltk.test.doctree b/sphinx/doctrees/api/nltk.test.doctree
new file mode 100644
index 000000000..8ad5be1f1
Binary files /dev/null and b/sphinx/doctrees/api/nltk.test.doctree differ
diff --git a/sphinx/doctrees/api/nltk.test.unit.doctree b/sphinx/doctrees/api/nltk.test.unit.doctree
new file mode 100644
index 000000000..766d83866
Binary files /dev/null and b/sphinx/doctrees/api/nltk.test.unit.doctree differ
diff --git a/sphinx/doctrees/api/nltk.test.unit.lm.doctree b/sphinx/doctrees/api/nltk.test.unit.lm.doctree
new file mode 100644
index 000000000..7e5ef37e5
Binary files /dev/null and b/sphinx/doctrees/api/nltk.test.unit.lm.doctree differ
diff --git a/sphinx/doctrees/api/nltk.test.unit.translate.doctree b/sphinx/doctrees/api/nltk.test.unit.translate.doctree
new file mode 100644
index 000000000..651224575
Binary files /dev/null and b/sphinx/doctrees/api/nltk.test.unit.translate.doctree differ
diff --git a/sphinx/doctrees/api/nltk.tokenize.doctree b/sphinx/doctrees/api/nltk.tokenize.doctree
new file mode 100644
index 000000000..1d1637436
Binary files /dev/null and b/sphinx/doctrees/api/nltk.tokenize.doctree differ
diff --git a/sphinx/doctrees/api/nltk.translate.doctree b/sphinx/doctrees/api/nltk.translate.doctree
new file mode 100644
index 000000000..cc1dc7ade
Binary files /dev/null and b/sphinx/doctrees/api/nltk.translate.doctree differ
diff --git a/sphinx/doctrees/api/nltk.twitter.doctree b/sphinx/doctrees/api/nltk.twitter.doctree
new file mode 100644
index 000000000..5964ad50d
Binary files /dev/null and b/sphinx/doctrees/api/nltk.twitter.doctree differ
diff --git a/sphinx/doctrees/contribute.doctree b/sphinx/doctrees/contribute.doctree
new file mode 100644
index 000000000..d0289d9c7
Binary files /dev/null and b/sphinx/doctrees/contribute.doctree differ
diff --git a/sphinx/doctrees/data.doctree b/sphinx/doctrees/data.doctree
new file mode 100644
index 000000000..4bc1d4ee5
Binary files /dev/null and b/sphinx/doctrees/data.doctree differ
diff --git a/sphinx/doctrees/environment.pickle b/sphinx/doctrees/environment.pickle
new file mode 100644
index 000000000..6020b5dd8
Binary files /dev/null and b/sphinx/doctrees/environment.pickle differ
diff --git a/sphinx/doctrees/index.doctree b/sphinx/doctrees/index.doctree
new file mode 100644
index 000000000..366f70477
Binary files /dev/null and b/sphinx/doctrees/index.doctree differ
diff --git a/sphinx/doctrees/install.doctree b/sphinx/doctrees/install.doctree
new file mode 100644
index 000000000..28a257e5e
Binary files /dev/null and b/sphinx/doctrees/install.doctree differ
diff --git a/sphinx/doctrees/news.doctree b/sphinx/doctrees/news.doctree
new file mode 100644
index 000000000..d88b2de01
Binary files /dev/null and b/sphinx/doctrees/news.doctree differ
diff --git a/sphinx/man/nltk.1 b/sphinx/man/nltk.1
new file mode 100644
index 000000000..70609653b
--- /dev/null
+++ b/sphinx/man/nltk.1
@@ -0,0 +1,57643 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.TH "NLTK" "1" "Sep 27, 2021" "3.6.2" "NLTK" +.SH NAME +nltk \- NLTK Documentation +.sp +NLTK is a leading platform for building Python programs to work with human language data. +It provides easy\-to\-use interfaces to \fI\%over 50 corpora and lexical +resources\fP such as WordNet, +along with a suite of text processing libraries for classification, tokenization, stemming, tagging, parsing, and semantic reasoning, +wrappers for industrial\-strength NLP libraries, +and an active \fI\%discussion forum\fP\&. +.sp +Thanks to a hands\-on guide introducing programming fundamentals alongside topics in computational linguistics, plus comprehensive API documentation, +NLTK is suitable for linguists, engineers, students, educators, researchers, and industry users alike. +NLTK is available for Windows, Mac OS X, and Linux. Best of all, NLTK is a free, open source, community\-driven project. +.sp +NLTK has been called "a wonderful tool for teaching, and working in, computational linguistics using Python," +and "an amazing library to play with natural language." +.sp +\fI\%Natural Language Processing with Python\fP provides a practical +introduction to programming for language processing. +Written by the creators of NLTK, it guides the reader through the fundamentals +of writing Python programs, working with corpora, categorizing text, analyzing linguistic structure, +and more. +The online version of the book has been been updated for Python 3 and NLTK 3. +(The original Python 2 version is still available at \fI\%http://nltk.org/book_1ed\fP\&.) +.SH SOME SIMPLE THINGS YOU CAN DO WITH NLTK +.sp +Tokenize and tag some text: +.sp +.nf +.ft C +>>> import nltk +>>> sentence = """At eight o\(aqclock on Thursday morning +\&... Arthur didn\(aqt feel very good.""" +>>> tokens = nltk.word_tokenize(sentence) +>>> tokens +[\(aqAt\(aq, \(aqeight\(aq, "o\(aqclock", \(aqon\(aq, \(aqThursday\(aq, \(aqmorning\(aq, +\(aqArthur\(aq, \(aqdid\(aq, "n\(aqt", \(aqfeel\(aq, \(aqvery\(aq, \(aqgood\(aq, \(aq.\(aq] +>>> tagged = nltk.pos_tag(tokens) +>>> tagged[0:6] +[(\(aqAt\(aq, \(aqIN\(aq), (\(aqeight\(aq, \(aqCD\(aq), ("o\(aqclock", \(aqJJ\(aq), (\(aqon\(aq, \(aqIN\(aq), +(\(aqThursday\(aq, \(aqNNP\(aq), (\(aqmorning\(aq, \(aqNN\(aq)] +.ft P +.fi +.sp +Identify named entities: +.sp +.nf +.ft C +>>> entities = nltk.chunk.ne_chunk(tagged) +>>> entities +Tree(\(aqS\(aq, [(\(aqAt\(aq, \(aqIN\(aq), (\(aqeight\(aq, \(aqCD\(aq), ("o\(aqclock", \(aqJJ\(aq), + (\(aqon\(aq, \(aqIN\(aq), (\(aqThursday\(aq, \(aqNNP\(aq), (\(aqmorning\(aq, \(aqNN\(aq), + Tree(\(aqPERSON\(aq, [(\(aqArthur\(aq, \(aqNNP\(aq)]), + (\(aqdid\(aq, \(aqVBD\(aq), ("n\(aqt", \(aqRB\(aq), (\(aqfeel\(aq, \(aqVB\(aq), + (\(aqvery\(aq, \(aqRB\(aq), (\(aqgood\(aq, \(aqJJ\(aq), (\(aq.\(aq, \(aq.\(aq)]) +.ft P +.fi +.sp +Display a parse tree: +.sp +.nf +.ft C +>>> from nltk.corpus import treebank +>>> t = treebank.parsed_sents(\(aqwsj_0001.mrg\(aq)[0] +>>> t.draw() +.ft P +.fi +[image] +.sp +NB. If you publish work that uses NLTK, please cite the NLTK book as +follows: +.INDENT 0.0 +.INDENT 3.5 +Bird, Steven, Edward Loper and Ewan Klein (2009), \fINatural Language Processing with Python\fP\&. O\(aqReilly Media Inc. 
+.UNINDENT +.UNINDENT +.SH NEXT STEPS +.INDENT 0.0 +.IP \(bu 2 +\fI\%sign up for release announcements\fP +.IP \(bu 2 +\fI\%join in the discussion\fP +.UNINDENT +.SH NLTK NEWS +.SS 2021 +.INDENT 0.0 +.TP +.B NLTK 3.6.3 release: September 2021 +Drop support for Python 3.5, +added pre\-commit hooks (isort, pyupgrade, black), +improvements to WordNet visualization, RIBES score, edit_distance, +METEOR score, Punkt, language model package, TweetTokenizer, +code and comment cleanups, +CI tests now also run on Windows, +moved from Travis CI to GitHub Actions +.TP +.B NLTK 3.6.2 release: April 2021 +Minor enhancements +.TP +.B NLTK 3.6 release: April 2021 +Add support for Python 3.9 +Minor enhancements, bug fixes, code cleanups, efficiency improvements +.UNINDENT +.SS 2020 +.INDENT 0.0 +.TP +.B NLTK 3.5 release: April 2020 +Add support for Python 3.8, drop support for Python 2 +.UNINDENT +.SS 2019 +.INDENT 0.0 +.TP +.B NLTK 3.4.5 release: August 2019 +Fixed security bug in downloader: Zip slip vulnerability \- for the unlikely +situation where a user configures their downloader to use a compromised server +(see \fI\%https://cve.mitre.org/cgi\-bin/cvename.cgi?name=CVE\-2019\-14751\fP) +.TP +.B NLTK 3.4.4 release: July 2019 +Fix bug in plot function (probability.py) +Add improved PanLex Swadesh corpus reader +.TP +.B NLTK 3.4.3 release: June 2019 +Add Text.generate(), QuadgramAssocMeasures +Add SSP to tokenizers +Return confidence of best tag from AveragedPerceptron +Make plot methods return Axes objects +Minor bug fixes +Update installation instructions +.TP +.B NLTK 3.4.1 release: April 2019 +Add chomsky_normal_form for CFGs +Add meteor score +Add minimum edit/Levenshtein distance based alignment function +Allow access to collocation list via text.collocation_list() +Support corenlp server options +Drop support for Python 3.4 +Other minor fixes +.UNINDENT +.SS 2018 +.INDENT 0.0 +.TP +.B NLTK 3.4 release: November 2018 +Support Python 3.7, +New Language Modeling package, +Cistem Stemmer for German, +Support Russian National Corpus incl POS tag model, +Krippendorff Alpha inter\-rater reliability test, +Comprehensive code clean\-ups, +Switch continuous integration from Jenkins to Travis +.TP +.B NLTK 3.3 release: May 2018 +Support Python 3.6, +New interface to CoreNLP, +Support synset retrieval by sense key, +Minor fixes to CoNLL Corpus Reader, AlignedSent, +Fixed minor inconsistencies in APIs and API documentation, +Better conformance to PEP8, +Drop Moses Tokenizer (incompatible license) +.UNINDENT +.SS 2017 +.INDENT 0.0 +.TP +.B NLTK 3.2.5 release: September 2017 +Arabic stemmers (ARLSTem, Snowball), +NIST MT evaluation metric and added NIST international_tokenize, +Moses tokenizer, +Document Russian tagger, +Fix to Stanford segmenter, +Improve treebank detokenizer, VerbNet, Vader, +Misc code and documentation cleanups, +Implement fixes suggested by LGTM +.TP +.B NLTK 3.2.4 released: May 2017 +Remove load\-time dependency on Python requests library, +Add support for Arabic in StanfordSegmenter +.TP +.B NLTK 3.2.3 released: May 2017 +Interface to Stanford CoreNLP Web API, improved Lancaster stemmer, +improved Treebank tokenizer, support custom tab files for extending WordNet, +speed up TnT tagger, speed up FreqDist and ConditionalFreqDist, +new corpus reader for MWA subset of PPDB; improvements to testing framework +.UNINDENT +.SS 2016 +.INDENT 0.0 +.TP +.B NLTK 3.2.2 released: December 2016 +Support for Aline, ChrF and GLEU MT evaluation metrics, +Russian POS tagger model, Moses detokenizer, +rewrite
Porter Stemmer and FrameNet corpus reader, +update FrameNet Corpus to version 1.7, +fixes: stanford_segmenter.py, SentiText, CoNLL Corpus Reader, +BLEU, naivebayes, Krippendorff\(aqs alpha, Punkt, Moses tokenizer, +TweetTokenizer, ToktokTokenizer; +improvements to testing framework +.TP +.B NLTK 3.2.1 released: April 2016 +Support for CCG semantics, Stanford segmenter, VADER lexicon; +Fixes to BLEU score calculation, CHILDES corpus reader. +.TP +.B NLTK 3.2 released +March 2016 +Fixes for Python 3.5, code cleanups now that Python 2.6 is no longer +supported, support for PanLex, support for third party download +locations for NLTK data, new support for RIBES score, BLEU +smoothing, corpus\-level BLEU, improvements to TweetTokenizer, +updates for Stanford API, add mathematical operators to +ConditionalFreqDist, fix bug in sentiwordnet for adjectives, +improvements to documentation, code cleanups, consistent handling +of file paths for cross\-platform operation. +.UNINDENT +.SS 2015 +.INDENT 0.0 +.TP +.B NLTK 3.1 released +October 2015 +Add support for Python 3.5, drop support for Python 2.6, +sentiment analysis package and several corpora, +improved POS tagger, Twitter package, +multi\-word expression tokenizer, +wrapper for Stanford Neural Dependency Parser, +improved translation/alignment module including stack decoder, +skipgram and everygram methods, +Multext East Corpus and MTECorpusReader, +minor bugfixes and enhancements +.TP +.B NLTK 3.0.5 released +September 2015 +New Twitter package; updates to IBM models 1\-3, new models 4 and 5, +minor bugfixes and enhancements +.TP +.B NLTK 3.0.4 released +July 2015 +Minor bugfixes and enhancements. +.TP +.B NLTK 3.0.3 released +June 2015 +PanLex Swadesh Corpus, tgrep tree search, minor bugfixes. +.TP +.B NLTK 3.0.2 released +March 2015 +Senna, BLLIP, python\-crfsuite interfaces, transition\-based dependency parsers, +dependency graph visualization, NKJP corpus reader, minor bugfixes and clean\-ups. +.TP +.B NLTK 3.0.1 released +January 2015 +Minor packaging update. +.UNINDENT +.SS 2014 +.INDENT 0.0 +.TP +.B NLTK 3.0.0 released +September 2014 +Minor bugfixes. +.TP +.B NLTK 3.0.0b2 released +August 2014 +Minor bugfixes and clean\-ups. +.TP +.B NLTK Book Updates +July 2014 +The NLTK book is being updated for Python 3 and NLTK 3 \fI\%here\fP\&. +The original Python 2 edition is still available \fI\%here\fP\&. +.TP +.B NLTK 3.0.0b1 released +July 2014 +FrameNet, SentiWordNet, universal tagset, misc efficiency improvements and bugfixes +Several API changes, see \fI\%https://github.com/nltk/nltk/wiki/Porting\-your\-code\-to\-NLTK\-3.0\fP +.TP +.B NLTK 3.0a4 released +June 2014 +FrameNet, universal tagset, misc efficiency improvements and bugfixes +Several API changes, see \fI\%https://github.com/nltk/nltk/wiki/Porting\-your\-code\-to\-NLTK\-3.0\fP +For full details see: +\fI\%https://github.com/nltk/nltk/blob/develop/ChangeLog\fP +\fI\%http://nltk.org/nltk3\-alpha/\fP +.UNINDENT +.SS 2013 +.INDENT 0.0 +.TP +.B NLTK Book Updates +October 2013 +We are updating the NLTK book for Python 3 and NLTK 3; please see +\fI\%http://nltk.org/book3/\fP +.TP +.B NLTK 3.0a2 released +July 2013 +Misc efficiency improvements and bugfixes; for details see +\fI\%https://github.com/nltk/nltk/blob/develop/ChangeLog\fP +\fI\%http://nltk.org/nltk3\-alpha/\fP +.TP +.B NLTK 3.0a1 released +February 2013 +This version adds support for NLTK\(aqs graphical user interfaces.
+\fI\%http://nltk.org/nltk3\-alpha/\fP +.TP +.B NLTK 3.0a0 released +January 2013 +The first alpha release of NLTK 3.0 is now available for testing. This version of NLTK works with Python 2.6, 2.7, and Python 3. +\fI\%http://nltk.org/nltk3\-alpha/\fP +.UNINDENT +.SS 2012 +.INDENT 0.0 +.TP +.B Python Grant +November 2012 +The Python Software Foundation is sponsoring Mikhail Korobov\(aqs work on porting NLTK to Python 3. +\fI\%http://pyfound.blogspot.hu/2012/11/grants\-to\-assist\-kivy\-nltk\-in\-porting.html\fP +.TP +.B NLTK 2.0.4 released +November 2012 +Minor fix to remove numpy dependency. +.TP +.B NLTK 2.0.3 released +September 2012 +This release contains minor improvements and bugfixes. This is the final release compatible with Python 2.5. +.TP +.B NLTK 2.0.2 released +July 2012 +This release contains minor improvements and bugfixes. +.TP +.B NLTK 2.0.1 released +May 2012 +The final release of NLTK 2. +.TP +.B NLTK 2.0.1rc4 released +February 2012 +The fourth release candidate for NLTK 2. +.TP +.B NLTK 2.0.1rc3 released +January 2012 +The third release candidate for NLTK 2. +.UNINDENT +.SS 2011 +.INDENT 0.0 +.TP +.B NLTK 2.0.1rc2 released +December 2011 +The second release candidate for NLTK 2. For full details see the ChangeLog. +.TP +.B NLTK development moved to GitHub +October 2011 +The development site for NLTK has moved from GoogleCode to GitHub: \fI\%http://github.com/nltk\fP +.TP +.B NLTK 2.0.1rc1 released +April 2011 +The first release candidate for NLTK 2. For full details see the ChangeLog. +.UNINDENT +.SS 2010 +.INDENT 0.0 +.TP +.B Python Text Processing with NLTK 2.0 Cookbook +December 2010 +Jacob Perkins has written a 250\-page cookbook full of great recipes for text processing using Python and NLTK, published by Packt Publishing. Some of the royalties are being donated to the NLTK project. +.TP +.B Japanese translation of NLTK book +November 2010 +Masato Hagiwara has translated the NLTK book into Japanese, along with an extra chapter on particular issues with Japanese language processing. See \fI\%http://www.oreilly.co.jp/books/9784873114705/\fP\&. +.TP +.B NLTK 2.0b9 released +July 2010 +The last beta release before 2.0 final. For full details see the ChangeLog. +.TP +.B NLTK in Ubuntu 10.04 (Lucid Lynx) +February 2010 +NLTK is now in the latest LTS version of Ubuntu, thanks to the efforts of Robin Munn. See \fI\%http://packages.ubuntu.com/lucid/python/python\-nltk\fP +.TP +.B NLTK 2.0b? released +June 2009 \- February 2010 +Bugfix releases in preparation for 2.0 final. For full details see the ChangeLog. +.UNINDENT +.SS 2009 +.INDENT 0.0 +.TP +.B NLTK Book in second printing +December 2009 +The second print run of Natural Language Processing with Python will go on sale in January. We\(aqve taken the opportunity to make about 40 minor corrections. The online version has been updated. +.TP +.B NLTK Book published +June 2009 +Natural Language Processing with Python, by Steven Bird, Ewan Klein and Edward Loper, has been published by O\(aqReilly Media Inc. It can be purchased in hardcopy, ebook, PDF or for online access, at \fI\%http://oreilly.com/catalog/9780596516499/\fP\&. For information about sellers and prices, see \fI\%https://isbndb.com/d/book/natural_language_processing_with_python/prices.html\fP\&. +.TP +.B Version 0.9.9 released +May 2009 +This version finalizes NLTK\(aqs API ahead of the 2.0 release and the publication of the NLTK book. There have been dozens of minor enhancements and bugfixes. Many names of the form nltk.foo.Bar are now available as nltk.Bar.
There is expanded functionality in the decision tree, collocations, and Toolbox modules. A new translation toy nltk.misc.babelfish has been added. A new module nltk.help gives access to tagset documentation. Fixed imports so NLTK will build and install without Tkinter (for running on servers). New data includes a maximum entropy chunker model and updated grammars. NLTK Contrib includes updates to the coreference package (Joseph Frazee) and the ISRI Arabic stemmer (Hosam Algasaier). The book has undergone substantial editorial corrections ahead of final publication. For full details see the ChangeLog. +.TP +.B Version 0.9.8 released +February 2009 +This version contains a new off\-the\-shelf tokenizer, POS tagger, and named\-entity tagger. A new metrics package includes inter\-annotator agreement scores and various distance and word association measures (Tom Lippincott and Joel Nothman). There\(aqs a new collocations package (Joel Nothman). There are many improvements to the WordNet package and browser (Steven Bethard, Jordan Boyd\-Graber, Paul Bone), and to the semantics and inference packages (Dan Garrette). The NLTK corpus collection now includes the PE08 Parser Evaluation data, and the CoNLL 2007 Basque and Catalan Dependency Treebanks. We have added an interface for dependency treebanks. Many chapters of the book have been revised in response to feedback from readers. For full details see the ChangeLog. NB some method names have been changed for consistency and simplicity. Use of old names will generate deprecation warnings that indicate the correct name to use. +.UNINDENT +.SS 2008 +.INDENT 0.0 +.TP +.B Version 0.9.7 released +December 2008 +This version contains fixes to the corpus downloader (see instructions) enabling NLTK corpora to be released independently of the software, and to be stored in compressed format. There are improvements in the grammars, chart parsers, probability distributions, sentence segmenter, text classifiers and RTE classifier. There are many further improvements to the book. For full details see the ChangeLog. +.TP +.B Version 0.9.6 released +December 2008 +This version has an incremental corpus downloader (see instructions) enabling NLTK corpora to be released independently of the software. A new WordNet interface has been developed by Steven Bethard (details). NLTK now has support for dependency parsing, developed by Jason Narad (sponsored by Google Summer of Code). There are many enhancements to the semantics and inference packages, contributed by Dan Garrette. The frequency distribution classes have new support for tabulation and plotting. The Brown Corpus reader has human readable category labels instead of letters. A new Swadesh Corpus containing comparative wordlists has been added. NLTK\-Contrib includes a TIGERSearch implementation for searching treebanks (Torsten Marek). Most chapters of the book have been substantially revised. +.TP +.B The NLTK Project has moved +November 2008 +The NLTK project has moved to Google Sites, Google Code and Google Groups. Content for users and the nltk.org domain is hosted on Google Sites. The home of NLTK development is now Google Code. All discussion lists are at Google Groups. Our old site at nltk.sourceforge.net will continue to be available while we complete this transition. Old releases are still available via our SourceForge release page. We\(aqre grateful to SourceForge for hosting our project since its inception in 2001. 
+.TP +.B Version 0.9.5 released +August 2008 +This version contains several low\-level changes to facilitate installation, plus updates to several NLTK\-Contrib projects. A new text module gives easy access to text corpora for newcomers to NLP. For full details see the ChangeLog. +.TP +.B Version 0.9.4 released +August 2008 +This version contains a substantially expanded semantics package contributed by Dan Garrette, improvements to the chunk, tag, wordnet, tree and feature\-structure modules, Mallet interface, ngram language modeling, new GUI tools (WordNet browser, chunking, POS\-concordance). The data distribution includes the new NPS Chat Corpus. NLTK\-Contrib includes the following new packages (still undergoing active development): NLG package (Petro Verkhogliad), dependency parsers (Jason Narad), coreference (Joseph Frazee), CCG parser (Graeme Gange), and a first order resolution theorem prover (Dan Garrette). For full details see the ChangeLog. +.TP +.B NLTK presented at ACL conference +June 2008 +A paper on teaching courses using NLTK will be presented at the ACL conference: Multidisciplinary Instruction with the Natural Language Toolkit +.TP +.B Version 0.9.3 released +June 2008 +This version contains an improved WordNet similarity module using pre\-built information content files (included in the corpus distribution), new/improved interfaces to Weka, MEGAM and Prover9/Mace4 toolkits, improved Unicode support for corpus readers, a BNC corpus reader, and a rewrite of the Punkt sentence segmenter contributed by Joel Nothman. NLTK\-Contrib includes an implementation of an incremental algorithm for generating referring expressions contributed by Margaret Mitchell. For full details see the ChangeLog. +.TP +.B NLTK presented at LinuxFest Northwest +April 2008 +Sean Boisen presented NLTK at LinuxFest Northwest, which took place in Bellingham, Washington. His presentation slides are available at: \fI\%http://semanticbible.com/other/talks/2008/nltk/main.html\fP +.TP +.B NLTK in Google Summer of Code +April 2008 +Google Summer of Code will sponsor two NLTK projects. Jason Narad won funding for a project on dependency parsers in NLTK (mentored by Sebastian Riedel and Jason Baldridge). Petro Verkhogliad won funding for a project on natural language generation in NLTK (mentored by Robert Dale and Edward Loper). +.TP +.B Python Software Foundation adopts NLTK for Google Summer of Code application +March 2008 +The Python Software Foundation has listed NLTK projects for sponsorship from the 2008 Google Summer of Code program. For details please see \fI\%http://wiki.python.org/moin/SummerOfCode\fP\&. +.TP +.B Version 0.9.2 released +March 2008 +This version contains a new inference module linked to the Prover9/Mace4 theorem\-prover and model checker (Dan Garrette, Ewan Klein). It also includes the VerbNet and PropBank corpora along with corpus readers. A bug in the Reuters corpus reader has been fixed. NLTK\-Contrib includes new work on the WordNet browser (Jussi Salmela). For full details see the ChangeLog. +.TP +.B YouTube video about NLTK +January 2008 +The video of the NLTK talk at the Bay Area Python Interest Group last July has been posted at \fI\%http://www.youtube.com/watch?v=keXW_5\-llD0\fP (1h15m) +.TP +.B Version 0.9.1 released +January 2008 +This version contains new support for accessing text categorization corpora, along with several corpora categorized for topic, genre, question type, or sentiment.
It includes several new corpora: Question classification data (Li & Roth), Reuters 21578 Corpus, Movie Reviews corpus (Pang & Lee), Recognising Textual Entailment (RTE) Challenges. NLTK\-Contrib includes expanded support for semantics (Dan Garrette), readability scoring (Thomas Jakobsen, Thomas Skardal), and SIL Toolbox (Greg Aumann). The book contains many improvements in early chapters in response to reader feedback. For full details see the ChangeLog. +.UNINDENT +.SS 2007 +.INDENT 0.0 +.TP +.B NLTK\-Lite 0.9 released +October 2007 +This version is substantially revised and expanded from version 0.8. The entire toolkit can be accessed via a single import statement "import nltk", and there is a more convenient naming scheme. Calling deprecated functions generates messages that help programmers update their code. The corpus, tagger, and classifier modules have been redesigned. All functionality of the old NLTK 1.4.3 is now covered by NLTK\-Lite 0.9. The book has been revised and expanded. A new data package incorporates the existing corpus collection and contains new sections for pre\-specified grammars and pre\-computed models. Several new corpora have been added, including treebanks for Portuguese, Spanish, Catalan and Dutch. A Macintosh distribution is provided. For full details see the ChangeLog. +.TP +.B NLTK\-Lite 0.9b2 released +September 2007 +This version is substantially revised and expanded from version 0.8. The entire toolkit can be accessed via a single import statement "import nltk", and many common NLP functions can be accessed directly, e.g. nltk.PorterStemmer, nltk.ShiftReduceParser. The corpus, tagger, and classifier modules have been redesigned. The book has been revised and expanded, and the chapters have been reordered. NLTK has a new data package incorporating the existing corpus collection and adding new sections for pre\-specified grammars and pre\-computed models. The Floresta Portuguese Treebank has been added. Release 0.9b2 fixes several minor problems with 0.9b1 and removes the numpy dependency. It includes a new corpus and corpus reader for Brazilian Portuguese news text (MacMorphy) and an improved corpus reader for the Sinica Treebank, and a trained model for Portuguese sentence segmentation. +.TP +.B NLTK\-Lite 0.9b1 released +August 2007 +This version is substantially revised and expanded from version 0.8. The entire toolkit can be accessed via a single import statement "import nltk", and many common NLP functions can be accessed directly, e.g. nltk.PorterStemmer, nltk.ShiftReduceParser. The corpus, tagger, and classifier modules have been redesigned. The book has been revised and expanded, and the chapters have been reordered. NLTK has a new data package incorporating the existing corpus collection and adding new sections for pre\-specified grammars and pre\-computed models. The Floresta Portuguese Treebank has been added. For full details see the ChangeLog. +.TP +.B NLTK talks in São Paulo +August 2007 +Steven Bird will present NLTK in a series of talks at the First Brazilian School on Computational Linguistics, at the University of São Paulo in the first week of September. +.TP +.B NLTK talk in Bay Area +July 2007 +Steven Bird, Ewan Klein, and Edward Loper will present NLTK at the Bay Area Python Interest Group, at Google on Thursday 12 July. +.TP +.B NLTK\-Lite 0.8 released +July 2007 +This version is substantially revised and expanded from version 0.7.
The code now includes improved interfaces to corpora, chunkers, grammars, frequency distributions, full integration with WordNet 3.0 and WordNet similarity measures. The book contains substantial revision of Part I (tokenization, tagging, chunking) and Part II (grammars and parsing). NLTK has several new corpora including the Switchboard Telephone Speech Corpus transcript sample (Talkbank Project), CMU Problem Reports Corpus sample, CONLL2002 POS+NER data, Patient Information Leaflet corpus sample, Indian POS\-Tagged data (Bangla, Hindi, Marathi, Telugu), Shakespeare XML corpus sample, and the Universal Declaration of Human Rights corpus with text samples in 300+ languages. +.TP +.B NLTK features in Language Documentation and Conservation article +July 2007 +An article Managing Fieldwork Data with Toolbox and the Natural Language Toolkit by Stuart Robinson, Greg Aumann, and Steven Bird appears in the inaugural issue of \fILanguage Documentation and Conservation\fP\&. It discusses several small Python programs for manipulating field data. +.TP +.B NLTK features in ACM Crossroads article +May 2007 +An article Getting Started on Natural Language Processing with Python by Nitin Madnani will appear in \fIACM Crossroads\fP, the ACM Student Journal. It discusses NLTK in detail, and provides several helpful examples including an entertaining free word association program. +.TP +.B NLTK\-Lite 0.7.5 released +May 2007 +This version contains improved interfaces for WordNet 3.0 and WordNet\-Similarity, the Lancaster Stemmer (contributed by Steven Tomcavage), and several new corpora including the Switchboard Telephone Speech Corpus transcript sample (Talkbank Project), CMU Problem Reports Corpus sample, CONLL2002 POS+NER data, Patient Information Leaflet corpus sample and WordNet 3.0 data files. With this distribution WordNet no longer needs to be separately installed. +.TP +.B NLTK\-Lite 0.7.4 released +May 2007 +This release contains new corpora and corpus readers for Indian POS\-Tagged data (Bangla, Hindi, Marathi, Telugu), and the Sinica Treebank, and substantial revision of Part II of the book on structured programming, grammars and parsing. +.TP +.B NLTK\-Lite 0.7.3 released +April 2007 +This release contains improved chunker and PCFG interfaces, the Shakespeare XML corpus sample and corpus reader, improved tutorials and improved formatting of code samples, and categorization of problem sets by difficulty. +.TP +.B NLTK\-Lite 0.7.2 released +March 2007 +This release contains new text classifiers (Cosine, NaiveBayes, Spearman), contributed by Sam Huston, simple feature detectors, the UDHR corpus with text samples in 300+ languages and a corpus interface; improved tutorials (340 pages in total); additions to contrib area including Kimmo finite\-state morphology system, Lambek calculus system, and a demonstration of text classifiers for language identification. +.TP +.B NLTK\-Lite 0.7.1 released +January 2007 +This release contains bugfixes in the WordNet and HMM modules. +.UNINDENT +.SS 2006 +.INDENT 0.0 +.TP +.B NLTK\-Lite 0.7 released +December 2006 +This release contains: new semantic interpretation package (Ewan Klein), new support for SIL Toolbox format (Greg Aumann), new chunking package including cascaded chunking (Steven Bird), new interface to WordNet
2.1 and Wordnet similarity measures (David Ormiston Smith), new support for Penn Treebank format (Yoav Goldberg), bringing the codebase to 48,000 lines; substantial new chapters on semantic interpretation and chunking, and substantial revisions to several other chapters, bringing the textbook documentation to 280 pages. +.TP +.B NLTK\-Lite 0.7b1 released +December 2006 +This release contains: new semantic interpretation package (Ewan Klein), new support for SIL Toolbox format (Greg Aumann), new chunking package including cascaded chunking, wordnet package updated for version 2.1 of Wordnet, and prototype wordnet similarity measures (David Ormiston Smith), bringing the codebase to 48,000 lines; substantial new chapters on semantic interpretation and chunking, and substantial revisions to several other chapters, bringing the textbook documentation to 270 pages. +.TP +.B NLTK\-Lite 0.6.6 released +October 2006 +This release contains bugfixes, improvements to Shoebox file format support, and expanded tutorial discussions of programming and feature\-based grammars. +.TP +.B NLTK\-Lite 0.6.5 released +July 2006 +This release contains improvements to Shoebox file format support (by Stuart Robinson and Greg Aumann); an implementation of hole semantics (by Peter Wang); improvements to lambda calculus and semantic interpretation modules (by Ewan Klein); a new corpus (Sinica Treebank sample); and expanded tutorial discussions of trees, feature\-based grammar, unification, PCFGs, and more exercises. +.TP +.B NLTK\-Lite passes 10k download milestone +May 2006 +We have now had 10,000 downloads of NLTK\-Lite in the nine months since it was first released. +.TP +.B NLTK\-Lite 0.6.4 released +April 2006 +This release contains new corpora (Senseval 2, TIMIT sample), a clusterer, cascaded chunker, and several substantially revised tutorials. +.UNINDENT +.SS 2005 +.INDENT 0.0 +.TP +.B NLTK 1.4 no longer supported +December 2005 +The main development has switched to NLTK\-Lite. The latest version of NLTK can still be downloaded; see the installation page for instructions. +.TP +.B NLTK\-Lite 0.6 released +November 2005 +Contains bug\-fixes, PDF versions of tutorials, expanded fieldwork tutorial, PCFG grammar induction (by Nathan Bodenstab), and prototype concordance and paradigm display tools (by Peter Spiller and Will Hardy). +.TP +.B NLTK\-Lite 0.5 released +September 2005 +Contains bug\-fixes, improved tutorials, more project suggestions, and a pronunciation dictionary. +.TP +.B NLTK\-Lite 0.4 released +September 2005 +Contains bug\-fixes, improved tutorials, more project suggestions, and probabilistic parsers. +.TP +.B NLTK\-Lite 0.3 released +August 2005 +Contains bug\-fixes, documentation clean\-up, project suggestions, and the chart parser demos including one for Earley parsing by Jean Mark Gawron. +.TP +.B NLTK\-Lite 0.2 released +July 2005 +Contains bug\-fixes, documentation clean\-up, and some translations of tutorials into Brazilian Portuguese by Tiago Tresoldi. +.TP +.B NLTK\-Lite 0.1 released +July 2005 +A substantially simplified and streamlined version of NLTK has been released. +.TP +.B Brazilian Portuguese Translation +April 2005 +The top\-level pages of this website have been translated into Brazilian Portuguese by Tiago Tresoldi; translations of the tutorials are in preparation: \fI\%http://hermes.sourceforge.net/nltk\-br/\fP +.TP +.B 1.4.3 Release +February 2005 +NLTK 1.4.3 has been released; this is the first version which is compatible with Python 2.4.
+.UNINDENT +.SH INSTALLING NLTK +.sp +NLTK requires Python versions 3.6, 3.7, 3.8, or 3.9. +.sp +For Windows users, it is strongly recommended that you go through this guide to install Python 3 successfully: \fI\%https://docs.python\-guide.org/starting/install3/win/#install3\-windows\fP +.SS Setting up a Python Environment (Mac/Unix/Windows) +.sp +Please go through this guide to learn how to set up and manage virtual environments before you install NLTK: \fI\%https://docs.python\-guide.org/dev/virtualenvs/\fP +.sp +Alternatively, you can use the Anaconda distribution installer that comes "batteries included": \fI\%https://www.anaconda.com/distribution/\fP +.SS Mac/Unix +.INDENT 0.0 +.IP 1. 3 +Install NLTK: run \fBpip install \-\-user \-U nltk\fP +.IP 2. 3 +Install Numpy (optional): run \fBpip install \-\-user \-U numpy\fP +.IP 3. 3 +Test installation: run \fBpython\fP then type \fBimport nltk\fP +.UNINDENT +.sp +For older versions of Python it might be necessary to install setuptools (see \fI\%http://pypi.python.org/pypi/setuptools\fP) and to install pip (\fBsudo easy_install pip\fP). +.SS Windows +.sp +These instructions assume that you do not already have Python installed on your machine. +.SS 32\-bit binary installation +.INDENT 0.0 +.IP 1. 3 +Install Python 3.8: \fI\%http://www.python.org/downloads/\fP (avoid the 64\-bit versions) +.IP 2. 3 +Install Numpy (optional): \fI\%https://www.scipy.org/scipylib/download.html\fP +.IP 3. 3 +Install NLTK: \fI\%http://pypi.python.org/pypi/nltk\fP +.IP 4. 3 +Test installation: \fBStart>Python38\fP, then type \fBimport nltk\fP +.UNINDENT +.SS Installing Third\-Party Software +.sp +Please see: \fI\%https://github.com/nltk/nltk/wiki/Installing\-Third\-Party\-Software\fP +.SS Installing NLTK Data +.sp +After installing the NLTK package, please install the necessary datasets/models for specific functions to work. +.sp +If you\(aqre unsure of which datasets/models you\(aqll need, you can install the "popular" subset of NLTK data: on the command line, type \fIpython \-m nltk.downloader popular\fP; or, in the Python interpreter, run \fIimport nltk; nltk.download(\(aqpopular\(aq)\fP\&. +.sp +For details, see \fI\%http://www.nltk.org/data.html\fP +.SH INSTALLING NLTK DATA +.sp +NLTK comes with many corpora, toy grammars, trained models, etc. A complete list is posted at: \fI\%http://nltk.org/nltk_data/\fP +.sp +To install the data, first install NLTK (see \fI\%http://nltk.org/install.html\fP), then use NLTK\(aqs data downloader as described below. +.sp +Apart from individual data packages, you can download the entire collection (using "all"), or just the data required for the examples and exercises in the book (using "book"), or just the corpora and no grammars or trained models (using "all\-corpora"). +.SS Interactive installer +.sp +\fIFor central installation on a multi\-user machine, do the following from an administrator account.\fP +.sp +Run the Python interpreter and type the commands: +.sp +.nf +.ft C +>>> import nltk +>>> nltk.download() +.ft P +.fi +.sp +A new window should open, showing the NLTK Downloader. Click on the File menu and select Change Download Directory. For central installation, set this to \fBC:\enltk_data\fP (Windows), \fB/usr/local/share/nltk_data\fP (Mac), or \fB/usr/share/nltk_data\fP (Unix). Next, select the packages or collections you want to download. +.sp +If you did not install the data to one of the above central locations, you will need to set the \fBNLTK_DATA\fP environment variable to specify the location of the data.
(On a Windows machine, right click on "My Computer" then select \fBProperties > Advanced > Environment Variables > User Variables > New...\fP) +.sp +Test that the data has been installed as follows. (This assumes you downloaded the Brown Corpus): +.sp +.nf +.ft C +>>> from nltk.corpus import brown +>>> brown.words() +[\(aqThe\(aq, \(aqFulton\(aq, \(aqCounty\(aq, \(aqGrand\(aq, \(aqJury\(aq, \(aqsaid\(aq, ...] +.ft P +.fi +.SS Installing via a proxy web server +.sp +If your web connection uses a proxy server, you should specify the proxy address as follows. In the case of an authenticating proxy, specify a username and password. If the proxy is set to None, this function will attempt to detect the system proxy. +.sp +.nf +.ft C +>>> nltk.set_proxy(\(aqhttp://proxy.example.com:3128\(aq, (\(aqUSERNAME\(aq, \(aqPASSWORD\(aq)) +>>> nltk.download() +.ft P +.fi +.SS Command line installation +.sp +The downloader will search for an existing \fBnltk_data\fP directory to install NLTK data. If one does not exist, it will attempt to create one in a central location (when using an administrator account) or otherwise in the user\(aqs filespace. If necessary, run the download command from an administrator account, or using sudo. The recommended system location is \fBC:\enltk_data\fP (Windows); \fB/usr/local/share/nltk_data\fP (Mac); and \fB/usr/share/nltk_data\fP (Unix). You can use the \fB\-d\fP flag to specify a different location (but if you do this, be sure to set the \fBNLTK_DATA\fP environment variable accordingly). +.sp +Run the command \fBpython \-m nltk.downloader all\fP\&. To ensure central installation, run the command \fBsudo python \-m nltk.downloader \-d /usr/local/share/nltk_data all\fP\&. +.sp +Windows: Use the "Run..." option on the Start menu. Windows Vista users need to first turn on this option, using \fBStart \-> Properties \-> Customize\fP to check the box to activate the "Run..." option. +.sp +Test the installation: Check that the user environment and privileges are set correctly by logging in to a user account, +starting the Python interpreter, and accessing the Brown Corpus (see the previous section). +.SS Manual installation +.sp +Create a folder \fBnltk_data\fP, e.g. \fBC:\enltk_data\fP, or \fB/usr/local/share/nltk_data\fP, +and subfolders \fBchunkers\fP, \fBgrammars\fP, \fBmisc\fP, \fBsentiment\fP, \fBtaggers\fP, \fBcorpora\fP, +\fBhelp\fP, \fBmodels\fP, \fBstemmers\fP, \fBtokenizers\fP\&. +.sp +Download individual packages from \fBhttp://nltk.org/nltk_data/\fP (see the "download" links). +Unzip them to the appropriate subfolder. For example, the Brown Corpus, found at: +\fBhttps://raw.githubusercontent.com/nltk/nltk_data/gh\-pages/packages/corpora/brown.zip\fP +is to be unzipped to \fBnltk_data/corpora/brown\fP\&. +.sp +Set your \fBNLTK_DATA\fP environment variable to point to your top\-level \fBnltk_data\fP folder. +.SH CONTRIBUTE TO NLTK +.sp +The Natural Language Toolkit exists thanks to the efforts of dozens +of volunteer developers who have contributed functionality and +bugfixes since the project began in 2000 (\fI\%contributors\fP). +Information for contributors: +.INDENT 0.0 +.IP \(bu 2 +\fI\%contributing to NLTK\fP +.IP \(bu 2 +\fI\%desired enhancements\fP +.IP \(bu 2 +\fI\%contribute a corpus\fP +.IP \(bu 2 +\fI\%nltk\-dev mailing list\fP +.IP \(bu 2 +\fI\%GitHub Project\fP +.UNINDENT +.SS NLTK Team +.sp +The NLTK project is led by \fI\%Steven Bird and Liling Tan\fP\&.
+Individual packages are maintained by the following people: +.INDENT 0.0 +.TP +.B Semantics +\fI\%Dan Garrette\fP, Austin, USA (\fBnltk.sem, nltk.inference\fP) +.TP +.B Parsing +\fI\%Peter Ljunglöf\fP, Gothenburg, Sweden (\fBnltk.parse, nltk.featstruct\fP) +.TP +.B Metrics +\fI\%Joel Nothman\fP, Sydney, Australia (\fBnltk.metrics, nltk.tokenize.punkt\fP) +.TP +.B Python 3 +\fI\%Mikhail Korobov\fP, Ekaterinburg, Russia +.TP +.B Releases +\fI\%Steven Bird\fP, Melbourne, Australia +.TP +.B NLTK\-Users +\fI\%Alexis Dimitriadis\fP, Utrecht, Netherlands +.UNINDENT +.SH NLTK PACKAGE +.SS Subpackages +.SS nltk.app package +.SS Submodules +.SS nltk.app.chartparser_app module +.SS nltk.app.chunkparser_app module +.SS nltk.app.collocations_app module +.SS nltk.app.concordance_app module +.SS nltk.app.nemo_app module +.SS nltk.app.rdparser_app module +.SS nltk.app.srparser_app module +.SS nltk.app.wordfreq_app module +.SS nltk.app.wordnet_app module +.sp +A WordNet Browser application which launches the default browser +(if it is not already running) and opens a new tab with a connection +to \fI\%http://localhost:port/\fP\&. It also starts an HTTP server on the +specified port and begins serving browser requests. The default +port is 8000. (For command\-line help, run "python wordnet \-h") +This application requires that the user\(aqs web browser supports +Javascript. +.sp +BrowServer is a server for browsing the NLTK Wordnet database. It first +launches a browser client to be used for browsing and then starts +serving the requests of that and maybe other clients. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +browserver.py \-h +browserver.py [\-s] [\-p <port>] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Options: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +\-h or \-\-help + Display this help message. + +\-l <file> or \-\-log\-file <file> + Logs messages to the given file. If this option is not specified, + messages are silently dropped. + +\-p <port> or \-\-port <port> + Run the web server on this TCP port; defaults to 8000. + +\-s or \-\-server\-mode + Do not start a web browser, and do not allow a user to + shutdown the server through the web interface. +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.app.wordnet_app.app() +.UNINDENT +.SS Module contents +.sp +Interactive NLTK Applications: +.sp +chartparser: Chart Parser +chunkparser: Regular\-Expression Chunk Parser +collocations: Find collocations in text +concordance: Part\-of\-speech concordancer +nemo: Finding (and Replacing) Nemo regular expression tool +rdparser: Recursive Descent Parser +srparser: Shift\-Reduce Parser +wordnet: WordNet Browser +.SS nltk.ccg package +.SS Submodules +.SS nltk.ccg.api module +.INDENT 0.0 +.TP +.B class nltk.ccg.api.AbstractCCGCategory +Bases: \fBobject\fP +.sp +Interface for categories in combinatory grammars. +.INDENT 7.0 +.TP +.B abstract can_unify(other) +.INDENT 7.0 +.TP +.B Determines whether two categories can be unified. +.INDENT 7.0 +.IP \(bu 2 +Returns None if they cannot be unified +.IP \(bu 2 +Returns a list of necessary substitutions if they can. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract is_function() +Returns true if the category is a function application. +.UNINDENT +.INDENT 7.0 +.TP +.B abstract is_primitive() +Returns true if the category is primitive. +.UNINDENT +.INDENT 7.0 +.TP +.B abstract is_var() +Returns true if the category is a variable.
+.UNINDENT +.INDENT 7.0 +.TP +.B abstract substitute(substitutions) +Takes a set of (var, category) substitutions, and replaces every +occurrence of the variable with the corresponding category. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.api.CCGVar(prim_only=False) +Bases: \fI\%nltk.ccg.api.AbstractCCGCategory\fP +.sp +Class representing a variable CCG category. +Used for conjunctions (and possibly type\-raising, if implemented as a +unary rule). +.INDENT 7.0 +.TP +.B can_unify(other) +If the variable can be replaced with other, +a substitution is returned. +.UNINDENT +.INDENT 7.0 +.TP +.B id() +.UNINDENT +.INDENT 7.0 +.TP +.B is_function() +Returns true if the category is a function application. +.UNINDENT +.INDENT 7.0 +.TP +.B is_primitive() +Returns true if the category is primitive. +.UNINDENT +.INDENT 7.0 +.TP +.B is_var() +Returns true if the category is a variable. +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod new_id() +A class method allowing generation of unique variable identifiers. +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod reset_id() +.UNINDENT +.INDENT 7.0 +.TP +.B substitute(substitutions) +If there is a substitution corresponding to this variable, +return the substituted category. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.api.Direction(dir, restrictions) +Bases: \fBobject\fP +.sp +Class representing the direction of a function application. +Also maintains information as to which combinators +may be used with the category. +.INDENT 7.0 +.TP +.B can_compose() +.UNINDENT +.INDENT 7.0 +.TP +.B can_cross() +.UNINDENT +.INDENT 7.0 +.TP +.B can_unify(other) +.UNINDENT +.INDENT 7.0 +.TP +.B dir() +.UNINDENT +.INDENT 7.0 +.TP +.B is_backward() +.UNINDENT +.INDENT 7.0 +.TP +.B is_forward() +.UNINDENT +.INDENT 7.0 +.TP +.B is_variable() +.UNINDENT +.INDENT 7.0 +.TP +.B restrs() +A list of restrictions on the combinators. +\(aq.\(aq denotes that permuting operations are disallowed; +\(aq,\(aq denotes that function composition is disallowed; +\(aq_\(aq denotes that the direction has variable restrictions. +(This is redundant in the current implementation of type\-raising) +.UNINDENT +.INDENT 7.0 +.TP +.B substitute(subs) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.api.FunctionalCategory(res, arg, dir) +Bases: \fI\%nltk.ccg.api.AbstractCCGCategory\fP +.sp +Class that represents a function application category. +Consists of argument and result categories, together with +an application direction. +.INDENT 7.0 +.TP +.B arg() +.UNINDENT +.INDENT 7.0 +.TP +.B can_unify(other) +.INDENT 7.0 +.TP +.B Determines whether two categories can be unified. +.INDENT 7.0 +.IP \(bu 2 +Returns None if they cannot be unified +.IP \(bu 2 +Returns a list of necessary substitutions if they can. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B dir() +.UNINDENT +.INDENT 7.0 +.TP +.B is_function() +Returns true if the category is a function application. +.UNINDENT +.INDENT 7.0 +.TP +.B is_primitive() +Returns true if the category is primitive. +.UNINDENT +.INDENT 7.0 +.TP +.B is_var() +Returns true if the category is a variable. +.UNINDENT +.INDENT 7.0 +.TP +.B res() +.UNINDENT +.INDENT 7.0 +.TP +.B substitute(subs) +Takes a set of (var, category) substitutions, and replaces every +occurrence of the variable with the corresponding category. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.api.PrimitiveCategory(categ, restrictions=[]) +Bases: \fI\%nltk.ccg.api.AbstractCCGCategory\fP +.sp +Class representing primitive categories.
+Takes a string representation of the category, and a +list of strings specifying the morphological subcategories. +.INDENT 7.0 +.TP +.B can_unify(other) +.INDENT 7.0 +.TP +.B Determines whether two categories can be unified. +.INDENT 7.0 +.IP \(bu 2 +Returns None if they cannot be unified +.IP \(bu 2 +Returns a list of necessary substitutions if they can. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B categ() +.UNINDENT +.INDENT 7.0 +.TP +.B is_function() +Returns true if the category is a function application. +.UNINDENT +.INDENT 7.0 +.TP +.B is_primitive() +Returns true if the category is primitive. +.UNINDENT +.INDENT 7.0 +.TP +.B is_var() +Returns true if the category is a variable. +.UNINDENT +.INDENT 7.0 +.TP +.B restrs() +.UNINDENT +.INDENT 7.0 +.TP +.B substitute(subs) +Takes a set of (var, category) substitutions, and replaces every +occurrence of the variable with the corresponding category. +.UNINDENT +.UNINDENT +.SS nltk.ccg.chart module +.sp +The lexicon is constructed by calling +\fBlexicon.fromstring()\fP\&. +.sp +In order to construct a parser, you also need a rule set. +The standard English rules are provided in chart as +\fBchart.DefaultRuleSet\fP\&. +.sp +The parser can then be constructed by calling, for example: +\fBparser = chart.CCGChartParser(<lexicon>, <ruleset>)\fP +.sp +Parsing is then performed by running +\fBparser.parse(<sentence>.split())\fP\&. +.sp +While this returns a list of trees, the default representation +of the produced trees is not very enlightening, particularly +given that it uses the same tree class as the CFG parsers. +It is probably better to call: +\fBchart.printCCGDerivation(<parse tree extracted from list>)\fP +which should print a nice representation of the derivation. +.sp +This entire process is shown far more clearly in the demonstration: +python chart.py +.INDENT 0.0 +.TP +.B class nltk.ccg.chart.BackwardTypeRaiseRule +Bases: \fBnltk.parse.chart.AbstractChartRule\fP +.sp +Class for applying backward type raising. +.INDENT 7.0 +.TP +.B NUMEDGES = 2 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, left_edge, right_edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.chart.BinaryCombinatorRule(combinator) +Bases: \fBnltk.parse.chart.AbstractChartRule\fP +.sp +Class implementing application of a binary combinator to a chart. +Takes the directed combinator to apply. +.INDENT 7.0 +.TP +.B NUMEDGES = 2 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, left_edge, right_edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable.
+.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.chart.CCGChart(tokens) +Bases: \fBnltk.parse.chart.Chart\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.chart.CCGChartParser(lexicon, rules, trace=0) +Bases: \fBnltk.parse.api.ParserI\fP +.sp +Chart parser for CCGs. +Based largely on the ChartParser class from NLTK. +.INDENT 7.0 +.TP +.B lexicon() +.UNINDENT +.INDENT 7.0 +.TP +.B parse(tokens) +.INDENT 7.0 +.TP +.B Returns +An iterator that generates parse trees for the sentence. +.UNINDENT +.sp +When possible this list is sorted from most likely to least likely. +.INDENT 7.0 +.TP +.B Parameters +\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed +.TP +.B Return type +iter(Tree) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.chart.CCGEdge(span, categ, rule) +Bases: \fBnltk.parse.chart.EdgeI\fP +.INDENT 7.0 +.TP +.B categ() +.UNINDENT +.INDENT 7.0 +.TP +.B dot() +Return this edge\(aqs dot position, which indicates how much of +the hypothesized structure is consistent with the +sentence. In particular, \fBself.rhs[:dot]\fP is consistent +with \fBtokens[self.start():self.end()]\fP\&. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B end() +Return the end index of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B is_complete() +Return True if this edge\(aqs structure is fully consistent +with the text. +.INDENT 7.0 +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B is_incomplete() +Return True if this edge\(aqs structure is partially consistent +with the text. +.INDENT 7.0 +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B length() +Return the length of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lhs() +Return this edge\(aqs left\-hand side, which specifies what kind +of structure is hypothesized by this edge. +.INDENT 7.0 +.TP +.B See +\fBTreeEdge\fP and \fBLeafEdge\fP for a description of +the left\-hand side values for each edge type. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B nextsym() +Return the element of this edge\(aqs right\-hand side that +immediately follows its dot. +.INDENT 7.0 +.TP +.B Return type +Nonterminal or terminal or None +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B rhs() +Return this edge\(aqs right\-hand side, which specifies +the content of the structure hypothesized by this edge. +.INDENT 7.0 +.TP +.B See +\fBTreeEdge\fP and \fBLeafEdge\fP for a description of +the right\-hand side values for each edge type. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B rule() +.UNINDENT +.INDENT 7.0 +.TP +.B span() +Return a tuple \fB(s, e)\fP, where \fBtokens[s:e]\fP is the +portion of the sentence that is consistent with this +edge\(aqs structure. +.INDENT 7.0 +.TP +.B Return type +tuple(int, int) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B start() +Return the start index of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.chart.CCGLeafEdge(pos, token, leaf) +Bases: \fBnltk.parse.chart.EdgeI\fP +.sp +Class representing leaf edges in a CCG derivation. +.INDENT 7.0 +.TP +.B categ() +.UNINDENT +.INDENT 7.0 +.TP +.B dot() +Return this edge\(aqs dot position, which indicates how much of +the hypothesized structure is consistent with the +sentence. 
In particular, \fBself.rhs[:dot]\fP is consistent +with \fBtokens[self.start():self.end()]\fP\&. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B end() +Return the end index of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B is_complete() +Return True if this edge\(aqs structure is fully consistent +with the text. +.INDENT 7.0 +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B is_incomplete() +Return True if this edge\(aqs structure is partially consistent +with the text. +.INDENT 7.0 +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B leaf() +.UNINDENT +.INDENT 7.0 +.TP +.B length() +Return the length of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lhs() +Return this edge\(aqs left\-hand side, which specifies what kind +of structure is hypothesized by this edge. +.INDENT 7.0 +.TP +.B See +\fBTreeEdge\fP and \fBLeafEdge\fP for a description of +the left\-hand side values for each edge type. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B nextsym() +Return the element of this edge\(aqs right\-hand side that +immediately follows its dot. +.INDENT 7.0 +.TP +.B Return type +Nonterminal or terminal or None +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B rhs() +Return this edge\(aqs right\-hand side, which specifies +the content of the structure hypothesized by this edge. +.INDENT 7.0 +.TP +.B See +\fBTreeEdge\fP and \fBLeafEdge\fP for a description of +the right\-hand side values for each edge type. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B span() +Return a tuple \fB(s, e)\fP, where \fBtokens[s:e]\fP is the +portion of the sentence that is consistent with this +edge\(aqs structure. +.INDENT 7.0 +.TP +.B Return type +tuple(int, int) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B start() +Return the start index of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B token() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.chart.ForwardTypeRaiseRule +Bases: \fBnltk.parse.chart.AbstractChartRule\fP +.sp +Class for applying forward type raising +.INDENT 7.0 +.TP +.B NUMEDGES = 2 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, left_edge, right_edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.chart.compute_semantics(children, edge) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.chart.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.chart.printCCGDerivation(tree) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.chart.printCCGTree(lwidth, tree) +.UNINDENT +.SS nltk.ccg.combinator module +.sp +CCG Combinators +.INDENT 0.0 +.TP +.B class nltk.ccg.combinator.BackwardCombinator(combinator, predicate, suffix=\(aq\(aq) +Bases: \fI\%nltk.ccg.combinator.DirectedBinaryCombinator\fP +.sp +The backward equivalent of the ForwardCombinator class. 
+.INDENT 7.0 +.TP +.B can_combine(left, right) +.UNINDENT +.INDENT 7.0 +.TP +.B combine(left, right) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.combinator.DirectedBinaryCombinator +Bases: \fBobject\fP +.sp +Wrapper for the undirected binary combinator. +It takes left and right categories, and decides which is to be +the function, and which the argument. +It then decides whether or not they can be combined. +.INDENT 7.0 +.TP +.B abstract can_combine(left, right) +.UNINDENT +.INDENT 7.0 +.TP +.B abstract combine(left, right) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.combinator.ForwardCombinator(combinator, predicate, suffix=\(aq\(aq) +Bases: \fI\%nltk.ccg.combinator.DirectedBinaryCombinator\fP +.sp +Class representing combinators where the primary functor is on the left. +.sp +Takes an undirected combinator, and a predicate which adds constraints +restricting the cases in which it may apply. +.INDENT 7.0 +.TP +.B can_combine(left, right) +.UNINDENT +.INDENT 7.0 +.TP +.B combine(left, right) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.combinator.UndirectedBinaryCombinator +Bases: \fBobject\fP +.sp +Abstract class for representing a binary combinator. +Merely defines functions for checking if the function and argument +are able to be combined, and what the resulting category is. +.sp +Note that as no assumptions are made as to direction, the unrestricted +combinators can perform all backward, forward and crossed variations +of the combinators; these restrictions must be added in the rule +class. +.INDENT 7.0 +.TP +.B abstract can_combine(function, argument) +.UNINDENT +.INDENT 7.0 +.TP +.B abstract combine(function, argument) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.combinator.UndirectedComposition +Bases: \fI\%nltk.ccg.combinator.UndirectedBinaryCombinator\fP +.sp +Functional composition (harmonic) combinator. +Implements rules of the form +X/Y Y/Z \-> X/Z (B>) +And the corresponding backwards and crossed variations. +.INDENT 7.0 +.TP +.B can_combine(function, argument) +.UNINDENT +.INDENT 7.0 +.TP +.B combine(function, argument) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.combinator.UndirectedFunctionApplication +Bases: \fI\%nltk.ccg.combinator.UndirectedBinaryCombinator\fP +.sp +Class representing function application. +Implements rules of the form: +X/Y Y \-> X (>) +And the corresponding backwards application rule. +.INDENT 7.0 +.TP +.B can_combine(function, argument) +.UNINDENT +.INDENT 7.0 +.TP +.B combine(function, argument) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.ccg.combinator.UndirectedSubstitution +Bases: \fI\%nltk.ccg.combinator.UndirectedBinaryCombinator\fP +.sp +Substitution (permutation) combinator. +Implements rules of the form +Y/Z (X\eY)/Z \-> X/Z (<Sx) +And other variations. +.INDENT 7.0 +.TP +.B can_combine(function, argument) +.UNINDENT +.INDENT 7.0 +.TP +.B combine(function, argument) +.UNINDENT +.UNINDENT +.SS nltk.ccg.lexicon module +.sp +CCG Lexicons +.INDENT 0.0 +.TP +.B class nltk.ccg.lexicon.Token(token, categ, semantics=None) +Bases: \fBobject\fP +.sp +Class representing a token. +.sp +token => category {semantics} +e.g. eat => S\evar[pl]/var {\ex y.eat(x,y)} +.INDENT 7.0 +.IP \(bu 2 +\fItoken\fP (string) +.IP \(bu 2 +\fIcateg\fP (string) +.IP \(bu 2 +\fIsemantics\fP (Expression) +.UNINDENT +.INDENT 7.0 +.TP +.B categ() +.UNINDENT +.INDENT 7.0 +.TP +.B semantics() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.lexicon.augParseCategory(line, primitives, families, var=None) +Parses a string representing a category, and returns a tuple with +(possibly) the CCG variable for the category. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.lexicon.fromstring(lex_str, include_semantics=False) +Convert string representation into a lexicon for CCGs.
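+.sp +For example, the following is a minimal sketch of building a lexicon with \fBfromstring()\fP and parsing with it, following the recipe given in the nltk.ccg.chart module above (the tiny two\-entry lexicon is illustrative only, not part of the distribution): +.sp +.nf +.ft C +>>> from nltk.ccg import chart, lexicon +>>> lex = lexicon.fromstring(\(aq\(aq\(aq +... :\- S, NP          # primitive categories; S is the target +... I => NP +... sleep => S\eNP +... \(aq\(aq\(aq) +>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) +>>> for parse in parser.parse(\(aqI sleep\(aq.split()): +...     chart.printCCGDerivation(parse) +...     break +.ft P +.fi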
+.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.lexicon.matchBrackets(string) +Separate the contents matching the first set of brackets from the rest of +the input. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.lexicon.nextCategory(string) +Separate the string for the next portion of the category from the rest +of the string +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.lexicon.parseApplication(app) +Parse an application operator +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.lexicon.parsePrimitiveCategory(chunks, primitives, families, var) +Parse a primitive category +.sp +If the primitive is the special category \(aqvar\(aq, replace it with the +correct \fICCGVar\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.lexicon.parseSubscripts(subscr) +Parse the subscripts for a primitive category +.UNINDENT +.SS nltk.ccg.logic module +.sp +Helper functions for CCG semantics computation +.INDENT 0.0 +.TP +.B nltk.ccg.logic.compute_composition_semantics(function, argument) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.logic.compute_function_semantics(function, argument) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.logic.compute_substitution_semantics(function, argument) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.ccg.logic.compute_type_raised_semantics(semantics) +.UNINDENT +.SS Module contents +.sp +Combinatory Categorial Grammar. +.sp +For more information see nltk/doc/contrib/ccg/ccg.pdf +.SS nltk.chat package +.SS Submodules +.SS nltk.chat.eliza module +.INDENT 0.0 +.TP +.B nltk.chat.eliza.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chat.eliza.eliza_chat() +.UNINDENT +.SS nltk.chat.iesha module +.sp +This chatbot is a tongue\-in\-cheek take on the average teen +anime junky that frequents YahooMessenger or MSNM. +All spelling mistakes and flawed grammar are intentional. +.INDENT 0.0 +.TP +.B nltk.chat.iesha.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chat.iesha.iesha_chat() +.UNINDENT +.SS nltk.chat.rude module +.INDENT 0.0 +.TP +.B nltk.chat.rude.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chat.rude.rude_chat() +.UNINDENT +.SS nltk.chat.suntsu module +.sp +Tsu bot responds to all queries with a Sun Tsu sayings +.sp +Quoted from Sun Tsu\(aqs The Art of War +Translated by LIONEL GILES, M.A. 1910 +Hosted by the Gutenberg Project +\fI\%http://www.gutenberg.org/\fP +.INDENT 0.0 +.TP +.B nltk.chat.suntsu.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chat.suntsu.suntsu_chat() +.UNINDENT +.SS nltk.chat.util module +.INDENT 0.0 +.TP +.B class nltk.chat.util.Chat(pairs, reflections={}) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B converse(quit=\(aqquit\(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B respond(str) +Generate a response to the user input. +.INDENT 7.0 +.TP +.B Parameters +\fBstr\fP (\fIstr\fP) \-\- The string to be mapped +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.chat.zen module +.sp +Zen Chatbot talks in gems of Zen wisdom. +.sp +This is a sample conversation with Zen Chatbot: +ZC: Welcome, my child. +me: Good afternoon. +ZC: Ask the question you have come to ask. +me: How can I achieve enlightenment? +ZC: How do you suppose? +me: Through meditation. +ZC: Form is emptiness, and emptiness form. +me: How can I empty my mind of worldly troubles? +ZC: Will an answer to that really help in your search for enlightenment? +me: Yes. +ZC: It is better to be right than to be certain. +me: I seek truth and wisdom. +ZC: The search for truth is a long journey. +me: Are you sure? +ZC: Maybe sure, maybe not sure. +.sp +The chatbot structure is based on that of chat.eliza. 
Thus, it uses +a translation table to convert from question to response +i.e. "I am" \-\-> "you are" +.sp +Of course, since Zen Chatbot does not understand the meaning of any words, +responses are very limited. Zen Chatbot will usually answer very vaguely, or +respond to a question by asking a different question, in much the same way +as Eliza. +.INDENT 0.0 +.TP +.B nltk.chat.zen.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chat.zen.zen_chat() +.UNINDENT +.SS Module contents +.sp +A class for simple chatbots. These perform simple pattern matching on sentences +typed by users, and respond with automatically generated sentences. +.sp +These chatbots may not work using the Windows command line or the +Windows IDLE GUI. +.INDENT 0.0 +.TP +.B nltk.chat.chatbots() +.UNINDENT +.SS nltk.chunk package +.SS Submodules +.SS nltk.chunk.api module +.INDENT 0.0 +.TP +.B class nltk.chunk.api.ChunkParserI +Bases: \fBnltk.parse.api.ParserI\fP +.sp +A processing interface for identifying non\-overlapping groups in +unrestricted text. Typically, chunk parsers are used to find base +syntactic constituents, such as base noun phrases. Unlike +\fBParserI\fP, \fBChunkParserI\fP guarantees that the \fBparse()\fP method +will always generate a parse. +.INDENT 7.0 +.TP +.B evaluate(gold) +Score the accuracy of the chunker against the gold standard. +Remove the chunking from the gold standard text, rechunk it using +the chunker, and return a \fBChunkScore\fP object +reflecting the performance of this chunk parser. +.INDENT 7.0 +.TP +.B Parameters +\fBgold\fP (\fIlist\fP\fI(\fP\fITree\fP\fI)\fP) \-\- The list of chunked sentences to score the chunker on. +.TP +.B Return type +ChunkScore +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse(tokens) +Return the best chunk structure for the given tokens, +as a tree. +.INDENT 7.0 +.TP +.B Parameters +\fBtokens\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- The list of (word, tag) tokens to be chunked. +.TP +.B Return type +Tree +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.chunk.named_entity module +.sp +Named entity chunker +.INDENT 0.0 +.TP +.B class nltk.chunk.named_entity.NEChunkParser(train) +Bases: \fI\%nltk.chunk.api.ChunkParserI\fP +.sp +Expected input: list of pos\-tagged words +.INDENT 7.0 +.TP +.B parse(tokens) +Each token should be a pos\-tagged word +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.named_entity.NEChunkParserTagger(train) +Bases: \fBnltk.tag.sequential.ClassifierBasedTagger\fP +.sp +The IOB tagger used by the chunk parser. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.named_entity.build_model(fmt=\(aqbinary\(aq) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.named_entity.cmp_chunks(correct, guessed) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.named_entity.load_ace_data(roots, fmt=\(aqbinary\(aq, skip_bnews=True) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.named_entity.load_ace_file(textfile, fmt) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.named_entity.postag_tree(tree) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.named_entity.shape(word) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.named_entity.simplify_pos(s) +.UNINDENT +.SS nltk.chunk.regexp module +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.ChunkRule(tag_pattern, descr) +Bases: \fI\%nltk.chunk.regexp.RegexpChunkRule\fP +.sp +A rule specifying how to add chunks to a \fBChunkString\fP, using a +matching tag pattern.
When applied to a \fBChunkString\fP, it will +find any substring that matches this tag pattern and that is not +already part of a chunk, and create a new chunk containing that +substring. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.ChunkRuleWithContext(left_context_tag_pattern, chunk_tag_pattern, right_context_tag_pattern, descr) +Bases: \fI\%nltk.chunk.regexp.RegexpChunkRule\fP +.sp +A rule specifying how to add chunks to a \fBChunkString\fP, using +three matching tag patterns: one for the left context, one for the +chunk, and one for the right context. When applied to a +\fBChunkString\fP, it will find any substring that matches the chunk +tag pattern, is surrounded by substrings that match the two +context patterns, and is not already part of a chunk; and create a +new chunk containing the substring that matched the chunk tag +pattern. +.sp +Caveat: Both the left and right context are consumed when this +rule matches; therefore, if you need to find overlapping matches, +you will need to apply your rule more than once. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.ChunkString(chunk_struct, debug_level=1) +Bases: \fBobject\fP +.sp +A string\-based encoding of a particular chunking of a text. +Internally, the \fBChunkString\fP class uses a single string to +encode the chunking of the input text. This string contains a +sequence of angle\-bracket delimited tags, with chunking indicated +by braces. An example of this encoding is: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +{
<DT><JJ><NN>}<VBN><IN>{<DT><NN>}<.>{<DT><NN>
}<.> +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +\fBChunkString\fP are created from tagged texts (i.e., lists of +\fBtokens\fP whose type is \fBTaggedType\fP). Initially, nothing is +chunked. +.sp +The chunking of a \fBChunkString\fP can be modified with the \fBxform()\fP +method, which uses a regular expression to transform the string +representation. These transformations should only add and remove +braces; they should \fInot\fP modify the sequence of angle\-bracket +delimited tags. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB_str\fP \-\- +.sp +The internal string representation of the text\(aqs +encoding. This string representation contains a sequence of +angle\-bracket delimited tags, with chunking indicated by +braces. An example of this encoding is: +.INDENT 2.0 +.INDENT 3.5 +.sp +.nf +.ft C +{
<DT><JJ><NN>}<VBN><IN>{<DT><NN>}<.>{<DT><NN>
}<.> +.ft P +.fi +.UNINDENT +.UNINDENT + +.IP \(bu 2 +\fB_pieces\fP \-\- The tagged tokens and chunks encoded by this \fBChunkString\fP\&. +.IP \(bu 2 +\fB_debug\fP \-\- The debug level. See the constructor docs. +.IP \(bu 2 +\fBIN_CHUNK_PATTERN\fP \-\- A zero\-width regexp pattern string that +will only match positions that are in chunks. +.IP \(bu 2 +\fBIN_STRIP_PATTERN\fP \-\- A zero\-width regexp pattern string that +will only match positions that are in strips. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B CHUNK_TAG = \(aq(<[^\e\e{\e\e}<>]+?>)\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B CHUNK_TAG_CHAR = \(aq[^\e\e{\e\e}<>]\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B IN_CHUNK_PATTERN = \(aq(?=[^\e\e{]*\e\e})\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B IN_STRIP_PATTERN = \(aq(?=[^\e\e}]*(\e\e{|$))\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B to_chunkstruct(chunk_label=\(aqCHUNK\(aq) +Return the chunk structure encoded by this \fBChunkString\fP\&. +.INDENT 7.0 +.TP +.B Return type +Tree +.TP +.B Raises +\fBValueError\fP \-\- If a transformation has generated an +invalid chunkstring. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B xform(regexp, repl) +Apply the given transformation to the string encoding of this +\fBChunkString\fP\&. In particular, find all occurrences that match +\fBregexp\fP, and replace them using \fBrepl\fP (as done by +\fBre.sub\fP). +.sp +This transformation should only add and remove braces; it +should \fInot\fP modify the sequence of angle\-bracket delimited +tags. Furthermore, this transformation may not result in +improper bracketing. Note, in particular, that bracketing may +not be nested. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBregexp\fP (\fIstr\fP\fI or \fP\fIregexp\fP) \-\- A regular expression matching the substring +that should be replaced. This will typically include a +named group, which can be used by \fBrepl\fP\&. +.IP \(bu 2 +\fBrepl\fP (\fIstr\fP) \-\- An expression specifying what should replace the +matched substring. Typically, this will include a named +replacement group, specified by \fBregexp\fP\&. +.UNINDENT +.TP +.B Return type +None +.TP +.B Raises +\fBValueError\fP \-\- If this transformation generated an +invalid chunkstring. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.ExpandLeftRule(left_tag_pattern, right_tag_pattern, descr) +Bases: \fI\%nltk.chunk.regexp.RegexpChunkRule\fP +.sp +A rule specifying how to expand chunks in a \fBChunkString\fP to the left, +using two matching tag patterns: a left pattern, and a right pattern. +When applied to a \fBChunkString\fP, it will find any chunk whose beginning +matches right pattern, and immediately preceded by a strip whose +end matches left pattern. It will then expand the chunk to incorporate +the new material on the left. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.ExpandRightRule(left_tag_pattern, right_tag_pattern, descr) +Bases: \fI\%nltk.chunk.regexp.RegexpChunkRule\fP +.sp +A rule specifying how to expand chunks in a \fBChunkString\fP to the +right, using two matching tag patterns: a left pattern, and a +right pattern. When applied to a \fBChunkString\fP, it will find any +chunk whose end matches left pattern, and immediately followed by +a strip whose beginning matches right pattern. It will then +expand the chunk to incorporate the new material on the right. 
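+.sp +A small sketch of this rule in action, with invented tags and descriptions: +.sp +.nf +.ft C +>>> from nltk.chunk.regexp import ChunkString, ChunkRule, ExpandRightRule +>>> from nltk.tree import Tree +>>> chunkstr = ChunkString(Tree(\(aqS\(aq, [(\(aqthe\(aq, \(aqDT\(aq), (\(aqcat\(aq, \(aqNN\(aq), (\(aqsat\(aq, \(aqVBD\(aq)])) +>>> ChunkRule(\(aq<DT>\(aq, \(aqChunk determiners\(aq).apply(chunkstr) +>>> ExpandRightRule(\(aq<DT>\(aq, \(aq<NN>\(aq, \(aqAbsorb a following noun\(aq).apply(chunkstr)  # now {<DT><NN>}<VBD> +.ft P +.fi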
+.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.MergeRule(left_tag_pattern, right_tag_pattern, descr) +Bases: \fI\%nltk.chunk.regexp.RegexpChunkRule\fP +.sp +A rule specifying how to merge chunks in a \fBChunkString\fP, using +two matching tag patterns: a left pattern, and a right pattern. +When applied to a \fBChunkString\fP, it will find any chunk whose end +matches left pattern, and immediately followed by a chunk whose +beginning matches right pattern. It will then merge those two +chunks into a single chunk. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.RegexpChunkParser(rules, chunk_label=\(aqNP\(aq, root_label=\(aqS\(aq, trace=0) +Bases: \fI\%nltk.chunk.api.ChunkParserI\fP +.sp +A regular expression based chunk parser. \fBRegexpChunkParser\fP uses a +sequence of "rules" to find chunks of a single type within a +text. The chunking of the text is encoded using a \fBChunkString\fP, +and each rule acts by modifying the chunking in the +\fBChunkString\fP\&. The rules are all implemented using regular +expression matching and substitution. +.sp +The \fBRegexpChunkRule\fP class and its subclasses (\fBChunkRule\fP, +\fBStripRule\fP, \fBUnChunkRule\fP, \fBMergeRule\fP, and \fBSplitRule\fP) +define the rules that are used by \fBRegexpChunkParser\fP\&. Each rule +defines an \fBapply()\fP method, which modifies the chunking encoded +by a given \fBChunkString\fP\&. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB_rules\fP \-\- The list of rules that should be applied to a text. +.IP \(bu 2 +\fB_trace\fP \-\- The default level of tracing. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse(chunk_struct, trace=None) +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBchunk_struct\fP (\fITree\fP) \-\- the chunk structure to be (further) chunked +.IP \(bu 2 +\fBtrace\fP (\fIint\fP) \-\- The level of tracing that should be used when +parsing a text. \fB0\fP will generate no tracing output; +\fB1\fP will generate normal tracing output; and \fB2\fP or +higher will generate verbose tracing output. This value +overrides the trace level value that was given to the +constructor. +.UNINDENT +.TP +.B Return type +Tree +.TP +.B Returns +a chunk structure that encodes the chunks in a given +tagged sentence. A chunk is a non\-overlapping linguistic +group, such as a noun phrase. The set of chunks +identified in the chunk structure depends on the rules +used to define this \fBRegexpChunkParser\fP\&. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B rules() +.INDENT 7.0 +.TP +.B Returns +the sequence of rules used by \fBRegexpChunkParser\fP\&. +.TP +.B Return type +list(RegexpChunkRule) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.RegexpChunkRule(regexp, repl, descr) +Bases: \fBobject\fP +.sp +A rule specifying how to modify the chunking in a \fBChunkString\fP, +using a transformational regular expression. The +\fBRegexpChunkRule\fP class itself can be used to implement any +transformational rule based on regular expressions. There are +also a number of subclasses, which can be used to implement +simpler types of rules, based on matching regular expressions. +.sp +Each \fBRegexpChunkRule\fP has a regular expression and a +replacement expression. When a \fBRegexpChunkRule\fP is "applied" +to a \fBChunkString\fP, it searches the \fBChunkString\fP for any +substring that matches the regular expression, and replaces it +using the replacement expression. This search/replace operation +has the same semantics as \fBre.sub\fP\&. 
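+.sp +For example, a rule roughly equivalent to the \fBChunkRule\fP tag pattern \fB{<DT><NN>}\fP can be spelled out directly as a regexp and replacement over the \fBChunkString\fP encoding (a simplified sketch; it omits the guards that the built\-in subclasses add): +.sp +.nf +.ft C +>>> import re +>>> from nltk.chunk.regexp import RegexpChunkRule +>>> rule = RegexpChunkRule( +\&...     re.compile(\(aq(?P<chunk><DT><NN>)\(aq),  # match a det\-noun tag pair +\&...     \(aq{\eg<chunk>}\(aq,                      # wrap the match in braces +\&...     \(aqChunk one determiner\-noun pair\(aq) +.ft P +.fi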
+.sp +Each \fBRegexpChunkRule\fP also has a description string, which +gives a short (typically less than 75 characters) description of +the purpose of the rule. +.sp +This transformation defined by this \fBRegexpChunkRule\fP should +only add and remove braces; it should \fInot\fP modify the sequence +of angle\-bracket delimited tags. Furthermore, this transformation +may not result in nested or mismatched bracketing. +.INDENT 7.0 +.TP +.B apply(chunkstr) +Apply this rule to the given \fBChunkString\fP\&. See the +class reference documentation for a description of what it +means to apply a rule. +.INDENT 7.0 +.TP +.B Parameters +\fBchunkstr\fP (\fIChunkString\fP) \-\- The chunkstring to which this rule is applied. +.TP +.B Return type +None +.TP +.B Raises +\fBValueError\fP \-\- If this transformation generated an +invalid chunkstring. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B descr() +Return a short description of the purpose and/or effect of +this rule. +.INDENT 7.0 +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B static fromstring(s) +Create a RegexpChunkRule from a string description. +Currently, the following formats are supported: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +{regexp} # chunk rule +}regexp{ # strip rule +regexp}{regexp # split rule +regexp{}regexp # merge rule +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Where \fBregexp\fP is a regular expression for the rule. Any +text following the comment marker (\fB#\fP) will be used as +the rule\(aqs description: +.sp +.nf +.ft C +>>> from nltk.chunk.regexp import RegexpChunkRule +>>> RegexpChunkRule.fromstring(\(aq{
<DT>?<NN.*>+}\(aq) +<ChunkRule: \(aq<DT>?<NN.*>+\(aq> +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.RegexpParser(grammar, root_label=\(aqS\(aq, loop=1, trace=0) +Bases: \fI\%nltk.chunk.api.ChunkParserI\fP +.sp +A grammar based chunk parser. \fBchunk.RegexpParser\fP uses a set of +regular expression patterns to specify the behavior of the parser. +The chunking of the text is encoded using a \fBChunkString\fP, and +each rule acts by modifying the chunking in the \fBChunkString\fP\&. +The rules are all implemented using regular expression matching +and substitution. +.sp +A grammar contains one or more clauses in the following form: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +NP: +  {<DT|JJ>}          # chunk determiners and adjectives +  }<[\e.VI].*>+{      # strip any tag beginning with V, I, or . +  <.*>}{<DT>         # split a chunk at a determiner +  <DT|JJ>{}<NN.*>    # merge chunk ending with det/adj +                     # with one starting with a noun +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The patterns of a clause are executed in order. An earlier +pattern may introduce a chunk boundary that prevents a later +pattern from executing. Sometimes an individual pattern will +match on multiple, overlapping extents of the input. As with +regular expression substitution more generally, the chunker will +identify the first match possible, then continue looking for matches +after this one has ended. +.sp +The clauses of a grammar are also executed in order. A cascaded +chunk parser is one having more than one clause. The maximum depth +of a parse tree created by this chunk parser is the same as the +number of clauses in the grammar. +.sp +When tracing is turned on, the comment portion of a line is displayed +each time the corresponding pattern is applied. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB_start\fP \-\- The start symbol of the grammar (the root node of +resulting trees) +.IP \(bu 2 +\fB_stages\fP \-\- The list of parsing stages corresponding to the grammar +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse(chunk_struct, trace=None) +Apply the chunk parser to this input. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBchunk_struct\fP (\fITree\fP) \-\- the chunk structure to be (further) chunked +(this tree is modified, and is also returned) +.IP \(bu 2 +\fBtrace\fP (\fIint\fP) \-\- The level of tracing that should be used when +parsing a text. \fB0\fP will generate no tracing output; +\fB1\fP will generate normal tracing output; and \fB2\fP or +higher will generate verbose tracing output. This value +overrides the trace level value that was given to the +constructor. +.UNINDENT +.TP +.B Returns +the chunked output. +.TP +.B Return type +Tree +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.SplitRule(left_tag_pattern, right_tag_pattern, descr) +Bases: \fI\%nltk.chunk.regexp.RegexpChunkRule\fP +.sp +A rule specifying how to split chunks in a \fBChunkString\fP, using +two matching tag patterns: a left pattern, and a right pattern. +When applied to a \fBChunkString\fP, it will find any chunk that +matches the left pattern followed by the right pattern. It will +then split the chunk into two new chunks, at the point between the +two pattern matches. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.StripRule(tag_pattern, descr) +Bases: \fI\%nltk.chunk.regexp.RegexpChunkRule\fP +.sp +A rule specifying how to remove strips from a \fBChunkString\fP, +using a matching tag pattern. When applied to a +\fBChunkString\fP, it will find any substring that matches this +tag pattern and that is contained in a chunk, and remove it +from that chunk, thus creating two new chunks. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.chunk.regexp.UnChunkRule(tag_pattern, descr) +Bases: \fI\%nltk.chunk.regexp.RegexpChunkRule\fP +.sp +A rule specifying how to remove chunks from a \fBChunkString\fP, +using a matching tag pattern. When applied to a +\fBChunkString\fP, it will find any complete chunk that matches this +tag pattern, and un\-chunk it. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.regexp.demo() +A demonstration for the \fBRegexpChunkParser\fP class. A single text is +parsed with four different chunk parsers, using a variety of rules +and strategies.
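+.sp +A minimal example of the kind of chunking this demonstration illustrates (grammar and sentence invented for the example): +.sp +.nf +.ft C +>>> from nltk.chunk.regexp import RegexpParser +>>> cp = RegexpParser(\(aqNP: {<DT>?<JJ>*<NN>+}\(aq)  # one clause, one chunk rule +>>> sent = [(\(aqthe\(aq, \(aqDT\(aq), (\(aqlittle\(aq, \(aqJJ\(aq), (\(aqcat\(aq, \(aqNN\(aq), +\&...         (\(aqsat\(aq, \(aqVBD\(aq), (\(aqon\(aq, \(aqIN\(aq), (\(aqthe\(aq, \(aqDT\(aq), (\(aqmat\(aq, \(aqNN\(aq)] +>>> print(cp.parse(sent)) +(S (NP the/DT little/JJ cat/NN) sat/VBD on/IN (NP the/DT mat/NN)) +.ft P +.fi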
+.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.regexp.demo_eval(chunkparser, text) +Demonstration code for evaluating a chunk parser, using a +\fBChunkScore\fP\&. This function assumes that \fBtext\fP contains one +sentence per line, and that each sentence has the form expected by +\fBtree.chunk\fP\&. It runs the given chunk parser on each sentence in +the text, and scores the result. It prints the final score +(precision, recall, and f\-measure); and reports the set of chunks +that were missed and the set of chunks that were incorrect. (At +most 10 missing chunks and 10 incorrect chunks are reported). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBchunkparser\fP (\fIChunkParserI\fP) \-\- The chunkparser to be tested +.IP \(bu 2 +\fBtext\fP (\fIstr\fP) \-\- The chunked tagged text that should be used for +evaluation. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.regexp.tag_pattern2re_pattern(tag_pattern) +Convert a tag pattern to a regular expression pattern. A "tag +pattern" is a modified version of a regular expression, designed +for matching sequences of tags. The differences between regular +expression patterns and tag patterns are: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +In tag patterns, \fB\(aq<\(aq\fP and \fB\(aq>\(aq\fP act as parentheses; so +\fB\(aq<NN>+\(aq\fP matches one or more repetitions of \fB\(aq<NN>\(aq\fP, not +\fB\(aq<NN\(aq followed by one or more repetitions of \(aq>\(aq\fP\&. +.IP \(bu 2 +Whitespace in tag patterns is ignored. So +\fB\(aq<DT> | <NN>\(aq\fP is equivalent to \fB\(aq<DT>|<NN>\(aq\fP +.IP \(bu 2 +In tag patterns, \fB\(aq.\(aq\fP is equivalent to \fB\(aq[^{}<>]\(aq\fP; so +\fB\(aq<NN.*>\(aq\fP matches any single tag starting with \fB\(aqNN\(aq\fP\&. +.UNINDENT +.UNINDENT +.UNINDENT +.sp +In particular, \fBtag_pattern2re_pattern\fP performs the following +transformations on the given pattern: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +Replace \(aq.\(aq with \(aq[^<>{}]\(aq +.IP \(bu 2 +Remove any whitespace +.IP \(bu 2 +Add extra parens around \(aq<\(aq and \(aq>\(aq, to make \(aq<\(aq and \(aq>\(aq act +like parentheses. E.g., so that in \(aq<NN>+\(aq, the \(aq+\(aq has scope +over the entire \(aq<NN>\(aq; and so that in \(aq<NN|IN>\(aq, the \(aq|\(aq has +scope over \(aqNN\(aq and \(aqIN\(aq, but not \(aq<\(aq or \(aq>\(aq. +.IP \(bu 2 +Check to make sure the resulting pattern is valid. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +\fBtag_pattern\fP (\fIstr\fP) \-\- The tag pattern to convert to a regular +expression pattern. +.TP +.B Raises +\fBValueError\fP \-\- If \fBtag_pattern\fP is not a valid tag pattern. +In particular, \fBtag_pattern\fP should not include braces; and it +should not contain nested or mismatched angle\-brackets. +.TP +.B Return type +str +.TP +.B Returns +A regular expression pattern corresponding to +\fBtag_pattern\fP\&. +.UNINDENT +.UNINDENT +.SS nltk.chunk.util module +.INDENT 0.0 +.TP +.B class nltk.chunk.util.ChunkScore(**kwargs) +Bases: \fBobject\fP +.sp +A utility class for scoring chunk parsers. \fBChunkScore\fP can +evaluate a chunk parser\(aqs output, based on a number of statistics +(precision, recall, f\-measure, missed chunks, incorrect chunks). +It can also combine the scores from the parsing of multiple texts; +this makes it significantly easier to evaluate a chunk parser that +operates one sentence at a time. +.sp +Texts are evaluated with the \fBscore\fP method. The results of +evaluation can be accessed via a number of accessor methods, such +as \fBprecision\fP and \fBf_measure\fP\&. A typical use of the +\fBChunkScore\fP class is: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +>>> chunkscore = ChunkScore() +>>> for correct in correct_sentences: +\&... guess = chunkparser.parse(correct.leaves()) +\&... chunkscore.score(correct, guess) +>>> print(\(aqF Measure:\(aq, chunkscore.f_measure()) +F Measure: 0.823 +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fBkwargs\fP \-\- +.sp +Keyword arguments: +.INDENT 2.0 +.IP \(bu 2 +max_tp_examples: The maximum number of actual examples of true +positives to record. This affects the \fBcorrect\fP member +function: \fBcorrect\fP will not return more than this number +of true positive examples. This does \fInot\fP affect any of +the numerical metrics (precision, recall, or f\-measure) +.IP \(bu 2 +max_fp_examples: The maximum number of actual examples of false +positives to record. This affects the \fBincorrect\fP member +function and the \fBguessed\fP member function: \fBincorrect\fP +will not return more than this number of examples, and +\fBguessed\fP will not return more than this number of true +positive examples. This does \fInot\fP affect any of the +numerical metrics (precision, recall, or f\-measure) +.IP \(bu 2 +max_fn_examples: The maximum number of actual examples of false +negatives to record. This affects the \fBmissed\fP member +function and the \fBcorrect\fP member function: \fBmissed\fP +will not return more than this number of examples, and +\fBcorrect\fP will not return more than this number of true +negative examples.
This does \fInot\fP affect any of the +numerical metrics (precision, recall, or f\-measure) +.IP \(bu 2 +chunk_label: A regular expression indicating which chunks +should be compared. Defaults to \fB\(aq.*\(aq\fP (i.e., all chunks). +.UNINDENT + +.IP \(bu 2 +\fB_tp\fP \-\- List of true positives +.IP \(bu 2 +\fB_fp\fP \-\- List of false positives +.IP \(bu 2 +\fB_fn\fP \-\- List of false negatives +.IP \(bu 2 +\fB_tp_num\fP \-\- Number of true positives +.IP \(bu 2 +\fB_fp_num\fP \-\- Number of false positives +.IP \(bu 2 +\fB_fn_num\fP \-\- Number of false negatives. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B accuracy() +Return the overall tag\-based accuracy for all text that have +been scored by this \fBChunkScore\fP, using the IOB (conll2000) +tag encoding. +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B correct() +Return the chunks which were included in the correct +chunk structures, listed in input order. +.INDENT 7.0 +.TP +.B Return type +list of chunks +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B f_measure(alpha=0.5) +Return the overall F measure for all texts that have been +scored by this \fBChunkScore\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBalpha\fP (\fIfloat\fP) \-\- the relative weighting of precision and recall. +Larger alpha biases the score towards the precision value, +while smaller alpha biases the score towards the recall +value. \fBalpha\fP should have a value in the range [0,1]. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B guessed() +Return the chunks which were included in the guessed +chunk structures, listed in input order. +.INDENT 7.0 +.TP +.B Return type +list of chunks +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B incorrect() +Return the chunks which were included in the guessed chunk structures, +but not in the correct chunk structures, listed in input order. +.INDENT 7.0 +.TP +.B Return type +list of chunks +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B missed() +Return the chunks which were included in the +correct chunk structures, but not in the guessed chunk +structures, listed in input order. +.INDENT 7.0 +.TP +.B Return type +list of chunks +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B precision() +Return the overall precision for all texts that have been +scored by this \fBChunkScore\fP\&. +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B recall() +Return the overall recall for all texts that have been +scored by this \fBChunkScore\fP\&. +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B score(correct, guessed) +Given a correctly chunked sentence, score another chunked +version of the same sentence. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBcorrect\fP (\fIchunk structure\fP) \-\- The known\-correct ("gold standard") chunked +sentence. +.IP \(bu 2 +\fBguessed\fP (\fIchunk structure\fP) \-\- The chunked sentence to be scored. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.util.accuracy(chunker, gold) +Score the accuracy of the chunker against the gold standard. +Strip the chunk information from the gold standard and rechunk it using +the chunker, then compute the accuracy score. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBchunker\fP (\fIChunkParserI\fP) \-\- The chunker being evaluated. +.IP \(bu 2 +\fBgold\fP (\fItree\fP) \-\- The chunk structures to score the chunker on. 
+.UNINDENT +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.util.conllstr2tree(s, chunk_types=(\(aqNP\(aq, \(aqPP\(aq, \(aqVP\(aq), root_label=\(aqS\(aq) +Return a chunk structure for a single sentence +encoded in the given CONLL 2000 style string. +This function converts a CoNLL IOB string into a tree. +It uses the specified chunk types +(defaults to NP, PP and VP), and creates a tree rooted at a node +labeled S (by default). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBs\fP (\fIstr\fP) \-\- The CoNLL string to be converted. +.IP \(bu 2 +\fBchunk_types\fP (\fItuple\fP) \-\- The chunk types to be converted. +.IP \(bu 2 +\fBroot_label\fP (\fIstr\fP) \-\- The node label to use for the root. +.UNINDENT +.TP +.B Return type +Tree +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.util.conlltags2tree(sentence, chunk_types=(\(aqNP\(aq, \(aqPP\(aq, \(aqVP\(aq), root_label=\(aqS\(aq, strict=False) +Convert the CoNLL IOB format to a tree. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.util.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.util.ieerstr2tree(s, chunk_types=[\(aqLOCATION\(aq, \(aqORGANIZATION\(aq, \(aqPERSON\(aq, \(aqDURATION\(aq, \(aqDATE\(aq, \(aqCARDINAL\(aq, \(aqPERCENT\(aq, \(aqMONEY\(aq, \(aqMEASURE\(aq], root_label=\(aqS\(aq) +Return a chunk structure containing the chunked tagged text that is +encoded in the given IEER style string. +Convert a string of chunked tagged text in the IEER named +entity format into a chunk structure. Chunks are of several +types, LOCATION, ORGANIZATION, PERSON, DURATION, DATE, CARDINAL, +PERCENT, MONEY, and MEASURE. +.INDENT 7.0 +.TP +.B Return type +Tree +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.util.tagstr2tree(s, chunk_label=\(aqNP\(aq, root_label=\(aqS\(aq, sep=\(aq/\(aq, source_tagset=None, target_tagset=None) +Divide a string of bracketted tagged text into +chunks and unchunked tokens, and produce a Tree. +Chunks are marked by square brackets (\fB[...]\fP). Words are +delimited by whitespace, and each word should have the form +\fBtext/tag\fP\&. Words that do not contain a slash are +assigned a \fBtag\fP of None. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBs\fP (\fIstr\fP) \-\- The string to be converted +.IP \(bu 2 +\fBchunk_label\fP (\fIstr\fP) \-\- The label to use for chunk nodes +.IP \(bu 2 +\fBroot_label\fP (\fIstr\fP) \-\- The label to use for the root of the tree +.UNINDENT +.TP +.B Return type +Tree +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.util.tree2conllstr(t) +Return a multiline string where each line contains a word, tag and IOB tag. +Convert a tree to the CoNLL IOB string format +.INDENT 7.0 +.TP +.B Parameters +\fBt\fP (\fITree\fP) \-\- The tree to be converted. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.util.tree2conlltags(t) +Return a list of 3\-tuples containing \fB(word, tag, IOB\-tag)\fP\&. +Convert a tree to the CoNLL IOB tag format. +.INDENT 7.0 +.TP +.B Parameters +\fBt\fP (\fITree\fP) \-\- The tree to be converted. +.TP +.B Return type +list(tuple) +.UNINDENT +.UNINDENT +.SS Module contents +.sp +Classes and interfaces for identifying non\-overlapping linguistic +groups (such as base noun phrases) in unrestricted text. This task is +called "chunk parsing" or "chunking", and the identified groups are +called "chunks". The chunked text is represented using a shallow +tree called a "chunk structure." 
A chunk structure is a tree +containing tokens and chunks, where each chunk is a subtree containing +only tokens. For example, the chunk structure for base noun phrase +chunks in the sentence "I saw the big dog on the hill" is: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +(SENTENCE: +  (NP: <I>) +  <saw> +  (NP: <the> <big> <dog>) +  <on> +  (NP: <the> <hill>)) +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +To convert a chunk structure back to a list of tokens, simply use the +chunk structure\(aqs \fBleaves()\fP method. +.sp +This module defines \fBChunkParserI\fP, a standard interface for +chunking texts; and \fBRegexpChunkParser\fP, a regular\-expression based +implementation of that interface. It also defines \fBChunkScore\fP, a +utility class for scoring chunk parsers. +.SS RegexpChunkParser +.sp +\fBRegexpChunkParser\fP is an implementation of the chunk parser interface +that uses regular\-expressions over tags to chunk a text. Its +\fBparse()\fP method first constructs a \fBChunkString\fP, which encodes a +particular chunking of the input text. Initially, nothing is +chunked. \fBparse.RegexpChunkParser\fP then applies a sequence of +\fBRegexpChunkRule\fP rules to the \fBChunkString\fP, each of which modifies +the chunking that it encodes. Finally, the \fBChunkString\fP is +transformed back into a chunk structure, which is returned. +.sp +\fBRegexpChunkParser\fP can only be used to chunk a single kind of phrase. +For example, you can use a \fBRegexpChunkParser\fP to chunk the noun +phrases in a text, or the verb phrases in a text; but you can not +use it to simultaneously chunk both noun phrases and verb phrases in +the same text. (This is a limitation of \fBRegexpChunkParser\fP, not of +chunk parsers in general.) +.SS RegexpChunkRules +.sp +A \fBRegexpChunkRule\fP is a transformational rule that updates the +chunking of a text by modifying its \fBChunkString\fP\&. Each +\fBRegexpChunkRule\fP defines the \fBapply()\fP method, which modifies +the chunking encoded by a \fBChunkString\fP\&. The +\fBRegexpChunkRule\fP class itself can be used to implement any +transformational rule based on regular expressions. There are +also a number of subclasses, which can be used to implement +simpler types of rules: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fBChunkRule\fP chunks anything that matches a given regular +expression. +.IP \(bu 2 +\fBStripRule\fP strips anything that matches a given regular +expression. +.IP \(bu 2 +\fBUnChunkRule\fP will un\-chunk any chunk that matches a given +regular expression. +.IP \(bu 2 +\fBMergeRule\fP can be used to merge two contiguous chunks. +.IP \(bu 2 +\fBSplitRule\fP can be used to split a single chunk into two +smaller chunks. +.IP \(bu 2 +\fBExpandLeftRule\fP will expand a chunk to incorporate new +unchunked material on the left. +.IP \(bu 2 +\fBExpandRightRule\fP will expand a chunk to incorporate new +unchunked material on the right. +.UNINDENT +.UNINDENT +.UNINDENT +.SS Tag Patterns +.sp +A \fBRegexpChunkRule\fP uses a modified version of regular +expression patterns, called "tag patterns". Tag patterns are +used to match sequences of tags. Examples of tag patterns are: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +r\(aq(<DT>|<JJ>|<NN>)+\(aq +r\(aq<NN>+\(aq +r\(aq<NN.*>\(aq +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The differences between regular expression patterns and tag +patterns are: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +In tag patterns, \fB\(aq<\(aq\fP and \fB\(aq>\(aq\fP act as parentheses; so +\fB\(aq<NN>+\(aq\fP matches one or more repetitions of \fB\(aq<NN>\(aq\fP, not +\fB\(aq<NN\(aq followed by one or more repetitions of \(aq>\(aq\fP\&. +.IP \(bu 2 +Whitespace in tag patterns is ignored. So +\fB\(aq<DT> | <NN>\(aq\fP is equivalent to \fB\(aq<DT>|<NN>\(aq\fP +.IP \(bu 2 +In tag patterns, \fB\(aq.\(aq\fP is equivalent to \fB\(aq[^{}<>]\(aq\fP; so +\fB\(aq<NN.*>\(aq\fP matches any single tag starting with \fB\(aqNN\(aq\fP\&. +.UNINDENT +.UNINDENT +.UNINDENT +.sp +The function \fBtag_pattern2re_pattern\fP can be used to transform +a tag pattern to an equivalent regular expression pattern. +.SS Efficiency +.sp +Preliminary tests indicate that \fBRegexpChunkParser\fP can chunk at a +rate of about 300 tokens/second, with a moderately complex rule set. +.sp +There may be problems if \fBRegexpChunkParser\fP is used with more than +5,000 tokens at a time. In particular, evaluation of some regular +expressions may cause the Python regular expression engine to +exceed its maximum recursion depth. We have attempted to minimize +these problems, but it is impossible to avoid them completely. We +therefore recommend that you apply the chunk parser to a single +sentence at a time. +.SS Emacs Tip +.sp +If you evaluate the following elisp expression in emacs, it will +colorize a \fBChunkString\fP when you use an interactive python shell +with emacs or xemacs ("C\-c !"): +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +(let () + (defconst comint\-mode\-font\-lock\-keywords + \(aq(("<[^>]+>" 0 \(aqfont\-lock\-reference\-face) + ("[{}]" 0 \(aqfont\-lock\-function\-name\-face))) + (add\-hook \(aqcomint\-mode\-hook (lambda () (turn\-on\-font\-lock)))) +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +You can evaluate this code by copying it to a temporary buffer, +placing the cursor after the last close parenthesis, and typing +"\fBC\-x C\-e\fP". You should evaluate it before running the interactive +session. The change will last until you close emacs. +.SS Unresolved Issues +.sp +If we use the \fBre\fP module for regular expressions, Python\(aqs +regular expression engine generates "maximum recursion depth +exceeded" errors when processing very large texts, even for +regular expressions that should not require any recursion. We +therefore use the \fBpre\fP module instead. But note that \fBpre\fP +does not include Unicode support, so this module will not work +with unicode strings. Note also that \fBpre\fP regular expressions +are not quite as advanced as \fBre\fP ones (e.g., no leftward +zero\-length assertions). +.INDENT 0.0 +.TP +.B type CHUNK_TAG_PATTERN +regexp +.TP +.B var CHUNK_TAG_PATTERN +A regular expression to test whether a tag +pattern is valid. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.ne_chunk(tagged_tokens, binary=False) +Use NLTK\(aqs currently recommended named entity chunker to +chunk the given list of tagged tokens. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.chunk.ne_chunk_sents(tagged_sentences, binary=False) +Use NLTK\(aqs currently recommended named entity chunker to chunk the +given list of tagged sentences, each consisting of a list of tagged tokens. +.UNINDENT +.SS nltk.classify package +.SS Submodules +.SS nltk.classify.api module +.sp +Interfaces for labeling tokens with category labels (or "class labels"). +.sp +\fBClassifierI\fP is a standard interface for "single\-category +classification", in which the set of categories is known, the number +of categories is finite, and each text belongs to exactly one +category. +.sp +\fBMultiClassifierI\fP is a standard interface for "multi\-category +classification", which is like single\-category classification except +that each text belongs to zero or more categories.
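+.sp +As a sketch of this contract, a trivial (invented) classifier that always returns one label only needs \fBlabels()\fP and \fBclassify()\fP: +.sp +.nf +.ft C +>>> from nltk.classify.api import ClassifierI +>>> class ConstantClassifier(ClassifierI): +\&...     def __init__(self, label): +\&...         self._label = label +\&...     def labels(self): +\&...         return [self._label] +\&...     def classify(self, featureset): +\&...         return self._label +>>> ConstantClassifier(\(aqpos\(aq).classify({\(aqcontains(great)\(aq: True}) +\(aqpos\(aq +.ft P +.fi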
+.INDENT 0.0 +.TP +.B class nltk.classify.api.ClassifierI +Bases: \fBobject\fP +.sp +A processing interface for labeling tokens with a single category +label (or "class"). Labels are typically strs or +ints, but can be any immutable type. The set of labels +that the classifier chooses from must be fixed and finite. +.INDENT 7.0 +.TP +.B Subclasses must define: +.INDENT 7.0 +.IP \(bu 2 +\fBlabels()\fP +.IP \(bu 2 +either \fBclassify()\fP or \fBclassify_many()\fP (or both) +.UNINDENT +.TP +.B Subclasses may define: +.INDENT 7.0 +.IP \(bu 2 +either \fBprob_classify()\fP or \fBprob_classify_many()\fP (or both) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classify(featureset) +.INDENT 7.0 +.TP +.B Returns +the most appropriate label for the given featureset. +.TP +.B Return type +label +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classify_many(featuresets) +Apply \fBself.classify()\fP to each element of \fBfeaturesets\fP\&. I.e.: +.INDENT 7.0 +.INDENT 3.5 +return [self.classify(fs) for fs in featuresets] +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +list(label) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +the list of category labels used by this classifier. +.TP +.B Return type +list of (immutable) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob_classify(featureset) +.INDENT 7.0 +.TP +.B Returns +a probability distribution over labels for the given +featureset. +.TP +.B Return type +ProbDistI +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob_classify_many(featuresets) +Apply \fBself.prob_classify()\fP to each element of \fBfeaturesets\fP\&. I.e.: +.INDENT 7.0 +.INDENT 3.5 +return [self.prob_classify(fs) for fs in featuresets] +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +list(ProbDistI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.api.MultiClassifierI +Bases: \fBobject\fP +.sp +A processing interface for labeling tokens with zero or more +category labels (or "labels"). Labels are typically strs +or ints, but can be any immutable type. The set of labels +that the multi\-classifier chooses from must be fixed and finite. +.INDENT 7.0 +.TP +.B Subclasses must define: +.INDENT 7.0 +.IP \(bu 2 +\fBlabels()\fP +.IP \(bu 2 +either \fBclassify()\fP or \fBclassify_many()\fP (or both) +.UNINDENT +.TP +.B Subclasses may define: +.INDENT 7.0 +.IP \(bu 2 +either \fBprob_classify()\fP or \fBprob_classify_many()\fP (or both) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classify(featureset) +.INDENT 7.0 +.TP +.B Returns +the most appropriate set of labels for the given featureset. +.TP +.B Return type +set(label) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classify_many(featuresets) +Apply \fBself.classify()\fP to each element of \fBfeaturesets\fP\&. I.e.: +.INDENT 7.0 +.INDENT 3.5 +return [self.classify(fs) for fs in featuresets] +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +list(set(label)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +the list of category labels used by this classifier. +.TP +.B Return type +list of (immutable) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob_classify(featureset) +.INDENT 7.0 +.TP +.B Returns +a probability distribution over sets of labels for the +given featureset. +.TP +.B Return type +ProbDistI +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob_classify_many(featuresets) +Apply \fBself.prob_classify()\fP to each element of \fBfeaturesets\fP\&. 
I.e.: +.INDENT 7.0 +.INDENT 3.5 +return [self.prob_classify(fs) for fs in featuresets] +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +list(ProbDistI) +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.classify.decisiontree module +.sp +A classifier model that decides which label to assign to a token on +the basis of a tree structure, where branches correspond to conditions +on feature values, and leaves correspond to label assignments. +.INDENT 0.0 +.TP +.B class nltk.classify.decisiontree.DecisionTreeClassifier(label, feature_name=None, decisions=None, default=None) +Bases: \fI\%nltk.classify.api.ClassifierI\fP +.INDENT 7.0 +.TP +.B static best_binary_stump(feature_names, labeled_featuresets, feature_values, verbose=False) +.UNINDENT +.INDENT 7.0 +.TP +.B static best_stump(feature_names, labeled_featuresets, verbose=False) +.UNINDENT +.INDENT 7.0 +.TP +.B static binary_stump(feature_name, feature_value, labeled_featuresets) +.UNINDENT +.INDENT 7.0 +.TP +.B classify(featureset) +.INDENT 7.0 +.TP +.B Returns +the most appropriate label for the given featureset. +.TP +.B Return type +label +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B error(labeled_featuresets) +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +the list of category labels used by this classifier. +.TP +.B Return type +list of (immutable) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B static leaf(labeled_featuresets) +.UNINDENT +.INDENT 7.0 +.TP +.B pretty_format(width=70, prefix=\(aq\(aq, depth=4) +Return a string containing a pretty\-printed version of this +decision tree. Each line in this string corresponds to a +single decision tree node or leaf, and indentation is used to +display the structure of the decision tree. +.UNINDENT +.INDENT 7.0 +.TP +.B pseudocode(prefix=\(aq\(aq, depth=4) +Return a string representation of this decision tree that +expresses the decisions it makes as a nested set of pseudocode +if statements. +.UNINDENT +.INDENT 7.0 +.TP +.B refine(labeled_featuresets, entropy_cutoff, depth_cutoff, support_cutoff, binary=False, feature_values=None, verbose=False) +.UNINDENT +.INDENT 7.0 +.TP +.B static stump(feature_name, labeled_featuresets) +.UNINDENT +.INDENT 7.0 +.TP +.B static train(labeled_featuresets, entropy_cutoff=0.05, depth_cutoff=100, support_cutoff=10, binary=False, feature_values=None, verbose=False) +.INDENT 7.0 +.TP +.B Parameters +\fBbinary\fP \-\- If true, then treat all feature/value pairs as +individual binary features, rather than using a single n\-way +branch for each feature. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.decisiontree.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.decisiontree.f(x) +.UNINDENT +.SS nltk.classify.maxent module +.sp +A classifier model based on maximum entropy modeling framework. This +framework considers all of the probability distributions that are +empirically consistent with the training data; and chooses the +distribution with the highest entropy. A probability distribution is +"empirically consistent" with a set of training data if its estimated +frequency with which a class and a feature vector value co\-occur is +equal to the actual frequency in the data. +.SS Terminology: \(aqfeature\(aq +.sp +The term \fIfeature\fP is usually used to refer to some property of an +unlabeled token. For example, when performing word sense +disambiguation, we might define a \fB\(aqprevword\(aq\fP feature whose value is +the word preceding the target word. 
However, in the context of +maxent modeling, the term \fIfeature\fP is typically used to refer to a +property of a "labeled" token. In order to prevent confusion, we +will introduce two distinct terms to disambiguate these two different +concepts: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +An "input\-feature" is a property of an unlabeled token. +.IP \(bu 2 +A "joint\-feature" is a property of a labeled token. +.UNINDENT +.UNINDENT +.UNINDENT +.sp +In the rest of the \fBnltk.classify\fP module, the term "features" is +used to refer to what we will call "input\-features" in this module. +.sp +In literature that describes and discusses maximum entropy models, +input\-features are typically called "contexts", and joint\-features +are simply referred to as "features". +.SS Converting Input\-Features to Joint\-Features +.sp +In maximum entropy models, joint\-features are required to have numeric +values. Typically, each input\-feature \fBinput_feat\fP is mapped to a +set of joint\-features of the form: +.nf +joint_feat(token, label) = { 1 if input_feat(token) == feat_val +.in +2 +{ and label == some_label +{ +{ 0 otherwise +.in -2 +.fi +.sp +.sp +For all values of \fBfeat_val\fP and \fBsome_label\fP\&. This mapping is +performed by classes that implement the \fBMaxentFeatureEncodingI\fP +interface. +.INDENT 0.0 +.TP +.B class nltk.classify.maxent.BinaryMaxentFeatureEncoding(labels, mapping, unseen_features=False, alwayson_features=False) +Bases: \fI\%nltk.classify.maxent.MaxentFeatureEncodingI\fP +.sp +A feature encoding that generates vectors containing a binary +joint\-features of the form: +.nf +joint_feat(fs, l) = { 1 if (fs[fname] == fval) and (l == label) +.in +2 +{ +{ 0 otherwise +.in -2 +.fi +.sp +.sp +Where \fBfname\fP is the name of an input\-feature, \fBfval\fP is a value +for that input\-feature, and \fBlabel\fP is a label. +.sp +Typically, these features are constructed based on a training +corpus, using the \fBtrain()\fP method. This method will create one +feature for each combination of \fBfname\fP, \fBfval\fP, and \fBlabel\fP +that occurs at least once in the training corpus. +.sp +The \fBunseen_features\fP parameter can be used to add "unseen\-value +features", which are used whenever an input feature has a value +that was not encountered in the training corpus. These features +have the form: +.nf +joint_feat(fs, l) = { 1 if is_unseen(fname, fs[fname]) +.in +2 +{ and l == label +{ +{ 0 otherwise +.in -2 +.fi +.sp +.sp +Where \fBis_unseen(fname, fval)\fP is true if the encoding does not +contain any joint features that are true when \fBfs[fname]==fval\fP\&. +.sp +The \fBalwayson_features\fP parameter can be used to add "always\-on +features", which have the form: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +| joint_feat(fs, l) = { 1 if (l == label) +| { +| { 0 otherwise +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +These always\-on features allow the maxent model to directly model +the prior probabilities of each label. +.INDENT 7.0 +.TP +.B describe(f_id) +.INDENT 7.0 +.TP +.B Returns +A string describing the value of the joint\-feature +whose index in the generated feature vectors is \fBfid\fP\&. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B encode(featureset, label) +Given a (featureset, label) pair, return the corresponding +vector of joint\-feature values. This vector is represented as +a list of \fB(index, value)\fP tuples, specifying the value of +each non\-zero joint\-feature. 
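+.sp +For instance (a sketch; the feature names, values and index numbering are illustrative and depend on the training data): +.sp +.nf +.ft C +>>> from nltk.classify.maxent import BinaryMaxentFeatureEncoding +>>> train = [({\(aqa\(aq: 1}, \(aqx\(aq), ({\(aqa\(aq: 2}, \(aqy\(aq)] +>>> enc = BinaryMaxentFeatureEncoding.train(train) +>>> enc.encode({\(aqa\(aq: 1}, \(aqx\(aq)  # sparse list of (index, value) pairs +[(0, 1)] +.ft P +.fi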
+.INDENT 7.0 +.TP +.B Return type +list(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +A list of the "known labels" \-\- i.e., all labels +\fBl\fP such that \fBself.encode(fs,l)\fP can be a nonzero +joint\-feature vector for some value of \fBfs\fP\&. +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B length() +.INDENT 7.0 +.TP +.B Returns +The size of the fixed\-length joint\-feature vectors +that are generated by this encoding. +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod train(train_toks, count_cutoff=0, labels=None, **options) +Construct and return new feature encoding, based on a given +training corpus \fBtrain_toks\fP\&. See the class description +\fBBinaryMaxentFeatureEncoding\fP for a description of the +joint\-features that will be included in this encoding. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtrain_toks\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIdict\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Training data, represented as a list of +pairs, the first member of which is a feature dictionary, +and the second of which is a classification label. +.IP \(bu 2 +\fBcount_cutoff\fP (\fIint\fP) \-\- A cutoff value that is used to discard +rare joint\-features. If a joint\-feature\(aqs value is 1 +fewer than \fBcount_cutoff\fP times in the training corpus, +then that joint\-feature is not included in the generated +encoding. +.IP \(bu 2 +\fBlabels\fP (\fIlist\fP) \-\- A list of labels that should be used by the +classifier. If not specified, then the set of labels +attested in \fBtrain_toks\fP will be used. +.IP \(bu 2 +\fBoptions\fP \-\- Extra parameters for the constructor, such as +\fBunseen_features\fP and \fBalwayson_features\fP\&. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.ConditionalExponentialClassifier +Alias for MaxentClassifier. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.maxent.FunctionBackedMaxentFeatureEncoding(func, length, labels) +Bases: \fI\%nltk.classify.maxent.MaxentFeatureEncodingI\fP +.sp +A feature encoding that calls a user\-supplied function to map a +given featureset/label pair to a sparse joint\-feature vector. +.INDENT 7.0 +.TP +.B describe(fid) +.INDENT 7.0 +.TP +.B Returns +A string describing the value of the joint\-feature +whose index in the generated feature vectors is \fBfid\fP\&. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B encode(featureset, label) +Given a (featureset, label) pair, return the corresponding +vector of joint\-feature values. This vector is represented as +a list of \fB(index, value)\fP tuples, specifying the value of +each non\-zero joint\-feature. +.INDENT 7.0 +.TP +.B Return type +list(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +A list of the "known labels" \-\- i.e., all labels +\fBl\fP such that \fBself.encode(fs,l)\fP can be a nonzero +joint\-feature vector for some value of \fBfs\fP\&. +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B length() +.INDENT 7.0 +.TP +.B Returns +The size of the fixed\-length joint\-feature vectors +that are generated by this encoding. 
+.TP +.B Return type +int +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.maxent.GISEncoding(labels, mapping, unseen_features=False, alwayson_features=False, C=None) +Bases: \fI\%nltk.classify.maxent.BinaryMaxentFeatureEncoding\fP +.sp +A binary feature encoding which adds one new joint\-feature to the +joint\-features defined by \fBBinaryMaxentFeatureEncoding\fP: a +correction feature, whose value is chosen to ensure that the +sparse vector always sums to a constant non\-negative number. This +new feature is used to ensure two preconditions for the GIS +training algorithm: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +At least one feature vector index must be nonzero for every +token. +.IP \(bu 2 +The feature vector must sum to a constant non\-negative number +for every token. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B property C +The non\-negative constant that all encoded feature vectors +will sum to. +.UNINDENT +.INDENT 7.0 +.TP +.B describe(f_id) +.INDENT 7.0 +.TP +.B Returns +A string describing the value of the joint\-feature +whose index in the generated feature vectors is \fBfid\fP\&. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B encode(featureset, label) +Given a (featureset, label) pair, return the corresponding +vector of joint\-feature values. This vector is represented as +a list of \fB(index, value)\fP tuples, specifying the value of +each non\-zero joint\-feature. +.INDENT 7.0 +.TP +.B Return type +list(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B length() +.INDENT 7.0 +.TP +.B Returns +The size of the fixed\-length joint\-feature vectors +that are generated by this encoding. +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.maxent.MaxentClassifier(encoding, weights, logarithmic=True) +Bases: \fI\%nltk.classify.api.ClassifierI\fP +.sp +A maximum entropy classifier (also known as a "conditional +exponential classifier"). This classifier is parameterized by a +set of "weights", which are used to combine the joint\-features +that are generated from a featureset by an "encoding". In +particular, the encoding maps each \fB(featureset, label)\fP pair to +a vector. The probability of each label is then computed using +the following equation: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C + dotprod(weights, encode(fs,label)) +prob(fs|label) = \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + sum(dotprod(weights, encode(fs,l)) for l in labels) +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Where \fBdotprod\fP is the dot product: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +dotprod(a,b) = sum(x*y for (x,y) in zip(a,b)) +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B ALGORITHMS = [\(aqGIS\(aq, \(aqIIS\(aq, \(aqMEGAM\(aq, \(aqTADM\(aq] +A list of the algorithm names that are accepted for the +\fBtrain()\fP method\(aqs \fBalgorithm\fP parameter. +.UNINDENT +.INDENT 7.0 +.TP +.B classify(featureset) +.INDENT 7.0 +.TP +.B Returns +the most appropriate label for the given featureset. +.TP +.B Return type +label +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B explain(featureset, columns=4) +Print a table showing the effect of each of the features in +the given feature set, and how they combine to determine the +probabilities of each label for that featureset. +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +the list of category labels used by this classifier. 
+.TP +.B Return type +list of (immutable) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B most_informative_features(n=10) +Generates the ranked list of informative features, from most to least informative. +.UNINDENT +.INDENT 7.0 +.TP +.B prob_classify(featureset) +.INDENT 7.0 +.TP +.B Returns +a probability distribution over labels for the given +featureset. +.TP +.B Return type +ProbDistI +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_weights(new_weights) +Set the feature weight vector for this classifier. +.INDENT 7.0 +.TP +.B Parameters +\fBnew_weights\fP (\fIlist of float\fP) \-\- The new feature weight vector. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B show_most_informative_features(n=10, show=\(aqall\(aq) +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBshow\fP (\fIstr\fP) \-\- \(aqall\(aq, \(aqneg\(aq, or \(aqpos\(aq (to show all features, only negative features, or only positive features) +.IP \(bu 2 +\fBn\fP (\fIint\fP) \-\- The number of top features to show +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod train(train_toks, algorithm=None, trace=3, encoding=None, labels=None, gaussian_prior_sigma=0, **cutoffs) +Train a new maxent classifier based on the given corpus of +training samples. This classifier will have its weights +chosen to maximize entropy while remaining empirically +consistent with the training corpus. +.INDENT 7.0 +.TP +.B Return type +MaxentClassifier +.TP +.B Returns +The new maxent classifier +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtrain_toks\fP (\fIlist\fP) \-\- Training data, represented as a list of +pairs, the first member of which is a featureset, +and the second of which is a classification label. +.IP \(bu 2 +\fBalgorithm\fP (\fIstr\fP) \-\- +.sp +A case\-insensitive string, specifying which +algorithm should be used to train the classifier. The +following algorithms are currently available. +.INDENT 2.0 +.IP \(bu 2 +Iterative Scaling Methods: Generalized Iterative Scaling (\fB\(aqGIS\(aq\fP), +Improved Iterative Scaling (\fB\(aqIIS\(aq\fP) +.IP \(bu 2 +External Libraries (requiring megam): +LM\-BFGS algorithm, with training performed by Megam (\fB\(aqmegam\(aq\fP) +.UNINDENT +.sp +The default algorithm is \fB\(aqIIS\(aq\fP\&. + +.IP \(bu 2 +\fBtrace\fP (\fIint\fP) \-\- The level of diagnostic tracing output to produce. +Higher values produce more verbose output. +.IP \(bu 2 +\fBencoding\fP (\fIMaxentFeatureEncodingI\fP) \-\- A feature encoding, used to convert featuresets +into feature vectors. If none is specified, then a +\fBBinaryMaxentFeatureEncoding\fP will be built based on the +features that are attested in the training corpus. +.IP \(bu 2 +\fBlabels\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The set of possible labels. If none is given, then +the set of all labels attested in the training data will be +used instead. +.IP \(bu 2 +\fBgaussian_prior_sigma\fP \-\- The sigma value for a gaussian +prior on model weights. Currently, this is supported by +\fBmegam\fP\&. For other algorithms, its value is ignored. +.IP \(bu 2 +\fBcutoffs\fP \-\- +.sp +Arguments specifying various conditions under +which the training should be halted. (Some of the cutoff +conditions are not supported by some algorithms.) +.INDENT 2.0 +.IP \(bu 2 +\fBmax_iter=v\fP: Terminate after \fBv\fP iterations. +.IP \(bu 2 +\fBmin_ll=v\fP: Terminate after the negative average +log\-likelihood drops under \fBv\fP\&. +.IP \(bu 2 +\fBmin_lldelta=v\fP: Terminate if a single iteration improves +log likelihood by less than \fBv\fP\&.
+.UNINDENT + +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B weights() +.INDENT 7.0 +.TP +.B Returns +The feature weight vector for this classifier. +.TP +.B Return type +list of float +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.maxent.MaxentFeatureEncodingI +Bases: \fBobject\fP +.sp +A mapping that converts a set of input\-feature values to a vector +of joint\-feature values, given a label. This conversion is +necessary to translate featuresets into a format that can be used +by maximum entropy models. +.sp +The set of joint\-features used by a given encoding is fixed, and +each index in the generated joint\-feature vectors corresponds to a +single joint\-feature. The length of the generated joint\-feature +vectors is therefore constant (for a given encoding). +.sp +Because the joint\-feature vectors generated by +\fBMaxentFeatureEncodingI\fP are typically very sparse, they are +represented as a list of \fB(index, value)\fP tuples, specifying the +value of each non\-zero joint\-feature. +.sp +Feature encodings are generally created using the \fBtrain()\fP +method, which generates an appropriate encoding based on the +input\-feature values and labels that are present in a given +corpus. +.INDENT 7.0 +.TP +.B describe(fid) +.INDENT 7.0 +.TP +.B Returns +A string describing the value of the joint\-feature +whose index in the generated feature vectors is \fBfid\fP\&. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B encode(featureset, label) +Given a (featureset, label) pair, return the corresponding +vector of joint\-feature values. This vector is represented as +a list of \fB(index, value)\fP tuples, specifying the value of +each non\-zero joint\-feature. +.INDENT 7.0 +.TP +.B Return type +list(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +A list of the "known labels" \-\- i.e., all labels +\fBl\fP such that \fBself.encode(fs,l)\fP can be a nonzero +joint\-feature vector for some value of \fBfs\fP\&. +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B length() +.INDENT 7.0 +.TP +.B Returns +The size of the fixed\-length joint\-feature vectors +that are generated by this encoding. +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B train(train_toks) +Construct and return a new feature encoding, based on a given +training corpus \fBtrain_toks\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBtrain_toks\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIdict\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Training data, represented as a list of +pairs, the first member of which is a feature dictionary, +and the second of which is a classification label. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.maxent.TadmEventMaxentFeatureEncoding(labels, mapping, unseen_features=False, alwayson_features=False) +Bases: \fI\%nltk.classify.maxent.BinaryMaxentFeatureEncoding\fP +.INDENT 7.0 +.TP +.B describe(fid) +.INDENT 7.0 +.TP +.B Returns +A string describing the value of the joint\-feature +whose index in the generated feature vectors is \fBfid\fP\&. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B encode(featureset, label) +Given a (featureset, label) pair, return the corresponding +vector of joint\-feature values. This vector is represented as +a list of \fB(index, value)\fP tuples, specifying the value of +each non\-zero joint\-feature.
+.INDENT 7.0 +.TP +.B Return type +list(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +A list of the "known labels" \-\- i.e., all labels +\fBl\fP such that \fBself.encode(fs,l)\fP can be a nonzero +joint\-feature vector for some value of \fBfs\fP\&. +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B length() +.INDENT 7.0 +.TP +.B Returns +The size of the fixed\-length joint\-feature vectors +that are generated by this encoding. +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod train(train_toks, count_cutoff=0, labels=None, **options) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.maxent.TadmMaxentClassifier(encoding, weights, logarithmic=True) +Bases: \fI\%nltk.classify.maxent.MaxentClassifier\fP +.INDENT 7.0 +.TP +.B classmethod train(train_toks, **kwargs) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.maxent.TypedMaxentFeatureEncoding(labels, mapping, unseen_features=False, alwayson_features=False) +Bases: \fI\%nltk.classify.maxent.MaxentFeatureEncodingI\fP +.sp +A feature encoding that generates vectors containing integer, +float and binary joint\-features of the form: +.sp +Binary (for string and boolean features): +.nf +joint_feat(fs, l) = { 1 if (fs[fname] == fval) and (l == label) +.in +2 +{ +{ 0 otherwise +.in -2 +.fi +.sp +.sp +Value (for integer and float features): +.nf +joint_feat(fs, l) = { fval if (type(fs[fname]) == type(fval)) +.in +2 +{ and (l == label) +{ +{ not encoded otherwise +.in -2 +.fi +.sp +.sp +Where \fBfname\fP is the name of an input\-feature, \fBfval\fP is a value +for that input\-feature, and \fBlabel\fP is a label. +.sp +Typically, these features are constructed based on a training +corpus, using the \fBtrain()\fP method. +.sp +For string and boolean features [type(fval) not in (int, float)] +this method will create one feature for each combination of +\fBfname\fP, \fBfval\fP, and \fBlabel\fP that occurs at least once in the +training corpus. +.sp +For integer and float features [type(fval) in (int, float)] this +method will create one feature for each combination of \fBfname\fP +and \fBlabel\fP that occurs at least once in the training corpus. +.sp +For binary features the \fBunseen_features\fP parameter can be used +to add "unseen\-value features", which are used whenever an input +feature has a value that was not encountered in the training +corpus. These features have the form: +.nf +joint_feat(fs, l) = { 1 if is_unseen(fname, fs[fname]) +.in +2 +{ and l == label +{ +{ 0 otherwise +.in -2 +.fi +.sp +.sp +Where \fBis_unseen(fname, fval)\fP is true if the encoding does not +contain any joint features that are true when \fBfs[fname]==fval\fP\&. +.sp +The \fBalwayson_features\fP parameter can be used to add "always\-on +features", which have the form: +.nf +joint_feat(fs, l) = { 1 if (l == label) +.in +2 +{ +{ 0 otherwise +.in -2 +.fi +.sp +.sp +These always\-on features allow the maxent model to directly model +the prior probabilities of each label. +.INDENT 7.0 +.TP +.B describe(f_id) +.INDENT 7.0 +.TP +.B Returns +A string describing the value of the joint\-feature +whose index in the generated feature vectors is \fBfid\fP\&. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B encode(featureset, label) +Given a (featureset, label) pair, return the corresponding +vector of joint\-feature values.
This vector is represented as +a list of \fB(index, value)\fP tuples, specifying the value of +each non\-zero joint\-feature. +.INDENT 7.0 +.TP +.B Return type +list(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +A list of the "known labels" \-\- i.e., all labels +\fBl\fP such that \fBself.encode(fs,l)\fP can be a nonzero +joint\-feature vector for some value of \fBfs\fP\&. +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B length() +.INDENT 7.0 +.TP +.B Returns +The size of the fixed\-length joint\-feature vectors +that are generated by this encoding. +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod train(train_toks, count_cutoff=0, labels=None, **options) +Construct and return a new feature encoding, based on a given +training corpus \fBtrain_toks\fP\&. See the class description +\fBTypedMaxentFeatureEncoding\fP for a description of the +joint\-features that will be included in this encoding. +.sp +Note: the recognized feature value types are int and float; other +types are interpreted as regular binary features. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtrain_toks\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIdict\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Training data, represented as a list of +pairs, the first member of which is a feature dictionary, +and the second of which is a classification label. +.IP \(bu 2 +\fBcount_cutoff\fP (\fIint\fP) \-\- A cutoff value that is used to discard +rare joint\-features. If a joint\-feature takes the value 1 +fewer than \fBcount_cutoff\fP times in the training corpus, +then that joint\-feature is not included in the generated +encoding. +.IP \(bu 2 +\fBlabels\fP (\fIlist\fP) \-\- A list of labels that should be used by the +classifier. If not specified, then the set of labels +attested in \fBtrain_toks\fP will be used. +.IP \(bu 2 +\fBoptions\fP \-\- Extra parameters for the constructor, such as +\fBunseen_features\fP and \fBalwayson_features\fP\&. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.calculate_deltas(train_toks, classifier, unattested, ffreq_empirical, nfmap, nfarray, nftranspose, encoding) +Calculate the update values for the classifier weights for +this iteration of IIS. These update weights are the value of +\fBdelta\fP that solves the equation: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +ffreq_empirical[i] + = +SUM[fs,l] (classifier.prob_classify(fs).prob(l) * + feature_vector(fs,l)[i] * + exp(delta[i] * nf(feature_vector(fs,l)))) +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Where: +.INDENT 7.0 +.IP \(bu 2 +\fI(fs,l)\fP is a (featureset, label) tuple from \fBtrain_toks\fP +.IP \(bu 2 +\fIfeature_vector(fs,l)\fP = \fBencoding.encode(fs,l)\fP +.IP \(bu 2 +\fInf(vector)\fP = \fBsum([val for (id,val) in vector])\fP +.UNINDENT +.UNINDENT +.sp +This function uses Newton\(aqs method to solve this equation for +\fIdelta[i]\fP\&. In particular, it starts with a guess of +\fBdelta[i]\fP = 1, and iteratively updates \fBdelta\fP with: +.nf +delta[i] \-= (ffreq_empirical[i] \- sum1[i])/(\-sum2[i]) +.fi +.sp +.sp +until convergence, where \fIsum1\fP and \fIsum2\fP are defined as: +.nf +sum1[i](delta) = SUM[fs,l] f[i](fs,l,delta) +sum2[i](delta) = SUM[fs,l] (f[i](fs,l,delta) * nf(feature_vector(fs,l))) +f[i](fs,l,delta) = (classifier.prob_classify(fs).prob(l) * +.in +2 +feature_vector(fs,l)[i] * +exp(delta[i] *
nf(feature_vector(fs,l)))) +.in -2 +.fi +.sp +.sp +Note that \fIsum1\fP and \fIsum2\fP depend on \fBdelta\fP; so they need +to be re\-computed each iteration. +.sp +The variables \fBnfmap\fP, \fBnfarray\fP, and \fBnftranspose\fP are +used to generate a dense encoding for \fInf(feature_vector(fs,l))\fP\&. This +allows \fBcalculate_deltas\fP to compute \fIsum1\fP and \fIsum2\fP using +matrices, which yields a significant performance improvement. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtrain_toks\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIdict\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- The set of training tokens. +.IP \(bu 2 +\fBclassifier\fP (\fIClassifierI\fP) \-\- The current classifier. +.IP \(bu 2 +\fBffreq_empirical\fP (\fIsequence of float\fP) \-\- An array containing the empirical +frequency for each feature. The \fIi\fPth element of this +array is the empirical frequency for feature \fIi\fP\&. +.IP \(bu 2 +\fBunattested\fP (\fIsequence of int\fP) \-\- An array that is 1 for features that are +not attested in the training data; and 0 for features that +are attested. In other words, \fBunattested[i]==1\fP iff +\fBffreq_empirical[i]==0\fP\&. +.IP \(bu 2 +\fBnfmap\fP (\fIdict\fP\fI(\fP\fIint \-> int\fP\fI)\fP) \-\- A map that can be used to compress \fBnf\fP to a dense +vector. +.IP \(bu 2 +\fBnfarray\fP (\fIarray\fP\fI(\fP\fIfloat\fP\fI)\fP) \-\- An array that can be used to uncompress \fBnf\fP +from a dense vector. +.IP \(bu 2 +\fBnftranspose\fP (\fIarray\fP\fI(\fP\fIfloat\fP\fI)\fP) \-\- The transpose of \fBnfarray\fP +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.calculate_empirical_fcount(train_toks, encoding) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.calculate_estimated_fcount(classifier, train_toks, encoding) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.calculate_nfmap(train_toks, encoding) +Construct a map that can be used to compress \fBnf\fP (which is +typically sparse). +.sp +\fInf(feature_vector)\fP is the sum of the feature values for +\fIfeature_vector\fP\&. +.sp +This represents the number of features that are active for a +given labeled text. This method finds all values of \fInf(t)\fP +that are attested for at least one token in the given list of +training tokens; and constructs a dictionary mapping these +attested values to a continuous range \fI0...N\fP\&. For example, +if the only values of \fInf()\fP that were attested were 3, 5, and +7, then \fBcalculate_nfmap\fP might return the dictionary \fB{3:0, 5:1, 7:2}\fP\&. +.INDENT 7.0 +.TP +.B Returns +A map that can be used to compress \fBnf\fP to a dense +vector. +.TP +.B Return type +dict(int \-> int) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None, labels=None, **cutoffs) +Train a new \fBConditionalExponentialClassifier\fP, using the given +training samples, using the Generalized Iterative Scaling +algorithm. This \fBConditionalExponentialClassifier\fP will encode +the model that maximizes entropy from all the models that are +empirically consistent with \fBtrain_toks\fP\&. +.INDENT 7.0 +.TP +.B See +\fBtrain_maxent_classifier()\fP for parameter descriptions.
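+.sp +For example, a minimal usage sketch (the training pairs here are illustrative; any list of \fB(featureset, label)\fP pairs will do): +.sp +.nf +.ft C +>>> from nltk.classify.maxent import train_maxent_classifier_with_gis +>>> train_toks = [({\(aqa\(aq: 1}, \(aqx\(aq), ({\(aqb\(aq: 1}, \(aqy\(aq), ({\(aqa\(aq: 1}, \(aqx\(aq)] +>>> classifier = train_maxent_classifier_with_gis(train_toks, trace=0, max_iter=5) +>>> classifier.classify({\(aqa\(aq: 1}) +\(aqx\(aq +.ft P +.fi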
+.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None, labels=None, **cutoffs) +Train a new \fBConditionalExponentialClassifier\fP, using the given +training samples, using the Improved Iterative Scaling algorithm. +This \fBConditionalExponentialClassifier\fP will encode the model +that maximizes entropy from all the models that are empirically +consistent with \fBtrain_toks\fP\&. +.INDENT 7.0 +.TP +.B See +\fBtrain_maxent_classifier()\fP for parameter descriptions. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.maxent.train_maxent_classifier_with_megam(train_toks, trace=3, encoding=None, labels=None, gaussian_prior_sigma=0, **kwargs) +Train a new \fBConditionalExponentialClassifier\fP, using the given +training samples, using the external \fBmegam\fP library. This +\fBConditionalExponentialClassifier\fP will encode the model that +maximizes entropy from all the models that are empirically +consistent with \fBtrain_toks\fP\&. +.INDENT 7.0 +.TP +.B See +\fBtrain_maxent_classifier()\fP for parameter descriptions. +.TP +.B See +\fBnltk.classify.megam\fP +.UNINDENT +.UNINDENT +.SS nltk.classify.megam module +.sp +A set of functions used to interface with the external \fI\%megam\fP maxent +optimization package. Before megam can be used, you should tell NLTK where it +can find the megam binary, using the \fBconfig_megam()\fP function. Typical +usage: +.sp +.nf +.ft C +>>> from nltk.classify import megam +>>> megam.config_megam() # pass path to megam if not found in PATH +[Found megam: ...] +.ft P +.fi +.sp +Use with MaxentClassifier as shown below; see the MaxentClassifier +documentation for details. +.sp +.nf +.ft C +nltk.classify.MaxentClassifier.train(corpus, \(aqmegam\(aq) +.ft P +.fi +.INDENT 0.0 +.TP +.B nltk.classify.megam.call_megam(args) +Call the \fBmegam\fP binary with the given arguments. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.megam.config_megam(bin=None) +Configure NLTK\(aqs interface to the \fBmegam\fP maxent optimization +package. +.INDENT 7.0 +.TP +.B Parameters +\fBbin\fP (\fIstr\fP) \-\- The full path to the \fBmegam\fP binary. If not specified, +then nltk will search the system for a \fBmegam\fP binary; and if +one is not found, it will raise a \fBLookupError\fP exception. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.megam.parse_megam_weights(s, features_count, explicit=True) +Given the stdout output generated by \fBmegam\fP when training a +model, return a \fBnumpy\fP array containing the corresponding weight +vector. This function does not currently handle bias features. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.megam.write_megam_file(train_toks, encoding, stream, bernoulli=True, explicit=True) +Generate an input file for \fBmegam\fP based on the given corpus of +classified tokens. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtrain_toks\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIdict\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Training data, represented as a list of +pairs, the first member of which is a feature dictionary, +and the second of which is a classification label. +.IP \(bu 2 +\fBencoding\fP (\fIMaxentFeatureEncodingI\fP) \-\- A feature encoding, used to convert featuresets +into feature vectors. May optionally implement a cost() method +in order to assign different costs to different class predictions. +.IP \(bu 2 +\fBstream\fP (\fIstream\fP) \-\- The stream to which the megam input file should be +written.
+.IP \(bu 2 +\fBbernoulli\fP \-\- If true, then use the \(aqbernoulli\(aq format. I.e., +all joint features have binary values, and are listed iff they +are true. Otherwise, list feature values explicitly. If +\fBbernoulli=False\fP, then you must call \fBmegam\fP with the +\fB\-fvals\fP option. +.IP \(bu 2 +\fBexplicit\fP \-\- If true, then use the \(aqexplicit\(aq format. I.e., +list the features that would fire for any of the possible +labels, for each token. If \fBexplicit=True\fP, then you must +call \fBmegam\fP with the \fB\-explicit\fP option. +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.classify.naivebayes module +.sp +A classifier based on the Naive Bayes algorithm. In order to find the +probability for a label, this algorithm first uses the Bayes rule to +express P(label|features) in terms of P(label) and P(features|label): +.nf +.in +2 +P(label) * P(features|label) +.in -2 +P(label|features) = \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +.in +2 +P(features) +.in -2 +.fi +.sp +.sp +The algorithm then makes the \(aqnaive\(aq assumption that all features are +independent, given the label: +.nf +.in +2 +P(label) * P(f1|label) * ... * P(fn|label) +.in -2 +P(label|features) = \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +.in +2 +P(features) +.in -2 +.fi +.sp +.sp +Rather than computing P(features) explicitly, the algorithm just +calculates the numerator for each label, and normalizes them so they +sum to one: +.nf +.in +2 +P(label) * P(f1|label) * ... * P(fn|label) +.in -2 +P(label|features) = \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +.in +2 +SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) ) +.in -2 +.fi +.sp +.INDENT 0.0 +.TP +.B class nltk.classify.naivebayes.NaiveBayesClassifier(label_probdist, feature_probdist) +Bases: \fI\%nltk.classify.api.ClassifierI\fP +.sp +A Naive Bayes classifier. Naive Bayes classifiers are +parameterized by two probability distributions: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +P(label) gives the probability that an input will receive each +label, given no information about the input\(aqs features. +.IP \(bu 2 +P(fname=fval|label) gives the probability that a given feature +(fname) will receive a given value (fval), given the +label (label). +.UNINDENT +.UNINDENT +.UNINDENT +.sp +If the classifier encounters an input with a feature that has +never been seen with any label, then rather than assigning a +probability of 0 to all labels, it will ignore that feature. +.sp +The feature value \(aqNone\(aq is reserved for unseen feature values; +you generally should not use \(aqNone\(aq as a feature value for one of +your own features. +.INDENT 7.0 +.TP +.B classify(featureset) +.INDENT 7.0 +.TP +.B Returns +the most appropriate label for the given featureset. +.TP +.B Return type +label +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +.INDENT 7.0 +.TP +.B Returns +the list of category labels used by this classifier. +.TP +.B Return type +list of (immutable) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B most_informative_features(n=100) +Return a list of the \(aqmost informative\(aq features used by this +classifier.
For the purpose of this function, the +informativeness of a feature \fB(fname,fval)\fP is equal to the +highest value of P(fname=fval|label), for any label, divided by +the lowest value of P(fname=fval|label), for any label: +.nf +max[ P(fname=fval|label1) / P(fname=fval|label2) ] +.fi +.sp +.UNINDENT +.INDENT 7.0 +.TP +.B prob_classify(featureset) +.INDENT 7.0 +.TP +.B Returns +a probability distribution over labels for the given +featureset. +.TP +.B Return type +ProbDistI +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B show_most_informative_features(n=10) +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod train(labeled_featuresets, estimator=ELEProbDist) +.INDENT 7.0 +.TP +.B Parameters +\fBlabeled_featuresets\fP \-\- A list of classified featuresets, +i.e., a list of tuples \fB(featureset, label)\fP\&. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.naivebayes.demo() +.UNINDENT +.SS nltk.classify.positivenaivebayes module +.sp +A variant of the Naive Bayes Classifier that performs binary classification with +partially\-labeled training sets. In other words, assume we want to build a classifier +that assigns each example to one of two complementary classes (e.g., male names and +female names). +If we have a training set with labeled examples for both classes, we can use a +standard Naive Bayes Classifier. However, consider the case when we only have labeled +examples for one of the classes, and other, unlabeled, examples. +Then, assuming a prior distribution on the two labels, we can use the unlabeled set +to estimate the frequencies of the various features. +.sp +Let the two possible labels be 1 and 0, and let\(aqs say we only have examples labeled 1 +and unlabeled examples. We are also given an estimate of P(1). +.sp +We compute P(feature|1) exactly as in the standard case. +.sp +To compute P(feature|0), we first estimate P(feature) from the unlabeled set (we are +assuming that the unlabeled examples are drawn according to the given prior distribution) +and then express the conditional probability as: +.nf +.in +2 +P(feature) \- P(feature|1) * P(1) +.in -2 +P(feature|0) = \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +.in +2 +P(0) +.in -2 +.fi +.sp +.sp +Example: +.sp +.nf +.ft C +>>> from nltk.classify import PositiveNaiveBayesClassifier +.ft P +.fi +.sp +Some sentences about sports: +.sp +.nf +.ft C +>>> sports_sentences = [ \(aqThe team dominated the game\(aq, +\&... \(aqThey lost the ball\(aq, +\&... \(aqThe game was intense\(aq, +\&... \(aqThe goalkeeper catched the ball\(aq, +\&... \(aqThe other team controlled the ball\(aq ] +.ft P +.fi +.sp +Mixed topics, including sports: +.sp +.nf +.ft C +>>> various_sentences = [ \(aqThe President did not comment\(aq, +\&... \(aqI lost the keys\(aq, +\&... \(aqThe team won the game\(aq, +\&... \(aqSara has two kids\(aq, +\&... \(aqThe ball went off the court\(aq, +\&... \(aqThey had the ball for the whole game\(aq, +\&... \(aqThe show is over\(aq ] +.ft P +.fi +.sp +The features of a sentence are simply the words it contains: +.sp +.nf +.ft C +>>> def features(sentence): +\&... words = sentence.lower().split() +\&... return dict((\(aqcontains(%s)\(aq % w, True) for w in words) +.ft P +.fi +.sp +We use the sports sentences as positive examples, the mixed ones as unlabeled examples: +.sp +.nf +.ft C +>>> positive_featuresets = map(features, sports_sentences) +>>> unlabeled_featuresets = map(features, various_sentences) +>>> classifier = PositiveNaiveBayesClassifier.train(positive_featuresets, +\&...
unlabeled_featuresets) +.ft P +.fi +.sp +Is the following sentence about sports? +.sp +.nf +.ft C +>>> classifier.classify(features(\(aqThe cat is on the table\(aq)) +False +.ft P +.fi +.sp +What about this one? +.sp +.nf +.ft C +>>> classifier.classify(features(\(aqMy team lost the game\(aq)) +True +.ft P +.fi +.INDENT 0.0 +.TP +.B class nltk.classify.positivenaivebayes.PositiveNaiveBayesClassifier(label_probdist, feature_probdist) +Bases: \fI\%nltk.classify.naivebayes.NaiveBayesClassifier\fP +.INDENT 7.0 +.TP +.B static train(positive_featuresets, unlabeled_featuresets, positive_prob_prior=0.5, estimator=ELEProbDist) +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBpositive_featuresets\fP \-\- An iterable of featuresets that are known as positive +examples (i.e., their label is \fBTrue\fP). +.IP \(bu 2 +\fBunlabeled_featuresets\fP \-\- An iterable of featuresets whose label is unknown. +.IP \(bu 2 +\fBpositive_prob_prior\fP \-\- A prior estimate of the probability of the label +\fBTrue\fP (default 0.5). +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.positivenaivebayes.demo() +.UNINDENT +.SS nltk.classify.rte_classify module +.sp +Simple classifier for RTE corpus. +.sp +It calculates the overlap in words and named entities between text and +hypothesis, and also whether there are words / named entities in the +hypothesis which fail to occur in the text, since this is an indicator that +the hypothesis is more informative than (i.e., not entailed by) the text. +.sp +TO DO: better Named Entity classification +TO DO: add lemmatization +.INDENT 0.0 +.TP +.B class nltk.classify.rte_classify.RTEFeatureExtractor(rtepair, stop=True, use_lemmatize=False) +Bases: \fBobject\fP +.sp +This builds a bag of words for both the text and the hypothesis after +throwing away some stopwords, then calculates overlap and difference. +.INDENT 7.0 +.TP +.B hyp_extra(toktype, debug=True) +Compute the extraneous material in the hypothesis. +.INDENT 7.0 +.TP +.B Parameters +\fBtoktype\fP (\fI\(aqne\(aq\fP\fI or \fP\fI\(aqword\(aq\fP) \-\- distinguish Named Entities from ordinary words +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B overlap(toktype, debug=False) +Compute the overlap between text and hypothesis. +.INDENT 7.0 +.TP +.B Parameters +\fBtoktype\fP (\fI\(aqne\(aq\fP\fI or \fP\fI\(aqword\(aq\fP) \-\- distinguish Named Entities from ordinary words +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.rte_classify.rte_classifier(algorithm, sample_N=None) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.rte_classify.rte_features(rtepair) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.rte_classify.rte_featurize(rte_pairs) +.UNINDENT +.SS nltk.classify.scikitlearn module +.sp +scikit\-learn (\fI\%http://scikit\-learn.org\fP) is a machine learning library for +Python. It supports many classification algorithms, including SVMs, +Naive Bayes, logistic regression (MaxEnt) and decision trees. +.sp +This package implements a wrapper around scikit\-learn classifiers. To use this +wrapper, construct a scikit\-learn estimator object, then use that to construct +a SklearnClassifier. E.g., to wrap a linear SVM with default settings: +.sp +.nf +.ft C +>>> from sklearn.svm import LinearSVC +>>> from nltk.classify.scikitlearn import SklearnClassifier +>>> classif = SklearnClassifier(LinearSVC()) +.ft P +.fi
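+.sp +Once constructed, the wrapper is trained and applied through the standard +ClassifierI methods. A minimal sketch (the featuresets below are +illustrative): +.sp +.nf +.ft C +>>> train_data = [({\(aqa\(aq: 4, \(aqb\(aq: 1}, \(aqx\(aq), ({\(aqa\(aq: 5, \(aqb\(aq: 2}, \(aqx\(aq), +\&... ({\(aqa\(aq: 0, \(aqb\(aq: 3}, \(aqy\(aq), ({\(aqa\(aq: 1, \(aqb\(aq: 4}, \(aqy\(aq)] +>>> _ = classif.train(train_data) +>>> classif.classify_many([{\(aqa\(aq: 3, \(aqb\(aq: 1}, {\(aqa\(aq: 0, \(aqb\(aq: 4}]) +[\(aqx\(aq, \(aqy\(aq] +.ft P +.fi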
+.sp +A scikit\-learn classifier may include preprocessing steps when it\(aqs wrapped +in a Pipeline object. The following constructs and wraps a Naive Bayes text +classifier with tf\-idf weighting and chi\-square feature selection to get the +best 1000 features: +.sp +.nf +.ft C +>>> from sklearn.feature_extraction.text import TfidfTransformer +>>> from sklearn.feature_selection import SelectKBest, chi2 +>>> from sklearn.naive_bayes import MultinomialNB +>>> from sklearn.pipeline import Pipeline +>>> pipeline = Pipeline([(\(aqtfidf\(aq, TfidfTransformer()), +\&... (\(aqchi2\(aq, SelectKBest(chi2, k=1000)), +\&... (\(aqnb\(aq, MultinomialNB())]) +>>> classif = SklearnClassifier(pipeline) +.ft P +.fi +.INDENT 0.0 +.TP +.B class nltk.classify.scikitlearn.SklearnClassifier(estimator, dtype=float, sparse=True) +Bases: \fI\%nltk.classify.api.ClassifierI\fP +.sp +Wrapper for scikit\-learn classifiers. +.INDENT 7.0 +.TP +.B classify_many(featuresets) +Classify a batch of samples. +.INDENT 7.0 +.TP +.B Parameters +\fBfeaturesets\fP \-\- An iterable over featuresets, each a dict mapping +strings to either numbers, booleans or strings. +.TP +.B Returns +The predicted class label for each input sample. +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +The class labels used by this classifier. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob_classify_many(featuresets) +Compute per\-class probabilities for a batch of samples. +.INDENT 7.0 +.TP +.B Parameters +\fBfeaturesets\fP \-\- An iterable over featuresets, each a dict mapping +strings to either numbers, booleans or strings. +.TP +.B Return type +list of \fBProbDistI\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B train(labeled_featuresets) +Train (fit) the scikit\-learn estimator. +.INDENT 7.0 +.TP +.B Parameters +\fBlabeled_featuresets\fP \-\- A list of \fB(featureset, label)\fP +where each \fBfeatureset\fP is a dict mapping strings to either +numbers, booleans or strings. +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.classify.senna module +.sp +A general interface to the SENNA pipeline that supports any of the +operations specified in SUPPORTED_OPERATIONS. +.sp +Applying multiple operations at once is faster than applying them +separately. For example, Senna determines POS tags automatically when it +extracts named entities, so applying both operations costs only the time of +extracting the named entities. +.sp +The SENNA pipeline has a fixed maximum size for the sentences it can read, +1024 tokens per sentence by default. If you have larger sentences, consider +changing the MAX_SENTENCE_SIZE value in SENNA_main.c and rebuilding your +system\-specific binary; otherwise misalignment errors could be introduced. +.sp +The input is: +.INDENT 0.0 +.IP \(bu 2 +the path to the directory that contains the SENNA executables. If the path +is incorrect, Senna will automatically search for the executable file +specified in the SENNA environment variable +.IP \(bu 2 +the list of operations to be performed
+.IP \(bu 2 +(optionally) the encoding of the input data (default: utf\-8) +.UNINDENT +.sp +Note: Unit tests for this module can be found in test/unit/test_senna.py +.sp +.nf +.ft C +>>> from nltk.classify import Senna +>>> pipeline = Senna(\(aq/usr/share/senna\-v3.0\(aq, [\(aqpos\(aq, \(aqchk\(aq, \(aqner\(aq]) +>>> sent = \(aqDusseldorf is an international business center\(aq.split() +>>> [(token[\(aqword\(aq], token[\(aqchk\(aq], token[\(aqner\(aq], token[\(aqpos\(aq]) for token in pipeline.tag(sent)] +[(\(aqDusseldorf\(aq, \(aqB\-NP\(aq, \(aqB\-LOC\(aq, \(aqNNP\(aq), (\(aqis\(aq, \(aqB\-VP\(aq, \(aqO\(aq, \(aqVBZ\(aq), (\(aqan\(aq, \(aqB\-NP\(aq, \(aqO\(aq, \(aqDT\(aq), +(\(aqinternational\(aq, \(aqI\-NP\(aq, \(aqO\(aq, \(aqJJ\(aq), (\(aqbusiness\(aq, \(aqI\-NP\(aq, \(aqO\(aq, \(aqNN\(aq), (\(aqcenter\(aq, \(aqI\-NP\(aq, \(aqO\(aq, \(aqNN\(aq)] +.ft P +.fi +.INDENT 0.0 +.TP +.B class nltk.classify.senna.Senna(senna_path, operations, encoding=\(aqutf\-8\(aq) +Bases: \fBnltk.tag.api.TaggerI\fP +.INDENT 7.0 +.TP +.B SUPPORTED_OPERATIONS = [\(aqpos\(aq, \(aqchk\(aq, \(aqner\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B executable(base_path) +Determine the system\-specific binary that should be used in the pipeline. +If the system is not known, the default senna binary will be used. +.UNINDENT +.INDENT 7.0 +.TP +.B tag(tokens) +Applies the specified operation(s) on a list of tokens. +.UNINDENT +.INDENT 7.0 +.TP +.B tag_sents(sentences) +Applies the tag method over a list of sentences. This method will return a +list of dictionaries. Every dictionary will contain a word with its +calculated annotations/tags. +.UNINDENT +.UNINDENT +.SS nltk.classify.svm module +.sp +nltk.classify.svm has been deprecated. For classification based +on support vector machines (SVMs), use nltk.classify.scikitlearn +(or \fI\%scikit\-learn\fP directly). +.INDENT 0.0 +.TP +.B class nltk.classify.svm.SvmClassifier(*args, **kwargs) +Bases: \fBobject\fP +.UNINDENT +.SS nltk.classify.tadm module +.INDENT 0.0 +.TP +.B nltk.classify.tadm.call_tadm(args) +Call the \fBtadm\fP binary with the given arguments. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.tadm.config_tadm(bin=None) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.tadm.encoding_demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.tadm.names_demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.tadm.parse_tadm_weights(paramfile) +Given the stdout output generated by \fBtadm\fP when training a +model, return a \fBnumpy\fP array containing the corresponding weight +vector. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.tadm.write_tadm_file(train_toks, encoding, stream) +Generate an input file for \fBtadm\fP based on the given corpus of +classified tokens. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtrain_toks\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIdict\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Training data, represented as a list of +pairs, the first member of which is a feature dictionary, +and the second of which is a classification label. +.IP \(bu 2 +\fBencoding\fP (\fITadmEventMaxentFeatureEncoding\fP) \-\- A feature encoding, used to convert featuresets +into feature vectors. +.IP \(bu 2 +\fBstream\fP (\fIstream\fP) \-\- The stream to which the \fBtadm\fP input file should be +written. +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.classify.textcat module +.sp +A module for language identification using the TextCat algorithm. +An implementation of the text categorization algorithm +presented in Cavnar, W. B. and J. M.
Trenkle, +"N\-Gram\-Based Text Categorization". +.sp +The algorithm takes advantage of Zipf\(aqs law and uses +n\-gram frequencies to profile both the languages and the text to be +identified, then compares the profiles using a distance measure. +.sp +Language n\-grams are provided by the "An Crubadan" +project. A corpus reader was created separately to read +those files. +.sp +For details regarding the algorithm, see: +\fI\%http://www.let.rug.nl/~vannoord/TextCat/textcat.pdf\fP +.sp +For details about An Crubadan, see: +\fI\%http://borel.slu.edu/crubadan/index.html\fP +.INDENT 0.0 +.TP +.B class nltk.classify.textcat.TextCat +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B calc_dist(lang, trigram, text_profile) +Calculate the "out\-of\-place" measure between the +text and language profile for a single trigram +.UNINDENT +.INDENT 7.0 +.TP +.B fingerprints = {} +.UNINDENT +.INDENT 7.0 +.TP +.B guess_language(text) +Find the language with the minimum distance +to the text and return its ISO 639\-3 code +.UNINDENT +.INDENT 7.0 +.TP +.B lang_dists(text) +Calculate the "out\-of\-place" measure between +the text and all languages +.UNINDENT +.INDENT 7.0 +.TP +.B last_distances = {} +.UNINDENT +.INDENT 7.0 +.TP +.B profile(text) +Create FreqDist of trigrams within text +.UNINDENT +.INDENT 7.0 +.TP +.B remove_punctuation(text) +Get rid of punctuation except apostrophes +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.textcat.demo() +.UNINDENT +.SS nltk.classify.util module +.sp +Utility functions and classes for classifiers. +.INDENT 0.0 +.TP +.B class nltk.classify.util.CutoffChecker(cutoffs) +Bases: \fBobject\fP +.sp +A helper class that implements cutoff checks based on number of +iterations and log likelihood. +.sp +Accuracy cutoffs are also implemented, but they\(aqre almost never +a good idea to use. +.INDENT 7.0 +.TP +.B check(classifier, train_toks) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.accuracy(classifier, gold) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.apply_features(feature_func, toks, labeled=None) +Use the \fBLazyMap\fP class to construct a lazy list\-like +object that is analogous to \fBmap(feature_func, toks)\fP\&. In +particular, if \fBlabeled=False\fP, then the returned list\-like +object\(aqs values are equal to: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +[feature_func(tok) for tok in toks] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +If \fBlabeled=True\fP, then the returned list\-like object\(aqs values +are equal to: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +[(feature_func(tok), label) for (tok, label) in toks] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The primary purpose of this function is to avoid the memory +overhead involved in storing all the featuresets for every token +in a corpus. Instead, these featuresets are constructed lazily, +as\-needed. The reduction in memory overhead can be especially +significant when the underlying list of tokens is itself lazy (as +is the case with many corpus readers). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfeature_func\fP \-\- The function that will be applied to each +token. It should return a featureset \-\- i.e., a dict +mapping feature names to feature values. +.IP \(bu 2 +\fBtoks\fP \-\- The list of tokens to which \fBfeature_func\fP should be +applied. If \fBlabeled=False\fP, then the list elements will be +passed directly to \fBfeature_func()\fP\&. If \fBlabeled=True\fP, +then the list elements should be tuples \fB(tok,label)\fP, and +\fBtok\fP will be passed to \fBfeature_func()\fP\&.
+.IP \(bu 2 +\fBlabeled\fP \-\- If true, then \fBtoks\fP contains labeled tokens \-\- +i.e., tuples of the form \fB(tok, label)\fP\&. (Default: +auto\-detect based on types.) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.attested_labels(tokens) +.INDENT 7.0 +.TP +.B Returns +A list of all labels that are attested in the given list +of tokens. +.TP +.B Return type +list of (immutable) +.TP +.B Parameters +\fBtokens\fP (\fIlist\fP) \-\- The list of classified tokens from which to extract +labels. A classified token has the form \fB(token, label)\fP\&. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.binary_names_demo_features(name) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.check_megam_config() +Checks whether the MEGAM binary is configured. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.log_likelihood(classifier, gold) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.names_demo(trainer, features=names_demo_features) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.names_demo_features(name) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.partial_names_demo(trainer, features=names_demo_features) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.util.wsd_demo(trainer, word, features, n=1000) +.UNINDENT +.SS nltk.classify.weka module +.sp +Classifiers that make use of the external \(aqWeka\(aq package. +.INDENT 0.0 +.TP +.B class nltk.classify.weka.ARFF_Formatter(labels, features) +Bases: \fBobject\fP +.sp +Converts featuresets and labeled featuresets to ARFF\-formatted +strings, appropriate for input into Weka. +.sp +Features and classes can be specified manually in the constructor, or may +be determined from data using \fBfrom_train\fP\&. +.INDENT 7.0 +.TP +.B data_section(tokens, labeled=None) +Returns the ARFF data section for the given data. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtokens\fP \-\- a list of featuresets (dicts) or labelled featuresets +which are tuples (featureset, label). +.IP \(bu 2 +\fBlabeled\fP \-\- Indicates whether the given tokens are labeled +or not. If None, then the tokens will be assumed to be +labeled if the first token\(aqs value is a tuple or list. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B format(tokens) +Returns a string representation of ARFF output for the given data. +.UNINDENT +.INDENT 7.0 +.TP +.B static from_train(tokens) +Constructs an ARFF_Formatter instance with class labels and feature +types determined from the given data. Handles boolean, numeric and +string (note: not nominal) types. +.UNINDENT +.INDENT 7.0 +.TP +.B header_section() +Returns an ARFF header as a string. +.UNINDENT +.INDENT 7.0 +.TP +.B labels() +Returns the list of classes. +.UNINDENT +.INDENT 7.0 +.TP +.B write(outfile, tokens) +Writes ARFF data to a file for the given data. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.classify.weka.WekaClassifier(formatter, model_filename) +Bases: \fI\%nltk.classify.api.ClassifierI\fP +.INDENT 7.0 +.TP +.B classify_many(featuresets) +Apply \fBself.classify()\fP to each element of \fBfeaturesets\fP\&. I.e.: +.INDENT 7.0 +.INDENT 3.5 +return [self.classify(fs) for fs in featuresets] +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +list(label) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse_weka_distribution(s) +.UNINDENT +.INDENT 7.0 +.TP +.B parse_weka_output(lines) +.UNINDENT +.INDENT 7.0 +.TP +.B prob_classify_many(featuresets) +Apply \fBself.prob_classify()\fP to each element of \fBfeaturesets\fP\&.
I.e.: +.INDENT 7.0 +.INDENT 3.5 +return [self.prob_classify(fs) for fs in featuresets] +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +list(ProbDistI) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod train(model_filename, featuresets, classifier=\(aqnaivebayes\(aq, options=[], quiet=True) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.classify.weka.config_weka(classpath=None) +.UNINDENT +.SS Module contents +.sp +Classes and interfaces for labeling tokens with category labels (or +"class labels"). Typically, labels are represented with strings +(such as \fB\(aqhealth\(aq\fP or \fB\(aqsports\(aq\fP). Classifiers can be used to +perform a wide range of classification tasks. For example, +classifiers can be used... +.INDENT 0.0 +.IP \(bu 2 +to classify documents by topic +.IP \(bu 2 +to classify ambiguous words by which word sense is intended +.IP \(bu 2 +to classify acoustic signals by which phoneme they represent +.IP \(bu 2 +to classify sentences by their author +.UNINDENT +.SS Features +.sp +In order to decide which category label is appropriate for a given +token, classifiers examine one or more \(aqfeatures\(aq of the token. These +"features" are typically chosen by hand, and indicate which aspects +of the token are relevant to the classification decision. For +example, a document classifier might use a separate feature for each +word, recording how often that word occurred in the document. +.SS Featuresets +.sp +The features describing a token are encoded using a "featureset", +which is a dictionary that maps from "feature names" to "feature +values". Feature names are unique strings that indicate what aspect +of the token is encoded by the feature. Examples include +\fB\(aqprevword\(aq\fP, for a feature whose value is the previous word; and +\fB\(aqcontains\-word(library)\(aq\fP for a feature that is true when a document +contains the word \fB\(aqlibrary\(aq\fP\&. Feature values are typically +booleans, numbers, or strings, depending on which feature they +describe. +.sp +Featuresets are typically constructed using a "feature detector" +(also known as a "feature extractor"). A feature detector is a +function that takes a token (and sometimes information about its +context) as its input, and returns a featureset describing that token. +For example, the following feature detector converts a document +(stored as a list of words) to a featureset describing the set of +words included in the document: +.sp +.nf +.ft C +>>> # Define a feature detector function. +>>> def document_features(document): +\&... return dict([(\(aqcontains\-word(%s)\(aq % w, True) for w in document]) +.ft P +.fi +.sp +Feature detectors are typically applied to each token before it is fed +to the classifier: +.sp +.nf +.ft C +>>> # Classify each Gutenberg document. +>>> from nltk.corpus import gutenberg +>>> for fileid in gutenberg.fileids(): +\&... doc = gutenberg.words(fileid) +\&... print(fileid, classifier.classify(document_features(doc))) +.ft P +.fi +.sp +The parameters that a feature detector expects will vary, depending on +the task and the needs of the feature detector. For example, a +feature detector for word sense disambiguation (WSD) might take as its +input a sentence, and the index of a word that should be classified, +and return a featureset for that word. The following feature detector +for WSD includes features describing the left and right contexts of +the target word: +.sp +.nf +.ft C +>>> def wsd_features(sentence, index): +\&... featureset = {} +\&... 
for i in range(max(0, index\-3), index): +\&... featureset[\(aqleft\-context(%s)\(aq % sentence[i]] = True +\&... for i in range(index, min(index+3, len(sentence))): +\&... featureset[\(aqright\-context(%s)\(aq % sentence[i]] = True +\&... return featureset +.ft P +.fi +.SS Training Classifiers +.sp +Most classifiers are built by training them on a list of hand\-labeled +examples, known as the "training set". Training sets are represented +as lists of \fB(featuredict, label)\fP tuples. +.SS nltk.cluster package +.SS Submodules +.SS nltk.cluster.api module +.INDENT 0.0 +.TP +.B class nltk.cluster.api.ClusterI +Bases: \fBobject\fP +.sp +Interface covering basic clustering functionality. +.INDENT 7.0 +.TP +.B classification_probdist(vector) +Classifies the token into a cluster, returning +a probability distribution over the cluster identifiers. +.UNINDENT +.INDENT 7.0 +.TP +.B abstract classify(token) +Classifies the token into a cluster, setting the token\(aqs CLUSTER +parameter to that cluster identifier. +.UNINDENT +.INDENT 7.0 +.TP +.B abstract cluster(vectors, assign_clusters=False) +Assigns the vectors to clusters, learning the clustering parameters +from the data. Returns a cluster identifier for each vector. +.UNINDENT +.INDENT 7.0 +.TP +.B cluster_name(index) +Returns the name of the cluster at index. +.UNINDENT +.INDENT 7.0 +.TP +.B cluster_names() +Returns the names of the clusters. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B likelihood(vector, label) +Returns the likelihood (a float) of the token having the +corresponding cluster. +.UNINDENT +.INDENT 7.0 +.TP +.B abstract num_clusters() +Returns the number of clusters. +.UNINDENT +.UNINDENT +.SS nltk.cluster.em module +.INDENT 0.0 +.TP +.B class nltk.cluster.em.EMClusterer(initial_means, priors=None, covariance_matrices=None, conv_threshold=1e\-06, bias=0.1, normalise=False, svd_dimensions=None) +Bases: \fI\%nltk.cluster.util.VectorSpaceClusterer\fP +.sp +The Gaussian EM clusterer models the vectors as being produced by +a mixture of k Gaussian sources. The parameters of these sources +(prior probability, mean and covariance matrix) are then found to +maximise the likelihood of the given data. This is done with the +expectation maximisation algorithm. It starts with k arbitrarily +chosen means, priors and covariance matrices. It then calculates +the membership probabilities for each vector in each of the +clusters; this is the \(aqE\(aq step. The cluster parameters are then +updated in the \(aqM\(aq step using the maximum likelihood estimate from +the cluster membership probabilities. This process continues until +the likelihood of the data does not significantly increase. +.INDENT 7.0 +.TP +.B classify_vectorspace(vector) +Returns the index of the appropriate cluster for the vector. +.UNINDENT +.INDENT 7.0 +.TP +.B cluster_vectorspace(vectors, trace=False) +Finds the clusters using the given set of vectors. +.UNINDENT +.INDENT 7.0 +.TP +.B likelihood_vectorspace(vector, cluster) +Returns the likelihood of the vector belonging to the cluster. +.UNINDENT +.INDENT 7.0 +.TP +.B num_clusters() +Returns the number of clusters. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.cluster.em.demo() +Non\-interactive demonstration of the clusterers with simple 2\-D data.
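+.sp +The demonstration corresponds roughly to this minimal sketch (the initial +means and data points are illustrative; numpy is required): +.sp +.nf +.ft C +>>> import numpy +>>> from nltk.cluster import EMClusterer +>>> vectors = [numpy.array(f) for f in [[0.5, 0.5], [1.5, 1], [1, 3], [0.5, 4]]] +>>> clusterer = EMClusterer([[4, 2], [4, 2.01]], bias=0.1) +>>> clusters = clusterer.cluster(vectors, True, trace=False) +>>> clusterer.num_clusters() +2 +.ft P +.fi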
+.UNINDENT +.SS nltk.cluster.gaac module +.INDENT 0.0 +.TP +.B class nltk.cluster.gaac.GAAClusterer(num_clusters=1, normalise=True, svd_dimensions=None) +Bases: \fI\%nltk.cluster.util.VectorSpaceClusterer\fP +.sp +The Group Average Agglomerative Clusterer starts with each of the N vectors as singleton +clusters. It then iteratively merges pairs of clusters which have the +closest centroids. This continues until there is only one cluster. The +order of merges gives rise to a dendrogram: a tree with the earlier merges +lower than later merges. The membership of a given number of clusters c, 1 +<= c <= N, can be found by cutting the dendrogram at depth c. +.sp +This clusterer uses the cosine similarity metric only, which allows for +efficient speed\-up in the clustering process. +.INDENT 7.0 +.TP +.B classify_vectorspace(vector) +Returns the index of the appropriate cluster for the vector. +.UNINDENT +.INDENT 7.0 +.TP +.B cluster(vectors, assign_clusters=False, trace=False) +Assigns the vectors to clusters, learning the clustering parameters +from the data. Returns a cluster identifier for each vector. +.UNINDENT +.INDENT 7.0 +.TP +.B cluster_vectorspace(vectors, trace=False) +Finds the clusters using the given set of vectors. +.UNINDENT +.INDENT 7.0 +.TP +.B dendrogram() +.INDENT 7.0 +.TP +.B Returns +The dendrogram representing the current clustering +.TP +.B Return type +Dendrogram +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B num_clusters() +Returns the number of clusters. +.UNINDENT +.INDENT 7.0 +.TP +.B update_clusters(num_clusters) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.cluster.gaac.demo() +Non\-interactive demonstration of the clusterers with simple 2\-D data. +.UNINDENT +.SS nltk.cluster.kmeans module +.INDENT 0.0 +.TP +.B class nltk.cluster.kmeans.KMeansClusterer(num_means, distance, repeats=1, conv_test=1e\-06, initial_means=None, normalise=False, svd_dimensions=None, rng=None, avoid_empty_clusters=False) +Bases: \fI\%nltk.cluster.util.VectorSpaceClusterer\fP +.sp +The K\-means clusterer starts with k arbitrarily chosen means then allocates +each vector to the cluster with the closest mean. It then recalculates the +means of each cluster as the centroid of the vectors in the cluster. This +process repeats until the cluster memberships stabilise. This is a +hill\-climbing algorithm which may converge to a local maximum. Hence the +clustering is often repeated with random initial means and the most +commonly occurring output means are chosen. +.INDENT 7.0 +.TP +.B classify_vectorspace(vector) +Returns the index of the appropriate cluster for the vector. +.UNINDENT +.INDENT 7.0 +.TP +.B cluster_vectorspace(vectors, trace=False) +Finds the clusters using the given set of vectors. +.UNINDENT +.INDENT 7.0 +.TP +.B means() +The means used for clustering. +.UNINDENT +.INDENT 7.0 +.TP +.B num_clusters() +Returns the number of clusters. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.cluster.kmeans.demo() +.UNINDENT +.SS nltk.cluster.util module +.INDENT 0.0 +.TP +.B class nltk.cluster.util.Dendrogram(items=[]) +Bases: \fBobject\fP +.sp +Represents a dendrogram, a tree with a specified branching order. This +must be initialised with the leaf items; merge is then called iteratively +for each branch. This class constructs a tree representing the order of calls +to the merge function. +.INDENT 7.0 +.TP +.B groups(n) +Finds the n\-groups of items (leaves) reachable from a cut at depth n.
+.INDENT 7.0 +.TP +.B Parameters +\fBn\fP (\fIint\fP) \-\- number of groups +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B merge(*indices) +Merges nodes at the given indices in the dendrogram. The merged nodes +replace the first node specified; all other nodes +involved in the merge are removed. +.INDENT 7.0 +.TP +.B Parameters +\fBindices\fP (\fIseq of int\fP) \-\- indices of the items to merge (at least two) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B show(leaf_labels=[]) +Print the dendrogram in ASCII art to standard out. +.INDENT 7.0 +.TP +.B Parameters +\fBleaf_labels\fP \-\- an optional list of strings to use for labeling the leaves +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.cluster.util.VectorSpaceClusterer(normalise=False, svd_dimensions=None) +Bases: \fI\%nltk.cluster.api.ClusterI\fP +.sp +Abstract clusterer which takes tokens and maps them into a vector space. +Optionally performs singular value decomposition to reduce the +dimensionality. +.INDENT 7.0 +.TP +.B classify(vector) +Classifies the token into a cluster, setting the token\(aqs CLUSTER +parameter to that cluster identifier. +.UNINDENT +.INDENT 7.0 +.TP +.B abstract classify_vectorspace(vector) +Returns the index of the appropriate cluster for the vector. +.UNINDENT +.INDENT 7.0 +.TP +.B cluster(vectors, assign_clusters=False, trace=False) +Assigns the vectors to clusters, learning the clustering parameters +from the data. Returns a cluster identifier for each vector. +.UNINDENT +.INDENT 7.0 +.TP +.B abstract cluster_vectorspace(vectors, trace) +Finds the clusters using the given set of vectors. +.UNINDENT +.INDENT 7.0 +.TP +.B likelihood(vector, label) +Returns the likelihood (a float) of the token having the +corresponding cluster. +.UNINDENT +.INDENT 7.0 +.TP +.B likelihood_vectorspace(vector, cluster) +Returns the likelihood of the vector belonging to the cluster. +.UNINDENT +.INDENT 7.0 +.TP +.B vector(vector) +Returns the vector after normalisation and dimensionality reduction +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.cluster.util.cosine_distance(u, v) +Returns 1 minus the cosine of the angle between vectors v and u. This is +equal to: +.sp +.nf +1 \- (u.v / |u||v|) +.fi +.sp +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.cluster.util.euclidean_distance(u, v) +Returns the euclidean distance between vectors u and v. This is equivalent +to the length of the vector (u \- v). +.UNINDENT +.SS Module contents +.sp +This module contains a number of basic clustering algorithms. Clustering +describes the task of discovering groups of similar items within a large +collection. It is also described as unsupervised machine learning, as the data +from which it learns is not annotated with the class information that +supervised learning requires. Annotated data is difficult and expensive to obtain in +the quantities required for the majority of supervised learning algorithms. +This problem, the knowledge acquisition bottleneck, is common to most natural +language processing tasks, thus fueling the need for quality unsupervised +approaches. +.sp +This module contains a k\-means clusterer, E\-M clusterer and a group average +agglomerative clusterer (GAAC). All these clusterers involve finding good +cluster groupings for a set of vectors in multi\-dimensional space. +.sp +The K\-means clusterer starts with k arbitrarily chosen means then allocates each +vector to the cluster with the closest mean. It then recalculates the means of +each cluster as the centroid of the vectors in the cluster.
+.SS nltk.cluster.util module
+.INDENT 0.0
+.TP
+.B class nltk.cluster.util.Dendrogram(items=[])
+Bases: \fBobject\fP
+.sp
+Represents a dendrogram, a tree with a specified branching order. It
+must be initialised with the leaf items; merge() is then called
+iteratively for each branch. This class constructs a tree representing
+the order of calls to the merge function.
+.INDENT 7.0
+.TP
+.B groups(n)
+Finds the n\-groups of items (leaves) reachable from a cut at depth n.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBn\fP (\fIint\fP) \-\- number of groups
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B merge(*indices)
+Merges the nodes at the given indices in the dendrogram. The combined
+node replaces the first node specified; all other nodes involved in
+the merge are removed.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBindices\fP (\fIseq of int\fP) \-\- indices of the items to merge (at least two)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B show(leaf_labels=[])
+Print the dendrogram in ASCII art to standard output.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBleaf_labels\fP \-\- an optional list of strings to use for labeling the
+leaves
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.cluster.util.VectorSpaceClusterer(normalise=False, svd_dimensions=None)
+Bases: \fI\%nltk.cluster.api.ClusterI\fP
+.sp
+Abstract clusterer which takes tokens and maps them into a vector space.
+Optionally performs singular value decomposition to reduce the
+dimensionality.
+.INDENT 7.0
+.TP
+.B classify(vector)
+Classifies the token into a cluster, setting the token\(aqs CLUSTER
+parameter to that cluster identifier.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract classify_vectorspace(vector)
+Returns the index of the appropriate cluster for the vector.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B cluster(vectors, assign_clusters=False, trace=False)
+Assigns the vectors to clusters, learning the clustering parameters
+from the data. Returns a cluster identifier for each vector.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract cluster_vectorspace(vectors, trace)
+Finds the clusters using the given set of vectors.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B likelihood(vector, label)
+Returns the likelihood (a float) of the token having the
+corresponding cluster.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B likelihood_vectorspace(vector, cluster)
+Returns the likelihood of the vector belonging to the cluster.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B vector(vector)
+Returns the vector after normalisation and dimensionality reduction.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.cluster.util.cosine_distance(u, v)
+Returns 1 minus the cosine of the angle between vectors u and v. This is
+equal to 1 \- (u.v / |u||v|).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.cluster.util.euclidean_distance(u, v)
+Returns the Euclidean distance between vectors u and v. This is equivalent
+to the length of the vector (u \- v).
+.UNINDENT
+.SS Module contents
+.sp
+This module contains a number of basic clustering algorithms. Clustering
+describes the task of discovering groups of similar items within a large
+collection. It is also described as unsupervised machine learning, since the
+data from which it learns is not annotated with class information, as it is
+for supervised learning. Annotated data is difficult and expensive to obtain
+in the quantities required for the majority of supervised learning
+algorithms. This problem, the knowledge acquisition bottleneck, is common to
+most natural language processing tasks, thus fueling the need for quality
+unsupervised approaches.
+.sp
+This module contains a k\-means clusterer, an E\-M clusterer and a group
+average agglomerative clusterer (GAAC). All these clusterers involve finding
+good cluster groupings for a set of vectors in multi\-dimensional space.
+.sp
+The K\-means clusterer starts with k arbitrarily chosen means then allocates
+each vector to the cluster with the closest mean. It then recalculates the
+means of each cluster as the centroid of the vectors in the cluster. This
+process repeats until the cluster memberships stabilise. This is a
+hill\-climbing algorithm which may converge to a local maximum. Hence the
+clustering is often repeated with random initial means and the most commonly
+occurring output means are chosen.
+.sp
+The GAAC clusterer starts with each of the \fIN\fP vectors as singleton clusters.
+It then iteratively merges the pair of clusters with the closest centroids.
+This continues until there is only one cluster. The order of merges gives rise
+to a dendrogram \- a tree with the earlier merges lower than later merges. The
+membership of a given number of clusters \fIc\fP, \fI1 <= c <= N\fP, can be found by
+cutting the dendrogram at depth \fIc\fP\&.
+.sp
+The Gaussian EM clusterer models the vectors as being produced by a mixture
+of k Gaussian sources. The parameters of these sources (prior probability,
+mean and covariance matrix) are then found to maximise the likelihood of the
+given data. This is done with the expectation maximisation algorithm. It
+starts with k arbitrarily chosen means, priors and covariance matrices. It
+then calculates the membership probabilities for each vector in each of the
+clusters \- this is the \(aqE\(aq step. The cluster parameters are then updated in
+the \(aqM\(aq step using the maximum likelihood estimate from the cluster membership
+probabilities. This process continues until the likelihood of the data does
+not significantly increase.
+.sp
+They all extend the ClusterI interface, which defines the common operations
+available with each clusterer. These operations include:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+cluster: clusters a sequence of vectors
+.IP \(bu 2
+classify: assigns a vector to a cluster
+.IP \(bu 2
+classification_probdist: gives the probability distribution over cluster memberships
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The existing clusterers also extend cluster.VectorSpaceClusterer, an
+abstract class which allows for singular value decomposition (SVD) and vector
+normalisation. SVD is used to reduce the dimensionality of the vector space in
+such a manner as to preserve as much of the variation as possible, by
+reparameterising the axes in order of variability and discarding all bar the
+first d dimensions. Normalisation ensures that vectors fall in the unit
+hypersphere.
+.sp
+Usage example (see also demo()):
+.sp
+.nf
+.ft C
+from nltk import cluster
+from nltk.cluster import euclidean_distance
+from numpy import array
+
+vectors = [array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0]]]
+
+# initialise the clusterer (will also assign the vectors to clusters)
+clusterer = cluster.KMeansClusterer(2, euclidean_distance)
+clusterer.cluster(vectors, True)
+
+# classify a new vector
+print(clusterer.classify(array([3, 3])))
+.ft P
+.fi
+.sp
+Note that the vectors must use numpy array\-like
+objects. nltk_contrib.unimelb.tacohn.SparseArrays may be used for
+efficiency when required.
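+.sp
+A comparable sketch for the Gaussian EM clusterer described above (loosely
+based on the module\(aqs demo(); the initial means are required and the values
+here are illustrative):
+.sp
+.nf
+.ft C
+from numpy import array
+from nltk.cluster import EMClusterer
+
+vectors = [array(f) for f in [[0.5, 0.5], [1.5, 0.5], [1, 3]]]
+
+# two initial means seed the E and M steps; bias is a small variance
+# term that keeps the covariance matrices non\-singular
+clusterer = EMClusterer([array([4, 2]), array([4, 2.01])], bias=0.1)
+print(clusterer.cluster(vectors, assign_clusters=True, trace=False))
+.ft P
+.fi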
+.SS nltk.corpus package +.SS Subpackages +.SS nltk.corpus.reader package +.SS Submodules +.SS nltk.corpus.reader.aligned module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.aligned.AlignedCorpusReader(root, fileids, sep=\(aq/\(aq, word_tokenizer=WhitespaceTokenizer(pattern=\(aq\e\es+\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), sent_tokenizer=RegexpTokenizer(pattern=\(aq\en\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), alignedsent_block_reader=, encoding=\(aqlatin1\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Reader for corpora of word\-aligned sentences. Tokens are assumed +to be separated by whitespace. Sentences begin on separate lines. +.INDENT 7.0 +.TP +.B aligned_sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of AlignedSent objects. +.TP +.B Return type +list(AlignedSent) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences or utterances, each encoded as a list of word +strings. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.aligned.AlignedSentCorpusView(corpus_file, encoding, aligned, group_by_sent, word_tokenizer, sent_tokenizer, alignedsent_block_reader) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.sp +A specialized corpus view for aligned sentences. +\fBAlignedSentCorpusView\fP objects are typically created by +\fBAlignedCorpusReader\fP (not directly by nltk users). +.INDENT 7.0 +.TP +.B read_block(stream) +Read a block from the input stream. +.INDENT 7.0 +.TP +.B Returns +a block of tokens from the input stream +.TP +.B Return type +list(any) +.TP +.B Parameters +\fBstream\fP (\fIstream\fP) \-\- an input stream +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.api module +.sp +API for corpus readers. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.api.CategorizedCorpusReader(kwargs) +Bases: \fBobject\fP +.sp +A mixin class used to aid in the implementation of corpus readers +for categorized corpora. This class defines the method +\fBcategories()\fP, which returns a list of the categories for the +corpus or for a specified set of fileids; and overrides \fBfileids()\fP +to take a \fBcategories\fP argument, restricting the set of fileids to +be returned. +.sp +Subclasses are expected to: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +Call \fB__init__()\fP to set up the mapping. +.IP \(bu 2 +Override all view methods to accept a \fBcategories\fP parameter, +which can be used \fIinstead\fP of the \fBfileids\fP parameter, to +select which fileids should be included in the returned view. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B categories(fileids=None) +Return a list of the categories that are defined for this corpus, +or for the file(s) if it is given. +.UNINDENT +.INDENT 7.0 +.TP +.B fileids(categories=None) +Return a list of file identifiers for the files that make up +this corpus, or that make up the given category(s) if specified. 
+.UNINDENT +.INDENT 7.0 +.TP +.B paras(fileids=None, categories=None) +.UNINDENT +.INDENT 7.0 +.TP +.B raw(fileids=None, categories=None) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None, categories=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None, categories=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.api.CorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fBobject\fP +.sp +A base class for "corpus reader" classes, each of which can be +used to read a specific corpus format. Each individual corpus +reader instance is used to read a specific corpus, consisting of +one or more files under a common root directory. Each file is +identified by its \fBfile identifier\fP, which is the relative path +to the file from the root directory. +.sp +A separate subclass is defined for each corpus format. These +subclasses define one or more methods that provide \(aqviews\(aq on the +corpus contents, such as \fBwords()\fP (for a list of words) and +\fBparsed_sents()\fP (for a list of parsed sentences). Called with +no arguments, these methods will return the contents of the entire +corpus. For most corpora, these methods define one or more +selection arguments, such as \fBfileids\fP or \fBcategories\fP, which can +be used to select which portion of the corpus should be returned. +.INDENT 7.0 +.TP +.B abspath(fileid) +Return the absolute path for the given file. +.INDENT 7.0 +.TP +.B Parameters +\fBfileid\fP (\fIstr\fP) \-\- The file identifier for the file whose path +should be returned. +.TP +.B Return type +PathPointer +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abspaths(fileids=None, include_encoding=False, include_fileid=False) +Return a list of the absolute paths for all fileids in this corpus; +or for the given list of fileids, if specified. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfileids\fP (\fINone\fP\fI or \fP\fIstr\fP\fI or \fP\fIlist\fP) \-\- Specifies the set of fileids for which paths should +be returned. Can be None, for all fileids; a list of +file identifiers, for a specified set of fileids; or a single +file identifier, for a single file. Note that the return +value is always a list of paths, even if \fBfileids\fP is a +single file identifier. +.IP \(bu 2 +\fBinclude_encoding\fP \-\- If true, then return a list of +\fB(path_pointer, encoding)\fP tuples. +.UNINDENT +.TP +.B Return type +list(PathPointer) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B citation() +Return the contents of the corpus citation.bib file, if it exists. +.UNINDENT +.INDENT 7.0 +.TP +.B encoding(file) +Return the unicode encoding for the given corpus file, if known. +If the encoding is unknown, or if the given file should be +processed using byte strings (str), then return None. +.UNINDENT +.INDENT 7.0 +.TP +.B ensure_loaded() +Load this corpus (if it has not already been loaded). This is +used by LazyCorpusLoader as a simple method that can be used to +make sure a corpus is loaded \-\- e.g., in case a user wants to +do help(some_corpus). +.UNINDENT +.INDENT 7.0 +.TP +.B fileids() +Return a list of file identifiers for the fileids that make up +this corpus. +.UNINDENT +.INDENT 7.0 +.TP +.B license() +Return the contents of the corpus LICENSE file, if it exists. +.UNINDENT +.INDENT 7.0 +.TP +.B open(file) +Return an open stream that can be used to read the given file. +If the file\(aqs encoding is not None, then the stream will +automatically decode the file\(aqs contents into unicode. 
+.INDENT 7.0 +.TP +.B Parameters +\fBfile\fP \-\- The file identifier of the file to read. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B raw(fileids=None) +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.TP +.B Returns +the given file(s) as a single string. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B readme() +Return the contents of the corpus README file, if it exists. +.UNINDENT +.INDENT 7.0 +.TP +.B property root +The directory where this corpus is stored. +.INDENT 7.0 +.TP +.B Type +PathPointer +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.api.SyntaxCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +An abstract base class for reading corpora consisting of +syntactically parsed text. Subclasses should define: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fB__init__\fP, which specifies the location of the corpus +and a method for detecting the sentence blocks in corpus files. +.IP \(bu 2 +\fB_read_block\fP, which reads a block from the input stream. +.IP \(bu 2 +\fB_word\fP, which takes a block and returns a list of list of words. +.IP \(bu 2 +\fB_tag\fP, which takes a block and returns a list of list of tagged +words. +.IP \(bu 2 +\fB_parse\fP, which takes a block and returns a list of parsed +sentences. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parsed_sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.bnc module +.sp +Corpus reader for the XML version of the British National Corpus. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.bnc.BNCCorpusReader(root, fileids, lazy=True) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.sp +Corpus reader for the XML version of the British National Corpus. +.sp +For access to the complete XML data structure, use the \fBxml()\fP +method. For access to simple word lists and tagged word lists, use +\fBwords()\fP, \fBsents()\fP, \fBtagged_words()\fP, and \fBtagged_sents()\fP\&. +.sp +You can obtain the full version of the BNC corpus at +\fI\%http://www.ota.ox.ac.uk/desc/2554\fP +.sp +If you extracted the archive to a directory called \fIBNC\fP, then you can +instantiate the reader as: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +BNCCorpusReader(root=\(aqBNC/Texts/\(aq, fileids=r\(aq[A\-K]/\ew*/\ew*\e.xml\(aq) +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None, strip_space=True, stem=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences or utterances, each encoded as a list of word +strings. +.TP +.B Return type +list(list(str)) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from +word tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, c5=False, strip_space=True, stem=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a list of \fB(word,tag)\fP tuples. 
+.TP +.B Return type +list(list(tuple(str,str))) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBc5\fP \-\- If true, then the tags used will be the more detailed +c5 tags. Otherwise, the simplified tags will be used. +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from +word tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, c5=False, strip_space=True, stem=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and punctuation symbols, encoded as tuples +\fB(word,tag)\fP\&. +.TP +.B Return type +list(tuple(str,str)) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBc5\fP \-\- If true, then the tags used will be the more detailed +c5 tags. Otherwise, the simplified tags will be used. +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from +word tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None, strip_space=True, stem=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +and punctuation symbols. +.TP +.B Return type +list(str) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from +word tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.bnc.BNCSentence(num, items) +Bases: \fBlist\fP +.sp +A list of words, augmented by an attribute \fBnum\fP used to record +the sentence identifier (the \fBn\fP attribute from the XML). +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.bnc.BNCWordView(fileid, sent, tag, strip_space, stem) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusView\fP +.sp +A stream backed corpus view specialized for use with the BNC corpus. +.INDENT 7.0 +.TP +.B author +Author of the document. +.UNINDENT +.INDENT 7.0 +.TP +.B editor +Editor +.UNINDENT +.INDENT 7.0 +.TP +.B handle_elt(elt, context) +Convert an element into an appropriate value for inclusion in +the view. Unless overridden by a subclass or by the +\fBelt_handler\fP constructor argument, this method simply +returns \fBelt\fP\&. +.INDENT 7.0 +.TP +.B Returns +The view value corresponding to \fBelt\fP\&. +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBelt\fP (\fIElementTree\fP) \-\- The element that should be converted. +.IP \(bu 2 +\fBcontext\fP (\fIstr\fP) \-\- A string composed of element tags separated by +forward slashes, indicating the XML context of the given +element. For example, the string \fB\(aqfoo/bar/baz\(aq\fP +indicates that the element is a \fBbaz\fP element whose +parent is a \fBbar\fP element and whose grandparent is a +top\-level \fBfoo\fP element. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B handle_header(elt, context) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_sent(elt) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_word(elt) +.UNINDENT +.INDENT 7.0 +.TP +.B resps +Statement of responsibility +.UNINDENT +.INDENT 7.0 +.TP +.B tags_to_ignore = {\(aqalign\(aq, \(aqevent\(aq, \(aqgap\(aq, \(aqpause\(aq, \(aqpb\(aq, \(aqshift\(aq, \(aqunclear\(aq, \(aqvocal\(aq} +These tags are ignored. 
For their description refer to the
+technical documentation, for example,
+\fI\%http://www.natcorp.ox.ac.uk/docs/URG/ref\-vocal.html\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B title
+Title of the document.
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.bracket_parse module
+.sp
+Corpus reader for corpora that consist of parenthesis\-delineated parse trees.
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.bracket_parse.AlpinoCorpusReader(root, encoding=\(aqISO\-8859\-1\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.bracket_parse.BracketParseCorpusReader\fP
+.sp
+Reader for the Alpino Dutch Treebank.
+This corpus has an embedded lexical breakdown structure, as read by _parse.
+Unfortunately this puts punctuation and some other words out of sentence
+order in the XML element tree, which is no good for tag_ and word_.
+_tag and _word are therefore overridden to pass a new, non\-default
+parameter \(aqordered\(aq to the overridden _normalize function; the _parse
+function can then remain untouched.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.bracket_parse.BracketParseCorpusReader(root, fileids, comment_char=None, detect_blocks=\(aqunindented_paren\(aq, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.SyntaxCorpusReader\fP
+.sp
+Reader for corpora that consist of parenthesis\-delineated parse trees,
+like those found in the "combined" section of the Penn Treebank,
+e.g. "(S (NP (DT the) (JJ little) (NN dog)) (VP (VBD barked)))".
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.bracket_parse.CategorizedBracketParseCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.bracket_parse.BracketParseCorpusReader\fP
+.sp
+A reader for parsed corpora whose documents are
+divided into categories based on their file identifiers.
+@author: Nathan Schneider <\fI\%nschneid@cs.cmu.edu\fP>
+.INDENT 7.0
+.TP
+.B parsed_paras(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parsed_sents(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parsed_words(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_paras(fileids=None, categories=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, categories=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None, categories=None, tagset=None)
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.categorized_sents module
+.sp
+CorpusReader structured for corpora that contain one instance on each row.
+This CorpusReader is specifically used for the Subjectivity Dataset and the
+Sentence Polarity Dataset.
+.INDENT 0.0
+.IP \(bu 2
+Subjectivity Dataset information \-
+.UNINDENT
+.sp
+Authors: Bo Pang and Lillian Lee.
+Url: \fI\%http://www.cs.cornell.edu/people/pabo/movie\-review\-data\fP
+.sp
+Distributed with permission.
+.sp
+Related papers:
+.INDENT 0.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Bo Pang and Lillian Lee. "A Sentimental Education: Sentiment Analysis Using
+Subjectivity Summarization Based on Minimum Cuts". Proceedings of the ACL,
+2004.
+.UNINDENT
+.IP \(bu 2
+Sentence Polarity Dataset information \-
+.UNINDENT
+.sp
+Authors: Bo Pang and Lillian Lee.
+Url: \fI\%http://www.cs.cornell.edu/people/pabo/movie\-review\-data\fP
+.sp
+Related papers:
+.INDENT 0.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Bo Pang and Lillian Lee. "Seeing stars: Exploiting class relationships for
+sentiment categorization with respect to rating scales". Proceedings of the
+ACL, 2005.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.categorized_sents.CategorizedSentencesCorpusReader(root, fileids, word_tokenizer=WhitespaceTokenizer(pattern=\(aq\e\es+\(aq, gaps=True, discard_empty=True, flags=re.UNICODE | re.MULTILINE | re.DOTALL), sent_tokenizer=None, encoding=\(aqutf8\(aq, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+A reader for corpora in which each row represents a single instance,
+typically a sentence. Instances are divided into categories based on their
+file identifiers (see CategorizedCorpusReader).
+Since many corpora allow rows that contain more than one sentence, it is
+possible to specify a sentence tokenizer to retrieve all sentences rather
+than all rows.
+.sp
+Examples using the Subjectivity Dataset:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import subjectivity
+>>> subjectivity.sents()[23]
+[\(aqtelevision\(aq, \(aqmade\(aq, \(aqhim\(aq, \(aqfamous\(aq, \(aq,\(aq, \(aqbut\(aq, \(aqhis\(aq, \(aqbiggest\(aq, \(aqhits\(aq,
+\(aqhappened\(aq, \(aqoff\(aq, \(aqscreen\(aq, \(aq.\(aq]
+>>> subjectivity.categories()
+[\(aqobj\(aq, \(aqsubj\(aq]
+>>> subjectivity.words(categories=\(aqsubj\(aq)
+[\(aqsmart\(aq, \(aqand\(aq, \(aqalert\(aq, \(aq,\(aq, \(aqthirteen\(aq, ...]
+.ft P
+.fi
+.sp
+Examples using the Sentence Polarity Dataset:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import sentence_polarity
+>>> sentence_polarity.sents()
+[[\(aqsimplistic\(aq, \(aq,\(aq, \(aqsilly\(aq, \(aqand\(aq, \(aqtedious\(aq, \(aq.\(aq], ["it\(aqs", \(aqso\(aq, \(aqladdish\(aq,
+\(aqand\(aq, \(aqjuvenile\(aq, \(aq,\(aq, \(aqonly\(aq, \(aqteenage\(aq, \(aqboys\(aq, \(aqcould\(aq, \(aqpossibly\(aq, \(aqfind\(aq,
+\(aqit\(aq, \(aqfunny\(aq, \(aq.\(aq], ...]
+>>> sentence_polarity.categories()
+[\(aqneg\(aq, \(aqpos\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B CorpusView
+alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, categories=None)
+Return all sentences in the corpus or in the specified file(s).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+sentences have to be returned.
+.IP \(bu 2
+\fBcategories\fP \-\- a list specifying the categories whose sentences have
+to be returned.
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of sentences.
+Each sentence is tokenized using the specified word_tokenizer.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None, categories=None)
+Return all words and punctuation symbols in the corpus or in the specified
+file(s).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+words have to be returned.
+.IP \(bu 2
+\fBcategories\fP \-\- a list specifying the categories whose words have to
+be returned.
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of words and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
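+.sp
+Building on the doctests above, a short sketch that splits the Subjectivity
+Dataset by category (assumes the corpus has been installed, e.g. via
+nltk.download(\(aqsubjectivity\(aq)):
+.sp
+.nf
+.ft C
+from nltk.corpus import subjectivity
+
+# one list of tokenized sentences per category
+subj_sents = subjectivity.sents(categories=\(aqsubj\(aq)
+obj_sents = subjectivity.sents(categories=\(aqobj\(aq)
+print(len(subj_sents), len(obj_sents))  # 5000 sentences each
+.ft P
+.fi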
+.SS nltk.corpus.reader.chasen module
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.chasen.ChasenCorpusReader(root, fileids, encoding=\(aqutf8\(aq, sent_splitter=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B paras(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_paras(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.chasen.ChasenCorpusView(corpus_file, encoding, tagged, group_by_sent, group_by_para, sent_splitter=None)
+Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.sp
+A specialized corpus view for ChasenReader. Similar to \fBTaggedCorpusView\fP,
+but it uses a fixed word tokenizer and sentence tokenizer.
+.INDENT 7.0
+.TP
+.B read_block(stream)
+Reads one paragraph at a time.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.chasen.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.chasen.test()
+.UNINDENT
+.SS nltk.corpus.reader.childes module
+.sp
+Corpus reader for the XML version of the CHILDES corpus.
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.childes.CHILDESCorpusReader(root, fileids, lazy=True)
+Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP
+.sp
+Corpus reader for the XML version of the CHILDES corpus.
+The CHILDES corpus is available at \fBhttps://childes.talkbank.org/\fP\&. The XML
+version of CHILDES is located at \fBhttps://childes.talkbank.org/data\-xml/\fP\&.
+Copy the needed parts of the CHILDES XML corpus into the NLTK data directory
+(\fBnltk_data/corpora/CHILDES/\fP).
+.sp
+For access to the file text use the usual nltk functions,
+\fBwords()\fP, \fBsents()\fP, \fBtagged_words()\fP and \fBtagged_sents()\fP\&.
+.INDENT 7.0
+.TP
+.B MLU(fileids=None, speaker=\(aqCHI\(aq)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of floats (the mean length of
+utterance for each file)
+.TP
+.B Return type
+list(float)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B age(fileids=None, speaker=\(aqCHI\(aq, month=False)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as string or int
+.TP
+.B Return type
+list or int
+.TP
+.B Parameters
+\fBmonth\fP \-\- If true, return months instead of year\-month\-date
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B childes_url_base = \(aqhttps://childes.talkbank.org/browser/index.php?url=\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B convert_age(age_year)
+Calculate age in months from a string in CHILDES format
+.UNINDENT
+.INDENT 7.0
+.TP
+.B corpus(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a dict of \fB(corpus_property_key, value)\fP
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B participants(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a dict of
+\fB(participant_property_key, value)\fP
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, speaker=\(aqALL\(aq, stem=False, relation=None, strip_space=True, replace=False)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of sentences or utterances, each
+encoded as a list of word strings.
+.TP
+.B Return type
+list(list(str))
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBspeaker\fP \-\- If specified, select specific speaker(s) defined
+in the corpus. Default is \(aqALL\(aq (all participants). 
Common choices +are \(aqCHI\(aq (the child), \(aqMOT\(aq (mother), [\(aqCHI\(aq,\(aqMOT\(aq] (exclude +researchers) +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.IP \(bu 2 +\fBrelation\fP \-\- If true, then return tuples of \fB(str,pos,relation_list)\fP\&. +If there is manually\-annotated relation info, it will return +tuples of \fB(str,pos,test_relation_list,str,pos,gold_relation_list)\fP +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from word +tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBreplace\fP \-\- If true, then use the replaced (intended) word instead +of the original word (e.g., \(aqwat\(aq will be replaced with \(aqwatch\(aq) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, speaker=\(aqALL\(aq, stem=False, relation=None, strip_space=True, replace=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a list of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(tuple(str,str))) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBspeaker\fP \-\- If specified, select specific speaker(s) defined +in the corpus. Default is \(aqALL\(aq (all participants). Common choices +are \(aqCHI\(aq (the child), \(aqMOT\(aq (mother), [\(aqCHI\(aq,\(aqMOT\(aq] (exclude +researchers) +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.IP \(bu 2 +\fBrelation\fP \-\- If true, then return tuples of \fB(str,pos,relation_list)\fP\&. +If there is manually\-annotated relation info, it will return +tuples of \fB(str,pos,test_relation_list,str,pos,gold_relation_list)\fP +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from word +tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBreplace\fP \-\- If true, then use the replaced (intended) word instead +of the original word (e.g., \(aqwat\(aq will be replaced with \(aqwatch\(aq) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, speaker=\(aqALL\(aq, stem=False, relation=False, strip_space=True, replace=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and punctuation symbols, encoded as tuples +\fB(word,tag)\fP\&. +.TP +.B Return type +list(tuple(str,str)) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBspeaker\fP \-\- If specified, select specific speaker(s) defined +in the corpus. Default is \(aqALL\(aq (all participants). Common choices +are \(aqCHI\(aq (the child), \(aqMOT\(aq (mother), [\(aqCHI\(aq,\(aqMOT\(aq] (exclude +researchers) +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.IP \(bu 2 +\fBrelation\fP \-\- If true, then return tuples of (stem, index, +dependent_index) +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from word +tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBreplace\fP \-\- If true, then use the replaced (intended) word instead +of the original word (e.g., \(aqwat\(aq will be replaced with \(aqwatch\(aq) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B webview_file(fileid, urlbase=None) +Map a corpus file to its web version on the CHILDES website, +and open it in a web browser. +.INDENT 7.0 +.TP +.B The complete URL to be used is: +childes.childes_url_base + urlbase + fileid.replace(\(aq.xml\(aq, \(aq.cha\(aq) +.UNINDENT +.sp +If no urlbase is passed, we try to calculate it. 
This +requires that the childes corpus was set up to mirror the +folder hierarchy under childes.psy.cmu.edu/data\-xml/, e.g.: +nltk_data/corpora/childes/Eng\-USA/Cornell/??? or +nltk_data/corpora/childes/Romance/Spanish/Aguirre/??? +.sp +The function first looks (as a special case) if "Eng\-USA" is +on the path consisting of +fileid; then if +"childes", possibly followed by "data\-xml", appears. If neither +one is found, we use the unmodified fileid and hope for the best. +If this is not right, specify urlbase explicitly, e.g., if the +corpus root points to the Cornell folder, urlbase=\(aqEng\-USA/Cornell\(aq. +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None, speaker=\(aqALL\(aq, stem=False, relation=False, strip_space=True, replace=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +.TP +.B Return type +list(str) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBspeaker\fP \-\- If specified, select specific speaker(s) defined +in the corpus. Default is \(aqALL\(aq (all participants). Common choices +are \(aqCHI\(aq (the child), \(aqMOT\(aq (mother), [\(aqCHI\(aq,\(aqMOT\(aq] (exclude +researchers) +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.IP \(bu 2 +\fBrelation\fP \-\- If true, then return tuples of (stem, index, +dependent_index) +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from word +tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBreplace\fP \-\- If true, then use the replaced (intended) word instead +of the original word (e.g., \(aqwat\(aq will be replaced with \(aqwatch\(aq) +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.childes.demo(corpus_root=None) +The CHILDES corpus should be manually downloaded and saved +to \fB[NLTK_Data_Dir]/corpora/childes/\fP +.UNINDENT +.SS nltk.corpus.reader.chunked module +.sp +A reader for corpora that contain chunked (and optionally tagged) +documents. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.chunked.ChunkedCorpusReader(root, fileids, extension=\(aq\(aq, str2chunktree=, sent_tokenizer=RegexpTokenizer(pattern=\(aq\en\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), para_block_reader=, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Reader for chunked (and optionally tagged) corpora. Paragraphs +are split using a block reader. They are then tokenized into +sentences using a sentence tokenizer. Finally, these sentences +are parsed into chunk trees using a string\-to\-chunktree conversion +function. Each of these steps can be performed using a default +function or a custom function. By default, paragraphs are split +on blank lines; sentences are listed one per line; and sentences +are parsed into chunk trees using \fBnltk.chunk.tagstr2tree\fP\&. +.INDENT 7.0 +.TP +.B chunked_paras(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as a shallow Tree. The leaves of these +trees are encoded as \fB(word, tag)\fP tuples (if the corpus +has tags) or word strings (if the corpus has no tags). +.TP +.B Return type +list(list(Tree)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B chunked_sents(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a shallow Tree. 
The leaves +of these trees are encoded as \fB(word, tag)\fP tuples (if +the corpus has tags) or word strings (if the corpus has no +tags). +.TP +.B Return type +list(Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B chunked_words(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and chunks. Words are encoded as \fB(word, tag)\fP +tuples (if the corpus has tags) or word strings (if the +corpus has no tags). Chunks are encoded as depth\-one +trees over \fB(word,tag)\fP tuples or word strings. +.TP +.B Return type +list(tuple(str,str) and Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B paras(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of word strings. +.TP +.B Return type +list(list(list(str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences or utterances, each encoded as a list of word +strings. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(list(tuple(str,str)))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a list of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(tuple(str,str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and punctuation symbols, encoded as tuples +\fB(word,tag)\fP\&. +.TP +.B Return type +list(tuple(str,str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.chunked.ChunkedCorpusView(fileid, encoding, tagged, group_by_sent, group_by_para, chunked, str2chunktree, sent_tokenizer, para_block_reader, source_tagset=None, target_tagset=None) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.INDENT 7.0 +.TP +.B read_block(stream) +Read a block from the input stream. +.INDENT 7.0 +.TP +.B Returns +a block of tokens from the input stream +.TP +.B Return type +list(any) +.TP +.B Parameters +\fBstream\fP (\fIstream\fP) \-\- an input stream +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.cmudict module +.sp +The Carnegie Mellon Pronouncing Dictionary [cmudict.0.6] +\fI\%ftp://ftp.cs.cmu.edu/project/speech/dict/\fP +Copyright 1998 Carnegie Mellon University +.sp +File Format: Each line consists of an uppercased word, a counter +(for alternative pronunciations), and a transcription. Vowels are +marked for stress (1=primary, 2=secondary, 0=no stress). E.g.: +NATURAL 1 N AE1 CH ER0 AH0 L +.sp +The dictionary contains 127069 entries. Of these, 119400 words are assigned +a unique pronunciation, 6830 words have two pronunciations, and 839 words have +three or more pronunciations. Many of these are fast\-speech variants. 
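+.sp
+A minimal lookup sketch using the reader documented below (assumes the
+cmudict data has been installed, e.g. via nltk.download(\(aqcmudict\(aq)):
+.sp
+.nf
+.ft C
+from nltk.corpus import cmudict
+
+prons = cmudict.dict()    # keys are lowercased words
+print(prons[\(aqnatural\(aq])  # e.g. [[\(aqN\(aq, \(aqAE1\(aq, \(aqCH\(aq, \(aqER0\(aq, \(aqAH0\(aq, \(aqL\(aq]]
+
+# entries() yields (word, transcription) pairs instead
+word, transcription = cmudict.entries()[0]
+.ft P
+.fi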
+.sp
+Phonemes: There are 39 phonemes, as shown below:
+.sp
+.nf
+.ft C
+Phoneme Example Translation    Phoneme Example Translation
+\-\-\-\-\-\-\- \-\-\-\-\-\-\- \-\-\-\-\-\-\-\-\-\-\-    \-\-\-\-\-\-\- \-\-\-\-\-\-\- \-\-\-\-\-\-\-\-\-\-\-
+AA      odd     AA D           AE      at      AE T
+AH      hut     HH AH T        AO      ought   AO T
+AW      cow     K AW           AY      hide    HH AY D
+B       be      B IY           CH      cheese  CH IY Z
+D       dee     D IY           DH      thee    DH IY
+EH      Ed      EH D           ER      hurt    HH ER T
+EY      ate     EY T           F       fee     F IY
+G       green   G R IY N       HH      he      HH IY
+IH      it      IH T           IY      eat     IY T
+JH      gee     JH IY          K       key     K IY
+L       lee     L IY           M       me      M IY
+N       knee    N IY           NG      ping    P IH NG
+OW      oat     OW T           OY      toy     T OY
+P       pee     P IY           R       read    R IY D
+S       sea     S IY           SH      she     SH IY
+T       tea     T IY           TH      theta   TH EY T AH
+UH      hood    HH UH D        UW      two     T UW
+V       vee     V IY           W       we      W IY
+Y       yield   Y IY L D       Z       zee     Z IY
+ZH      seizure S IY ZH ER
+.ft P
+.fi
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.cmudict.CMUDictCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B dict()
+.INDENT 7.0
+.TP
+.B Returns
+the cmudict lexicon as a dictionary, whose keys are
+lowercase words and whose values are lists of pronunciations.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B entries()
+.INDENT 7.0
+.TP
+.B Returns
+the cmudict lexicon as a list of entries
+containing (word, transcriptions) tuples.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words()
+.INDENT 7.0
+.TP
+.B Returns
+a list of all words defined in the cmudict lexicon.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.cmudict.read_cmudict_block(stream)
+.UNINDENT
+.SS nltk.corpus.reader.comparative_sents module
+.sp
+CorpusReader for the Comparative Sentence Dataset.
+.INDENT 0.0
+.IP \(bu 2
+Comparative Sentence Dataset information \-
+.UNINDENT
+.INDENT 0.0
+.TP
+.B Annotated by: Nitin Jindal and Bing Liu, 2006.
+Department of Computer Science
+University of Illinois at Chicago
+.TP
+.B Contact: Nitin Jindal, \fI\%njindal@cs.uic.edu\fP
+Bing Liu, \fI\%liub@cs.uic.edu\fP (\fI\%http://www.cs.uic.edu/~liub\fP)
+.UNINDENT
+.sp
+Distributed with permission.
+.sp
+Related papers:
+.INDENT 0.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Nitin Jindal and Bing Liu. "Identifying Comparative Sentences in Text Documents".
+Proceedings of the ACM SIGIR International Conference on Information Retrieval
+(SIGIR\-06), 2006.
+.UNINDENT
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Nitin Jindal and Bing Liu. "Mining Comparative Sentences and Relations".
+Proceedings of the Twenty\-First National Conference on Artificial Intelligence
+(AAAI\-2006), 2006.
+.UNINDENT
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Murthy Ganapathibhotla and Bing Liu. "Mining Opinions in Comparative Sentences".
+Proceedings of the 22nd International Conference on Computational Linguistics
+(Coling\-2008), Manchester, 18\-22 August, 2008.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.comparative_sents.ComparativeSentencesCorpusReader(root, fileids, word_tokenizer=WhitespaceTokenizer(pattern=\(aq\e\es+\(aq, gaps=True, discard_empty=True, flags=re.UNICODE | re.MULTILINE | re.DOTALL), sent_tokenizer=None, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for the Comparative Sentence Dataset by Jindal and Liu (2006). 
+.sp +.nf +.ft C +>>> from nltk.corpus import comparative_sentences +>>> comparison = comparative_sentences.comparisons()[0] +>>> comparison.text +[\(aqits\(aq, \(aqfast\-forward\(aq, \(aqand\(aq, \(aqrewind\(aq, \(aqwork\(aq, \(aqmuch\(aq, \(aqmore\(aq, \(aqsmoothly\(aq, +\(aqand\(aq, \(aqconsistently\(aq, \(aqthan\(aq, \(aqthose\(aq, \(aqof\(aq, \(aqother\(aq, \(aqmodels\(aq, \(aqi\(aq, "\(aqve", +\(aqhad\(aq, \(aq.\(aq] +>>> comparison.entity_2 +\(aqmodels\(aq +>>> (comparison.feature, comparison.keyword) +(\(aqrewind\(aq, \(aqmore\(aq) +>>> len(comparative_sentences.comparisons()) +853 +.ft P +.fi +.INDENT 7.0 +.TP +.B CorpusView +alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.UNINDENT +.INDENT 7.0 +.TP +.B comparisons(fileids=None) +Return all comparisons in the corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +comparisons have to be returned. +.TP +.B Returns +the given file(s) as a list of Comparison objects. +.TP +.B Return type +list(Comparison) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B keywords(fileids=None) +Return a set of all keywords used in the corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +keywords have to be returned. +.TP +.B Returns +the set of keywords and comparative phrases used in the corpus. +.TP +.B Return type +set(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B keywords_readme() +Return the list of words and constituents considered as clues of a +comparison (from listOfkeywords.txt). +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +Return all sentences in the corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +sentences have to be returned. +.TP +.B Returns +all sentences of the corpus as lists of tokens (or as plain +strings, if no word tokenizer is specified). +.TP +.B Return type +list(list(str)) or list(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +Return all words and punctuation symbols in the corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +words have to be returned. +.TP +.B Returns +the given file(s) as a list of words and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.comparative_sents.Comparison(text=None, comp_type=None, entity_1=None, entity_2=None, feature=None, keyword=None) +Bases: \fBobject\fP +.sp +A Comparison represents a comparative sentence and its constituents. +.UNINDENT +.SS nltk.corpus.reader.conll module +.sp +Read CoNLL\-style chunk fileids. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.conll.ConllChunkCorpusReader(root, fileids, chunk_types, encoding=\(aqutf8\(aq, tagset=None, separator=None) +Bases: \fI\%nltk.corpus.reader.conll.ConllCorpusReader\fP +.sp +A ConllCorpusReader whose data file contains three columns: words, +pos, and chunk. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.conll.ConllCorpusReader(root, fileids, columntypes, chunk_types=None, root_label=\(aqS\(aq, pos_in_tree=False, srl_includes_roleset=True, encoding=\(aqutf8\(aq, tree_class=, tagset=None, separator=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +A corpus reader for CoNLL\-style files. These files consist of a +series of sentences, separated by blank lines. 
Each sentence is
+encoded using a table (or "grid") of values, where each line
+corresponds to a single word, and each column corresponds to an
+annotation type. The set of columns used by CoNLL\-style files can
+vary from corpus to corpus; the \fBConllCorpusReader\fP constructor
+therefore takes an argument, \fBcolumntypes\fP, which is used to
+specify the columns that are used by a given corpus. By default,
+columns are split on consecutive whitespace; with the
+\fBseparator\fP argument you can instead specify an explicit string to
+split on (e.g. \fB\(aq \(aq\fP).
+.INDENT 7.0
+.TP
+.B @todo: Add support for reading from corpora where different
+parallel files contain different columns.
+.TP
+.B @todo: Possibly add caching of the grid corpus view? This would
+allow the same grid view to be used by different data access
+methods (eg words() and parsed_sents() could both share the
+same grid corpus view object).
+.TP
+.B @todo: Better support for \-DOCSTART\-. Currently, we just ignore
+it, but it could be used to define methods that retrieve a
+document at a time (eg parsed_documents()).
+.UNINDENT
+.INDENT 7.0
+.TP
+.B CHUNK = \(aqchunk\(aq
+column type for chunk structures
+.UNINDENT
+.INDENT 7.0
+.TP
+.B COLUMN_TYPES = (\(aqwords\(aq, \(aqpos\(aq, \(aqtree\(aq, \(aqchunk\(aq, \(aqne\(aq, \(aqsrl\(aq, \(aqignore\(aq)
+A list of all column types supported by the conll corpus reader.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B IGNORE = \(aqignore\(aq
+column type for column that should be ignored
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NE = \(aqne\(aq
+column type for named entities
+.UNINDENT
+.INDENT 7.0
+.TP
+.B POS = \(aqpos\(aq
+column type for part\-of\-speech tags
+.UNINDENT
+.INDENT 7.0
+.TP
+.B SRL = \(aqsrl\(aq
+column type for semantic role labels
+.UNINDENT
+.INDENT 7.0
+.TP
+.B TREE = \(aqtree\(aq
+column type for parse trees
+.UNINDENT
+.INDENT 7.0
+.TP
+.B WORDS = \(aqwords\(aq
+column type for words
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chunked_sents(fileids=None, chunk_types=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chunked_words(fileids=None, chunk_types=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B iob_sents(fileids=None, tagset=None)
+.INDENT 7.0
+.TP
+.B Returns
+a list of lists of word/tag/IOB tuples
+.TP
+.B Return type
+list(list)
+.TP
+.B Parameters
+\fBfileids\fP (\fINone\fP\fI or \fP\fIstr\fP\fI or \fP\fIlist\fP) \-\- the list of fileids that make up this corpus
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B iob_words(fileids=None, tagset=None)
+.INDENT 7.0
+.TP
+.B Returns
+a list of word/tag/IOB tuples
+.TP
+.B Return type
+list(tuple)
+.TP
+.B Parameters
+\fBfileids\fP (\fINone\fP\fI or \fP\fIstr\fP\fI or \fP\fIlist\fP) \-\- the list of fileids that make up this corpus
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parsed_sents(fileids=None, pos_in_tree=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B srl_instances(fileids=None, pos_in_tree=None, flatten=True)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B srl_spans(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+.UNINDENT
+.UNINDENT
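+.sp
+A hedged instantiation sketch (the directory and file name are hypothetical;
+the column types are those listed above):
+.sp
+.nf
+.ft C
+from nltk.corpus.reader.conll import ConllCorpusReader
+
+# a corpus whose files hold three columns: word, POS tag, chunk tag
+reader = ConllCorpusReader(\(aqcorpora/conll_sample\(aq, [\(aqsample.conll\(aq],
+                           columntypes=(\(aqwords\(aq, \(aqpos\(aq, \(aqchunk\(aq),
+                           chunk_types=(\(aqNP\(aq,))
+
+print(reader.tagged_sents()[0])   # [(word, pos), ...]
+print(reader.chunked_sents()[0])  # shallow Tree over (word, pos) tuples
+.ft P
+.fi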
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.conll.ConllSRLInstance(tree, verb_head, verb_stem, roleset, tagged_spans)
+Bases: \fBobject\fP
+.sp
+An SRL instance from a CoNLL corpus, which identifies and
+provides labels for the arguments of a single verb.
+.INDENT 7.0
+.TP
+.B arguments
+A list of \fB(argspan, argid)\fP tuples, specifying the location
+and type for each of the arguments identified by this
+instance. \fBargspan\fP is a tuple \fB(start, end)\fP, indicating
+that the argument consists of the \fBwords[start:end]\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pprint()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_spans
+A list of \fB(span, id)\fP tuples, specifying the location and
+type for each of the arguments, as well as the verb pieces,
+that make up this instance.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tree
+The parse tree for the sentence containing this instance.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb
+A list of the word indices of the words that compose the
+verb whose arguments are identified by this instance.
+This will contain multiple word indices when multi\-word
+verbs are used (e.g. \(aqturn on\(aq).
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_head
+The word index of the head word of the verb whose arguments
+are identified by this instance. E.g., for a sentence that
+uses the verb \(aqturn on,\(aq \fBverb_head\fP will be the word index
+of the word \(aqturn\(aq.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words
+A list of the words in the sentence containing this
+instance.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.conll.ConllSRLInstanceList(tree, instances=())
+Bases: \fBlist\fP
+.sp
+A set of instances for a single sentence.
+.INDENT 7.0
+.TP
+.B pprint(include_tree=False)
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.crubadan module
+.sp
+An NLTK interface for the n\-gram statistics gathered from
+the corpora for each language using An Crubadan.
+.sp
+There are multiple potential applications for the data but
+this reader was created with the goal of using it in the
+context of language identification.
+.sp
+For details about An Crubadan, this data, and its potential uses, see:
+\fI\%http://borel.slu.edu/crubadan/index.html\fP
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.crubadan.CrubadanCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+A corpus reader used to access the An Crubadan n\-gram files for each
+language.
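+.sp
+A brief usage sketch (assumes the An Crubadan data has been installed as the
+\fBcrubadan\fP corpus in nltk_data):
+.sp
+.nf
+.ft C
+from nltk.corpus import crubadan
+
+print(crubadan.langs()[:5])      # supported ISO 639\-3 codes
+fd = crubadan.lang_freq(\(aqeng\(aq)  # n\-gram FreqDist for English
+print(fd.most_common(5))
+.ft P
+.fi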
+.INDENT 7.0 +.TP +.B crubadan_to_iso(lang) +Return ISO 639\-3 code given internal Crubadan code +.UNINDENT +.INDENT 7.0 +.TP +.B iso_to_crubadan(lang) +Return internal Crubadan code based on ISO 639\-3 code +.UNINDENT +.INDENT 7.0 +.TP +.B lang_freq(lang) +Return n\-gram FreqDist for a specific language +given ISO 639\-3 language code +.UNINDENT +.INDENT 7.0 +.TP +.B langs() +Return a list of supported languages as ISO 639\-3 codes +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.dependency module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.dependency.DependencyCorpusReader(root, fileids, encoding=\(aqutf8\(aq, word_tokenizer=, sent_tokenizer=RegexpTokenizer(pattern=\(aq\en\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), para_block_reader=) +Bases: \fI\%nltk.corpus.reader.api.SyntaxCorpusReader\fP +.INDENT 7.0 +.TP +.B parsed_sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.dependency.DependencyCorpusView(corpus_file, tagged, group_by_sent, dependencies, chunk_types=None, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.INDENT 7.0 +.TP +.B read_block(stream) +Read a block from the input stream. +.INDENT 7.0 +.TP +.B Returns +a block of tokens from the input stream +.TP +.B Return type +list(any) +.TP +.B Parameters +\fBstream\fP (\fIstream\fP) \-\- an input stream +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.framenet module +.sp +Corpus reader for the FrameNet 1.7 lexicon and corpus. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.AttrDict(*args, **kwargs) +Bases: \fBdict\fP +.sp +A class that wraps a dict and allows accessing the keys of the +dict as if they were attributes. Taken from here: +.INDENT 7.0 +.INDENT 3.5 +\fI\%http://stackoverflow.com/a/14620633/8879\fP +.UNINDENT +.UNINDENT +.sp +.nf +.ft C +>>> foo = {\(aqa\(aq:1, \(aqb\(aq:2, \(aqc\(aq:3} +>>> bar = AttrDict(foo) +>>> pprint(dict(bar)) +{\(aqa\(aq: 1, \(aqb\(aq: 2, \(aqc\(aq: 3} +>>> bar.b +2 +>>> bar.d = 4 +>>> pprint(dict(bar)) +{\(aqa\(aq: 1, \(aqb\(aq: 2, \(aqc\(aq: 3, \(aqd\(aq: 4} +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.FramenetCorpusReader(root, fileids) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.sp +A corpus reader for the Framenet Corpus. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> fn.lu(3238).frame.lexUnit[\(aqglint.v\(aq] is fn.lu(3238) +True +>>> fn.frame_by_name(\(aqReplacing\(aq) is fn.lus(\(aqreplace.v\(aq)[0].frame +True +>>> fn.lus(\(aqprejudice.n\(aq)[0].frame.frameRelations == fn.frame_relations(\(aqPartiality\(aq) +True +.ft P +.fi +.INDENT 7.0 +.TP +.B annotations(luNamePattern=None, exemplars=True, full_text=True) +Frame annotation sets matching the specified criteria. +.UNINDENT +.INDENT 7.0 +.TP +.B buildindexes() +Build the internal indexes to make look\-ups faster. +.UNINDENT +.INDENT 7.0 +.TP +.B doc(fn_docid) +Returns the annotated document whose id number is +\fBfn_docid\fP\&. This id number can be obtained by calling the +Documents() function. 
+.sp
+The dict that is returned from this function will contain the
+following keys:
+.INDENT 7.0
+.IP \(bu 2
+\(aq_type\(aq : \(aqfulltextannotation\(aq
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B \(aqsentence\(aq
+a list of sentences in the document.
+.INDENT 7.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Each item in the list is a dict containing the following keys:
+.INDENT 7.0
+.IP \(bu 2
+\(aqID\(aq : the ID number of the sentence
+.IP \(bu 2
+\(aq_type\(aq : \(aqsentence\(aq
+.IP \(bu 2
+\(aqtext\(aq : the text of the sentence
+.IP \(bu 2
+\(aqparagNo\(aq : the paragraph number
+.IP \(bu 2
+\(aqsentNo\(aq : the sentence number
+.IP \(bu 2
+\(aqdocID\(aq : the document ID number
+.IP \(bu 2
+\(aqcorpID\(aq : the corpus ID number
+.IP \(bu 2
+\(aqaPos\(aq : the annotation position
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B \(aqannotationSet\(aq
+a list of annotation layers for the sentence.
+.INDENT 7.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Each item in the list is a dict containing the following keys:
+.INDENT 7.0
+.IP \(bu 2
+\(aqID\(aq : the ID number of the annotation set
+.IP \(bu 2
+\(aq_type\(aq : \(aqannotationset\(aq
+.IP \(bu 2
+\(aqstatus\(aq : either \(aqMANUAL\(aq or \(aqUNANN\(aq
+.IP \(bu 2
+\(aqluName\(aq : (only if status is \(aqMANUAL\(aq)
+.IP \(bu 2
+\(aqluID\(aq : (only if status is \(aqMANUAL\(aq)
+.IP \(bu 2
+\(aqframeID\(aq : (only if status is \(aqMANUAL\(aq)
+.IP \(bu 2
+\(aqframeName\(aq : (only if status is \(aqMANUAL\(aq)
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B \(aqlayer\(aq
+a list of labels for the layer.
+.INDENT 7.0
+.IP \(bu 2
+Each item in the layer is a dict containing the
+following keys:
+.INDENT 2.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+\(aq_type\(aq : \(aqlayer\(aq
+.IP \(bu 2
+\(aqrank\(aq
+.IP \(bu 2
+\(aqname\(aq
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B \(aqlabel\(aq
+a list of labels in the layer.
+.INDENT 7.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Each item is a dict containing the following keys:
+.INDENT 7.0
+.IP \(bu 2
+\(aqstart\(aq
+.IP \(bu 2
+\(aqend\(aq
+.IP \(bu 2
+\(aqname\(aq
+.IP \(bu 2
+\(aqfeID\(aq (optional)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfn_docid\fP (\fIint\fP) \-\- The Framenet id number of the document
+.TP
+.B Returns
+Information about the annotated document
+.TP
+.B Return type
+dict
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B docs(name=None)
+Return a list of the annotated full\-text documents in FrameNet,
+optionally filtered by a regex to be matched against the document name.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B docs_metadata(name=None)
+Return an index of the annotated documents in Framenet.
+.sp
+Details for a specific annotated document can be obtained using this
+class\(aqs doc() function by passing it the value of the \(aqID\(aq field.
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> len(fn.docs()) in (78, 107) # FN 1.5 and 1.7, resp.
+True
+>>> set([x.corpname for x in fn.docs_metadata()])>=set([\(aqANC\(aq, \(aqKBEval\(aq, \(aqLUCorpus\-v0.3\(aq, \(aqMiscellaneous\(aq, \(aqNTI\(aq, \(aqPropBank\(aq])
+True
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBname\fP (\fIstr\fP) \-\- A regular expression pattern used to search the
+file name of each annotated document. The document\(aqs
+file name contains the name of the corpus that the
+document is from, followed by two underscores "__"
+followed by the document name. 
So, for example, the +file name "LUCorpus\-v0.3__20000410_nyt\-NEW.xml" is +from the corpus named "LUCorpus\-v0.3" and the +document name is "20000410_nyt\-NEW.xml". +.TP +.B Returns +A list of selected (or all) annotated documents +.TP +.B Return type +list of dicts, where each dict object contains the following +keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq +.IP \(bu 2 +\(aqID\(aq +.IP \(bu 2 +\(aqcorpid\(aq +.IP \(bu 2 +\(aqcorpname\(aq +.IP \(bu 2 +\(aqdescription\(aq +.IP \(bu 2 +\(aqfilename\(aq +.UNINDENT + +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B exemplars(luNamePattern=None, frame=None, fe=None, fe2=None) +Lexicographic exemplar sentences, optionally filtered by LU name and/or 1\-2 FEs that +are realized overtly. \(aqframe\(aq may be a name pattern, frame ID, or frame instance. +\(aqfe\(aq may be a name pattern or FE instance; if specified, \(aqfe2\(aq may also +be specified to retrieve sentences with both overt FEs (in either order). +.UNINDENT +.INDENT 7.0 +.TP +.B fe_relations() +Obtain a list of frame element relations. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> ferels = fn.fe_relations() +>>> isinstance(ferels, list) +True +>>> len(ferels) in (10020, 12393) # FN 1.5 and 1.7, resp. +True +>>> PrettyDict(ferels[0], breakLines=True) +{\(aqID\(aq: 14642, +\(aq_type\(aq: \(aqferelation\(aq, +\(aqframeRelation\(aq: Child=Lively_place>, +\(aqsubFE\(aq: , +\(aqsubFEName\(aq: \(aqDegree\(aq, +\(aqsubFrame\(aq: , +\(aqsubID\(aq: 11370, +\(aqsupID\(aq: 2271, +\(aqsuperFE\(aq: , +\(aqsuperFEName\(aq: \(aqDegree\(aq, +\(aqsuperFrame\(aq: , +\(aqtype\(aq: } +.ft P +.fi +.INDENT 7.0 +.TP +.B Returns +A list of all of the frame element relations in framenet +.TP +.B Return type +list(dict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B fes(name=None, frame=None) +Lists frame element objects. If \(aqname\(aq is provided, this is treated as +a case\-insensitive regular expression to filter by frame name. +(Case\-insensitivity is because casing of frame element names is not always +consistent across frames.) Specify \(aqframe\(aq to filter by a frame name pattern, +ID, or object. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> fn.fes(\(aqNoise_maker\(aq) +[] +>>> sorted([(fe.frame.name,fe.name) for fe in fn.fes(\(aqsound\(aq)]) +[(\(aqCause_to_make_noise\(aq, \(aqSound_maker\(aq), (\(aqMake_noise\(aq, \(aqSound\(aq), + (\(aqMake_noise\(aq, \(aqSound_source\(aq), (\(aqSound_movement\(aq, \(aqLocation_of_sound_source\(aq), + (\(aqSound_movement\(aq, \(aqSound\(aq), (\(aqSound_movement\(aq, \(aqSound_source\(aq), + (\(aqSounds\(aq, \(aqComponent_sound\(aq), (\(aqSounds\(aq, \(aqLocation_of_sound_source\(aq), + (\(aqSounds\(aq, \(aqSound_source\(aq), (\(aqVocalizations\(aq, \(aqLocation_of_sound_source\(aq), + (\(aqVocalizations\(aq, \(aqSound_source\(aq)] +>>> sorted([(fe.frame.name,fe.name) for fe in fn.fes(\(aqsound\(aq,r\(aq(?i)make_noise\(aq)]) +[(\(aqCause_to_make_noise\(aq, \(aqSound_maker\(aq), + (\(aqMake_noise\(aq, \(aqSound\(aq), + (\(aqMake_noise\(aq, \(aqSound_source\(aq)] +>>> sorted(set(fe.name for fe in fn.fes(\(aq^sound\(aq))) +[\(aqSound\(aq, \(aqSound_maker\(aq, \(aqSound_source\(aq] +>>> len(fn.fes(\(aq^sound$\(aq)) +2 +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBname\fP (\fIstr\fP) \-\- A regular expression pattern used to match against +frame element names. If \(aqname\(aq is None, then a list of all +frame elements will be returned. 
+.TP +.B Returns +A list of matching frame elements +.TP +.B Return type +list(AttrDict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B frame(fn_fid_or_fname, ignorekeys=[]) +Get the details for the specified Frame using the frame\(aqs name +or id number. +.sp +Usage examples: +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> f = fn.frame(256) +>>> f.name +\(aqMedical_specialties\(aq +>>> f = fn.frame(\(aqMedical_specialties\(aq) +>>> f.ID +256 +>>> # ensure non\-ASCII character in definition doesn\(aqt trigger an encoding error: +>>> fn.frame(\(aqImposing_obligation\(aq) +frame (1494): Imposing_obligation... +.ft P +.fi +.sp +The dict that is returned from this function will contain the +following information about the Frame: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : the name of the Frame (e.g. \(aqBirth\(aq, \(aqApply_heat\(aq, etc.) +.IP \(bu 2 +\(aqdefinition\(aq : textual definition of the Frame +.IP \(bu 2 +\(aqID\(aq : the internal ID number of the Frame +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsemTypes\(aq +a list of semantic types for this frame.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B Each item in the list is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : can be used with the semtype() function +.IP \(bu 2 +\(aqID\(aq : can be used with the semtype() function +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlexUnit\(aq +a dict containing all of the LUs for this frame. +The keys in this dict are the names of the LUs and +the value for each key is itself a dict containing +info about the LU (see the lu() function for more info.) +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqFE\(aq +a dict containing the Frame Elements that are part of this frame.INDENT 7.0 +.INDENT 3.5 +The keys in this dict are the names of the FEs (e.g. \(aqBody_system\(aq) +and the values are dicts containing the following keys +.UNINDENT +.UNINDENT +.INDENT 7.0 +.IP \(bu 2 +\(aqdefinition\(aq : The definition of the FE +.IP \(bu 2 +\(aqname\(aq : The name of the FE e.g. \(aqBody_system\(aq +.IP \(bu 2 +\(aqID\(aq : The id number +.IP \(bu 2 +\(aq_type\(aq : \(aqfe\(aq +.IP \(bu 2 +\(aqabbrev\(aq : Abbreviation e.g. \(aqbod\(aq +.IP \(bu 2 +\(aqcoreType\(aq : one of "Core", "Peripheral", or "Extra\-Thematic" +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsemType\(aq +if not None, a dict with the following two keys:.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqname\(aq +name of the semantic type. can be used with +the semtype() function +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqID\(aq +id number of the semantic type. 
can be used with +the semtype() function +.UNINDENT +.UNINDENT +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqrequiresFE\(aq +if not None, a dict with the following two keys:.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : the name of another FE in this frame +.IP \(bu 2 +\(aqID\(aq : the id of the other FE in this frame +.UNINDENT +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqexcludesFE\(aq +if not None, a dict with the following two keys:.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : the name of another FE in this frame +.IP \(bu 2 +\(aqID\(aq : the id of the other FE in this frame +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.IP \(bu 2 +\(aqframeRelation\(aq : a list of objects describing frame relations +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqFEcoreSets\(aq +a list of Frame Element core sets for this frame.INDENT 7.0 +.IP \(bu 2 +Each item in the list is a list of FE objects +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfn_fid_or_fname\fP (\fIint\fP\fI or \fP\fIstr\fP) \-\- The Framenet name or id number of the frame +.IP \(bu 2 +\fBignorekeys\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The keys to ignore. These keys will not be +included in the output. (optional) +.UNINDENT +.TP +.B Returns +Information about a frame +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B frame_by_id(fn_fid, ignorekeys=[]) +Get the details for the specified Frame using the frame\(aqs id +number. +.sp +Usage examples: +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> f = fn.frame_by_id(256) +>>> f.ID +256 +>>> f.name +\(aqMedical_specialties\(aq +>>> f.definition +"This frame includes words that name ..." +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfn_fid\fP (\fIint\fP) \-\- The Framenet id number of the frame +.IP \(bu 2 +\fBignorekeys\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The keys to ignore. These keys will not be +included in the output. (optional) +.UNINDENT +.TP +.B Returns +Information about a frame +.TP +.B Return type +dict +.UNINDENT +.sp +Also see the \fBframe()\fP function for details about what is +contained in the dict that is returned. +.UNINDENT +.INDENT 7.0 +.TP +.B frame_by_name(fn_fname, ignorekeys=[], check_cache=True) +Get the details for the specified Frame using the frame\(aqs name. +.sp +Usage examples: +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> f = fn.frame_by_name(\(aqMedical_specialties\(aq) +>>> f.ID +256 +>>> f.name +\(aqMedical_specialties\(aq +>>> f.definition +"This frame includes words that name ..." +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfn_fname\fP (\fIstr\fP) \-\- The name of the frame +.IP \(bu 2 +\fBignorekeys\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The keys to ignore. These keys will not be +included in the output. (optional) +.UNINDENT +.TP +.B Returns +Information about a frame +.TP +.B Return type +dict +.UNINDENT +.sp +Also see the \fBframe()\fP function for details about what is +contained in the dict that is returned. +.UNINDENT +.INDENT 7.0 +.TP +.B frame_ids_and_names(name=None) +Uses the frame index, which is much faster than looking up each frame definition +if only the names and IDs are needed. +.UNINDENT +.INDENT 7.0 +.TP +.B frame_relation_types() +Obtain a list of frame relation types. 
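+.sp
+For a quick overview it is often enough to pull out just the relation type
+names before running the fuller query below (a minimal sketch; the exact
+inventory of types differs slightly between FrameNet releases):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> names = sorted(rt.name for rt in fn.frame_relation_types())
+>>> \(aqInheritance\(aq in names
+True
+.ft P
+.fi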
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> frts = sorted(fn.frame_relation_types(), key=itemgetter(\(aqID\(aq))
+>>> isinstance(frts, list)
+True
+>>> len(frts) in (9, 10) # FN 1.5 and 1.7, resp.
+True
+>>> PrettyDict(frts[0], breakLines=True)
+{\(aqID\(aq: 1,
+ \(aq_type\(aq: \(aqframerelationtype\(aq,
+ \(aqframeRelations\(aq: [<Parent=... \-\- Inheritance \-> Child=Change_of_consistency>, <Parent=... \-\- Inheritance \-> Child=Rotting>, ...],
+ \(aqname\(aq: \(aqInheritance\(aq,
+ \(aqsubFrameName\(aq: \(aqChild\(aq,
+ \(aqsuperFrameName\(aq: \(aqParent\(aq}
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+A list of all of the frame relation types in framenet
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B frame_relations(frame=None, frame2=None, type=None)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBframe\fP (\fIint\fP\fI or \fP\fIstr\fP\fI or \fP\fIAttrDict\fP) \-\- (optional) frame object, name, or ID; only relations
+involving this frame will be returned
+.IP \(bu 2
+\fBframe2\fP \-\- (optional; \(aqframe\(aq must be a different frame) only show relations
+between the two specified frames, in either direction
+.IP \(bu 2
+\fBtype\fP \-\- (optional) frame relation type (name or object); show only relations
+of this type
+.UNINDENT
+.TP
+.B Returns
+A list of all of the frame relations in framenet
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> frels = fn.frame_relations()
+>>> isinstance(frels, list)
+True
+>>> len(frels) in (1676, 2070) # FN 1.5 and 1.7, resp.
+True
+>>> PrettyList(fn.frame_relations(\(aqCooking_creation\(aq), maxReprSize=0, breakLines=True)
+[<Parent=Intentionally_create \-\- Inheritance \-> Child=Cooking_creation>,
+ <Parent=Apply_heat \-\- Using \-> Child=Cooking_creation>,
+ <MainEntry=Apply_heat \-\- See_also \-> ReferringEntry=Cooking_creation>]
+>>> PrettyList(fn.frame_relations(274), breakLines=True)
+[<Parent=Avoiding \-\- Inheritance \-> Child=Dodging>,
+ <Parent=Avoiding \-\- Inheritance \-> Child=Evading>, ...]
+>>> PrettyList(fn.frame_relations(fn.frame(\(aqCooking_creation\(aq)), breakLines=True)
+[<Parent=Intentionally_create \-\- Inheritance \-> Child=Cooking_creation>,
+ <Parent=Apply_heat \-\- Using \-> Child=Cooking_creation>, ...]
+>>> PrettyList(fn.frame_relations(\(aqCooking_creation\(aq, type=\(aqInheritance\(aq))
+[<Parent=Intentionally_create \-\- Inheritance \-> Child=Cooking_creation>]
+>>> PrettyList(fn.frame_relations(\(aqCooking_creation\(aq, \(aqApply_heat\(aq), breakLines=True)
+[<Parent=Apply_heat \-\- Using \-> Child=Cooking_creation>,
+ <MainEntry=Apply_heat \-\- See_also \-> ReferringEntry=Cooking_creation>]
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B frames(name=None)
+Obtain details for frames matching the given name pattern (or for all
+frames, if no pattern is given).
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> len(fn.frames()) in (1019, 1221) # FN 1.5 and 1.7, resp.
+True
+>>> x = PrettyList(fn.frames(r\(aq(?i)crim\(aq), maxReprSize=0, breakLines=True)
+>>> x.sort(key=itemgetter(\(aqID\(aq))
+>>> x
+[<frame ID=200 name=Criminal_investigation>,
+ <frame ID=500 name=Criminal_process>,
+ <frame ID=692 name=Crime_scenario>,
+ <frame ID=700 name=Committing_crime>]
+.ft P
+.fi
+.sp
+A brief intro to Frames (excerpted from "FrameNet II: Extended
+Theory and Practice" by Ruppenhofer et al., 2010):
+.sp
+A Frame is a script\-like conceptual structure that describes a
+particular type of situation, object, or event along with the
+participants and props that are needed for that Frame. For
+example, the "Apply_heat" frame describes a common situation
+involving a Cook, some Food, and a Heating_Instrument, and is
+evoked by words such as bake, blanch, boil, broil, brown,
+simmer, steam, etc.
+.sp
+The roles of a Frame are called "frame elements" (FEs), and the
+frame\-evoking words are called "lexical units" (LUs).
+.sp
+FrameNet includes relations between Frames. Several types of
+relations are defined, of which the most important are:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+Inheritance: An IS\-A relation. The child frame is a subtype
+of the parent frame, and each FE in the parent is bound to
+a corresponding FE in the child. 
An example is the +"Revenge" frame which inherits from the +"Rewards_and_punishments" frame. +.IP \(bu 2 +Using: The child frame presupposes the parent frame as +background, e.g the "Speed" frame "uses" (or presupposes) +the "Motion" frame; however, not all parent FEs need to be +bound to child FEs. +.IP \(bu 2 +Subframe: The child frame is a subevent of a complex event +represented by the parent, e.g. the "Criminal_process" frame +has subframes of "Arrest", "Arraignment", "Trial", and +"Sentencing". +.IP \(bu 2 +Perspective_on: The child frame provides a particular +perspective on an un\-perspectivized parent frame. A pair of +examples consists of the "Hiring" and "Get_a_job" frames, +which perspectivize the "Employment_start" frame from the +Employer\(aqs and the Employee\(aqs point of view, respectively. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +\fBname\fP (\fIstr\fP) \-\- A regular expression pattern used to match against +Frame names. If \(aqname\(aq is None, then a list of all +Framenet Frames will be returned. +.TP +.B Returns +A list of matching Frames (or all Frames). +.TP +.B Return type +list(AttrDict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B frames_by_lemma(pat) +Returns a list of all frames that contain LUs in which the +\fBname\fP attribute of the LU matches the given regular expression +\fBpat\fP\&. Note that LU names are composed of "lemma.POS", where +the "lemma" part can be made up of either a single lexeme +(e.g. \(aqrun\(aq) or multiple lexemes (e.g. \(aqa little\(aq). +.sp +Note: if you are going to be doing a lot of this type of +searching, you\(aqd want to build an index that maps from lemmas to +frames because each time frames_by_lemma() is called, it has to +search through ALL of the frame XML files in the db. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> from nltk.corpus.reader.framenet import PrettyList +>>> PrettyList(sorted(fn.frames_by_lemma(r\(aq(?i)a little\(aq), key=itemgetter(\(aqID\(aq))) +[, ] +.ft P +.fi +.INDENT 7.0 +.TP +.B Returns +A list of frame objects. +.TP +.B Return type +list(AttrDict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B ft_sents(docNamePattern=None) +Full\-text annotation sentences, optionally filtered by document name. +.UNINDENT +.INDENT 7.0 +.TP +.B help(attrname=None) +Display help information summarizing the main methods. +.UNINDENT +.INDENT 7.0 +.TP +.B lu(fn_luid, ignorekeys=[], luName=None, frameID=None, frameName=None) +Access a lexical unit by its ID. luName, frameID, and frameName are used +only in the event that the LU does not have a file in the database +(which is the case for LUs with "Problem" status); in this case, +a placeholder LU is created which just contains its name, ID, and frame. 
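+.sp
+A plain lookup never needs the extra keyword arguments; they only matter
+for the fallback case described above (a minimal sketch; the commented
+call merely illustrates the shape of a fallback lookup, with hypothetical
+values):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> fn.lu(256).status   # a normal LU, loaded from its XML file
+\(aqFN1_Sent\(aq
+>>> # For an LU whose status is \(aqProblem\(aq one would write, e.g.:
+>>> # fn.lu(problem_luid, luName=\(aqword.v\(aq, frameID=fid, frameName=\(aqSome_frame\(aq)
+.ft P
+.fi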
+.sp +Usage examples: +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> fn.lu(256).name +\(aqforesee.v\(aq +>>> fn.lu(256).definition +\(aqCOD: be aware of beforehand; predict.\(aq +>>> fn.lu(256).frame.name +\(aqExpectation\(aq +>>> pprint(list(map(PrettyDict, fn.lu(256).lexemes))) +[{\(aqPOS\(aq: \(aqV\(aq, \(aqbreakBefore\(aq: \(aqfalse\(aq, \(aqheadword\(aq: \(aqfalse\(aq, \(aqname\(aq: \(aqforesee\(aq, \(aqorder\(aq: 1}] +.ft P +.fi +.sp +.nf +.ft C +>>> fn.lu(227).exemplars[23] +exemplar sentence (352962): +[sentNo] 0 +[aPos] 59699508 + +[LU] (227) guess.v in Coming_to_believe + +[frame] (23) Coming_to_believe + +[annotationSet] 2 annotation sets + +[POS] 18 tags + +[POS_tagset] BNC + +[GF] 3 relations + +[PT] 3 phrases + +[Other] 1 entry + +[text] + [Target] + [FE] + +When he was inside the house , Culley noticed the characteristic + \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + Content + +he would n\(aqt have guessed at . +\-\- ******* \-\- +Co C1 [Evidence:INI] + (Co=Cognizer, C1=Content) + + +.ft P +.fi +.sp +The dict that is returned from this function will contain most of the +following information about the LU. Note that some LUs do not contain +all of these pieces of information \- particularly \(aqtotalAnnotated\(aq and +\(aqincorporatedFE\(aq may be missing in some LUs: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : the name of the LU (e.g. \(aqmerger.n\(aq) +.IP \(bu 2 +\(aqdefinition\(aq : textual definition of the LU +.IP \(bu 2 +\(aqID\(aq : the internal ID number of the LU +.IP \(bu 2 +\(aq_type\(aq : \(aqlu\(aq +.IP \(bu 2 +\(aqstatus\(aq : e.g. \(aqCreated\(aq +.IP \(bu 2 +\(aqframe\(aq : Frame that this LU belongs to +.IP \(bu 2 +\(aqPOS\(aq : the part of speech of this LU (e.g. \(aqN\(aq) +.IP \(bu 2 +\(aqtotalAnnotated\(aq : total number of examples annotated with this LU +.IP \(bu 2 +\(aqincorporatedFE\(aq : FE that incorporates this LU (e.g. \(aqAilment\(aq) +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsentenceCount\(aq +a dict with the following two keys:.INDENT 7.0 +.IP \(bu 2 +\(aqannotated\(aq: number of sentences annotated with this LU +.IP \(bu 2 +\(aqtotal\(aq : total number of sentences with this LU +.UNINDENT +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlexemes\(aq +a list of dicts describing the lemma of this LU. +Each dict in the list contains these keys: +\- \(aqPOS\(aq : part of speech e.g. \(aqN\(aq +\- \(aqname\(aq : either single\-lexeme e.g. \(aqmerger\(aq or +.INDENT 7.0 +.INDENT 3.5 +multi\-lexeme e.g. \(aqa little\(aq +.UNINDENT +.UNINDENT +.INDENT 7.0 +.IP \(bu 2 +\(aqorder\(aq: the order of the lexeme in the lemma (starting from 1) +.IP \(bu 2 +\(aqheadword\(aq: a boolean (\(aqtrue\(aq or \(aqfalse\(aq) +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqbreakBefore\(aq: Can this lexeme be separated from the previous lexeme? +.INDENT 7.0 +.TP +.B Consider: "take over.v" as in: +Germany took over the Netherlands in 2 days. +Germany took the Netherlands over in 2 days. +.UNINDENT +.sp +In this case, \(aqbreakBefore\(aq would be "true" for the lexeme +"over". Contrast this with "take after.v" as in: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.INDENT 3.5 +Mary takes after her grandmother. +.UNINDENT +.UNINDENT +.sp + +.nf +* +.fi +Mary takes her grandmother after. 
+.UNINDENT +.UNINDENT +.sp +In this case, \(aqbreakBefore\(aq would be "false" for the lexeme "after" +.UNINDENT +.UNINDENT +.UNINDENT +.IP \(bu 2 +\(aqlemmaID\(aq : Can be used to connect lemmas in different LUs +.IP \(bu 2 +\(aqsemTypes\(aq : a list of semantic type objects for this LU +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsubCorpus\(aq +a list of subcorpora.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B Each item in the list is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsentence\(aq +a list of sentences in the subcorpus.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B each item in the list is a dict with the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqID\(aq: +.IP \(bu 2 +\(aqsentNo\(aq: +.IP \(bu 2 +\(aqtext\(aq: the text of the sentence +.IP \(bu 2 +\(aqaPos\(aq: +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqannotationSet\(aq: a list of annotation sets +.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B each item in the list is a dict with the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqID\(aq: +.IP \(bu 2 +\(aqstatus\(aq: +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlayer\(aq: a list of layers +.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B each layer is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq: layer name (e.g. \(aqBNC\(aq) +.IP \(bu 2 +\(aqrank\(aq: +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlabel\(aq: a list of labels for the layer +.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B each label is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqstart\(aq: start pos of label in sentence \(aqtext\(aq (0\-based) +.IP \(bu 2 +\(aqend\(aq: end pos of label in sentence \(aqtext\(aq (0\-based) +.IP \(bu 2 +\(aqname\(aq: name of label (e.g. \(aqNN1\(aq) +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.sp +Under the hood, this implementation looks up the lexical unit information +in the \fIframe\fP definition file. That file does not contain +corpus annotations, so the LU files will be accessed on demand if those are +needed. In principle, valence patterns could be loaded here too, +though these are not currently supported. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfn_luid\fP (\fIint\fP) \-\- The id number of the lexical unit +.IP \(bu 2 +\fBignorekeys\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The keys to ignore. These keys will not be +included in the output. (optional) +.UNINDENT +.TP +.B Returns +All information about the lexical unit +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lu_basic(fn_luid) +Returns basic information about the LU whose id is +\fBfn_luid\fP\&. This is basically just a wrapper around the +\fBlu()\fP function with "subCorpus" info excluded. 
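+.sp
+In other words, it should behave like a lu() call with an ignorekeys list
+(a minimal sketch of that relationship; the exact set of excluded keys is
+an assumption and may vary between NLTK versions):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> basic = fn.lu_basic(256)
+>>> \(aqsubCorpus\(aq in basic
+False
+>>> basic.name == fn.lu(256, ignorekeys=[\(aqsubCorpus\(aq, \(aqexemplars\(aq]).name
+True
+.ft P
+.fi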
+.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> lu = PrettyDict(fn.lu_basic(256), breakLines=True) +>>> # ellipses account for differences between FN 1.5 and 1.7 +>>> lu +{\(aqID\(aq: 256, + \(aqPOS\(aq: \(aqV\(aq, + \(aqURL\(aq: \(aqhttps://framenet2.icsi.berkeley.edu/fnReports/data/lu/lu256.xml\(aq, + \(aq_type\(aq: \(aqlu\(aq, + \(aqcBy\(aq: ..., + \(aqcDate\(aq: \(aq02/08/2001 01:27:50 PST Thu\(aq, + \(aqdefinition\(aq: \(aqCOD: be aware of beforehand; predict.\(aq, + \(aqdefinitionMarkup\(aq: \(aqCOD: be aware of beforehand; predict.\(aq, + \(aqframe\(aq: , + \(aqlemmaID\(aq: 15082, + \(aqlexemes\(aq: [{\(aqPOS\(aq: \(aqV\(aq, \(aqbreakBefore\(aq: \(aqfalse\(aq, \(aqheadword\(aq: \(aqfalse\(aq, \(aqname\(aq: \(aqforesee\(aq, \(aqorder\(aq: 1}], + \(aqname\(aq: \(aqforesee.v\(aq, + \(aqsemTypes\(aq: [], + \(aqsentenceCount\(aq: {\(aqannotated\(aq: ..., \(aqtotal\(aq: ...}, + \(aqstatus\(aq: \(aqFN1_Sent\(aq} +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBfn_luid\fP (\fIint\fP) \-\- The id number of the desired LU +.TP +.B Returns +Basic information about the lexical unit +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lu_ids_and_names(name=None) +Uses the LU index, which is much faster than looking up each LU definition +if only the names and IDs are needed. +.UNINDENT +.INDENT 7.0 +.TP +.B lus(name=None, frame=None) +Obtain details for lexical units. +Optionally restrict by lexical unit name pattern, and/or to a certain frame +or frames whose name matches a pattern. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> len(fn.lus()) in (11829, 13572) # FN 1.5 and 1.7, resp. +True +>>> PrettyList(sorted(fn.lus(r\(aq(?i)a little\(aq), key=itemgetter(\(aqID\(aq)), maxReprSize=0, breakLines=True) +[, + , + ] +>>> PrettyList(sorted(fn.lus(r\(aqinterest\(aq, r\(aq(?i)stimulus\(aq), key=itemgetter(\(aqID\(aq))) +[, ] +.ft P +.fi +.sp +A brief intro to Lexical Units (excerpted from "FrameNet II: +Extended Theory and Practice" by Ruppenhofer et. al., 2010): +.sp +A lexical unit (LU) is a pairing of a word with a meaning. For +example, the "Apply_heat" Frame describes a common situation +involving a Cook, some Food, and a Heating Instrument, and is +_evoked_ by words such as bake, blanch, boil, broil, brown, +simmer, steam, etc. These frame\-evoking words are the LUs in the +Apply_heat frame. Each sense of a polysemous word is a different +LU. +.sp +We have used the word "word" in talking about LUs. The reality +is actually rather complex. When we say that the word "bake" is +polysemous, we mean that the lemma "bake.v" (which has the +word\-forms "bake", "bakes", "baked", and "baking") is linked to +three different frames: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +Apply_heat: "Michelle baked the potatoes for 45 minutes." +.IP \(bu 2 +Cooking_creation: "Michelle baked her mother a cake for her birthday." +.IP \(bu 2 +Absorb_heat: "The potatoes have to bake for more than 30 minutes." +.UNINDENT +.UNINDENT +.UNINDENT +.sp +These constitute three different LUs, with different +definitions. +.sp +Multiword expressions such as "given name" and hyphenated words +like "shut\-eye" can also be LUs. Idiomatic phrases such as +"middle of nowhere" and "give the slip (to)" are also defined as +LUs in the appropriate frames ("Isolated_places" and "Evading", +respectively), and their internal structure is not analyzed. +.sp +Framenet provides multiple annotated examples of each sense of a +word (i.e. each LU). 
Moreover, the set of examples +(approximately 20 per LU) illustrates all of the combinatorial +possibilities of the lexical unit. +.sp +Each LU is linked to a Frame, and hence to the other words which +evoke that Frame. This makes the FrameNet database similar to a +thesaurus, grouping together semantically similar words. +.sp +In the simplest case, frame\-evoking words are verbs such as +"fried" in: +.INDENT 7.0 +.INDENT 3.5 +"Matilde fried the catfish in a heavy iron skillet." +.UNINDENT +.UNINDENT +.sp +Sometimes event nouns may evoke a Frame. For example, +"reduction" evokes "Cause_change_of_scalar_position" in: +.INDENT 7.0 +.INDENT 3.5 +"...the reduction of debt levels to $665 million from $2.6 billion." +.UNINDENT +.UNINDENT +.sp +Adjectives may also evoke a Frame. For example, "asleep" may +evoke the "Sleep" frame as in: +.INDENT 7.0 +.INDENT 3.5 +"They were asleep for hours." +.UNINDENT +.UNINDENT +.sp +Many common nouns, such as artifacts like "hat" or "tower", +typically serve as dependents rather than clearly evoking their +own frames. +.INDENT 7.0 +.TP +.B Parameters +\fBname\fP (\fIstr\fP) \-\- +.sp +A regular expression pattern used to search the LU +names. Note that LU names take the form of a dotted +string (e.g. "run.v" or "a little.adv") in which a +lemma precedes the "." and a POS follows the +dot. The lemma may be composed of a single lexeme +(e.g. "run") or of multiple lexemes (e.g. "a +little"). If \(aqname\(aq is not given, then all LUs will +be returned. +.sp +The valid POSes are: +.INDENT 7.0 +.INDENT 3.5 +v \- verb +n \- noun +a \- adjective +adv \- adverb +prep \- preposition +num \- numbers +intj \- interjection +art \- article +c \- conjunction +scon \- subordinating conjunction +.UNINDENT +.UNINDENT + +.TP +.B Returns +A list of selected (or all) lexical units +.TP +.B Return type +list of LU objects (dicts) See the lu() function for info +about the specifics of LU objects. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B propagate_semtypes() +Apply inference rules to distribute semtypes over relations between FEs. +For FrameNet 1.5, this results in 1011 semtypes being propagated. +(Not done by default because it requires loading all frame files, +which takes several seconds. If this needed to be fast, it could be rewritten +to traverse the neighboring relations on demand for each FE semtype.) +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> x = sum(1 for f in fn.frames() for fe in f.FE.values() if fe.semType) +>>> fn.propagate_semtypes() +>>> y = sum(1 for f in fn.frames() for fe in f.FE.values() if fe.semType) +>>> y\-x > 1000 +True +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B semtype(key) +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> fn.semtype(233).name +\(aqTemperature\(aq +>>> fn.semtype(233).abbrev +\(aqTemp\(aq +>>> fn.semtype(\(aqTemperature\(aq).ID +233 +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBkey\fP (\fIstring\fP\fI or \fP\fIint\fP) \-\- The name, abbreviation, or id number of the semantic type +.TP +.B Returns +Information about a semantic type +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B semtype_inherits(st, superST) +.UNINDENT +.INDENT 7.0 +.TP +.B semtypes() +Obtain a list of semantic types. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> stypes = fn.semtypes() +>>> len(stypes) in (73, 109) # FN 1.5 and 1.7, resp. 
+True +>>> sorted(stypes[0].keys()) +[\(aqID\(aq, \(aq_type\(aq, \(aqabbrev\(aq, \(aqdefinition\(aq, \(aqdefinitionMarkup\(aq, \(aqname\(aq, \(aqrootType\(aq, \(aqsubTypes\(aq, \(aqsuperType\(aq] +.ft P +.fi +.INDENT 7.0 +.TP +.B Returns +A list of all of the semantic types in framenet +.TP +.B Return type +list(dict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(exemplars=True, full_text=True) +Annotated sentences matching the specified criteria. +.UNINDENT +.INDENT 7.0 +.TP +.B warnings(v) +Enable or disable warnings of data integrity issues as they are encountered. +If v is truthy, warnings will be enabled. +.sp +(This is a function rather than just an attribute/property to ensure that if +enabling warnings is the first action taken, the corpus reader is instantiated first.) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.corpus.reader.framenet.FramenetError +Bases: \fBException\fP +.sp +An exception class for framenet\-related errors. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.Future(loader, *args, **kwargs) +Bases: \fBobject\fP +.sp +Wraps and acts as a proxy for a value to be loaded lazily (on demand). +Adapted from \fI\%https://gist.github.com/sergey\-miryanov/2935416\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.PrettyDict(*args, **kwargs) +Bases: \fI\%nltk.corpus.reader.framenet.AttrDict\fP +.sp +Displays an abbreviated repr of values where possible. +Inherits from AttrDict, so a callable value will +be lazily converted to an actual value. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.PrettyLazyConcatenation(list_of_lists) +Bases: \fBnltk.collections.LazyConcatenation\fP +.sp +Displays an abbreviated repr of only the first several elements, not the whole list. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.PrettyLazyIteratorList(it, known_len=None) +Bases: \fBnltk.collections.LazyIteratorList\fP +.sp +Displays an abbreviated repr of only the first several elements, not the whole list. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.PrettyLazyMap(function, *lists, **config) +Bases: \fBnltk.collections.LazyMap\fP +.sp +Displays an abbreviated repr of only the first several elements, not the whole list. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.PrettyList(*args, **kwargs) +Bases: \fBlist\fP +.sp +Displays an abbreviated repr of only the first several elements, not the whole list. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.framenet.SpecialList(typ, *args, **kwargs) +Bases: \fBlist\fP +.sp +A list subclass which adds a \(aq_type\(aq attribute for special printing +(similar to an AttrDict, though this is NOT an AttrDict subclass). +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.framenet.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.framenet.mimic_wrap(lines, wrap_at=65, **kwargs) +Wrap the first of \(aqlines\(aq with textwrap and the remaining lines at exactly the same +positions as the first. +.UNINDENT +.SS nltk.corpus.reader.ieer module +.sp +Corpus reader for the Information Extraction and Entity Recognition Corpus. +.sp +NIST 1999 Information Extraction: Entity Recognition Evaluation +\fI\%http://www.itl.nist.gov/iad/894.01/tests/ie\-er/er_99/er_99.htm\fP +.sp +This corpus contains the NEWSWIRE development test data for the +NIST 1999 IE\-ER Evaluation. The files were taken from the +subdirectory: /ie_er_99/english/devtest/newswire/ +.nf +* +.fi +\&.ref.nwt +and filenames were shortened. 
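+.sp
+A typical way to load the bundled reader and inspect a document looks like
+this (a minimal sketch; it assumes the \(aqieer\(aq data package has been
+installed, e.g. via nltk.download()):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import ieer
+>>> sorted(ieer.fileids())
+[\(aqAPW_19980314\(aq, \(aqAPW_19980424\(aq, \(aqAPW_19980429\(aq, \(aqNYT_19980315\(aq, \(aqNYT_19980403\(aq, \(aqNYT_19980407\(aq]
+>>> doc = ieer.parsed_docs(\(aqNYT_19980315\(aq)[0]
+>>> # doc.text is an nltk.tree.Tree whose subtrees mark named entities
+>>> # such as ORGANIZATION, PERSON and LOCATION.
+.ft P
+.fi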
+.sp +The corpus contains the following files: APW_19980314, APW_19980424, +APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ieer.IEERCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B docs(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B parsed_docs(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ieer.IEERDocument(text, docno=None, doctype=None, date_time=None, headline=\(aq\(aq) +Bases: \fBobject\fP +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.ieer.documents = [\(aqAPW_19980314\(aq, \(aqAPW_19980424\(aq, \(aqAPW_19980429\(aq, \(aqNYT_19980315\(aq, \(aqNYT_19980403\(aq, \(aqNYT_19980407\(aq] +A list of all documents in this corpus. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.ieer.titles = {\(aqAPW_19980314\(aq: \(aqAssociated Press Weekly, 14 March 1998\(aq, \(aqAPW_19980424\(aq: \(aqAssociated Press Weekly, 24 April 1998\(aq, \(aqAPW_19980429\(aq: \(aqAssociated Press Weekly, 29 April 1998\(aq, \(aqNYT_19980315\(aq: \(aqNew York Times, 15 March 1998\(aq, \(aqNYT_19980403\(aq: \(aqNew York Times, 3 April 1998\(aq, \(aqNYT_19980407\(aq: \(aqNew York Times, 7 April 1998\(aq} +A dictionary whose keys are the names of documents in this corpus; +and whose values are descriptions of those documents\(aq contents. +.UNINDENT +.SS nltk.corpus.reader.indian module +.sp +Indian Language POS\-Tagged Corpus +Collected by A Kumaran, Microsoft Research, India +Distributed with permission +.INDENT 0.0 +.TP +.B Contents: +.INDENT 7.0 +.IP \(bu 2 +Bangla: IIT Kharagpur +.IP \(bu 2 +Hindi: Microsoft Research India +.IP \(bu 2 +Marathi: IIT Bombay +.IP \(bu 2 +Telugu: IIIT Hyderabad +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.indian.IndianCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +List of words, one per line. Blank lines are ignored. +.INDENT 7.0 +.TP +.B sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.indian.IndianCorpusView(corpus_file, encoding, tagged, group_by_sent, tag_mapping_function=None) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.INDENT 7.0 +.TP +.B read_block(stream) +Read a block from the input stream. +.INDENT 7.0 +.TP +.B Returns +a block of tokens from the input stream +.TP +.B Return type +list(any) +.TP +.B Parameters +\fBstream\fP (\fIstream\fP) \-\- an input stream +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.ipipan module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ipipan.IPIPANCorpusReader(root, fileids) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Corpus reader designed to work with corpus created by IPI PAN. +See \fI\%http://korpus.pl/en/\fP for more details about IPI PAN corpus. +.sp +The corpus includes information about text domain, channel and categories. +You can access possible values using \fBdomains()\fP, \fBchannels()\fP and +\fBcategories()\fP\&. You can use also this metadata to filter files, e.g.: +\fBfileids(channel=\(aqprasa\(aq)\fP, \fBfileids(categories=\(aqpublicystyczny\(aq)\fP\&. +.sp +The reader supports methods: words, sents, paras and their tagged versions. 
+You can get part of speech instead of full tag by giving "simplify_tags=True" +parameter, e.g.: \fBtagged_sents(simplify_tags=True)\fP\&. +.sp +Also you can get all tags disambiguated tags specifying parameter +"one_tag=False", e.g.: \fBtagged_paras(one_tag=False)\fP\&. +.sp +You can get all tags that were assigned by a morphological analyzer specifying +parameter "disamb_only=False", e.g. \fBtagged_words(disamb_only=False)\fP\&. +.sp +The IPIPAN Corpus contains tags indicating if there is a space between two +tokens. To add special "no space" markers, you should specify parameter +"append_no_space=True", e.g. \fBtagged_words(append_no_space=True)\fP\&. +As a result in place where there should be no space between two tokens new +pair (\(aq\(aq, \(aqno\-space\(aq) will be inserted (for tagged data) and just \(aq\(aq for +methods without tags. +.sp +The corpus reader can also try to append spaces between words. To enable this +option, specify parameter "append_space=True", e.g. \fBwords(append_space=True)\fP\&. +As a result either \(aq \(aq or (\(aq \(aq, \(aqspace\(aq) will be inserted between tokens. +.sp +By default, xml entities like " and & are replaced by corresponding +characters. You can turn off this feature, specifying parameter +"replace_xmlentities=False", e.g. \fBwords(replace_xmlentities=False)\fP\&. +.INDENT 7.0 +.TP +.B categories(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B channels(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B domains(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B fileids(channels=None, domains=None, categories=None) +Return a list of file identifiers for the fileids that make up +this corpus. +.UNINDENT +.INDENT 7.0 +.TP +.B paras(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None, **kwargs) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ipipan.IPIPANCorpusView(filename, startpos=0, **kwargs) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.INDENT 7.0 +.TP +.B PARAS_MODE = 2 +.UNINDENT +.INDENT 7.0 +.TP +.B SENTS_MODE = 1 +.UNINDENT +.INDENT 7.0 +.TP +.B WORDS_MODE = 0 +.UNINDENT +.INDENT 7.0 +.TP +.B read_block(stream) +Read a block from the input stream. +.INDENT 7.0 +.TP +.B Returns +a block of tokens from the input stream +.TP +.B Return type +list(any) +.TP +.B Parameters +\fBstream\fP (\fIstream\fP) \-\- an input stream +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.knbc module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.knbc.KNBCorpusReader(root, fileids, encoding=\(aqutf8\(aq, morphs2str=>) +Bases: \fI\%nltk.corpus.reader.api.SyntaxCorpusReader\fP +.INDENT 7.0 +.TP +.B This class implements: +.INDENT 7.0 +.IP \(bu 2 +\fB__init__\fP, which specifies the location of the corpus +and a method for detecting the sentence blocks in corpus files. +.IP \(bu 2 +\fB_read_block\fP, which reads a block from the input stream. +.IP \(bu 2 +\fB_word\fP, which takes a block and returns a list of list of words. +.IP \(bu 2 +\fB_tag\fP, which takes a block and returns a list of list of tagged +words. +.IP \(bu 2 +\fB_parse\fP, which takes a block and returns a list of parsed +sentences. 
+.UNINDENT +.TP +.B The structure of tagged words: +tagged_word = (word(str), tags(tuple)) +tags = (surface, reading, lemma, pos1, posid1, pos2, posid2, pos3, posid3, others ...) +.UNINDENT +.sp +.nf +.ft C +>>> from nltk.corpus.util import LazyCorpusLoader +>>> knbc = LazyCorpusLoader( +\&... \(aqknbc/corpus1\(aq, +\&... KNBCorpusReader, +\&... r\(aq.*/KN.*\(aq, +\&... encoding=\(aqeuc\-jp\(aq, +\&... ) +.ft P +.fi +.sp +.nf +.ft C +>>> len(knbc.sents()[0]) +9 +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.knbc.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.knbc.test() +.UNINDENT +.SS nltk.corpus.reader.lin module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.lin.LinThesaurusCorpusReader(root, badscore=0.0) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Wrapper for the LISP\-formatted thesauruses distributed by Dekang Lin. +.INDENT 7.0 +.TP +.B scored_synonyms(ngram, fileid=None) +Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBngram\fP (\fIC{string}\fP) \-\- ngram to lookup +.IP \(bu 2 +\fBfileid\fP (\fIC{string}\fP) \-\- thesaurus fileid to search in. If None, search all fileids. +.UNINDENT +.TP +.B Returns +If fileid is specified, list of tuples of scores and synonyms; otherwise, +list of tuples of fileids and lists, where inner lists consist of tuples of +scores and synonyms. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B similarity(ngram1, ngram2, fileid=None) +Returns the similarity score for two ngrams. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBngram1\fP (\fIC{string}\fP) \-\- first ngram to compare +.IP \(bu 2 +\fBngram2\fP (\fIC{string}\fP) \-\- second ngram to compare +.IP \(bu 2 +\fBfileid\fP (\fIC{string}\fP) \-\- thesaurus fileid to search in. If None, search all fileids. +.UNINDENT +.TP +.B Returns +If fileid is specified, just the score for the two ngrams; otherwise, +list of tuples of fileids and scores. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B synonyms(ngram, fileid=None) +Returns a list of synonyms for the current ngram. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBngram\fP (\fIC{string}\fP) \-\- ngram to lookup +.IP \(bu 2 +\fBfileid\fP (\fIC{string}\fP) \-\- thesaurus fileid to search in. If None, search all fileids. +.UNINDENT +.TP +.B Returns +If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and +lists, where inner lists contain synonyms. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.lin.demo() +.UNINDENT +.SS nltk.corpus.reader.mte module +.sp +A reader for corpora whose documents are in MTE format. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.mte.MTECorpusReader(root=None, fileids=None, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP +.sp +Reader for corpora following the TEI\-p5 xml scheme, such as MULTEXT\-East. +MULTEXT\-East contains part\-of\-speech\-tagged words with a quite precise tagging +scheme. These tags can be converted to the Universal tagset +.INDENT 7.0 +.TP +.B lemma_paras(fileids=None) +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- A list specifying the fileids that should be used. 
+.TP +.B Returns +the given file(s) as a list of paragraphs, each encoded as a +list of sentences, which are in turn encoded as a list of +tuples of the word and the corresponding lemma (word, lemma) +.TP +.B Return type +list(List(List(tuple(str, str)))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lemma_sents(fileids=None) +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.TP +.B Returns +the given file(s) as a list of sentences or utterances, each +encoded as a list of tuples of the word and the corresponding +lemma (word, lemma) +.TP +.B Return type +list(list(tuple(str, str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lemma_words(fileids=None) +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.TP +.B Returns +the given file(s) as a list of words, the corresponding lemmas +and punctuation symbols, encoded as tuples (word, lemma) +.TP +.B Return type +list(tuple(str,str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B paras(fileids=None) +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.TP +.B Returns +the given file(s) as a list of paragraphs, each encoded as a list +of sentences, which are in turn encoded as lists of word string +.TP +.B Return type +list(list(list(str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.TP +.B Returns +the given file(s) as a list of sentences or utterances, +each encoded as a list of word strings +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(fileids=None, tagset=\(aqmsd\(aq, tags=\(aq\(aq) +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.IP \(bu 2 +\fBtagset\fP \-\- The tagset that should be used in the returned object, +either "universal" or "msd", "msd" is the default +.IP \(bu 2 +\fBtags\fP \-\- An MSD Tag that is used to filter all parts of the used corpus +that are not more precise or at least equal to the given tag +.UNINDENT +.TP +.B Returns +the given file(s) as a list of paragraphs, each encoded as a +list of sentences, which are in turn encoded as a list +of (word,tag) tuples +.TP +.B Return type +list(list(list(tuple(str, str)))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=\(aqmsd\(aq, tags=\(aq\(aq) +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.IP \(bu 2 +\fBtagset\fP \-\- The tagset that should be used in the returned object, +either "universal" or "msd", "msd" is the default +.IP \(bu 2 +\fBtags\fP \-\- An MSD Tag that is used to filter all parts of the used corpus +that are not more precise or at least equal to the given tag +.UNINDENT +.TP +.B Returns +the given file(s) as a list of sentences or utterances, each +each encoded as a list of (word,tag) tuples +.TP +.B Return type +list(list(tuple(str, str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=\(aqmsd\(aq, tags=\(aq\(aq) +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfileids\fP \-\- A list specifying the fileids that should be used. 
+.IP \(bu 2 +\fBtagset\fP \-\- The tagset that should be used in the returned object, +either "universal" or "msd", "msd" is the default +.IP \(bu 2 +\fBtags\fP \-\- An MSD Tag that is used to filter all parts of the used corpus +that are not more precise or at least equal to the given tag +.UNINDENT +.TP +.B Returns +the given file(s) as a list of tagged words and punctuation symbols +encoded as tuples (word, tag) +.TP +.B Return type +list(tuple(str, str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.TP +.B Returns +the given file(s) as a list of words and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.mte.MTECorpusView(fileid, tagspec, elt_handler=None) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusView\fP +.sp +Class for lazy viewing the MTE Corpus. +.INDENT 7.0 +.TP +.B read_block(stream, tagspec=None, elt_handler=None) +Read from \fBstream\fP until we find at least one element that +matches \fBtagspec\fP, and return the result of applying +\fBelt_handler\fP to each element found. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.mte.MTEFileReader(file_path) +Bases: \fBobject\fP +.sp +Class for loading the content of the multext\-east corpus. It +parses the xml files and does some tag\-filtering depending on the +given method parameters. +.INDENT 7.0 +.TP +.B lemma_paras() +.UNINDENT +.INDENT 7.0 +.TP +.B lemma_sents() +.UNINDENT +.INDENT 7.0 +.TP +.B lemma_words() +.UNINDENT +.INDENT 7.0 +.TP +.B ns = {\(aqtei\(aq: \(aqhttp://www.tei\-c.org/ns/1.0\(aq, \(aqxml\(aq: \(aqhttp://www.w3.org/XML/1998/namespace\(aq} +.UNINDENT +.INDENT 7.0 +.TP +.B para_path = \(aqTEI/text/body/div/div/p\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B paras() +.UNINDENT +.INDENT 7.0 +.TP +.B sent_path = \(aqTEI/text/body/div/div/p/s\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B sents() +.UNINDENT +.INDENT 7.0 +.TP +.B tag_ns = \(aq{http://www.tei\-c.org/ns/1.0}\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(tagset, tags) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(tagset, tags) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(tagset, tags) +.UNINDENT +.INDENT 7.0 +.TP +.B word_path = \(aqTEI/text/body/div/div/p/s/(w|c)\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B words() +.UNINDENT +.INDENT 7.0 +.TP +.B xml_ns = \(aq{http://www.w3.org/XML/1998/namespace}\(aq +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.mte.MTETagConverter +Bases: \fBobject\fP +.sp +Class for converting msd tags to universal tags, more conversion +options are currently not implemented. +.INDENT 7.0 +.TP +.B mapping_msd_universal = {\(aq\-\(aq: \(aqX\(aq, \(aq.\(aq: \(aq.\(aq, \(aqA\(aq: \(aqADJ\(aq, \(aqC\(aq: \(aqCONJ\(aq, \(aqD\(aq: \(aqDET\(aq, \(aqM\(aq: \(aqNUM\(aq, \(aqN\(aq: \(aqNOUN\(aq, \(aqP\(aq: \(aqPRON\(aq, \(aqQ\(aq: \(aqPRT\(aq, \(aqR\(aq: \(aqADV\(aq, \(aqS\(aq: \(aqADP\(aq, \(aqV\(aq: \(aqVERB\(aq} +.UNINDENT +.INDENT 7.0 +.TP +.B static msd_to_universal(tag) +This function converts the annotation from the Multex\-East to the universal tagset +as described in Chapter 5 of the NLTK\-Book +.sp +Unknown Tags will be mapped to X. 
Punctuation marks are not supported in MSD tags, so +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.mte.xpath(root, path, ns) +.UNINDENT +.SS nltk.corpus.reader.nkjp module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nkjp.NKJPCorpusReader(root, fileids=\(aq.*\(aq) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.INDENT 7.0 +.TP +.B HEADER_MODE = 2 +.UNINDENT +.INDENT 7.0 +.TP +.B RAW_MODE = 3 +.UNINDENT +.INDENT 7.0 +.TP +.B SENTS_MODE = 1 +.UNINDENT +.INDENT 7.0 +.TP +.B WORDS_MODE = 0 +.UNINDENT +.INDENT 7.0 +.TP +.B add_root(fileid) +Add root if necessary to specified fileid. +.UNINDENT +.INDENT 7.0 +.TP +.B fileids() +Returns a list of file identifiers for the fileids that make up +this corpus. +.UNINDENT +.INDENT 7.0 +.TP +.B get_paths() +.UNINDENT +.INDENT 7.0 +.TP +.B header(fileids=None, **kwargs) +Returns header(s) of specified fileids. +.UNINDENT +.INDENT 7.0 +.TP +.B raw(fileids=None, **kwargs) +Returns words in specified fileids. +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None, **kwargs) +Returns sentences in specified fileids. +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, **kwargs) +Call with specified tags as a list, e.g. tags=[\(aqsubst\(aq, \(aqcomp\(aq]. +Returns tagged words in specified fileids. +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None, **kwargs) +Returns words in specified fileids. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nkjp.NKJPCorpus_Header_View(filename, **kwargs) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusView\fP +.INDENT 7.0 +.TP +.B handle_elt(elt, context) +Convert an element into an appropriate value for inclusion in +the view. Unless overridden by a subclass or by the +\fBelt_handler\fP constructor argument, this method simply +returns \fBelt\fP\&. +.INDENT 7.0 +.TP +.B Returns +The view value corresponding to \fBelt\fP\&. +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBelt\fP (\fIElementTree\fP) \-\- The element that should be converted. +.IP \(bu 2 +\fBcontext\fP (\fIstr\fP) \-\- A string composed of element tags separated by +forward slashes, indicating the XML context of the given +element. For example, the string \fB\(aqfoo/bar/baz\(aq\fP +indicates that the element is a \fBbaz\fP element whose +parent is a \fBbar\fP element and whose grandparent is a +top\-level \fBfoo\fP element. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B handle_query() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nkjp.NKJPCorpus_Morph_View(filename, **kwargs) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusView\fP +.sp +A stream backed corpus view specialized for use with +ann_morphosyntax.xml files in NKJP corpus. +.INDENT 7.0 +.TP +.B handle_elt(elt, context) +Convert an element into an appropriate value for inclusion in +the view. Unless overridden by a subclass or by the +\fBelt_handler\fP constructor argument, this method simply +returns \fBelt\fP\&. +.INDENT 7.0 +.TP +.B Returns +The view value corresponding to \fBelt\fP\&. +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBelt\fP (\fIElementTree\fP) \-\- The element that should be converted. +.IP \(bu 2 +\fBcontext\fP (\fIstr\fP) \-\- A string composed of element tags separated by +forward slashes, indicating the XML context of the given +element. For example, the string \fB\(aqfoo/bar/baz\(aq\fP +indicates that the element is a \fBbaz\fP element whose +parent is a \fBbar\fP element and whose grandparent is a +top\-level \fBfoo\fP element. 
+.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B handle_query() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nkjp.NKJPCorpus_Segmentation_View(filename, **kwargs) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusView\fP +.sp +A stream backed corpus view specialized for use with +ann_segmentation.xml files in NKJP corpus. +.INDENT 7.0 +.TP +.B get_segm_id(example_word) +.UNINDENT +.INDENT 7.0 +.TP +.B get_sent_beg(beg_word) +.UNINDENT +.INDENT 7.0 +.TP +.B get_sent_end(end_word) +.UNINDENT +.INDENT 7.0 +.TP +.B get_sentences(sent_segm) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_elt(elt, context) +Convert an element into an appropriate value for inclusion in +the view. Unless overridden by a subclass or by the +\fBelt_handler\fP constructor argument, this method simply +returns \fBelt\fP\&. +.INDENT 7.0 +.TP +.B Returns +The view value corresponding to \fBelt\fP\&. +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBelt\fP (\fIElementTree\fP) \-\- The element that should be converted. +.IP \(bu 2 +\fBcontext\fP (\fIstr\fP) \-\- A string composed of element tags separated by +forward slashes, indicating the XML context of the given +element. For example, the string \fB\(aqfoo/bar/baz\(aq\fP +indicates that the element is a \fBbaz\fP element whose +parent is a \fBbar\fP element and whose grandparent is a +top\-level \fBfoo\fP element. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B handle_query() +.UNINDENT +.INDENT 7.0 +.TP +.B remove_choice(segm) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nkjp.NKJPCorpus_Text_View(filename, **kwargs) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusView\fP +.sp +A stream backed corpus view specialized for use with +text.xml files in NKJP corpus. +.INDENT 7.0 +.TP +.B RAW_MODE = 1 +.UNINDENT +.INDENT 7.0 +.TP +.B SENTS_MODE = 0 +.UNINDENT +.INDENT 7.0 +.TP +.B get_segm_id(elt) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_elt(elt, context) +Convert an element into an appropriate value for inclusion in +the view. Unless overridden by a subclass or by the +\fBelt_handler\fP constructor argument, this method simply +returns \fBelt\fP\&. +.INDENT 7.0 +.TP +.B Returns +The view value corresponding to \fBelt\fP\&. +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBelt\fP (\fIElementTree\fP) \-\- The element that should be converted. +.IP \(bu 2 +\fBcontext\fP (\fIstr\fP) \-\- A string composed of element tags separated by +forward slashes, indicating the XML context of the given +element. For example, the string \fB\(aqfoo/bar/baz\(aq\fP +indicates that the element is a \fBbaz\fP element whose +parent is a \fBbar\fP element and whose grandparent is a +top\-level \fBfoo\fP element. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B handle_query() +.UNINDENT +.INDENT 7.0 +.TP +.B read_block(stream, tagspec=None, elt_handler=None) +Returns text as a list of sentences. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nkjp.XML_Tool(root, filename) +Bases: \fBobject\fP +.sp +Helper class creating xml file to one without references to nkjp: namespace. 
+That\(aqs needed because the XMLCorpusView assumes that one can find short substrings +of XML that are valid XML, which is not true if a namespace is declared at top level +.INDENT 7.0 +.TP +.B build_preprocessed_file() +.UNINDENT +.INDENT 7.0 +.TP +.B remove_preprocessed_file() +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.nombank module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nombank.NombankChainTreePointer(pieces) +Bases: \fI\%nltk.corpus.reader.nombank.NombankPointer\fP +.INDENT 7.0 +.TP +.B pieces +A list of the pieces that make up this chain. Elements may +be either \fBNombankSplitTreePointer\fP or +\fBNombankTreePointer\fP pointers. +.UNINDENT +.INDENT 7.0 +.TP +.B select(tree) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nombank.NombankCorpusReader(root, nomfile, framefiles=\(aq\(aq, nounsfile=None, parse_fileid_xform=None, parse_corpus=None, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Corpus reader for the nombank corpus, which augments the Penn +Treebank with information about the predicate argument structure +of every noun instance. The corpus consists of two parts: the +predicate\-argument annotations themselves, and a set of "frameset +files" which define the argument labels used by the annotations, +on a per\-noun basis. Each "frameset file" contains one or more +predicates, such as \fB\(aqturn\(aq\fP or \fB\(aqturn_on\(aq\fP, each of which is +divided into coarse\-grained word senses called "rolesets". For +each "roleset", the frameset file provides descriptions of the +argument roles, along with examples. +.INDENT 7.0 +.TP +.B instances(baseform=None) +.INDENT 7.0 +.TP +.B Returns +a corpus view that acts as a list of +.UNINDENT +.sp +\fBNombankInstance\fP objects, one for each noun in the corpus. +.UNINDENT +.INDENT 7.0 +.TP +.B lines() +.INDENT 7.0 +.TP +.B Returns +a corpus view that acts as a list of strings, one for +.UNINDENT +.sp +each line in the predicate\-argument annotation file. +.UNINDENT +.INDENT 7.0 +.TP +.B nouns() +.INDENT 7.0 +.TP +.B Returns +a corpus view that acts as a list of all noun lemmas +.UNINDENT +.sp +in this corpus (from the nombank.1.0.words file). +.UNINDENT +.INDENT 7.0 +.TP +.B roleset(roleset_id) +.INDENT 7.0 +.TP +.B Returns +the xml description for the given roleset. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B rolesets(baseform=None) +.INDENT 7.0 +.TP +.B Returns +list of xml descriptions for rolesets. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nombank.NombankInstance(fileid, sentnum, wordnum, baseform, sensenumber, predicate, predid, arguments, parse_corpus=None) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B arguments +A list of tuples (argloc, argid), specifying the location +and identifier for each of the predicate\(aqs argument in the +containing sentence. Argument identifiers are strings such as +\fB\(aqARG0\(aq\fP or \fB\(aqARGM\-TMP\(aq\fP\&. This list does \fInot\fP contain +the predicate. +.UNINDENT +.INDENT 7.0 +.TP +.B baseform +The baseform of the predicate. +.UNINDENT +.INDENT 7.0 +.TP +.B fileid +The name of the file containing the parse tree for this +instance\(aqs sentence. +.UNINDENT +.INDENT 7.0 +.TP +.B static parse(s, parse_fileid_xform=None, parse_corpus=None) +.UNINDENT +.INDENT 7.0 +.TP +.B parse_corpus +A corpus reader for the parse trees corresponding to the +instances in this nombank corpus. 
+.UNINDENT +.INDENT 7.0 +.TP +.B predicate +A \fBNombankTreePointer\fP indicating the position of this +instance\(aqs predicate within its containing sentence. +.UNINDENT +.INDENT 7.0 +.TP +.B predid +Identifier of the predicate. +.UNINDENT +.INDENT 7.0 +.TP +.B property roleset +The name of the roleset used by this instance\(aqs predicate. +Use \fBnombank.roleset() \fP to +look up information about the roleset. +.UNINDENT +.INDENT 7.0 +.TP +.B sensenumber +The sense number of the predicate. +.UNINDENT +.INDENT 7.0 +.TP +.B sentnum +The sentence number of this sentence within \fBfileid\fP\&. +Indexing starts from zero. +.UNINDENT +.INDENT 7.0 +.TP +.B property tree +The parse tree corresponding to this instance, or None if +the corresponding tree is not available. +.UNINDENT +.INDENT 7.0 +.TP +.B wordnum +The word number of this instance\(aqs predicate within its +containing sentence. Word numbers are indexed starting from +zero, and include traces and other empty parse elements. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nombank.NombankPointer +Bases: \fBobject\fP +.sp +A pointer used by nombank to identify one or more constituents in +a parse tree. \fBNombankPointer\fP is an abstract base class with +three concrete subclasses: +.INDENT 7.0 +.IP \(bu 2 +\fBNombankTreePointer\fP is used to point to single constituents. +.IP \(bu 2 +\fBNombankSplitTreePointer\fP is used to point to \(aqsplit\(aq +constituents, which consist of a sequence of two or more +\fBNombankTreePointer\fP pointers. +.IP \(bu 2 +\fBNombankChainTreePointer\fP is used to point to entire trace +chains in a tree. It consists of a sequence of pieces, which +can be \fBNombankTreePointer\fP or \fBNombankSplitTreePointer\fP pointers. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nombank.NombankSplitTreePointer(pieces) +Bases: \fI\%nltk.corpus.reader.nombank.NombankPointer\fP +.INDENT 7.0 +.TP +.B pieces +A list of the pieces that make up this chain. Elements are +all \fBNombankTreePointer\fP pointers. +.UNINDENT +.INDENT 7.0 +.TP +.B select(tree) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nombank.NombankTreePointer(wordnum, height) +Bases: \fI\%nltk.corpus.reader.nombank.NombankPointer\fP +.sp +wordnum:height*wordnum:height*... +wordnum:height, +.INDENT 7.0 +.TP +.B static parse(s) +.UNINDENT +.INDENT 7.0 +.TP +.B select(tree) +.UNINDENT +.INDENT 7.0 +.TP +.B treepos(tree) +Convert this pointer to a standard \(aqtree position\(aq pointer, +given that it points to the given tree. +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.nps_chat module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.nps_chat.NPSChatCorpusReader(root, fileids, wrap_etree=False, tagset=None) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.INDENT 7.0 +.TP +.B posts(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_posts(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +Returns all of the words and punctuation symbols in the specified file +that were in text nodes \-\- ie, tags are ignored. Like the xml() method, +fileid can only specify one file. +.INDENT 7.0 +.TP +.B Returns +the given file\(aqs text nodes as a list of words and punctuation symbols +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B xml_posts(fileids=None) +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.opinion_lexicon module +.sp +CorpusReader for the Opinion Lexicon. 
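+.sp
+A quick membership check against the two word lists (a small sketch; it
+assumes the \(aqopinion_lexicon\(aq data package is installed):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import opinion_lexicon
+>>> \(aqgreat\(aq in opinion_lexicon.positive()
+True
+>>> \(aqterrible\(aq in opinion_lexicon.negative()
+True
+.ft P
+.fi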
+.INDENT 0.0
+.IP \(bu 2
+Opinion Lexicon information \-
+.UNINDENT
+.INDENT 0.0
+.TP
+.B Authors: Minqing Hu and Bing Liu, 2004.
+Department of Computer Science
+University of Illinois at Chicago
+.TP
+.B Contact: Bing Liu, \fI\%liub@cs.uic.edu\fP
+\fI\%http://www.cs.uic.edu/~liub\fP
+.UNINDENT
+.sp
+Distributed with permission.
+.sp
+Related papers:
+.INDENT 0.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Minqing Hu and Bing Liu. "Mining and summarizing customer reviews".
+Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery
+& Data Mining (KDD\-04), Aug 22\-25, 2004, Seattle, Washington, USA.
+.UNINDENT
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing and
+Comparing Opinions on the Web". Proceedings of the 14th International World
+Wide Web conference (WWW\-2005), May 10\-14, 2005, Chiba, Japan.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.opinion_lexicon.IgnoreReadmeCorpusView(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.sp
+This CorpusView is used to skip the initial readme block of the corpus.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.opinion_lexicon.OpinionLexiconCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP
+.sp
+Reader for the Liu and Hu opinion lexicon. Blank lines and the readme are ignored.
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import opinion_lexicon
+>>> opinion_lexicon.words()
+[\(aq2\-faced\(aq, \(aq2\-faces\(aq, \(aqabnormal\(aq, \(aqabolish\(aq, ...]
+.ft P
+.fi
+.sp
+The OpinionLexiconCorpusReader provides shortcuts to retrieve positive/negative
+words:
+.sp
+.nf
+.ft C
+>>> opinion_lexicon.negative()
+[\(aq2\-faced\(aq, \(aq2\-faces\(aq, \(aqabnormal\(aq, \(aqabolish\(aq, ...]
+.ft P
+.fi
+.sp
+Note that words from the \fIwords()\fP method are sorted by file id, not alphabetically:
+.sp
+.nf
+.ft C
+>>> opinion_lexicon.words()[0:10]
+[\(aq2\-faced\(aq, \(aq2\-faces\(aq, \(aqabnormal\(aq, \(aqabolish\(aq, \(aqabominable\(aq, \(aqabominably\(aq,
+\(aqabominate\(aq, \(aqabomination\(aq, \(aqabort\(aq, \(aqaborted\(aq]
+>>> sorted(opinion_lexicon.words())[0:10]
+[\(aq2\-faced\(aq, \(aq2\-faces\(aq, \(aqa+\(aq, \(aqabnormal\(aq, \(aqabolish\(aq, \(aqabominable\(aq, \(aqabominably\(aq,
+\(aqabominate\(aq, \(aqabomination\(aq, \(aqabort\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B CorpusView
+alias of \fI\%nltk.corpus.reader.opinion_lexicon.IgnoreReadmeCorpusView\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B negative()
+Return all negative words in alphabetical order.
+.INDENT 7.0
+.TP
+.B Returns
+a list of negative words.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B positive()
+Return all positive words in alphabetical order.
+.INDENT 7.0
+.TP
+.B Returns
+a list of positive words.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+Return all words in the opinion lexicon. Note that these words are not
+sorted in alphabetical order.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+words have to be returned.
+.TP
+.B Returns
+the given file(s) as a list of words and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.panlex_lite module
+.sp
+CorpusReader for PanLex Lite, a stripped\-down version of PanLex distributed
+as an SQLite database.
See the README.txt in the panlex_lite corpus directory +for more information on PanLex Lite. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.panlex_lite.Meaning(mn, attr) +Bases: \fBdict\fP +.sp +Represents a single PanLex meaning. A meaning is a translation set derived +from a single source. +.INDENT 7.0 +.TP +.B expressions() +.INDENT 7.0 +.TP +.B Returns +the meaning\(aqs expressions as a dictionary whose keys are language +variety uniform identifiers and whose values are lists of expression +texts. +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B id() +.INDENT 7.0 +.TP +.B Returns +the meaning\(aqs id. +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B quality() +.INDENT 7.0 +.TP +.B Returns +the meaning\(aqs source\(aqs quality (0=worst, 9=best). +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B source() +.INDENT 7.0 +.TP +.B Returns +the meaning\(aqs source id. +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B source_group() +.INDENT 7.0 +.TP +.B Returns +the meaning\(aqs source group id. +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.panlex_lite.PanLexLiteCorpusReader(root) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B MEANING_Q = \(aq\en SELECT dnx2.mn, dnx2.uq, dnx2.ap, dnx2.ui, ex2.tt, ex2.lv\en FROM dnx\en JOIN ex ON (ex.ex = dnx.ex)\en JOIN dnx dnx2 ON (dnx2.mn = dnx.mn)\en JOIN ex ex2 ON (ex2.ex = dnx2.ex)\en WHERE dnx.ex != dnx2.ex AND ex.tt = ? AND ex.lv = ?\en ORDER BY dnx2.uq DESC\en \(aq +.UNINDENT +.INDENT 7.0 +.TP +.B TRANSLATION_Q = \(aq\en SELECT s.tt, sum(s.uq) AS trq FROM (\en SELECT ex2.tt, max(dnx.uq) AS uq\en FROM dnx\en JOIN ex ON (ex.ex = dnx.ex)\en JOIN dnx dnx2 ON (dnx2.mn = dnx.mn)\en JOIN ex ex2 ON (ex2.ex = dnx2.ex)\en WHERE dnx.ex != dnx2.ex AND ex.lv = ? AND ex.tt = ? AND ex2.lv = ?\en GROUP BY ex2.tt, dnx.ui\en ) s\en GROUP BY s.tt\en ORDER BY trq DESC, s.tt\en \(aq +.UNINDENT +.INDENT 7.0 +.TP +.B language_varieties(lc=None) +Return a list of PanLex language varieties. +.INDENT 7.0 +.TP +.B Parameters +\fBlc\fP \-\- ISO 639 alpha\-3 code. If specified, filters returned varieties +by this code. If unspecified, all varieties are returned. +.TP +.B Returns +the specified language varieties as a list of tuples. The first +element is the language variety\(aqs seven\-character uniform identifier, +and the second element is its default name. +.TP +.B Return type +list(tuple) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B meanings(expr_uid, expr_tt) +Return a list of meanings for an expression. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBexpr_uid\fP \-\- the expression\(aqs language variety, as a seven\-character +uniform identifier. +.IP \(bu 2 +\fBexpr_tt\fP \-\- the expression\(aqs text. +.UNINDENT +.TP +.B Returns +a list of Meaning objects. +.TP +.B Return type +list(Meaning) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B translations(from_uid, from_tt, to_uid) +.INDENT 7.0 +.TP +.B Return a list of translations for an expression into a single language +variety. +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfrom_uid\fP \-\- the source expression\(aqs language variety, as a +seven\-character uniform identifier. +.IP \(bu 2 +\fBfrom_tt\fP \-\- the source expression\(aqs text. +.IP \(bu 2 +\fBto_uid\fP \-\- the target language variety, as a seven\-character +uniform identifier. 
+.UNINDENT
+.TP
+.B Returns
+a list of translation tuples. The first element is the expression
+text and the second element is the translation quality.
+.TP
+.B Return type
+list(tuple)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.panlex_swadesh module
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.panlex_swadesh.PanlexLanguage(panlex_uid, iso639, iso639_type, script, name, langvar_uid)
+Bases: \fBtuple\fP
+.INDENT 7.0
+.TP
+.B iso639
+Alias for field number 1
+.UNINDENT
+.INDENT 7.0
+.TP
+.B iso639_type
+Alias for field number 2
+.UNINDENT
+.INDENT 7.0
+.TP
+.B langvar_uid
+Alias for field number 5
+.UNINDENT
+.INDENT 7.0
+.TP
+.B name
+Alias for field number 4
+.UNINDENT
+.INDENT 7.0
+.TP
+.B panlex_uid
+Alias for field number 0
+.UNINDENT
+.INDENT 7.0
+.TP
+.B script
+Alias for field number 3
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.panlex_swadesh.PanlexSwadeshCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP
+.sp
+This is a class to read the PanLex Swadesh list from
+.sp
+David Kamholz, Jonathan Pool, and Susan M. Colowick (2014).
+PanLex: Building a Resource for Panlingual Lexical Translation.
+In LREC. \fI\%http://www.lrec\-conf.org/proceedings/lrec2014/pdf/1029_Paper.pdf\fP
+.sp
+License: CC0 1.0 Universal
+\fI\%https://creativecommons.org/publicdomain/zero/1.0/legalcode\fP
+.INDENT 7.0
+.TP
+.B entries(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+a tuple of words for the specified fileids.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_languages()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_macrolanguages()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B language_codes()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B license()
+Return the contents of the corpus LICENSE file, if it exists.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words_by_iso639(iso63_code)
+.INDENT 7.0
+.TP
+.B Returns
+a list of list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words_by_lang(lang_code)
+.INDENT 7.0
+.TP
+.B Returns
+a list of list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.pl196x module
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.pl196x.Pl196xCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP
+.INDENT 7.0
+.TP
+.B decode_tag(tag)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B head_len = 2770
+.UNINDENT
+.INDENT 7.0
+.TP
+.B paras(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_paras(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B textids(fileids=None, categories=None)
+In the pl196x corpus each category is stored in a single
+file, so both methods provide identical functionality. In order
+to accommodate finer granularity, a non\-standard textids() method was
+implemented. All the main functions can be supplied with a list
+of required chunks, giving much more control to the user.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None, categories=None, textids=None)
+Returns all of the words and punctuation symbols in the specified file
+that were in text nodes \-\- i.e., tags are ignored. Like the xml() method,
+fileid can only specify one file.
+.INDENT 7.0
+.TP
+.B Returns
+the given file\(aqs text nodes as a list of words and punctuation symbols
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B xml(fileids=None, categories=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.pl196x.TEICorpusView(corpus_file, tagged, group_by_sent, group_by_para, tagset=None, head_len=0, textids=None)
+Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.INDENT 7.0
+.TP
+.B read_block(stream)
+Read a block from the input stream.
+.INDENT 7.0
+.TP
+.B Returns
+a block of tokens from the input stream
+.TP
+.B Return type
+list(any)
+.TP
+.B Parameters
+\fBstream\fP (\fIstream\fP) \-\- an input stream
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.plaintext module
+.sp
+A reader for corpora that consist of plaintext documents.
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.plaintext.CategorizedPlaintextCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.plaintext.PlaintextCorpusReader\fP
+.sp
+A reader for plaintext corpora whose documents are divided into
+categories based on their file identifiers.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.plaintext.EuroparlCorpusReader(root, fileids, word_tokenizer=WordPunctTokenizer(pattern=\(aq\e\ew+|[^\e\ew\e\es]+\(aq, gaps=False, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), sent_tokenizer=, para_block_reader=, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.plaintext.PlaintextCorpusReader\fP
+.sp
+Reader for Europarl corpora that consist of plaintext documents.
+Documents are divided into chapters instead of paragraphs as
+for regular plaintext documents. Chapters are separated using blank
+lines. Everything is inherited from \fBPlaintextCorpusReader\fP except
+that:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+Since the corpus is pre\-processed and pre\-tokenized, the
+word tokenizer should just split the line at whitespace.
+.IP \(bu 2
+For the same reason, the sentence tokenizer should just
+split the paragraph at line breaks.
+.IP \(bu 2
+There is a new \(aqchapters()\(aq method that returns chapters
+instead of paragraphs.
+.IP \(bu 2
+The \(aqparas()\(aq method inherited from PlaintextCorpusReader is
+made non\-functional to remove any confusion between chapters
+and paragraphs for Europarl.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chapters(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+chapters, each encoded as a list of sentences, which are
+in turn encoded as lists of word strings.
+.TP
+.B Return type
+list(list(list(str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B paras(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+paragraphs, each encoded as a list of sentences, which are
+in turn encoded as lists of word strings.
+.TP
+.B Return type
+list(list(list(str)))
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.plaintext.PlaintextCorpusReader(root, fileids, word_tokenizer=WordPunctTokenizer(pattern=\(aq\e\ew+|[^\e\ew\e\es]+\(aq, gaps=False, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), sent_tokenizer=, para_block_reader=, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for corpora that consist of plaintext documents. Paragraphs
+are assumed to be split using blank lines.
Sentences and words can +be tokenized using the default tokenizers, or by custom tokenizers +specified as parameters to the constructor. +.sp +This corpus reader can be customized (e.g., to skip preface +sections of specific document formats) by creating a subclass and +overriding the \fBCorpusView\fP class variable. +.INDENT 7.0 +.TP +.B CorpusView +alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.UNINDENT +.INDENT 7.0 +.TP +.B paras(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of word strings. +.TP +.B Return type +list(list(list(str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences or utterances, each encoded as a list of word +strings. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.plaintext.PortugueseCategorizedPlaintextCorpusReader(*args, **kwargs) +Bases: \fI\%nltk.corpus.reader.plaintext.CategorizedPlaintextCorpusReader\fP +.UNINDENT +.SS nltk.corpus.reader.ppattach module +.sp +Read lines from the Prepositional Phrase Attachment Corpus. +.sp +The PP Attachment Corpus contains several files having the format: +.sp +sentence_id verb noun1 preposition noun2 attachment +.sp +For example: +.sp +42960 gives authority to administration V +46742 gives inventors of microchip N +.sp +The PP attachment is to the verb phrase (V) or noun phrase (N), i.e.: +.sp +(VP gives (NP authority) (PP to administration)) +(VP gives (NP inventors (PP of microchip))) +.sp +The corpus contains the following files: +.sp +training: training set +devset: development test set, used for algorithm development. +test: test set, used to report results +bitstrings: word classes derived from Mutual Information Clustering for the Wall Street Journal. +.sp +Ratnaparkhi, Adwait (1994). A Maximum Entropy Model for Prepositional +Phrase Attachment. Proceedings of the ARPA Human Language Technology +Conference. [\fI\%http://www.cis.upenn.edu/~adwait/papers/hlt94.ps\fP] +.sp +The PP Attachment Corpus is distributed with NLTK with the permission +of the author. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ppattach.PPAttachment(sent, verb, noun1, prep, noun2, attachment) +Bases: \fBobject\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ppattach.PPAttachmentCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +sentence_id verb noun1 preposition noun2 attachment +.INDENT 7.0 +.TP +.B attachments(fileids) +.UNINDENT +.INDENT 7.0 +.TP +.B tuples(fileids) +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.propbank module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.propbank.PropbankChainTreePointer(pieces) +Bases: \fI\%nltk.corpus.reader.propbank.PropbankPointer\fP +.INDENT 7.0 +.TP +.B pieces +A list of the pieces that make up this chain. Elements may +be either \fBPropbankSplitTreePointer\fP or +\fBPropbankTreePointer\fP pointers. 
+.UNINDENT
+.INDENT 7.0
+.TP
+.B select(tree)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.propbank.PropbankCorpusReader(root, propfile, framefiles=\(aq\(aq, verbsfile=None, parse_fileid_xform=None, parse_corpus=None, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Corpus reader for the propbank corpus, which augments the Penn
+Treebank with information about the predicate argument structure
+of every verb instance. The corpus consists of two parts: the
+predicate\-argument annotations themselves, and a set of "frameset
+files" which define the argument labels used by the annotations,
+on a per\-verb basis. Each "frameset file" contains one or more
+predicates, such as \fB\(aqturn\(aq\fP or \fB\(aqturn_on\(aq\fP, each of which is
+divided into coarse\-grained word senses called "rolesets". For
+each "roleset", the frameset file provides descriptions of the
+argument roles, along with examples.
+.INDENT 7.0
+.TP
+.B instances(baseform=None)
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of
+\fBPropBankInstance\fP objects, one for each verb in the corpus.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lines()
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of strings, one for
+each line in the predicate\-argument annotation file.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B roleset(roleset_id)
+.INDENT 7.0
+.TP
+.B Returns
+the xml description for the given roleset.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B rolesets(baseform=None)
+.INDENT 7.0
+.TP
+.B Returns
+list of xml descriptions for rolesets.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verbs()
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of all verb lemmas
+in this corpus (from the verbs.txt file).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.propbank.PropbankInflection(form=\(aq\-\(aq, tense=\(aq\-\(aq, aspect=\(aq\-\(aq, person=\(aq\-\(aq, voice=\(aq\-\(aq)
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B ACTIVE = \(aqa\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B FINITE = \(aqv\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B FUTURE = \(aqf\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B GERUND = \(aqg\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B INFINITIVE = \(aqi\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NONE = \(aq\-\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PARTICIPLE = \(aqp\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PASSIVE = \(aqp\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PAST = \(aqp\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PERFECT = \(aqp\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PERFECT_AND_PROGRESSIVE = \(aqb\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PRESENT = \(aqn\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PROGRESSIVE = \(aqo\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B THIRD_PERSON = \(aq3\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static parse(s)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.propbank.PropbankInstance(fileid, sentnum, wordnum, tagger, roleset, inflection, predicate, arguments, parse_corpus=None)
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B arguments
+A list of tuples (argloc, argid), specifying the location
+and identifier for each of the predicate\(aqs arguments in the
+containing sentence. Argument identifiers are strings such as
+\fB\(aqARG0\(aq\fP or \fB\(aqARGM\-TMP\(aq\fP\&. This list does \fInot\fP contain
+the predicate.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property baseform
+The baseform of the predicate.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fileid
+The name of the file containing the parse tree for this
+instance\(aqs sentence.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B inflection
+A \fBPropbankInflection\fP object describing the inflection of
+this instance\(aqs predicate.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static parse(s, parse_fileid_xform=None, parse_corpus=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_corpus
+A corpus reader for the parse trees corresponding to the
+instances in this propbank corpus.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B predicate
+A \fBPropbankTreePointer\fP indicating the position of this
+instance\(aqs predicate within its containing sentence.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property predid
+Identifier of the predicate.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B roleset
+The name of the roleset used by this instance\(aqs predicate.
+Use \fBpropbank.roleset()\fP to
+look up information about the roleset.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property sensenumber
+The sense number of the predicate.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sentnum
+The sentence number of this sentence within \fBfileid\fP\&.
+Indexing starts from zero.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagger
+An identifier for the tagger who tagged this instance; or
+\fB\(aqgold\(aq\fP if this is an adjudicated instance.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property tree
+The parse tree corresponding to this instance, or None if
+the corresponding tree is not available.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B wordnum
+The word number of this instance\(aqs predicate within its
+containing sentence. Word numbers are indexed starting from
+zero, and include traces and other empty parse elements.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.propbank.PropbankPointer
+Bases: \fBobject\fP
+.sp
+A pointer used by propbank to identify one or more constituents in
+a parse tree. \fBPropbankPointer\fP is an abstract base class with
+three concrete subclasses:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+\fBPropbankTreePointer\fP is used to point to single constituents.
+.IP \(bu 2
+\fBPropbankSplitTreePointer\fP is used to point to \(aqsplit\(aq
+constituents, which consist of a sequence of two or more
+\fBPropbankTreePointer\fP pointers.
+.IP \(bu 2
+\fBPropbankChainTreePointer\fP is used to point to entire trace
+chains in a tree. It consists of a sequence of pieces, which
+can be \fBPropbankTreePointer\fP or \fBPropbankSplitTreePointer\fP pointers.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.propbank.PropbankSplitTreePointer(pieces)
+Bases: \fI\%nltk.corpus.reader.propbank.PropbankPointer\fP
+.INDENT 7.0
+.TP
+.B pieces
+A list of the pieces that make up this chain. Elements are
+all \fBPropbankTreePointer\fP pointers.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B select(tree)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.propbank.PropbankTreePointer(wordnum, height)
+Bases: \fI\%nltk.corpus.reader.propbank.PropbankPointer\fP
+.sp
+wordnum:height*wordnum:height*...
+wordnum:height,
+.INDENT 7.0
+.TP
+.B static parse(s)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B select(tree)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B treepos(tree)
+Convert this pointer to a standard \(aqtree position\(aq pointer,
+given that it points to the given tree.
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.pros_cons module
+.sp
+CorpusReader for the Pros and Cons dataset.
+.INDENT 0.0
+.IP \(bu 2
+Pros and Cons dataset information \-
+.UNINDENT
+.INDENT 0.0
+.TP
+.B Contact: Bing Liu, \fI\%liub@cs.uic.edu\fP
+\fI\%http://www.cs.uic.edu/~liub\fP
+.UNINDENT
+.sp
+Distributed with permission.
+.sp
+Related papers:
+.INDENT 0.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Murthy Ganapathibhotla and Bing Liu. "Mining Opinions in Comparative Sentences".
+Proceedings of the 22nd International Conference on Computational Linguistics
+(Coling\-2008), Manchester, 18\-22 August, 2008.
+.UNINDENT
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing and Comparing
+Opinions on the Web". Proceedings of the 14th international World Wide Web
+conference (WWW\-2005), May 10\-14, 2005, in Chiba, Japan.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.pros_cons.ProsConsCorpusReader(root, fileids, word_tokenizer=WordPunctTokenizer(pattern=\(aq\e\ew+|[^\e\ew\e\es]+\(aq, gaps=False, discard_empty=True, flags=re.UNICODE | re.MULTILINE | re.DOTALL), encoding=\(aqutf8\(aq, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for the Pros and Cons sentence dataset.
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import pros_cons
+>>> pros_cons.sents(categories=\(aqCons\(aq)
+[[\(aqEast\(aq, \(aqbatteries\(aq, \(aq!\(aq, \(aqOn\(aq, \(aq\-\(aq, \(aqoff\(aq, \(aqswitch\(aq, \(aqtoo\(aq, \(aqeasy\(aq,
+\(aqto\(aq, \(aqmaneuver\(aq, \(aq.\(aq], [\(aqEats\(aq, \(aq...\(aq, \(aqno\(aq, \(aq,\(aq, \(aqGULPS\(aq, \(aqbatteries\(aq],
+\&...]
+>>> pros_cons.words(\(aqIntegratedPros.txt\(aq)
+[\(aqEasy\(aq, \(aqto\(aq, \(aquse\(aq, \(aq,\(aq, \(aqeconomical\(aq, \(aq!\(aq, ...]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B CorpusView
+alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, categories=None)
+Return all sentences in the corpus or in the specified files/categories.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+sentences have to be returned.
+.IP \(bu 2
+\fBcategories\fP \-\- a list specifying the categories whose sentences
+have to be returned.
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of sentences. Each sentence is
+tokenized using the specified word_tokenizer.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None, categories=None)
+Return all words and punctuation symbols in the corpus or in the specified
+files/categories.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+words have to be returned.
+.IP \(bu 2
+\fBcategories\fP \-\- a list specifying the categories whose words have
+to be returned.
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of words and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.reviews module
+.sp
+CorpusReader for reviews corpora (syntax based on Customer Review Corpus).
+.INDENT 0.0
+.IP \(bu 2
+Customer Review Corpus information \-
+.UNINDENT
+.INDENT 0.0
+.TP
+.B Annotated by: Minqing Hu and Bing Liu, 2004.
+Department of Computer Science
+University of Illinois at Chicago
+.TP
+.B Contact: Bing Liu, \fI\%liub@cs.uic.edu\fP
+\fI\%http://www.cs.uic.edu/~liub\fP
+.UNINDENT
+.sp
+Distributed with permission.
+.sp
+The "product_reviews_1" and "product_reviews_2" datasets respectively contain
+annotated customer reviews of 5 and 9 products from amazon.com.
+.sp
+Related papers:
+.INDENT 0.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Minqing Hu and Bing Liu. "Mining and summarizing customer reviews".
+Proceedings of the ACM SIGKDD International Conference on Knowledge
+Discovery & Data Mining (KDD\-04), 2004.
+.UNINDENT
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Minqing Hu and Bing Liu. "Mining Opinion Features in Customer Reviews".
+Proceedings of the Nineteenth National Conference on Artificial Intelligence
+(AAAI\-2004), 2004.
+.UNINDENT
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Xiaowen Ding, Bing Liu and Philip S. Yu. "A Holistic Lexicon\-Based Approach to
+Opinion Mining." Proceedings of the First ACM International Conference on Web
+Search and Data Mining (WSDM\-2008), Feb 11\-12, 2008, Stanford University,
+Stanford, California, USA.
+.UNINDENT
+.UNINDENT
+.sp
+Symbols used in the annotated reviews:
+.INDENT 0.0
+.INDENT 3.5
+[t] : the title of the review: Each [t] tag starts a review.
+xxxx[+|\-n]: xxxx is a product feature.
+[+n]: Positive opinion, n is the opinion strength: 3 strongest, and 1 weakest.
+.INDENT 0.0
+.INDENT 3.5
+Note that the strength is quite subjective.
+You may want to ignore it and consider only + and \-
+.UNINDENT
+.UNINDENT
+.sp
+[\-n]: Negative opinion
+## : start of each sentence. Each line is a sentence.
+[u] : feature that did not appear in the sentence.
+[p] : feature that did not appear in the sentence; pronoun resolution is needed.
+[s] : suggestion or recommendation.
+[cc]: comparison with a competing product from a different brand.
+[cs]: comparison with a competing product from the same brand.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B Note: Some of the files (e.g. "ipod.txt", "Canon PowerShot SD500.txt") do not
+provide separation between different reviews. This is because the dataset was
+specifically designed for aspect/feature\-based sentiment analysis, for which
+sentence\-level annotation is sufficient. For document\-level classification
+and analysis, this peculiarity should be taken into consideration.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.reviews.Review(title=None, review_lines=None)
+Bases: \fBobject\fP
+.sp
+A Review is the main block of a ReviewsCorpusReader.
+.INDENT 7.0
+.TP
+.B add_line(review_line)
+Add a line (ReviewLine) to the review.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBreview_line\fP \-\- a ReviewLine instance that belongs to the Review.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B features()
+Return a list of features in the review. Each feature is a tuple made of
+the specific item feature and the opinion strength about that feature.
+.INDENT 7.0
+.TP
+.B Returns
+all features of the review as a list of tuples (feat, score).
+.TP
+.B Return type
+list(tuple)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents()
+Return all tokenized sentences in the review.
+.INDENT 7.0
+.TP
+.B Returns
+all sentences of the review as lists of tokens.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.reviews.ReviewLine(sent, features=None, notes=None)
+Bases: \fBobject\fP
+.sp
+A ReviewLine represents a sentence of the review, together with (optional)
+annotations of its features and notes about the reviewed item.
+.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.reviews.ReviewsCorpusReader(root, fileids, word_tokenizer=WordPunctTokenizer(pattern=\(aq\e\ew+|[^\e\ew\e\es]+\(aq, gaps=False, discard_empty=True, flags=re.UNICODE | re.MULTILINE | re.DOTALL), encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Reader for the Customer Review Data dataset by Hu, Liu (2004). +Note: we are not applying any sentence tokenization at the moment, just word +tokenization. +.sp +.nf +.ft C +>>> from nltk.corpus import product_reviews_1 +>>> camera_reviews = product_reviews_1.reviews(\(aqCanon_G3.txt\(aq) +>>> review = camera_reviews[0] +>>> review.sents()[0] +[\(aqi\(aq, \(aqrecently\(aq, \(aqpurchased\(aq, \(aqthe\(aq, \(aqcanon\(aq, \(aqpowershot\(aq, \(aqg3\(aq, \(aqand\(aq, \(aqam\(aq, +\(aqextremely\(aq, \(aqsatisfied\(aq, \(aqwith\(aq, \(aqthe\(aq, \(aqpurchase\(aq, \(aq.\(aq] +>>> review.features() +[(\(aqcanon powershot g3\(aq, \(aq+3\(aq), (\(aquse\(aq, \(aq+2\(aq), (\(aqpicture\(aq, \(aq+2\(aq), +(\(aqpicture quality\(aq, \(aq+1\(aq), (\(aqpicture quality\(aq, \(aq+1\(aq), (\(aqcamera\(aq, \(aq+2\(aq), +(\(aquse\(aq, \(aq+2\(aq), (\(aqfeature\(aq, \(aq+1\(aq), (\(aqpicture quality\(aq, \(aq+3\(aq), (\(aquse\(aq, \(aq+1\(aq), +(\(aqoption\(aq, \(aq+1\(aq)] +.ft P +.fi +.sp +We can also reach the same information directly from the stream: +.sp +.nf +.ft C +>>> product_reviews_1.features(\(aqCanon_G3.txt\(aq) +[(\(aqcanon powershot g3\(aq, \(aq+3\(aq), (\(aquse\(aq, \(aq+2\(aq), ...] +.ft P +.fi +.sp +We can compute stats for specific product features: +.sp +.nf +.ft C +>>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features(\(aqCanon_G3.txt\(aq) if feat==\(aqpicture\(aq]) +>>> tot = sum([int(score) for (feat,score) in product_reviews_1.features(\(aqCanon_G3.txt\(aq) if feat==\(aqpicture\(aq]) +>>> mean = tot / n_reviews +>>> print(n_reviews, tot, mean) +15 24 1.6 +.ft P +.fi +.INDENT 7.0 +.TP +.B CorpusView +alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.UNINDENT +.INDENT 7.0 +.TP +.B features(fileids=None) +Return a list of features. Each feature is a tuple made of the specific +item feature and the opinion strength about that feature. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +features have to be returned. +.TP +.B Returns +all features for the item(s) in the given file(s). +.TP +.B Return type +list(tuple) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B reviews(fileids=None) +Return all the reviews as a list of Review objects. If \fIfileids\fP is +specified, return all the reviews from each of the specified files. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +reviews have to be returned. +.TP +.B Returns +the given file(s) as a list of reviews. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +Return all sentences in the corpus or in the specified files. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +sentences have to be returned. +.TP +.B Returns +the given file(s) as a list of sentences, each encoded as a +list of word strings. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +Return all words and punctuation symbols in the corpus or in the specified +files. 
+.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +words have to be returned. +.TP +.B Returns +the given file(s) as a list of words and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.rte module +.sp +Corpus reader for the Recognizing Textual Entailment (RTE) Challenge Corpora. +.sp +The files were taken from the RTE1, RTE2 and RTE3 datasets and the files +were regularized. +.sp +Filenames are of the form rte*_dev.xml and rte*_test.xml. The latter are the +gold standard annotated files. +.sp +Each entailment corpus is a list of \(aqtext\(aq/\(aqhypothesis\(aq pairs. The following +example is taken from RTE3: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C + + + The sale was made to pay Yukos\(aq US$ 27.5 billion tax bill, + Yuganskneftegaz was originally sold for US$ 9.4 billion to a little known + company Baikalfinansgroup which was later bought by the Russian + state\-owned oil company Rosneft . + + Baikalfinansgroup was sold to Rosneft. + +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +In order to provide globally unique IDs for each pair, a new attribute +\fBchallenge\fP has been added to the root element \fBentailment\-corpus\fP of each +file, taking values 1, 2 or 3. The GID is formatted \(aqm\-n\(aq, where \(aqm\(aq is the +challenge number and \(aqn\(aq is the pair ID. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.rte.RTECorpusReader(root, fileids, wrap_etree=False) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.sp +Corpus reader for corpora in RTE challenges. +.sp +This is just a wrapper around the XMLCorpusReader. See module docstring above for the expected +structure of input documents. +.INDENT 7.0 +.TP +.B pairs(fileids) +Build a list of RTEPairs from a RTE corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list of RTE corpus fileids +.TP +.B Type +list +.TP +.B Return type +list(RTEPair) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.rte.RTEPair(pair, challenge=None, id=None, text=None, hyp=None, value=None, task=None, length=None) +Bases: \fBobject\fP +.sp +Container for RTE text\-hypothesis pairs. +.sp +The entailment relation is signalled by the \fBvalue\fP attribute in RTE1, and by +\fBentailment\fP in RTE2 and RTE3. These both get mapped on to the \fBentailment\fP +attribute of this class. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.rte.norm(value_string) +Normalize the string value in an RTE pair\(aqs \fBvalue\fP or \fBentailment\fP +attribute as an integer (1, 0). +.INDENT 7.0 +.TP +.B Parameters +\fBvalue_string\fP (\fIstr\fP) \-\- the label used to classify a text/hypothesis pair +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.semcor module +.sp +Corpus reader for the SemCor Corpus. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.semcor.SemcorCorpusReader(root, fileids, wordnet, lazy=True) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.sp +Corpus reader for the SemCor Corpus. +For access to the complete XML data structure, use the \fBxml()\fP +method. For access to simple word lists and tagged word lists, use +\fBwords()\fP, \fBsents()\fP, \fBtagged_words()\fP, and \fBtagged_sents()\fP\&. +.INDENT 7.0 +.TP +.B chunk_sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of sentences, each encoded +as a list of chunks. 
+.TP
+.B Return type
+list(list(list(str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chunks(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of chunks,
+each of which is a list of words and punctuation symbols
+that form a unit.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of sentences, each encoded
+as a list of word strings.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_chunks(fileids=None, tag=\(aqpos\(aq)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of tagged chunks, represented
+in tree form.
+.TP
+.B Return type
+list(Tree)
+.TP
+.B Parameters
+\fBtag\fP \-\- \fI\(aqpos\(aq\fP (part of speech), \fI\(aqsem\(aq\fP (semantic), or \fI\(aqboth\(aq\fP
+to indicate the kind of tags to include. Semantic tags consist of
+WordNet lemma IDs, plus an \fI\(aqNE\(aq\fP node if the chunk is a named entity
+without a specific entry in WordNet. (Named entities of type \(aqother\(aq
+have no lemma. Other chunks not in WordNet have no semantic tag.
+Punctuation tokens have \fINone\fP for their part of speech tag.)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, tag=\(aqpos\(aq)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of sentences. Each sentence
+is represented as a list of tagged chunks (in tree form).
+.TP
+.B Return type
+list(list(Tree))
+.TP
+.B Parameters
+\fBtag\fP \-\- \fI\(aqpos\(aq\fP (part of speech), \fI\(aqsem\(aq\fP (semantic), or \fI\(aqboth\(aq\fP
+to indicate the kind of tags to include. Semantic tags consist of
+WordNet lemma IDs, plus an \fI\(aqNE\(aq\fP node if the chunk is a named entity
+without a specific entry in WordNet. (Named entities of type \(aqother\(aq
+have no lemma. Other chunks not in WordNet have no semantic tag.
+Punctuation tokens have \fINone\fP for their part of speech tag.)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of words and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.semcor.SemcorSentence(num, items)
+Bases: \fBlist\fP
+.sp
+A list of words, augmented by an attribute \fBnum\fP used to record
+the sentence identifier (the \fBn\fP attribute from the XML).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.semcor.SemcorWordView(fileid, unit, bracket_sent, pos_tag, sem_tag, wordnet)
+Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusView\fP
+.sp
+A stream\-backed corpus view specialized for use with the SemCor corpus.
+.INDENT 7.0
+.TP
+.B handle_elt(elt, context)
+Convert an element into an appropriate value for inclusion in
+the view. Unless overridden by a subclass or by the
+\fBelt_handler\fP constructor argument, this method simply
+returns \fBelt\fP\&.
+.INDENT 7.0
+.TP
+.B Returns
+The view value corresponding to \fBelt\fP\&.
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBelt\fP (\fIElementTree\fP) \-\- The element that should be converted.
+.IP \(bu 2
+\fBcontext\fP (\fIstr\fP) \-\- A string composed of element tags separated by
+forward slashes, indicating the XML context of the given
+element. For example, the string \fB\(aqfoo/bar/baz\(aq\fP
+indicates that the element is a \fBbaz\fP element whose
+parent is a \fBbar\fP element and whose grandparent is a
+top\-level \fBfoo\fP element.
+.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B handle_sent(elt) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_word(elt) +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.senseval module +.sp +Read from the Senseval 2 Corpus. +.sp +SENSEVAL [\fI\%http://www.senseval.org/\fP] +Evaluation exercises for Word Sense Disambiguation. +Organized by ACL\-SIGLEX [\fI\%http://www.siglex.org/\fP] +.sp +Prepared by Ted Pedersen <\fI\%tpederse@umn.edu\fP>, University of Minnesota, +\fI\%http://www.d.umn.edu/~tpederse/data.html\fP +Distributed with permission. +.sp +The NLTK version of the Senseval 2 files uses well\-formed XML. +Each instance of the ambiguous words "hard", "interest", "line", and "serve" +is tagged with a sense identifier, and supplied with context. +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.senseval.SensevalCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B instances(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.senseval.SensevalCorpusView(fileid, encoding) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.INDENT 7.0 +.TP +.B read_block(stream) +Read a block from the input stream. +.INDENT 7.0 +.TP +.B Returns +a block of tokens from the input stream +.TP +.B Return type +list(any) +.TP +.B Parameters +\fBstream\fP (\fIstream\fP) \-\- an input stream +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.senseval.SensevalInstance(word, position, context, senses) +Bases: \fBobject\fP +.UNINDENT +.SS nltk.corpus.reader.sentiwordnet module +.sp +An NLTK interface for SentiWordNet +.sp +SentiWordNet is a lexical resource for opinion mining. +SentiWordNet assigns to each synset of WordNet three +sentiment scores: positivity, negativity, and objectivity. +.sp +For details about SentiWordNet see: +\fI\%http://sentiwordnet.isti.cnr.it/\fP +.sp +.nf +.ft C +>>> from nltk.corpus import sentiwordnet as swn +>>> print(swn.senti_synset(\(aqbreakdown.n.03\(aq)) + +>>> list(swn.senti_synsets(\(aqslow\(aq)) +[SentiSynset(\(aqdecelerate.v.01\(aq), SentiSynset(\(aqslow.v.02\(aq), +SentiSynset(\(aqslow.v.03\(aq), SentiSynset(\(aqslow.a.01\(aq), +SentiSynset(\(aqslow.a.02\(aq), SentiSynset(\(aqdense.s.04\(aq), +SentiSynset(\(aqslow.a.04\(aq), SentiSynset(\(aqboring.s.01\(aq), +SentiSynset(\(aqdull.s.08\(aq), SentiSynset(\(aqslowly.r.01\(aq), +SentiSynset(\(aqbehind.r.03\(aq)] +>>> happy = swn.senti_synsets(\(aqhappy\(aq, \(aqa\(aq) +>>> happy0 = list(happy)[0] +>>> happy0.pos_score() +0.875 +>>> happy0.neg_score() +0.0 +>>> happy0.obj_score() +0.125 +.ft P +.fi +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.sentiwordnet.SentiSynset(pos_score, neg_score, synset) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B neg_score() +.UNINDENT +.INDENT 7.0 +.TP +.B obj_score() +.UNINDENT +.INDENT 7.0 +.TP +.B pos_score() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.sentiwordnet.SentiWordNetCorpusReader(root, fileids, encoding=\(aqutf\-8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B all_senti_synsets() +.UNINDENT +.INDENT 7.0 +.TP +.B senti_synset(*vals) +.UNINDENT +.INDENT 7.0 +.TP +.B senti_synsets(string, pos=None) +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.sinica_treebank module +.sp +Sinica Treebank Corpus Sample +.sp +\fI\%http://rocling.iis.sinica.edu.tw/CKIP/engversion/treebank.htm\fP +.sp +10,000 parsed sentences, drawn from the Academia Sinica Balanced +Corpus of Modern Chinese. 
Parse tree notation is based on
+Information\-based Case Grammar. Tagset documentation is available
+at \fI\%http://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html\fP
+.sp
+Language and Knowledge Processing Group, Institute of Information
+Science, Academia Sinica
+.sp
+The data is distributed with the Natural Language Toolkit under the terms of
+the Creative Commons Attribution\-NonCommercial\-ShareAlike License
+[\fI\%http://creativecommons.org/licenses/by\-nc\-sa/2.5/\fP].
+.sp
+References:
+.sp
+Feng\-Yi Chen, Pi\-Fang Tsai, Keh\-Jiann Chen, and Chu\-Ren Huang (1999)
+The Construction of Sinica Treebank. Computational Linguistics and
+Chinese Language Processing, 4, pp. 87\-104.
+.sp
+Huang Chu\-Ren, Keh\-Jiann Chen, Feng\-Yi Chen, Zhao\-Ming
+Gao, and Kuang\-Yu Chen. 2000. Sinica Treebank: Design Criteria,
+Annotation Guidelines, and On\-line Interface. Proceedings of the 2nd
+Chinese Language Processing Workshop, Association for Computational
+Linguistics.
+.sp
+Chen Keh\-Jiann and Yu\-Ming Hsieh (2004) Chinese Treebanks and Grammar
+Extraction, Proceedings of IJCNLP\-04, pp. 560\-565.
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.sinica_treebank.SinicaTreebankCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.SyntaxCorpusReader\fP
+.sp
+Reader for the Sinica Treebank.
+.UNINDENT
+.SS nltk.corpus.reader.string_category module
+.sp
+Read tuples from a corpus consisting of categorized strings.
+For example, from the question classification corpus:
+.sp
+NUM:dist How far is it from Denver to Aspen ?
+LOC:city What county is Modesto , California in ?
+HUM:desc Who was Galileo ?
+DESC:def What is an atom ?
+NUM:date When did Hawaii become a state ?
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.string_category.StringCategoryCorpusReader(root, fileids, delimiter=\(aq \(aq, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B tuples(fileids=None)
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.switchboard module
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.switchboard.SwitchboardCorpusReader(root, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B discourses()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_discourses(tagset=False)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_turns(tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B turns()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words()
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.switchboard.SwitchboardTurn(words, speaker, id)
+Bases: \fBlist\fP
+.sp
+A specialized list object used to encode switchboard utterances.
+The elements of the list are the words in the utterance; two
+attributes, \fBspeaker\fP and \fBid\fP, are provided to retrieve the
+speaker identifier and utterance id. Note that utterance ids
+are only unique within a given discourse.
+.UNINDENT
+.SS nltk.corpus.reader.tagged module
+.sp
+A reader for corpora whose documents contain part\-of\-speech\-tagged words.
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.tagged.CategorizedTaggedCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP
+.sp
+A reader for part\-of\-speech tagged corpora whose documents are
+divided into categories based on their file identifiers.
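+.sp
+A minimal usage sketch (added here for illustration; the Brown corpus bundled
+with NLTK is an instance of this reader, assuming it has been downloaded via
+\fBnltk.download(\(aqbrown\(aq)\fP):
+.sp
+.nf
+.ft C
+from nltk.corpus import brown
+
+# Documents are grouped into categories such as \(aqnews\(aq or \(aqeditorial\(aq.
+brown.categories()                       # [\(aqadventure\(aq, \(aqbelles_lettres\(aq, ...]
+
+# Any of the tagged_* methods can be restricted to one or more categories.
+brown.tagged_words(categories=\(aqnews\(aq)   # [(\(aqThe\(aq, \(aqAT\(aq), (\(aqFulton\(aq, \(aqNP\-TL\(aq), ...]
+brown.tagged_sents(categories=[\(aqnews\(aq, \(aqeditorial\(aq])
+.ft P
+.fi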
+.INDENT 7.0 +.TP +.B tagged_paras(fileids=None, categories=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(list(tuple(str,str)))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, categories=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a list of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(tuple(str,str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, categories=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and punctuation symbols, encoded as tuples +\fB(word,tag)\fP\&. +.TP +.B Return type +list(tuple(str,str)) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.tagged.MacMorphoCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP +.sp +A corpus reader for the MAC_MORPHO corpus. Each line contains a +single tagged word, using \(aq_\(aq as a separator. Sentence boundaries +are based on the end\-sentence tag (\(aq_.\(aq). Paragraph information +is not included in the corpus, so each paragraph returned by +\fBself.paras()\fP and \fBself.tagged_paras()\fP contains a single +sentence. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.tagged.TaggedCorpusReader(root, fileids, sep=\(aq/\(aq, word_tokenizer=WhitespaceTokenizer(pattern=\(aq\e\es+\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), sent_tokenizer=RegexpTokenizer(pattern=\(aq\en\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), para_block_reader=, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Reader for simple part\-of\-speech tagged corpora. Paragraphs are +assumed to be split using blank lines. Sentences and words can be +tokenized using the default tokenizers, or by custom tokenizers +specified as parameters to the constructor. Words are parsed +using \fBnltk.tag.str2tuple\fP\&. By default, \fB\(aq/\(aq\fP is used as the +separator. I.e., words should have the form: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +word1/tag1 word2/tag2 word3/tag3 ... +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +But custom separators may be specified as parameters to the +constructor. Part of speech tags are case\-normalized to upper +case. +.INDENT 7.0 +.TP +.B paras(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of word strings. +.TP +.B Return type +list(list(list(str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences or utterances, each encoded as a list of word +strings. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of \fB(word,tag)\fP tuples. 
+.TP +.B Return type +list(list(list(tuple(str,str)))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a list of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(tuple(str,str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and punctuation symbols, encoded as tuples +\fB(word,tag)\fP\&. +.TP +.B Return type +list(tuple(str,str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.tagged.TaggedCorpusView(corpus_file, encoding, tagged, group_by_sent, group_by_para, sep, word_tokenizer, sent_tokenizer, para_block_reader, tag_mapping_function=None) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.sp +A specialized corpus view for tagged documents. It can be +customized via flags to divide the tagged corpus documents up by +sentence or paragraph, and to include or omit part of speech tags. +\fBTaggedCorpusView\fP objects are typically created by +\fBTaggedCorpusReader\fP (not directly by nltk users). +.INDENT 7.0 +.TP +.B read_block(stream) +Reads one paragraph at a time. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.tagged.TimitTaggedCorpusReader(*args, **kwargs) +Bases: \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP +.sp +A corpus reader for tagged sentences that are included in the TIMIT corpus. +.INDENT 7.0 +.TP +.B paras() +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of word strings. +.TP +.B Return type +list(list(list(str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras() +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(list(tuple(str,str)))) +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.timit module +.sp +Read tokens, phonemes and audio data from the NLTK TIMIT Corpus. +.sp +This corpus contains selected portion of the TIMIT corpus. +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +16 speakers from 8 dialect regions +.IP \(bu 2 +1 male and 1 female from each dialect region +.IP \(bu 2 +total 130 sentences (10 sentences per speaker. Note that some +sentences are shared among other speakers, especially sa1 and sa2 +are spoken by all speakers.) +.IP \(bu 2 +total 160 recording of sentences (10 recordings per speaker) +.IP \(bu 2 +audio format: NIST Sphere, single channel, 16kHz sampling, +.UNINDENT +.INDENT 0.0 +.INDENT 3.5 +16 bit sample, PCM encoding +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.SS Module contents +.sp +The timit corpus reader provides 4 functions and 4 data items. +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +utterances +.sp +List of utterances in the corpus. There are total 160 utterances, +each of which corresponds to a unique utterance of a speaker. 
+Here\(aqs an example of an utterance identifier in the list:
+.INDENT 2.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+dr1\-fvmh0/sx206
+ \- _\-\-\-\- _\-\-\-
+ | | | | |
+ | | | | |
+ | | | | \(ga\-\-\- sentence number
+ | | | \(ga\-\-\-\-\- sentence type (a:all, i:shared, x:exclusive)
+ | | \(ga\-\-\-\-\-\-\-\-\- speaker ID
+ | \(ga\-\-\-\-\-\-\-\-\-\-\-\- sex (m:male, f:female)
+ \(ga\-\-\-\-\-\-\-\-\-\-\-\-\-\- dialect region (1..8)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.IP \(bu 2
+speakers
+.sp
+List of speaker IDs. An example of speaker ID:
+.INDENT 2.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+dr1\-fvmh0
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Note that if you split an item ID on \(aq/\(aq and take the first element of
+the result, you will get a speaker ID.
+.sp
+.nf
+.ft C
+>>> itemid = \(aqdr1\-fvmh0/sx206\(aq
+>>> spkrid, sentid = itemid.split(\(aq/\(aq)
+>>> spkrid
+\(aqdr1\-fvmh0\(aq
+.ft P
+.fi
+.sp
+The second element of the result is a sentence ID.
+.IP \(bu 2
+dictionary()
+.sp
+Phonetic dictionary of words contained in this corpus. This is a Python
+dictionary from words to phoneme lists.
+.IP \(bu 2
+spkrinfo()
+.sp
+Speaker information table. It\(aqs a Python dictionary from speaker IDs to
+records of 10 fields. Speaker IDs are the same as the ones in timit.speakers.
+Each record is a dictionary from field names to values, and the fields are
+as follows:
+.INDENT 2.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+id         speaker ID as defined in the original TIMIT speaker info table
+sex        speaker gender (M:male, F:female)
+dr         speaker dialect region (1:new england, 2:northern,
+           3:north midland, 4:south midland, 5:southern, 6:new york city,
+           7:western, 8:army brat (moved around))
+use        corpus type (TRN:training, TST:test)
+           in this sample corpus only TRN is available
+recdate    recording date
+birthdate  speaker birth date
+ht         speaker height
+race       speaker race (WHT:white, BLK:black, AMR:american indian,
+           SPN:spanish\-american, ORN:oriental, ???:unknown)
+edu        speaker education level (HS:high school, AS:associate degree,
+           BS:bachelor\(aqs degree (BS or BA), MS:master\(aqs degree (MS or MA),
+           PHD:doctorate degree (PhD,JD,MD), ??:unknown)
+comments   comments by the recorder
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The 4 functions are as follows.
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+tokenized(sentences=items, offset=False)
+.sp
+Given a list of items, returns an iterator of a list of word lists,
+each of which corresponds to an item (sentence). If offset is set to True,
+each element of the word list is a tuple of word (string), start offset, and
+end offset, where offset is represented as a number of 16kHz samples.
+.IP \(bu 2
+phonetic(sentences=items, offset=False)
+.sp
+Given a list of items, returns an iterator of a list of phoneme lists,
+each of which corresponds to an item (sentence). If offset is set to True,
+each element of the phoneme list is a tuple of phoneme (string), start offset,
+and end offset, where offset is represented as a number of 16kHz samples.
+.IP \(bu 2
+audiodata(item, start=0, end=None)
+.sp
+Given an item, returns a chunk of audio samples formatted into a string.
+If start and end are omitted, the entire recording is returned. If only
+end is omitted, samples from the start offset to the end of the recording
+are returned.
+.IP \(bu 2
+play(data)
+.sp
+Play the given audio samples. The audio samples can be obtained from the
+timit.audiodata function. A combined usage sketch follows this list.
+.UNINDENT
+.UNINDENT
+.UNINDENT
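+.sp
+A minimal sketch combining these functions (illustrative; assumes the TIMIT
+sample has been downloaded via \fBnltk.download(\(aqtimit\(aq)\fP, and the item id
+shown in the comment is only an example):
+.sp
+.nf
+.ft C
+from nltk.corpus import timit
+
+item = timit.utteranceids()[0]    # e.g. \(aqdr1\-fvmh0/sa1\(aq
+timit.words(item)                 # tokenized words of the utterance
+timit.phones(item)                # its phonetic transcription
+data = timit.audiodata(item)      # raw 16kHz samples as a byte string
+timit.play(item)                  # play the recording (requires audio support)
+.ft P
+.fi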
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.timit.SpeakerInfo(id, sex, dr, use, recdate, birthdate, ht, race, edu, comments=None)
+Bases: \fBobject\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.timit.TimitCorpusReader(root, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for the TIMIT corpus (or any other corpus with the same
+file layout and use of file formats). The corpus root directory
+should contain the following files:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+timitdic.txt: dictionary of standard transcriptions
+.IP \(bu 2
+spkrinfo.txt: table of speaker information
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+In addition, the root directory should contain one subdirectory
+for each speaker, containing four files for each utterance:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+.txt: text content of utterances
+.IP \(bu 2
+.wrd: tokenized text content of utterances
+.IP \(bu 2
+.phn: phonetic transcription of utterances
+.IP \(bu 2
+.wav: utterance sound file
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B audiodata(utterance, start=0, end=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fileids(filetype=None)
+Return a list of file identifiers for the files that make up
+this corpus.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfiletype\fP \-\- If specified, then \fBfiletype\fP indicates that
+only the files that have the given type should be
+returned. Accepted values are: \fBtxt\fP, \fBwrd\fP, \fBphn\fP,
+\fBwav\fP, or \fBmetadata\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B phone_times(utterances=None)
+Return a list of \fB(phone, start, end)\fP tuples, where each offset
+is represented as a number of 16kHz samples.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B phone_trees(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B phones(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B play(utterance, start=0, end=None)
+Play the given audio sample.
+.INDENT 7.0
+.TP
+.B Parameters
+\fButterance\fP \-\- The utterance id of the sample to play
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sent_times(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sentid(utterance)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B spkrid(utterance)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B spkrinfo(speaker)
+.INDENT 7.0
+.TP
+.B Returns
+the \fBSpeakerInfo\fP record for the given speaker.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B spkrutteranceids(speaker)
+.INDENT 7.0
+.TP
+.B Returns
+A list of all utterances associated with a given
+speaker.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B transcription_dict()
+.INDENT 7.0
+.TP
+.B Returns
+A dictionary giving the \(aqstandard\(aq transcription for
+each word.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B utterance(spkrid, sentid)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B utteranceids(dialect=None, sex=None, spkrid=None, sent_type=None, sentid=None)
+.INDENT 7.0
+.TP
+.B Returns
+A list of the utterance identifiers for all
+utterances in this corpus, or for the given speaker, dialect
+region, gender, sentence type, or sentence number, if
+specified.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B wav(utterance, start=0, end=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B word_times(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(utterances=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.timit.read_timit_block(stream)
+Block reader for timit tagged sentences, which are preceded by a sentence
+number that will be ignored.
+.UNINDENT
+.SS nltk.corpus.reader.toolbox module
+.sp
+Module for reading, writing and manipulating
+Toolbox databases and settings files.
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.toolbox.ToolboxCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B entries(fileids, **kwargs)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fields(fileids, strip=True, unwrap=True, encoding=\(aqutf8\(aq, errors=\(aqstrict\(aq, unicode_fields=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids, key=\(aqlx\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B xml(fileids, key=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.toolbox.demo()
+.UNINDENT
+.SS nltk.corpus.reader.twitter module
+.sp
+A reader for corpora that consist of Tweets. It is assumed that the Tweets
+have been serialised into line\-delimited JSON.
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.twitter.TwitterCorpusReader(root, fileids=None, word_tokenizer=TweetTokenizer(), encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for corpora that consist of Tweets represented as a list of line\-delimited JSON.
+.sp
+Individual Tweets can be tokenized using the default tokenizer, or by a
+custom tokenizer specified as a parameter to the constructor.
+.sp
+Construct a new Tweet corpus reader for a set of documents
+located at the given root directory.
+.sp
+If you made your own tweet collection in a directory called
+\fItwitter\-files\fP, then you can initialise the reader as:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+from nltk.corpus import TwitterCorpusReader
+reader = TwitterCorpusReader(root=\(aq/path/to/twitter\-files\(aq, fileids=\(aq.*\e.json\(aq)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+However, the recommended approach is to set the relevant directory as the
+value of the environment variable \fITWITTER\fP, and then invoke the reader
+as follows:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+root = os.environ[\(aqTWITTER\(aq]
+reader = TwitterCorpusReader(root, \(aq.*\e.json\(aq)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+If you want to work directly with the raw Tweets, the \fIjson\fP library can
+be used:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+import json
+for tweet in reader.docs():
+    print(json.dumps(tweet, indent=1, sort_keys=True))
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B CorpusView
+alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B docs(fileids=None)
+Returns the full Tweet objects, as specified by \fI\%Twitter
+documentation on Tweets\fP\&.
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of dictionaries deserialised
+from JSON.
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B strings(fileids=None)
+Returns only the text content of Tweets in the file(s).
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of Tweets.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tokenized(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of the text content of Tweets,
+as a list of words, screen names, hashtags, URLs and punctuation symbols.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.udhr module
+.sp
+UDHR corpus reader. It mostly deals with encodings.
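+.sp
+A minimal usage sketch (it assumes the \fBudhr\fP corpus has been
+downloaded, e.g. via \fBnltk.download(\(aqudhr\(aq)\fP; \fBEnglish\-Latin1\fP is
+one of the Latin\-1 encoded fileids in that corpus):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import udhr
+>>> fileids = udhr.fileids()                   # one entry per language\-encoding pair
+>>> words = udhr.words(\(aqEnglish\-Latin1\(aq)[:10]  # decoded to unicode transparently
+.ft P
+.fi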
+.INDENT 0.0 +.TP +.B class nltk.corpus.reader.udhr.UdhrCorpusReader(root=\(aqudhr\(aq) +Bases: \fI\%nltk.corpus.reader.plaintext.PlaintextCorpusReader\fP +.INDENT 7.0 +.TP +.B ENCODINGS = [(\(aq.*\-Latin1$\(aq, \(aqlatin\-1\(aq), (\(aq.*\-Hebrew$\(aq, \(aqhebrew\(aq), (\(aq.*\-Arabic$\(aq, \(aqcp1256\(aq), (\(aqCzech_Cesky\-UTF8\(aq, \(aqcp1250\(aq), (\(aq.*\-Cyrillic$\(aq, \(aqcyrillic\(aq), (\(aq.*\-SJIS$\(aq, \(aqSJIS\(aq), (\(aq.*\-GB2312$\(aq, \(aqGB2312\(aq), (\(aq.*\-Latin2$\(aq, \(aqISO\-8859\-2\(aq), (\(aq.*\-Greek$\(aq, \(aqgreek\(aq), (\(aq.*\-UTF8$\(aq, \(aqutf\-8\(aq), (\(aqHungarian_Magyar\-Unicode\(aq, \(aqutf\-16\-le\(aq), (\(aqAmahuaca\(aq, \(aqlatin1\(aq), (\(aqTurkish_Turkce\-Turkish\(aq, \(aqlatin5\(aq), (\(aqLithuanian_Lietuviskai\-Baltic\(aq, \(aqlatin4\(aq), (\(aqJapanese_Nihongo\-EUC\(aq, \(aqEUC\-JP\(aq), (\(aqJapanese_Nihongo\-JIS\(aq, \(aqiso2022_jp\(aq), (\(aqChinese_Mandarin\-HZ\(aq, \(aqhz\(aq), (\(aqAbkhaz\e\e\-Cyrillic\e\e+Abkh\(aq, \(aqcp1251\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B SKIP = {\(aqAmharic\-Afenegus6..60375\(aq, \(aqArmenian\-DallakHelv\(aq, \(aqAzeri_Azerbaijani_Cyrillic\-Az.Times.Cyr.Normal0117\(aq, \(aqAzeri_Azerbaijani_Latin\-Az.Times.Lat0117\(aq, \(aqBhojpuri\-Agra\(aq, \(aqBurmese_Myanmar\-UTF8\(aq, \(aqBurmese_Myanmar\-WinResearcher\(aq, \(aqChinese_Mandarin\-HZ\(aq, \(aqChinese_Mandarin\-UTF8\(aq, \(aqCzech\-Latin2\-err\(aq, \(aqEsperanto\-T61\(aq, \(aqGujarati\-UTF8\(aq, \(aqHungarian_Magyar\-Unicode\(aq, \(aqJapanese_Nihongo\-JIS\(aq, \(aqLao\-UTF8\(aq, \(aqMagahi\-Agra\(aq, \(aqMagahi\-UTF8\(aq, \(aqMarathi\-UTF8\(aq, \(aqNavaho_Dine\-Navajo\-Navaho\-font\(aq, \(aqRussian_Russky\-UTF8~\(aq, \(aqTamil\-UTF8\(aq, \(aqTigrinya_Tigrigna\-VG2Main\(aq, \(aqVietnamese\-TCVN\(aq, \(aqVietnamese\-VIQR\(aq, \(aqVietnamese\-VPS\(aq} +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.util module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.util.ConcatenatedCorpusView(corpus_views) +Bases: \fBnltk.collections.AbstractLazySequence\fP +.sp +A \(aqview\(aq of a corpus file that joins together one or more +\fBStreamBackedCorpusViews\fP\&. At most +one file handle is left open at any time. +.INDENT 7.0 +.TP +.B close() +.UNINDENT +.INDENT 7.0 +.TP +.B iterate_from(start_tok) +Return an iterator that generates the tokens in the corpus +file underlying this corpus view, starting at the token number +\fBstart\fP\&. If \fBstart>=len(self)\fP, then this iterator will +generate no tokens. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.util.PickleCorpusView(fileid, delete_on_gc=False) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.sp +A stream backed corpus view for corpus files that consist of +sequences of serialized Python objects (serialized using +\fBpickle.dump\fP). One use case for this class is to store the +result of running feature detection on a corpus to disk. This can +be useful when performing feature detection is expensive (so we +don\(aqt want to repeat it); but the corpus is too large to store in +memory. 
The following example illustrates this technique: +.sp +.nf +.ft C +>>> from nltk.corpus.reader.util import PickleCorpusView +>>> from nltk.util import LazyMap +>>> feature_corpus = LazyMap(detect_features, corpus) +>>> PickleCorpusView.write(feature_corpus, some_fileid) +>>> pcv = PickleCorpusView(some_fileid) +.ft P +.fi +.INDENT 7.0 +.TP +.B BLOCK_SIZE = 100 +.UNINDENT +.INDENT 7.0 +.TP +.B PROTOCOL = \-1 +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod cache_to_tempfile(sequence, delete_on_gc=True) +Write the given sequence to a temporary file as a pickle +corpus; and then return a \fBPickleCorpusView\fP view for that +temporary corpus file. +.INDENT 7.0 +.TP +.B Parameters +\fBdelete_on_gc\fP \-\- If true, then the temporary file will be +deleted whenever this object gets garbage\-collected. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B read_block(stream) +Read a block from the input stream. +.INDENT 7.0 +.TP +.B Returns +a block of tokens from the input stream +.TP +.B Return type +list(any) +.TP +.B Parameters +\fBstream\fP (\fIstream\fP) \-\- an input stream +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod write(sequence, output_file) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.util.StreamBackedCorpusView(fileid, block_reader=None, startpos=0, encoding=\(aqutf8\(aq) +Bases: \fBnltk.collections.AbstractLazySequence\fP +.sp +A \(aqview\(aq of a corpus file, which acts like a sequence of tokens: +it can be accessed by index, iterated over, etc. However, the +tokens are only constructed as\-needed \-\- the entire corpus is +never stored in memory at once. +.sp +The constructor to \fBStreamBackedCorpusView\fP takes two arguments: +a corpus fileid (specified as a string or as a \fBPathPointer\fP); +and a block reader. A "block reader" is a function that reads +zero or more tokens from a stream, and returns them as a list. A +very simple example of a block reader is: +.sp +.nf +.ft C +>>> def simple_block_reader(stream): +\&... return stream.readline().split() +.ft P +.fi +.sp +This simple block reader reads a single line at a time, and +returns a single token (consisting of a string) for each +whitespace\-separated substring on the line. +.sp +When deciding how to define the block reader for a given +corpus, careful consideration should be given to the size of +blocks handled by the block reader. Smaller block sizes will +increase the memory requirements of the corpus view\(aqs internal +data structures (by 2 integers per block). On the other hand, +larger block sizes may decrease performance for random access to +the corpus. (But note that larger block sizes will \fInot\fP +decrease performance for iteration.) +.sp +Internally, \fBCorpusView\fP maintains a partial mapping from token +index to file position, with one entry per block. When a token +with a given index \fIi\fP is requested, the \fBCorpusView\fP constructs +it as follows: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP 1. 3 +First, it searches the toknum/filepos mapping for the token +index closest to (but less than or equal to) \fIi\fP\&. +.IP 2. 3 +Then, starting at the file position corresponding to that +index, it reads one block at a time using the block reader +until it reaches the requested token. +.UNINDENT +.UNINDENT +.UNINDENT +.sp +The toknum/filepos mapping is created lazily: it is initially +empty, but every time a new block is read, the block\(aqs +initial token is added to the mapping. (Thus, the toknum/filepos +map has one entry per block.) 
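+.sp
+As a concrete sketch of the constructor described above (the fileid
+\fB\(aqcorpus.txt\(aq\fP is hypothetical; \fBsimple_block_reader\fP is the
+block reader defined earlier):
+.sp
+.nf
+.ft C
+>>> view = StreamBackedCorpusView(\(aqcorpus.txt\(aq, simple_block_reader)
+>>> token = view[20]   # reads just enough blocks to reach token 20
+>>> total = len(view)  # forces one full pass so the total count is known
+.ft P
+.fi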
+.sp
+In order to increase efficiency for random access patterns that
+have high degrees of locality, the corpus view may cache one or
+more blocks.
+.INDENT 7.0
+.TP
+.B Note
+Each \fBCorpusView\fP object internally maintains an open file
+object for its underlying corpus file. This file should be
+automatically closed when the \fBCorpusView\fP is garbage collected,
+but if you wish to close it manually, use the \fBclose()\fP
+method. If you access a \fBCorpusView\fP\(aqs items after it has been
+closed, the file object will be automatically re\-opened.
+.TP
+.B Warning
+If the contents of the file are modified during the
+lifetime of the \fBCorpusView\fP, then the \fBCorpusView\fP\(aqs behavior
+is undefined.
+.TP
+.B Warning
+If a unicode encoding is specified when constructing a
+\fBCorpusView\fP, then the block reader may only call
+\fBstream.seek()\fP with offsets that have been returned by
+\fBstream.tell()\fP; in particular, calling \fBstream.seek()\fP with
+relative offsets, or with offsets based on string lengths, may
+lead to incorrect behavior.
+.TP
+.B Variables
+.INDENT 7.0
+.IP \(bu 2
+\fB_block_reader\fP \-\- The function used to read
+a single block from the underlying file stream.
+.IP \(bu 2
+\fB_toknum\fP \-\- A list containing the token index of each block
+that has been processed. In particular, \fB_toknum[i]\fP is the
+token index of the first token in block \fBi\fP\&. Together
+with \fB_filepos\fP, this forms a partial mapping between token
+indices and file positions.
+.IP \(bu 2
+\fB_filepos\fP \-\- A list containing the file position of each block
+that has been processed. In particular, \fB_filepos[i]\fP is the
+file position of the first character in block \fBi\fP\&. Together
+with \fB_toknum\fP, this forms a partial mapping between token
+indices and file positions.
+.IP \(bu 2
+\fB_stream\fP \-\- The stream used to access the underlying corpus file.
+.IP \(bu 2
+\fB_len\fP \-\- The total number of tokens in the corpus, if known;
+or None, if the number of tokens is not yet known.
+.IP \(bu 2
+\fB_eofpos\fP \-\- The character position of the last character in the
+file. This is calculated when the corpus view is initialized,
+and is used to decide when the end of file has been reached.
+.IP \(bu 2
+\fB_cache\fP \-\- A cache of the most recently read block. It
+is encoded as a tuple (start_toknum, end_toknum, tokens), where
+start_toknum is the token index of the first token in the block;
+end_toknum is the token index of the first token not in the
+block; and tokens is a list of the tokens in the block.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B close()
+Close the file stream associated with this corpus view. This
+can be useful if you are worried about running out of file
+handles (although the stream should automatically be closed
+upon garbage collection of the corpus view). If the corpus
+view is accessed after it is closed, it will be automatically
+re\-opened.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property fileid
+The fileid of the file that is accessed by this view.
+.INDENT 7.0
+.TP
+.B Type
+str or PathPointer
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B iterate_from(start_tok)
+Return an iterator that generates the tokens in the corpus
+file underlying this corpus view, starting at the token number
+\fBstart\fP\&. If \fBstart>=len(self)\fP, then this iterator will
+generate no tokens.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_block(stream)
+Read a block from the input stream.
+.INDENT 7.0
+.TP
+.B Returns
+a block of tokens from the input stream
+.TP
+.B Return type
+list(any)
+.TP
+.B Parameters
+\fBstream\fP (\fIstream\fP) \-\- an input stream
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.concat(docs)
+Concatenate together the contents of multiple documents from a
+single corpus, using an appropriate concatenation function. This
+utility function is used by corpus readers when the user requests
+more than one document at a time.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.find_corpus_fileids(root, regexp)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.read_alignedsent_block(stream)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.read_blankline_block(stream)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.read_line_block(stream)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.read_regexp_block(stream, start_re, end_re=None)
+Read a sequence of tokens from a stream, where tokens begin with
+lines that match \fBstart_re\fP\&. If \fBend_re\fP is specified, then
+tokens end with lines that match \fBend_re\fP; otherwise, tokens end
+whenever the next line matching \fBstart_re\fP is found or EOF is reached.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.read_sexpr_block(stream, block_size=16384, comment_char=None)
+Read a sequence of s\-expressions from the stream, and leave the
+stream\(aqs file position at the end of the last complete s\-expression
+read. This function will always return at least one s\-expression,
+unless there are no more s\-expressions in the file.
+.sp
+If the file ends in the middle of an s\-expression, then that
+incomplete s\-expression is returned when the end of the file is
+reached.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBblock_size\fP \-\- The default block size for reading. If an
+s\-expression is longer than one block, then more than one
+block will be read.
+.IP \(bu 2
+\fBcomment_char\fP \-\- A character that marks comments. Any lines
+that begin with this character will be stripped out.
+(If spaces or tabs precede the comment character, then the
+line will not be stripped.)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.read_whitespace_block(stream)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.read_wordpunct_block(stream)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.util.tagged_treebank_para_block_reader(stream)
+.UNINDENT
+.SS nltk.corpus.reader.verbnet module
+.sp
+An NLTK interface to the VerbNet verb lexicon
+.sp
+For details about VerbNet see:
+\fI\%https://verbs.colorado.edu/~mpalmer/projects/verbnet.html\fP
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.verbnet.VerbnetCorpusReader(root, fileids, wrap_etree=False)
+Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP
+.sp
+An NLTK interface to the VerbNet verb lexicon.
+.sp
+From the VerbNet site: "VerbNet (VN) (Kipper\-Schuler 2006) is the largest
+on\-line verb lexicon currently available for English. It is a hierarchical
+domain\-independent, broad\-coverage verb lexicon with mappings to other
+lexical resources such as WordNet (Miller, 1990; Fellbaum, 1998), XTAG
+(XTAG Research Group, 2001), and FrameNet (Baker et al., 1998)."
+.sp
+For details about VerbNet see:
+\fI\%https://verbs.colorado.edu/~mpalmer/projects/verbnet.html\fP
+.INDENT 7.0
+.TP
+.B classids(lemma=None, wordnetid=None, fileid=None, classid=None)
+Return a list of the VerbNet class identifiers.
If a file +identifier is specified, then return only the VerbNet class +identifiers for classes (and subclasses) defined by that file. +If a lemma is specified, then return only VerbNet class +identifiers for classes that contain that lemma as a member. +If a wordnetid is specified, then return only identifiers for +classes that contain that wordnetid as a member. If a classid +is specified, then return only identifiers for subclasses of +the specified VerbNet class. +If nothing is specified, return all classids within VerbNet +.UNINDENT +.INDENT 7.0 +.TP +.B fileids(vnclass_ids=None) +Return a list of fileids that make up this corpus. If +\fBvnclass_ids\fP is specified, then return the fileids that make +up the specified VerbNet class(es). +.UNINDENT +.INDENT 7.0 +.TP +.B frames(vnclass) +Given a VerbNet class, this method returns VerbNet frames +.sp +The members returned are: +1) Example +2) Description +3) Syntax +4) Semantics +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.TP +.B Returns +frames \- a list of frame dictionaries +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lemmas(vnclass=None) +Return a list of all verb lemmas that appear in any class, or +in the \fBclassid\fP if specified. +.UNINDENT +.INDENT 7.0 +.TP +.B longid(shortid) +Returns longid of a VerbNet class +.sp +Given a short VerbNet class identifier (eg \(aq37.10\(aq), map it +to a long id (eg \(aqconfess\-37.10\(aq). If \fBshortid\fP is already a +long id, then return it as\-is +.UNINDENT +.INDENT 7.0 +.TP +.B pprint(vnclass) +Returns pretty printed version of a VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the given VerbNet class. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +.UNINDENT +.sp +containing the xml contents of a VerbNet class. +.UNINDENT +.INDENT 7.0 +.TP +.B pprint_frames(vnclass, indent=\(aq\(aq) +Returns pretty version of all frames in a VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the list of frames within the VerbNet class. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B pprint_members(vnclass, indent=\(aq\(aq) +Returns pretty printed version of members in a VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the given VerbNet class\(aqs member verbs. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B pprint_subclasses(vnclass, indent=\(aq\(aq) +Returns pretty printed version of subclasses of VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the given VerbNet class\(aqs subclasses. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B pprint_themroles(vnclass, indent=\(aq\(aq) +Returns pretty printed version of thematic roles in a VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the given VerbNet class\(aqs thematic roles. 
+.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B shortid(longid) +Returns shortid of a VerbNet class +.sp +Given a long VerbNet class identifier (eg \(aqconfess\-37.10\(aq), +map it to a short id (eg \(aq37.10\(aq). If \fBlongid\fP is already a +short id, then return it as\-is. +.UNINDENT +.INDENT 7.0 +.TP +.B subclasses(vnclass) +Returns subclass ids, if any exist +.sp +Given a VerbNet class, this method returns subclass ids (if they exist) +in a list of strings. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.TP +.B Returns +list of subclasses +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B themroles(vnclass) +Returns thematic roles participating in a VerbNet class +.sp +Members returned as part of roles are\- +1) Type +2) Modifiers +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.TP +.B Returns +themroles: A list of thematic roles in the VerbNet class +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B vnclass(fileid_or_classid) +Returns VerbNet class ElementTree +.sp +Return an ElementTree containing the xml for the specified +VerbNet class. +.INDENT 7.0 +.TP +.B Parameters +\fBfileid_or_classid\fP \-\- An identifier specifying which class +should be returned. Can be a file identifier (such as +\fB\(aqput\-9.1.xml\(aq\fP), or a VerbNet class identifier (such as +\fB\(aqput\-9.1\(aq\fP) or a short VerbNet class identifier (such as +\fB\(aq9.1\(aq\fP). +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B wordnetids(vnclass=None) +Return a list of all wordnet identifiers that appear in any +class, or in \fBclassid\fP if specified. +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.wordlist module +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.wordlist.MWAPPDBCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP +.sp +This class is used to read the list of word pairs from the subset of lexical +pairs of The Paraphrase Database (PPDB) XXXL used in the Monolingual Word +Alignment (MWA) algorithm described in Sultan et al. (2014a, 2014b, 2015): +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fI\%http://acl2014.org/acl2014/Q14/pdf/Q14\-1017\fP +.IP \(bu 2 +\fI\%http://www.aclweb.org/anthology/S14\-2039\fP +.IP \(bu 2 +\fI\%http://www.aclweb.org/anthology/S15\-2027\fP +.UNINDENT +.UNINDENT +.UNINDENT +.sp +The original source of the full PPDB corpus can be found on +\fI\%http://www.cis.upenn.edu/~ccb/ppdb/\fP +.INDENT 7.0 +.TP +.B Returns +a list of tuples of similar lexical terms. +.UNINDENT +.INDENT 7.0 +.TP +.B entries(fileids=\(aqppdb\-1.0\-xxxl\-lexical.extended.synonyms.uniquepairs\(aq) +.INDENT 7.0 +.TP +.B Returns +a tuple of synonym word pairs. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B mwa_ppdb_xxxl_file = \(aqppdb\-1.0\-xxxl\-lexical.extended.synonyms.uniquepairs\(aq +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.wordlist.NonbreakingPrefixesCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP +.sp +This is a class to read the nonbreaking prefixes textfiles from the +Moses Machine Translation toolkit. These lists are used in the Python port +of the Moses\(aq word tokenizer. 
+.INDENT 7.0 +.TP +.B available_langs = {\(aqca\(aq: \(aqca\(aq, \(aqcatalan\(aq: \(aqca\(aq, \(aqcs\(aq: \(aqcs\(aq, \(aqczech\(aq: \(aqcs\(aq, \(aqde\(aq: \(aqde\(aq, \(aqdutch\(aq: \(aqnl\(aq, \(aqel\(aq: \(aqel\(aq, \(aqen\(aq: \(aqen\(aq, \(aqenglish\(aq: \(aqen\(aq, \(aqes\(aq: \(aqes\(aq, \(aqfi\(aq: \(aqfi\(aq, \(aqfinnish\(aq: \(aqfi\(aq, \(aqfr\(aq: \(aqfr\(aq, \(aqfrench\(aq: \(aqfr\(aq, \(aqgerman\(aq: \(aqde\(aq, \(aqgreek\(aq: \(aqel\(aq, \(aqhu\(aq: \(aqhu\(aq, \(aqhungarian\(aq: \(aqhu\(aq, \(aqicelandic\(aq: \(aqis\(aq, \(aqis\(aq: \(aqis\(aq, \(aqit\(aq: \(aqit\(aq, \(aqitalian\(aq: \(aqit\(aq, \(aqlatvian\(aq: \(aqlv\(aq, \(aqlv\(aq: \(aqlv\(aq, \(aqnl\(aq: \(aqnl\(aq, \(aqpl\(aq: \(aqpl\(aq, \(aqpolish\(aq: \(aqpl\(aq, \(aqportuguese\(aq: \(aqpt\(aq, \(aqpt\(aq: \(aqpt\(aq, \(aqro\(aq: \(aqro\(aq, \(aqromanian\(aq: \(aqro\(aq, \(aqru\(aq: \(aqru\(aq, \(aqrussian\(aq: \(aqru\(aq, \(aqsk\(aq: \(aqsk\(aq, \(aqsl\(aq: \(aqsl\(aq, \(aqslovak\(aq: \(aqsk\(aq, \(aqslovenian\(aq: \(aqsl\(aq, \(aqspanish\(aq: \(aqes\(aq, \(aqsv\(aq: \(aqsv\(aq, \(aqswedish\(aq: \(aqsv\(aq, \(aqta\(aq: \(aqta\(aq, \(aqtamil\(aq: \(aqta\(aq} +.UNINDENT +.INDENT 7.0 +.TP +.B words(lang=None, fileids=None, ignore_lines_startswith=\(aq#\(aq) +This module returns a list of nonbreaking prefixes for the specified +language(s). +.sp +.nf +.ft C +>>> from nltk.corpus import nonbreaking_prefixes as nbp +>>> nbp.words(\(aqen\(aq)[:10] == [u\(aqA\(aq, u\(aqB\(aq, u\(aqC\(aq, u\(aqD\(aq, u\(aqE\(aq, u\(aqF\(aq, u\(aqG\(aq, u\(aqH\(aq, u\(aqI\(aq, u\(aqJ\(aq] +True +>>> nbp.words(\(aqta\(aq)[:5] == [u\(aqஅ\(aq, u\(aqஆ\(aq, u\(aqஇ\(aq, u\(aqஈ\(aq, u\(aqஉ\(aq] +True +.ft P +.fi +.INDENT 7.0 +.TP +.B Returns +a list words for the specified language(s). +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.wordlist.SwadeshCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP +.INDENT 7.0 +.TP +.B entries(fileids=None) +.INDENT 7.0 +.TP +.B Returns +a tuple of words for the specified fileids. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.wordlist.UnicharsCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP +.sp +This class is used to read lists of characters from the Perl Unicode +Properties (see \fI\%http://perldoc.perl.org/perluniprops.html\fP). +The files in the perluniprop.zip are extracted using the Unicode::Tussle +module from http://search.cpan.org/~bdfoy/Unicode\-Tussle\-1.11/lib/Unicode/Tussle.pm +.INDENT 7.0 +.TP +.B available_categories = [\(aqClose_Punctuation\(aq, \(aqCurrency_Symbol\(aq, \(aqIsAlnum\(aq, \(aqIsAlpha\(aq, \(aqIsLower\(aq, \(aqIsN\(aq, \(aqIsSc\(aq, \(aqIsSo\(aq, \(aqIsUpper\(aq, \(aqLine_Separator\(aq, \(aqNumber\(aq, \(aqOpen_Punctuation\(aq, \(aqPunctuation\(aq, \(aqSeparator\(aq, \(aqSymbol\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B chars(category=None, fileids=None) +This module returns a list of characters from the Perl Unicode Properties. +They are very useful when porting Perl tokenizers to Python. 
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import perluniprops as pup
+>>> pup.chars(\(aqOpen_Punctuation\(aq)[:5] == [u\(aq(\(aq, u\(aq[\(aq, u\(aq{\(aq, u\(aq༺\(aq, u\(aq༼\(aq]
+True
+>>> pup.chars(\(aqCurrency_Symbol\(aq)[:5] == [u\(aq$\(aq, u\(aq¢\(aq, u\(aq£\(aq, u\(aq¤\(aq, u\(aq¥\(aq]
+True
+>>> pup.available_categories
+[\(aqClose_Punctuation\(aq, \(aqCurrency_Symbol\(aq, \(aqIsAlnum\(aq, \(aqIsAlpha\(aq, \(aqIsLower\(aq, \(aqIsN\(aq, \(aqIsSc\(aq, \(aqIsSo\(aq, \(aqIsUpper\(aq, \(aqLine_Separator\(aq, \(aqNumber\(aq, \(aqOpen_Punctuation\(aq, \(aqPunctuation\(aq, \(aqSeparator\(aq, \(aqSymbol\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+a list of characters given the specific unicode character category
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.wordlist.WordListCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+List of words, one per line. Blank lines are ignored.
+.INDENT 7.0
+.TP
+.B words(fileids=None, ignore_lines_startswith=\(aq\en\(aq)
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.wordnet module
+.sp
+An NLTK interface for WordNet
+.sp
+WordNet is a lexical database of English.
+Using synsets, it helps find conceptual relationships between words,
+such as hypernyms, hyponyms, synonyms, and antonyms.
+.sp
+For details about WordNet see:
+\fI\%http://wordnet.princeton.edu/\fP
+.sp
+This module also allows you to find lemmas in languages
+other than English from the Open Multilingual Wordnet
+\fI\%http://compling.hss.ntu.edu.sg/omw/\fP
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.wordnet.Lemma(wordnet_corpus_reader, synset, name, lexname_index, lex_id, syntactic_marker)
+Bases: \fBnltk.corpus.reader.wordnet._WordNetObject\fP
+.sp
+The lexical entry for a single morphological form of a
+sense\-disambiguated word.
+.sp
+Create a Lemma from a "<word>.<pos>.<number>.<lemma>" string where:
+<word> is the morphological stem identifying the synset
+<pos> is one of the module attributes ADJ, ADJ_SAT, ADV, NOUN or VERB
+<number> is the sense number, counting from 0.
+<lemma> is the morphological form of interest
+.sp
+Note that <word> and <lemma> can be different, e.g. the Synset
+\(aqsalt.n.03\(aq has the Lemmas \(aqsalt.n.03.salt\(aq, \(aqsalt.n.03.saltiness\(aq and
+\(aqsalt.n.03.salinity\(aq.
+.sp
+Lemma attributes, accessible via methods with the same name:
+.INDENT 7.0
+.IP \(bu 2
+name: The canonical name of this lemma.
+.IP \(bu 2
+synset: The synset that this lemma belongs to.
+.IP \(bu 2
+syntactic_marker: For adjectives, the WordNet string identifying the
+syntactic position relative to the modified noun. See:
+\fI\%https://wordnet.princeton.edu/documentation/wninput5wn\fP
+For all other parts of speech, this attribute is None.
+.IP \(bu 2
+count: The frequency of this lemma in wordnet.
+.UNINDENT
+.sp
+Lemma methods:
+.sp
+Lemmas have the following methods for retrieving related Lemmas.
They correspond to the names for the pointer symbols defined here:
+\fI\%https://wordnet.princeton.edu/documentation/wninput5wn\fP
+These methods all return lists of Lemmas:
+.INDENT 7.0
+.IP \(bu 2
+antonyms
+.IP \(bu 2
+hypernyms, instance_hypernyms
+.IP \(bu 2
+hyponyms, instance_hyponyms
+.IP \(bu 2
+member_holonyms, substance_holonyms, part_holonyms
+.IP \(bu 2
+member_meronyms, substance_meronyms, part_meronyms
+.IP \(bu 2
+topic_domains, region_domains, usage_domains
+.IP \(bu 2
+attributes
+.IP \(bu 2
+derivationally_related_forms
+.IP \(bu 2
+entailments
+.IP \(bu 2
+causes
+.IP \(bu 2
+also_sees
+.IP \(bu 2
+verb_groups
+.IP \(bu 2
+similar_tos
+.IP \(bu 2
+pertainyms
+.UNINDENT
+.INDENT 7.0
+.TP
+.B antonyms()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B count()
+Return the frequency count for this Lemma
+.UNINDENT
+.INDENT 7.0
+.TP
+.B derivationally_related_forms()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B frame_ids()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B frame_strings()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B key()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lang()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B name()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pertainyms()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B synset()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B syntactic_marker()
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.wordnet.Synset(wordnet_corpus_reader)
+Bases: \fBnltk.corpus.reader.wordnet._WordNetObject\fP
+.sp
+Create a Synset from a "<lemma>.<pos>.<number>" string where:
+<lemma> is the word\(aqs morphological stem
+<pos> is one of the module attributes ADJ, ADJ_SAT, ADV, NOUN or VERB
+<number> is the sense number, counting from 0.
+.sp
+Synset attributes, accessible via methods with the same name:
+.INDENT 7.0
+.IP \(bu 2
+name: The canonical name of this synset, formed using the first lemma
+of this synset. Note that this may be different from the name
+passed to the constructor if that string used a different lemma to
+identify the synset.
+.IP \(bu 2
+pos: The synset\(aqs part of speech, matching one of the module level
+attributes ADJ, ADJ_SAT, ADV, NOUN or VERB.
+.IP \(bu 2
+lemmas: A list of the Lemma objects for this synset.
+.IP \(bu 2
+definition: The definition for this synset.
+.IP \(bu 2
+examples: A list of example strings for this synset.
+.IP \(bu 2
+offset: The offset in the WordNet dict file of this synset.
+.IP \(bu 2
+lexname: The name of the lexicographer file containing this synset.
+.UNINDENT
+.sp
+Synset methods:
+.sp
+Synsets have the following methods for retrieving related Synsets.
+They correspond to the names for the pointer symbols defined here:
+\fI\%https://wordnet.princeton.edu/documentation/wninput5wn\fP
+These methods all return lists of Synsets.
+.INDENT 7.0
+.IP \(bu 2
+hypernyms, instance_hypernyms
+.IP \(bu 2
+hyponyms, instance_hyponyms
+.IP \(bu 2
+member_holonyms, substance_holonyms, part_holonyms
+.IP \(bu 2
+member_meronyms, substance_meronyms, part_meronyms
+.IP \(bu 2
+attributes
+.IP \(bu 2
+entailments
+.IP \(bu 2
+causes
+.IP \(bu 2
+also_sees
+.IP \(bu 2
+verb_groups
+.IP \(bu 2
+similar_tos
+.UNINDENT
+.sp
+Additionally, Synsets support the following methods specific to the
+hypernym relation:
+.INDENT 7.0
+.IP \(bu 2
+root_hypernyms
+.IP \(bu 2
+common_hypernyms
+.IP \(bu 2
+lowest_common_hypernyms
+.UNINDENT
+.sp
+Note that Synsets do not support the following relations because
+these are defined by WordNet as lexical relations:
+.INDENT 7.0
+.IP \(bu 2
+antonyms
+.IP \(bu 2
+derivationally_related_forms
+.IP \(bu 2
+pertainyms
+.UNINDENT
+.INDENT 7.0
+.TP
+.B acyclic_tree(children=<built\-in function iter>, depth=\-1, cut_mark=None, traversed=None)
+Traverse the nodes of a tree in depth\-first order,
+discarding eventual cycles within any branch,
+adding cut_mark (when specified) if cycles were truncated.
+.sp
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.sp
+Catches all cycles:
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> from nltk.util import acyclic_depth_first as acyclic_tree
+>>> wn=nltk.corpus.wordnet
+>>> from pprint import pprint
+>>> pprint(acyclic_tree(wn.synset(\(aqdog.n.01\(aq), lambda s:s.hypernyms(),cut_mark=\(aq...\(aq))
+[Synset(\(aqdog.n.01\(aq),
+ [Synset(\(aqcanine.n.02\(aq),
+  [Synset(\(aqcarnivore.n.01\(aq),
+   [Synset(\(aqplacental.n.01\(aq),
+    [Synset(\(aqmammal.n.01\(aq),
+     [Synset(\(aqvertebrate.n.01\(aq),
+      [Synset(\(aqchordate.n.01\(aq),
+       [Synset(\(aqanimal.n.01\(aq),
+        [Synset(\(aqorganism.n.01\(aq),
+         [Synset(\(aqliving_thing.n.01\(aq),
+          [Synset(\(aqwhole.n.02\(aq),
+           [Synset(\(aqobject.n.01\(aq),
+            [Synset(\(aqphysical_entity.n.01\(aq),
+             [Synset(\(aqentity.n.01\(aq)]]]]]]]]]]]]],
+ [Synset(\(aqdomestic_animal.n.01\(aq), "Cycle(Synset(\(aqanimal.n.01\(aq),\-3,...)"]]
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B closure(rel, depth=\- 1)
+Return the transitive closure of source under the rel
+relationship, breadth\-first, discarding cycles:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import wordnet as wn
+>>> computer = wn.synset(\(aqcomputer.n.01\(aq)
+>>> topic = lambda s:s.topic_domains()
+>>> print(list(computer.closure(topic)))
+[Synset(\(aqcomputer_science.n.01\(aq)]
+.ft P
+.fi
+.sp
+UserWarning: Discarded redundant search for Synset(\(aqcomputer.n.01\(aq) at depth 2
+.sp
+Include redundant paths (but only once), avoiding duplicate searches
+(from \(aqanimal.n.01\(aq to \(aqentity.n.01\(aq):
+.sp
+.nf
+.ft C
+>>> dog = wn.synset(\(aqdog.n.01\(aq)
+>>> hyp = lambda s:s.hypernyms()
+>>> print(list(dog.closure(hyp)))
+[Synset(\(aqcanine.n.02\(aq), Synset(\(aqdomestic_animal.n.01\(aq), Synset(\(aqcarnivore.n.01\(aq),
+Synset(\(aqanimal.n.01\(aq), Synset(\(aqplacental.n.01\(aq), Synset(\(aqorganism.n.01\(aq),
+Synset(\(aqmammal.n.01\(aq), Synset(\(aqliving_thing.n.01\(aq), Synset(\(aqvertebrate.n.01\(aq),
+Synset(\(aqwhole.n.02\(aq), Synset(\(aqchordate.n.01\(aq), Synset(\(aqobject.n.01\(aq),
+Synset(\(aqphysical_entity.n.01\(aq), Synset(\(aqentity.n.01\(aq)]
+.ft P
+.fi
+.sp
+UserWarning: Discarded redundant search for Synset(\(aqanimal.n.01\(aq) at depth 7
+.UNINDENT
+.INDENT 7.0
+.TP
+.B common_hypernyms(other)
+Find all synsets that are hypernyms of this synset and the
+other synset.
+.INDENT 7.0 +.TP +.B Parameters +\fBother\fP (\fISynset\fP) \-\- other input synset. +.TP +.B Returns +The synsets that are hypernyms of both synsets. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B definition() +.UNINDENT +.INDENT 7.0 +.TP +.B examples() +.UNINDENT +.INDENT 7.0 +.TP +.B frame_ids() +.UNINDENT +.INDENT 7.0 +.TP +.B hypernym_distances(distance=0, simulate_root=False) +Get the path(s) from this synset to the root, counting the distance +of each node from the initial node on the way. A set of +(synset, distance) tuples is returned. +.INDENT 7.0 +.TP +.B Parameters +\fBdistance\fP (\fIint\fP) \-\- the distance (number of edges) from this hypernym to +the original hypernym \fBSynset\fP on which this method was called. +.TP +.B Returns +A set of \fB(Synset, int)\fP tuples where each \fBSynset\fP is +a hypernym of the first \fBSynset\fP\&. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B hypernym_paths() +Get the path(s) from this synset to the root, where each path is a +list of the synset nodes traversed on the way to the root. +.INDENT 7.0 +.TP +.B Returns +A list of lists, where each list gives the node sequence +connecting the initial \fBSynset\fP node and a root node. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B jcn_similarity(other, ic, verbose=False) +Jiang\-Conrath Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node) and that of the two input Synsets. The relationship is +given by the equation 1 / (IC(s1) + IC(s2) \- 2 * IC(lcs)). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by +\fBnltk.corpus.wordnet_ic.ic()\fP). +.UNINDENT +.TP +.B Returns +A float score denoting the similarity of the two \fBSynset\fP +objects. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lch_similarity(other, verbose=False, simulate_root=True) +Leacock Chodorow Similarity: +Return a score denoting how similar two word senses are, based on the +shortest path that connects the senses (as above) and the maximum depth +of the taxonomy in which the senses occur. The relationship is given as +\-log(p/2d) where p is the shortest path length and d is the taxonomy +depth. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not +share a single root which disallows this metric from working for +synsets that are not connected. This flag (True by default) +creates a fake root that connects all the taxonomies. Set it +to false to disable this behavior. For the noun taxonomy, +there is usually a default root except for WordNet version 1.6. +If you are using wordnet 1.6, a fake root will be added for nouns +as well. +.UNINDENT +.TP +.B Returns +A score denoting the similarity of the two \fBSynset\fP objects, +normally greater than 0. None is returned if no connecting path +could be found. If a \fBSynset\fP is compared with itself, the +maximum score is returned, which varies depending on the taxonomy +depth. 
+.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lemma_names(lang=\(aqeng\(aq) +Return all the lemma_names associated with the synset +.UNINDENT +.INDENT 7.0 +.TP +.B lemmas(lang=\(aqeng\(aq) +Return all the lemma objects associated with the synset +.UNINDENT +.INDENT 7.0 +.TP +.B lexname() +.UNINDENT +.INDENT 7.0 +.TP +.B lin_similarity(other, ic, verbose=False) +Lin Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node) and that of the two input Synsets. The relationship is +given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by +\fBnltk.corpus.wordnet_ic.ic()\fP). +.UNINDENT +.TP +.B Returns +A float score denoting the similarity of the two \fBSynset\fP +objects, in the range 0 to 1. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lowest_common_hypernyms(other, simulate_root=False, use_min_depth=False) +Get a list of lowest synset(s) that both synsets have as a hypernym. +When \fIuse_min_depth == False\fP this means that the synset which appears +as a hypernym of both \fIself\fP and \fIother\fP with the lowest maximum depth +is returned or if there are multiple such synsets at the same depth +they are all returned +.sp +However, if \fIuse_min_depth == True\fP then the synset(s) which has/have +the lowest minimum depth and appear(s) in both paths is/are returned. +.sp +By setting the use_min_depth flag to True, the behavior of NLTK2 can be +preserved. This was changed in NLTK3 to give more accurate results in a +small set of cases, generally with synsets concerning people. (eg: +\(aqchef.n.01\(aq, \(aqfireman.n.01\(aq, etc.) +.sp +This method is an implementation of Ted Pedersen\(aqs "Lowest Common +Subsumer" method from the Perl Wordnet module. It can return either +"self" or "other" if they are a hypernym of the other. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- other input synset +.IP \(bu 2 +\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not +share a single root which disallows this metric from working for +synsets that are not connected. This flag (False by default) +creates a fake root that connects all the taxonomies. Set it +to True to enable this behavior. For the noun taxonomy, +there is usually a default root except for WordNet version 1.6. +If you are using wordnet 1.6, a fake root will need to be added +for nouns as well. +.IP \(bu 2 +\fBuse_min_depth\fP (\fIbool\fP) \-\- This setting mimics older (v2) behavior of NLTK +wordnet If True, will use the min_depth function to calculate the +lowest common hypernyms. This is known to give strange results for +some synset pairs (eg: \(aqchef.n.01\(aq, \(aqfireman.n.01\(aq) but is retained +for backwards compatibility +.UNINDENT +.TP +.B Returns +The synsets that are the lowest common hypernyms of both +synsets +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B max_depth() +.INDENT 7.0 +.TP +.B Returns +The length of the longest hypernym path from this +.UNINDENT +.sp +synset to the root. +.UNINDENT +.INDENT 7.0 +.TP +.B min_depth() +.INDENT 7.0 +.TP +.B Returns +The length of the shortest hypernym path from this +.UNINDENT +.sp +synset to the root. 
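+.sp
+For example, counting edges along the hypernym paths shown in the
+\fBtree()\fP example below, \fBSynset(\(aqdog.n.01\(aq)\fP reaches the root in 8
+edges via \fBdomestic_animal.n.01\fP but needs 13 via \fBcanine.n.02\fP, so
+the two depths differ (a sketch, assuming the wordnet corpus is
+installed):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import wordnet as wn
+>>> wn.synset(\(aqdog.n.01\(aq).min_depth()
+8
+>>> wn.synset(\(aqdog.n.01\(aq).max_depth()
+13
+.ft P
+.fi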
+.UNINDENT
+.INDENT 7.0
+.TP
+.B mst(children=<built\-in function iter>)
+Output a Minimum Spanning Tree (MST) of an unweighted graph,
+by traversing the nodes of a tree in breadth\-first order,
+discarding eventual cycles.
+.sp
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> from nltk.util import unweighted_minimum_spanning_tree as mst
+>>> wn=nltk.corpus.wordnet
+>>> from pprint import pprint
+>>> pprint(mst(wn.synset(\(aqbound.a.01\(aq), lambda s:s.also_sees()))
+[Synset(\(aqbound.a.01\(aq),
+ [Synset(\(aqunfree.a.02\(aq),
+  [Synset(\(aqconfined.a.02\(aq)],
+  [Synset(\(aqdependent.a.01\(aq)],
+  [Synset(\(aqrestricted.a.01\(aq), [Synset(\(aqclassified.a.02\(aq)]]]]
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B name()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B offset()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B path_similarity(other, verbose=False, simulate_root=True)
+Path Distance Similarity:
+Return a score denoting how similar two word senses are, based on the
+shortest path that connects the senses in the is\-a (hypernym/hyponym)
+taxonomy. The score is in the range 0 to 1, except in those cases where
+a path cannot be found (will only be true for verbs as there are many
+distinct verb taxonomies), in which case None is returned. A score of
+1 represents identity, i.e. comparing a sense with itself will return 1.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not
+share a single root which disallows this metric from working for
+synsets that are not connected. This flag (True by default)
+creates a fake root that connects all the taxonomies. Set it
+to false to disable this behavior. For the noun taxonomy,
+there is usually a default root except for WordNet version 1.6.
+If you are using wordnet 1.6, a fake root will be added for nouns
+as well.
+.UNINDENT
+.TP
+.B Returns
+A score denoting the similarity of the two \fBSynset\fP objects,
+normally between 0 and 1. None is returned if no connecting path
+could be found. 1 is returned if a \fBSynset\fP is compared with
+itself.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pos()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B res_similarity(other, ic, verbose=False)
+Resnik Similarity:
+Return a score denoting how similar two word senses are, based on the
+Information Content (IC) of the Least Common Subsumer (most specific
+ancestor node).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by
+\fBnltk.corpus.wordnet_ic.ic()\fP).
+.UNINDENT
+.TP
+.B Returns
+A float score denoting the similarity of the two \fBSynset\fP
+objects. Synsets whose LCS is the root node of the taxonomy will
+have a score of 0 (e.g. N[\(aqdog\(aq][0] and N[\(aqtable\(aq][0]).
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B root_hypernyms()
+Get the topmost hypernyms of this synset in WordNet.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B shortest_path_distance(other, simulate_root=False)
+Returns the distance of the shortest path linking the two synsets (if
+one exists). For each synset, all the ancestor nodes and their
+distances are recorded and compared.
The ancestor node common to both +synsets that can be reached with the minimum number of traversals is +used. If no ancestor nodes are common, None is returned. If a node is +compared with itself 0 is returned. +.INDENT 7.0 +.TP +.B Parameters +\fBother\fP (\fISynset\fP) \-\- The Synset to which the shortest path will be found. +.TP +.B Returns +The number of edges in the shortest path connecting the two +nodes, or None if no path exists. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tree(rel, depth=\- 1, cut_mark=None) +Return the full relation tree, including self, +discarding cycles: +.sp +.nf +.ft C +>>> from nltk.corpus import wordnet as wn +>>> from pprint import pprint +>>> computer = wn.synset(\(aqcomputer.n.01\(aq) +>>> topic = lambda s:s.topic_domains() +>>> pprint(computer.tree(topic)) +[Synset(\(aqcomputer.n.01\(aq), [Synset(\(aqcomputer_science.n.01\(aq)]] +.ft P +.fi +.sp +UserWarning: Discarded redundant search for Synset(\(aqcomputer.n.01\(aq) at depth \-3 +.sp +But keep duplicate branches (from \(aqanimal.n.01\(aq to \(aqentity.n.01\(aq): +.sp +.nf +.ft C +>>> dog = wn.synset(\(aqdog.n.01\(aq) +>>> hyp = lambda s:s.hypernyms() +>>> pprint(dog.tree(hyp)) +[Synset(\(aqdog.n.01\(aq), + [Synset(\(aqcanine.n.02\(aq), + [Synset(\(aqcarnivore.n.01\(aq), + [Synset(\(aqplacental.n.01\(aq), + [Synset(\(aqmammal.n.01\(aq), + [Synset(\(aqvertebrate.n.01\(aq), + [Synset(\(aqchordate.n.01\(aq), + [Synset(\(aqanimal.n.01\(aq), + [Synset(\(aqorganism.n.01\(aq), + [Synset(\(aqliving_thing.n.01\(aq), + [Synset(\(aqwhole.n.02\(aq), + [Synset(\(aqobject.n.01\(aq), + [Synset(\(aqphysical_entity.n.01\(aq), + [Synset(\(aqentity.n.01\(aq)]]]]]]]]]]]]], + [Synset(\(aqdomestic_animal.n.01\(aq), + [Synset(\(aqanimal.n.01\(aq), + [Synset(\(aqorganism.n.01\(aq), + [Synset(\(aqliving_thing.n.01\(aq), + [Synset(\(aqwhole.n.02\(aq), + [Synset(\(aqobject.n.01\(aq), + [Synset(\(aqphysical_entity.n.01\(aq), [Synset(\(aqentity.n.01\(aq)]]]]]]]]] +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B wup_similarity(other, verbose=False, simulate_root=True) +Wu\-Palmer Similarity: +Return a score denoting how similar two word senses are, based on the +depth of the two senses in the taxonomy and that of their Least Common +Subsumer (most specific ancestor node). Previously, the scores computed +by this implementation did _not_ always agree with those given by +Pedersen\(aqs Perl implementation of WordNet Similarity. However, with +the addition of the simulate_root flag (see below), the score for +verbs now almost always agree but not always for nouns. +.sp +The LCS does not necessarily feature in the shortest path connecting +the two senses, as it is by definition the common ancestor deepest in +the taxonomy, not closest to the two senses. Typically, however, it +will so feature. Where multiple candidates for the LCS exist, that +whose shortest path to the root node is the longest will be selected. +Where the LCS has multiple paths to the root, the longer path is used +for the purposes of the calculation. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not +share a single root which disallows this metric from working for +synsets that are not connected. This flag (True by default) +creates a fake root that connects all the taxonomies. Set it +to false to disable this behavior. 
For the noun taxonomy,
+there is usually a default root except for WordNet version 1.6.
+If you are using wordnet 1.6, a fake root will be added for nouns
+as well.
+.UNINDENT
+.TP
+.B Returns
+A float score denoting the similarity of the two \fBSynset\fP
+objects, normally greater than zero. If no connecting path between
+the two senses can be found, None is returned.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.wordnet.WordNetCorpusReader(root, omw_reader)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+A corpus reader used to access wordnet or its variants.
+.INDENT 7.0
+.TP
+.B ADJ = \(aqa\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ADJ_SAT = \(aqs\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ADV = \(aqr\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B MORPHOLOGICAL_SUBSTITUTIONS = {\(aqa\(aq: [(\(aqer\(aq, \(aq\(aq), (\(aqest\(aq, \(aq\(aq), (\(aqer\(aq, \(aqe\(aq), (\(aqest\(aq, \(aqe\(aq)], \(aqn\(aq: [(\(aqs\(aq, \(aq\(aq), (\(aqses\(aq, \(aqs\(aq), (\(aqves\(aq, \(aqf\(aq), (\(aqxes\(aq, \(aqx\(aq), (\(aqzes\(aq, \(aqz\(aq), (\(aqches\(aq, \(aqch\(aq), (\(aqshes\(aq, \(aqsh\(aq), (\(aqmen\(aq, \(aqman\(aq), (\(aqies\(aq, \(aqy\(aq)], \(aqr\(aq: [], \(aqs\(aq: [(\(aqer\(aq, \(aq\(aq), (\(aqest\(aq, \(aq\(aq), (\(aqer\(aq, \(aqe\(aq), (\(aqest\(aq, \(aqe\(aq)], \(aqv\(aq: [(\(aqs\(aq, \(aq\(aq), (\(aqies\(aq, \(aqy\(aq), (\(aqes\(aq, \(aqe\(aq), (\(aqes\(aq, \(aq\(aq), (\(aqed\(aq, \(aqe\(aq), (\(aqed\(aq, \(aq\(aq), (\(aqing\(aq, \(aqe\(aq), (\(aqing\(aq, \(aq\(aq)]}
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NOUN = \(aqn\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B VERB = \(aqv\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B all_lemma_names(pos=None, lang=\(aqeng\(aq)
+Return all lemma names for all synsets for the given
+part of speech tag and language or languages. If pos is
+not specified, all synsets for all parts of speech will
+be used.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B all_synsets(pos=None)
+Iterate over all synsets with a given part of speech tag.
+If no pos is specified, all synsets for all parts of speech
+will be loaded.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B citation(lang=\(aqomw\(aq)
+Return the contents of the citation.bib file (for omw);
+use lang=lang to get the citation for an individual language.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B custom_lemmas(tab_file, lang)
+Reads a custom tab file containing mappings of lemmas in the given
+language to Princeton WordNet 3.0 synset offsets, allowing NLTK\(aqs
+WordNet functions to then be used with that language.
+.sp
+See the "Tab files" section at \fI\%http://compling.hss.ntu.edu.sg/omw/\fP for
+documentation on the Multilingual WordNet tab file format.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtab_file\fP \-\- Tab file as a file or file\-like object
+.IP \(bu 2
+\fBlang\fP (\fIstr\fP) \-\- ISO 639\-3 code of the language of the tab file
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B digraph(inputs, rel=<function WordNetCorpusReader.<lambda>>, pos=None, maxdepth=\-1, shapes=None, attr=None, verbose=False)
+Produce a graphical representation from \(aqinputs\(aq (a list of
+start nodes, which can be a mix of Synsets, Lemmas and/or words),
+and a synset relation, for drawing with the \(aqdot\(aq graph visualisation
+program from the Graphviz package.
+.sp
+Return a string in the DOT graph file language, which can then be
+converted to an image by nltk.parse.dependencygraph.dot2img(dot_string).
+.sp
+Optional Parameters:
+:rel: Wordnet synset relation
+:pos: for words, restricts Part of Speech to \(aqn\(aq, \(aqv\(aq, \(aqa\(aq or \(aqr\(aq
+:maxdepth: limit the longest path
+:shapes: dictionary of strings that trigger a specified shape
+:attr: dictionary with global graph attributes
+:verbose: warn about cycles
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import wordnet as wn
+>>> print(wn.digraph([wn.synset(\(aqdog.n.01\(aq)]))
+digraph G {
+"Synset(\(aqdog.n.01\(aq)" \-> "Synset(\(aqdomestic_animal.n.01\(aq)";
+"Synset(\(aqorganism.n.01\(aq)" \-> "Synset(\(aqliving_thing.n.01\(aq)";
+"Synset(\(aqmammal.n.01\(aq)" \-> "Synset(\(aqvertebrate.n.01\(aq)";
+"Synset(\(aqplacental.n.01\(aq)" \-> "Synset(\(aqmammal.n.01\(aq)";
+"Synset(\(aqanimal.n.01\(aq)" \-> "Synset(\(aqorganism.n.01\(aq)";
+"Synset(\(aqvertebrate.n.01\(aq)" \-> "Synset(\(aqchordate.n.01\(aq)";
+"Synset(\(aqchordate.n.01\(aq)" \-> "Synset(\(aqanimal.n.01\(aq)";
+"Synset(\(aqcanine.n.02\(aq)" \-> "Synset(\(aqcarnivore.n.01\(aq)";
+"Synset(\(aqliving_thing.n.01\(aq)" \-> "Synset(\(aqwhole.n.02\(aq)";
+"Synset(\(aqphysical_entity.n.01\(aq)" \-> "Synset(\(aqentity.n.01\(aq)";
+"Synset(\(aqcarnivore.n.01\(aq)" \-> "Synset(\(aqplacental.n.01\(aq)";
+"Synset(\(aqobject.n.01\(aq)" \-> "Synset(\(aqphysical_entity.n.01\(aq)";
+"Synset(\(aqwhole.n.02\(aq)" \-> "Synset(\(aqobject.n.01\(aq)";
+"Synset(\(aqdog.n.01\(aq)" \-> "Synset(\(aqcanine.n.02\(aq)";
+"Synset(\(aqdomestic_animal.n.01\(aq)" \-> "Synset(\(aqanimal.n.01\(aq)";
+}
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_version()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ic(corpus, weight_senses_equally=False, smoothing=1.0)
+Creates an information content lookup dictionary from a corpus.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBcorpus\fP (\fICorpusReader\fP) \-\- The corpus from which we create an information
+content dictionary.
+.IP \(bu 2
+\fBweight_senses_equally\fP (\fIbool\fP) \-\- If this is True, gives all
+possible senses equal weight rather than dividing by the
+number of possible senses. (If a word has 3 senses, each
+sense gets 0.3333 per appearance when this is False, 1.0 when
+it is true.)
+.IP \(bu 2
+\fBsmoothing\fP (\fIfloat\fP) \-\- How much do we smooth synset counts (default is 1.0)
+.UNINDENT
+.TP
+.B Returns
+An information content dictionary
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B jcn_similarity(synset1, synset2, ic, verbose=False)
+Jiang\-Conrath Similarity:
+Return a score denoting how similar two word senses are, based on the
+Information Content (IC) of the Least Common Subsumer (most specific
+ancestor node) and that of the two input Synsets. The relationship is
+given by the equation 1 / (IC(s1) + IC(s2) \- 2 * IC(lcs)).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by
+\fBnltk.corpus.wordnet_ic.ic()\fP).
+.UNINDENT
+.TP
+.B Returns
+A float score denoting the similarity of the two \fBSynset\fP
+objects.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B langs()
+Return a list of languages supported by Multilingual Wordnet.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lch_similarity(synset1, synset2, verbose=False, simulate_root=True)
+Leacock Chodorow Similarity:
+Return a score denoting how similar two word senses are, based on the
+shortest path that connects the senses (as above) and the maximum depth
+of the taxonomy in which the senses occur.
The relationship is given as
+\-log(p/2d) where p is the shortest path length and d is the taxonomy
+depth.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not
+share a single root which disallows this metric from working for
+synsets that are not connected. This flag (True by default)
+creates a fake root that connects all the taxonomies. Set it
+to false to disable this behavior. For the noun taxonomy,
+there is usually a default root except for WordNet version 1.6.
+If you are using wordnet 1.6, a fake root will be added for nouns
+as well.
+.UNINDENT
+.TP
+.B Returns
+A score denoting the similarity of the two \fBSynset\fP objects,
+normally greater than 0. None is returned if no connecting path
+could be found. If a \fBSynset\fP is compared with itself, the
+maximum score is returned, which varies depending on the taxonomy
+depth.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemma(name, lang=\(aqeng\(aq)
+Return lemma object that matches the name
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemma_count(lemma)
+Return the frequency count for this Lemma
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemma_from_key(key)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemmas(lemma, pos=None, lang=\(aqeng\(aq)
+Return all Lemma objects with a name matching the specified lemma
+name and part of speech tag. Matches any part of speech tag if none is
+specified.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B license(lang=\(aqeng\(aq)
+Return the contents of LICENSE (for omw)
+use lang=lang to get the license for an individual language
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lin_similarity(synset1, synset2, ic, verbose=False)
+Lin Similarity:
+Return a score denoting how similar two word senses are, based on the
+Information Content (IC) of the Least Common Subsumer (most specific
+ancestor node) and that of the two input Synsets. The relationship is
+given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by
+\fBnltk.corpus.wordnet_ic.ic()\fP).
+.UNINDENT
+.TP
+.B Returns
+A float score denoting the similarity of the two \fBSynset\fP
+objects, in the range 0 to 1.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B morphy(form, pos=None, check_exceptions=True)
+Find a possible base form for the given form, with the given
+part of speech, by checking WordNet\(aqs list of exceptional
+forms, and by recursively stripping affixes for this part of
+speech until a form in WordNet is found.
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import wordnet as wn
+>>> print(wn.morphy(\(aqdogs\(aq))
+dog
+>>> print(wn.morphy(\(aqchurches\(aq))
+church
+>>> print(wn.morphy(\(aqaardwolves\(aq))
+aardwolf
+>>> print(wn.morphy(\(aqabaci\(aq))
+abacus
+>>> wn.morphy(\(aqhardrock\(aq, wn.ADV)
+>>> print(wn.morphy(\(aqbook\(aq, wn.NOUN))
+book
+>>> wn.morphy(\(aqbook\(aq, wn.ADJ)
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B of2ss(of)
+take an id and return the synsets
+.UNINDENT
+.INDENT 7.0
+.TP
+.B path_similarity(synset1, synset2, verbose=False, simulate_root=True)
+Path Distance Similarity:
+Return a score denoting how similar two word senses are, based on the
+shortest path that connects the senses in the is\-a (hypernym/hyponym)
+taxonomy.
The score is in the range 0 to 1, except in those cases where +a path cannot be found (will only be true for verbs as there are many +distinct verb taxonomies), in which case None is returned. A score of +1 represents identity i.e. comparing a sense with itself will return 1. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not +share a single root which disallows this metric from working for +synsets that are not connected. This flag (True by default) +creates a fake root that connects all the taxonomies. Set it +to false to disable this behavior. For the noun taxonomy, +there is usually a default root except for WordNet version 1.6. +If you are using wordnet 1.6, a fake root will be added for nouns +as well. +.UNINDENT +.TP +.B Returns +A score denoting the similarity of the two \fBSynset\fP objects, +normally between 0 and 1. None is returned if no connecting path +could be found. 1 is returned if a \fBSynset\fP is compared with +itself. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B readme(lang=\(aqomw\(aq) +Return the contents of README (for omw) +use lang=lang to get the readme for an individual language +.UNINDENT +.INDENT 7.0 +.TP +.B res_similarity(synset1, synset2, ic, verbose=False) +Resnik Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by +\fBnltk.corpus.wordnet_ic.ic()\fP). +.UNINDENT +.TP +.B Returns +A float score denoting the similarity of the two \fBSynset\fP +objects. Synsets whose LCS is the root node of the taxonomy will +have a score of 0 (e.g. N[\(aqdog\(aq][0] and N[\(aqtable\(aq][0]). +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B ss2of(ss, lang=None) +return the ID of the synset +.UNINDENT +.INDENT 7.0 +.TP +.B synset(name) +.UNINDENT +.INDENT 7.0 +.TP +.B synset_from_pos_and_offset(pos, offset) +.INDENT 7.0 +.IP \(bu 2 +pos: The synset\(aqs part of speech, matching one of the module level +attributes ADJ, ADJ_SAT, ADV, NOUN or VERB (\(aqa\(aq, \(aqs\(aq, \(aqr\(aq, \(aqn\(aq, or \(aqv\(aq). +.IP \(bu 2 +offset: The byte offset of this synset in the WordNet dict file +for this pos. +.UNINDENT +.sp +.nf +.ft C +>>> from nltk.corpus import wordnet as wn +>>> print(wn.synset_from_pos_and_offset(\(aqn\(aq, 1740)) +Synset(\(aqentity.n.01\(aq) +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B synset_from_sense_key(sense_key) +Retrieves synset based on a given sense_key. Sense keys can be +obtained from lemma.key() +.sp +From \fI\%https://wordnet.princeton.edu/documentation/senseidx5wn\fP: +A sense_key is represented as: +.INDENT 7.0 +.INDENT 3.5 +lemma % lex_sense (e.g. 
\(aqdog%1:18:01::\(aq)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B where lex_sense is encoded as:
+ss_type:lex_filenum:lex_id:head_word:head_id
+.UNINDENT
+.sp
+lemma: ASCII text of word/collocation, in lower case
+ss_type: synset type for the sense (1 digit int)
+.INDENT 7.0
+.INDENT 3.5
+The synset type is encoded as follows:
+1 NOUN
+2 VERB
+3 ADJECTIVE
+4 ADVERB
+5 ADJECTIVE SATELLITE
+.UNINDENT
+.UNINDENT
+.sp
+lex_filenum: name of lexicographer file containing the synset for the sense (2 digit int)
+lex_id: when paired with lemma, uniquely identifies a sense in the lexicographer file (2 digit int)
+head_word: lemma of the first word in satellite\(aqs head synset
+.INDENT 7.0
+.INDENT 3.5
+Only used if sense is in an adjective satellite synset
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B head_id: uniquely identifies sense in a lexicographer file when paired with head_word
+Only used if head_word is present (2 digit int)
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> from nltk.corpus import wordnet as wn
+>>> print(wn.synset_from_sense_key("drive%1:04:03::"))
+Synset(\(aqdrive.n.06\(aq)
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> print(wn.synset_from_sense_key("driving%1:04:03::"))
+Synset(\(aqdrive.n.06\(aq)
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B synsets(lemma, pos=None, lang=\(aqeng\(aq, check_exceptions=True)
+Load all synsets with a given lemma and part of speech tag.
+If no pos is specified, all synsets for all parts of speech
+will be loaded.
+If lang is specified, all the synsets associated with the lemma name
+of that language will be returned.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(lang=\(aqeng\(aq)
+return lemmas of the given language as list of words
+.UNINDENT
+.INDENT 7.0
+.TP
+.B wup_similarity(synset1, synset2, verbose=False, simulate_root=True)
+Wu\-Palmer Similarity:
+Return a score denoting how similar two word senses are, based on the
+depth of the two senses in the taxonomy and that of their Least Common
+Subsumer (most specific ancestor node). Previously, the scores computed
+by this implementation did _not_ always agree with those given by
+Pedersen\(aqs Perl implementation of WordNet Similarity. However, with
+the addition of the simulate_root flag (see below), the scores for
+verbs now almost always agree, though not always for nouns.
+.sp
+The LCS does not necessarily feature in the shortest path connecting
+the two senses, as it is by definition the common ancestor deepest in
+the taxonomy, not closest to the two senses. Typically, however, it
+will so feature. Where multiple candidates for the LCS exist, that
+whose shortest path to the root node is the longest will be selected.
+Where the LCS has multiple paths to the root, the longer path is used
+for the purposes of the calculation.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not
+share a single root which disallows this metric from working for
+synsets that are not connected. This flag (True by default)
+creates a fake root that connects all the taxonomies. Set it
+to false to disable this behavior. For the noun taxonomy,
+there is usually a default root except for WordNet version 1.6.
+If you are using wordnet 1.6, a fake root will be added for nouns
+as well.
+.UNINDENT
+.TP
+.B Returns
+A float score denoting the similarity of the two \fBSynset\fP
+objects, normally greater than zero.
If no connecting path between +the two senses can be found, None is returned. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.corpus.reader.wordnet.WordNetError +Bases: \fBException\fP +.sp +An exception class for wordnet\-related errors. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.wordnet.WordNetICCorpusReader(root, fileids) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +A corpus reader for the WordNet information content corpus. +.INDENT 7.0 +.TP +.B ic(icfile) +Load an information content file from the wordnet_ic corpus +and return a dictionary. This dictionary has just two keys, +NOUN and VERB, whose values are dictionaries that map from +synsets to information content values. +.INDENT 7.0 +.TP +.B Parameters +\fBicfile\fP (\fIstr\fP) \-\- The name of the wordnet_ic file (e.g. "ic\-brown.dat") +.TP +.B Returns +An information content dictionary +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.wordnet.information_content(synset, ic) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.wordnet.jcn_similarity(synset1, synset2, ic, verbose=False) +Jiang\-Conrath Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node) and that of the two input Synsets. The relationship is +given by the equation 1 / (IC(s1) + IC(s2) \- 2 * IC(lcs)). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by +\fBnltk.corpus.wordnet_ic.ic()\fP). +.UNINDENT +.TP +.B Returns +A float score denoting the similarity of the two \fBSynset\fP +objects. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.wordnet.lch_similarity(synset1, synset2, verbose=False, simulate_root=True) +Leacock Chodorow Similarity: +Return a score denoting how similar two word senses are, based on the +shortest path that connects the senses (as above) and the maximum depth +of the taxonomy in which the senses occur. The relationship is given as +\-log(p/2d) where p is the shortest path length and d is the taxonomy +depth. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not +share a single root which disallows this metric from working for +synsets that are not connected. This flag (True by default) +creates a fake root that connects all the taxonomies. Set it +to false to disable this behavior. For the noun taxonomy, +there is usually a default root except for WordNet version 1.6. +If you are using wordnet 1.6, a fake root will be added for nouns +as well. +.UNINDENT +.TP +.B Returns +A score denoting the similarity of the two \fBSynset\fP objects, +normally greater than 0. None is returned if no connecting path +could be found. If a \fBSynset\fP is compared with itself, the +maximum score is returned, which varies depending on the taxonomy +depth. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.wordnet.lin_similarity(synset1, synset2, ic, verbose=False) +Lin Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node) and that of the two input Synsets. 
The relationship is
+given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by
+\fBnltk.corpus.wordnet_ic.ic()\fP).
+.UNINDENT
+.TP
+.B Returns
+A float score denoting the similarity of the two \fBSynset\fP
+objects, in the range 0 to 1.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.wordnet.path_similarity(synset1, synset2, verbose=False, simulate_root=True)
+Path Distance Similarity:
+Return a score denoting how similar two word senses are, based on the
+shortest path that connects the senses in the is\-a (hypernym/hyponym)
+taxonomy. The score is in the range 0 to 1, except in those cases where
+a path cannot be found (will only be true for verbs as there are many
+distinct verb taxonomies), in which case None is returned. A score of
+1 represents identity i.e. comparing a sense with itself will return 1.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not
+share a single root which disallows this metric from working for
+synsets that are not connected. This flag (True by default)
+creates a fake root that connects all the taxonomies. Set it
+to false to disable this behavior. For the noun taxonomy,
+there is usually a default root except for WordNet version 1.6.
+If you are using wordnet 1.6, a fake root will be added for nouns
+as well.
+.UNINDENT
+.TP
+.B Returns
+A score denoting the similarity of the two \fBSynset\fP objects,
+normally between 0 and 1. None is returned if no connecting path
+could be found. 1 is returned if a \fBSynset\fP is compared with
+itself.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.wordnet.res_similarity(synset1, synset2, ic, verbose=False)
+Resnik Similarity:
+Return a score denoting how similar two word senses are, based on the
+Information Content (IC) of the Least Common Subsumer (most specific
+ancestor node).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by
+\fBnltk.corpus.wordnet_ic.ic()\fP).
+.UNINDENT
+.TP
+.B Returns
+A float score denoting the similarity of the two \fBSynset\fP
+objects. Synsets whose LCS is the root node of the taxonomy will
+have a score of 0 (e.g. N[\(aqdog\(aq][0] and N[\(aqtable\(aq][0]).
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.corpus.reader.wordnet.wup_similarity(synset1, synset2, verbose=False, simulate_root=True)
+Wu\-Palmer Similarity:
+Return a score denoting how similar two word senses are, based on the
+depth of the two senses in the taxonomy and that of their Least Common
+Subsumer (most specific ancestor node). Previously, the scores computed
+by this implementation did _not_ always agree with those given by
+Pedersen\(aqs Perl implementation of WordNet Similarity. However, with
+the addition of the simulate_root flag (see below), the scores for
+verbs now almost always agree, though not always for nouns.
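+.sp
+A minimal usage sketch (the bounds check below holds for any pair of
+connected noun senses):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import wordnet as wn
+>>> dog, cat = wn.synset(\(aqdog.n.01\(aq), wn.synset(\(aqcat.n.01\(aq)
+>>> score = wn.wup_similarity(dog, cat)
+>>> 0.0 < score <= 1.0
+True
+.ft P
+.fi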
+.sp +The LCS does not necessarily feature in the shortest path connecting +the two senses, as it is by definition the common ancestor deepest in +the taxonomy, not closest to the two senses. Typically, however, it +will so feature. Where multiple candidates for the LCS exist, that +whose shortest path to the root node is the longest will be selected. +Where the LCS has multiple paths to the root, the longer path is used +for the purposes of the calculation. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not +share a single root which disallows this metric from working for +synsets that are not connected. This flag (True by default) +creates a fake root that connects all the taxonomies. Set it +to false to disable this behavior. For the noun taxonomy, +there is usually a default root except for WordNet version 1.6. +If you are using wordnet 1.6, a fake root will be added for nouns +as well. +.UNINDENT +.TP +.B Returns +A float score denoting the similarity of the two \fBSynset\fP +objects, normally greater than zero. If no connecting path between +the two senses can be found, None is returned. +.UNINDENT +.UNINDENT +.SS nltk.corpus.reader.xmldocs module +.sp +Corpus reader for corpora whose documents are xml files. +.sp +(note \-\- not named \(aqxml\(aq to avoid conflicting w/ standard xml package) +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.xmldocs.XMLCorpusReader(root, fileids, wrap_etree=False) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Corpus reader for corpora whose documents are xml files. +.sp +Note that the \fBXMLCorpusReader\fP constructor does not take an +\fBencoding\fP argument, because the unicode encoding is specified by +the XML files themselves. See the XML specs for more info. +.INDENT 7.0 +.TP +.B words(fileid=None) +Returns all of the words and punctuation symbols in the specified file +that were in text nodes \-\- ie, tags are ignored. Like the xml() method, +fileid can only specify one file. +.INDENT 7.0 +.TP +.B Returns +the given file\(aqs text nodes as a list of words and punctuation symbols +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B xml(fileid=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.xmldocs.XMLCorpusView(fileid, tagspec, elt_handler=None) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.sp +A corpus view that selects out specified elements from an XML +file, and provides a flat list\-like interface for accessing them. +(Note: \fBXMLCorpusView\fP is not used by \fBXMLCorpusReader\fP itself, +but may be used by subclasses of \fBXMLCorpusReader\fP\&.) +.sp +Every XML corpus view has a "tag specification", indicating what +XML elements should be included in the view; and each (non\-nested) +element that matches this specification corresponds to one item in +the view. Tag specifications are regular expressions over tag +paths, where a tag path is a list of element tag names, separated +by \(aq/\(aq, indicating the ancestry of the element. Some examples: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fB\(aqfoo\(aq\fP: A top\-level element whose tag is \fBfoo\fP\&. +.IP \(bu 2 +\fB\(aqfoo/bar\(aq\fP: An element whose tag is \fBbar\fP and whose parent +is a top\-level element whose tag is \fBfoo\fP\&. 
+.IP \(bu 2
+\fB\(aq.*/foo\(aq\fP: An element whose tag is \fBfoo\fP, appearing anywhere
+in the xml tree.
+.IP \(bu 2
+\fB\(aq.*/(foo|bar)\(aq\fP: An element whose tag is \fBfoo\fP or \fBbar\fP,
+appearing anywhere in the xml tree.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The view items are generated from the selected XML elements via
+the method \fBhandle_elt()\fP\&. By default, this method returns the
+element as\-is (i.e., as an ElementTree object); but it can be
+overridden, either via subclassing or via the \fBelt_handler\fP
+constructor parameter.
+.INDENT 7.0
+.TP
+.B handle_elt(elt, context)
+Convert an element into an appropriate value for inclusion in
+the view. Unless overridden by a subclass or by the
+\fBelt_handler\fP constructor argument, this method simply
+returns \fBelt\fP\&.
+.INDENT 7.0
+.TP
+.B Returns
+The view value corresponding to \fBelt\fP\&.
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBelt\fP (\fIElementTree\fP) \-\- The element that should be converted.
+.IP \(bu 2
+\fBcontext\fP (\fIstr\fP) \-\- A string composed of element tags separated by
+forward slashes, indicating the XML context of the given
+element. For example, the string \fB\(aqfoo/bar/baz\(aq\fP
+indicates that the element is a \fBbaz\fP element whose
+parent is a \fBbar\fP element and whose grandparent is a
+top\-level \fBfoo\fP element.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_block(stream, tagspec=None, elt_handler=None)
+Read from \fBstream\fP until we find at least one element that
+matches \fBtagspec\fP, and return the result of applying
+\fBelt_handler\fP to each element found.
+.UNINDENT
+.UNINDENT
+.SS nltk.corpus.reader.ycoe module
+.sp
+Corpus reader for the York\-Toronto\-Helsinki Parsed Corpus of Old
+English Prose (YCOE), a 1.5 million word syntactically\-annotated
+corpus of Old English prose texts. The corpus is distributed by the
+Oxford Text Archive: \fI\%http://www.ota.ahds.ac.uk/\fP It is not included
+with NLTK.
+.sp
+The YCOE corpus is divided into 100 files, each representing
+an Old English prose text. The tags used within each text comply
+with the YCOE standard: \fI\%http://www\-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm\fP
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.ycoe.YCOECorpusReader(root, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Corpus reader for the York\-Toronto\-Helsinki Parsed Corpus of Old
+English Prose (YCOE), a 1.5 million word syntactically\-annotated
+corpus of Old English prose texts.
+.INDENT 7.0
+.TP
+.B documents(fileids=None)
+Return a list of document identifiers for all documents in
+this corpus, or for the documents with the given file(s) if
+specified.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fileids(documents=None)
+Return a list of file identifiers for the files that make up
+this corpus, or that store the given document(s) if specified.
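+.sp
+A hypothetical usage sketch (the YCOE corpus must be obtained
+separately; the root path below is illustrative, not part of NLTK):
+.sp
+.nf
+.ft C
+from nltk.corpus.reader.ycoe import YCOECorpusReader
+
+# Point the reader at a locally installed copy of YCOE.
+ycoe = YCOECorpusReader(\(aq/path/to/ycoe\(aq)
+# Map a document identifier to the file(s) that store it.
+print(ycoe.fileids(documents=[\(aqcoadrian.o34\(aq]))
+.ft P
+.fi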
+.UNINDENT +.INDENT 7.0 +.TP +.B paras(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B parsed_sents(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(documents=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ycoe.YCOEParseCorpusReader(root, fileids, comment_char=None, detect_blocks=\(aqunindented_paren\(aq, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.bracket_parse.BracketParseCorpusReader\fP +.sp +Specialized version of the standard bracket parse corpus reader +that strips out (CODE ...) and (ID ...) nodes. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ycoe.YCOETaggedCorpusReader(root, items, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.ycoe.documents = {\(aqcoadrian.o34\(aq: \(aqAdrian and Ritheus\(aq, \(aqcoaelhom.o3\(aq: \(aqÆlfric, Supplemental Homilies\(aq, \(aqcoaelive.o3\(aq: "Ælfric\(aqs Lives of Saints", \(aqcoalcuin\(aq: \(aqAlcuin De virtutibus et vitiis\(aq, \(aqcoalex.o23\(aq: "Alexander\(aqs Letter to Aristotle", \(aqcoapollo.o3\(aq: \(aqApollonius of Tyre\(aq, \(aqcoaugust\(aq: \(aqAugustine\(aq, \(aqcobede.o2\(aq: "Bede\(aqs History of the English Church", \(aqcobenrul.o3\(aq: \(aqBenedictine Rule\(aq, \(aqcoblick.o23\(aq: \(aqBlickling Homilies\(aq, \(aqcoboeth.o2\(aq: "Boethius\(aq Consolation of Philosophy", \(aqcobyrhtf.o3\(aq: "Byrhtferth\(aqs Manual", \(aqcocanedgD\(aq: \(aqCanons of Edgar (D)\(aq, \(aqcocanedgX\(aq: \(aqCanons of Edgar (X)\(aq, \(aqcocathom1.o3\(aq: "Ælfric\(aqs Catholic Homilies I", \(aqcocathom2.o3\(aq: "Ælfric\(aqs Catholic Homilies II", \(aqcochad.o24\(aq: \(aqSaint Chad\(aq, \(aqcochdrul\(aq: \(aqChrodegang of Metz, Rule\(aq, \(aqcochristoph\(aq: \(aqSaint Christopher\(aq, \(aqcochronA.o23\(aq: \(aqAnglo\-Saxon Chronicle A\(aq, \(aqcochronC\(aq: \(aqAnglo\-Saxon Chronicle C\(aq, \(aqcochronD\(aq: \(aqAnglo\-Saxon Chronicle D\(aq, \(aqcochronE.o34\(aq: \(aqAnglo\-Saxon Chronicle E\(aq, \(aqcocura.o2\(aq: \(aqCura Pastoralis\(aq, \(aqcocuraC\(aq: \(aqCura Pastoralis (Cotton)\(aq, \(aqcodicts.o34\(aq: \(aqDicts of Cato\(aq, \(aqcodocu1.o1\(aq: \(aqDocuments 1 (O1)\(aq, \(aqcodocu2.o12\(aq: \(aqDocuments 2 (O1/O2)\(aq, \(aqcodocu2.o2\(aq: \(aqDocuments 2 (O2)\(aq, \(aqcodocu3.o23\(aq: \(aqDocuments 3 (O2/O3)\(aq, \(aqcodocu3.o3\(aq: \(aqDocuments 3 (O3)\(aq, \(aqcodocu4.o24\(aq: \(aqDocuments 4 (O2/O4)\(aq, \(aqcoeluc1\(aq: \(aqHonorius of Autun, Elucidarium 1\(aq, \(aqcoeluc2\(aq: \(aqHonorius of Autun, Elucidarium 1\(aq, \(aqcoepigen.o3\(aq: "Ælfric\(aqs Epilogue to Genesis", \(aqcoeuphr\(aq: \(aqSaint Euphrosyne\(aq, \(aqcoeust\(aq: \(aqSaint Eustace and his companions\(aq, \(aqcoexodusP\(aq: \(aqExodus (P)\(aq, \(aqcogenesiC\(aq: \(aqGenesis (C)\(aq, \(aqcogregdC.o24\(aq: "Gregory\(aqs Dialogues (C)", \(aqcogregdH.o23\(aq: "Gregory\(aqs Dialogues (H)", \(aqcoherbar\(aq: \(aqPseudo\-Apuleius, Herbarium\(aq, \(aqcoinspolD.o34\(aq: "Wulfstan\(aqs Institute of Polity (D)", \(aqcoinspolX\(aq: "Wulfstan\(aqs Institute of Polity (X)", \(aqcojames\(aq: \(aqSaint James\(aq, \(aqcolacnu.o23\(aq: \(aqLacnunga\(aq, \(aqcolaece.o2\(aq: \(aqLeechdoms\(aq, \(aqcolaw1cn.o3\(aq: \(aqLaws, Cnut I\(aq, \(aqcolaw2cn.o3\(aq: \(aqLaws, Cnut II\(aq, \(aqcolaw5atr.o3\(aq: 
\(aqLaws, Æthelred V\(aq, \(aqcolaw6atr.o3\(aq: \(aqLaws, Æthelred VI\(aq, \(aqcolawaf.o2\(aq: \(aqLaws, Alfred\(aq, \(aqcolawafint.o2\(aq: "Alfred\(aqs Introduction to Laws", \(aqcolawger.o34\(aq: \(aqLaws, Gerefa\(aq, \(aqcolawine.ox2\(aq: \(aqLaws, Ine\(aq, \(aqcolawnorthu.o3\(aq: \(aqNorthumbra Preosta Lagu\(aq, \(aqcolawwllad.o4\(aq: \(aqLaws, William I, Lad\(aq, \(aqcoleofri.o4\(aq: \(aqLeofric\(aq, \(aqcolsigef.o3\(aq: "Ælfric\(aqs Letter to Sigefyrth", \(aqcolsigewB\(aq: "Ælfric\(aqs Letter to Sigeweard (B)", \(aqcolsigewZ.o34\(aq: "Ælfric\(aqs Letter to Sigeweard (Z)", \(aqcolwgeat\(aq: "Ælfric\(aqs Letter to Wulfgeat", \(aqcolwsigeT\(aq: "Ælfric\(aqs Letter to Wulfsige (T)", \(aqcolwsigeXa.o34\(aq: "Ælfric\(aqs Letter to Wulfsige (Xa)", \(aqcolwstan1.o3\(aq: "Ælfric\(aqs Letter to Wulfstan I", \(aqcolwstan2.o3\(aq: "Ælfric\(aqs Letter to Wulfstan II", \(aqcomargaC.o34\(aq: \(aqSaint Margaret (C)\(aq, \(aqcomargaT\(aq: \(aqSaint Margaret (T)\(aq, \(aqcomart1\(aq: \(aqMartyrology, I\(aq, \(aqcomart2\(aq: \(aqMartyrology, II\(aq, \(aqcomart3.o23\(aq: \(aqMartyrology, III\(aq, \(aqcomarvel.o23\(aq: \(aqMarvels of the East\(aq, \(aqcomary\(aq: \(aqMary of Egypt\(aq, \(aqconeot\(aq: \(aqSaint Neot\(aq, \(aqconicodA\(aq: \(aqGospel of Nicodemus (A)\(aq, \(aqconicodC\(aq: \(aqGospel of Nicodemus (C)\(aq, \(aqconicodD\(aq: \(aqGospel of Nicodemus (D)\(aq, \(aqconicodE\(aq: \(aqGospel of Nicodemus (E)\(aq, \(aqcoorosiu.o2\(aq: \(aqOrosius\(aq, \(aqcootest.o3\(aq: \(aqHeptateuch\(aq, \(aqcoprefcath1.o3\(aq: "Ælfric\(aqs Preface to Catholic Homilies I", \(aqcoprefcath2.o3\(aq: "Ælfric\(aqs Preface to Catholic Homilies II", \(aqcoprefcura.o2\(aq: \(aqPreface to the Cura Pastoralis\(aq, \(aqcoprefgen.o3\(aq: "Ælfric\(aqs Preface to Genesis", \(aqcopreflives.o3\(aq: "Ælfric\(aqs Preface to Lives of Saints", \(aqcoprefsolilo\(aq: "Preface to Augustine\(aqs Soliloquies", \(aqcoquadru.o23\(aq: \(aqPseudo\-Apuleius, Medicina de quadrupedibus\(aq, \(aqcorood\(aq: \(aqHistory of the Holy Rood\-Tree\(aq, \(aqcosevensl\(aq: \(aqSeven Sleepers\(aq, \(aqcosolilo\(aq: "St. Augustine\(aqs Soliloquies", \(aqcosolsat1.o4\(aq: \(aqSolomon and Saturn I\(aq, \(aqcosolsat2\(aq: \(aqSolomon and Saturn II\(aq, \(aqcotempo.o3\(aq: "Ælfric\(aqs De Temporibus Anni", \(aqcoverhom\(aq: \(aqVercelli Homilies\(aq, \(aqcoverhomE\(aq: \(aqVercelli Homilies (E)\(aq, \(aqcoverhomL\(aq: \(aqVercelli Homilies (L)\(aq, \(aqcovinceB\(aq: \(aqSaint Vincent (Bodley 343)\(aq, \(aqcovinsal\(aq: \(aqVindicta Salvatoris\(aq, \(aqcowsgosp.o3\(aq: \(aqWest\-Saxon Gospels\(aq, \(aqcowulf.o34\(aq: "Wulfstan\(aqs Homilies"} +A list of all documents and their titles in ycoe. +.UNINDENT +.SS Module contents +.sp +NLTK corpus readers. The modules in this package provide functions +that can be used to read corpus fileids in a variety of formats. These +functions can be used to read both the corpus fileids that are +distributed in the NLTK corpus package, and corpus fileids that are part +of external corpora. +.SS Corpus Reader Functions +.sp +Each corpus module defines one or more "corpus reader functions", +which can be used to read documents from that corpus. These functions +take an argument, \fBitem\fP, which is used to indicate which document +should be read from the corpus: +.INDENT 0.0 +.IP \(bu 2 +If \fBitem\fP is one of the unique identifiers listed in the corpus +module\(aqs \fBitems\fP variable, then the corresponding document will +be loaded from the NLTK corpus package. 
+.IP \(bu 2
+If \fBitem\fP is a fileid, then that file will be read.
+.UNINDENT
+.sp
+Additionally, corpus reader functions can be given lists of item
+names; in which case, they will return a concatenation of the
+corresponding documents.
+.sp
+Corpus reader functions are named based on the type of information
+they return. Some common examples, and their return types, are:
+.INDENT 0.0
+.IP \(bu 2
+words(): list of str
+.IP \(bu 2
+sents(): list of (list of str)
+.IP \(bu 2
+paras(): list of (list of (list of str))
+.IP \(bu 2
+tagged_words(): list of (str,str) tuple
+.IP \(bu 2
+tagged_sents(): list of (list of (str,str))
+.IP \(bu 2
+tagged_paras(): list of (list of (list of (str,str)))
+.IP \(bu 2
+chunked_sents(): list of (Tree w/ (str,str) leaves)
+.IP \(bu 2
+parsed_sents(): list of (Tree with str leaves)
+.IP \(bu 2
+parsed_paras(): list of (list of (Tree with str leaves))
+.IP \(bu 2
+xml(): A single xml ElementTree
+.IP \(bu 2
+raw(): unprocessed corpus contents
+.UNINDENT
+.sp
+For example, to read a list of the words in the Brown Corpus, use
+\fBnltk.corpus.brown.words()\fP:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import brown
+>>> print(", ".join(brown.words()))
+The, Fulton, County, Grand, Jury, said, ...
+.ft P
+.fi
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.AlignedCorpusReader(root, fileids, sep=\(aq/\(aq, word_tokenizer=WhitespaceTokenizer(pattern=\(aq\e\es+\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), sent_tokenizer=RegexpTokenizer(pattern=\(aq\en\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), alignedsent_block_reader=<function read_alignedsent_block>, encoding=\(aqlatin1\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for corpora of word\-aligned sentences. Tokens are assumed
+to be separated by whitespace. Sentences begin on separate lines.
+.INDENT 7.0
+.TP
+.B aligned_sents(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of AlignedSent objects.
+.TP
+.B Return type
+list(AlignedSent)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+sentences or utterances, each encoded as a list of word
+strings.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of words
+and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.AlpinoCorpusReader(root, encoding=\(aqISO\-8859\-1\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.bracket_parse.BracketParseCorpusReader\fP
+.sp
+Reader for the Alpino Dutch Treebank.
+This corpus has a lexical breakdown structure embedded, as read by _parse.
+Unfortunately this puts punctuation and some other words out of the sentence
+order in the xml element tree. This is no good for tag_ and word_, so
+_tag and _word will be overridden to use a non\-default new parameter \(aqordered\(aq
+to the overridden _normalize function. The _parse function can then remain
+untouched.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.BNCCorpusReader(root, fileids, lazy=True)
+Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP
+.sp
+Corpus reader for the XML version of the British National Corpus.
+.sp
+For access to the complete XML data structure, use the \fBxml()\fP
+method.
For access to simple word lists and tagged word lists, use +\fBwords()\fP, \fBsents()\fP, \fBtagged_words()\fP, and \fBtagged_sents()\fP\&. +.sp +You can obtain the full version of the BNC corpus at +\fI\%http://www.ota.ox.ac.uk/desc/2554\fP +.sp +If you extracted the archive to a directory called \fIBNC\fP, then you can +instantiate the reader as: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +BNCCorpusReader(root=\(aqBNC/Texts/\(aq, fileids=r\(aq[A\-K]/\ew*/\ew*\e.xml\(aq) +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None, strip_space=True, stem=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences or utterances, each encoded as a list of word +strings. +.TP +.B Return type +list(list(str)) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from +word tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, c5=False, strip_space=True, stem=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a list of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(tuple(str,str))) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBc5\fP \-\- If true, then the tags used will be the more detailed +c5 tags. Otherwise, the simplified tags will be used. +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from +word tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, c5=False, strip_space=True, stem=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and punctuation symbols, encoded as tuples +\fB(word,tag)\fP\&. +.TP +.B Return type +list(tuple(str,str)) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBc5\fP \-\- If true, then the tags used will be the more detailed +c5 tags. Otherwise, the simplified tags will be used. +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from +word tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None, strip_space=True, stem=False) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +and punctuation symbols. +.TP +.B Return type +list(str) +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBstrip_space\fP \-\- If true, then strip trailing spaces from +word tokens. Otherwise, leave the spaces on the tokens. +.IP \(bu 2 +\fBstem\fP \-\- If true, then use word stems instead of word strings. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.BracketParseCorpusReader(root, fileids, comment_char=None, detect_blocks=\(aqunindented_paren\(aq, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.SyntaxCorpusReader\fP +.sp +Reader for corpora that consist of parenthesis\-delineated parse trees, +like those found in the "combined" section of the Penn Treebank, +e.g. "(S (NP (DT the) (JJ little) (NN dog)) (VP (VBD barked)))". 
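+.sp
+A minimal construction sketch (the root directory and fileid pattern
+below are illustrative, not part of NLTK):
+.sp
+.nf
+.ft C
+from nltk.corpus.reader import BracketParseCorpusReader
+
+# Read every .mrg file under corpus_root as a sequence of parse trees.
+reader = BracketParseCorpusReader(\(aqcorpus_root\(aq, r\(aq.*\e.mrg\(aq)
+for tree in reader.parsed_sents()[:3]:
+    tree.pretty_print()
+.ft P
+.fi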
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.CHILDESCorpusReader(root, fileids, lazy=True)
+Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP
+.sp
+Corpus reader for the XML version of the CHILDES corpus.
+The CHILDES corpus is available at \fBhttps://childes.talkbank.org/\fP\&. The XML
+version of CHILDES is located at \fBhttps://childes.talkbank.org/data\-xml/\fP\&.
+Copy the needed parts of the CHILDES XML corpus into the NLTK data directory
+(\fBnltk_data/corpora/CHILDES/\fP).
+.sp
+For access to the file text use the usual nltk functions,
+\fBwords()\fP, \fBsents()\fP, \fBtagged_words()\fP and \fBtagged_sents()\fP\&.
+.INDENT 7.0
+.TP
+.B MLU(fileids=None, speaker=\(aqCHI\(aq)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of floats (one MLU value per file)
+.TP
+.B Return type
+list(float)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B age(fileids=None, speaker=\(aqCHI\(aq, month=False)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as string or int
+.TP
+.B Return type
+list or int
+.TP
+.B Parameters
+\fBmonth\fP \-\- If true, return months instead of year\-month\-date
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B childes_url_base = \(aqhttps://childes.talkbank.org/browser/index.php?url=\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B convert_age(age_year)
+Calculate age in months from a string in CHILDES format
+.UNINDENT
+.INDENT 7.0
+.TP
+.B corpus(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a dict of \fB(corpus_property_key, value)\fP
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B participants(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a dict of
+\fB(participant_property_key, value)\fP
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, speaker=\(aqALL\(aq, stem=False, relation=None, strip_space=True, replace=False)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of sentences or utterances, each
+encoded as a list of word strings.
+.TP
+.B Return type
+list(list(str))
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBspeaker\fP \-\- If specified, select specific speaker(s) defined
+in the corpus. Default is \(aqALL\(aq (all participants). Common choices
+are \(aqCHI\(aq (the child), \(aqMOT\(aq (mother), [\(aqCHI\(aq,\(aqMOT\(aq] (exclude
+researchers)
+.IP \(bu 2
+\fBstem\fP \-\- If true, then use word stems instead of word strings.
+.IP \(bu 2
+\fBrelation\fP \-\- If true, then return tuples of \fB(str,pos,relation_list)\fP\&.
+If there is manually\-annotated relation info, it will return
+tuples of \fB(str,pos,test_relation_list,str,pos,gold_relation_list)\fP
+.IP \(bu 2
+\fBstrip_space\fP \-\- If true, then strip trailing spaces from word
+tokens. Otherwise, leave the spaces on the tokens.
+.IP \(bu 2
+\fBreplace\fP \-\- If true, then use the replaced (intended) word instead
+of the original word (e.g., \(aqwat\(aq will be replaced with \(aqwatch\(aq)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, speaker=\(aqALL\(aq, stem=False, relation=None, strip_space=True, replace=False)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+sentences, each encoded as a list of \fB(word,tag)\fP tuples.
+.TP
+.B Return type
+list(list(tuple(str,str)))
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBspeaker\fP \-\- If specified, select specific speaker(s) defined
+in the corpus. Default is \(aqALL\(aq (all participants).
Common choices
+are \(aqCHI\(aq (the child), \(aqMOT\(aq (mother), [\(aqCHI\(aq,\(aqMOT\(aq] (exclude
+researchers)
+.IP \(bu 2
+\fBstem\fP \-\- If true, then use word stems instead of word strings.
+.IP \(bu 2
+\fBrelation\fP \-\- If true, then return tuples of \fB(str,pos,relation_list)\fP\&.
+If there is manually\-annotated relation info, it will return
+tuples of \fB(str,pos,test_relation_list,str,pos,gold_relation_list)\fP
+.IP \(bu 2
+\fBstrip_space\fP \-\- If true, then strip trailing spaces from word
+tokens. Otherwise, leave the spaces on the tokens.
+.IP \(bu 2
+\fBreplace\fP \-\- If true, then use the replaced (intended) word instead
+of the original word (e.g., \(aqwat\(aq will be replaced with \(aqwatch\(aq)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None, speaker=\(aqALL\(aq, stem=False, relation=False, strip_space=True, replace=False)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of tagged
+words and punctuation symbols, encoded as tuples
+\fB(word,tag)\fP\&.
+.TP
+.B Return type
+list(tuple(str,str))
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBspeaker\fP \-\- If specified, select specific speaker(s) defined
+in the corpus. Default is \(aqALL\(aq (all participants). Common choices
+are \(aqCHI\(aq (the child), \(aqMOT\(aq (mother), [\(aqCHI\(aq,\(aqMOT\(aq] (exclude
+researchers)
+.IP \(bu 2
+\fBstem\fP \-\- If true, then use word stems instead of word strings.
+.IP \(bu 2
+\fBrelation\fP \-\- If true, then return tuples of (stem, index,
+dependent_index)
+.IP \(bu 2
+\fBstrip_space\fP \-\- If true, then strip trailing spaces from word
+tokens. Otherwise, leave the spaces on the tokens.
+.IP \(bu 2
+\fBreplace\fP \-\- If true, then use the replaced (intended) word instead
+of the original word (e.g., \(aqwat\(aq will be replaced with \(aqwatch\(aq)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B webview_file(fileid, urlbase=None)
+Map a corpus file to its web version on the CHILDES website,
+and open it in a web browser.
+.INDENT 7.0
+.TP
+.B The complete URL to be used is:
+childes.childes_url_base + urlbase + fileid.replace(\(aq.xml\(aq, \(aq.cha\(aq)
+.UNINDENT
+.sp
+If no urlbase is passed, we try to calculate it. This
+requires that the childes corpus was set up to mirror the
+folder hierarchy under childes.psy.cmu.edu/data\-xml/, e.g.:
+nltk_data/corpora/childes/Eng\-USA/Cornell/??? or
+nltk_data/corpora/childes/Romance/Spanish/Aguirre/???
+.sp
+The function first looks (as a special case) if "Eng\-USA" is
+on the path consisting of <root>+fileid; then if
+"childes", possibly followed by "data\-xml", appears. If neither
+one is found, we use the unmodified fileid and hope for the best.
+If this is not right, specify urlbase explicitly, e.g., if the
+corpus root points to the Cornell folder, urlbase=\(aqEng\-USA/Cornell\(aq.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None, speaker=\(aqALL\(aq, stem=False, relation=False, strip_space=True, replace=False)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of words
+.TP
+.B Return type
+list(str)
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBspeaker\fP \-\- If specified, select specific speaker(s) defined
+in the corpus. Default is \(aqALL\(aq (all participants). Common choices
+are \(aqCHI\(aq (the child), \(aqMOT\(aq (mother), [\(aqCHI\(aq,\(aqMOT\(aq] (exclude
+researchers)
+.IP \(bu 2
+\fBstem\fP \-\- If true, then use word stems instead of word strings.
+.IP \(bu 2
+\fBrelation\fP \-\- If true, then return tuples of (stem, index,
+dependent_index)
+.IP \(bu 2
+\fBstrip_space\fP \-\- If true, then strip trailing spaces from word
+tokens. Otherwise, leave the spaces on the tokens.
+.IP \(bu 2
+\fBreplace\fP \-\- If true, then use the replaced (intended) word instead
+of the original word (e.g., \(aqwat\(aq will be replaced with \(aqwatch\(aq)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.CMUDictCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B dict()
+.INDENT 7.0
+.TP
+.B Returns
+the cmudict lexicon as a dictionary, whose keys are
+lowercase words and whose values are lists of pronunciations.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B entries()
+.INDENT 7.0
+.TP
+.B Returns
+the cmudict lexicon as a list of entries
+containing (word, transcriptions) tuples.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words()
+.INDENT 7.0
+.TP
+.B Returns
+a list of all words defined in the cmudict lexicon.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.CategorizedBracketParseCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.bracket_parse.BracketParseCorpusReader\fP
+.sp
+A reader for parsed corpora whose documents are
+divided into categories based on their file identifiers.
+@author: Nathan Schneider <\fI\%nschneid@cs.cmu.edu\fP>
+.INDENT 7.0
+.TP
+.B parsed_paras(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parsed_sents(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parsed_words(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_paras(fileids=None, categories=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, categories=None, tagset=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None, categories=None, tagset=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.CategorizedCorpusReader(kwargs)
+Bases: \fBobject\fP
+.sp
+A mixin class used to aid in the implementation of corpus readers
+for categorized corpora. This class defines the method
+\fBcategories()\fP, which returns a list of the categories for the
+corpus or for a specified set of fileids; and overrides \fBfileids()\fP
+to take a \fBcategories\fP argument, restricting the set of fileids to
+be returned.
+.sp
+Subclasses are expected to:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+Call \fB__init__()\fP to set up the mapping.
+.IP \(bu 2
+Override all view methods to accept a \fBcategories\fP parameter,
+which can be used \fIinstead\fP of the \fBfileids\fP parameter, to
+select which fileids should be included in the returned view.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B categories(fileids=None)
+Return a list of the categories that are defined for this corpus,
+or for the file(s) if it is given.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fileids(categories=None)
+Return a list of file identifiers for the files that make up
+this corpus, or that make up the given category(s) if specified.
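+.sp
+For example, with the categorized Brown Corpus:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import brown
+>>> \(aqnews\(aq in brown.categories()
+True
+>>> brown.fileids(categories=\(aqnews\(aq)[:3]
+[\(aqca01\(aq, \(aqca02\(aq, \(aqca03\(aq]
+.ft P
+.fi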
+.UNINDENT
+.INDENT 7.0
+.TP
+.B paras(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B raw(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, categories=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None, categories=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.CategorizedPlaintextCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.plaintext.PlaintextCorpusReader\fP
+.sp
+A reader for plaintext corpora whose documents are divided into
+categories based on their file identifiers.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.CategorizedSentencesCorpusReader(root, fileids, word_tokenizer=WhitespaceTokenizer(pattern=\(aq\e\es+\(aq, gaps=True, discard_empty=True, flags=re.UNICODE | re.MULTILINE | re.DOTALL), sent_tokenizer=None, encoding=\(aqutf8\(aq, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+A reader for corpora in which each row represents a single instance,
+typically a sentence. Instances are divided into categories based on
+their file identifiers (see CategorizedCorpusReader).
+Since many corpora allow rows that contain more than one sentence, it is
+possible to specify a sentence tokenizer to retrieve all sentences instead
+of all rows.
+.sp
+Examples using the Subjectivity Dataset:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import subjectivity
+>>> subjectivity.sents()[23]
+[\(aqtelevision\(aq, \(aqmade\(aq, \(aqhim\(aq, \(aqfamous\(aq, \(aq,\(aq, \(aqbut\(aq, \(aqhis\(aq, \(aqbiggest\(aq, \(aqhits\(aq,
+\(aqhappened\(aq, \(aqoff\(aq, \(aqscreen\(aq, \(aq.\(aq]
+>>> subjectivity.categories()
+[\(aqobj\(aq, \(aqsubj\(aq]
+>>> subjectivity.words(categories=\(aqsubj\(aq)
+[\(aqsmart\(aq, \(aqand\(aq, \(aqalert\(aq, \(aq,\(aq, \(aqthirteen\(aq, ...]
+.ft P
+.fi
+.sp
+Examples using the Sentence Polarity Dataset:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import sentence_polarity
+>>> sentence_polarity.sents()
+[[\(aqsimplistic\(aq, \(aq,\(aq, \(aqsilly\(aq, \(aqand\(aq, \(aqtedious\(aq, \(aq.\(aq], ["it\(aqs", \(aqso\(aq, \(aqladdish\(aq,
+\(aqand\(aq, \(aqjuvenile\(aq, \(aq,\(aq, \(aqonly\(aq, \(aqteenage\(aq, \(aqboys\(aq, \(aqcould\(aq, \(aqpossibly\(aq, \(aqfind\(aq,
+\(aqit\(aq, \(aqfunny\(aq, \(aq.\(aq], ...]
+>>> sentence_polarity.categories()
+[\(aqneg\(aq, \(aqpos\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B CorpusView
+alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, categories=None)
+Return all sentences in the corpus or in the specified file(s).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+sentences have to be returned.
+.IP \(bu 2
+\fBcategories\fP \-\- a list specifying the categories whose sentences have
+to be returned.
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of sentences.
+Each sentence is tokenized using the specified word_tokenizer.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None, categories=None)
+Return all words and punctuation symbols in the corpus or in the specified
+file(s).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+words have to be returned.
+.IP \(bu 2
+\fBcategories\fP \-\- a list specifying the categories whose words have to
+be returned.
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of words and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.CategorizedTaggedCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP
+.sp
+A reader for part\-of\-speech tagged corpora whose documents are
+divided into categories based on their file identifiers.
+.INDENT 7.0
+.TP
+.B tagged_paras(fileids=None, categories=None, tagset=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+paragraphs, each encoded as a list of sentences, which are
+in turn encoded as lists of \fB(word,tag)\fP tuples.
+.TP
+.B Return type
+list(list(list(tuple(str,str))))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, categories=None, tagset=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+sentences, each encoded as a list of \fB(word,tag)\fP tuples.
+.TP
+.B Return type
+list(list(tuple(str,str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None, categories=None, tagset=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of tagged
+words and punctuation symbols, encoded as tuples
+\fB(word,tag)\fP\&.
+.TP
+.B Return type
+list(tuple(str,str))
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.ChasenCorpusReader(root, fileids, encoding=\(aqutf8\(aq, sent_splitter=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B paras(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_paras(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.ChunkedCorpusReader(root, fileids, extension=\(aq\(aq, str2chunktree=<function tagstr2tree>, sent_tokenizer=RegexpTokenizer(pattern=\(aq\en\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), para_block_reader=<function read_blankline_block>, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for chunked (and optionally tagged) corpora. Paragraphs
+are split using a block reader. They are then tokenized into
+sentences using a sentence tokenizer. Finally, these sentences
+are parsed into chunk trees using a string\-to\-chunktree conversion
+function. Each of these steps can be performed using a default
+function or a custom function. By default, paragraphs are split
+on blank lines; sentences are listed one per line; and sentences
+are parsed into chunk trees using \fBnltk.chunk.tagstr2tree\fP\&.
+.INDENT 7.0
+.TP
+.B chunked_paras(fileids=None, tagset=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+paragraphs, each encoded as a list of sentences, which are
+in turn encoded as a shallow Tree. The leaves of these
+trees are encoded as \fB(word, tag)\fP tuples (if the corpus
+has tags) or word strings (if the corpus has no tags).
+.TP
+.B Return type
+list(list(Tree))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chunked_sents(fileids=None, tagset=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+sentences, each encoded as a shallow Tree.
The leaves +of these trees are encoded as \fB(word, tag)\fP tuples (if +the corpus has tags) or word strings (if the corpus has no +tags). +.TP +.B Return type +list(Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B chunked_words(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and chunks. Words are encoded as \fB(word, tag)\fP +tuples (if the corpus has tags) or word strings (if the +corpus has no tags). Chunks are encoded as depth\-one +trees over \fB(word,tag)\fP tuples or word strings. +.TP +.B Return type +list(tuple(str,str) and Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B paras(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of word strings. +.TP +.B Return type +list(list(list(str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences or utterances, each encoded as a list of word +strings. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(list(tuple(str,str)))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a list of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(tuple(str,str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and punctuation symbols, encoded as tuples +\fB(word,tag)\fP\&. +.TP +.B Return type +list(tuple(str,str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ComparativeSentencesCorpusReader(root, fileids, word_tokenizer=WhitespaceTokenizer(pattern=\(aq\e\es+\(aq, gaps=True, discard_empty=True, flags=re.UNICODE | re.MULTILINE | re.DOTALL), sent_tokenizer=None, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Reader for the Comparative Sentence Dataset by Jindal and Liu (2006). +.sp +.nf +.ft C +>>> from nltk.corpus import comparative_sentences +>>> comparison = comparative_sentences.comparisons()[0] +>>> comparison.text +[\(aqits\(aq, \(aqfast\-forward\(aq, \(aqand\(aq, \(aqrewind\(aq, \(aqwork\(aq, \(aqmuch\(aq, \(aqmore\(aq, \(aqsmoothly\(aq, +\(aqand\(aq, \(aqconsistently\(aq, \(aqthan\(aq, \(aqthose\(aq, \(aqof\(aq, \(aqother\(aq, \(aqmodels\(aq, \(aqi\(aq, "\(aqve", +\(aqhad\(aq, \(aq.\(aq] +>>> comparison.entity_2 +\(aqmodels\(aq +>>> (comparison.feature, comparison.keyword) +(\(aqrewind\(aq, \(aqmore\(aq) +>>> len(comparative_sentences.comparisons()) +853 +.ft P +.fi +.INDENT 7.0 +.TP +.B CorpusView +alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.UNINDENT +.INDENT 7.0 +.TP +.B comparisons(fileids=None) +Return all comparisons in the corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +comparisons have to be returned. 
+.TP +.B Returns +the given file(s) as a list of Comparison objects. +.TP +.B Return type +list(Comparison) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B keywords(fileids=None) +Return a set of all keywords used in the corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +keywords have to be returned. +.TP +.B Returns +the set of keywords and comparative phrases used in the corpus. +.TP +.B Return type +set(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B keywords_readme() +Return the list of words and constituents considered as clues of a +comparison (from listOfkeywords.txt). +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +Return all sentences in the corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +sentences have to be returned. +.TP +.B Returns +all sentences of the corpus as lists of tokens (or as plain +strings, if no word tokenizer is specified). +.TP +.B Return type +list(list(str)) or list(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +Return all words and punctuation symbols in the corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +words have to be returned. +.TP +.B Returns +the given file(s) as a list of words and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ConllChunkCorpusReader(root, fileids, chunk_types, encoding=\(aqutf8\(aq, tagset=None, separator=None) +Bases: \fI\%nltk.corpus.reader.conll.ConllCorpusReader\fP +.sp +A ConllCorpusReader whose data file contains three columns: words, +pos, and chunk. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ConllCorpusReader(root, fileids, columntypes, chunk_types=None, root_label=\(aqS\(aq, pos_in_tree=False, srl_includes_roleset=True, encoding=\(aqutf8\(aq, tree_class=, tagset=None, separator=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +A corpus reader for CoNLL\-style files. These files consist of a +series of sentences, separated by blank lines. Each sentence is +encoded using a table (or "grid") of values, where each line +corresponds to a single word, and each column corresponds to an +annotation type. The set of columns used by CoNLL\-style files can +vary from corpus to corpus; the \fBConllCorpusReader\fP constructor +therefore takes an argument, \fBcolumntypes\fP, which is used to +specify the columns that are used by a given corpus. By default +columns are split by consecutive whitespaces, with the +\fBseparator\fP argument you can set a string to split by (e.g. +\fB\(aq \(aq\fP). +.INDENT 7.0 +.TP +.B @todo: Add support for reading from corpora where different +parallel files contain different columns. +.TP +.B @todo: Possibly add caching of the grid corpus view? This would +allow the same grid view to be used by different data access +methods (eg words() and parsed_sents() could both share the +same grid corpus view object). +.TP +.B @todo: Better support for \-DOCSTART\-. Currently, we just ignore +it, but it could be used to define methods that retrieve a +document at a time (eg parsed_documents()). 
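+.sp
+For example, NLTK\(aqs \fBconll2000\fP chunking corpus is distributed in this
+format and is loaded with a \fBConllChunkCorpusReader\fP\&. A minimal usage
+sketch, assuming the corpus has been installed with the NLTK downloader
+(the fileid \fB\(aqtrain.txt\(aq\fP is part of that corpus):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import conll2000
+>>> # (word, pos, iob) triples for the training split
+>>> triples = conll2000.iob_words(\(aqtrain.txt\(aq)
+>>> # shallow chunk trees, restricted to NP chunks
+>>> trees = conll2000.chunked_sents(\(aqtrain.txt\(aq, chunk_types=[\(aqNP\(aq])
+.ft P
+.fi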
+.UNINDENT +.INDENT 7.0 +.TP +.B CHUNK = \(aqchunk\(aq +column type for chunk structures +.UNINDENT +.INDENT 7.0 +.TP +.B COLUMN_TYPES = (\(aqwords\(aq, \(aqpos\(aq, \(aqtree\(aq, \(aqchunk\(aq, \(aqne\(aq, \(aqsrl\(aq, \(aqignore\(aq) +A list of all column types supported by the conll corpus reader. +.UNINDENT +.INDENT 7.0 +.TP +.B IGNORE = \(aqignore\(aq +column type for column that should be ignored +.UNINDENT +.INDENT 7.0 +.TP +.B NE = \(aqne\(aq +column type for named entities +.UNINDENT +.INDENT 7.0 +.TP +.B POS = \(aqpos\(aq +column type for part\-of\-speech tags +.UNINDENT +.INDENT 7.0 +.TP +.B SRL = \(aqsrl\(aq +column type for semantic role labels +.UNINDENT +.INDENT 7.0 +.TP +.B TREE = \(aqtree\(aq +column type for parse trees +.UNINDENT +.INDENT 7.0 +.TP +.B WORDS = \(aqwords\(aq +column type for words +.UNINDENT +.INDENT 7.0 +.TP +.B chunked_sents(fileids=None, chunk_types=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B chunked_words(fileids=None, chunk_types=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B iob_sents(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +a list of lists of word/tag/IOB tuples +.TP +.B Return type +list(list) +.TP +.B Parameters +\fBfileids\fP (\fINone\fP\fI or \fP\fIstr\fP\fI or \fP\fIlist\fP) \-\- the list of fileids that make up this corpus +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B iob_words(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +a list of word/tag/IOB tuples +.TP +.B Return type +list(tuple) +.TP +.B Parameters +\fBfileids\fP (\fINone\fP\fI or \fP\fIstr\fP\fI or \fP\fIlist\fP) \-\- the list of fileids that make up this corpus +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parsed_sents(fileids=None, pos_in_tree=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B srl_instances(fileids=None, pos_in_tree=None, flatten=True) +.UNINDENT +.INDENT 7.0 +.TP +.B srl_spans(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.CorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fBobject\fP +.sp +A base class for "corpus reader" classes, each of which can be +used to read a specific corpus format. Each individual corpus +reader instance is used to read a specific corpus, consisting of +one or more files under a common root directory. Each file is +identified by its \fBfile identifier\fP, which is the relative path +to the file from the root directory. +.sp +A separate subclass is defined for each corpus format. These +subclasses define one or more methods that provide \(aqviews\(aq on the +corpus contents, such as \fBwords()\fP (for a list of words) and +\fBparsed_sents()\fP (for a list of parsed sentences). Called with +no arguments, these methods will return the contents of the entire +corpus. For most corpora, these methods define one or more +selection arguments, such as \fBfileids\fP or \fBcategories\fP, which can +be used to select which portion of the corpus should be returned. +.INDENT 7.0 +.TP +.B abspath(fileid) +Return the absolute path for the given file. +.INDENT 7.0 +.TP +.B Parameters +\fBfileid\fP (\fIstr\fP) \-\- The file identifier for the file whose path +should be returned. 
+.TP +.B Return type +PathPointer +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abspaths(fileids=None, include_encoding=False, include_fileid=False) +Return a list of the absolute paths for all fileids in this corpus; +or for the given list of fileids, if specified. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfileids\fP (\fINone\fP\fI or \fP\fIstr\fP\fI or \fP\fIlist\fP) \-\- Specifies the set of fileids for which paths should +be returned. Can be None, for all fileids; a list of +file identifiers, for a specified set of fileids; or a single +file identifier, for a single file. Note that the return +value is always a list of paths, even if \fBfileids\fP is a +single file identifier. +.IP \(bu 2 +\fBinclude_encoding\fP \-\- If true, then return a list of +\fB(path_pointer, encoding)\fP tuples. +.UNINDENT +.TP +.B Return type +list(PathPointer) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B citation() +Return the contents of the corpus citation.bib file, if it exists. +.UNINDENT +.INDENT 7.0 +.TP +.B encoding(file) +Return the unicode encoding for the given corpus file, if known. +If the encoding is unknown, or if the given file should be +processed using byte strings (str), then return None. +.UNINDENT +.INDENT 7.0 +.TP +.B ensure_loaded() +Load this corpus (if it has not already been loaded). This is +used by LazyCorpusLoader as a simple method that can be used to +make sure a corpus is loaded \-\- e.g., in case a user wants to +do help(some_corpus). +.UNINDENT +.INDENT 7.0 +.TP +.B fileids() +Return a list of file identifiers for the fileids that make up +this corpus. +.UNINDENT +.INDENT 7.0 +.TP +.B license() +Return the contents of the corpus LICENSE file, if it exists. +.UNINDENT +.INDENT 7.0 +.TP +.B open(file) +Return an open stream that can be used to read the given file. +If the file\(aqs encoding is not None, then the stream will +automatically decode the file\(aqs contents into unicode. +.INDENT 7.0 +.TP +.B Parameters +\fBfile\fP \-\- The file identifier of the file to read. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B raw(fileids=None) +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- A list specifying the fileids that should be used. +.TP +.B Returns +the given file(s) as a single string. +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B readme() +Return the contents of the corpus README file, if it exists. +.UNINDENT +.INDENT 7.0 +.TP +.B property root +The directory where this corpus is stored. +.INDENT 7.0 +.TP +.B Type +PathPointer +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.CrubadanCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +A corpus reader used to access language An Crubadan n\-gram files. 
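+.sp
+A minimal usage sketch (assuming the \fBcrubadan\fP data package has been
+installed with the NLTK downloader):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import crubadan
+>>> codes = crubadan.langs()           # ISO 639\-3 codes, e.g. \(aqen\(aq
+>>> fd = crubadan.lang_freq(\(aqen\(aq)     # character n\-gram FreqDist for English
+>>> internal = crubadan.iso_to_crubadan(\(aqen\(aq)
+>>> iso = crubadan.crubadan_to_iso(internal)   # round\-trips back to \(aqen\(aq
+.ft P
+.fi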
+.INDENT 7.0
+.TP
+.B crubadan_to_iso(lang)
+Return ISO 639\-3 code given internal Crubadan code
+.UNINDENT
+.INDENT 7.0
+.TP
+.B iso_to_crubadan(lang)
+Return internal Crubadan code based on ISO 639\-3 code
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lang_freq(lang)
+Return n\-gram FreqDist for a specific language
+given ISO 639\-3 language code
+.UNINDENT
+.INDENT 7.0
+.TP
+.B langs()
+Return a list of supported languages as ISO 639\-3 codes
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.DependencyCorpusReader(root, fileids, encoding=\(aqutf8\(aq, word_tokenizer=, sent_tokenizer=RegexpTokenizer(pattern=\(aq\en\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), para_block_reader=)
+Bases: \fI\%nltk.corpus.reader.api.SyntaxCorpusReader\fP
+.INDENT 7.0
+.TP
+.B parsed_sents(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.EuroparlCorpusReader(root, fileids, word_tokenizer=WordPunctTokenizer(pattern=\(aq\e\ew+|[^\e\ew\e\es]+\(aq, gaps=False, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), sent_tokenizer=, para_block_reader=, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.plaintext.PlaintextCorpusReader\fP
+.sp
+Reader for Europarl corpora that consist of plaintext documents.
+Documents are divided into chapters instead of paragraphs as
+for regular plaintext documents. Chapters are separated using blank
+lines. Everything is inherited from \fBPlaintextCorpusReader\fP except
+that:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+Since the corpus is pre\-processed and pre\-tokenized, the
+word tokenizer should just split the line at whitespaces.
+.IP \(bu 2
+For the same reason, the sentence tokenizer should just
+split the paragraph at line breaks.
+.IP \(bu 2
+There is a new \(aqchapters()\(aq method that returns chapters
+instead of paragraphs.
+.IP \(bu 2
+The \(aqparas()\(aq method inherited from PlaintextCorpusReader is
+made non\-functional to remove any confusion between chapters
+and paragraphs for Europarl.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chapters(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+chapters, each encoded as a list of sentences, which are
+in turn encoded as lists of word strings.
+.TP
+.B Return type
+list(list(list(str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B paras(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+paragraphs, each encoded as a list of sentences, which are
+in turn encoded as lists of word strings.
+.TP
+.B Return type
+list(list(list(str)))
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.FramenetCorpusReader(root, fileids)
+Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP
+.sp
+A corpus reader for the Framenet Corpus.
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> fn.lu(3238).frame.lexUnit[\(aqglint.v\(aq] is fn.lu(3238)
+True
+>>> fn.frame_by_name(\(aqReplacing\(aq) is fn.lus(\(aqreplace.v\(aq)[0].frame
+True
+>>> fn.lus(\(aqprejudice.n\(aq)[0].frame.frameRelations == fn.frame_relations(\(aqPartiality\(aq)
+True
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B annotations(luNamePattern=None, exemplars=True, full_text=True)
+Frame annotation sets matching the specified criteria.
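+.sp
+A brief sketch of typical use (the LU name pattern below is an arbitrary
+illustration):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> # exemplar annotation sets only, for lexical units matching the pattern
+>>> anns = fn.annotations(r\(aq(?i)bake\(aq, full_text=False)
+.ft P
+.fi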
+.UNINDENT +.INDENT 7.0 +.TP +.B buildindexes() +Build the internal indexes to make look\-ups faster. +.UNINDENT +.INDENT 7.0 +.TP +.B doc(fn_docid) +Returns the annotated document whose id number is +\fBfn_docid\fP\&. This id number can be obtained by calling the +Documents() function. +.sp +The dict that is returned from this function will contain the +following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aq_type\(aq : \(aqfulltextannotation\(aq +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsentence\(aq +a list of sentences in the document.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B Each item in the list is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqID\(aq : the ID number of the sentence +.IP \(bu 2 +\(aq_type\(aq : \(aqsentence\(aq +.IP \(bu 2 +\(aqtext\(aq : the text of the sentence +.IP \(bu 2 +\(aqparagNo\(aq : the paragraph number +.IP \(bu 2 +\(aqsentNo\(aq : the sentence number +.IP \(bu 2 +\(aqdocID\(aq : the document ID number +.IP \(bu 2 +\(aqcorpID\(aq : the corpus ID number +.IP \(bu 2 +\(aqaPos\(aq : the annotation position +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqannotationSet\(aq +a list of annotation layers for the sentence.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B Each item in the list is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqID\(aq : the ID number of the annotation set +.IP \(bu 2 +\(aq_type\(aq : \(aqannotationset\(aq +.IP \(bu 2 +\(aqstatus\(aq : either \(aqMANUAL\(aq or \(aqUNANN\(aq +.IP \(bu 2 +\(aqluName\(aq : (only if status is \(aqMANUAL\(aq) +.IP \(bu 2 +\(aqluID\(aq : (only if status is \(aqMANUAL\(aq) +.IP \(bu 2 +\(aqframeID\(aq : (only if status is \(aqMANUAL\(aq) +.IP \(bu 2 +\(aqframeName\(aq: (only if status is \(aqMANUAL\(aq) +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlayer\(aq +a list of labels for the layer.INDENT 7.0 +.IP \(bu 2 +Each item in the layer is a dict containing the +following keys: +.INDENT 2.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\(aq_type\(aq: \(aqlayer\(aq +.IP \(bu 2 +\(aqrank\(aq +.IP \(bu 2 +\(aqname\(aq +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlabel\(aq +a list of labels in the layer.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B Each item is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqstart\(aq +.IP \(bu 2 +\(aqend\(aq +.IP \(bu 2 +\(aqname\(aq +.IP \(bu 2 +\(aqfeID\(aq (optional) +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +\fBfn_docid\fP (\fIint\fP) \-\- The Framenet id number of the document +.TP +.B Returns +Information about the annotated document +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B docs(name=None) +Return a list of the annotated full\-text documents in FrameNet, +optionally filtered by a regex to be matched against the document name. +.UNINDENT +.INDENT 7.0 +.TP +.B docs_metadata(name=None) +Return an index of the annotated documents in Framenet. +.sp +Details for a specific annotated document can be obtained using this +class\(aqs doc() function and pass it the value of the \(aqID\(aq field. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> len(fn.docs()) in (78, 107) # FN 1.5 and 1.7, resp. 
+True +>>> set([x.corpname for x in fn.docs_metadata()])>=set([\(aqANC\(aq, \(aqKBEval\(aq, \(aqLUCorpus\-v0.3\(aq, \(aqMiscellaneous\(aq, \(aqNTI\(aq, \(aqPropBank\(aq]) +True +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBname\fP (\fIstr\fP) \-\- A regular expression pattern used to search the +file name of each annotated document. The document\(aqs +file name contains the name of the corpus that the +document is from, followed by two underscores "__" +followed by the document name. So, for example, the +file name "LUCorpus\-v0.3__20000410_nyt\-NEW.xml" is +from the corpus named "LUCorpus\-v0.3" and the +document name is "20000410_nyt\-NEW.xml". +.TP +.B Returns +A list of selected (or all) annotated documents +.TP +.B Return type +list of dicts, where each dict object contains the following +keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq +.IP \(bu 2 +\(aqID\(aq +.IP \(bu 2 +\(aqcorpid\(aq +.IP \(bu 2 +\(aqcorpname\(aq +.IP \(bu 2 +\(aqdescription\(aq +.IP \(bu 2 +\(aqfilename\(aq +.UNINDENT + +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B exemplars(luNamePattern=None, frame=None, fe=None, fe2=None) +Lexicographic exemplar sentences, optionally filtered by LU name and/or 1\-2 FEs that +are realized overtly. \(aqframe\(aq may be a name pattern, frame ID, or frame instance. +\(aqfe\(aq may be a name pattern or FE instance; if specified, \(aqfe2\(aq may also +be specified to retrieve sentences with both overt FEs (in either order). +.UNINDENT +.INDENT 7.0 +.TP +.B fe_relations() +Obtain a list of frame element relations. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> ferels = fn.fe_relations() +>>> isinstance(ferels, list) +True +>>> len(ferels) in (10020, 12393) # FN 1.5 and 1.7, resp. +True +>>> PrettyDict(ferels[0], breakLines=True) +{\(aqID\(aq: 14642, +\(aq_type\(aq: \(aqferelation\(aq, +\(aqframeRelation\(aq: Child=Lively_place>, +\(aqsubFE\(aq: , +\(aqsubFEName\(aq: \(aqDegree\(aq, +\(aqsubFrame\(aq: , +\(aqsubID\(aq: 11370, +\(aqsupID\(aq: 2271, +\(aqsuperFE\(aq: , +\(aqsuperFEName\(aq: \(aqDegree\(aq, +\(aqsuperFrame\(aq: , +\(aqtype\(aq: } +.ft P +.fi +.INDENT 7.0 +.TP +.B Returns +A list of all of the frame element relations in framenet +.TP +.B Return type +list(dict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B fes(name=None, frame=None) +Lists frame element objects. If \(aqname\(aq is provided, this is treated as +a case\-insensitive regular expression to filter by frame name. +(Case\-insensitivity is because casing of frame element names is not always +consistent across frames.) Specify \(aqframe\(aq to filter by a frame name pattern, +ID, or object. 
+.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> fn.fes(\(aqNoise_maker\(aq) +[] +>>> sorted([(fe.frame.name,fe.name) for fe in fn.fes(\(aqsound\(aq)]) +[(\(aqCause_to_make_noise\(aq, \(aqSound_maker\(aq), (\(aqMake_noise\(aq, \(aqSound\(aq), + (\(aqMake_noise\(aq, \(aqSound_source\(aq), (\(aqSound_movement\(aq, \(aqLocation_of_sound_source\(aq), + (\(aqSound_movement\(aq, \(aqSound\(aq), (\(aqSound_movement\(aq, \(aqSound_source\(aq), + (\(aqSounds\(aq, \(aqComponent_sound\(aq), (\(aqSounds\(aq, \(aqLocation_of_sound_source\(aq), + (\(aqSounds\(aq, \(aqSound_source\(aq), (\(aqVocalizations\(aq, \(aqLocation_of_sound_source\(aq), + (\(aqVocalizations\(aq, \(aqSound_source\(aq)] +>>> sorted([(fe.frame.name,fe.name) for fe in fn.fes(\(aqsound\(aq,r\(aq(?i)make_noise\(aq)]) +[(\(aqCause_to_make_noise\(aq, \(aqSound_maker\(aq), + (\(aqMake_noise\(aq, \(aqSound\(aq), + (\(aqMake_noise\(aq, \(aqSound_source\(aq)] +>>> sorted(set(fe.name for fe in fn.fes(\(aq^sound\(aq))) +[\(aqSound\(aq, \(aqSound_maker\(aq, \(aqSound_source\(aq] +>>> len(fn.fes(\(aq^sound$\(aq)) +2 +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBname\fP (\fIstr\fP) \-\- A regular expression pattern used to match against +frame element names. If \(aqname\(aq is None, then a list of all +frame elements will be returned. +.TP +.B Returns +A list of matching frame elements +.TP +.B Return type +list(AttrDict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B frame(fn_fid_or_fname, ignorekeys=[]) +Get the details for the specified Frame using the frame\(aqs name +or id number. +.sp +Usage examples: +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> f = fn.frame(256) +>>> f.name +\(aqMedical_specialties\(aq +>>> f = fn.frame(\(aqMedical_specialties\(aq) +>>> f.ID +256 +>>> # ensure non\-ASCII character in definition doesn\(aqt trigger an encoding error: +>>> fn.frame(\(aqImposing_obligation\(aq) +frame (1494): Imposing_obligation... +.ft P +.fi +.sp +The dict that is returned from this function will contain the +following information about the Frame: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : the name of the Frame (e.g. \(aqBirth\(aq, \(aqApply_heat\(aq, etc.) +.IP \(bu 2 +\(aqdefinition\(aq : textual definition of the Frame +.IP \(bu 2 +\(aqID\(aq : the internal ID number of the Frame +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsemTypes\(aq +a list of semantic types for this frame.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B Each item in the list is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : can be used with the semtype() function +.IP \(bu 2 +\(aqID\(aq : can be used with the semtype() function +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlexUnit\(aq +a dict containing all of the LUs for this frame. +The keys in this dict are the names of the LUs and +the value for each key is itself a dict containing +info about the LU (see the lu() function for more info.) +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqFE\(aq +a dict containing the Frame Elements that are part of this frame.INDENT 7.0 +.INDENT 3.5 +The keys in this dict are the names of the FEs (e.g. \(aqBody_system\(aq) +and the values are dicts containing the following keys +.UNINDENT +.UNINDENT +.INDENT 7.0 +.IP \(bu 2 +\(aqdefinition\(aq : The definition of the FE +.IP \(bu 2 +\(aqname\(aq : The name of the FE e.g. \(aqBody_system\(aq +.IP \(bu 2 +\(aqID\(aq : The id number +.IP \(bu 2 +\(aq_type\(aq : \(aqfe\(aq +.IP \(bu 2 +\(aqabbrev\(aq : Abbreviation e.g. 
\(aqbod\(aq +.IP \(bu 2 +\(aqcoreType\(aq : one of "Core", "Peripheral", or "Extra\-Thematic" +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsemType\(aq +if not None, a dict with the following two keys:.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqname\(aq +name of the semantic type. can be used with +the semtype() function +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqID\(aq +id number of the semantic type. can be used with +the semtype() function +.UNINDENT +.UNINDENT +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqrequiresFE\(aq +if not None, a dict with the following two keys:.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : the name of another FE in this frame +.IP \(bu 2 +\(aqID\(aq : the id of the other FE in this frame +.UNINDENT +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqexcludesFE\(aq +if not None, a dict with the following two keys:.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : the name of another FE in this frame +.IP \(bu 2 +\(aqID\(aq : the id of the other FE in this frame +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.IP \(bu 2 +\(aqframeRelation\(aq : a list of objects describing frame relations +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqFEcoreSets\(aq +a list of Frame Element core sets for this frame.INDENT 7.0 +.IP \(bu 2 +Each item in the list is a list of FE objects +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfn_fid_or_fname\fP (\fIint\fP\fI or \fP\fIstr\fP) \-\- The Framenet name or id number of the frame +.IP \(bu 2 +\fBignorekeys\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The keys to ignore. These keys will not be +included in the output. (optional) +.UNINDENT +.TP +.B Returns +Information about a frame +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B frame_by_id(fn_fid, ignorekeys=[]) +Get the details for the specified Frame using the frame\(aqs id +number. +.sp +Usage examples: +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> f = fn.frame_by_id(256) +>>> f.ID +256 +>>> f.name +\(aqMedical_specialties\(aq +>>> f.definition +"This frame includes words that name ..." +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfn_fid\fP (\fIint\fP) \-\- The Framenet id number of the frame +.IP \(bu 2 +\fBignorekeys\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The keys to ignore. These keys will not be +included in the output. (optional) +.UNINDENT +.TP +.B Returns +Information about a frame +.TP +.B Return type +dict +.UNINDENT +.sp +Also see the \fBframe()\fP function for details about what is +contained in the dict that is returned. +.UNINDENT +.INDENT 7.0 +.TP +.B frame_by_name(fn_fname, ignorekeys=[], check_cache=True) +Get the details for the specified Frame using the frame\(aqs name. +.sp +Usage examples: +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> f = fn.frame_by_name(\(aqMedical_specialties\(aq) +>>> f.ID +256 +>>> f.name +\(aqMedical_specialties\(aq +>>> f.definition +"This frame includes words that name ..." +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfn_fname\fP (\fIstr\fP) \-\- The name of the frame +.IP \(bu 2 +\fBignorekeys\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The keys to ignore. These keys will not be +included in the output. (optional) +.UNINDENT +.TP +.B Returns +Information about a frame +.TP +.B Return type +dict +.UNINDENT +.sp +Also see the \fBframe()\fP function for details about what is +contained in the dict that is returned. 
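+.sp
+As an illustrative note, \fBignorekeys\fP (accepted by \fBframe()\fP,
+\fBframe_by_id()\fP and \fBframe_by_name()\fP) simply omits the named keys
+from the returned dict, which can keep large frames lightweight. A
+minimal sketch:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import framenet as fn
+>>> # \(aqlexUnit\(aq is omitted from the returned dict
+>>> f = fn.frame_by_name(\(aqMedical_specialties\(aq, ignorekeys=[\(aqlexUnit\(aq])
+.ft P
+.fi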
+.UNINDENT +.INDENT 7.0 +.TP +.B frame_ids_and_names(name=None) +Uses the frame index, which is much faster than looking up each frame definition +if only the names and IDs are needed. +.UNINDENT +.INDENT 7.0 +.TP +.B frame_relation_types() +Obtain a list of frame relation types. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> frts = sorted(fn.frame_relation_types(), key=itemgetter(\(aqID\(aq)) +>>> isinstance(frts, list) +True +>>> len(frts) in (9, 10) # FN 1.5 and 1.7, resp. +True +>>> PrettyDict(frts[0], breakLines=True) +{\(aqID\(aq: 1, + \(aq_type\(aq: \(aqframerelationtype\(aq, + \(aqframeRelations\(aq: [ Child=Change_of_consistency>, Child=Rotting>, ...], + \(aqname\(aq: \(aqInheritance\(aq, + \(aqsubFrameName\(aq: \(aqChild\(aq, + \(aqsuperFrameName\(aq: \(aqParent\(aq} +.ft P +.fi +.INDENT 7.0 +.TP +.B Returns +A list of all of the frame relation types in framenet +.TP +.B Return type +list(dict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B frame_relations(frame=None, frame2=None, type=None) +.INDENT 7.0 +.TP +.B Parameters +\fBframe\fP \-\- (optional) frame object, name, or ID; only relations involving +.UNINDENT +.sp +this frame will be returned +:param frame2: (optional; \(aqframe\(aq must be a different frame) only show relations +between the two specified frames, in either direction +:param type: (optional) frame relation type (name or object); show only relations +of this type +:type frame: int or str or AttrDict +:return: A list of all of the frame relations in framenet +:rtype: list(dict) +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> frels = fn.frame_relations() +>>> isinstance(frels, list) +True +>>> len(frels) in (1676, 2070) # FN 1.5 and 1.7, resp. +True +>>> PrettyList(fn.frame_relations(\(aqCooking_creation\(aq), maxReprSize=0, breakLines=True) +[ Child=Cooking_creation>, + Child=Cooking_creation>, + ReferringEntry=Cooking_creation>] +>>> PrettyList(fn.frame_relations(274), breakLines=True) +[ Child=Dodging>, + Child=Evading>, ...] +>>> PrettyList(fn.frame_relations(fn.frame(\(aqCooking_creation\(aq)), breakLines=True) +[ Child=Cooking_creation>, + Child=Cooking_creation>, ...] +>>> PrettyList(fn.frame_relations(\(aqCooking_creation\(aq, type=\(aqInheritance\(aq)) +[ Child=Cooking_creation>] +>>> PrettyList(fn.frame_relations(\(aqCooking_creation\(aq, \(aqApply_heat\(aq), breakLines=True) +[ Child=Cooking_creation>, + ReferringEntry=Cooking_creation>] +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B frames(name=None) +Obtain details for a specific frame. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> len(fn.frames()) in (1019, 1221) # FN 1.5 and 1.7, resp. +True +>>> x = PrettyList(fn.frames(r\(aq(?i)crim\(aq), maxReprSize=0, breakLines=True) +>>> x.sort(key=itemgetter(\(aqID\(aq)) +>>> x +[, + , + , + ] +.ft P +.fi +.sp +A brief intro to Frames (excerpted from "FrameNet II: Extended +Theory and Practice" by Ruppenhofer et. al., 2010): +.sp +A Frame is a script\-like conceptual structure that describes a +particular type of situation, object, or event along with the +participants and props that are needed for that Frame. For +example, the "Apply_heat" frame describes a common situation +involving a Cook, some Food, and a Heating_Instrument, and is +evoked by words such as bake, blanch, boil, broil, brown, +simmer, steam, etc. +.sp +We call the roles of a Frame "frame elements" (FEs) and the +frame\-evoking words are called "lexical units" (LUs). +.sp +FrameNet includes relations between Frames. 
Several types of +relations are defined, of which the most important are: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +Inheritance: An IS\-A relation. The child frame is a subtype +of the parent frame, and each FE in the parent is bound to +a corresponding FE in the child. An example is the +"Revenge" frame which inherits from the +"Rewards_and_punishments" frame. +.IP \(bu 2 +Using: The child frame presupposes the parent frame as +background, e.g the "Speed" frame "uses" (or presupposes) +the "Motion" frame; however, not all parent FEs need to be +bound to child FEs. +.IP \(bu 2 +Subframe: The child frame is a subevent of a complex event +represented by the parent, e.g. the "Criminal_process" frame +has subframes of "Arrest", "Arraignment", "Trial", and +"Sentencing". +.IP \(bu 2 +Perspective_on: The child frame provides a particular +perspective on an un\-perspectivized parent frame. A pair of +examples consists of the "Hiring" and "Get_a_job" frames, +which perspectivize the "Employment_start" frame from the +Employer\(aqs and the Employee\(aqs point of view, respectively. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +\fBname\fP (\fIstr\fP) \-\- A regular expression pattern used to match against +Frame names. If \(aqname\(aq is None, then a list of all +Framenet Frames will be returned. +.TP +.B Returns +A list of matching Frames (or all Frames). +.TP +.B Return type +list(AttrDict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B frames_by_lemma(pat) +Returns a list of all frames that contain LUs in which the +\fBname\fP attribute of the LU matches the given regular expression +\fBpat\fP\&. Note that LU names are composed of "lemma.POS", where +the "lemma" part can be made up of either a single lexeme +(e.g. \(aqrun\(aq) or multiple lexemes (e.g. \(aqa little\(aq). +.sp +Note: if you are going to be doing a lot of this type of +searching, you\(aqd want to build an index that maps from lemmas to +frames because each time frames_by_lemma() is called, it has to +search through ALL of the frame XML files in the db. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> from nltk.corpus.reader.framenet import PrettyList +>>> PrettyList(sorted(fn.frames_by_lemma(r\(aq(?i)a little\(aq), key=itemgetter(\(aqID\(aq))) +[, ] +.ft P +.fi +.INDENT 7.0 +.TP +.B Returns +A list of frame objects. +.TP +.B Return type +list(AttrDict) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B ft_sents(docNamePattern=None) +Full\-text annotation sentences, optionally filtered by document name. +.UNINDENT +.INDENT 7.0 +.TP +.B help(attrname=None) +Display help information summarizing the main methods. +.UNINDENT +.INDENT 7.0 +.TP +.B lu(fn_luid, ignorekeys=[], luName=None, frameID=None, frameName=None) +Access a lexical unit by its ID. luName, frameID, and frameName are used +only in the event that the LU does not have a file in the database +(which is the case for LUs with "Problem" status); in this case, +a placeholder LU is created which just contains its name, ID, and frame. 
+.sp +Usage examples: +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> fn.lu(256).name +\(aqforesee.v\(aq +>>> fn.lu(256).definition +\(aqCOD: be aware of beforehand; predict.\(aq +>>> fn.lu(256).frame.name +\(aqExpectation\(aq +>>> pprint(list(map(PrettyDict, fn.lu(256).lexemes))) +[{\(aqPOS\(aq: \(aqV\(aq, \(aqbreakBefore\(aq: \(aqfalse\(aq, \(aqheadword\(aq: \(aqfalse\(aq, \(aqname\(aq: \(aqforesee\(aq, \(aqorder\(aq: 1}] +.ft P +.fi +.sp +.nf +.ft C +>>> fn.lu(227).exemplars[23] +exemplar sentence (352962): +[sentNo] 0 +[aPos] 59699508 + +[LU] (227) guess.v in Coming_to_believe + +[frame] (23) Coming_to_believe + +[annotationSet] 2 annotation sets + +[POS] 18 tags + +[POS_tagset] BNC + +[GF] 3 relations + +[PT] 3 phrases + +[Other] 1 entry + +[text] + [Target] + [FE] + +When he was inside the house , Culley noticed the characteristic + \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + Content + +he would n\(aqt have guessed at . +\-\- ******* \-\- +Co C1 [Evidence:INI] + (Co=Cognizer, C1=Content) + + +.ft P +.fi +.sp +The dict that is returned from this function will contain most of the +following information about the LU. Note that some LUs do not contain +all of these pieces of information \- particularly \(aqtotalAnnotated\(aq and +\(aqincorporatedFE\(aq may be missing in some LUs: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : the name of the LU (e.g. \(aqmerger.n\(aq) +.IP \(bu 2 +\(aqdefinition\(aq : textual definition of the LU +.IP \(bu 2 +\(aqID\(aq : the internal ID number of the LU +.IP \(bu 2 +\(aq_type\(aq : \(aqlu\(aq +.IP \(bu 2 +\(aqstatus\(aq : e.g. \(aqCreated\(aq +.IP \(bu 2 +\(aqframe\(aq : Frame that this LU belongs to +.IP \(bu 2 +\(aqPOS\(aq : the part of speech of this LU (e.g. \(aqN\(aq) +.IP \(bu 2 +\(aqtotalAnnotated\(aq : total number of examples annotated with this LU +.IP \(bu 2 +\(aqincorporatedFE\(aq : FE that incorporates this LU (e.g. \(aqAilment\(aq) +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsentenceCount\(aq +a dict with the following two keys:.INDENT 7.0 +.IP \(bu 2 +\(aqannotated\(aq: number of sentences annotated with this LU +.IP \(bu 2 +\(aqtotal\(aq : total number of sentences with this LU +.UNINDENT +.UNINDENT +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlexemes\(aq +a list of dicts describing the lemma of this LU. +Each dict in the list contains these keys: +\- \(aqPOS\(aq : part of speech e.g. \(aqN\(aq +\- \(aqname\(aq : either single\-lexeme e.g. \(aqmerger\(aq or +.INDENT 7.0 +.INDENT 3.5 +multi\-lexeme e.g. \(aqa little\(aq +.UNINDENT +.UNINDENT +.INDENT 7.0 +.IP \(bu 2 +\(aqorder\(aq: the order of the lexeme in the lemma (starting from 1) +.IP \(bu 2 +\(aqheadword\(aq: a boolean (\(aqtrue\(aq or \(aqfalse\(aq) +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqbreakBefore\(aq: Can this lexeme be separated from the previous lexeme? +.INDENT 7.0 +.TP +.B Consider: "take over.v" as in: +Germany took over the Netherlands in 2 days. +Germany took the Netherlands over in 2 days. +.UNINDENT +.sp +In this case, \(aqbreakBefore\(aq would be "true" for the lexeme +"over". Contrast this with "take after.v" as in: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.INDENT 3.5 +Mary takes after her grandmother. +.UNINDENT +.UNINDENT +.sp + +.nf +* +.fi +Mary takes her grandmother after. 
+.UNINDENT +.UNINDENT +.sp +In this case, \(aqbreakBefore\(aq would be "false" for the lexeme "after" +.UNINDENT +.UNINDENT +.UNINDENT +.IP \(bu 2 +\(aqlemmaID\(aq : Can be used to connect lemmas in different LUs +.IP \(bu 2 +\(aqsemTypes\(aq : a list of semantic type objects for this LU +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsubCorpus\(aq +a list of subcorpora.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B Each item in the list is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq : +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqsentence\(aq +a list of sentences in the subcorpus.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B each item in the list is a dict with the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqID\(aq: +.IP \(bu 2 +\(aqsentNo\(aq: +.IP \(bu 2 +\(aqtext\(aq: the text of the sentence +.IP \(bu 2 +\(aqaPos\(aq: +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqannotationSet\(aq: a list of annotation sets +.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B each item in the list is a dict with the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqID\(aq: +.IP \(bu 2 +\(aqstatus\(aq: +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlayer\(aq: a list of layers +.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B each layer is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqname\(aq: layer name (e.g. \(aqBNC\(aq) +.IP \(bu 2 +\(aqrank\(aq: +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \(aqlabel\(aq: a list of labels for the layer +.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B each label is a dict containing the following keys: +.INDENT 7.0 +.IP \(bu 2 +\(aqstart\(aq: start pos of label in sentence \(aqtext\(aq (0\-based) +.IP \(bu 2 +\(aqend\(aq: end pos of label in sentence \(aqtext\(aq (0\-based) +.IP \(bu 2 +\(aqname\(aq: name of label (e.g. \(aqNN1\(aq) +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.sp +Under the hood, this implementation looks up the lexical unit information +in the \fIframe\fP definition file. That file does not contain +corpus annotations, so the LU files will be accessed on demand if those are +needed. In principle, valence patterns could be loaded here too, +though these are not currently supported. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfn_luid\fP (\fIint\fP) \-\- The id number of the lexical unit +.IP \(bu 2 +\fBignorekeys\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The keys to ignore. These keys will not be +included in the output. (optional) +.UNINDENT +.TP +.B Returns +All information about the lexical unit +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lu_basic(fn_luid) +Returns basic information about the LU whose id is +\fBfn_luid\fP\&. This is basically just a wrapper around the +\fBlu()\fP function with "subCorpus" info excluded. 
+.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> lu = PrettyDict(fn.lu_basic(256), breakLines=True) +>>> # ellipses account for differences between FN 1.5 and 1.7 +>>> lu +{\(aqID\(aq: 256, + \(aqPOS\(aq: \(aqV\(aq, + \(aqURL\(aq: \(aqhttps://framenet2.icsi.berkeley.edu/fnReports/data/lu/lu256.xml\(aq, + \(aq_type\(aq: \(aqlu\(aq, + \(aqcBy\(aq: ..., + \(aqcDate\(aq: \(aq02/08/2001 01:27:50 PST Thu\(aq, + \(aqdefinition\(aq: \(aqCOD: be aware of beforehand; predict.\(aq, + \(aqdefinitionMarkup\(aq: \(aqCOD: be aware of beforehand; predict.\(aq, + \(aqframe\(aq: , + \(aqlemmaID\(aq: 15082, + \(aqlexemes\(aq: [{\(aqPOS\(aq: \(aqV\(aq, \(aqbreakBefore\(aq: \(aqfalse\(aq, \(aqheadword\(aq: \(aqfalse\(aq, \(aqname\(aq: \(aqforesee\(aq, \(aqorder\(aq: 1}], + \(aqname\(aq: \(aqforesee.v\(aq, + \(aqsemTypes\(aq: [], + \(aqsentenceCount\(aq: {\(aqannotated\(aq: ..., \(aqtotal\(aq: ...}, + \(aqstatus\(aq: \(aqFN1_Sent\(aq} +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBfn_luid\fP (\fIint\fP) \-\- The id number of the desired LU +.TP +.B Returns +Basic information about the lexical unit +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lu_ids_and_names(name=None) +Uses the LU index, which is much faster than looking up each LU definition +if only the names and IDs are needed. +.UNINDENT +.INDENT 7.0 +.TP +.B lus(name=None, frame=None) +Obtain details for lexical units. +Optionally restrict by lexical unit name pattern, and/or to a certain frame +or frames whose name matches a pattern. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> len(fn.lus()) in (11829, 13572) # FN 1.5 and 1.7, resp. +True +>>> PrettyList(sorted(fn.lus(r\(aq(?i)a little\(aq), key=itemgetter(\(aqID\(aq)), maxReprSize=0, breakLines=True) +[, + , + ] +>>> PrettyList(sorted(fn.lus(r\(aqinterest\(aq, r\(aq(?i)stimulus\(aq), key=itemgetter(\(aqID\(aq))) +[, ] +.ft P +.fi +.sp +A brief intro to Lexical Units (excerpted from "FrameNet II: +Extended Theory and Practice" by Ruppenhofer et. al., 2010): +.sp +A lexical unit (LU) is a pairing of a word with a meaning. For +example, the "Apply_heat" Frame describes a common situation +involving a Cook, some Food, and a Heating Instrument, and is +_evoked_ by words such as bake, blanch, boil, broil, brown, +simmer, steam, etc. These frame\-evoking words are the LUs in the +Apply_heat frame. Each sense of a polysemous word is a different +LU. +.sp +We have used the word "word" in talking about LUs. The reality +is actually rather complex. When we say that the word "bake" is +polysemous, we mean that the lemma "bake.v" (which has the +word\-forms "bake", "bakes", "baked", and "baking") is linked to +three different frames: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +Apply_heat: "Michelle baked the potatoes for 45 minutes." +.IP \(bu 2 +Cooking_creation: "Michelle baked her mother a cake for her birthday." +.IP \(bu 2 +Absorb_heat: "The potatoes have to bake for more than 30 minutes." +.UNINDENT +.UNINDENT +.UNINDENT +.sp +These constitute three different LUs, with different +definitions. +.sp +Multiword expressions such as "given name" and hyphenated words +like "shut\-eye" can also be LUs. Idiomatic phrases such as +"middle of nowhere" and "give the slip (to)" are also defined as +LUs in the appropriate frames ("Isolated_places" and "Evading", +respectively), and their internal structure is not analyzed. +.sp +Framenet provides multiple annotated examples of each sense of a +word (i.e. each LU). 
Moreover, the set of examples +(approximately 20 per LU) illustrates all of the combinatorial +possibilities of the lexical unit. +.sp +Each LU is linked to a Frame, and hence to the other words which +evoke that Frame. This makes the FrameNet database similar to a +thesaurus, grouping together semantically similar words. +.sp +In the simplest case, frame\-evoking words are verbs such as +"fried" in: +.INDENT 7.0 +.INDENT 3.5 +"Matilde fried the catfish in a heavy iron skillet." +.UNINDENT +.UNINDENT +.sp +Sometimes event nouns may evoke a Frame. For example, +"reduction" evokes "Cause_change_of_scalar_position" in: +.INDENT 7.0 +.INDENT 3.5 +"...the reduction of debt levels to $665 million from $2.6 billion." +.UNINDENT +.UNINDENT +.sp +Adjectives may also evoke a Frame. For example, "asleep" may +evoke the "Sleep" frame as in: +.INDENT 7.0 +.INDENT 3.5 +"They were asleep for hours." +.UNINDENT +.UNINDENT +.sp +Many common nouns, such as artifacts like "hat" or "tower", +typically serve as dependents rather than clearly evoking their +own frames. +.INDENT 7.0 +.TP +.B Parameters +\fBname\fP (\fIstr\fP) \-\- +.sp +A regular expression pattern used to search the LU +names. Note that LU names take the form of a dotted +string (e.g. "run.v" or "a little.adv") in which a +lemma precedes the "." and a POS follows the +dot. The lemma may be composed of a single lexeme +(e.g. "run") or of multiple lexemes (e.g. "a +little"). If \(aqname\(aq is not given, then all LUs will +be returned. +.sp +The valid POSes are: +.INDENT 7.0 +.INDENT 3.5 +v \- verb +n \- noun +a \- adjective +adv \- adverb +prep \- preposition +num \- numbers +intj \- interjection +art \- article +c \- conjunction +scon \- subordinating conjunction +.UNINDENT +.UNINDENT + +.TP +.B Returns +A list of selected (or all) lexical units +.TP +.B Return type +list of LU objects (dicts) See the lu() function for info +about the specifics of LU objects. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B propagate_semtypes() +Apply inference rules to distribute semtypes over relations between FEs. +For FrameNet 1.5, this results in 1011 semtypes being propagated. +(Not done by default because it requires loading all frame files, +which takes several seconds. If this needed to be fast, it could be rewritten +to traverse the neighboring relations on demand for each FE semtype.) +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> x = sum(1 for f in fn.frames() for fe in f.FE.values() if fe.semType) +>>> fn.propagate_semtypes() +>>> y = sum(1 for f in fn.frames() for fe in f.FE.values() if fe.semType) +>>> y\-x > 1000 +True +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B semtype(key) +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> fn.semtype(233).name +\(aqTemperature\(aq +>>> fn.semtype(233).abbrev +\(aqTemp\(aq +>>> fn.semtype(\(aqTemperature\(aq).ID +233 +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBkey\fP (\fIstring\fP\fI or \fP\fIint\fP) \-\- The name, abbreviation, or id number of the semantic type +.TP +.B Returns +Information about a semantic type +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B semtype_inherits(st, superST) +.UNINDENT +.INDENT 7.0 +.TP +.B semtypes() +Obtain a list of semantic types. +.sp +.nf +.ft C +>>> from nltk.corpus import framenet as fn +>>> stypes = fn.semtypes() +>>> len(stypes) in (73, 109) # FN 1.5 and 1.7, resp. 
+True
+>>> sorted(stypes[0].keys())
+[\(aqID\(aq, \(aq_type\(aq, \(aqabbrev\(aq, \(aqdefinition\(aq, \(aqdefinitionMarkup\(aq, \(aqname\(aq, \(aqrootType\(aq, \(aqsubTypes\(aq, \(aqsuperType\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+A list of all of the semantic types in framenet
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(exemplars=True, full_text=True)
+Annotated sentences matching the specified criteria.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B warnings(v)
+Enable or disable warnings of data integrity issues as they are encountered.
+If v is truthy, warnings will be enabled.
+.sp
+(This is a function rather than just an attribute/property to ensure that if
+enabling warnings is the first action taken, the corpus reader is instantiated first.)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.IEERCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B docs(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parsed_docs(fileids=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.IPIPANCorpusReader(root, fileids)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Corpus reader designed to work with the corpus created by IPI PAN.
+See \fI\%http://korpus.pl/en/\fP for more details about the IPI PAN corpus.
+.sp
+The corpus includes information about text domain, channel and categories.
+You can access possible values using \fBdomains()\fP, \fBchannels()\fP and
+\fBcategories()\fP\&. You can also use this metadata to filter files, e.g.:
+\fBfileids(channels=\(aqprasa\(aq)\fP, \fBfileids(categories=\(aqpublicystyczny\(aq)\fP\&.
+.sp
+The reader supports the methods words, sents, paras and their tagged versions.
+You can get the part of speech instead of the full tag by passing the
+parameter "simplify_tags=True", e.g.: \fBtagged_sents(simplify_tags=True)\fP\&.
+.sp
+You can also get all disambiguated tags by specifying the parameter
+"one_tag=False", e.g.: \fBtagged_paras(one_tag=False)\fP\&.
+.sp
+You can get all tags that were assigned by a morphological analyzer by
+specifying the parameter "disamb_only=False", e.g.
+\fBtagged_words(disamb_only=False)\fP\&.
+.sp
+The IPIPAN Corpus contains tags indicating if there is a space between two
+tokens. To add special "no space" markers, you should specify the parameter
+"append_no_space=True", e.g. \fBtagged_words(append_no_space=True)\fP\&.
+As a result, wherever there should be no space between two tokens, a new
+pair (\(aq\(aq, \(aqno\-space\(aq) will be inserted (for tagged data) and just \(aq\(aq for
+methods without tags.
+.sp
+The corpus reader can also try to append spaces between words. To enable this
+option, specify the parameter "append_space=True", e.g. \fBwords(append_space=True)\fP\&.
+As a result either \(aq \(aq or (\(aq \(aq, \(aqspace\(aq) will be inserted between tokens.
+.sp
+By default, XML entities like &quot; and &amp; are replaced by the corresponding
+characters. You can turn off this feature by specifying the parameter
+"replace_xmlentities=False", e.g. \fBwords(replace_xmlentities=False)\fP\&.
+.INDENT 7.0
+.TP
+.B categories(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B channels(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B domains(fileids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fileids(channels=None, domains=None, categories=None)
+Return a list of file identifiers for the fileids that make up
+this corpus.
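+.sp
+A usage sketch combining the filters described above (assuming the
+\fBipipan\fP data package is installed; the channel value is a
+corpus\-specific example taken from the class description):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import ipipan
+>>> press = ipipan.fileids(channels=\(aqprasa\(aq)
+>>> # part\-of\-speech only, instead of full morphosyntactic tags
+>>> tagged = ipipan.tagged_sents(press, simplify_tags=True)
+.ft P
+.fi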
+.UNINDENT +.INDENT 7.0 +.TP +.B paras(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, **kwargs) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None, **kwargs) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.IndianCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +List of words, one per line. Blank lines are ignored. +.INDENT 7.0 +.TP +.B sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.KNBCorpusReader(root, fileids, encoding=\(aqutf8\(aq, morphs2str=>) +Bases: \fI\%nltk.corpus.reader.api.SyntaxCorpusReader\fP +.INDENT 7.0 +.TP +.B This class implements: +.INDENT 7.0 +.IP \(bu 2 +\fB__init__\fP, which specifies the location of the corpus +and a method for detecting the sentence blocks in corpus files. +.IP \(bu 2 +\fB_read_block\fP, which reads a block from the input stream. +.IP \(bu 2 +\fB_word\fP, which takes a block and returns a list of list of words. +.IP \(bu 2 +\fB_tag\fP, which takes a block and returns a list of list of tagged +words. +.IP \(bu 2 +\fB_parse\fP, which takes a block and returns a list of parsed +sentences. +.UNINDENT +.TP +.B The structure of tagged words: +tagged_word = (word(str), tags(tuple)) +tags = (surface, reading, lemma, pos1, posid1, pos2, posid2, pos3, posid3, others ...) +.UNINDENT +.sp +.nf +.ft C +>>> from nltk.corpus.util import LazyCorpusLoader +>>> knbc = LazyCorpusLoader( +\&... \(aqknbc/corpus1\(aq, +\&... KNBCorpusReader, +\&... r\(aq.*/KN.*\(aq, +\&... encoding=\(aqeuc\-jp\(aq, +\&... ) +.ft P +.fi +.sp +.nf +.ft C +>>> len(knbc.sents()[0]) +9 +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.LinThesaurusCorpusReader(root, badscore=0.0) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Wrapper for the LISP\-formatted thesauruses distributed by Dekang Lin. +.INDENT 7.0 +.TP +.B scored_synonyms(ngram, fileid=None) +Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBngram\fP (\fIC{string}\fP) \-\- ngram to lookup +.IP \(bu 2 +\fBfileid\fP (\fIC{string}\fP) \-\- thesaurus fileid to search in. If None, search all fileids. +.UNINDENT +.TP +.B Returns +If fileid is specified, list of tuples of scores and synonyms; otherwise, +list of tuples of fileids and lists, where inner lists consist of tuples of +scores and synonyms. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B similarity(ngram1, ngram2, fileid=None) +Returns the similarity score for two ngrams. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBngram1\fP (\fIC{string}\fP) \-\- first ngram to compare +.IP \(bu 2 +\fBngram2\fP (\fIC{string}\fP) \-\- second ngram to compare +.IP \(bu 2 +\fBfileid\fP (\fIC{string}\fP) \-\- thesaurus fileid to search in. If None, search all fileids. +.UNINDENT +.TP +.B Returns +If fileid is specified, just the score for the two ngrams; otherwise, +list of tuples of fileids and scores. 
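+.sp
+A usage sketch (assuming the \fBlin_thesaurus\fP data package is installed;
+\fBsimN.lsp\fP, the noun thesaurus file, is assumed here):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import lin_thesaurus as thes
+>>> score = thes.similarity(\(aqcar\(aq, \(aqautomobile\(aq, fileid=\(aqsimN.lsp\(aq)
+>>> syns = thes.synonyms(\(aqcar\(aq, fileid=\(aqsimN.lsp\(aq)
+.ft P
+.fi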
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B synonyms(ngram, fileid=None)
+Returns a list of synonyms for the current ngram.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBngram\fP (\fIC{string}\fP) \-\- ngram to lookup
+.IP \(bu 2
+\fBfileid\fP (\fIC{string}\fP) \-\- thesaurus fileid to search in. If None, search all fileids.
+.UNINDENT
+.TP
+.B Returns
+If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and
+lists, where inner lists contain synonyms.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.MTECorpusReader(root=None, fileids=None, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP
+.sp
+Reader for corpora following the TEI\-p5 xml scheme, such as MULTEXT\-East.
+MULTEXT\-East contains part\-of\-speech\-tagged words with a quite precise tagging
+scheme. These tags can be converted to the Universal tagset.
+.INDENT 7.0
+.TP
+.B lemma_paras(fileids=None)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.TP
+.B Returns
+the given file(s) as a list of paragraphs, each encoded as a
+list of sentences, which are in turn encoded as a list of
+tuples of the word and the corresponding lemma (word, lemma)
+.TP
+.B Return type
+list(List(List(tuple(str, str))))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemma_sents(fileids=None)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.TP
+.B Returns
+the given file(s) as a list of sentences or utterances, each
+encoded as a list of tuples of the word and the corresponding
+lemma (word, lemma)
+.TP
+.B Return type
+list(list(tuple(str, str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemma_words(fileids=None)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.TP
+.B Returns
+the given file(s) as a list of words, the corresponding lemmas
+and punctuation symbols, encoded as tuples (word, lemma)
+.TP
+.B Return type
+list(tuple(str,str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B paras(fileids=None)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.TP
+.B Returns
+the given file(s) as a list of paragraphs, each encoded as a list
+of sentences, which are in turn encoded as lists of word strings
+.TP
+.B Return type
+list(list(list(str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.TP
+.B Returns
+the given file(s) as a list of sentences or utterances,
+each encoded as a list of word strings
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_paras(fileids=None, tagset=\(aqmsd\(aq, tags=\(aq\(aq)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.IP \(bu 2
+\fBtagset\fP \-\- The tagset that should be used in the returned object,
+either "universal" or "msd" ("msd" is the default)
+.IP \(bu 2
+\fBtags\fP \-\- An MSD tag used to filter out all parts of the corpus
+that are not more precise than, or equal to, the given tag
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of paragraphs, each encoded as a
+list of sentences, which are in turn encoded as a list
+of (word,tag) tuples
+.TP
+.B Return type
+list(list(list(tuple(str, str))))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, tagset=\(aqmsd\(aq, tags=\(aq\(aq)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.IP \(bu 2
+\fBtagset\fP \-\- The tagset that should be used in the returned object,
+either "universal" or "msd" ("msd" is the default)
+.IP \(bu 2
+\fBtags\fP \-\- An MSD tag used to filter out all parts of the corpus
+that are not more precise than, or equal to, the given tag
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of sentences or utterances,
+each encoded as a list of (word,tag) tuples
+.TP
+.B Return type
+list(list(tuple(str, str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None, tagset=\(aqmsd\(aq, tags=\(aq\(aq)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.IP \(bu 2
+\fBtagset\fP \-\- The tagset that should be used in the returned object,
+either "universal" or "msd" ("msd" is the default)
+.IP \(bu 2
+\fBtags\fP \-\- An MSD tag used to filter out all parts of the corpus
+that are not more precise than, or equal to, the given tag
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of tagged words and punctuation symbols
+encoded as tuples (word, tag)
+.TP
+.B Return type
+list(tuple(str, str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfileids\fP \-\- A list specifying the fileids that should be used.
+.TP
+.B Returns
+the given file(s) as a list of words and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.MWAPPDBCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP
+.sp
+This class is used to read the list of word pairs from the subset of lexical
+pairs of The Paraphrase Database (PPDB) XXXL used in the Monolingual Word
+Alignment (MWA) algorithm described in Sultan et al. (2014a, 2014b, 2015):
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+\fI\%http://acl2014.org/acl2014/Q14/pdf/Q14\-1017\fP
+.IP \(bu 2
+\fI\%http://www.aclweb.org/anthology/S14\-2039\fP
+.IP \(bu 2
+\fI\%http://www.aclweb.org/anthology/S15\-2027\fP
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The original source of the full PPDB corpus can be found at
+\fI\%http://www.cis.upenn.edu/~ccb/ppdb/\fP
+.INDENT 7.0
+.TP
+.B Returns
+a list of tuples of similar lexical terms.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B entries(fileids=\(aqppdb\-1.0\-xxxl\-lexical.extended.synonyms.uniquepairs\(aq)
+.INDENT 7.0
+.TP
+.B Returns
+a tuple of synonym word pairs.
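+.sp
+A brief sketch (assuming the corresponding data package is installed and
+exposed as \fBnltk.corpus.ppdb\fP):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import ppdb
+>>> pairs = ppdb.entries()   # (term, paraphrase) tuples
+.ft P
+.fi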
+.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B mwa_ppdb_xxxl_file = \(aqppdb\-1.0\-xxxl\-lexical.extended.synonyms.uniquepairs\(aq +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.MacMorphoCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP +.sp +A corpus reader for the MAC_MORPHO corpus. Each line contains a +single tagged word, using \(aq_\(aq as a separator. Sentence boundaries +are based on the end\-sentence tag (\(aq_.\(aq). Paragraph information +is not included in the corpus, so each paragraph returned by +\fBself.paras()\fP and \fBself.tagged_paras()\fP contains a single +sentence. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.NKJPCorpusReader(root, fileids=\(aq.*\(aq) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.INDENT 7.0 +.TP +.B HEADER_MODE = 2 +.UNINDENT +.INDENT 7.0 +.TP +.B RAW_MODE = 3 +.UNINDENT +.INDENT 7.0 +.TP +.B SENTS_MODE = 1 +.UNINDENT +.INDENT 7.0 +.TP +.B WORDS_MODE = 0 +.UNINDENT +.INDENT 7.0 +.TP +.B add_root(fileid) +Add root if necessary to specified fileid. +.UNINDENT +.INDENT 7.0 +.TP +.B fileids() +Returns a list of file identifiers for the fileids that make up +this corpus. +.UNINDENT +.INDENT 7.0 +.TP +.B get_paths() +.UNINDENT +.INDENT 7.0 +.TP +.B header(fileids=None, **kwargs) +Returns header(s) of specified fileids. +.UNINDENT +.INDENT 7.0 +.TP +.B raw(fileids=None, **kwargs) +Returns words in specified fileids. +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None, **kwargs) +Returns sentences in specified fileids. +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, **kwargs) +Call with specified tags as a list, e.g. tags=[\(aqsubst\(aq, \(aqcomp\(aq]. +Returns tagged words in specified fileids. +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None, **kwargs) +Returns words in specified fileids. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.NPSChatCorpusReader(root, fileids, wrap_etree=False, tagset=None) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.INDENT 7.0 +.TP +.B posts(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_posts(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +Returns all of the words and punctuation symbols in the specified file +that were in text nodes \-\- ie, tags are ignored. Like the xml() method, +fileid can only specify one file. +.INDENT 7.0 +.TP +.B Returns +the given file\(aqs text nodes as a list of words and punctuation symbols +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B xml_posts(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.NombankCorpusReader(root, nomfile, framefiles=\(aq\(aq, nounsfile=None, parse_fileid_xform=None, parse_corpus=None, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Corpus reader for the nombank corpus, which augments the Penn +Treebank with information about the predicate argument structure +of every noun instance. The corpus consists of two parts: the +predicate\-argument annotations themselves, and a set of "frameset +files" which define the argument labels used by the annotations, +on a per\-noun basis. Each "frameset file" contains one or more +predicates, such as \fB\(aqturn\(aq\fP or \fB\(aqturn_on\(aq\fP, each of which is +divided into coarse\-grained word senses called "rolesets". 
For
+each "roleset", the frameset file provides descriptions of the
+argument roles, along with examples.
+.INDENT 7.0
+.TP
+.B instances(baseform=None)
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of
+\fBNombankInstance\fP objects, one for each noun in the corpus.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lines()
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of strings, one for
+each line in the predicate\-argument annotation file.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B nouns()
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of all noun lemmas
+in this corpus (from the nombank.1.0.words file).
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B roleset(roleset_id)
+.INDENT 7.0
+.TP
+.B Returns
+the xml description for the given roleset.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B rolesets(baseform=None)
+.INDENT 7.0
+.TP
+.B Returns
+a list of xml descriptions for rolesets.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.NonbreakingPrefixesCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP
+.sp
+This is a class to read the nonbreaking prefixes text files from the
+Moses Machine Translation toolkit. These lists are used in the Python port
+of the Moses\(aq word tokenizer.
+.INDENT 7.0
+.TP
+.B available_langs = {\(aqca\(aq: \(aqca\(aq, \(aqcatalan\(aq: \(aqca\(aq, \(aqcs\(aq: \(aqcs\(aq, \(aqczech\(aq: \(aqcs\(aq, \(aqde\(aq: \(aqde\(aq, \(aqdutch\(aq: \(aqnl\(aq, \(aqel\(aq: \(aqel\(aq, \(aqen\(aq: \(aqen\(aq, \(aqenglish\(aq: \(aqen\(aq, \(aqes\(aq: \(aqes\(aq, \(aqfi\(aq: \(aqfi\(aq, \(aqfinnish\(aq: \(aqfi\(aq, \(aqfr\(aq: \(aqfr\(aq, \(aqfrench\(aq: \(aqfr\(aq, \(aqgerman\(aq: \(aqde\(aq, \(aqgreek\(aq: \(aqel\(aq, \(aqhu\(aq: \(aqhu\(aq, \(aqhungarian\(aq: \(aqhu\(aq, \(aqicelandic\(aq: \(aqis\(aq, \(aqis\(aq: \(aqis\(aq, \(aqit\(aq: \(aqit\(aq, \(aqitalian\(aq: \(aqit\(aq, \(aqlatvian\(aq: \(aqlv\(aq, \(aqlv\(aq: \(aqlv\(aq, \(aqnl\(aq: \(aqnl\(aq, \(aqpl\(aq: \(aqpl\(aq, \(aqpolish\(aq: \(aqpl\(aq, \(aqportuguese\(aq: \(aqpt\(aq, \(aqpt\(aq: \(aqpt\(aq, \(aqro\(aq: \(aqro\(aq, \(aqromanian\(aq: \(aqro\(aq, \(aqru\(aq: \(aqru\(aq, \(aqrussian\(aq: \(aqru\(aq, \(aqsk\(aq: \(aqsk\(aq, \(aqsl\(aq: \(aqsl\(aq, \(aqslovak\(aq: \(aqsk\(aq, \(aqslovenian\(aq: \(aqsl\(aq, \(aqspanish\(aq: \(aqes\(aq, \(aqsv\(aq: \(aqsv\(aq, \(aqswedish\(aq: \(aqsv\(aq, \(aqta\(aq: \(aqta\(aq, \(aqtamil\(aq: \(aqta\(aq}
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(lang=None, fileids=None, ignore_lines_startswith=\(aq#\(aq)
+Return a list of nonbreaking prefixes for the specified
+language(s).
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import nonbreaking_prefixes as nbp
+>>> nbp.words(\(aqen\(aq)[:10] == [u\(aqA\(aq, u\(aqB\(aq, u\(aqC\(aq, u\(aqD\(aq, u\(aqE\(aq, u\(aqF\(aq, u\(aqG\(aq, u\(aqH\(aq, u\(aqI\(aq, u\(aqJ\(aq]
+True
+>>> nbp.words(\(aqta\(aq)[:5] == [u\(aqஅ\(aq, u\(aqஆ\(aq, u\(aqஇ\(aq, u\(aqஈ\(aq, u\(aqஉ\(aq]
+True
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+a list of words for the specified language(s).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.OpinionLexiconCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP
+.sp
+Reader for the Liu and Hu opinion lexicon. Blank lines and readme are ignored.
+.sp +.nf +.ft C +>>> from nltk.corpus import opinion_lexicon +>>> opinion_lexicon.words() +[\(aq2\-faced\(aq, \(aq2\-faces\(aq, \(aqabnormal\(aq, \(aqabolish\(aq, ...] +.ft P +.fi +.sp +The OpinionLexiconCorpusReader provides shortcuts to retrieve positive/negative +words: +.sp +.nf +.ft C +>>> opinion_lexicon.negative() +[\(aq2\-faced\(aq, \(aq2\-faces\(aq, \(aqabnormal\(aq, \(aqabolish\(aq, ...] +.ft P +.fi +.sp +Note that words from \fIwords()\fP method are sorted by file id, not alphabetically: +.sp +.nf +.ft C +>>> opinion_lexicon.words()[0:10] +[\(aq2\-faced\(aq, \(aq2\-faces\(aq, \(aqabnormal\(aq, \(aqabolish\(aq, \(aqabominable\(aq, \(aqabominably\(aq, +\(aqabominate\(aq, \(aqabomination\(aq, \(aqabort\(aq, \(aqaborted\(aq] +>>> sorted(opinion_lexicon.words())[0:10] +[\(aq2\-faced\(aq, \(aq2\-faces\(aq, \(aqa+\(aq, \(aqabnormal\(aq, \(aqabolish\(aq, \(aqabominable\(aq, \(aqabominably\(aq, +\(aqabominate\(aq, \(aqabomination\(aq, \(aqabort\(aq] +.ft P +.fi +.INDENT 7.0 +.TP +.B CorpusView +alias of \fI\%nltk.corpus.reader.opinion_lexicon.IgnoreReadmeCorpusView\fP +.UNINDENT +.INDENT 7.0 +.TP +.B negative() +Return all negative words in alphabetical order. +.INDENT 7.0 +.TP +.B Returns +a list of negative words. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B positive() +Return all positive words in alphabetical order. +.INDENT 7.0 +.TP +.B Returns +a list of positive words. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +Return all words in the opinion lexicon. Note that these words are not +sorted in alphabetical order. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +words have to be returned. +.TP +.B Returns +the given file(s) as a list of words and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.PPAttachmentCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +sentence_id verb noun1 preposition noun2 attachment +.INDENT 7.0 +.TP +.B attachments(fileids) +.UNINDENT +.INDENT 7.0 +.TP +.B tuples(fileids) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.PanLexLiteCorpusReader(root) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B MEANING_Q = \(aq\en SELECT dnx2.mn, dnx2.uq, dnx2.ap, dnx2.ui, ex2.tt, ex2.lv\en FROM dnx\en JOIN ex ON (ex.ex = dnx.ex)\en JOIN dnx dnx2 ON (dnx2.mn = dnx.mn)\en JOIN ex ex2 ON (ex2.ex = dnx2.ex)\en WHERE dnx.ex != dnx2.ex AND ex.tt = ? AND ex.lv = ?\en ORDER BY dnx2.uq DESC\en \(aq +.UNINDENT +.INDENT 7.0 +.TP +.B TRANSLATION_Q = \(aq\en SELECT s.tt, sum(s.uq) AS trq FROM (\en SELECT ex2.tt, max(dnx.uq) AS uq\en FROM dnx\en JOIN ex ON (ex.ex = dnx.ex)\en JOIN dnx dnx2 ON (dnx2.mn = dnx.mn)\en JOIN ex ex2 ON (ex2.ex = dnx2.ex)\en WHERE dnx.ex != dnx2.ex AND ex.lv = ? AND ex.tt = ? AND ex2.lv = ?\en GROUP BY ex2.tt, dnx.ui\en ) s\en GROUP BY s.tt\en ORDER BY trq DESC, s.tt\en \(aq +.UNINDENT +.INDENT 7.0 +.TP +.B language_varieties(lc=None) +Return a list of PanLex language varieties. +.INDENT 7.0 +.TP +.B Parameters +\fBlc\fP \-\- ISO 639 alpha\-3 code. If specified, filters returned varieties +by this code. If unspecified, all varieties are returned. +.TP +.B Returns +the specified language varieties as a list of tuples. 
The first
+element is the language variety\(aqs seven\-character uniform identifier,
+and the second element is its default name.
+.TP
+.B Return type
+list(tuple)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B meanings(expr_uid, expr_tt)
+Return a list of meanings for an expression.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBexpr_uid\fP \-\- the expression\(aqs language variety, as a seven\-character
+uniform identifier.
+.IP \(bu 2
+\fBexpr_tt\fP \-\- the expression\(aqs text.
+.UNINDENT
+.TP
+.B Returns
+a list of Meaning objects.
+.TP
+.B Return type
+list(Meaning)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B translations(from_uid, from_tt, to_uid)
+Return a list of translations for an expression into a single language
+variety.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfrom_uid\fP \-\- the source expression\(aqs language variety, as a
+seven\-character uniform identifier.
+.IP \(bu 2
+\fBfrom_tt\fP \-\- the source expression\(aqs text.
+.IP \(bu 2
+\fBto_uid\fP \-\- the target language variety, as a seven\-character
+uniform identifier.
+.UNINDENT
+.TP
+.B Returns
+a list of translation tuples. The first element is the expression
+text and the second element is the translation quality.
+.TP
+.B Return type
+list(tuple)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.PanlexSwadeshCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP
+.sp
+This is a class to read the PanLex Swadesh list from
+.sp
+David Kamholz, Jonathan Pool, and Susan M. Colowick (2014).
+PanLex: Building a Resource for Panlingual Lexical Translation.
+In LREC. \fI\%http://www.lrec\-conf.org/proceedings/lrec2014/pdf/1029_Paper.pdf\fP
+.sp
+License: CC0 1.0 Universal
+\fI\%https://creativecommons.org/publicdomain/zero/1.0/legalcode\fP
+.INDENT 7.0
+.TP
+.B entries(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+a tuple of words for the specified fileids.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_languages()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_macrolanguages()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B language_codes()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B license()
+Return the contents of the corpus LICENSE file, if it exists.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words_by_iso639(iso63_code)
+.INDENT 7.0
+.TP
+.B Returns
+a list of list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words_by_lang(lang_code)
+.INDENT 7.0
+.TP
+.B Returns
+a list of list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.Pl196xCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP
+.INDENT 7.0
+.TP
+.B decode_tag(tag)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B head_len = 2770
+.UNINDENT
+.INDENT 7.0
+.TP
+.B paras(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_paras(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_sents(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_words(fileids=None, categories=None, textids=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B textids(fileids=None, categories=None)
+In the pl196x corpus each category is stored in a single
+file and thus both methods provide identical functionality. 
In order
+to accommodate finer granularity, a non\-standard textids() method was
+implemented. All the main functions can be supplied with a list
+of required chunks, giving much more control to the user.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None, categories=None, textids=None)
+Returns all of the words and punctuation symbols in the specified file
+that were in text nodes \-\- i.e., tags are ignored. Like the xml() method,
+fileid can only specify one file.
+.INDENT 7.0
+.TP
+.B Returns
+the given file\(aqs text nodes as a list of words and punctuation symbols
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B xml(fileids=None, categories=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.PlaintextCorpusReader(root, fileids, word_tokenizer=WordPunctTokenizer(pattern=\(aq\e\ew+|[^\e\ew\e\es]+\(aq, gaps=False, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), sent_tokenizer=LazyLoader(\(aqtokenizers/punkt/english.pickle\(aq), para_block_reader=read_blankline_block, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for corpora that consist of plaintext documents. Paragraphs
+are assumed to be split using blank lines. Sentences and words can
+be tokenized using the default tokenizers, or by custom tokenizers
+specified as parameters to the constructor.
+.sp
+This corpus reader can be customized (e.g., to skip preface
+sections of specific document formats) by creating a subclass and
+overriding the \fBCorpusView\fP class variable.
+.INDENT 7.0
+.TP
+.B CorpusView
+alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B paras(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+paragraphs, each encoded as a list of sentences, which are
+in turn encoded as lists of word strings.
+.TP
+.B Return type
+list(list(list(str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+sentences or utterances, each encoded as a list of word
+strings.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of words
+and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.PortugueseCategorizedPlaintextCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.plaintext.CategorizedPlaintextCorpusReader\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.PropbankCorpusReader(root, propfile, framefiles=\(aq\(aq, verbsfile=None, parse_fileid_xform=None, parse_corpus=None, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Corpus reader for the propbank corpus, which augments the Penn
+Treebank with information about the predicate argument structure
+of every verb instance. The corpus consists of two parts: the
+predicate\-argument annotations themselves, and a set of "frameset
+files" which define the argument labels used by the annotations,
+on a per\-verb basis. Each "frameset file" contains one or more
+predicates, such as \fB\(aqturn\(aq\fP or \fB\(aqturn_on\(aq\fP, each of which is
+divided into coarse\-grained word senses called "rolesets". For
+each "roleset", the frameset file provides descriptions of the
+argument roles, along with examples. 
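+.sp
+A minimal usage sketch (this assumes the \fBpropbank\fP corpus has been
+installed via \fBnltk.download()\fP; the roleset id \fB\(aqturn.01\(aq\fP is
+only illustrative):
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+from nltk.corpus import propbank
+
+# first annotated instance: location, predicate file and coarse sense
+inst = propbank.instances()[0]
+print(inst.fileid, inst.sentnum, inst.roleset)
+
+# frameset entry for one roleset, with its argument role descriptions
+turn_01 = propbank.roleset(\(aqturn.01\(aq)
+for role in turn_01.findall(\(aqroles/role\(aq):
+    print(role.attrib[\(aqn\(aq], role.attrib[\(aqdescr\(aq])
+.ft P
+.fi
+.UNINDENT
+.UNINDENT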
+.INDENT 7.0
+.TP
+.B instances(baseform=None)
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of
+\fBPropBankInstance\fP objects, one for each verb in the corpus.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lines()
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of strings, one for
+each line in the predicate\-argument annotation file.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B roleset(roleset_id)
+.INDENT 7.0
+.TP
+.B Returns
+the xml description for the given roleset.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B rolesets(baseform=None)
+.INDENT 7.0
+.TP
+.B Returns
+a list of xml descriptions for rolesets.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verbs()
+.INDENT 7.0
+.TP
+.B Returns
+a corpus view that acts as a list of all verb lemmas
+in this corpus (from the verbs.txt file).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.ProsConsCorpusReader(root, fileids, word_tokenizer=WordPunctTokenizer(pattern=\(aq\e\ew+|[^\e\ew\e\es]+\(aq, gaps=False, discard_empty=True, flags=re.UNICODE | re.MULTILINE | re.DOTALL), encoding=\(aqutf8\(aq, **kwargs)
+Bases: \fI\%nltk.corpus.reader.api.CategorizedCorpusReader\fP, \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for the Pros and Cons sentence dataset.
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import pros_cons
+>>> pros_cons.sents(categories=\(aqCons\(aq)
+[[\(aqEast\(aq, \(aqbatteries\(aq, \(aq!\(aq, \(aqOn\(aq, \(aq\-\(aq, \(aqoff\(aq, \(aqswitch\(aq, \(aqtoo\(aq, \(aqeasy\(aq,
+\(aqto\(aq, \(aqmaneuver\(aq, \(aq.\(aq], [\(aqEats\(aq, \(aq...\(aq, \(aqno\(aq, \(aq,\(aq, \(aqGULPS\(aq, \(aqbatteries\(aq],
+\&...]
+>>> pros_cons.words(\(aqIntegratedPros.txt\(aq)
+[\(aqEasy\(aq, \(aqto\(aq, \(aquse\(aq, \(aq,\(aq, \(aqeconomical\(aq, \(aq!\(aq, ...]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B CorpusView
+alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(fileids=None, categories=None)
+Return all sentences in the corpus or in the specified files/categories.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+sentences have to be returned.
+.IP \(bu 2
+\fBcategories\fP \-\- a list specifying the categories whose sentences
+have to be returned.
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of sentences. Each sentence is
+tokenized using the specified word_tokenizer.
+.TP
+.B Return type
+list(list(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids=None, categories=None)
+Return all words and punctuation symbols in the corpus or in the specified
+files/categories.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose
+words have to be returned.
+.IP \(bu 2
+\fBcategories\fP \-\- a list specifying the categories whose words have
+to be returned.
+.UNINDENT
+.TP
+.B Returns
+the given file(s) as a list of words and punctuation symbols.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.RTECorpusReader(root, fileids, wrap_etree=False)
+Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP
+.sp
+Corpus reader for corpora in RTE challenges.
+.sp
+This is just a wrapper around the XMLCorpusReader. See module docstring above for the expected
+structure of input documents.
+.INDENT 7.0
+.TP
+.B pairs(fileids)
+Build a list of RTEPairs from an RTE corpus.
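+.sp
+For example (a sketch; it assumes the \fBrte\fP corpus is installed and
+that \fB\(aqrte1_dev.xml\(aq\fP is among its fileids):
+.sp
+.nf
+.ft C
+from nltk.corpus import rte
+
+pair = rte.pairs([\(aqrte1_dev.xml\(aq])[0]
+print(pair.text)   # the premise passage
+print(pair.hyp)    # the hypothesis to test for entailment
+print(pair.value)  # gold label: 1 (entailment) or 0 (no entailment)
+.ft P
+.fi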
+.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list of RTE corpus fileids +.TP +.B Type +list +.TP +.B Return type +list(RTEPair) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.ReviewsCorpusReader(root, fileids, word_tokenizer=WordPunctTokenizer(pattern=\(aq\e\ew+|[^\e\ew\e\es]+\(aq, gaps=False, discard_empty=True, flags=re.UNICODE | re.MULTILINE | re.DOTALL), encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Reader for the Customer Review Data dataset by Hu, Liu (2004). +Note: we are not applying any sentence tokenization at the moment, just word +tokenization. +.sp +.nf +.ft C +>>> from nltk.corpus import product_reviews_1 +>>> camera_reviews = product_reviews_1.reviews(\(aqCanon_G3.txt\(aq) +>>> review = camera_reviews[0] +>>> review.sents()[0] +[\(aqi\(aq, \(aqrecently\(aq, \(aqpurchased\(aq, \(aqthe\(aq, \(aqcanon\(aq, \(aqpowershot\(aq, \(aqg3\(aq, \(aqand\(aq, \(aqam\(aq, +\(aqextremely\(aq, \(aqsatisfied\(aq, \(aqwith\(aq, \(aqthe\(aq, \(aqpurchase\(aq, \(aq.\(aq] +>>> review.features() +[(\(aqcanon powershot g3\(aq, \(aq+3\(aq), (\(aquse\(aq, \(aq+2\(aq), (\(aqpicture\(aq, \(aq+2\(aq), +(\(aqpicture quality\(aq, \(aq+1\(aq), (\(aqpicture quality\(aq, \(aq+1\(aq), (\(aqcamera\(aq, \(aq+2\(aq), +(\(aquse\(aq, \(aq+2\(aq), (\(aqfeature\(aq, \(aq+1\(aq), (\(aqpicture quality\(aq, \(aq+3\(aq), (\(aquse\(aq, \(aq+1\(aq), +(\(aqoption\(aq, \(aq+1\(aq)] +.ft P +.fi +.sp +We can also reach the same information directly from the stream: +.sp +.nf +.ft C +>>> product_reviews_1.features(\(aqCanon_G3.txt\(aq) +[(\(aqcanon powershot g3\(aq, \(aq+3\(aq), (\(aquse\(aq, \(aq+2\(aq), ...] +.ft P +.fi +.sp +We can compute stats for specific product features: +.sp +.nf +.ft C +>>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features(\(aqCanon_G3.txt\(aq) if feat==\(aqpicture\(aq]) +>>> tot = sum([int(score) for (feat,score) in product_reviews_1.features(\(aqCanon_G3.txt\(aq) if feat==\(aqpicture\(aq]) +>>> mean = tot / n_reviews +>>> print(n_reviews, tot, mean) +15 24 1.6 +.ft P +.fi +.INDENT 7.0 +.TP +.B CorpusView +alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.UNINDENT +.INDENT 7.0 +.TP +.B features(fileids=None) +Return a list of features. Each feature is a tuple made of the specific +item feature and the opinion strength about that feature. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +features have to be returned. +.TP +.B Returns +all features for the item(s) in the given file(s). +.TP +.B Return type +list(tuple) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B reviews(fileids=None) +Return all the reviews as a list of Review objects. If \fIfileids\fP is +specified, return all the reviews from each of the specified files. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +reviews have to be returned. +.TP +.B Returns +the given file(s) as a list of reviews. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +Return all sentences in the corpus or in the specified files. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +sentences have to be returned. +.TP +.B Returns +the given file(s) as a list of sentences, each encoded as a +list of word strings. 
+.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +Return all words and punctuation symbols in the corpus or in the specified +files. +.INDENT 7.0 +.TP +.B Parameters +\fBfileids\fP \-\- a list or regexp specifying the ids of the files whose +words have to be returned. +.TP +.B Returns +the given file(s) as a list of words and punctuation symbols. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.SemcorCorpusReader(root, fileids, wordnet, lazy=True) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.sp +Corpus reader for the SemCor Corpus. +For access to the complete XML data structure, use the \fBxml()\fP +method. For access to simple word lists and tagged word lists, use +\fBwords()\fP, \fBsents()\fP, \fBtagged_words()\fP, and \fBtagged_sents()\fP\&. +.INDENT 7.0 +.TP +.B chunk_sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of sentences, each encoded +as a list of chunks. +.TP +.B Return type +list(list(list(str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B chunks(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of chunks, +each of which is a list of words and punctuation symbols +that form a unit. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of sentences, each encoded +as a list of word strings. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_chunks(fileids=None, tag=\(aqpos\(aq) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged chunks, represented +in tree form. +.TP +.B Return type +list(Tree) +.TP +.B Parameters +\fBtag\fP \-\- \fI\(aqpos\(aq\fP (part of speech), \fI\(aqsem\(aq\fP (semantic), or \fI\(aqboth\(aq\fP +to indicate the kind of tags to include. Semantic tags consist of +WordNet lemma IDs, plus an \fI\(aqNE\(aq\fP node if the chunk is a named entity +without a specific entry in WordNet. (Named entities of type \(aqother\(aq +have no lemma. Other chunks not in WordNet have no semantic tag. +Punctuation tokens have \fINone\fP for their part of speech tag.) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tag=\(aqpos\(aq) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of sentences. Each sentence +is represented as a list of tagged chunks (in tree form). +.TP +.B Return type +list(list(Tree)) +.TP +.B Parameters +\fBtag\fP \-\- \fI\(aqpos\(aq\fP (part of speech), \fI\(aqsem\(aq\fP (semantic), or \fI\(aqboth\(aq\fP +to indicate the kind of tags to include. Semantic tags consist of +WordNet lemma IDs, plus an \fI\(aqNE\(aq\fP node if the chunk is a named entity +without a specific entry in WordNet. (Named entities of type \(aqother\(aq +have no lemma. Other chunks not in WordNet have no semantic tag. +Punctuation tokens have \fINone\fP for their part of speech tag.) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words and punctuation symbols. 
+.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.SensevalCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B instances(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.SentiSynset(pos_score, neg_score, synset) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B neg_score() +.UNINDENT +.INDENT 7.0 +.TP +.B obj_score() +.UNINDENT +.INDENT 7.0 +.TP +.B pos_score() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.SentiWordNetCorpusReader(root, fileids, encoding=\(aqutf\-8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B all_senti_synsets() +.UNINDENT +.INDENT 7.0 +.TP +.B senti_synset(*vals) +.UNINDENT +.INDENT 7.0 +.TP +.B senti_synsets(string, pos=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.SinicaTreebankCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.SyntaxCorpusReader\fP +.sp +Reader for the sinica treebank. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.StringCategoryCorpusReader(root, fileids, delimiter=\(aq \(aq, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B tuples(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.SwadeshCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP +.INDENT 7.0 +.TP +.B entries(fileids=None) +.INDENT 7.0 +.TP +.B Returns +a tuple of words for the specified fileids. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.SwitchboardCorpusReader(root, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.INDENT 7.0 +.TP +.B discourses() +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_discourses(tagset=False) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_turns(tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B turns() +.UNINDENT +.INDENT 7.0 +.TP +.B words() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.SyntaxCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +An abstract base class for reading corpora consisting of +syntactically parsed text. Subclasses should define: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fB__init__\fP, which specifies the location of the corpus +and a method for detecting the sentence blocks in corpus files. +.IP \(bu 2 +\fB_read_block\fP, which reads a block from the input stream. +.IP \(bu 2 +\fB_word\fP, which takes a block and returns a list of list of words. +.IP \(bu 2 +\fB_tag\fP, which takes a block and returns a list of list of tagged +words. +.IP \(bu 2 +\fB_parse\fP, which takes a block and returns a list of parsed +sentences. 
+.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parsed_sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.TEICorpusView(corpus_file, tagged, group_by_sent, group_by_para, tagset=None, head_len=0, textids=None) +Bases: \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP +.INDENT 7.0 +.TP +.B read_block(stream) +Read a block from the input stream. +.INDENT 7.0 +.TP +.B Returns +a block of tokens from the input stream +.TP +.B Return type +list(any) +.TP +.B Parameters +\fBstream\fP (\fIstream\fP) \-\- an input stream +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.TaggedCorpusReader(root, fileids, sep=\(aq/\(aq, word_tokenizer=WhitespaceTokenizer(pattern=\(aq\e\es+\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), sent_tokenizer=RegexpTokenizer(pattern=\(aq\en\(aq, gaps=True, discard_empty=True, flags=re.UNICODE|re.MULTILINE|re.DOTALL), para_block_reader=, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Reader for simple part\-of\-speech tagged corpora. Paragraphs are +assumed to be split using blank lines. Sentences and words can be +tokenized using the default tokenizers, or by custom tokenizers +specified as parameters to the constructor. Words are parsed +using \fBnltk.tag.str2tuple\fP\&. By default, \fB\(aq/\(aq\fP is used as the +separator. I.e., words should have the form: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +word1/tag1 word2/tag2 word3/tag3 ... +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +But custom separators may be specified as parameters to the +constructor. Part of speech tags are case\-normalized to upper +case. +.INDENT 7.0 +.TP +.B paras(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of word strings. +.TP +.B Return type +list(list(list(str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sents(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences or utterances, each encoded as a list of word +strings. +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +paragraphs, each encoded as a list of sentences, which are +in turn encoded as lists of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(list(tuple(str,str)))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of +sentences, each encoded as a list of \fB(word,tag)\fP tuples. +.TP +.B Return type +list(list(tuple(str,str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(fileids=None, tagset=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of tagged +words and punctuation symbols, encoded as tuples +\fB(word,tag)\fP\&. +.TP +.B Return type +list(tuple(str,str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B words(fileids=None) +.INDENT 7.0 +.TP +.B Returns +the given file(s) as a list of words +and punctuation symbols. 
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.TimitCorpusReader(root, encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for the TIMIT corpus (or any other corpus with the same
+file layout and use of file formats). The corpus root directory
+should contain the following files:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+timitdic.txt: dictionary of standard transcriptions
+.IP \(bu 2
+spkrinfo.txt: table of speaker information
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+In addition, the root directory should contain one subdirectory
+for each speaker, containing four files for each utterance:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+<utterance\-id>.txt: text content of utterances
+.IP \(bu 2
+<utterance\-id>.wrd: tokenized text content of utterances
+.IP \(bu 2
+<utterance\-id>.phn: phonetic transcription of utterances
+.IP \(bu 2
+<utterance\-id>.wav: utterance sound file
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B audiodata(utterance, start=0, end=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fileids(filetype=None)
+Return a list of file identifiers for the files that make up
+this corpus.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfiletype\fP \-\- If specified, then \fBfiletype\fP indicates that
+only the files that have the given type should be
+returned. Accepted values are: \fBtxt\fP, \fBwrd\fP, \fBphn\fP,
+\fBwav\fP, or \fBmetadata\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B phone_times(utterances=None)
+Offsets are represented as a number of 16kHz samples.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B phone_trees(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B phones(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B play(utterance, start=0, end=None)
+Play the given audio sample.
+.INDENT 7.0
+.TP
+.B Parameters
+\fButterance\fP \-\- The utterance id of the sample to play
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sent_times(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sentid(utterance)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sents(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B spkrid(utterance)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B spkrinfo(speaker)
+.INDENT 7.0
+.TP
+.B Returns
+A dictionary of information about the given speaker.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B spkrutteranceids(speaker)
+.INDENT 7.0
+.TP
+.B Returns
+A list of all utterances associated with the given
+speaker.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B transcription_dict()
+.INDENT 7.0
+.TP
+.B Returns
+A dictionary giving the \(aqstandard\(aq transcription for
+each word.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B utterance(spkrid, sentid)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B utteranceids(dialect=None, sex=None, spkrid=None, sent_type=None, sentid=None)
+.INDENT 7.0
+.TP
+.B Returns
+A list of the utterance identifiers for all
+utterances in this corpus, or for the given speaker, dialect
+region, gender, sentence type, or sentence number, if
+specified.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B wav(utterance, start=0, end=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B word_times(utterances=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(utterances=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.TimitTaggedCorpusReader(*args, **kwargs)
+Bases: \fI\%nltk.corpus.reader.tagged.TaggedCorpusReader\fP
+.sp
+A corpus reader for tagged sentences that are included in the TIMIT corpus.
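+.sp
+For example (a sketch; it assumes the TIMIT sample corpus is installed
+and that \fBtimit_tagged\fP is the reader preconfigured in \fBnltk.corpus\fP):
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+from nltk.corpus import timit_tagged
+
+# tagged sentences come back as lists of (word, tag) tuples
+first_sent = timit_tagged.tagged_sents()[0]
+print(first_sent[:3])
+.ft P
+.fi
+.UNINDENT
+.UNINDENT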
+.INDENT 7.0
+.TP
+.B paras()
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+paragraphs, each encoded as a list of sentences, which are
+in turn encoded as lists of word strings.
+.TP
+.B Return type
+list(list(list(str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_paras()
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of
+paragraphs, each encoded as a list of sentences, which are
+in turn encoded as lists of \fB(word,tag)\fP tuples.
+.TP
+.B Return type
+list(list(list(tuple(str,str))))
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.ToolboxCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.INDENT 7.0
+.TP
+.B entries(fileids, **kwargs)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fields(fileids, strip=True, unwrap=True, encoding=\(aqutf8\(aq, errors=\(aqstrict\(aq, unicode_fields=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B words(fileids, key=\(aqlx\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B xml(fileids, key=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.TwitterCorpusReader(root, fileids=None, word_tokenizer=TweetTokenizer(), encoding=\(aqutf8\(aq)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+Reader for corpora that consist of Tweets, represented as line\-delimited JSON.
+.sp
+Individual Tweets can be tokenized using the default tokenizer, or by a
+custom tokenizer specified as a parameter to the constructor.
+.sp
+Construct a new Tweet corpus reader for a set of documents
+located at the given root directory.
+.sp
+If you made your own tweet collection in a directory called
+\fItwitter\-files\fP, then you can initialise the reader as:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+from nltk.corpus import TwitterCorpusReader
+reader = TwitterCorpusReader(\(aq/path/to/twitter\-files\(aq, \(aq.*\e.json\(aq)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+However, the recommended approach is to set the relevant directory as the
+value of the environment variable \fITWITTER\fP, and then invoke the reader
+as follows:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+root = os.environ[\(aqTWITTER\(aq]
+reader = TwitterCorpusReader(root, \(aq.*\e.json\(aq)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+If you want to work directly with the raw Tweets, the \fIjson\fP library can
+be used:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+import json
+for tweet in reader.docs():
+    print(json.dumps(tweet, indent=1, sort_keys=True))
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B CorpusView
+alias of \fI\%nltk.corpus.reader.util.StreamBackedCorpusView\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B docs(fileids=None)
+Returns the full Tweet objects, as specified by \fI\%Twitter
+documentation on Tweets\fP\&.
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of dictionaries deserialised
+from JSON.
+.TP
+.B Return type
+list(dict)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B strings(fileids=None)
+Returns only the text content of Tweets in the file(s).
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of Tweets.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tokenized(fileids=None)
+.INDENT 7.0
+.TP
+.B Returns
+the given file(s) as a list of tokenized Tweets, each encoded
+as a list of words, screen names, hashtags, URLs and punctuation symbols.
+.UNINDENT
+.INDENT 7.0 +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.UdhrCorpusReader(root=\(aqudhr\(aq) +Bases: \fI\%nltk.corpus.reader.plaintext.PlaintextCorpusReader\fP +.INDENT 7.0 +.TP +.B ENCODINGS = [(\(aq.*\-Latin1$\(aq, \(aqlatin\-1\(aq), (\(aq.*\-Hebrew$\(aq, \(aqhebrew\(aq), (\(aq.*\-Arabic$\(aq, \(aqcp1256\(aq), (\(aqCzech_Cesky\-UTF8\(aq, \(aqcp1250\(aq), (\(aq.*\-Cyrillic$\(aq, \(aqcyrillic\(aq), (\(aq.*\-SJIS$\(aq, \(aqSJIS\(aq), (\(aq.*\-GB2312$\(aq, \(aqGB2312\(aq), (\(aq.*\-Latin2$\(aq, \(aqISO\-8859\-2\(aq), (\(aq.*\-Greek$\(aq, \(aqgreek\(aq), (\(aq.*\-UTF8$\(aq, \(aqutf\-8\(aq), (\(aqHungarian_Magyar\-Unicode\(aq, \(aqutf\-16\-le\(aq), (\(aqAmahuaca\(aq, \(aqlatin1\(aq), (\(aqTurkish_Turkce\-Turkish\(aq, \(aqlatin5\(aq), (\(aqLithuanian_Lietuviskai\-Baltic\(aq, \(aqlatin4\(aq), (\(aqJapanese_Nihongo\-EUC\(aq, \(aqEUC\-JP\(aq), (\(aqJapanese_Nihongo\-JIS\(aq, \(aqiso2022_jp\(aq), (\(aqChinese_Mandarin\-HZ\(aq, \(aqhz\(aq), (\(aqAbkhaz\e\e\-Cyrillic\e\e+Abkh\(aq, \(aqcp1251\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B SKIP = {\(aqAmharic\-Afenegus6..60375\(aq, \(aqArmenian\-DallakHelv\(aq, \(aqAzeri_Azerbaijani_Cyrillic\-Az.Times.Cyr.Normal0117\(aq, \(aqAzeri_Azerbaijani_Latin\-Az.Times.Lat0117\(aq, \(aqBhojpuri\-Agra\(aq, \(aqBurmese_Myanmar\-UTF8\(aq, \(aqBurmese_Myanmar\-WinResearcher\(aq, \(aqChinese_Mandarin\-HZ\(aq, \(aqChinese_Mandarin\-UTF8\(aq, \(aqCzech\-Latin2\-err\(aq, \(aqEsperanto\-T61\(aq, \(aqGujarati\-UTF8\(aq, \(aqHungarian_Magyar\-Unicode\(aq, \(aqJapanese_Nihongo\-JIS\(aq, \(aqLao\-UTF8\(aq, \(aqMagahi\-Agra\(aq, \(aqMagahi\-UTF8\(aq, \(aqMarathi\-UTF8\(aq, \(aqNavaho_Dine\-Navajo\-Navaho\-font\(aq, \(aqRussian_Russky\-UTF8~\(aq, \(aqTamil\-UTF8\(aq, \(aqTigrinya_Tigrigna\-VG2Main\(aq, \(aqVietnamese\-TCVN\(aq, \(aqVietnamese\-VIQR\(aq, \(aqVietnamese\-VPS\(aq} +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.UnicharsCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None) +Bases: \fI\%nltk.corpus.reader.wordlist.WordListCorpusReader\fP +.sp +This class is used to read lists of characters from the Perl Unicode +Properties (see \fI\%http://perldoc.perl.org/perluniprops.html\fP). +The files in the perluniprop.zip are extracted using the Unicode::Tussle +module from http://search.cpan.org/~bdfoy/Unicode\-Tussle\-1.11/lib/Unicode/Tussle.pm +.INDENT 7.0 +.TP +.B available_categories = [\(aqClose_Punctuation\(aq, \(aqCurrency_Symbol\(aq, \(aqIsAlnum\(aq, \(aqIsAlpha\(aq, \(aqIsLower\(aq, \(aqIsN\(aq, \(aqIsSc\(aq, \(aqIsSo\(aq, \(aqIsUpper\(aq, \(aqLine_Separator\(aq, \(aqNumber\(aq, \(aqOpen_Punctuation\(aq, \(aqPunctuation\(aq, \(aqSeparator\(aq, \(aqSymbol\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B chars(category=None, fileids=None) +This module returns a list of characters from the Perl Unicode Properties. +They are very useful when porting Perl tokenizers to Python. 
+.sp +.nf +.ft C +>>> from nltk.corpus import perluniprops as pup +>>> pup.chars(\(aqOpen_Punctuation\(aq)[:5] == [u\(aq(\(aq, u\(aq[\(aq, u\(aq{\(aq, u\(aq༺\(aq, u\(aq༼\(aq] +True +>>> pup.chars(\(aqCurrency_Symbol\(aq)[:5] == [u\(aq$\(aq, u\(aq¢\(aq, u\(aq£\(aq, u\(aq¤\(aq, u\(aq¥\(aq] +True +>>> pup.available_categories +[\(aqClose_Punctuation\(aq, \(aqCurrency_Symbol\(aq, \(aqIsAlnum\(aq, \(aqIsAlpha\(aq, \(aqIsLower\(aq, \(aqIsN\(aq, \(aqIsSc\(aq, \(aqIsSo\(aq, \(aqIsUpper\(aq, \(aqLine_Separator\(aq, \(aqNumber\(aq, \(aqOpen_Punctuation\(aq, \(aqPunctuation\(aq, \(aqSeparator\(aq, \(aqSymbol\(aq] +.ft P +.fi +.INDENT 7.0 +.TP +.B Returns +a list of characters given the specific unicode character category +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.VerbnetCorpusReader(root, fileids, wrap_etree=False) +Bases: \fI\%nltk.corpus.reader.xmldocs.XMLCorpusReader\fP +.sp +An NLTK interface to the VerbNet verb lexicon. +.sp +From the VerbNet site: "VerbNet (VN) (Kipper\-Schuler 2006) is the largest +on\-line verb lexicon currently available for English. It is a hierarchical +domain\-independent, broad\-coverage verb lexicon with mappings to other +lexical resources such as WordNet (Miller, 1990; Fellbaum, 1998), XTAG +(XTAG Research Group, 2001), and FrameNet (Baker et al., 1998)." +.sp +For details about VerbNet see: +\fI\%https://verbs.colorado.edu/~mpalmer/projects/verbnet.html\fP +.INDENT 7.0 +.TP +.B classids(lemma=None, wordnetid=None, fileid=None, classid=None) +Return a list of the VerbNet class identifiers. If a file +identifier is specified, then return only the VerbNet class +identifiers for classes (and subclasses) defined by that file. +If a lemma is specified, then return only VerbNet class +identifiers for classes that contain that lemma as a member. +If a wordnetid is specified, then return only identifiers for +classes that contain that wordnetid as a member. If a classid +is specified, then return only identifiers for subclasses of +the specified VerbNet class. +If nothing is specified, return all classids within VerbNet +.UNINDENT +.INDENT 7.0 +.TP +.B fileids(vnclass_ids=None) +Return a list of fileids that make up this corpus. If +\fBvnclass_ids\fP is specified, then return the fileids that make +up the specified VerbNet class(es). +.UNINDENT +.INDENT 7.0 +.TP +.B frames(vnclass) +Given a VerbNet class, this method returns VerbNet frames +.sp +The members returned are: +1) Example +2) Description +3) Syntax +4) Semantics +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.TP +.B Returns +frames \- a list of frame dictionaries +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lemmas(vnclass=None) +Return a list of all verb lemmas that appear in any class, or +in the \fBclassid\fP if specified. +.UNINDENT +.INDENT 7.0 +.TP +.B longid(shortid) +Returns longid of a VerbNet class +.sp +Given a short VerbNet class identifier (eg \(aq37.10\(aq), map it +to a long id (eg \(aqconfess\-37.10\(aq). If \fBshortid\fP is already a +long id, then return it as\-is +.UNINDENT +.INDENT 7.0 +.TP +.B pprint(vnclass) +Returns pretty printed version of a VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the given VerbNet class. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +.UNINDENT +.sp +containing the xml contents of a VerbNet class. 
+.UNINDENT +.INDENT 7.0 +.TP +.B pprint_frames(vnclass, indent=\(aq\(aq) +Returns pretty version of all frames in a VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the list of frames within the VerbNet class. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B pprint_members(vnclass, indent=\(aq\(aq) +Returns pretty printed version of members in a VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the given VerbNet class\(aqs member verbs. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B pprint_subclasses(vnclass, indent=\(aq\(aq) +Returns pretty printed version of subclasses of VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the given VerbNet class\(aqs subclasses. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B pprint_themroles(vnclass, indent=\(aq\(aq) +Returns pretty printed version of thematic roles in a VerbNet class +.sp +Return a string containing a pretty\-printed representation of +the given VerbNet class\(aqs thematic roles. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B shortid(longid) +Returns shortid of a VerbNet class +.sp +Given a long VerbNet class identifier (eg \(aqconfess\-37.10\(aq), +map it to a short id (eg \(aq37.10\(aq). If \fBlongid\fP is already a +short id, then return it as\-is. +.UNINDENT +.INDENT 7.0 +.TP +.B subclasses(vnclass) +Returns subclass ids, if any exist +.sp +Given a VerbNet class, this method returns subclass ids (if they exist) +in a list of strings. +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.TP +.B Returns +list of subclasses +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B themroles(vnclass) +Returns thematic roles participating in a VerbNet class +.sp +Members returned as part of roles are\- +1) Type +2) Modifiers +.INDENT 7.0 +.TP +.B Parameters +\fBvnclass\fP \-\- A VerbNet class identifier; or an ElementTree +containing the xml contents of a VerbNet class. +.TP +.B Returns +themroles: A list of thematic roles in the VerbNet class +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B vnclass(fileid_or_classid) +Returns VerbNet class ElementTree +.sp +Return an ElementTree containing the xml for the specified +VerbNet class. +.INDENT 7.0 +.TP +.B Parameters +\fBfileid_or_classid\fP \-\- An identifier specifying which class +should be returned. Can be a file identifier (such as +\fB\(aqput\-9.1.xml\(aq\fP), or a VerbNet class identifier (such as +\fB\(aqput\-9.1\(aq\fP) or a short VerbNet class identifier (such as +\fB\(aq9.1\(aq\fP). +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B wordnetids(vnclass=None) +Return a list of all wordnet identifiers that appear in any +class, or in \fBclassid\fP if specified. 
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.WordListCorpusReader(root, fileids, encoding=\(aqutf8\(aq, tagset=None)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+List of words, one per line. Blank lines are ignored.
+.INDENT 7.0
+.TP
+.B words(fileids=None, ignore_lines_startswith=\(aq\en\(aq)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.corpus.reader.WordNetCorpusReader(root, omw_reader)
+Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP
+.sp
+A corpus reader used to access wordnet or its variants.
+.INDENT 7.0
+.TP
+.B ADJ = \(aqa\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ADJ_SAT = \(aqs\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ADV = \(aqr\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B MORPHOLOGICAL_SUBSTITUTIONS = {\(aqa\(aq: [(\(aqer\(aq, \(aq\(aq), (\(aqest\(aq, \(aq\(aq), (\(aqer\(aq, \(aqe\(aq), (\(aqest\(aq, \(aqe\(aq)], \(aqn\(aq: [(\(aqs\(aq, \(aq\(aq), (\(aqses\(aq, \(aqs\(aq), (\(aqves\(aq, \(aqf\(aq), (\(aqxes\(aq, \(aqx\(aq), (\(aqzes\(aq, \(aqz\(aq), (\(aqches\(aq, \(aqch\(aq), (\(aqshes\(aq, \(aqsh\(aq), (\(aqmen\(aq, \(aqman\(aq), (\(aqies\(aq, \(aqy\(aq)], \(aqr\(aq: [], \(aqs\(aq: [(\(aqer\(aq, \(aq\(aq), (\(aqest\(aq, \(aq\(aq), (\(aqer\(aq, \(aqe\(aq), (\(aqest\(aq, \(aqe\(aq)], \(aqv\(aq: [(\(aqs\(aq, \(aq\(aq), (\(aqies\(aq, \(aqy\(aq), (\(aqes\(aq, \(aqe\(aq), (\(aqes\(aq, \(aq\(aq), (\(aqed\(aq, \(aqe\(aq), (\(aqed\(aq, \(aq\(aq), (\(aqing\(aq, \(aqe\(aq), (\(aqing\(aq, \(aq\(aq)]}
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NOUN = \(aqn\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B VERB = \(aqv\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B all_lemma_names(pos=None, lang=\(aqeng\(aq)
+Return all lemma names for all synsets for the given
+part of speech tag and language or languages. If pos is
+not specified, all synsets for all parts of speech will
+be used.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B all_synsets(pos=None)
+Iterate over all synsets with a given part of speech tag.
+If no pos is specified, all synsets for all parts of speech
+will be loaded.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B citation(lang=\(aqomw\(aq)
+Return the contents of the citation.bib file (for omw).
+Use lang=lang to get the citation for an individual language.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B custom_lemmas(tab_file, lang)
+Reads a custom tab file containing mappings of lemmas in the given
+language to Princeton WordNet 3.0 synset offsets, allowing NLTK\(aqs
+WordNet functions to then be used with that language.
+.sp
+See the "Tab files" section at \fI\%http://compling.hss.ntu.edu.sg/omw/\fP for
+documentation on the Multilingual WordNet tab file format.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtab_file\fP \-\- Tab file as a file or file\-like object
+.IP \(bu 2
+\fBlang\fP (\fIstr\fP) \-\- ISO 639\-3 code of the language of the tab file
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B digraph(inputs, rel=<function <lambda>>, pos=None, maxdepth=\-1, shapes=None, attr=None, verbose=False)
+Produce a graphical representation from \(aqinputs\(aq (a list of
+start nodes, which can be a mix of Synsets, Lemmas and/or words),
+and a synset relation, for drawing with the \(aqdot\(aq graph visualisation
+program from the Graphviz package.
+.sp
+Return a string in the DOT graph file language, which can then be
+converted to an image by nltk.parse.dependencygraph.dot2img(dot_string).
+.sp +Optional Parameters: +:rel: Wordnet synset relation +:pos: for words, restricts Part of Speech to \(aqn\(aq, \(aqv\(aq, \(aqa\(aq or \(aqr\(aq +:maxdepth: limit the longest path +:shapes: dictionary of strings that trigger a specified shape +:attr: dictionary with global graph attributes +:verbose: warn about cycles +.sp +.nf +.ft C +>>> from nltk.corpus import wordnet as wn +>>> print(wn.digraph([wn.synset(\(aqdog.n.01\(aq)])) +digraph G { +"Synset(\(aqdog.n.01\(aq)" \-> "Synset(\(aqdomestic_animal.n.01\(aq)"; +"Synset(\(aqorganism.n.01\(aq)" \-> "Synset(\(aqliving_thing.n.01\(aq)"; +"Synset(\(aqmammal.n.01\(aq)" \-> "Synset(\(aqvertebrate.n.01\(aq)"; +"Synset(\(aqplacental.n.01\(aq)" \-> "Synset(\(aqmammal.n.01\(aq)"; +"Synset(\(aqanimal.n.01\(aq)" \-> "Synset(\(aqorganism.n.01\(aq)"; +"Synset(\(aqvertebrate.n.01\(aq)" \-> "Synset(\(aqchordate.n.01\(aq)"; +"Synset(\(aqchordate.n.01\(aq)" \-> "Synset(\(aqanimal.n.01\(aq)"; +"Synset(\(aqcanine.n.02\(aq)" \-> "Synset(\(aqcarnivore.n.01\(aq)"; +"Synset(\(aqliving_thing.n.01\(aq)" \-> "Synset(\(aqwhole.n.02\(aq)"; +"Synset(\(aqphysical_entity.n.01\(aq)" \-> "Synset(\(aqentity.n.01\(aq)"; +"Synset(\(aqcarnivore.n.01\(aq)" \-> "Synset(\(aqplacental.n.01\(aq)"; +"Synset(\(aqobject.n.01\(aq)" \-> "Synset(\(aqphysical_entity.n.01\(aq)"; +"Synset(\(aqwhole.n.02\(aq)" \-> "Synset(\(aqobject.n.01\(aq)"; +"Synset(\(aqdog.n.01\(aq)" \-> "Synset(\(aqcanine.n.02\(aq)"; +"Synset(\(aqdomestic_animal.n.01\(aq)" \-> "Synset(\(aqanimal.n.01\(aq)"; +} +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B get_version() +.UNINDENT +.INDENT 7.0 +.TP +.B ic(corpus, weight_senses_equally=False, smoothing=1.0) +Creates an information content lookup dictionary from a corpus. +.INDENT 7.0 +.TP +.B Parameters +\fBcorpus\fP (\fICorpusReader\fP) \-\- The corpus from which we create an information +.UNINDENT +.sp +content dictionary. +:type weight_senses_equally: bool +:param weight_senses_equally: If this is True, gives all +possible senses equal weight rather than dividing by the +number of possible senses. (If a word has 3 synses, each +sense gets 0.3333 per appearance when this is False, 1.0 when +it is true.) +:param smoothing: How much do we smooth synset counts (default is 1.0) +:type smoothing: float +:return: An information content dictionary +.UNINDENT +.INDENT 7.0 +.TP +.B jcn_similarity(synset1, synset2, ic, verbose=False) +Jiang\-Conrath Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node) and that of the two input Synsets. The relationship is +given by the equation 1 / (IC(s1) + IC(s2) \- 2 * IC(lcs)). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by +\fBnltk.corpus.wordnet_ic.ic()\fP). +.UNINDENT +.TP +.B Returns +A float score denoting the similarity of the two \fBSynset\fP +objects. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B langs() +return a list of languages supported by Multilingual Wordnet +.UNINDENT +.INDENT 7.0 +.TP +.B lch_similarity(synset1, synset2, verbose=False, simulate_root=True) +Leacock Chodorow Similarity: +Return a score denoting how similar two word senses are, based on the +shortest path that connects the senses (as above) and the maximum depth +of the taxonomy in which the senses occur. 
The relationship is given as
+\-log(p/2d) where p is the shortest path length and d is the taxonomy
+depth.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not
+share a single root which disallows this metric from working for
+synsets that are not connected. This flag (True by default)
+creates a fake root that connects all the taxonomies. Set it
+to false to disable this behavior. For the noun taxonomy,
+there is usually a default root except for WordNet version 1.6.
+If you are using wordnet 1.6, a fake root will be added for nouns
+as well.
+.UNINDENT
+.TP
+.B Returns
+A score denoting the similarity of the two \fBSynset\fP objects,
+normally greater than 0. None is returned if no connecting path
+could be found. If a \fBSynset\fP is compared with itself, the
+maximum score is returned, which varies depending on the taxonomy
+depth.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemma(name, lang=\(aqeng\(aq)
+Return the lemma object that matches the name.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemma_count(lemma)
+Return the frequency count for this Lemma.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemma_from_key(key)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lemmas(lemma, pos=None, lang=\(aqeng\(aq)
+Return all Lemma objects with a name matching the specified lemma
+name and part of speech tag. Matches any part of speech tag if none is
+specified.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B license(lang=\(aqeng\(aq)
+Return the contents of LICENSE (for omw).
+Use lang=lang to get the license for an individual language.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lin_similarity(synset1, synset2, ic, verbose=False)
+Lin Similarity:
+Return a score denoting how similar two word senses are, based on the
+Information Content (IC) of the Least Common Subsumer (most specific
+ancestor node) and that of the two input Synsets. The relationship is
+given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to.
+.IP \(bu 2
+\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by
+\fBnltk.corpus.wordnet_ic.ic()\fP).
+.UNINDENT
+.TP
+.B Returns
+A float score denoting the similarity of the two \fBSynset\fP
+objects, in the range 0 to 1.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B morphy(form, pos=None, check_exceptions=True)
+Find a possible base form for the given form, with the given
+part of speech, by checking WordNet\(aqs list of exceptional
+forms, and by recursively stripping affixes for this part of
+speech until a form in WordNet is found.
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import wordnet as wn
+>>> print(wn.morphy(\(aqdogs\(aq))
+dog
+>>> print(wn.morphy(\(aqchurches\(aq))
+church
+>>> print(wn.morphy(\(aqaardwolves\(aq))
+aardwolf
+>>> print(wn.morphy(\(aqabaci\(aq))
+abacus
+>>> wn.morphy(\(aqhardrock\(aq, wn.ADV)
+>>> print(wn.morphy(\(aqbook\(aq, wn.NOUN))
+book
+>>> wn.morphy(\(aqbook\(aq, wn.ADJ)
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B of2ss(of)
+Take an id and return the synsets.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B path_similarity(synset1, synset2, verbose=False, simulate_root=True)
+Path Distance Similarity:
+Return a score denoting how similar two word senses are, based on the
+shortest path that connects the senses in the is\-a (hypernym/hyponym)
+taxonomy.
The score is in the range 0 to 1, except in those cases where +a path cannot be found (will only be true for verbs as there are many +distinct verb taxonomies), in which case None is returned. A score of +1 represents identity i.e. comparing a sense with itself will return 1. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not +share a single root which disallows this metric from working for +synsets that are not connected. This flag (True by default) +creates a fake root that connects all the taxonomies. Set it +to false to disable this behavior. For the noun taxonomy, +there is usually a default root except for WordNet version 1.6. +If you are using wordnet 1.6, a fake root will be added for nouns +as well. +.UNINDENT +.TP +.B Returns +A score denoting the similarity of the two \fBSynset\fP objects, +normally between 0 and 1. None is returned if no connecting path +could be found. 1 is returned if a \fBSynset\fP is compared with +itself. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B readme(lang=\(aqomw\(aq) +Return the contents of README (for omw) +use lang=lang to get the readme for an individual language +.UNINDENT +.INDENT 7.0 +.TP +.B res_similarity(synset1, synset2, ic, verbose=False) +Resnik Similarity: +Return a score denoting how similar two word senses are, based on the +Information Content (IC) of the Least Common Subsumer (most specific +ancestor node). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBic\fP (\fIdict\fP) \-\- an information content object (as returned by +\fBnltk.corpus.wordnet_ic.ic()\fP). +.UNINDENT +.TP +.B Returns +A float score denoting the similarity of the two \fBSynset\fP +objects. Synsets whose LCS is the root node of the taxonomy will +have a score of 0 (e.g. N[\(aqdog\(aq][0] and N[\(aqtable\(aq][0]). +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B ss2of(ss, lang=None) +return the ID of the synset +.UNINDENT +.INDENT 7.0 +.TP +.B synset(name) +.UNINDENT +.INDENT 7.0 +.TP +.B synset_from_pos_and_offset(pos, offset) +.INDENT 7.0 +.IP \(bu 2 +pos: The synset\(aqs part of speech, matching one of the module level +attributes ADJ, ADJ_SAT, ADV, NOUN or VERB (\(aqa\(aq, \(aqs\(aq, \(aqr\(aq, \(aqn\(aq, or \(aqv\(aq). +.IP \(bu 2 +offset: The byte offset of this synset in the WordNet dict file +for this pos. +.UNINDENT +.sp +.nf +.ft C +>>> from nltk.corpus import wordnet as wn +>>> print(wn.synset_from_pos_and_offset(\(aqn\(aq, 1740)) +Synset(\(aqentity.n.01\(aq) +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B synset_from_sense_key(sense_key) +Retrieves synset based on a given sense_key. Sense keys can be +obtained from lemma.key() +.sp +From \fI\%https://wordnet.princeton.edu/documentation/senseidx5wn\fP: +A sense_key is represented as: +.INDENT 7.0 +.INDENT 3.5 +lemma % lex_sense (e.g. 
\(aqdog%1:18:01::\(aq) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B where lex_sense is encoded as: +ss_type:lex_filenum:lex_id:head_word:head_id +.UNINDENT +.sp +lemma: ASCII text of word/collocation, in lower case +ss_type: synset type for the sense (1 digit int) +.INDENT 7.0 +.INDENT 3.5 +The synset type is encoded as follows: +1 NOUN +2 VERB +3 ADJECTIVE +4 ADVERB +5 ADJECTIVE SATELLITE +.UNINDENT +.UNINDENT +.sp +lex_filenum: name of lexicographer file containing the synset for the sense (2 digit int) +lex_id: when paired with lemma, uniquely identifies a sense in the lexicographer file (2 digit int) +head_word: lemma of the first word in satellite\(aqs head synset +.INDENT 7.0 +.INDENT 3.5 +Only used if sense is in an adjective satellite synset +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B head_id: uniquely identifies sense in a lexicographer file when paired with head_word +Only used if head_word is present (2 digit int) +.UNINDENT +.sp +.nf +.ft C +>>> import nltk +>>> from nltk.corpus import wordnet as wn +>>> print(wn.synset_from_sense_key("drive%1:04:03::")) +Synset(\(aqdrive.n.06\(aq) +.ft P +.fi +.sp +.nf +.ft C +>>> print(wn.synset_from_sense_key("driving%1:04:03::")) +Synset(\(aqdrive.n.06\(aq) +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B synsets(lemma, pos=None, lang=\(aqeng\(aq, check_exceptions=True) +Load all synsets with a given lemma and part of speech tag. +If no pos is specified, all synsets for all parts of speech +will be loaded. +If lang is specified, all the synsets associated with the lemma name +of that language will be returned. +.UNINDENT +.INDENT 7.0 +.TP +.B words(lang=\(aqeng\(aq) +return lemmas of the given language as list of words +.UNINDENT +.INDENT 7.0 +.TP +.B wup_similarity(synset1, synset2, verbose=False, simulate_root=True) +Wu\-Palmer Similarity: +Return a score denoting how similar two word senses are, based on the +depth of the two senses in the taxonomy and that of their Least Common +Subsumer (most specific ancestor node). Previously, the scores computed +by this implementation did _not_ always agree with those given by +Pedersen\(aqs Perl implementation of WordNet Similarity. However, with +the addition of the simulate_root flag (see below), the score for +verbs now almost always agree but not always for nouns. +.sp +The LCS does not necessarily feature in the shortest path connecting +the two senses, as it is by definition the common ancestor deepest in +the taxonomy, not closest to the two senses. Typically, however, it +will so feature. Where multiple candidates for the LCS exist, that +whose shortest path to the root node is the longest will be selected. +Where the LCS has multiple paths to the root, the longer path is used +for the purposes of the calculation. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP (\fISynset\fP) \-\- The \fBSynset\fP that this \fBSynset\fP is being compared to. +.IP \(bu 2 +\fBsimulate_root\fP (\fIbool\fP) \-\- The various verb taxonomies do not +share a single root which disallows this metric from working for +synsets that are not connected. This flag (True by default) +creates a fake root that connects all the taxonomies. Set it +to false to disable this behavior. For the noun taxonomy, +there is usually a default root except for WordNet version 1.6. +If you are using wordnet 1.6, a fake root will be added for nouns +as well. +.UNINDENT +.TP +.B Returns +A float score denoting the similarity of the two \fBSynset\fP +objects, normally greater than zero. 
If no connecting path between +the two senses can be found, None is returned. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.WordNetICCorpusReader(root, fileids) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +A corpus reader for the WordNet information content corpus. +.INDENT 7.0 +.TP +.B ic(icfile) +Load an information content file from the wordnet_ic corpus +and return a dictionary. This dictionary has just two keys, +NOUN and VERB, whose values are dictionaries that map from +synsets to information content values. +.INDENT 7.0 +.TP +.B Parameters +\fBicfile\fP (\fIstr\fP) \-\- The name of the wordnet_ic file (e.g. "ic\-brown.dat") +.TP +.B Returns +An information content dictionary +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.XMLCorpusReader(root, fileids, wrap_etree=False) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Corpus reader for corpora whose documents are xml files. +.sp +Note that the \fBXMLCorpusReader\fP constructor does not take an +\fBencoding\fP argument, because the unicode encoding is specified by +the XML files themselves. See the XML specs for more info. +.INDENT 7.0 +.TP +.B words(fileid=None) +Returns all of the words and punctuation symbols in the specified file +that were in text nodes \-\- ie, tags are ignored. Like the xml() method, +fileid can only specify one file. +.INDENT 7.0 +.TP +.B Returns +the given file\(aqs text nodes as a list of words and punctuation symbols +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B xml(fileid=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.corpus.reader.YCOECorpusReader(root, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.corpus.reader.api.CorpusReader\fP +.sp +Corpus reader for the York\-Toronto\-Helsinki Parsed Corpus of Old +English Prose (YCOE), a 1.5 million word syntactically\-annotated +corpus of Old English prose texts. +.INDENT 7.0 +.TP +.B documents(fileids=None) +Return a list of document identifiers for all documents in +this corpus, or for the documents with the given file(s) if +specified. +.UNINDENT +.INDENT 7.0 +.TP +.B fileids(documents=None) +Return a list of file identifiers for the files that make up +this corpus, or that store the given document(s) if specified. +.UNINDENT +.INDENT 7.0 +.TP +.B paras(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B parsed_sents(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B sents(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_paras(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_sents(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_words(documents=None) +.UNINDENT +.INDENT 7.0 +.TP +.B words(documents=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.find_corpus_fileids(root, regexp) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.corpus.reader.tagged_treebank_para_block_reader(stream) +.UNINDENT +.SS Submodules +.SS nltk.corpus.europarl_raw module +.SS nltk.corpus.util module +.INDENT 0.0 +.TP +.B class nltk.corpus.util.LazyCorpusLoader(name, reader_cls, *args, **kwargs) +Bases: \fBobject\fP +.sp +To see the API documentation for this lazily loaded corpus, first +run corpus.ensure_loaded(), and then run help(this_corpus). +.sp +LazyCorpusLoader is a proxy object which is used to stand in for a +corpus object before the corpus is loaded. This allows NLTK to +create an object for each corpus, but defer the costs associated +with loading those corpora until the first time that they\(aqre +actually accessed. 
+.sp +The first time this object is accessed in any way, it will load +the corresponding corpus, and transform itself into that corpus +(by modifying its own \fB__class__\fP and \fB__dict__\fP attributes). +.sp +If the corpus can not be found, then accessing this object will +raise an exception, displaying installation instructions for the +NLTK data package. Once they\(aqve properly installed the data +package (or modified \fBnltk.data.path\fP to point to its location), +they can then use the corpus object without restarting python. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBname\fP (\fIstr\fP) \-\- The name of the corpus +.IP \(bu 2 +\fBreader_cls\fP \-\- The specific CorpusReader class, e.g. PlaintextCorpusReader, WordListCorpusReader +.IP \(bu 2 +\fBnltk_data_subdir\fP (\fIstr\fP) \-\- The subdirectory where the corpus is stored. +.IP \(bu 2 +\fB*args\fP \-\- +.sp +Any other non\-keywords arguments that \fIreader_cls\fP might need. + +.IP \(bu 2 +\fB*kargs\fP \-\- +.sp +Any other keywords arguments that \fIreader_cls\fP might need. + +.UNINDENT +.UNINDENT +.UNINDENT +.SS Module contents +.sp +NLTK corpus readers. The modules in this package provide functions +that can be used to read corpus files in a variety of formats. These +functions can be used to read both the corpus files that are +distributed in the NLTK corpus package, and corpus files that are part +of external corpora. +.SS Available Corpora +.sp +Please see \fI\%http://www.nltk.org/nltk_data/\fP for a complete list. +Install corpora using nltk.download(). +.SS Corpus Reader Functions +.sp +Each corpus module defines one or more "corpus reader functions", +which can be used to read documents from that corpus. These functions +take an argument, \fBitem\fP, which is used to indicate which document +should be read from the corpus: +.INDENT 0.0 +.IP \(bu 2 +If \fBitem\fP is one of the unique identifiers listed in the corpus +module\(aqs \fBitems\fP variable, then the corresponding document will +be loaded from the NLTK corpus package. +.IP \(bu 2 +If \fBitem\fP is a filename, then that file will be read. +.UNINDENT +.sp +Additionally, corpus reader functions can be given lists of item +names; in which case, they will return a concatenation of the +corresponding documents. +.sp +Corpus reader functions are named based on the type of information +they return. Some common examples, and their return types, are: +.INDENT 0.0 +.IP \(bu 2 +words(): list of str +.IP \(bu 2 +sents(): list of (list of str) +.IP \(bu 2 +paras(): list of (list of (list of str)) +.IP \(bu 2 +tagged_words(): list of (str,str) tuple +.IP \(bu 2 +tagged_sents(): list of (list of (str,str)) +.IP \(bu 2 +tagged_paras(): list of (list of (list of (str,str))) +.IP \(bu 2 +chunked_sents(): list of (Tree w/ (str,str) leaves) +.IP \(bu 2 +parsed_sents(): list of (Tree with str leaves) +.IP \(bu 2 +parsed_paras(): list of (list of (Tree with str leaves)) +.IP \(bu 2 +xml(): A single xml ElementTree +.IP \(bu 2 +raw(): unprocessed corpus contents +.UNINDENT +.sp +For example, to read a list of the words in the Brown Corpus, use +\fBnltk.corpus.brown.words()\fP: +.sp +.nf +.ft C +>>> from nltk.corpus import brown +>>> print(", ".join(brown.words())) +The, Fulton, County, Grand, Jury, said, ... +.ft P +.fi +.INDENT 0.0 +.TP +.B nltk.corpus.demo() +.UNINDENT +.SS nltk.draw package +.SS Submodules +.SS nltk.draw.cfg module +.SS nltk.draw.dispersion module +.sp +A utility for displaying lexical dispersion. 
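+.sp
+A minimal usage sketch (this assumes \fBmatplotlib\fP is installed; the
+corpus and target words chosen here are purely illustrative):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import gutenberg
+>>> from nltk.draw.dispersion import dispersion_plot
+>>> text = gutenberg.words(\(aqausten\-emma.txt\(aq)
+>>> dispersion_plot(text, [\(aqEmma\(aq, \(aqHarriet\(aq], ignore_case=True)
+.ft P
+.fi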
+.INDENT 0.0
+.TP
+.B nltk.draw.dispersion.dispersion_plot(text, words, ignore_case=False, title=\(aqLexical Dispersion Plot\(aq)
+Generate a lexical dispersion plot.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtext\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI) or \fP\fIenum\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The source text
+.IP \(bu 2
+\fBwords\fP (\fIlist of str\fP) \-\- The target words
+.IP \(bu 2
+\fBignore_case\fP (\fIbool\fP) \-\- flag to set if case should be ignored when searching text
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.draw.table module
+.SS nltk.draw.tree module
+.SS nltk.draw.util module
+.SS Module contents
+.SS nltk.inference package
+.SS Submodules
+.SS nltk.inference.api module
+.sp
+Interfaces and base classes for theorem provers and model builders.
+.sp
+\fBProver\fP is a standard interface for a theorem prover which tries to prove a goal from a
+list of assumptions.
+.sp
+\fBModelBuilder\fP is a standard interface for a model builder. Given just a set of assumptions,
+the model builder tries to build a model for the assumptions. Given a set of assumptions and a
+goal \fIG\fP, the model builder tries to find a counter\-model, in the sense of a model that will satisfy
+the assumptions plus the negation of \fIG\fP\&.
+.INDENT 0.0
+.TP
+.B class nltk.inference.api.BaseModelBuilderCommand(modelbuilder, goal=None, assumptions=None)
+Bases: \fI\%nltk.inference.api.BaseTheoremToolCommand\fP, \fI\%nltk.inference.api.ModelBuilderCommand\fP
+.sp
+This class holds a \fBModelBuilder\fP, a goal, and a list of assumptions. When
+build_model() is called, the \fBModelBuilder\fP is executed with the goal and
+assumptions.
+.INDENT 7.0
+.TP
+.B build_model(verbose=False)
+Attempt to build a model. Store the result to prevent unnecessary
+re\-building.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_model_builder()
+Return the model builder object
+:return: \fBModelBuilder\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B model(format=None)
+Return a string representation of the model
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsimplify\fP \-\- bool simplify the proof?
+.TP
+.B Returns
+str
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.api.BaseProverCommand(prover, goal=None, assumptions=None)
+Bases: \fI\%nltk.inference.api.BaseTheoremToolCommand\fP, \fI\%nltk.inference.api.ProverCommand\fP
+.sp
+This class holds a \fBProver\fP, a goal, and a list of assumptions. When
+prove() is called, the \fBProver\fP is executed with the goal and assumptions.
+.INDENT 7.0
+.TP
+.B decorate_proof(proof_string, simplify=True)
+Modify and return the proof string
+:param proof_string: str the proof to decorate
+:param simplify: bool simplify the proof?
+:return: str
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_prover()
+Return the prover object
+:return: \fBProver\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B proof(simplify=True)
+Return the proof string
+:param simplify: bool simplify the proof?
+:return: str
+.UNINDENT
+.INDENT 7.0
+.TP
+.B prove(verbose=False)
+Perform the actual proof. Store the result to prevent unnecessary
+re\-proving.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.api.BaseTheoremToolCommand(goal=None, assumptions=None)
+Bases: \fI\%nltk.inference.api.TheoremToolCommand\fP
+.sp
+This class holds a goal and a list of assumptions to be used in proving
+or model building.
+.INDENT 7.0
+.TP
+.B add_assumptions(new_assumptions)
+Add new assumptions to the assumption list.
+.INDENT 7.0 +.TP +.B Parameters +\fBnew_assumptions\fP (\fIlist\fP\fI(\fP\fIsem.Expression\fP\fI)\fP) \-\- new assumptions +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B assumptions() +List the current assumptions. +.INDENT 7.0 +.TP +.B Returns +list of \fBExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B goal() +Return the goal +.INDENT 7.0 +.TP +.B Returns +\fBExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B print_assumptions() +Print the list of the current assumptions. +.UNINDENT +.INDENT 7.0 +.TP +.B retract_assumptions(retracted, debug=False) +Retract assumptions from the assumption list. +.INDENT 7.0 +.TP +.B Parameters +\fBdebug\fP \-\- If True, give warning when \fBretracted\fP is not present on +.UNINDENT +.sp +assumptions list. +:type debug: bool +:param retracted: assumptions to be retracted +:type retracted: list(sem.Expression) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.ModelBuilder +Bases: \fBobject\fP +.sp +Interface for trying to build a model of set of formulas. +Open formulas are assumed to be universally quantified. +Both the goal and the assumptions are constrained to be formulas +of \fBlogic.Expression\fP\&. +.INDENT 7.0 +.TP +.B build_model(goal=None, assumptions=None, verbose=False) +Perform the actual model building. +:return: Whether a model was generated +:rtype: bool +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.ModelBuilderCommand +Bases: \fI\%nltk.inference.api.TheoremToolCommand\fP +.sp +This class holds a \fBModelBuilder\fP, a goal, and a list of assumptions. +When build_model() is called, the \fBModelBuilder\fP is executed with the goal +and assumptions. +.INDENT 7.0 +.TP +.B abstract build_model(verbose=False) +Perform the actual model building. +:return: A model if one is generated; None otherwise. +:rtype: sem.Valuation +.UNINDENT +.INDENT 7.0 +.TP +.B abstract get_model_builder() +Return the model builder object +:return: \fBModelBuilder\fP +.UNINDENT +.INDENT 7.0 +.TP +.B abstract model(format=None) +Return a string representation of the model +.INDENT 7.0 +.TP +.B Parameters +\fBsimplify\fP \-\- bool simplify the proof? +.TP +.B Returns +str +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.ModelBuilderCommandDecorator(modelBuilderCommand) +Bases: \fI\%nltk.inference.api.TheoremToolCommandDecorator\fP, \fI\%nltk.inference.api.ModelBuilderCommand\fP +.sp +A base decorator for the \fBModelBuilderCommand\fP class from which other +prover command decorators can extend. +.INDENT 7.0 +.TP +.B build_model(verbose=False) +Attempt to build a model. Store the result to prevent unnecessary +re\-building. +.UNINDENT +.INDENT 7.0 +.TP +.B get_model_builder() +Return the model builder object +:return: \fBModelBuilder\fP +.UNINDENT +.INDENT 7.0 +.TP +.B model(format=None) +Return a string representation of the model +.INDENT 7.0 +.TP +.B Parameters +\fBsimplify\fP \-\- bool simplify the proof? +.TP +.B Returns +str +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.ParallelProverBuilder(prover, modelbuilder) +Bases: \fI\%nltk.inference.api.Prover\fP, \fI\%nltk.inference.api.ModelBuilder\fP +.sp +This class stores both a prover and a model builder and when either +prove() or build_model() is called, then both theorem tools are run in +parallel. Whichever finishes first, the prover or the model builder, is the +result that will be used. 
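+.sp
+A minimal sketch (assuming the external Prover9 and Mace4 binaries are
+installed and on the search path):
+.sp
+.nf
+.ft C
+>>> from nltk.sem import Expression
+>>> from nltk.inference.api import ParallelProverBuilder
+>>> from nltk.inference.prover9 import Prover9
+>>> from nltk.inference.mace import Mace
+>>> read_expr = Expression.fromstring
+>>> tool = ParallelProverBuilder(Prover9(), Mace())
+>>> tool.prove(read_expr(\(aqmortal(socrates)\(aq),
+...            [read_expr(\(aqall x.(man(x) \-> mortal(x))\(aq),
+...             read_expr(\(aqman(socrates)\(aq)])
+True
+.ft P
+.fi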
+.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.ParallelProverBuilderCommand(prover, modelbuilder, goal=None, assumptions=None) +Bases: \fI\%nltk.inference.api.BaseProverCommand\fP, \fI\%nltk.inference.api.BaseModelBuilderCommand\fP +.sp +This command stores both a prover and a model builder and when either +prove() or build_model() is called, then both theorem tools are run in +parallel. Whichever finishes first, the prover or the model builder, is the +result that will be used. +.sp +Because the theorem prover result is the opposite of the model builder +result, we will treat self._result as meaning "proof found/no model found". +.INDENT 7.0 +.TP +.B build_model(verbose=False) +Attempt to build a model. Store the result to prevent unnecessary +re\-building. +.UNINDENT +.INDENT 7.0 +.TP +.B prove(verbose=False) +Perform the actual proof. Store the result to prevent unnecessary +re\-proving. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.Prover +Bases: \fBobject\fP +.sp +Interface for trying to prove a goal from assumptions. Both the goal and +the assumptions are constrained to be formulas of \fBlogic.Expression\fP\&. +.INDENT 7.0 +.TP +.B prove(goal=None, assumptions=None, verbose=False) +.INDENT 7.0 +.TP +.B Returns +Whether the proof was successful or not. +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.ProverCommand +Bases: \fI\%nltk.inference.api.TheoremToolCommand\fP +.sp +This class holds a \fBProver\fP, a goal, and a list of assumptions. When +prove() is called, the \fBProver\fP is executed with the goal and assumptions. +.INDENT 7.0 +.TP +.B abstract get_prover() +Return the prover object +:return: \fBProver\fP +.UNINDENT +.INDENT 7.0 +.TP +.B abstract proof(simplify=True) +Return the proof string +:param simplify: bool simplify the proof? +:return: str +.UNINDENT +.INDENT 7.0 +.TP +.B abstract prove(verbose=False) +Perform the actual proof. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.ProverCommandDecorator(proverCommand) +Bases: \fI\%nltk.inference.api.TheoremToolCommandDecorator\fP, \fI\%nltk.inference.api.ProverCommand\fP +.sp +A base decorator for the \fBProverCommand\fP class from which other +prover command decorators can extend. +.INDENT 7.0 +.TP +.B decorate_proof(proof_string, simplify=True) +Modify and return the proof string +:param proof_string: str the proof to decorate +:param simplify: bool simplify the proof? +:return: str +.UNINDENT +.INDENT 7.0 +.TP +.B get_prover() +Return the prover object +:return: \fBProver\fP +.UNINDENT +.INDENT 7.0 +.TP +.B proof(simplify=True) +Return the proof string +:param simplify: bool simplify the proof? +:return: str +.UNINDENT +.INDENT 7.0 +.TP +.B prove(verbose=False) +Perform the actual proof. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.TheoremToolCommand +Bases: \fBobject\fP +.sp +This class holds a goal and a list of assumptions to be used in proving +or model building. +.INDENT 7.0 +.TP +.B abstract add_assumptions(new_assumptions) +Add new assumptions to the assumption list. +.INDENT 7.0 +.TP +.B Parameters +\fBnew_assumptions\fP (\fIlist\fP\fI(\fP\fIsem.Expression\fP\fI)\fP) \-\- new assumptions +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract assumptions() +List the current assumptions. 
+.INDENT 7.0 +.TP +.B Returns +list of \fBExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract goal() +Return the goal +.INDENT 7.0 +.TP +.B Returns +\fBExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract print_assumptions() +Print the list of the current assumptions. +.UNINDENT +.INDENT 7.0 +.TP +.B abstract retract_assumptions(retracted, debug=False) +Retract assumptions from the assumption list. +.INDENT 7.0 +.TP +.B Parameters +\fBdebug\fP \-\- If True, give warning when \fBretracted\fP is not present on +.UNINDENT +.sp +assumptions list. +:type debug: bool +:param retracted: assumptions to be retracted +:type retracted: list(sem.Expression) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.TheoremToolCommandDecorator(command) +Bases: \fI\%nltk.inference.api.TheoremToolCommand\fP +.sp +A base decorator for the \fBProverCommandDecorator\fP and +\fBModelBuilderCommandDecorator\fP classes from which decorators can extend. +.INDENT 7.0 +.TP +.B add_assumptions(new_assumptions) +Add new assumptions to the assumption list. +.INDENT 7.0 +.TP +.B Parameters +\fBnew_assumptions\fP (\fIlist\fP\fI(\fP\fIsem.Expression\fP\fI)\fP) \-\- new assumptions +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B assumptions() +List the current assumptions. +.INDENT 7.0 +.TP +.B Returns +list of \fBExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B goal() +Return the goal +.INDENT 7.0 +.TP +.B Returns +\fBExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B print_assumptions() +Print the list of the current assumptions. +.UNINDENT +.INDENT 7.0 +.TP +.B retract_assumptions(retracted, debug=False) +Retract assumptions from the assumption list. +.INDENT 7.0 +.TP +.B Parameters +\fBdebug\fP \-\- If True, give warning when \fBretracted\fP is not present on +.UNINDENT +.sp +assumptions list. +:type debug: bool +:param retracted: assumptions to be retracted +:type retracted: list(sem.Expression) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.api.TheoremToolThread(command, verbose, name=None) +Bases: \fBthreading.Thread\fP +.INDENT 7.0 +.TP +.B property result +.UNINDENT +.INDENT 7.0 +.TP +.B run() +Method representing the thread\(aqs activity. +.sp +You may override this method in a subclass. The standard run() method +invokes the callable object passed to the object\(aqs constructor as the +target argument, if any, with sequential and keyword arguments taken +from the args and kwargs arguments, respectively. +.UNINDENT +.UNINDENT +.SS nltk.inference.discourse module +.sp +Module for incrementally developing simple discourses, and checking for semantic ambiguity, +consistency and informativeness. +.sp +Many of the ideas are based on the CURT family of programs of Blackburn and Bos +(see \fI\%http://homepages.inf.ed.ac.uk/jbos/comsem/book1.html\fP). +.sp +Consistency checking is carried out by using the \fBmace\fP module to call the Mace4 model builder. +Informativeness checking is carried out with a call to \fBProver.prove()\fP from +the \fBinference\fP module. +.sp +\fBDiscourseTester\fP is a constructor for discourses. +The basic data structure is a list of sentences, stored as \fBself._sentences\fP\&. Each sentence in the list +is assigned a "sentence ID" (\fBsid\fP) of the form \fBs\fP\fIi\fP\&. 
For example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +s0: A boxer walks +s1: Every boxer chases a girl +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Each sentence can be ambiguous between a number of readings, each of which receives a +"reading ID" (\fBrid\fP) of the form \fBs\fP\fIi\fP \-\fBr\fP\fIj\fP\&. For example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +s0 readings: + +s0\-r1: some x.(boxer(x) & walk(x)) +s0\-r0: some x.(boxerdog(x) & walk(x)) +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +A "thread" is a list of readings, represented as a list of \fBrid\fPs. +Each thread receives a "thread ID" (\fBtid\fP) of the form \fBd\fP\fIi\fP\&. +For example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +d0: [\(aqs0\-r0\(aq, \(aqs1\-r0\(aq] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The set of all threads for a discourse is the Cartesian product of all the readings of the sequences of sentences. +(This is not intended to scale beyond very short discourses!) The method \fBreadings(filter=True)\fP will only show +those threads which are consistent (taking into account any background assumptions). +.INDENT 0.0 +.TP +.B class nltk.inference.discourse.CfgReadingCommand(gramfile=None) +Bases: \fI\%nltk.inference.discourse.ReadingCommand\fP +.INDENT 7.0 +.TP +.B combine_readings(readings) +.INDENT 7.0 +.TP +.B See +ReadingCommand.combine_readings() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse_to_readings(sentence) +.INDENT 7.0 +.TP +.B See +ReadingCommand.parse_to_readings() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B to_fol(expression) +.INDENT 7.0 +.TP +.B See +ReadingCommand.to_fol() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.discourse.DiscourseTester(input, reading_command=None, background=None) +Bases: \fBobject\fP +.sp +Check properties of an ongoing discourse. +.INDENT 7.0 +.TP +.B add_background(background, verbose=False) +Add a list of background assumptions for reasoning about the discourse. +.sp +When called, this method also updates the discourse model\(aqs set of readings and threads. +:param background: Formulas which contain background information +:type background: list(Expression) +.UNINDENT +.INDENT 7.0 +.TP +.B add_sentence(sentence, informchk=False, consistchk=False) +Add a sentence to the current discourse. +.sp +Updates \fBself._input\fP and \fBself._sentences\fP\&. +:param sentence: An input sentence +:type sentence: str +:param informchk: if \fBTrue\fP, check that the result of adding the sentence is thread\-informative. Updates \fBself._readings\fP\&. +:param consistchk: if \fBTrue\fP, check that the result of adding the sentence is thread\-consistent. Updates \fBself._readings\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B background() +Show the current background assumptions. +.UNINDENT +.INDENT 7.0 +.TP +.B expand_threads(thread_id, threads=None) +Given a thread ID, find the list of \fBlogic.Expression\fP objects corresponding to the reading IDs in that thread. 
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBthread_id\fP (\fIstr\fP) \-\- thread ID +.IP \(bu 2 +\fBthreads\fP (\fIdict\fP) \-\- a mapping from thread IDs to lists of reading IDs +.UNINDENT +.TP +.B Returns +A list of pairs \fB(rid, reading)\fP where reading is the \fBlogic.Expression\fP associated with a reading ID +.TP +.B Return type +list of tuple +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B grammar() +Print out the grammar in use for parsing input sentences +.UNINDENT +.INDENT 7.0 +.TP +.B models(thread_id=None, show=True, verbose=False) +Call Mace4 to build a model for each current discourse thread. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBthread_id\fP (\fIstr\fP) \-\- thread ID +.IP \(bu 2 +\fBshow\fP \-\- If \fBTrue\fP, display the model that has been found. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B static multiply(discourse, readings) +Multiply every thread in \fBdiscourse\fP by every reading in \fBreadings\fP\&. +.sp +Given discourse = [[\(aqA\(aq], [\(aqB\(aq]], readings = [\(aqa\(aq, \(aqb\(aq, \(aqc\(aq] , returns +[[\(aqA\(aq, \(aqa\(aq], [\(aqA\(aq, \(aqb\(aq], [\(aqA\(aq, \(aqc\(aq], [\(aqB\(aq, \(aqa\(aq], [\(aqB\(aq, \(aqb\(aq], [\(aqB\(aq, \(aqc\(aq]] +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBdiscourse\fP (\fIlist of lists\fP) \-\- the current list of readings +.IP \(bu 2 +\fBreadings\fP (\fIlist\fP\fI(\fP\fIExpression\fP\fI)\fP) \-\- an additional list of readings +.UNINDENT +.TP +.B Return type +A list of lists +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B readings(sentence=None, threaded=False, verbose=True, filter=False, show_thread_readings=False) +Construct and show the readings of the discourse (or of a single sentence). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsentence\fP (\fIstr\fP) \-\- test just this sentence +.IP \(bu 2 +\fBthreaded\fP \-\- if \fBTrue\fP, print out each thread ID and the corresponding thread. +.IP \(bu 2 +\fBfilter\fP \-\- if \fBTrue\fP, only print out consistent thread IDs and threads. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B retract_sentence(sentence, verbose=True) +Remove a sentence from the current discourse. +.sp +Updates \fBself._input\fP, \fBself._sentences\fP and \fBself._readings\fP\&. +:param sentence: An input sentence +:type sentence: str +:param verbose: If \fBTrue\fP, report on the updated list of sentences. +.UNINDENT +.INDENT 7.0 +.TP +.B sentences() +Display the list of sentences in the current discourse. 
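+.sp
+For example (a minimal sketch using the discourse from the module
+overview above; the output shown is illustrative):
+.sp
+.nf
+.ft C
+>>> from nltk.inference.discourse import DiscourseTester
+>>> dt = DiscourseTester([\(aqA boxer walks\(aq, \(aqEvery boxer chases a girl\(aq])
+>>> dt.sentences()
+s0: A boxer walks
+s1: Every boxer chases a girl
+.ft P
+.fi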
+.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.discourse.DrtGlueReadingCommand(semtype_file=None, remove_duplicates=False, depparser=None) +Bases: \fI\%nltk.inference.discourse.ReadingCommand\fP +.INDENT 7.0 +.TP +.B combine_readings(readings) +.INDENT 7.0 +.TP +.B See +ReadingCommand.combine_readings() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse_to_readings(sentence) +.INDENT 7.0 +.TP +.B See +ReadingCommand.parse_to_readings() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B process_thread(sentence_readings) +.INDENT 7.0 +.TP +.B See +ReadingCommand.process_thread() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B to_fol(expression) +.INDENT 7.0 +.TP +.B See +ReadingCommand.to_fol() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.discourse.ReadingCommand +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B abstract combine_readings(readings) +.INDENT 7.0 +.TP +.B Parameters +\fBreadings\fP (\fIlist\fP\fI(\fP\fIExpression\fP\fI)\fP) \-\- readings to combine +.TP +.B Returns +one combined reading +.TP +.B Return type +Expression +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract parse_to_readings(sentence) +.INDENT 7.0 +.TP +.B Parameters +\fBsentence\fP (\fIstr\fP) \-\- the sentence to read +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B process_thread(sentence_readings) +This method should be used to handle dependencies between readings such +as resolving anaphora. +.INDENT 7.0 +.TP +.B Parameters +\fBsentence_readings\fP (\fIlist\fP\fI(\fP\fIExpression\fP\fI)\fP) \-\- readings to process +.TP +.B Returns +the list of readings after processing +.TP +.B Return type +list(Expression) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract to_fol(expression) +Convert this expression into a First\-Order Logic expression. +.INDENT 7.0 +.TP +.B Parameters +\fBexpression\fP (\fIExpression\fP) \-\- an expression +.TP +.B Returns +a FOL version of the input expression +.TP +.B Return type +Expression +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.discourse.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.discourse.discourse_demo(reading_command=None) +Illustrate the various methods of \fBDiscourseTester\fP +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.discourse.drt_discourse_demo(reading_command=None) +Illustrate the various methods of \fBDiscourseTester\fP +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.discourse.load_fol(s) +Temporarily duplicated from \fBnltk.sem.util\fP\&. +Convert a file of first order formulas into a list of \fBExpression\fP objects. +.INDENT 7.0 +.TP +.B Parameters +\fBs\fP (\fIstr\fP) \-\- the contents of the file +.TP +.B Returns +a list of parsed formulas. +.TP +.B Return type +list(Expression) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.discourse.spacer(num=30) +.UNINDENT +.SS nltk.inference.mace module +.sp +A model builder that makes use of the external \(aqMace4\(aq package. +.INDENT 0.0 +.TP +.B class nltk.inference.mace.Mace(end_size=500) +Bases: \fI\%nltk.inference.prover9.Prover9Parent\fP, \fI\%nltk.inference.api.ModelBuilder\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.mace.MaceCommand(goal=None, assumptions=None, max_models=500, model_builder=None) +Bases: \fI\%nltk.inference.prover9.Prover9CommandParent\fP, \fI\%nltk.inference.api.BaseModelBuilderCommand\fP +.sp +A \fBMaceCommand\fP specific to the \fBMace\fP model builder. It contains +a print_assumptions() method that is used to print the list +of assumptions in multiple formats. 
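+.sp
+A minimal sketch (assuming the external Mace4 binary is installed; the
+formula is illustrative):
+.sp
+.nf
+.ft C
+>>> from nltk.sem import Expression
+>>> from nltk.inference.mace import MaceCommand
+>>> read_expr = Expression.fromstring
+>>> mc = MaceCommand(assumptions=[read_expr(\(aqexists x.man(x)\(aq)])
+>>> mc.build_model()
+True
+.ft P
+.fi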
+.INDENT 7.0 +.TP +.B property valuation +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.mace.decode_result(found) +Decode the result of model_found() +.INDENT 7.0 +.TP +.B Parameters +\fBfound\fP (\fIbool\fP) \-\- The output of model_found() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.mace.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.mace.spacer(num=30) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.mace.test_build_model(arguments) +Try to build a \fBnltk.sem.Valuation\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.mace.test_make_relation_set() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.mace.test_model_found(arguments) +Try some proofs and exhibit the results. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.mace.test_transform_output(argument_pair) +Transform the model into various Mace4 \fBinterpformat\fP formats. +.UNINDENT +.SS nltk.inference.nonmonotonic module +.sp +A module to perform nonmonotonic reasoning. The ideas and demonstrations in +this module are based on "Logical Foundations of Artificial Intelligence" by +Michael R. Genesereth and Nils J. Nilsson. +.INDENT 0.0 +.TP +.B class nltk.inference.nonmonotonic.ClosedDomainProver(proverCommand) +Bases: \fI\%nltk.inference.api.ProverCommandDecorator\fP +.sp +This is a prover decorator that adds domain closure assumptions before +proving. +.INDENT 7.0 +.TP +.B assumptions() +List the current assumptions. +.INDENT 7.0 +.TP +.B Returns +list of \fBExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B goal() +Return the goal +.INDENT 7.0 +.TP +.B Returns +\fBExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B replace_quants(ex, domain) +.INDENT 7.0 +.TP +.B Apply the closed domain assumption to the expression +.INDENT 7.0 +.IP \(bu 2 +Domain = union([e.free()|e.constants() for e in all_expressions]) +.IP \(bu 2 +.INDENT 2.0 +.TP +.B translate "exists x.P" to "(z=d1 | z=d2 | ... ) & P.replace(x,z)" OR +"P.replace(x, d1) | P.replace(x, d2) | ..." +.UNINDENT +.IP \(bu 2 +translate "all x.P" to "P.replace(x, d1) & P.replace(x, d2) & ..." +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBex\fP \-\- \fBExpression\fP +.IP \(bu 2 +\fBdomain\fP \-\- set of {Variable}s +.UNINDENT +.TP +.B Returns +\fBExpression\fP +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.nonmonotonic.ClosedWorldProver(proverCommand) +Bases: \fI\%nltk.inference.api.ProverCommandDecorator\fP +.sp +This is a prover decorator that completes predicates before proving. +.sp +If the assumptions contain "P(A)", then "all x.(P(x) \-> (x=A))" is the completion of "P". +If the assumptions contain "all x.(ostrich(x) \-> bird(x))", then "all x.(bird(x) \-> ostrich(x))" is the completion of "bird". +If the assumptions don\(aqt contain anything that are "P", then "all x.\-P(x)" is the completion of "P". +.sp +walk(Socrates) +Socrates != Bill ++ all x.(walk(x) \-> (x=Socrates)) +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +\-walk(Bill) +.sp +see(Socrates, John) +see(John, Mary) +Socrates != John +John != Mary ++ all x.all y.(see(x,y) \-> ((x=Socrates & y=John) | (x=John & y=Mary))) +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +\-see(Socrates, Mary) +.sp +all x.(ostrich(x) \-> bird(x)) +bird(Tweety) +\-ostrich(Sam) +Sam != Tweety ++ all x.(bird(x) \-> (ostrich(x) | x=Tweety)) ++ all x.\-ostrich(x) +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +\-bird(Sam) +.INDENT 7.0 +.TP +.B assumptions() +List the current assumptions. 
+.INDENT 7.0
+.TP
+.B Returns
+list of \fBExpression\fP
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.nonmonotonic.PredHolder
+Bases: \fBobject\fP
+.sp
+This class will be used by a dictionary that will store information
+about predicates to be used by the \fBClosedWorldProver\fP\&.
+.sp
+The \(aqsignatures\(aq property is a list of tuples defining signatures for
+which the predicate is true. For instance, \(aqsee(john, mary)\(aq would
+result in the signature \(aq(john,mary)\(aq for \(aqsee\(aq.
+.sp
+The second element of the pair is a list of pairs such that the first
+element of the pair is a tuple of variables and the second element is an
+expression of those variables that makes the predicate true. For instance,
+\(aqall x.all y.(see(x,y) \-> know(x,y))\(aq would result in "((x,y),(\(aqsee(x,y)\(aq))"
+for \(aqknow\(aq.
+.INDENT 7.0
+.TP
+.B append_prop(new_prop)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B append_sig(new_sig)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B validate_sig_len(new_sig)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.inference.nonmonotonic.ProverParseError
+Bases: \fBException\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.nonmonotonic.SetHolder(iterable=(), /)
+Bases: \fBlist\fP
+.sp
+A list of sets of Variables.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.nonmonotonic.UniqueNamesProver(proverCommand)
+Bases: \fI\%nltk.inference.api.ProverCommandDecorator\fP
+.sp
+This is a prover decorator that adds unique names assumptions before
+proving.
+.INDENT 7.0
+.TP
+.B assumptions()
+.INDENT 7.0
+.IP \(bu 2
+Domain = union([e.free()|e.constants() for e in all_expressions])
+.IP \(bu 2
+if "d1 = d2" cannot be proven from the premises, then add "d1 != d2"
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.nonmonotonic.closed_domain_demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.nonmonotonic.closed_world_demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.nonmonotonic.combination_prover_demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.nonmonotonic.default_reasoning_demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.nonmonotonic.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.nonmonotonic.get_domain(goal, assumptions)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.nonmonotonic.print_proof(goal, premises)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.nonmonotonic.unique_names_demo()
+.UNINDENT
+.SS nltk.inference.prover9 module
+.sp
+A theorem prover that makes use of the external \(aqProver9\(aq package.
+.INDENT 0.0
+.TP
+.B class nltk.inference.prover9.Prover9(timeout=60)
+Bases: \fI\%nltk.inference.prover9.Prover9Parent\fP, \fI\%nltk.inference.api.Prover\fP
+.INDENT 7.0
+.TP
+.B prover9_input(goal, assumptions)
+.INDENT 7.0
+.TP
+.B See
+Prover9Parent.prover9_input
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.prover9.Prover9Command(goal=None, assumptions=None, timeout=60, prover=None)
+Bases: \fI\%nltk.inference.prover9.Prover9CommandParent\fP, \fI\%nltk.inference.api.BaseProverCommand\fP
+.sp
+A \fBProverCommand\fP specific to the \fBProver9\fP prover. It contains
+a print_assumptions() method that is used to print the list
+of assumptions in multiple formats.
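+.sp
+A minimal sketch (assuming the external Prover9 binary is installed):
+.sp
+.nf
+.ft C
+>>> from nltk.sem import Expression
+>>> from nltk.inference.prover9 import Prover9Command
+>>> read_expr = Expression.fromstring
+>>> p = Prover9Command(read_expr(\(aqmortal(socrates)\(aq),
+...                    [read_expr(\(aqall x.(man(x) \-> mortal(x))\(aq),
+...                     read_expr(\(aqman(socrates)\(aq)])
+>>> p.prove()
+True
+.ft P
+.fi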
+.INDENT 7.0
+.TP
+.B decorate_proof(proof_string, simplify=True)
+.INDENT 7.0
+.TP
+.B See
+BaseProverCommand.decorate_proof()
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.prover9.Prover9CommandParent
+Bases: \fBobject\fP
+.sp
+A common base class used by both \fBProver9Command\fP and \fBMaceCommand\fP,
+which is responsible for maintaining a goal and a set of assumptions,
+and generating prover9\-style input files from them.
+.INDENT 7.0
+.TP
+.B print_assumptions(output_format=\(aqnltk\(aq)
+Print the list of the current assumptions.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.inference.prover9.Prover9Exception(returncode, message)
+Bases: \fBException\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.inference.prover9.Prover9FatalException(returncode, message)
+Bases: \fI\%nltk.inference.prover9.Prover9Exception\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.inference.prover9.Prover9LimitExceededException(returncode, message)
+Bases: \fI\%nltk.inference.prover9.Prover9Exception\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.prover9.Prover9Parent
+Bases: \fBobject\fP
+.sp
+A common class extended by both \fBProver9\fP and \fBMace\fP\&.
+It contains the functionality required to convert NLTK\-style
+expressions into Prover9\-style expressions.
+.INDENT 7.0
+.TP
+.B binary_locations()
+A list of directories that should be searched for the prover9
+executables. This list is used by \fBconfig_prover9\fP when searching
+for the prover9 executables.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B config_prover9(binary_location, verbose=False)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B prover9_input(goal, assumptions)
+.INDENT 7.0
+.TP
+.B Returns
+The input string that should be provided to the
+prover9 binary. This string is formed based on the goal,
+assumptions, and timeout value of this object.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.prover9.convert_to_prover9(input)
+Convert a \fBlogic.Expression\fP to Prover9 format.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.prover9.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.prover9.spacer(num=45)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.prover9.test_config()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.prover9.test_convert_to_prover9(expr)
+Test that parsing works OK.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.inference.prover9.test_prove(arguments)
+Try some proofs and exhibit the results.
+.UNINDENT
+.SS nltk.inference.resolution module
+.sp
+Module for a resolution\-based First Order theorem prover.
+.INDENT 0.0
+.TP
+.B class nltk.inference.resolution.BindingDict(binding_list=None)
+Bases: \fBobject\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.inference.resolution.BindingException(arg)
+Bases: \fBException\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.inference.resolution.Clause(data)
+Bases: \fBlist\fP
+.INDENT 7.0
+.TP
+.B free()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B isSubsetOf(other)
+Return True iff every term in \(aqself\(aq is a term in \(aqother\(aq.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBother\fP \-\- \fBClause\fP
+.TP
+.B Returns
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_tautology()
+Self is a tautology if it contains ground terms P and \-P. The ground
+term, P, must be an exact match, i.e., not using unification.
+.UNINDENT +.INDENT 7.0 +.TP +.B replace(variable, expression) +Replace every instance of variable with expression across every atom +in the clause +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBvariable\fP \-\- \fBVariable\fP +.IP \(bu 2 +\fBexpression\fP \-\- \fBExpression\fP +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B substitute_bindings(bindings) +Replace every binding +.INDENT 7.0 +.TP +.B Parameters +\fBbindings\fP \-\- A list of tuples mapping Variable Expressions to the +.UNINDENT +.sp +Expressions to which they are bound +:return: \fBClause\fP +.UNINDENT +.INDENT 7.0 +.TP +.B subsumes(other) +Return True iff \(aqself\(aq subsumes \(aqother\(aq, this is, if there is a +substitution such that every term in \(aqself\(aq can be unified with a term +in \(aqother\(aq. +.INDENT 7.0 +.TP +.B Parameters +\fBother\fP \-\- \fBClause\fP +.TP +.B Returns +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B unify(other, bindings=None, used=None, skipped=None, debug=False) +Attempt to unify this Clause with the other, returning a list of +resulting, unified, Clauses. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP \-\- \fBClause\fP with which to unify +.IP \(bu 2 +\fBbindings\fP \-\- \fBBindingDict\fP containing bindings that should be used +.UNINDENT +.UNINDENT +.sp +during the unification +:param used: tuple of two lists of atoms. The first lists the +atoms from \(aqself\(aq that were successfully unified with atoms from +\(aqother\(aq. The second lists the atoms from \(aqother\(aq that were successfully +unified with atoms from \(aqself\(aq. +:param skipped: tuple of two \fBClause\fP objects. The first is a list of all +the atoms from the \(aqself\(aq Clause that have not been unified with +anything on the path. The second is same thing for the \(aqother\(aq Clause. +:param debug: bool indicating whether debug statements should print +:return: list containing all the resulting \fBClause\fP objects that could be +obtained by unification +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.resolution.DebugObject(enabled=True, indent=0) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B line(line) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.inference.resolution.ProverParseError +Bases: \fBException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.resolution.ResolutionProver +Bases: \fI\%nltk.inference.api.Prover\fP +.INDENT 7.0 +.TP +.B ANSWER_KEY = \(aqANSWER\(aq +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.resolution.ResolutionProverCommand(goal=None, assumptions=None, prover=None) +Bases: \fI\%nltk.inference.api.BaseProverCommand\fP +.INDENT 7.0 +.TP +.B find_answers(verbose=False) +.UNINDENT +.INDENT 7.0 +.TP +.B prove(verbose=False) +Perform the actual proof. Store the result to prevent unnecessary +re\-proving. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.inference.resolution.UnificationException(a, b) +Bases: \fBException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.resolution.clausify(expression) +Skolemize, clausify, and standardize the variables apart. 
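+.sp
+For example (a sketch; the exact variable names in the printed clauses
+depend on internal renaming, so the output may differ):
+.sp
+.nf
+.ft C
+>>> from nltk.sem import Expression
+>>> from nltk.inference.resolution import clausify
+>>> read_expr = Expression.fromstring
+>>> for clause in clausify(read_expr(\(aqall x.(man(x) \-> mortal(x))\(aq)):
+...     print(clause)
+{\-man(z1), mortal(z1)}
+.ft P
+.fi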
+.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.resolution.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.resolution.most_general_unification(a, b, bindings=None) +Find the most general unification of the two given expressions +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBa\fP \-\- \fBExpression\fP +.IP \(bu 2 +\fBb\fP \-\- \fBExpression\fP +.IP \(bu 2 +\fBbindings\fP \-\- \fBBindingDict\fP a starting set of bindings with which the +unification must be consistent +.UNINDENT +.TP +.B Returns +a list of bindings +.TP +.B Raises +\fBBindingException\fP \-\- if the Expressions cannot be unified +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.resolution.resolution_test(e) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.resolution.testResolutionProver() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.resolution.test_clausify() +.UNINDENT +.SS nltk.inference.tableau module +.sp +Module for a tableau\-based First Order theorem prover. +.INDENT 0.0 +.TP +.B class nltk.inference.tableau.Agenda +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B clone() +.UNINDENT +.INDENT 7.0 +.TP +.B mark_alls_fresh() +.UNINDENT +.INDENT 7.0 +.TP +.B mark_neqs_fresh() +.UNINDENT +.INDENT 7.0 +.TP +.B pop_first() +Pop the first expression that appears in the agenda +.UNINDENT +.INDENT 7.0 +.TP +.B put(expression, context=None) +.UNINDENT +.INDENT 7.0 +.TP +.B put_all(expressions) +.UNINDENT +.INDENT 7.0 +.TP +.B put_atoms(atoms) +.UNINDENT +.INDENT 7.0 +.TP +.B replace_all(old, new) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.tableau.Categories +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B ALL = 20 +.UNINDENT +.INDENT 7.0 +.TP +.B AND = 10 +.UNINDENT +.INDENT 7.0 +.TP +.B APP = 4 +.UNINDENT +.INDENT 7.0 +.TP +.B ATOM = 0 +.UNINDENT +.INDENT 7.0 +.TP +.B D_NEG = 7 +.UNINDENT +.INDENT 7.0 +.TP +.B EQ = 18 +.UNINDENT +.INDENT 7.0 +.TP +.B EXISTS = 19 +.UNINDENT +.INDENT 7.0 +.TP +.B IFF = 16 +.UNINDENT +.INDENT 7.0 +.TP +.B IMP = 14 +.UNINDENT +.INDENT 7.0 +.TP +.B N_ALL = 8 +.UNINDENT +.INDENT 7.0 +.TP +.B N_AND = 15 +.UNINDENT +.INDENT 7.0 +.TP +.B N_APP = 5 +.UNINDENT +.INDENT 7.0 +.TP +.B N_ATOM = 2 +.UNINDENT +.INDENT 7.0 +.TP +.B N_EQ = 6 +.UNINDENT +.INDENT 7.0 +.TP +.B N_EXISTS = 9 +.UNINDENT +.INDENT 7.0 +.TP +.B N_IFF = 17 +.UNINDENT +.INDENT 7.0 +.TP +.B N_IMP = 12 +.UNINDENT +.INDENT 7.0 +.TP +.B N_OR = 11 +.UNINDENT +.INDENT 7.0 +.TP +.B N_PROP = 3 +.UNINDENT +.INDENT 7.0 +.TP +.B OR = 13 +.UNINDENT +.INDENT 7.0 +.TP +.B PROP = 1 +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.tableau.Debug(verbose, indent=0, lines=None) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B line(data, indent=0) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.inference.tableau.ProverParseError +Bases: \fBException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.tableau.TableauProver +Bases: \fI\%nltk.inference.api.Prover\fP +.INDENT 7.0 +.TP +.B static is_atom(e) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.inference.tableau.TableauProverCommand(goal=None, assumptions=None, prover=None) +Bases: \fI\%nltk.inference.api.BaseProverCommand\fP +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.tableau.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.tableau.tableau_test(c, ps=None, verbose=False) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.tableau.testHigherOrderTableauProver() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.inference.tableau.testTableauProver() +.UNINDENT +.SS Module contents +.sp +Classes and interfaces for theorem proving and model 
building.
+.SS nltk.lm package
+.SS Submodules
+.SS nltk.lm.api module
+.sp
+Language Model Interface.
+.INDENT 0.0
+.TP
+.B class nltk.lm.api.LanguageModel(order, vocabulary=None, counter=None)
+Bases: \fBobject\fP
+.sp
+ABC for Language Models.
+.sp
+It cannot be instantiated directly.
+.INDENT 7.0
+.TP
+.B context_counts(context)
+Helper method for retrieving counts for a given context.
+.sp
+Assumes context has been checked and oov words in it masked.
+:type context: tuple(str) or None
+.UNINDENT
+.INDENT 7.0
+.TP
+.B entropy(text_ngrams)
+Calculate cross\-entropy of model for given evaluation text.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtext_ngrams\fP (\fIIterable\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- A sequence of ngram tuples.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fit(text, vocabulary_text=None)
+Trains the model on a text.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtext\fP \-\- Training text as a sequence of sentences.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B generate(num_words=1, text_seed=None, random_seed=None)
+Generate words from the model.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBnum_words\fP (\fIint\fP) \-\- How many words to generate. By default 1.
+.IP \(bu 2
+\fBtext_seed\fP \-\- Generation can be conditioned on preceding context.
+.IP \(bu 2
+\fBrandom_seed\fP \-\- A random seed or an instance of \fIrandom.Random\fP\&. If provided,
+makes the random sampling part of generation reproducible.
+.UNINDENT
+.TP
+.B Returns
+One (str) word or a list of words generated from model.
+.UNINDENT
+.sp
+Examples:
+.sp
+.nf
+.ft C
+>>> from nltk.lm import MLE
+>>> lm = MLE(2)
+>>> lm.fit([[("a", "b"), ("b", "c")]], vocabulary_text=[\(aqa\(aq, \(aqb\(aq, \(aqc\(aq])
+>>> lm.fit([[("a",), ("b",), ("c",)]])
+>>> lm.generate(random_seed=3)
+\(aqa\(aq
+>>> lm.generate(text_seed=[\(aqa\(aq])
+\(aqb\(aq
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B logscore(word, context=None)
+Evaluate the log score of this word in this context.
+.sp
+The arguments are the same as for \fIscore\fP and \fIunmasked_score\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B perplexity(text_ngrams)
+Calculates the perplexity of the given text.
+.sp
+This is simply 2 ** cross\-entropy for the text, so the arguments are the same.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B score(word, context=None)
+Masks out of vocab (OOV) words and computes their model score.
+.sp
+For model\-specific logic of calculating scores, see the \fIunmasked_score\fP
+method.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract unmasked_score(word, context=None)
+Score a word given some optional context.
+.sp
+Concrete models are expected to provide an implementation.
+Note that this method does not mask its arguments with the OOV label.
+Use the \fIscore\fP method for that.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBword\fP (\fIstr\fP) \-\- Word for which we want the score
+.IP \(bu 2
+\fBcontext\fP (\fItuple\fP\fI(\fP\fIstr\fP\fI) or None\fP) \-\- Context the word is in.
+If \fINone\fP, compute unigram score.
+.UNINDENT
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.api.Smoothing(vocabulary, counter)
+Bases: \fBobject\fP
+.sp
+Ngram Smoothing Interface
+.sp
+Implements Chen & Goodman 1995\(aqs idea that all smoothing algorithms have
+certain features in common. This should ideally allow smoothing algorithms to
+work both with Backoff and Interpolation.
+.INDENT 7.0
+.TP
+.B abstract alpha_gamma(word, context)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract unigram_score(word)
+.UNINDENT
+.UNINDENT
+.SS nltk.lm.counter module
+.SS Language Model Counter
+.INDENT 0.0
+.TP
+.B class nltk.lm.counter.NgramCounter(ngram_text=None)
+Bases: \fBobject\fP
+.sp
+Class for counting ngrams.
+.sp
+Will count any ngram sequence you give it ;)
+.sp
+First we need to make sure we are feeding the counter sentences of ngrams.
+.sp
+.nf
+.ft C
+>>> text = [["a", "b", "c", "d"], ["a", "c", "d", "c"]]
+>>> from nltk.util import ngrams
+>>> text_bigrams = [ngrams(sent, 2) for sent in text]
+>>> text_unigrams = [ngrams(sent, 1) for sent in text]
+.ft P
+.fi
+.sp
+The counting itself is very simple.
+.sp
+.nf
+.ft C
+>>> from nltk.lm import NgramCounter
+>>> ngram_counts = NgramCounter(text_bigrams + text_unigrams)
+.ft P
+.fi
+.sp
+You can conveniently access ngram counts using standard python dictionary notation.
+String keys will give you unigram counts.
+.sp
+.nf
+.ft C
+>>> ngram_counts[\(aqa\(aq]
+2
+>>> ngram_counts[\(aqaliens\(aq]
+0
+.ft P
+.fi
+.sp
+If you want to access counts for higher order ngrams, use a list or a tuple.
+These are treated as "context" keys, so what you get is a frequency distribution
+over all continuations after the given context.
+.sp
+.nf
+.ft C
+>>> sorted(ngram_counts[[\(aqa\(aq]].items())
+[(\(aqb\(aq, 1), (\(aqc\(aq, 1)]
+>>> sorted(ngram_counts[(\(aqa\(aq,)].items())
+[(\(aqb\(aq, 1), (\(aqc\(aq, 1)]
+.ft P
+.fi
+.sp
+This is equivalent to specifying explicitly the order of the ngram (in this case
+2 for bigram) and indexing on the context.
+.sp
+.nf
+.ft C
+>>> ngram_counts[2][(\(aqa\(aq,)] is ngram_counts[[\(aqa\(aq]]
+True
+.ft P
+.fi
+.sp
+Note that the keys in \fIConditionalFreqDist\fP cannot be lists, only tuples!
+It is generally advisable to use the less verbose and more flexible square
+bracket notation.
+.sp
+To get the count of the full ngram "a b", do this:
+.sp
+.nf
+.ft C
+>>> ngram_counts[[\(aqa\(aq]][\(aqb\(aq]
+1
+.ft P
+.fi
+.sp
+Specifying the ngram order as a number can be useful for accessing all ngrams
+in that order.
+.sp
+.nf
+.ft C
+>>> ngram_counts[2]
+<ConditionalFreqDist with 4 conditions>
+.ft P
+.fi
+.sp
+The keys of this \fIConditionalFreqDist\fP are the contexts we discussed earlier.
+Unigrams can also be accessed with a human\-friendly alias.
+.sp
+.nf
+.ft C
+>>> ngram_counts.unigrams is ngram_counts[1]
+True
+.ft P
+.fi
+.sp
+Similarly to \fIcollections.Counter\fP, you can update counts after initialization.
+.sp
+.nf
+.ft C
+>>> ngram_counts[\(aqe\(aq]
+0
+>>> ngram_counts.update([ngrams(["d", "e", "f"], 1)])
+>>> ngram_counts[\(aqe\(aq]
+1
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B N()
+Returns grand total number of ngrams stored.
+.sp
+This includes ngrams from all orders, so some duplication is expected.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.lm import NgramCounter
+>>> counts = NgramCounter([[("a", "b"), ("c",), ("d", "e")]])
+>>> counts.N()
+3
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update(ngram_text)
+Updates ngram counts from \fIngram_text\fP\&.
+.sp
+Expects \fIngram_text\fP to be a sequence of sentences (sequences).
+Each sentence consists of ngrams as tuples of strings.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBngram_text\fP (\fIIterable\fP\fI(\fP\fIIterable\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- Text containing sentences of ngrams.
+.TP
+.B Raises
+\fBTypeError\fP \-\- if the ngrams are not tuples.
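+.sp
+An editor\-added example of \fIupdate\fP (outputs follow from the documented
+behaviour above):
+.sp
+.nf
+.ft C
+>>> from nltk.lm import NgramCounter
+>>> from nltk.util import ngrams
+>>> counts = NgramCounter()
+>>> counts.update([ngrams(["a", "b", "c"], 2)])
+>>> counts[[\(aqa\(aq]][\(aqb\(aq]
+1
+.ft P
+.fi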
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.lm.models module
+.sp
+Language Models
+.INDENT 0.0
+.TP
+.B class nltk.lm.models.AbsoluteDiscountingInterpolated(order, discount=0.75, **kwargs)
+Bases: \fI\%nltk.lm.models.InterpolatedLanguageModel\fP
+.sp
+Interpolated version of smoothing with absolute discount.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.models.InterpolatedLanguageModel(smoothing_cls, order, **kwargs)
+Bases: \fI\%nltk.lm.api.LanguageModel\fP
+.sp
+Logic common to all interpolated language models.
+.sp
+The idea to abstract this comes from Chen & Goodman 1995.
+Do not instantiate this class directly!
+.INDENT 7.0
+.TP
+.B unmasked_score(word, context=None)
+Score a word given some optional context.
+.sp
+Concrete models are expected to provide an implementation.
+Note that this method does not mask its arguments with the OOV label.
+Use the \fIscore\fP method for that.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBword\fP (\fIstr\fP) \-\- Word for which we want the score
+.IP \(bu 2
+\fBcontext\fP (\fItuple\fP\fI(\fP\fIstr\fP\fI) or None\fP) \-\- Context the word is in.
+If \fINone\fP, compute unigram score.
+.UNINDENT
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.models.KneserNeyInterpolated(order, discount=0.1, **kwargs)
+Bases: \fI\%nltk.lm.models.InterpolatedLanguageModel\fP
+.sp
+Interpolated version of Kneser\-Ney smoothing.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.models.Laplace(*args, **kwargs)
+Bases: \fI\%nltk.lm.models.Lidstone\fP
+.sp
+Implements Laplace (add one) smoothing.
+.sp
+Initialization identical to BaseNgramModel because gamma is always 1.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.models.Lidstone(gamma, *args, **kwargs)
+Bases: \fI\%nltk.lm.api.LanguageModel\fP
+.sp
+Provides Lidstone\-smoothed scores.
+.sp
+In addition to initialization arguments from BaseNgramModel also requires
+a number by which to increase the counts, gamma.
+.INDENT 7.0
+.TP
+.B unmasked_score(word, context=None)
+Add\-one smoothing: Lidstone or Laplace.
+.sp
+To see what kind, look at \fIgamma\fP attribute on the class.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.models.MLE(order, vocabulary=None, counter=None)
+Bases: \fI\%nltk.lm.api.LanguageModel\fP
+.sp
+Class for providing MLE ngram model scores.
+.sp
+Inherits initialization from BaseNgramModel.
+.INDENT 7.0
+.TP
+.B unmasked_score(word, context=None)
+Returns the MLE score for a word given a context.
+.sp
+Args:
+\- word is expected to be a string
+\- context is expected to be something reasonably convertible to a tuple
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.models.StupidBackoff(alpha=0.4, *args, **kwargs)
+Bases: \fI\%nltk.lm.api.LanguageModel\fP
+.sp
+Provides StupidBackoff scores.
+.sp
+In addition to initialization arguments from BaseNgramModel also requires
+a parameter alpha with which we scale the lower order probabilities.
+Note that this is not a true probability distribution as scores for ngrams
+of the same order do not sum up to unity.
+.INDENT 7.0
+.TP
+.B unmasked_score(word, context=None)
+Score a word given some optional context.
+.sp
+Concrete models are expected to provide an implementation.
+Note that this method does not mask its arguments with the OOV label.
+Use the \fIscore\fP method for that.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBword\fP (\fIstr\fP) \-\- Word for which we want the score
+.IP \(bu 2
+\fBcontext\fP (\fItuple\fP\fI(\fP\fIstr\fP\fI) or None\fP) \-\- Context the word is in.
+If \fINone\fP, compute unigram score.
+.UNINDENT
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.models.WittenBellInterpolated(order, **kwargs)
+Bases: \fI\%nltk.lm.models.InterpolatedLanguageModel\fP
+.sp
+Interpolated version of Witten\-Bell smoothing.
+.UNINDENT
+.SS nltk.lm.preprocessing module
+.INDENT 0.0
+.TP
+.B nltk.lm.preprocessing.flatten(iterable, /)
+Alternative chain() constructor taking a single iterable argument that evaluates lazily.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.lm.preprocessing.padded_everygram_pipeline(order, text)
+Default preprocessing for a sequence of sentences.
+.sp
+Creates two iterators:
+.INDENT 7.0
+.IP \(bu 2
+sentences padded and turned into sequences of \fInltk.util.everygrams\fP
+.IP \(bu 2
+sentences padded as above and chained together for a flat stream of words
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBorder\fP \-\- Largest ngram length produced by \fIeverygrams\fP\&.
+.IP \(bu 2
+\fBtext\fP (\fIIterable\fP\fI(\fP\fIIterable\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Text to iterate over. Expected to be an iterable of sentences.
+.UNINDENT
+.TP
+.B Returns
+iterator over text as ngrams, iterator over text as vocabulary data
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.lm.preprocessing.padded_everygrams(order, sentence)
+Helper with some useful defaults.
+.sp
+Applies pad_both_ends to sentence and follows it up with everygrams.
+.UNINDENT
+.SS nltk.lm.smoothing module
+.sp
+Smoothing algorithms for language modeling.
+.sp
+According to Chen & Goodman 1995 these should work with both Backoff and
+Interpolation.
+.INDENT 0.0
+.TP
+.B class nltk.lm.smoothing.AbsoluteDiscounting(vocabulary, counter, discount=0.75, **kwargs)
+Bases: \fI\%nltk.lm.api.Smoothing\fP
+.sp
+Smoothing with absolute discount.
+.INDENT 7.0
+.TP
+.B alpha_gamma(word, context)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B unigram_score(word)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.smoothing.KneserNey(vocabulary, counter, order, discount=0.1, **kwargs)
+Bases: \fI\%nltk.lm.api.Smoothing\fP
+.sp
+Kneser\-Ney Smoothing.
+.sp
+This is an extension of smoothing with a discount.
+.sp
+Resources:
+.INDENT 7.0
+.IP \(bu 2
+\fI\%https://pages.ucsd.edu/~rlevy/lign256/winter2008/kneser_ney_mini_example.pdf\fP
+.IP \(bu 2
+\fI\%https://www.youtube.com/watch?v=ody1ysUTD7o\fP
+.IP \(bu 2
+\fI\%https://medium.com/@dennyc/a\-simple\-numerical\-example\-for\-kneser\-ney\-smoothing\-nlp\-4600addf38b8\fP
+.IP \(bu 2
+\fI\%https://www.cl.uni\-heidelberg.de/courses/ss15/smt/scribe6.pdf\fP
+.IP \(bu 2
+\fI\%https://www\-i6.informatik.rwth\-aachen.de/publications/download/951/Kneser\-ICASSP\-1995.pdf\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B alpha_gamma(word, context)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B unigram_score(word)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.smoothing.WittenBell(vocabulary, counter, **kwargs)
+Bases: \fI\%nltk.lm.api.Smoothing\fP
+.sp
+Witten\-Bell smoothing.
+.INDENT 7.0
+.TP
+.B alpha_gamma(word, context)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B unigram_score(word)
+.UNINDENT
+.UNINDENT
+.SS nltk.lm.util module
+.sp
+Language Model Utilities
+.INDENT 0.0
+.TP
+.B nltk.lm.util.log_base2(score)
+Convenience function for computing logarithms with base 2.
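+.sp
+For instance (an editor\-added example):
+.sp
+.nf
+.ft C
+>>> from nltk.lm.util import log_base2
+>>> log_base2(8)
+3.0
+.ft P
+.fi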
+.UNINDENT
+.SS nltk.lm.vocabulary module
+.sp
+Language Model Vocabulary
+.INDENT 0.0
+.TP
+.B class nltk.lm.vocabulary.Vocabulary(counts=None, unk_cutoff=1, unk_label=\(aq<UNK>\(aq)
+Bases: \fBobject\fP
+.sp
+Stores language model vocabulary.
+.sp
+Satisfies two common language modeling requirements for a vocabulary:
+.INDENT 7.0
+.IP \(bu 2
+When checking membership and calculating its size, filters items
+by comparing their counts to a cutoff value.
+.IP \(bu 2
+Adds a special "unknown" token which unseen words are mapped to.
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> words = [\(aqa\(aq, \(aqc\(aq, \(aq\-\(aq, \(aqd\(aq, \(aqc\(aq, \(aqa\(aq, \(aqb\(aq, \(aqr\(aq, \(aqa\(aq, \(aqc\(aq, \(aqd\(aq]
+>>> from nltk.lm import Vocabulary
+>>> vocab = Vocabulary(words, unk_cutoff=2)
+.ft P
+.fi
+.sp
+Tokens with counts greater than or equal to the cutoff value will
+be considered part of the vocabulary.
+.sp
+.nf
+.ft C
+>>> vocab[\(aqc\(aq]
+3
+>>> \(aqc\(aq in vocab
+True
+>>> vocab[\(aqd\(aq]
+2
+>>> \(aqd\(aq in vocab
+True
+.ft P
+.fi
+.sp
+Tokens with frequency counts less than the cutoff value will be considered not
+part of the vocabulary even though their entries in the count dictionary are
+preserved.
+.sp
+.nf
+.ft C
+>>> vocab[\(aqb\(aq]
+1
+>>> \(aqb\(aq in vocab
+False
+>>> vocab[\(aqaliens\(aq]
+0
+>>> \(aqaliens\(aq in vocab
+False
+.ft P
+.fi
+.sp
+Keeping the count entries for seen words allows us to change the cutoff value
+without having to recalculate the counts.
+.sp
+.nf
+.ft C
+>>> vocab2 = Vocabulary(vocab.counts, unk_cutoff=1)
+>>> "b" in vocab2
+True
+.ft P
+.fi
+.sp
+The cutoff value influences not only membership checking but also the result of
+getting the size of the vocabulary using the built\-in \fIlen\fP\&.
+Note that while the number of keys in the vocabulary\(aqs counter stays the same,
+the items in the vocabulary differ depending on the cutoff.
+We use \fIsorted\fP to demonstrate because it keeps the order consistent.
+.sp
+.nf
+.ft C
+>>> sorted(vocab2.counts)
+[\(aq\-\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aqd\(aq, \(aqr\(aq]
+>>> sorted(vocab2)
+[\(aq\-\(aq, \(aq<UNK>\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aqd\(aq, \(aqr\(aq]
+>>> sorted(vocab.counts)
+[\(aq\-\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aqd\(aq, \(aqr\(aq]
+>>> sorted(vocab)
+[\(aq<UNK>\(aq, \(aqa\(aq, \(aqc\(aq, \(aqd\(aq]
+.ft P
+.fi
+.sp
+In addition to items it gets populated with, the vocabulary stores a special
+token that stands in for so\-called "unknown" items. By default it\(aqs "<UNK>".
+.sp
+.nf
+.ft C
+>>> "<UNK>" in vocab
+True
+.ft P
+.fi
+.sp
+We can look up words in a vocabulary using its \fIlookup\fP method.
+"Unseen" words (with counts less than cutoff) are looked up as the unknown label.
+If given one word (a string) as an input, this method will return a string.
+.sp
+.nf
+.ft C
+>>> vocab.lookup("a")
+\(aqa\(aq
+>>> vocab.lookup("aliens")
+\(aq<UNK>\(aq
+.ft P
+.fi
+.sp
+If given a sequence, it will return a tuple of the looked up words.
+.sp
+.nf
+.ft C
+>>> vocab.lookup(["p", \(aqa\(aq, \(aqr\(aq, \(aqd\(aq, \(aqb\(aq, \(aqc\(aq])
+(\(aq<UNK>\(aq, \(aqa\(aq, \(aq<UNK>\(aq, \(aqd\(aq, \(aq<UNK>\(aq, \(aqc\(aq)
+.ft P
+.fi
+.sp
+It\(aqs possible to update the counts after the vocabulary has been created.
+In general, the interface is the same as that of \fIcollections.Counter\fP\&.
+.sp
+.nf
+.ft C
+>>> vocab[\(aqb\(aq]
+1
+>>> vocab.update(["b", "b", "c"])
+>>> vocab[\(aqb\(aq]
+3
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B property cutoff
+Cutoff value.
+.sp
+Items with count below this value are not considered part of vocabulary.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lookup(words)
+Look up one or more words in the vocabulary.
+.sp
+If passed one word as a string will return that word or \fIself.unk_label\fP\&.
+Otherwise will assume it was passed a sequence of words, will try to look
+each of them up and return an iterator over the looked up words.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBwords\fP (\fIIterable\fP\fI(\fP\fIstr\fP\fI) or \fP\fIstr\fP) \-\- Word(s) to look up.
+.TP
+.B Return type
+generator(str) or str
+.TP
+.B Raises
+TypeError for types other than strings or iterables
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.lm import Vocabulary
+>>> vocab = Vocabulary(["a", "b", "c", "a", "b"], unk_cutoff=2)
+>>> vocab.lookup("a")
+\(aqa\(aq
+>>> vocab.lookup("aliens")
+\(aq<UNK>\(aq
+>>> vocab.lookup(["a", "b", "c", ["x", "b"]])
+(\(aqa\(aq, \(aqb\(aq, \(aq<UNK>\(aq, (\(aq<UNK>\(aq, \(aqb\(aq))
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update(*counter_args, **counter_kwargs)
+Update vocabulary counts.
+.sp
+Wraps \fIcollections.Counter.update\fP method.
+.UNINDENT
+.UNINDENT
+.SS Module contents
+.SS NLTK Language Modeling Module.
+.sp
+Currently this module covers only ngram language models, but it should be easy
+to extend to neural models.
+.SS Preparing Data
+.sp
+Before we train our ngram models it is necessary to make sure the data we put in
+them is in the right format.
+Let\(aqs say we have a text that is a list of sentences, where each sentence is
+a list of strings. For simplicity we just consider a text consisting of
+characters instead of words.
+.sp
+.nf
+.ft C
+>>> text = [[\(aqa\(aq, \(aqb\(aq, \(aqc\(aq], [\(aqa\(aq, \(aqc\(aq, \(aqd\(aq, \(aqc\(aq, \(aqe\(aq, \(aqf\(aq]]
+.ft P
+.fi
+.sp
+If we want to train a bigram model, we need to turn this text into bigrams.
+Here\(aqs what the first sentence of our text would look like if we use a function
+from NLTK for this.
+.sp
+.nf
+.ft C
+>>> from nltk.util import bigrams
+>>> list(bigrams(text[0]))
+[(\(aqa\(aq, \(aqb\(aq), (\(aqb\(aq, \(aqc\(aq)]
+.ft P
+.fi
+.sp
+Notice how "b" occurs both as the first and second member of different bigrams
+but "a" and "c" don\(aqt? Wouldn\(aqt it be nice to somehow indicate how often sentences
+start with "a" and end with "c"?
+A standard way to deal with this is to add special "padding" symbols to the
+sentence before splitting it into ngrams.
+Fortunately, NLTK also has a function for that, let\(aqs see what it does to the
+first sentence.
+.sp
+.nf
+.ft C
+>>> from nltk.util import pad_sequence
+>>> list(pad_sequence(text[0],
+\&...                   pad_left=True,
+\&...                   left_pad_symbol="<s>",
+\&...                   pad_right=True,
+\&...                   right_pad_symbol="</s>",
+\&...                   n=2))
+[\(aq<s>\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aq</s>\(aq]
+.ft P
+.fi
+.sp
+Note the \fIn\fP argument, which tells the function we need padding for bigrams.
+Now, passing all these parameters every time is tedious and in most cases they
+can be safely assumed as defaults anyway.
+Thus our module provides a convenience function that has all these arguments
+already set while the other arguments remain the same as for \fIpad_sequence\fP\&.
+.sp
+.nf
+.ft C
+>>> from nltk.lm.preprocessing import pad_both_ends
+>>> list(pad_both_ends(text[0], n=2))
+[\(aq<s>\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aq</s>\(aq]
+.ft P
+.fi
+.sp
+Combining the two parts discussed so far we get the following preparation steps
+for one sentence.
+.sp
+.nf
+.ft C
+>>> list(bigrams(pad_both_ends(text[0], n=2)))
+[(\(aq<s>\(aq, \(aqa\(aq), (\(aqa\(aq, \(aqb\(aq), (\(aqb\(aq, \(aqc\(aq), (\(aqc\(aq, \(aq</s>\(aq)]
+.ft P
+.fi
+.sp
+To make our model more robust we could also train it on unigrams (single words)
+as well as bigrams, its main source of information.
+NLTK once again helpfully provides a function called \fIeverygrams\fP\&.
+While not the most efficient, it is conceptually simple.
+.sp
+.nf
+.ft C
+>>> from nltk.util import everygrams
+>>> padded_bigrams = list(pad_both_ends(text[0], n=2))
+>>> list(everygrams(padded_bigrams, max_len=2))
+[(\(aq<s>\(aq,), (\(aq<s>\(aq, \(aqa\(aq), (\(aqa\(aq,), (\(aqa\(aq, \(aqb\(aq), (\(aqb\(aq,), (\(aqb\(aq, \(aqc\(aq), (\(aqc\(aq,), (\(aqc\(aq, \(aq</s>\(aq), (\(aq</s>\(aq,)]
+.ft P
+.fi
+.sp
+We are almost ready to start counting ngrams, just one more step left.
+During training and evaluation our model will rely on a vocabulary that
+defines which words are "known" to the model.
+To create this vocabulary we need to pad our sentences (just like for counting
+ngrams) and then combine the sentences into one flat stream of words.
+.sp
+.nf
+.ft C
+>>> from nltk.lm.preprocessing import flatten
+>>> list(flatten(pad_both_ends(sent, n=2) for sent in text))
+[\(aq<s>\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aq</s>\(aq, \(aq<s>\(aq, \(aqa\(aq, \(aqc\(aq, \(aqd\(aq, \(aqc\(aq, \(aqe\(aq, \(aqf\(aq, \(aq</s>\(aq]
+.ft P
+.fi
+.sp
+In most cases we want to use the same text as the source for both vocabulary
+and ngram counts.
+Now that we understand what this means for our preprocessing, we can simply import
+a function that does everything for us.
+.sp
+.nf
+.ft C
+>>> from nltk.lm.preprocessing import padded_everygram_pipeline
+>>> train, vocab = padded_everygram_pipeline(2, text)
+.ft P
+.fi
+.sp
+So as to avoid re\-creating the text in memory, both \fItrain\fP and \fIvocab\fP are lazy
+iterators. They are evaluated on demand at training time.
+.SS Training
+.sp
+Having prepared our data we are ready to start training a model.
+As a simple example, let us train a Maximum Likelihood Estimator (MLE).
+We only need to specify the highest ngram order to instantiate it.
+.sp
+.nf
+.ft C
+>>> from nltk.lm import MLE
+>>> lm = MLE(2)
+.ft P
+.fi
+.sp
+This automatically creates an empty vocabulary...
+.sp
+.nf
+.ft C
+>>> len(lm.vocab)
+0
+.ft P
+.fi
+.sp
+\&... which gets filled as we fit the model.
+.sp
+.nf
+.ft C
+>>> lm.fit(train, vocab)
+>>> print(lm.vocab)
+<Vocabulary with cutoff=1 unk_label=\(aq<UNK>\(aq and 9 items>
+>>> len(lm.vocab)
+9
+.ft P
+.fi
+.sp
+The vocabulary helps us handle words that have not occurred during training.
+.sp
+.nf
+.ft C
+>>> lm.vocab.lookup(text[0])
+(\(aqa\(aq, \(aqb\(aq, \(aqc\(aq)
+>>> lm.vocab.lookup(["aliens", "from", "Mars"])
+(\(aq<UNK>\(aq, \(aq<UNK>\(aq, \(aq<UNK>\(aq)
+.ft P
+.fi
+.sp
+Moreover, in some cases we want to ignore words that we did see during training
+but that didn\(aqt occur frequently enough to provide us useful information.
+You can tell the vocabulary to ignore such words.
+To find out how that works, check out the docs for the \fIVocabulary\fP class.
+.SS Using a Trained Model
+.sp
+When it comes to ngram models the training boils down to counting up the ngrams
+from the training corpus.
+.sp
+.nf
+.ft C
+>>> print(lm.counts)
+<NgramCounter with 2 ngram orders and 24 ngrams>
+.ft P
+.fi
+.sp
+This provides a convenient interface to access counts for unigrams...
+.sp
+.nf
+.ft C
+>>> lm.counts[\(aqa\(aq]
+2
+.ft P
+.fi
+.sp
+\&...and bigrams (in this case "a b")
+.sp
+.nf
+.ft C
+>>> lm.counts[[\(aqa\(aq]][\(aqb\(aq]
+1
+.ft P
+.fi
+.sp
+And so on.
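+.sp
+For example, the grand total of ngrams stored across both orders (an
+editor\-added check, consistent with the \fINgramCounter\fP repr shown above):
+.sp
+.nf
+.ft C
+>>> lm.counts.N()
+24
+.ft P
+.fi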
+.sp
+However, the real purpose of training a language model is to have it
+score how probable words are in certain contexts.
+This being MLE, the model returns the item\(aqs relative frequency as its score.
+.sp
+.nf
+.ft C
+>>> lm.score("a")
+0.15384615384615385
+.ft P
+.fi
+.sp
+Items that are not seen during training are mapped to the vocabulary\(aqs
+"unknown label" token. This is "<UNK>" by default.
+.sp
+.nf
+.ft C
+>>> lm.score("<UNK>") == lm.score("aliens")
+True
+.ft P
+.fi
+.sp
+Here\(aqs how you get the score for a word given some preceding context.
+For example we want to know what is the chance that "b" is preceded by "a".
+.sp
+.nf
+.ft C
+>>> lm.score("b", ["a"])
+0.5
+.ft P
+.fi
+.sp
+To avoid underflow when working with many small score values it makes sense to
+take their logarithm.
+For convenience this can be done with the \fIlogscore\fP method.
+.sp
+.nf
+.ft C
+>>> lm.logscore("a")
+\-2.700439718141092
+.ft P
+.fi
+.sp
+Building on this method, we can also evaluate our model\(aqs cross\-entropy and
+perplexity with respect to sequences of ngrams.
+.sp
+.nf
+.ft C
+>>> test = [(\(aqa\(aq, \(aqb\(aq), (\(aqc\(aq, \(aqd\(aq)]
+>>> lm.entropy(test)
+1.292481250360578
+>>> lm.perplexity(test)
+2.449489742783178
+.ft P
+.fi
+.sp
+It is advisable to preprocess your test text exactly the same way as you did
+the training text.
+.sp
+One cool feature of ngram models is that they can be used to generate text.
+.sp
+.nf
+.ft C
+>>> lm.generate(1, random_seed=3)
+\(aq<s>\(aq
+>>> lm.generate(5, random_seed=3)
+[\(aq<s>\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aqd\(aq]
+.ft P
+.fi
+.sp
+Provide \fIrandom_seed\fP if you want to consistently reproduce the same text all
+other things being equal. Here we are using it to test the examples.
+.sp
+You can also condition your generation on some preceding text with the
+\fItext_seed\fP argument.
+.sp
+.nf
+.ft C
+>>> lm.generate(5, text_seed=[\(aqc\(aq], random_seed=3)
+[\(aq<s>\(aq, \(aqc\(aq, \(aqd\(aq, \(aqc\(aq, \(aqd\(aq]
+.ft P
+.fi
+.sp
+Note that an ngram model is restricted in how much preceding context it can
+take into account. For example, a trigram model can only condition its output
+on 2 preceding words. If you pass in a 4\-word context, the first two words
+will be ignored.
+.INDENT 0.0
+.TP
+.B class nltk.lm.AbsoluteDiscountingInterpolated(order, discount=0.75, **kwargs)
+Bases: \fI\%nltk.lm.models.InterpolatedLanguageModel\fP
+.sp
+Interpolated version of smoothing with absolute discount.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.KneserNeyInterpolated(order, discount=0.1, **kwargs)
+Bases: \fI\%nltk.lm.models.InterpolatedLanguageModel\fP
+.sp
+Interpolated version of Kneser\-Ney smoothing.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.Laplace(*args, **kwargs)
+Bases: \fI\%nltk.lm.models.Lidstone\fP
+.sp
+Implements Laplace (add one) smoothing.
+.sp
+Initialization identical to BaseNgramModel because gamma is always 1.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.Lidstone(gamma, *args, **kwargs)
+Bases: \fI\%nltk.lm.api.LanguageModel\fP
+.sp
+Provides Lidstone\-smoothed scores.
+.sp
+In addition to initialization arguments from BaseNgramModel also requires
+a number by which to increase the counts, gamma.
+.INDENT 7.0
+.TP
+.B unmasked_score(word, context=None)
+Add\-one smoothing: Lidstone or Laplace.
+.sp
+To see what kind, look at \fIgamma\fP attribute on the class.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.MLE(order, vocabulary=None, counter=None)
+Bases: \fI\%nltk.lm.api.LanguageModel\fP
+.sp
+Class for providing MLE ngram model scores.
+.sp
+Inherits initialization from BaseNgramModel.
+.INDENT 7.0
+.TP
+.B unmasked_score(word, context=None)
+Returns the MLE score for a word given a context.
+.sp
+Args:
+\- word is expected to be a string
+\- context is expected to be something reasonably convertible to a tuple
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.NgramCounter(ngram_text=None)
+Bases: \fBobject\fP
+.sp
+Class for counting ngrams.
+.sp
+Will count any ngram sequence you give it ;)
+.sp
+First we need to make sure we are feeding the counter sentences of ngrams.
+.sp
+.nf
+.ft C
+>>> text = [["a", "b", "c", "d"], ["a", "c", "d", "c"]]
+>>> from nltk.util import ngrams
+>>> text_bigrams = [ngrams(sent, 2) for sent in text]
+>>> text_unigrams = [ngrams(sent, 1) for sent in text]
+.ft P
+.fi
+.sp
+The counting itself is very simple.
+.sp
+.nf
+.ft C
+>>> from nltk.lm import NgramCounter
+>>> ngram_counts = NgramCounter(text_bigrams + text_unigrams)
+.ft P
+.fi
+.sp
+You can conveniently access ngram counts using standard python dictionary notation.
+String keys will give you unigram counts.
+.sp
+.nf
+.ft C
+>>> ngram_counts[\(aqa\(aq]
+2
+>>> ngram_counts[\(aqaliens\(aq]
+0
+.ft P
+.fi
+.sp
+If you want to access counts for higher order ngrams, use a list or a tuple.
+These are treated as "context" keys, so what you get is a frequency distribution
+over all continuations after the given context.
+.sp
+.nf
+.ft C
+>>> sorted(ngram_counts[[\(aqa\(aq]].items())
+[(\(aqb\(aq, 1), (\(aqc\(aq, 1)]
+>>> sorted(ngram_counts[(\(aqa\(aq,)].items())
+[(\(aqb\(aq, 1), (\(aqc\(aq, 1)]
+.ft P
+.fi
+.sp
+This is equivalent to specifying explicitly the order of the ngram (in this case
+2 for bigram) and indexing on the context.
+.sp
+.nf
+.ft C
+>>> ngram_counts[2][(\(aqa\(aq,)] is ngram_counts[[\(aqa\(aq]]
+True
+.ft P
+.fi
+.sp
+Note that the keys in \fIConditionalFreqDist\fP cannot be lists, only tuples!
+It is generally advisable to use the less verbose and more flexible square
+bracket notation.
+.sp
+To get the count of the full ngram "a b", do this:
+.sp
+.nf
+.ft C
+>>> ngram_counts[[\(aqa\(aq]][\(aqb\(aq]
+1
+.ft P
+.fi
+.sp
+Specifying the ngram order as a number can be useful for accessing all ngrams
+in that order.
+.sp
+.nf
+.ft C
+>>> ngram_counts[2]
+<ConditionalFreqDist with 4 conditions>
+.ft P
+.fi
+.sp
+The keys of this \fIConditionalFreqDist\fP are the contexts we discussed earlier.
+Unigrams can also be accessed with a human\-friendly alias.
+.sp
+.nf
+.ft C
+>>> ngram_counts.unigrams is ngram_counts[1]
+True
+.ft P
+.fi
+.sp
+Similarly to \fIcollections.Counter\fP, you can update counts after initialization.
+.sp
+.nf
+.ft C
+>>> ngram_counts[\(aqe\(aq]
+0
+>>> ngram_counts.update([ngrams(["d", "e", "f"], 1)])
+>>> ngram_counts[\(aqe\(aq]
+1
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B N()
+Returns grand total number of ngrams stored.
+.sp
+This includes ngrams from all orders, so some duplication is expected.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.lm import NgramCounter
+>>> counts = NgramCounter([[("a", "b"), ("c",), ("d", "e")]])
+>>> counts.N()
+3
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update(ngram_text)
+Updates ngram counts from \fIngram_text\fP\&.
+.sp
+Expects \fIngram_text\fP to be a sequence of sentences (sequences).
+Each sentence consists of ngrams as tuples of strings.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBngram_text\fP (\fIIterable\fP\fI(\fP\fIIterable\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- Text containing sentences of ngrams.
+.TP
+.B Raises
+\fBTypeError\fP \-\- if the ngrams are not tuples.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.StupidBackoff(alpha=0.4, *args, **kwargs)
+Bases: \fI\%nltk.lm.api.LanguageModel\fP
+.sp
+Provides StupidBackoff scores.
+.sp
+In addition to initialization arguments from BaseNgramModel also requires
+a parameter alpha with which we scale the lower order probabilities.
+Note that this is not a true probability distribution as scores for ngrams
+of the same order do not sum up to unity.
+.INDENT 7.0
+.TP
+.B unmasked_score(word, context=None)
+Score a word given some optional context.
+.sp
+Concrete models are expected to provide an implementation.
+Note that this method does not mask its arguments with the OOV label.
+Use the \fIscore\fP method for that.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBword\fP (\fIstr\fP) \-\- Word for which we want the score
+.IP \(bu 2
+\fBcontext\fP (\fItuple\fP\fI(\fP\fIstr\fP\fI) or None\fP) \-\- Context the word is in.
+If \fINone\fP, compute unigram score.
+.UNINDENT
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.Vocabulary(counts=None, unk_cutoff=1, unk_label=\(aq<UNK>\(aq)
+Bases: \fBobject\fP
+.sp
+Stores language model vocabulary.
+.sp
+Satisfies two common language modeling requirements for a vocabulary:
+.INDENT 7.0
+.IP \(bu 2
+When checking membership and calculating its size, filters items
+by comparing their counts to a cutoff value.
+.IP \(bu 2
+Adds a special "unknown" token which unseen words are mapped to.
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> words = [\(aqa\(aq, \(aqc\(aq, \(aq\-\(aq, \(aqd\(aq, \(aqc\(aq, \(aqa\(aq, \(aqb\(aq, \(aqr\(aq, \(aqa\(aq, \(aqc\(aq, \(aqd\(aq]
+>>> from nltk.lm import Vocabulary
+>>> vocab = Vocabulary(words, unk_cutoff=2)
+.ft P
+.fi
+.sp
+Tokens with counts greater than or equal to the cutoff value will
+be considered part of the vocabulary.
+.sp
+.nf
+.ft C
+>>> vocab[\(aqc\(aq]
+3
+>>> \(aqc\(aq in vocab
+True
+>>> vocab[\(aqd\(aq]
+2
+>>> \(aqd\(aq in vocab
+True
+.ft P
+.fi
+.sp
+Tokens with frequency counts less than the cutoff value will be considered not
+part of the vocabulary even though their entries in the count dictionary are
+preserved.
+.sp
+.nf
+.ft C
+>>> vocab[\(aqb\(aq]
+1
+>>> \(aqb\(aq in vocab
+False
+>>> vocab[\(aqaliens\(aq]
+0
+>>> \(aqaliens\(aq in vocab
+False
+.ft P
+.fi
+.sp
+Keeping the count entries for seen words allows us to change the cutoff value
+without having to recalculate the counts.
+.sp
+.nf
+.ft C
+>>> vocab2 = Vocabulary(vocab.counts, unk_cutoff=1)
+>>> "b" in vocab2
+True
+.ft P
+.fi
+.sp
+The cutoff value influences not only membership checking but also the result of
+getting the size of the vocabulary using the built\-in \fIlen\fP\&.
+Note that while the number of keys in the vocabulary\(aqs counter stays the same,
+the items in the vocabulary differ depending on the cutoff.
+We use \fIsorted\fP to demonstrate because it keeps the order consistent.
+.sp
+.nf
+.ft C
+>>> sorted(vocab2.counts)
+[\(aq\-\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aqd\(aq, \(aqr\(aq]
+>>> sorted(vocab2)
+[\(aq\-\(aq, \(aq<UNK>\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aqd\(aq, \(aqr\(aq]
+>>> sorted(vocab.counts)
+[\(aq\-\(aq, \(aqa\(aq, \(aqb\(aq, \(aqc\(aq, \(aqd\(aq, \(aqr\(aq]
+>>> sorted(vocab)
+[\(aq<UNK>\(aq, \(aqa\(aq, \(aqc\(aq, \(aqd\(aq]
+.ft P
+.fi
+.sp
+In addition to items it gets populated with, the vocabulary stores a special
+token that stands in for so\-called "unknown" items. By default it\(aqs "<UNK>".
+.sp
+.nf
+.ft C
+>>> "<UNK>" in vocab
+True
+.ft P
+.fi
+.sp
+We can look up words in a vocabulary using its \fIlookup\fP method.
+"Unseen" words (with counts less than cutoff) are looked up as the unknown label.
+If given one word (a string) as an input, this method will return a string.
+.sp
+.nf
+.ft C
+>>> vocab.lookup("a")
+\(aqa\(aq
+>>> vocab.lookup("aliens")
+\(aq<UNK>\(aq
+.ft P
+.fi
+.sp
+If given a sequence, it will return a tuple of the looked up words.
+.sp
+.nf
+.ft C
+>>> vocab.lookup(["p", \(aqa\(aq, \(aqr\(aq, \(aqd\(aq, \(aqb\(aq, \(aqc\(aq])
+(\(aq<UNK>\(aq, \(aqa\(aq, \(aq<UNK>\(aq, \(aqd\(aq, \(aq<UNK>\(aq, \(aqc\(aq)
+.ft P
+.fi
+.sp
+It\(aqs possible to update the counts after the vocabulary has been created.
+In general, the interface is the same as that of \fIcollections.Counter\fP\&.
+.sp
+.nf
+.ft C
+>>> vocab[\(aqb\(aq]
+1
+>>> vocab.update(["b", "b", "c"])
+>>> vocab[\(aqb\(aq]
+3
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B property cutoff
+Cutoff value.
+.sp
+Items with count below this value are not considered part of vocabulary.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lookup(words)
+Look up one or more words in the vocabulary.
+.sp
+If passed one word as a string will return that word or \fIself.unk_label\fP\&.
+Otherwise will assume it was passed a sequence of words, will try to look
+each of them up and return an iterator over the looked up words.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBwords\fP (\fIIterable\fP\fI(\fP\fIstr\fP\fI) or \fP\fIstr\fP) \-\- Word(s) to look up.
+.TP
+.B Return type
+generator(str) or str
+.TP
+.B Raises
+TypeError for types other than strings or iterables
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.lm import Vocabulary
+>>> vocab = Vocabulary(["a", "b", "c", "a", "b"], unk_cutoff=2)
+>>> vocab.lookup("a")
+\(aqa\(aq
+>>> vocab.lookup("aliens")
+\(aq<UNK>\(aq
+>>> vocab.lookup(["a", "b", "c", ["x", "b"]])
+(\(aqa\(aq, \(aqb\(aq, \(aq<UNK>\(aq, (\(aq<UNK>\(aq, \(aqb\(aq))
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update(*counter_args, **counter_kwargs)
+Update vocabulary counts.
+.sp
+Wraps \fIcollections.Counter.update\fP method.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.lm.WittenBellInterpolated(order, **kwargs)
+Bases: \fI\%nltk.lm.models.InterpolatedLanguageModel\fP
+.sp
+Interpolated version of Witten\-Bell smoothing.
+.UNINDENT
+.SS nltk.metrics package
+.SS Submodules
+.SS nltk.metrics.agreement module
+.sp
+Implementations of inter\-annotator agreement coefficients surveyed by Artstein
+and Poesio (2007), Inter\-Coder Agreement for Computational Linguistics.
+.sp
+An agreement coefficient calculates the amount that annotators agreed on label
+assignments beyond what is expected by chance.
+.sp
+In defining the AnnotationTask class, we use naming conventions similar to the
+paper\(aqs terminology.
There are three types of objects in an annotation task:
+.INDENT 0.0
+.INDENT 3.5
+the coders (variables "c" and "C"),
+the items to be annotated (variables "i" and "I"),
+and the potential categories to be assigned (variables "k" and "K").
+.UNINDENT
+.UNINDENT
+.sp
+Additionally, it is often the case that we don\(aqt want to treat two different
+labels as complete disagreement, and so the AnnotationTask constructor can also
+take a distance metric as a final argument. Distance metrics are simply
+functions that take two arguments, and return a value between 0.0 and 1.0
+indicating the distance between them. If not supplied, the default is binary
+comparison between the arguments.
+.sp
+The simplest way to initialize an AnnotationTask is with a list of triples,
+each containing a coder\(aqs assignment for one object in the task:
+.INDENT 0.0
+.INDENT 3.5
+task = AnnotationTask(data=[(\(aqc1\(aq, \(aq1\(aq, \(aqv1\(aq),(\(aqc2\(aq, \(aq1\(aq, \(aqv1\(aq),...])
+.UNINDENT
+.UNINDENT
+.sp
+Note that the data list needs to contain the same number of triples for each
+individual coder, containing category values for the same set of items.
+.sp
+The following coefficients are implemented:
+.INDENT 0.0
+.INDENT 3.5
+Alpha (Krippendorff 1980),
+Kappa (Cohen 1960),
+S (Bennett, Albert and Goldstein 1954),
+Pi (Scott 1955)
+.UNINDENT
+.UNINDENT
+.sp
+TODO: Describe handling of multiple coders and missing data
+.sp
+Expected results from the Artstein and Poesio survey paper:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+>>> from nltk.metrics.agreement import AnnotationTask
+>>> import os.path
+>>> t = AnnotationTask(data=[x.split() for x in open(os.path.join(os.path.dirname(__file__), "artstein_poesio_example.txt"))])
+>>> t.avg_Ao()
+0.88
+>>> t.pi()
+0.7995322418977615...
+>>> t.S()
+0.8199999999999998...
+.ft P
+.fi
+.sp
+This would have returned a wrong value (0.0) in @785fb79 as coders are in
+the wrong order. Subsequently, all values for pi(), S(), and kappa() would
+have been wrong as they are computed with avg_Ao().
+.sp
+.nf
+.ft C
+>>> t2 = AnnotationTask(data=[(\(aqb\(aq,\(aq1\(aq,\(aqstat\(aq),(\(aqa\(aq,\(aq1\(aq,\(aqstat\(aq)])
+>>> t2.avg_Ao()
+1.0
+.ft P
+.fi
+.sp
+The following, of course, also works.
+.sp
+.nf
+.ft C
+>>> t3 = AnnotationTask(data=[(\(aqa\(aq,\(aq1\(aq,\(aqothr\(aq),(\(aqb\(aq,\(aq1\(aq,\(aqothr\(aq)])
+>>> t3.avg_Ao()
+1.0
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.metrics.agreement.AnnotationTask(data=None, distance=<function binary_distance>)
+Bases: \fBobject\fP
+.sp
+Represents an annotation task, i.e. people assign labels to items.
+.sp
+Notation tries to match notation in Artstein and Poesio (2007).
+.sp
+In general, coders and items can be represented as any hashable object.
+Integers, for example, are fine, though strings are more readable.
+Labels must support the distance functions applied to them, so e.g.
+a string\-edit\-distance makes no sense if your labels are integers,
+whereas interval distance needs numeric values. A notable case of this
+is the MASI metric, which requires Python sets.
+.INDENT 7.0
+.TP
+.B Ae_kappa(cA, cB)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Ao(cA, cB)
+Observed agreement between two coders on all items.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Disagreement(label_freqs)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Do_Kw(max_distance=1.0)
+Averaged over all labelers
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Do_Kw_pairwise(cA, cB, max_distance=1.0)
+The observed disagreement for the weighted kappa coefficient.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B N(**kwargs)
+Implements the "n\-notation" used in Artstein and Poesio (2007)
+.sp
+@deprecated: Use Nk, Nik or Nck instead
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Nck(c, k)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Nik(i, k)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Nk(k)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B S()
+Bennett, Albert and Goldstein 1954
+.UNINDENT
+.INDENT 7.0
+.TP
+.B agr(cA, cB, i, data=None)
+Agreement between two coders on a given item
+.UNINDENT
+.INDENT 7.0
+.TP
+.B alpha()
+Krippendorff 1980
+.UNINDENT
+.INDENT 7.0
+.TP
+.B avg_Ao()
+Average observed agreement across all coders and items.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B kappa()
+Cohen 1960
+Averages naively over kappas for each coder pair.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B kappa_pairwise(cA, cB)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B load_array(array)
+Load a sequence of annotation results, appending to any data already loaded.
+.sp
+The argument is a sequence of 3\-tuples, each representing a coder\(aqs labeling
+of an item: (coder, item, label)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B multi_kappa()
+Davies and Fleiss 1982
+Averages over observed and expected agreements for each coder pair.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pi()
+Scott 1955; here, multi\-pi.
+Equivalent to K from Siegel and Castellan (1988).
+.UNINDENT
+.INDENT 7.0
+.TP
+.B weighted_kappa(max_distance=1.0)
+Cohen 1968
+.UNINDENT
+.INDENT 7.0
+.TP
+.B weighted_kappa_pairwise(cA, cB, max_distance=1.0)
+Cohen 1968
+.UNINDENT
+.UNINDENT
+.SS nltk.metrics.aline module
+.sp
+ALINE
+\fI\%http://webdocs.cs.ualberta.ca/~kondrak/\fP
+Copyright 2002 by Grzegorz Kondrak.
+.sp
+ALINE is an algorithm for aligning phonetic sequences, described in [1].
+This module is a port of Kondrak\(aqs (2002) ALINE. It provides functions for
+phonetic sequence alignment and similarity analysis. These are useful in
+historical linguistics, sociolinguistics and synchronic phonology.
+.sp
+ALINE has parameters that can be tuned for desired output. These parameters are:
+.INDENT 0.0
+.IP \(bu 2
+C_skip, C_sub, C_exp, C_vwl
+.IP \(bu 2
+Salience weights
+.IP \(bu 2
+Segmental features
+.UNINDENT
+.sp
+In this implementation, some parameters have been changed from their default
+values as described in [1], in order to replicate published results. All changes
+are noted in comments.
+.SS Example usage
+.sp
+.nf
+.ft C
+# Get optimal alignment of two phonetic sequences
+>>> align(\(aqθin\(aq, \(aqtenwis\(aq)
+[[(\(aqθ\(aq, \(aqt\(aq), (\(aqi\(aq, \(aqe\(aq), (\(aqn\(aq, \(aqn\(aq), (\(aq\-\(aq, \(aqw\(aq), (\(aq\-\(aq, \(aqi\(aq), (\(aq\-\(aq, \(aqs\(aq)]]
+.ft P
+.fi
+.sp
+[1] G. Kondrak. Algorithms for Language Reconstruction. PhD dissertation,
+University of Toronto.
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.R(p, q)
+Return relevant features for segment comparison.
+.sp
+(Kondrak 2002: 54)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.V(p)
+Return vowel weight if P is vowel.
+.sp
+(Kondrak 2002: 54)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.align(str1, str2, epsilon=0)
+Compute the alignment of two phonetic strings.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBstr1, str2\fP (\fIstr\fP) \-\- Two strings to be aligned
+.IP \(bu 2
+\fBepsilon\fP (\fIfloat\fP\fI (\fP\fI0.0 to 1.0\fP\fI)\fP) \-\- Adjusts threshold similarity score for near\-optimal alignments
+.UNINDENT
+.TP
+.B Return type
+list(list(tuple(str, str)))
+.TP
+.B Returns
+Alignment(s) of str1 and str2
+.UNINDENT
+.sp
+(Kondrak 2002: 51)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.delta(p, q)
+Return weighted sum of difference between P and Q.
+.sp
+(Kondrak 2002: 54)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.demo()
+A demonstration of the result of aligning phonetic sequences
+used in Kondrak\(aqs (2002) dissertation.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.diff(p, q, f)
+Returns difference between phonetic segments P and Q for feature F.
+.sp
+(Kondrak 2002: 52, 54)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.sigma_exp(p, q)
+Returns score of an expansion/compression.
+.sp
+(Kondrak 2002: 54)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.sigma_skip(p)
+Returns score of an indel of P.
+.sp
+(Kondrak 2002: 54)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.aline.sigma_sub(p, q)
+Returns score of a substitution of P with Q.
+.sp
+(Kondrak 2002: 54)
+.UNINDENT
+.SS nltk.metrics.association module
+.sp
+Provides scoring functions for a number of association measures through a
+generic, abstract implementation in \fBNgramAssocMeasures\fP, and n\-specific
+\fBBigramAssocMeasures\fP and \fBTrigramAssocMeasures\fP\&.
+.INDENT 0.0
+.TP
+.B class nltk.metrics.association.BigramAssocMeasures
+Bases: \fI\%nltk.metrics.association.NgramAssocMeasures\fP
+.sp
+A collection of bigram association measures. Each association measure
+is provided as a function with three arguments:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+bigram_score_fn(n_ii, (n_ix, n_xi), n_xx)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+The arguments constitute the marginals of a contingency table, counting
+the occurrences of particular events in a corpus. The letter i in the
+suffix refers to the appearance of the word in question, while x indicates
+the appearance of any word. Thus, for example:
+.INDENT 7.0
+.INDENT 3.5
+n_ii counts (w1, w2), i.e. the bigram being scored
+n_ix counts (w1, *)
+n_xi counts (*, w2)
+n_xx counts (*, *), i.e. any bigram
+.UNINDENT
+.UNINDENT
+.sp
+This may be shown with respect to a contingency table:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+         w1    ~w1
+      \-\-\-\-\-\- \-\-\-\-\-\-
+ w2 | n_ii | n_oi | = n_xi
+      \-\-\-\-\-\- \-\-\-\-\-\-
+~w2 | n_io | n_oo |
+      \-\-\-\-\-\- \-\-\-\-\-\-
+      = n_ix        TOTAL = n_xx
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod chi_sq(n_ii, n_ix_xi_tuple, n_xx)
+Scores bigrams using chi\-square, i.e. phi\-sq multiplied by the number
+of bigrams, as in Manning and Schutze 5.3.3.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static dice(n_ii, n_ix_xi_tuple, n_xx)
+Scores bigrams using Dice\(aqs coefficient.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod fisher(*marginals)
+Scores bigrams using Fisher\(aqs Exact Test (Pedersen 1996). Less
+sensitive to small counts than PMI or Chi Sq, but also more expensive
+to compute. Requires scipy.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod phi_sq(*marginals)
+Scores bigrams using phi\-square, the square of the Pearson correlation
+coefficient.
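+.sp
+In practice these measures are usually applied through the collocation
+finders in \fInltk.collocations\fP; the following is a brief editor\-added
+sketch (the word list and variable names are illustrative only).
+.sp
+.nf
+.ft C
+from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder
+
+words = "the quick brown fox and the quick blue hare".split()
+finder = BigramCollocationFinder.from_words(words)
+# Rank candidate bigrams with any measure from this class, e.g. PMI:
+top_two = finder.nbest(BigramAssocMeasures.pmi, 2)
+.ft P
+.fi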
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.metrics.association.ContingencyMeasures(measures)
+Bases: \fBobject\fP
+.sp
+Wraps NgramAssocMeasures classes such that the arguments of association
+measures are contingency table values rather than marginals.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.association.NGRAM = 0
+Marginals index for the ngram count
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.metrics.association.NgramAssocMeasures
+Bases: \fBobject\fP
+.sp
+An abstract class defining a collection of generic association measures.
+Each public method returns a score, taking the following arguments:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+score_fn(count_of_ngram,
+         (count_of_n\-1gram_1, ..., count_of_n\-1gram_j),
+         (count_of_n\-2gram_1, ..., count_of_n\-2gram_k),
+         ...,
+         (count_of_1gram_1, ..., count_of_1gram_n),
+         count_of_total_words)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+See \fBBigramAssocMeasures\fP and \fBTrigramAssocMeasures\fP
+.sp
+Inheriting classes should define a property _n, and a method _contingency
+which calculates contingency values from marginals in order for all
+association measures defined here to be usable.
+.INDENT 7.0
+.TP
+.B classmethod chi_sq(*marginals)
+Scores ngrams using Pearson\(aqs chi\-square as in Manning and Schutze
+5.3.3.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod jaccard(*marginals)
+Scores ngrams using the Jaccard index.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod likelihood_ratio(*marginals)
+Scores ngrams using likelihood ratios as in Manning and Schutze 5.3.4.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static mi_like(*marginals, **kwargs)
+Scores ngrams using a variant of mutual information. The keyword
+argument power sets an exponent (default 3) for the numerator. No
+logarithm of the result is calculated.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod pmi(*marginals)
+Scores ngrams by pointwise mutual information, as in Manning and
+Schutze 5.4.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod poisson_stirling(*marginals)
+Scores ngrams using the Poisson\-Stirling measure.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static raw_freq(*marginals)
+Scores ngrams by their frequency
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod student_t(*marginals)
+Scores ngrams using Student\(aqs t test with independence hypothesis
+for unigrams, as in Manning and Schutze 5.3.1.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.metrics.association.QuadgramAssocMeasures
+Bases: \fI\%nltk.metrics.association.NgramAssocMeasures\fP
+.sp
+A collection of quadgram association measures. Each association measure
+is provided as a function with five arguments:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+quadgram_score_fn(n_iiii,
+                  (n_iiix, n_iixi, n_ixii, n_xiii),
+                  (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix),
+                  (n_ixxx, n_xixx, n_xxix, n_xxxi),
+                  n_all)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+The arguments constitute the marginals of a contingency table, counting
+the occurrences of particular events in a corpus. The letter i in the
+suffix refers to the appearance of the word in question, while x indicates
+the appearance of any word. Thus, for example:
+n_iiii counts (w1, w2, w3, w4), i.e. the quadgram being scored
+n_ixxi counts (w1, *, *, w4)
+n_xxxx counts (*, *, *, *), i.e. any quadgram
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.association.TOTAL = \-1
+Marginals index for the number of words in the data
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.metrics.association.TrigramAssocMeasures
+Bases: \fI\%nltk.metrics.association.NgramAssocMeasures\fP
+.sp
+A collection of trigram association measures. Each association measure
+is provided as a function with four arguments:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+trigram_score_fn(n_iii,
+                 (n_iix, n_ixi, n_xii),
+                 (n_ixx, n_xix, n_xxi),
+                 n_xxx)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+The arguments constitute the marginals of a contingency table, counting
+the occurrences of particular events in a corpus. The letter i in the
+suffix refers to the appearance of the word in question, while x indicates
+the appearance of any word. Thus, for example:
+n_iii counts (w1, w2, w3), i.e. the trigram being scored
+n_ixx counts (w1, *, *)
+n_xxx counts (*, *, *), i.e. any trigram
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.association.UNIGRAMS = \-2
+Marginals index for a tuple of each unigram count
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.association.fisher_exact(*_args, **_kwargs)
+.UNINDENT
+.SS nltk.metrics.confusionmatrix module
+.INDENT 0.0
+.TP
+.B class nltk.metrics.confusionmatrix.ConfusionMatrix(reference, test, sort_by_count=False)
+Bases: \fBobject\fP
+.sp
+The confusion matrix between a list of reference values and a
+corresponding list of test values. Entry \fI[r,t]\fP of this
+matrix is a count of the number of times that the reference value
+\fIr\fP corresponds to the test value \fIt\fP\&. E.g.:
+.sp
+.nf
+.ft C
+>>> from nltk.metrics import ConfusionMatrix
+>>> ref = \(aqDET NN VB DET JJ NN NN IN DET NN\(aq.split()
+>>> test = \(aqDET VB VB DET NN NN NN IN DET NN\(aq.split()
+>>> cm = ConfusionMatrix(ref, test)
+>>> print(cm[\(aqNN\(aq, \(aqNN\(aq])
+3
+.ft P
+.fi
+.sp
+Note that the diagonal entries \fIRi=Tj\fP of this matrix
+correspond to correct values, while the off\-diagonal entries
+correspond to incorrect values.
+.INDENT 7.0
+.TP
+.B key()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pretty_format(show_percents=False, values_in_chart=True, truncate=None, sort_by_count=False)
+.INDENT 7.0
+.TP
+.B Returns
+A multi\-line string representation of this confusion matrix.
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtruncate\fP (\fIint\fP) \-\- If specified, then only show the specified
+number of values. Any sorting (e.g., sort_by_count)
+will be performed before truncation.
+.IP \(bu 2
+\fBsort_by_count\fP \-\- If true, then sort by the count of each
+label in the reference data. I.e., labels that occur more
+frequently in the reference label will be towards the left
+edge of the matrix, and labels that occur less frequently
+will be towards the right edge.
+.UNINDENT
+.UNINDENT
+.sp
+@todo: add marginals?
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.confusionmatrix.demo()
+.UNINDENT
+.SS nltk.metrics.distance module
+.sp
+Distance Metrics.
+.sp
+Compute the distance between two items (usually strings).
+As metrics, they must satisfy the following three requirements:
+.INDENT 0.0
+.IP 1. 3
+d(a, a) = 0
+.IP 2. 3
+d(a, b) >= 0
+.IP 3. 3
+d(a, c) <= d(a, b) + d(b, c)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.binary_distance(label1, label2)
+Simple equality test.
+.sp
+0.0 if the labels are identical, 1.0 if they are different.
+.sp
+.nf
+.ft C
+>>> from nltk.metrics import binary_distance
+>>> binary_distance(1,1)
+0.0
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> binary_distance(1,3)
+1.0
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.custom_distance(file)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.edit_distance(s1, s2, substitution_cost=1, transpositions=False)
+Calculate the Levenshtein edit\-distance between two strings.
+The edit distance is the number of characters that need to be
+substituted, inserted, or deleted, to transform s1 into s2. For
+example, transforming "rain" to "shine" requires three steps,
+consisting of two substitutions and one insertion:
+"rain" \-> "sain" \-> "shin" \-> "shine". These operations could have
+been done in other orders, but at least three steps are needed.
+.sp
+Allows specifying the cost of substitution edits (e.g., "a" \-> "b"),
+because sometimes it makes sense to assign greater penalties to
+substitutions.
+.sp
+This also optionally allows transposition edits (e.g., "ab" \-> "ba"),
+though this is disabled by default.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs1, s2\fP (\fIstr\fP) \-\- The strings to be analysed
+.IP \(bu 2
+\fBtranspositions\fP (\fIbool\fP) \-\- Whether to allow transposition edits
+.UNINDENT
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.edit_distance_align(s1, s2, substitution_cost=1)
+Calculate the minimum Levenshtein edit\-distance based alignment
+mapping between two strings. The alignment finds the mapping
+from string s1 to s2 that minimizes the edit distance cost.
+For example, mapping "rain" to "shine" would involve 2
+substitutions, 2 matches and an insertion resulting in
+the following mapping:
+[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5)]
+NB: (0, 0) is the start state without any letters associated
+See more: \fI\%https://web.stanford.edu/class/cs124/lec/med.pdf\fP
+.sp
+In case of multiple valid minimum\-distance alignments, the
+backtrace has the following operation precedence:
+1. Skip s1 character
+2. Skip s2 character
+3. Substitute s1 and s2 characters
+The backtrace is carried out in reverse string order.
+.sp
+This function does not support transposition.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBs1, s2\fP (\fIstr\fP) \-\- The strings to be aligned
+.TP
+.B Return type
+List[Tuple(int, int)]
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.fractional_presence(label)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.interval_distance(label1, label2)
+Krippendorff\(aqs interval distance metric
+.sp
+.nf
+.ft C
+>>> from nltk.metrics import interval_distance
+>>> interval_distance(1,10)
+81
+.ft P
+.fi
+.sp
+Krippendorff 1980, Content Analysis: An Introduction to its Methodology
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.jaccard_distance(label1, label2)
+Distance metric comparing set\-similarity.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.jaro_similarity(s1, s2)
+Computes the Jaro similarity between 2 sequences from:
+.INDENT 7.0
+.INDENT 3.5
+Matthew A. Jaro (1989). Advances in record linkage methodology
+as applied to the 1985 census of Tampa Florida. Journal of the
+American Statistical Association. 84 (406): 414\-20.
+.UNINDENT
+.UNINDENT
+.sp
+The Jaro distance between two words is the minimum number of single\-character
+transpositions required to change one word into the other.
The Jaro similarity formula from
+\fI\%https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance\fP :
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+jaro_sim = 0 if m = 0 else 1/3 * (m/|s_1| + m/|s_2| + (m\-t)/m)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B where:
+.INDENT 7.0
+.IP \(bu 2
+|s_i| is the length of string s_i
+.IP \(bu 2
+m is the number of matching characters
+.IP \(bu 2
+t is half the number of possible transpositions.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.jaro_winkler_similarity(s1, s2, p=0.1, max_l=4)
+The Jaro Winkler distance is an extension of the Jaro similarity in:
+.INDENT 7.0
+.INDENT 3.5
+William E. Winkler. 1990. String Comparator Metrics and Enhanced
+Decision Rules in the Fellegi\-Sunter Model of Record Linkage.
+Proceedings of the Section on Survey Research Methods.
+American Statistical Association: 354\-359.
+.UNINDENT
+.UNINDENT
+.sp
+such that:
+.INDENT 7.0
+.INDENT 3.5
+jaro_winkler_sim = jaro_sim + ( l * p * (1 \- jaro_sim) )
+.UNINDENT
+.UNINDENT
+.sp
+where:
+.INDENT 7.0
+.IP \(bu 2
+jaro_sim is the output from the Jaro similarity; see jaro_similarity()
+.IP \(bu 2
+l is the length of the common prefix at the start of the strings;
+this implementation puts an upper bound on l to keep prefixes short.
+A common value for this upper bound is 4.
+.IP \(bu 2
+p is the constant scaling factor to overweigh common prefixes.
+The Jaro\-Winkler similarity will fall within the [0, 1] bound,
+given that max(p) <= 0.25; the default is p=0.1 as in Winkler (1990).
+.UNINDENT
+.sp
+Test using outputs from \fI\%https://www.census.gov/srd/papers/pdf/rr93\-8.pdf\fP
+from "Table 5 Comparison of String Comparators Rescaled between 0 and 1"
+.sp
+.nf
+.ft C
+>>> winkler_examples = [("billy", "billy"), ("billy", "bill"), ("billy", "blily"),
+\&... ("massie", "massey"), ("yvette", "yevett"), ("billy", "bolly"), ("dwayne", "duane"),
+\&... ("dixon", "dickson"), ("billy", "susan")]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> winkler_scores = [1.000, 0.967, 0.947, 0.944, 0.911, 0.893, 0.858, 0.853, 0.000]
+>>> jaro_scores = [1.000, 0.933, 0.933, 0.889, 0.889, 0.867, 0.822, 0.790, 0.000]
+.ft P
+.fi
+.sp
+One way to match the values in Winkler\(aqs paper is to provide a different
+p scaling factor for different pairs of strings, e.g.:
+.sp
+.nf
+.ft C
+>>> p_factors = [0.1, 0.125, 0.20, 0.125, 0.20, 0.20, 0.20, 0.15, 0.1]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> for (s1, s2), jscore, wscore, p in zip(winkler_examples, jaro_scores, winkler_scores, p_factors):
+\&...     assert round(jaro_similarity(s1, s2), 3) == jscore
+\&...     assert round(jaro_winkler_similarity(s1, s2, p=p), 3) == wscore
+.ft P
+.fi
+.sp
+Test using outputs from \fI\%https://www.census.gov/srd/papers/pdf/rr94\-5.pdf\fP from
+"Table 2.1. Comparison of String Comparators Using Last Names, First Names, and Street Names"
+.sp
+.nf
+.ft C
+>>> winkler_examples = [(\(aqSHACKLEFORD\(aq, \(aqSHACKELFORD\(aq), (\(aqDUNNINGHAM\(aq, \(aqCUNNIGHAM\(aq),
+\&... (\(aqNICHLESON\(aq, \(aqNICHULSON\(aq), (\(aqJONES\(aq, \(aqJOHNSON\(aq), (\(aqMASSEY\(aq, \(aqMASSIE\(aq),
+\&... (\(aqABROMS\(aq, \(aqABRAMS\(aq), (\(aqHARDIN\(aq, \(aqMARTINEZ\(aq), (\(aqITMAN\(aq, \(aqSMITH\(aq),
+\&... (\(aqJERALDINE\(aq, \(aqGERALDINE\(aq), (\(aqMARHTA\(aq, \(aqMARTHA\(aq), (\(aqMICHELLE\(aq, \(aqMICHAEL\(aq),
+\&... (\(aqJULIES\(aq, \(aqJULIUS\(aq), (\(aqTANYA\(aq, \(aqTONYA\(aq), (\(aqDWAYNE\(aq, \(aqDUANE\(aq), (\(aqSEAN\(aq, \(aqSUSAN\(aq),
+\&... (\(aqJON\(aq, \(aqJOHN\(aq), (\(aqJON\(aq, \(aqJAN\(aq), (\(aqBROOKHAVEN\(aq, \(aqBRROKHAVEN\(aq),
+\&... (\(aqBROOK HALLOW\(aq, \(aqBROOK HLLW\(aq), (\(aqDECATUR\(aq, \(aqDECATIR\(aq), (\(aqFITZRUREITER\(aq, \(aqFITZENREITER\(aq),
+\&... (\(aqHIGBEE\(aq, \(aqHIGHEE\(aq), (\(aqHIGBEE\(aq, \(aqHIGVEE\(aq), (\(aqLACURA\(aq, \(aqLOCURA\(aq), (\(aqIOWA\(aq, \(aqIONA\(aq), (\(aq1ST\(aq, \(aqIST\(aq)]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> jaro_scores = [0.970, 0.896, 0.926, 0.790, 0.889, 0.889, 0.722, 0.467, 0.926,
+\&... 0.944, 0.869, 0.889, 0.867, 0.822, 0.783, 0.917, 0.000, 0.933, 0.944, 0.905,
+\&... 0.856, 0.889, 0.889, 0.889, 0.833, 0.000]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> winkler_scores = [0.982, 0.896, 0.956, 0.832, 0.944, 0.922, 0.722, 0.467, 0.926,
+\&... 0.961, 0.921, 0.933, 0.880, 0.858, 0.805, 0.933, 0.000, 0.947, 0.967, 0.943,
+\&... 0.913, 0.922, 0.922, 0.900, 0.867, 0.000]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> # One way to match the values in Winkler\(aqs paper is to provide a different
+>>> # p scaling factor for different pairs of strings, e.g.
+>>> p_factors = [0.1, 0.1, 0.1, 0.1, 0.125, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.20,
+\&... 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> for (s1, s2), jscore, wscore, p in zip(winkler_examples, jaro_scores, winkler_scores, p_factors):
+\&...     if (s1, s2) in [(\(aqJON\(aq, \(aqJAN\(aq), (\(aq1ST\(aq, \(aqIST\(aq)]:
+\&...         continue  # Skip bad examples from the paper.
+\&...     assert round(jaro_similarity(s1, s2), 3) == jscore
+\&...     assert round(jaro_winkler_similarity(s1, s2, p=p), 3) == wscore
+.ft P
+.fi
+.sp
+This test case shows that the output of the Jaro\-Winkler similarity depends on
+the product l * p and not on the product max_l * p: here the product
+max_l * p > 1, but the product l * p <= 1.
+.sp
+.nf
+.ft C
+>>> round(jaro_winkler_similarity(\(aqTANYA\(aq, \(aqTONYA\(aq, p=0.1, max_l=100), 3)
+0.88
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.masi_distance(label1, label2)
+Distance metric that takes into account partial agreement when multiple
+labels are assigned.
+.sp
+.nf
+.ft C
+>>> from nltk.metrics import masi_distance
+>>> masi_distance(set([1, 2]), set([1, 2, 3, 4]))
+0.665
+.ft P
+.fi
+.sp
+Passonneau 2006, Measuring Agreement on Set\-Valued Items (MASI)
+for Semantic and Pragmatic Annotation.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.distance.presence(label)
+Higher\-order function to test presence of a given label
+.UNINDENT
+.SS nltk.metrics.paice module
+.sp
+Computes Paice\(aqs performance statistics for evaluating stemming algorithms.
+.INDENT 0.0
+.TP
+.B What is required:
+.INDENT 7.0
+.IP \(bu 2
+A dictionary of words grouped by their real lemmas
+.IP \(bu 2
+A dictionary of words grouped by stems from a stemming algorithm
+.UNINDENT
+.UNINDENT
+.sp
+When these are given, the Understemming Index (UI), Overstemming Index (OI),
+Stemming Weight (SW) and Error\-rate relative to truncation (ERRT) are computed.
+.sp
+References:
+Chris D. Paice (1994). An evaluation method for stemming algorithms.
+In Proceedings of SIGIR, 42\-\-50.
+.INDENT 0.0
+.TP
+.B class nltk.metrics.paice.Paice(lemmas, stems)
+Bases: \fBobject\fP
+.sp
+Class for storing lemmas, stems and evaluation metrics.
+.INDENT 7.0
+.TP
+.B update()
+Update statistics after lemmas and stems have been set.
+.UNINDENT
+.UNINDENT
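+.sp
+A hedged sketch of computing the indices (the word groups below are
+illustrative, and the \fBui\fP/\fBoi\fP attribute names are assumed from
+the metric names above):
+.sp
+.nf
+.ft C
+>>> from nltk.metrics.paice import Paice
+>>> # Words grouped by their real lemmas, and by the stems a stemmer produced.
+>>> lemmas = {\(aqkneel\(aq: [\(aqkneel\(aq, \(aqknelt\(aq], \(aqring\(aq: [\(aqring\(aq, \(aqrang\(aq, \(aqrung\(aq]}
+>>> stems = {\(aqkneel\(aq: [\(aqkneel\(aq], \(aqknelt\(aq: [\(aqknelt\(aq], \(aqr\(aq: [\(aqring\(aq, \(aqrang\(aq, \(aqrung\(aq]}
+>>> paice = Paice(lemmas, stems)
+>>> paice.update()  # recompute UI/OI after (re)setting lemmas and stems
+>>> 0.0 <= paice.ui <= 1.0 and 0.0 <= paice.oi <= 1.0  # both indices are ratios
+True
+.ft P
+.fi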
+.INDENT 0.0
+.TP
+.B nltk.metrics.paice.demo()
+Demonstration of the module.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.paice.get_words_from_dictionary(lemmas)
+Get original set of words used for analysis.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBlemmas\fP (\fIdict(str): list(str)\fP) \-\- A dictionary where keys are lemmas and values are sets
+or lists of words corresponding to that lemma.
+.TP
+.B Returns
+Set of words that exist as values in the dictionary
+.TP
+.B Return type
+set(str)
+.UNINDENT
+.UNINDENT
+.SS nltk.metrics.scores module
+.INDENT 0.0
+.TP
+.B nltk.metrics.scores.accuracy(reference, test)
+Given a list of reference values and a corresponding list of test
+values, return the fraction of corresponding values that are
+equal. In particular, return the fraction of indices
+\fB0<i<=len(test)\fP such that \fBtest[i] == reference[i]\fP\&.
+.UNINDENT
+.SS nltk.metrics.segmentation module
+.INDENT 0.0
+.TP
+.B nltk.metrics.segmentation.ghd(ref, hyp, ins_cost=2.0, del_cost=2.0, shift_cost_coeff=1.0, boundary=\(aq1\(aq)
+Compute the Generalized Hamming Distance for a reference and a
+hypothetical segmentation, corresponding to the cost related to the
+transformation of the hypothetical segmentation into the reference
+segmentation through boundary insertion, deletion and shift operations.
+.sp
+.nf
+.ft C
+>>> # Same examples as Kulyukin C++ implementation
+>>> ghd(\(aq1100100000\(aq, \(aq1100010000\(aq, 1.0, 1.0, 0.5)
+0.5
+>>> ghd(\(aq1100100000\(aq, \(aq1100000001\(aq, 1.0, 1.0, 0.5)
+2.0
+>>> ghd(\(aq011\(aq, \(aq110\(aq, 1.0, 1.0, 0.5)
+1.0
+>>> ghd(\(aq1\(aq, \(aq0\(aq, 1.0, 1.0, 0.5)
+1.0
+>>> ghd(\(aq111\(aq, \(aq000\(aq, 1.0, 1.0, 0.5)
+3.0
+>>> ghd(\(aq000\(aq, \(aq111\(aq, 1.0, 2.0, 0.5)
+6.0
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBref\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- the reference segmentation
+.IP \(bu 2
+\fBhyp\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- the hypothetical segmentation
+.IP \(bu 2
+\fBins_cost\fP (\fIfloat\fP) \-\- insertion cost
+.IP \(bu 2
+\fBdel_cost\fP (\fIfloat\fP) \-\- deletion cost
+.IP \(bu 2
+\fBshift_cost_coeff\fP (\fIfloat\fP) \-\- constant used to compute the cost of a shift:
+shift cost = shift_cost_coeff * |i \- j| where i and j are
+the positions indicating the shift
+.IP \(bu 2
+\fBboundary\fP (\fIstr\fP\fI or \fP\fIint\fP\fI or \fP\fIbool\fP) \-\- boundary value
+.UNINDENT
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.segmentation.pk(ref, hyp, k=None, boundary=\(aq1\(aq)
+Compute the Pk metric for a pair of segmentations. A segmentation
+is any sequence over a vocabulary of two items (e.g. "0", "1"),
+where the specified boundary value is used to mark the edge of a
+segmentation.
+.sp
+.nf
+.ft C
+>>> \(aq%.2f\(aq % pk(\(aq0100\(aq*100, \(aq1\(aq*400, 2)
+\(aq0.50\(aq
+>>> \(aq%.2f\(aq % pk(\(aq0100\(aq*100, \(aq0\(aq*400, 2)
+\(aq0.50\(aq
+>>> \(aq%.2f\(aq % pk(\(aq0100\(aq*100, \(aq0100\(aq*100, 2)
+\(aq0.00\(aq
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBref\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- the reference segmentation
+.IP \(bu 2
+\fBhyp\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- the segmentation to evaluate
+.IP \(bu 2
+\fBk\fP \-\- window size, if None, set to half of the average reference segment length
+.IP \(bu 2
+\fBboundary\fP (\fIstr\fP\fI or \fP\fIint\fP\fI or \fP\fIbool\fP) \-\- boundary value
+.UNINDENT
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.metrics.segmentation.windowdiff(seg1, seg2, k, boundary=\(aq1\(aq, weighted=False)
+Compute the windowdiff score for a pair of segmentations. A
+segmentation is any sequence over a vocabulary of two items
+(e.g. "0", "1"), where the specified boundary value is used to
+mark the edge of a segmentation.
+.sp +.nf +.ft C +>>> s1 = "000100000010" +>>> s2 = "000010000100" +>>> s3 = "100000010000" +>>> \(aq%.2f\(aq % windowdiff(s1, s1, 3) +\(aq0.00\(aq +>>> \(aq%.2f\(aq % windowdiff(s1, s2, 3) +\(aq0.30\(aq +>>> \(aq%.2f\(aq % windowdiff(s2, s3, 3) +\(aq0.80\(aq +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBseg1\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- a segmentation +.IP \(bu 2 +\fBseg2\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- a segmentation +.IP \(bu 2 +\fBk\fP (\fIint\fP) \-\- window width +.IP \(bu 2 +\fBboundary\fP (\fIstr\fP\fI or \fP\fIint\fP\fI or \fP\fIbool\fP) \-\- boundary value +.IP \(bu 2 +\fBweighted\fP (\fIboolean\fP) \-\- use the weighted variant of windowdiff +.UNINDENT +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.SS nltk.metrics.spearman module +.sp +Tools for comparing ranked lists. +.INDENT 0.0 +.TP +.B nltk.metrics.spearman.ranks_from_scores(scores, rank_gap=1e\-15) +Given a sequence of (key, score) tuples, yields each key with an +increasing rank, tying with previous key\(aqs rank if the difference between +their scores is less than rank_gap. Suitable for use as an argument to +\fBspearman_correlation\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.metrics.spearman.ranks_from_sequence(seq) +Given a sequence, yields each element with an increasing rank, suitable +for use as an argument to \fBspearman_correlation\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.metrics.spearman.spearman_correlation(ranks1, ranks2) +Returns the Spearman correlation coefficient for two rankings, which +should be dicts or sequences of (key, rank). The coefficient ranges from +\-1.0 (ranks are opposite) to 1.0 (ranks are identical), and is only +calculated for keys in both rankings (for meaningful results, remove keys +present in only one list before ranking). +.UNINDENT +.SS Module contents +.sp +NLTK Metrics +.sp +Classes and methods for scoring processing modules. +.SS nltk.misc package +.SS Submodules +.SS nltk.misc.babelfish module +.sp +This module previously provided an interface to Babelfish online +translation service; this service is no longer available; this +module is kept in NLTK source code in order to provide better error +messages for people following the NLTK Book 2.0. +.INDENT 0.0 +.TP +.B nltk.misc.babelfish.babelize_shell() +.UNINDENT +.SS nltk.misc.chomsky module +.sp +CHOMSKY is an aid to writing linguistic papers in the style +of the great master. It is based on selected phrases taken +from actual books and articles written by Noam Chomsky. +Upon request, it assembles the phrases in the elegant +stylistic patterns that Chomsky is noted for. +To generate n sentences of linguistic wisdom, type +.INDENT 0.0 +.INDENT 3.5 +(CHOMSKY n) \-\- for example +(CHOMSKY 5) generates half a screen of linguistic truth. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.misc.chomsky.generate_chomsky(times=5, line_length=72) +.UNINDENT +.SS nltk.misc.minimalset module +.INDENT 0.0 +.TP +.B class nltk.misc.minimalset.MinimalSet(parameters=None) +Bases: \fBobject\fP +.sp +Find contexts where more than one possible target value can +appear. E.g. if targets are word\-initial letters, and contexts +are the remainders of words, then we would like to find cases like +"fat" vs "cat", and "training" vs "draining". If targets are +parts\-of\-speech and contexts are words, then we would like to find +cases like wind (noun) \(aqair in rapid motion\(aq, vs wind (verb) +\(aqcoil, wrap\(aq. 
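+.sp
+A minimal sketch of the intended usage (word\-initial letters as
+targets, word remainders as contexts; the word list is illustrative):
+.sp
+.nf
+.ft C
+>>> from nltk.misc.minimalset import MinimalSet
+>>> ms = MinimalSet()
+>>> for word in [\(aqfat\(aq, \(aqcat\(aq, \(aqtraining\(aq, \(aqdraining\(aq]:
+\&...     ms.add(word[1:], word[0], word)
+>>> sorted(ms.contexts())  # contexts seen with at least 2 distinct targets
+[\(aqat\(aq, \(aqraining\(aq]
+.ft P
+.fi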
+.INDENT 7.0
+.TP
+.B add(context, target, display)
+Add a new item to the minimal set, having the specified
+context, target, and display form.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBcontext\fP (\fIstr\fP) \-\- The context in which the item of interest appears
+.IP \(bu 2
+\fBtarget\fP (\fIstr\fP) \-\- The item of interest
+.IP \(bu 2
+\fBdisplay\fP (\fIstr\fP) \-\- The information to be reported for each item
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B contexts(minimum=2)
+Determine which contexts occurred with enough distinct targets.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBminimum\fP (\fIint\fP) \-\- the minimum number of distinct target forms
+.TP
+.B Return type
+list
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B display(context, target, default=\(aq\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B display_all(context)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B targets()
+.UNINDENT
+.UNINDENT
+.SS nltk.misc.sort module
+.sp
+This module provides a variety of list sorting algorithms, to
+illustrate the many different algorithms (recipes) for solving a
+problem, and how to analyze algorithms experimentally.
+.INDENT 0.0
+.TP
+.B nltk.misc.sort.bubble(a)
+Bubble Sort: compare adjacent elements of the list left\-to\-right,
+and swap them if they are out of order. After one pass through
+the list swapping adjacent items, the largest item will be in
+the rightmost position. The remainder is one element smaller;
+apply the same method to this list, and so on.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.misc.sort.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.misc.sort.merge(a)
+Merge Sort: split the list in half, and sort each half, then
+combine the sorted halves.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.misc.sort.quick(a)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.misc.sort.selection(a)
+Selection Sort: scan the list to find its smallest element, then
+swap it with the first element. The remainder of the list is one
+element smaller; apply the same method to this list, and so on.
+.UNINDENT
+.SS nltk.misc.wordfinder module
+.INDENT 0.0
+.TP
+.B nltk.misc.wordfinder.check(word, dir, x, y, grid, rows, cols)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.misc.wordfinder.revword(word)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.misc.wordfinder.step(word, x, xf, y, yf, grid)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.misc.wordfinder.word_finder()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.misc.wordfinder.wordfinder(words, rows=20, cols=20, attempts=50, alph=\(aqABCDEFGHIJKLMNOPQRSTUVWXYZ\(aq)
+Attempt to arrange words into a letter\-grid with the specified
+number of rows and columns. Try each word in several positions
+and directions, until it can be fitted into the grid, or the
+maximum number of allowable attempts is exceeded. Returns a tuple
+consisting of the grid and the words that were successfully
+placed.
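+.sp
+A minimal sketch of the intended usage (word placement is randomized,
+so only the grid shape, fixed by \fBrows\fP and \fBcols\fP, is checked here):
+.sp
+.nf
+.ft C
+>>> from nltk.misc.wordfinder import wordfinder
+>>> grid, used = wordfinder([\(aqAPPLE\(aq, \(aqORANGE\(aq, \(aqPEAR\(aq])
+>>> len(grid), len(grid[0])  # rows x cols, with defaults 20 x 20
+(20, 20)
+.ft P
+.fi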
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBwords\fP (\fIlist\fP) \-\- the list of words to be put into the grid
+.IP \(bu 2
+\fBrows\fP (\fIint\fP) \-\- the number of rows in the grid
+.IP \(bu 2
+\fBcols\fP (\fIint\fP) \-\- the number of columns in the grid
+.IP \(bu 2
+\fBattempts\fP (\fIint\fP) \-\- the number of times to attempt placing a word
+.IP \(bu 2
+\fBalph\fP (\fIlist\fP) \-\- the alphabet, to be used for filling blank cells
+.UNINDENT
+.TP
+.B Return type
+tuple
+.UNINDENT
+.UNINDENT
+.SS Module contents
+.SS nltk.parse package
+.SS Submodules
+.SS nltk.parse.api module
+.INDENT 0.0
+.TP
+.B class nltk.parse.api.ParserI
+Bases: \fBobject\fP
+.sp
+A processing class for deriving trees that represent possible
+structures for a sequence of tokens. These tree structures are
+known as "parses". Typically, parsers are used to derive syntax
+trees for sentences. But parsers can also be used to derive other
+kinds of tree structure, such as morphological trees and discourse
+structures.
+.INDENT 7.0
+.TP
+.B Subclasses must define:
+.INDENT 7.0
+.IP \(bu 2
+at least one of: \fBparse()\fP, \fBparse_sents()\fP\&.
+.UNINDENT
+.TP
+.B Subclasses may define:
+.INDENT 7.0
+.IP \(bu 2
+\fBgrammar()\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B grammar()
+.INDENT 7.0
+.TP
+.B Returns
+The grammar used by this parser.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(sent, *args, **kwargs)
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence.
+When possible this iterator is sorted from most likely to least likely.
+.TP
+.B Parameters
+\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_all(sent, *args, **kwargs)
+.INDENT 7.0
+.TP
+.B Return type
+list(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_one(sent, *args, **kwargs)
+.INDENT 7.0
+.TP
+.B Return type
+Tree or None
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_sents(sents, *args, **kwargs)
+Apply \fBself.parse()\fP to each element of \fBsents\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+iter(iter(Tree))
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.parse.bllip module
+.INDENT 0.0
+.TP
+.B class nltk.parse.bllip.BllipParser(parser_model=None, reranker_features=None, reranker_weights=None, parser_options=None, reranker_options=None)
+Bases: \fI\%nltk.parse.api.ParserI\fP
+.sp
+Interface for parsing with BLLIP Parser. BllipParser objects can be
+constructed with the \fBBllipParser.from_unified_model_dir\fP class
+method or manually using the \fBBllipParser\fP constructor.
+.INDENT 7.0
+.TP
+.B classmethod from_unified_model_dir(model_dir, parser_options=None, reranker_options=None)
+Create a \fBBllipParser\fP object from a unified parsing model
+directory. Unified parsing model directories are a standardized
+way of storing BLLIP parser and reranker models together on disk.
+See \fBbllipparser.RerankingParser.get_unified_model_parameters()\fP
+for more information about unified model directories.
+.INDENT 7.0
+.TP
+.B Returns
+A \fBBllipParser\fP object using the parser and reranker
+models in the model directory.
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBmodel_dir\fP (\fIstr\fP) \-\- Path to the unified model directory.
+.IP \(bu 2
+\fBparser_options\fP (\fIdict(str)\fP) \-\- optional dictionary of parser options, see
+\fBbllipparser.RerankingParser.RerankingParser.load_parser_options()\fP
+for more information.
+.IP \(bu 2
+\fBreranker_options\fP (\fIdict(str)\fP) \-\- optional dictionary of reranker options, see
+\fBbllipparser.RerankingParser.RerankingParser.load_reranker_model()\fP
+for more information.
+.UNINDENT
+.TP
+.B Return type
+BllipParser
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(sentence)
+Use BLLIP Parser to parse a sentence. Takes a sentence as a list
+of words; it will be automatically tagged with this BLLIP Parser
+instance\(aqs tagger.
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence
+from most likely to least likely.
+.TP
+.B Parameters
+\fBsentence\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagged_parse(word_and_tag_pairs)
+Use BLLIP to parse a sentence. Takes a sentence as a list of
+(word, tag) tuples; the sentence must have already been tokenized
+and tagged. BLLIP will attempt to use the tags provided but may
+use others if it can\(aqt come up with a complete parse subject
+to those constraints. You may also specify a tag as \fBNone\fP
+to leave a token\(aqs tag unconstrained.
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence
+from most likely to least likely.
+.TP
+.B Parameters
+\fBword_and_tag_pairs\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Input sentence to parse as (word, tag) pairs
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.parse.chart module
+.sp
+Data classes and parser implementations for "chart parsers", which
+use dynamic programming to efficiently parse a text. A chart
+parser derives parse trees for a text by iteratively adding "edges"
+to a "chart." Each edge represents a hypothesis about the tree
+structure for a subsequence of the text. The chart is a
+"blackboard" for composing and combining these hypotheses.
+.sp
+When a chart parser begins parsing a text, it creates a new (empty)
+chart, spanning the text. It then incrementally adds new edges to the
+chart. A set of "chart rules" specifies the conditions under which
+new edges should be added to the chart. Once the chart reaches a
+stage where none of the chart rules adds any new edges, parsing is
+complete.
+.sp
+Charts are encoded with the \fBChart\fP class, and edges are encoded with
+the \fBTreeEdge\fP and \fBLeafEdge\fP classes. The chart parser module
+defines the following chart parsers:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+\fBChartParser\fP is a simple and flexible chart parser. Given a
+set of chart rules, it will apply those rules to the chart until
+no more edges are added.
+.IP \(bu 2
+\fBSteppingChartParser\fP is a subclass of \fBChartParser\fP that can
+be used to step through the parsing process.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.AbstractChartRule
+Bases: \fI\%nltk.parse.chart.ChartRuleI\fP
+.sp
+An abstract base class for chart rules. \fBAbstractChartRule\fP
+provides:
+.INDENT 7.0
+.IP \(bu 2
+A default implementation for \fBapply\fP\&.
+.IP \(bu 2
+A default implementation for \fBapply_everywhere\fP
+(currently, this implementation assumes that \fBNUM_EDGES\fP <= 3).
+.IP \(bu 2
+A default implementation for \fB__str__\fP, which returns a
+name based on the rule\(aqs class name.
+.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, *edges) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B apply_everywhere(chart, grammar) +Return a generator that will add all edges licensed by +this rule, given the edges that are currently in the +chart, one at a time. Each time the generator is resumed, +it will either add a new edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.BottomUpChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.chart.ChartParser\fP +.sp +A \fBChartParser\fP using a bottom\-up parsing strategy. +See \fBChartParser\fP for more information. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.BottomUpLeftCornerChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.chart.ChartParser\fP +.sp +A \fBChartParser\fP using a bottom\-up left\-corner parsing strategy. +This strategy is often more efficient than standard bottom\-up. +See \fBChartParser\fP for more information. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.BottomUpPredictCombineRule +Bases: \fI\%nltk.parse.chart.BottomUpPredictRule\fP +.sp +A rule licensing any edge corresponding to a production whose +right\-hand side begins with a complete edge\(aqs left\-hand side. In +particular, this rule specifies that \fB[A \-> alpha \e*]\fP +licenses the edge \fB[B \-> A \e* beta]\fP for each grammar +production \fBB \-> A beta\fP\&. +.INDENT 7.0 +.TP +.B Note +This is like \fBBottomUpPredictRule\fP, but it also applies +the \fBFundamentalRule\fP to the resulting edge. +.UNINDENT +.INDENT 7.0 +.TP +.B NUM_EDGES = 1 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.BottomUpPredictRule +Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP +.sp +A rule licensing any edge corresponding to a production whose +right\-hand side begins with a complete edge\(aqs left\-hand side. In +particular, this rule specifies that \fB[A \-> alpha \e*]\fP licenses +the edge \fB[B \-> \e* A beta]\fP for each grammar production \fBB \-> A beta\fP\&. +.INDENT 7.0 +.TP +.B NUM_EDGES = 1 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. 
The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.CachedTopDownPredictRule +Bases: \fI\%nltk.parse.chart.TopDownPredictRule\fP +.sp +A cached version of \fBTopDownPredictRule\fP\&. After the first time +this rule is applied to an edge with a given \fBend\fP and \fBnext\fP, +it will not generate any more edges for edges with that \fBend\fP and +\fBnext\fP\&. +.sp +If \fBchart\fP or \fBgrammar\fP are changed, then the cache is flushed. +.INDENT 7.0 +.TP +.B apply(chart, grammar, edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.Chart(tokens) +Bases: \fBobject\fP +.sp +A blackboard for hypotheses about the syntactic constituents of a +sentence. A chart contains a set of edges, and each edge encodes +a single hypothesis about the structure of some portion of the +sentence. +.sp +The \fBselect\fP method can be used to select a specific collection +of edges. For example \fBchart.select(is_complete=True, start=0)\fP +yields all complete edges whose start indices are 0. To ensure +the efficiency of these selection operations, \fBChart\fP dynamically +creates and maintains an index for each set of attributes that +have been selected on. +.sp +In order to reconstruct the trees that are represented by an edge, +the chart associates each edge with a set of child pointer lists. +A child pointer list is a list of the edges that license an +edge\(aqs right\-hand side. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB_tokens\fP \-\- The sentence that the chart covers. +.IP \(bu 2 +\fB_num_leaves\fP \-\- The number of tokens. +.IP \(bu 2 +\fB_edges\fP \-\- A list of the edges in the chart +.IP \(bu 2 +\fB_edge_to_cpls\fP \-\- A dictionary mapping each edge to a set +of child pointer lists that are associated with that edge. +.IP \(bu 2 +\fB_indexes\fP \-\- A dictionary mapping tuples of edge attributes +to indices, where each index maps the corresponding edge +attribute values to lists of edges. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B child_pointer_lists(edge) +Return the set of child pointer lists for the given edge. +Each child pointer list is a list of edges that have +been used to form this edge. +.INDENT 7.0 +.TP +.B Return type +list(list(EdgeI)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B dot_digraph() +.UNINDENT +.INDENT 7.0 +.TP +.B edges() +Return a list of all edges in this chart. New edges +that are added to the chart after the call to edges() +will \fInot\fP be contained in this list. +.INDENT 7.0 +.TP +.B Return type +list(EdgeI) +.TP +.B See +\fBiteredges\fP, \fBselect\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B initialize() +Clear the chart. +.UNINDENT +.INDENT 7.0 +.TP +.B insert(edge, *child_pointer_lists) +Add a new edge to the chart, and return True if this operation +modified the chart. 
In particular, return True if and only if the chart
+did not already contain \fBedge\fP, or if it did not already associate
+\fBchild_pointer_lists\fP with \fBedge\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBedge\fP (\fIEdgeI\fP) \-\- The new edge
+.IP \(bu 2
+\fBchild_pointer_lists\fP (\fIsequence of tuple\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A sequence of lists of the edges that
+were used to form this edge. This list is used to reconstruct
+the trees (or partial trees) that are associated with \fBedge\fP\&.
+.UNINDENT
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B insert_with_backpointer(new_edge, previous_edge, child_edge)
+Add a new edge to the chart, using a pointer to the previous edge.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B iteredges()
+Return an iterator over the edges in this chart. It is
+not guaranteed that new edges which are added to the
+chart before the iterator is exhausted will also be generated.
+.INDENT 7.0
+.TP
+.B Return type
+iter(EdgeI)
+.TP
+.B See
+\fBedges\fP, \fBselect\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B leaf(index)
+Return the leaf value of the word at the given index.
+.INDENT 7.0
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B leaves()
+Return a list of the leaf values of each word in the
+chart\(aqs sentence.
+.INDENT 7.0
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B num_edges()
+Return the number of edges contained in this chart.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B num_leaves()
+Return the number of words in this chart\(aqs sentence.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parses(root, tree_class=<class \(aqnltk.tree.Tree\(aq>)
+Return an iterator of the complete tree structures that span
+the entire chart, and whose root node is \fBroot\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pretty_format(width=None)
+Return a pretty\-printed string representation of this chart.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBwidth\fP \-\- The number of characters allotted to each
+index in the sentence.
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pretty_format_edge(edge, width=None)
+Return a pretty\-printed string representation of a given edge
+in this chart.
+.INDENT 7.0
+.TP
+.B Return type
+str
+.TP
+.B Parameters
+\fBwidth\fP \-\- The number of characters allotted to each
+index in the sentence.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pretty_format_leaves(width=None)
+Return a pretty\-printed string representation of this
+chart\(aqs leaves. This string can be used as a header
+for calls to \fBpretty_format_edge\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B select(**restrictions)
+Return an iterator over the edges in this chart. Any
+new edges that are added to the chart before the iterator
+is exhausted will also be generated. \fBrestrictions\fP
+can be used to restrict the set of edges that will be
+generated.
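+.sp
+A minimal sketch of building a chart and selecting from it (the toy
+grammar below is illustrative, not part of the API):
+.sp
+.nf
+.ft C
+>>> from nltk import CFG
+>>> from nltk.parse.chart import ChartParser
+>>> grammar = CFG.fromstring("""
+\&...     S \-> NP VP
+\&...     NP \-> \(aqI\(aq
+\&...     VP \-> \(aqsleep\(aq
+\&... """)
+>>> chart = ChartParser(grammar).chart_parse([\(aqI\(aq, \(aqsleep\(aq])
+>>> # Count the complete edges whose left\-hand side is the start symbol.
+>>> sum(1 for e in chart.select(lhs=grammar.start(), is_complete=True))
+1
+.ft P
+.fi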
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBspan\fP \-\- Only generate edges \fBe\fP where \fBe.span()==span\fP
+.IP \(bu 2
+\fBstart\fP \-\- Only generate edges \fBe\fP where \fBe.start()==start\fP
+.IP \(bu 2
+\fBend\fP \-\- Only generate edges \fBe\fP where \fBe.end()==end\fP
+.IP \(bu 2
+\fBlength\fP \-\- Only generate edges \fBe\fP where \fBe.length()==length\fP
+.IP \(bu 2
+\fBlhs\fP \-\- Only generate edges \fBe\fP where \fBe.lhs()==lhs\fP
+.IP \(bu 2
+\fBrhs\fP \-\- Only generate edges \fBe\fP where \fBe.rhs()==rhs\fP
+.IP \(bu 2
+\fBnextsym\fP \-\- Only generate edges \fBe\fP where
+\fBe.nextsym()==nextsym\fP
+.IP \(bu 2
+\fBdot\fP \-\- Only generate edges \fBe\fP where \fBe.dot()==dot\fP
+.IP \(bu 2
+\fBis_complete\fP \-\- Only generate edges \fBe\fP where
+\fBe.is_complete()==is_complete\fP
+.IP \(bu 2
+\fBis_incomplete\fP \-\- Only generate edges \fBe\fP where
+\fBe.is_incomplete()==is_incomplete\fP
+.UNINDENT
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B trees(edge, tree_class=<class \(aqnltk.tree.Tree\(aq>, complete=False)
+Return an iterator of the tree structures that are associated
+with \fBedge\fP\&.
+.sp
+If \fBedge\fP is incomplete, then the unexpanded children will be
+encoded as childless subtrees, whose node value is the
+corresponding terminal or nonterminal.
+.INDENT 7.0
+.TP
+.B Return type
+list(Tree)
+.TP
+.B Note
+If two trees share a common subtree, then the same
+Tree may be used to encode that subtree in
+both trees. If you need to eliminate this subtree
+sharing, then create a deep copy of each tree.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.ChartParser(grammar, strategy=BU_LC_STRATEGY, trace=0, trace_chart_width=50, use_agenda=True, chart_class=<class \(aqnltk.parse.chart.Chart\(aq>)
+Bases: \fI\%nltk.parse.api.ParserI\fP
+.sp
+A generic chart parser. A "strategy", or list of
+\fBChartRuleI\fP instances, is used to decide what edges to add to
+the chart. In particular, \fBChartParser\fP uses the following
+algorithm to parse texts:
+.nf
+Until no new edges are added:
+.in +2
+For each \fIrule\fP in \fIstrategy\fP:
+.in +2
+Apply \fIrule\fP to any applicable edges in the chart.
+.in -2
+.in -2
+Return any complete parses in the chart
+.fi
+.sp
+.INDENT 7.0
+.TP
+.B chart_parse(tokens, trace=None)
+Return the final parse \fBChart\fP from which all possible
+parse trees can be extracted.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+Chart
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B grammar()
+.INDENT 7.0
+.TP
+.B Returns
+The grammar used by this parser.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens, tree_class=<class \(aqnltk.tree.Tree\(aq>)
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence.
+When possible this iterator is sorted from most likely to least likely.
+.TP
+.B Parameters
+\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.ChartRuleI
+Bases: \fBobject\fP
+.sp
+A rule that specifies what new edges are licensed by any given set
+of existing edges. Each chart rule expects a fixed number of
+edges, as indicated by the class variable \fBNUM_EDGES\fP\&. In
+particular:
+.INDENT 7.0
+.IP \(bu 2
+A chart rule with \fBNUM_EDGES=0\fP specifies what new edges are
+licensed, regardless of existing edges.
+.IP \(bu 2 +A chart rule with \fBNUM_EDGES=1\fP specifies what new edges are +licensed by a single existing edge. +.IP \(bu 2 +A chart rule with \fBNUM_EDGES=2\fP specifies what new edges are +licensed by a pair of existing edges. +.UNINDENT +.INDENT 7.0 +.TP +.B Variables +\fBNUM_EDGES\fP \-\- The number of existing edges that this rule uses +to license new edges. Typically, this number ranges from zero +to two. +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, *edges) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B apply_everywhere(chart, grammar) +Return a generator that will add all edges licensed by +this rule, given the edges that are currently in the +chart, one at a time. Each time the generator is resumed, +it will either add a new edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.EdgeI +Bases: \fBobject\fP +.sp +A hypothesis about the structure of part of a sentence. +Each edge records the fact that a structure is (partially) +consistent with the sentence. An edge contains: +.INDENT 7.0 +.IP \(bu 2 +A span, indicating what part of the sentence is +consistent with the hypothesized structure. +.IP \(bu 2 +A left\-hand side, specifying what kind of structure is +hypothesized. +.IP \(bu 2 +A right\-hand side, specifying the contents of the +hypothesized structure. +.IP \(bu 2 +A dot position, indicating how much of the hypothesized +structure is consistent with the sentence. +.UNINDENT +.sp +Every edge is either complete or incomplete: +.INDENT 7.0 +.IP \(bu 2 +An edge is complete if its structure is fully consistent +with the sentence. +.IP \(bu 2 +An edge is incomplete if its structure is partially +consistent with the sentence. For every incomplete edge, the +span specifies a possible prefix for the edge\(aqs structure. +.UNINDENT +.sp +There are two kinds of edge: +.INDENT 7.0 +.IP \(bu 2 +A \fBTreeEdge\fP records which trees have been found to +be (partially) consistent with the text. +.IP \(bu 2 +A \fBLeafEdge\fP records the tokens occurring in the text. +.UNINDENT +.sp +The \fBEdgeI\fP interface provides a common interface to both types +of edge, allowing chart parsers to treat them in a uniform manner. +.INDENT 7.0 +.TP +.B dot() +Return this edge\(aqs dot position, which indicates how much of +the hypothesized structure is consistent with the +sentence. In particular, \fBself.rhs[:dot]\fP is consistent +with \fBtokens[self.start():self.end()]\fP\&. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B end() +Return the end index of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B is_complete() +Return True if this edge\(aqs structure is fully consistent +with the text. +.INDENT 7.0 +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B is_incomplete() +Return True if this edge\(aqs structure is partially consistent +with the text. 
+.INDENT 7.0 +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B length() +Return the length of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B lhs() +Return this edge\(aqs left\-hand side, which specifies what kind +of structure is hypothesized by this edge. +.INDENT 7.0 +.TP +.B See +\fBTreeEdge\fP and \fBLeafEdge\fP for a description of +the left\-hand side values for each edge type. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B nextsym() +Return the element of this edge\(aqs right\-hand side that +immediately follows its dot. +.INDENT 7.0 +.TP +.B Return type +Nonterminal or terminal or None +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B rhs() +Return this edge\(aqs right\-hand side, which specifies +the content of the structure hypothesized by this edge. +.INDENT 7.0 +.TP +.B See +\fBTreeEdge\fP and \fBLeafEdge\fP for a description of +the right\-hand side values for each edge type. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B span() +Return a tuple \fB(s, e)\fP, where \fBtokens[s:e]\fP is the +portion of the sentence that is consistent with this +edge\(aqs structure. +.INDENT 7.0 +.TP +.B Return type +tuple(int, int) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B start() +Return the start index of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.EmptyPredictRule +Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP +.sp +A rule that inserts all empty productions as passive edges, +in every position in the chart. +.INDENT 7.0 +.TP +.B NUM_EDGES = 0 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.FilteredBottomUpPredictCombineRule +Bases: \fI\%nltk.parse.chart.BottomUpPredictCombineRule\fP +.INDENT 7.0 +.TP +.B apply(chart, grammar, edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.FilteredSingleEdgeFundamentalRule +Bases: \fI\%nltk.parse.chart.SingleEdgeFundamentalRule\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.chart.FundamentalRule +Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP +.sp +A rule that joins two adjacent edges to form a single combined +edge. 
In particular, this rule specifies that any pair of edges
+.INDENT 7.0
+.IP \(bu 2
+\fB[A \-> alpha \e* B beta][i:j]\fP
+.IP \(bu 2
+\fB[B \-> gamma \e*][j:k]\fP
+.UNINDENT
+.sp
+licenses the edge:
+.INDENT 7.0
+.IP \(bu 2
+\fB[A \-> alpha B * beta][i:k]\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NUM_EDGES = 2
+.UNINDENT
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, left_edge, right_edge)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.LeafEdge(leaf, index)
+Bases: \fI\%nltk.parse.chart.EdgeI\fP
+.sp
+An edge that records the fact that a leaf value is consistent with
+a word in the sentence. A leaf edge consists of:
+.INDENT 7.0
+.IP \(bu 2
+An index, indicating the position of the word.
+.IP \(bu 2
+A leaf, specifying the word\(aqs content.
+.UNINDENT
+.sp
+A leaf edge\(aqs left\-hand side is its leaf value, and its right hand
+side is \fB()\fP\&. Its span is \fB[index, index+1]\fP, and its dot
+position is \fB0\fP\&.
+.INDENT 7.0
+.TP
+.B dot()
+Return this edge\(aqs dot position, which indicates how much of
+the hypothesized structure is consistent with the
+sentence. In particular, \fBself.rhs[:dot]\fP is consistent
+with \fBtokens[self.start():self.end()]\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B end()
+Return the end index of this edge\(aqs span.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_complete()
+Return True if this edge\(aqs structure is fully consistent
+with the text.
+.INDENT 7.0
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_incomplete()
+Return True if this edge\(aqs structure is partially consistent
+with the text.
+.INDENT 7.0
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B length()
+Return the length of this edge\(aqs span.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lhs()
+Return this edge\(aqs left\-hand side, which specifies what kind
+of structure is hypothesized by this edge.
+.INDENT 7.0
+.TP
+.B See
+\fBTreeEdge\fP and \fBLeafEdge\fP for a description of
+the left\-hand side values for each edge type.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B nextsym()
+Return the element of this edge\(aqs right\-hand side that
+immediately follows its dot.
+.INDENT 7.0
+.TP
+.B Return type
+Nonterminal or terminal or None
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B rhs()
+Return this edge\(aqs right\-hand side, which specifies
+the content of the structure hypothesized by this edge.
+.INDENT 7.0
+.TP
+.B See
+\fBTreeEdge\fP and \fBLeafEdge\fP for a description of
+the right\-hand side values for each edge type.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B span()
+Return a tuple \fB(s, e)\fP, where \fBtokens[s:e]\fP is the
+portion of the sentence that is consistent with this
+edge\(aqs structure.
+.INDENT 7.0
+.TP
+.B Return type
+tuple(int, int)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B start()
+Return the start index of this edge\(aqs span.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.LeafInitRule
+Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP
+.INDENT 7.0
+.TP
+.B NUM_EDGES = 0
+.UNINDENT
+.INDENT 7.0
+.TP
+.B apply(chart, grammar)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.LeftCornerChartParser(grammar, **parser_args)
+Bases: \fI\%nltk.parse.chart.ChartParser\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.SingleEdgeFundamentalRule
+Bases: \fI\%nltk.parse.chart.FundamentalRule\fP
+.sp
+A rule that joins a given edge with adjacent edges in the chart,
+to form combined edges. In particular, this rule specifies that
+either of the edges:
+.INDENT 7.0
+.IP \(bu 2
+\fB[A \-> alpha \e* B beta][i:j]\fP
+.IP \(bu 2
+\fB[B \-> gamma \e*][j:k]\fP
+.UNINDENT
+.sp
+licenses the edge:
+.INDENT 7.0
+.IP \(bu 2
+\fB[A \-> alpha B * beta][i:k]\fP
+.UNINDENT
+.sp
+if the other edge is already in the chart.
+.INDENT 7.0
+.TP
+.B Note
+This is basically \fBFundamentalRule\fP, with one edge left
+unspecified.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NUM_EDGES = 1
+.UNINDENT
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, edge)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.SteppingChartParser(grammar, strategy=[], trace=0)
+Bases: \fI\%nltk.parse.chart.ChartParser\fP
+.sp
+A \fBChartParser\fP that allows you to step through the parsing
+process, adding a single edge at a time. It also allows you to
+change the parser\(aqs strategy or grammar midway through parsing a
+text.
+.sp
+The \fBinitialize\fP method is used to start parsing a text. \fBstep\fP
+adds a single edge to the chart. \fBset_strategy\fP changes the
+strategy used by the chart parser. \fBparses\fP returns the set of
+parses that has been found by the chart parser.
+.INDENT 7.0
+.TP
+.B Variables
+\fB_restart\fP \-\- Records whether the parser\(aqs strategy, grammar,
+or chart has been changed. If so, then \fBstep\fP must restart
+the parsing algorithm.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chart()
+Return the chart that is used by this parser.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B current_chartrule()
+Return the chart rule used to generate the most recent edge.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B grammar()
+Return the grammar used by this parser.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B initialize(tokens)
+Begin parsing the given tokens.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens, tree_class=<class \(aqnltk.tree.Tree\(aq>)
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence.
+When possible this iterator is sorted from most likely to least likely.
+.TP
+.B Parameters
+\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parses(tree_class=<class \(aqnltk.tree.Tree\(aq>)
+Return the parse trees currently contained in the chart.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B set_chart(chart)
+Load a given chart into the chart parser.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B set_grammar(grammar)
+Change the grammar used by the parser.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B set_strategy(strategy)
+Change the strategy that the parser uses to decide which edges
+to add to the chart.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBstrategy\fP (\fIlist\fP\fI(\fP\fIChartRuleI\fP\fI)\fP) \-\- A list of rules that should be used to decide
+what edges to add to the chart.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B step()
+Return a generator that adds edges to the chart, one at a
+time. Each time the generator is resumed, it adds a single
+edge and yields that edge. If no more edges can be added,
+then it yields None.
+.sp
+If the parser\(aqs strategy, grammar, or chart is changed, then
+the generator will continue adding edges using the new
+strategy, grammar, or chart.
+.sp
+Note that this generator never terminates, since the grammar
+or strategy might be changed to values that would add new
+edges. Instead, it yields None when no more edges can be
+added with the current strategy and grammar.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B strategy()
+Return the strategy used by this parser.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.TopDownChartParser(grammar, **parser_args)
+Bases: \fI\%nltk.parse.chart.ChartParser\fP
+.sp
+A \fBChartParser\fP using a top\-down parsing strategy.
+See \fBChartParser\fP for more information.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.TopDownInitRule
+Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP
+.sp
+A rule licensing edges corresponding to the grammar productions for
+the grammar\(aqs start symbol. In particular, this rule specifies that
+\fB[S \-> \e* alpha][0:0]\fP is licensed for each grammar production
+\fBS \-> alpha\fP, where \fBS\fP is the grammar\(aqs start symbol.
+.INDENT 7.0
+.TP
+.B NUM_EDGES = 0
+.UNINDENT
+.INDENT 7.0
+.TP
+.B apply(chart, grammar)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.TopDownPredictRule
+Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP
+.sp
+A rule licensing edges corresponding to the grammar productions
+for the nonterminal following an incomplete edge\(aqs dot. In
+particular, this rule specifies that
+\fB[A \-> alpha \e* B beta][i:j]\fP licenses the edge
+\fB[B \-> \e* gamma][j:j]\fP for each grammar production \fBB \-> gamma\fP\&.
+.INDENT 7.0
+.TP
+.B Note
+This rule corresponds to the Predictor Rule in Earley parsing.
+.UNINDENT
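+.sp
+A small sketch of this rule in action as part of a top\-down parse
+(the one\-rule grammar is illustrative only):
+.sp
+.nf
+.ft C
+>>> from nltk import CFG
+>>> from nltk.parse.chart import TopDownChartParser
+>>> g = CFG.fromstring("S \-> \(aqa\(aq")
+>>> [str(t) for t in TopDownChartParser(g).parse([\(aqa\(aq])]
+[\(aq(S a)\(aq]
+.ft P
+.fi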
+.INDENT 7.0
+.TP
+.B NUM_EDGES = 1
+.UNINDENT
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, edge)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.chart.TreeEdge(span, lhs, rhs, dot=0)
+Bases: \fI\%nltk.parse.chart.EdgeI\fP
+.sp
+An edge that records the fact that a tree is (partially)
+consistent with the sentence. A tree edge consists of:
+.INDENT 7.0
+.IP \(bu 2
+A span, indicating what part of the sentence is
+consistent with the hypothesized tree.
+.IP \(bu 2
+A left\-hand side, specifying the hypothesized tree\(aqs node
+value.
+.IP \(bu 2
+A right\-hand side, specifying the hypothesized tree\(aqs
+children. Each element of the right\-hand side is either a
+terminal, specifying a token with that terminal as its leaf
+value; or a nonterminal, specifying a subtree with that
+nonterminal\(aqs symbol as its node value.
+.IP \(bu 2
+A dot position, indicating which children are consistent
+with part of the sentence. In particular, if \fBdot\fP is the
+dot position, \fBrhs\fP is the right\-hand side, \fB(start,end)\fP
+is the span, and \fBsentence\fP is the list of tokens in the
+sentence, then \fBtokens[start:end]\fP can be spanned by the
+children specified by \fBrhs[:dot]\fP\&.
+.UNINDENT
+.sp
+For more information about edges, see the \fBEdgeI\fP interface.
+.INDENT 7.0
+.TP
+.B dot()
+Return this edge\(aqs dot position, which indicates how much of
+the hypothesized structure is consistent with the
+sentence. In particular, \fBself.rhs[:dot]\fP is consistent
+with \fBtokens[self.start():self.end()]\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B end()
+Return the end index of this edge\(aqs span.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static from_production(production, index)
+Return a new \fBTreeEdge\fP formed from the given production.
+The new edge\(aqs left\-hand side and right\-hand side will
+be taken from \fBproduction\fP; its span will be
+\fB(index,index)\fP; and its dot position will be \fB0\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+TreeEdge
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_complete()
+Return True if this edge\(aqs structure is fully consistent
+with the text.
+.INDENT 7.0
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_incomplete()
+Return True if this edge\(aqs structure is partially consistent
+with the text.
+.INDENT 7.0
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B length()
+Return the length of this edge\(aqs span.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lhs()
+Return this edge\(aqs left\-hand side, which specifies what kind
+of structure is hypothesized by this edge.
+.INDENT 7.0
+.TP
+.B See
+\fBTreeEdge\fP and \fBLeafEdge\fP for a description of
+the left\-hand side values for each edge type.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B move_dot_forward(new_end)
+Return a new \fBTreeEdge\fP formed from this edge.
+The new edge\(aqs dot position is increased by \fB1\fP, +and its end index will be replaced by \fBnew_end\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBnew_end\fP (\fIint\fP) \-\- The new end index. +.TP +.B Return type +TreeEdge +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B nextsym() +Return the element of this edge\(aqs right\-hand side that +immediately follows its dot. +.INDENT 7.0 +.TP +.B Return type +Nonterminal or terminal or None +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B rhs() +Return this edge\(aqs right\-hand side, which specifies +the content of the structure hypothesized by this edge. +.INDENT 7.0 +.TP +.B See +\fBTreeEdge\fP and \fBLeafEdge\fP for a description of +the right\-hand side values for each edge type. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B span() +Return a tuple \fB(s, e)\fP, where \fBtokens[s:e]\fP is the +portion of the sentence that is consistent with this +edge\(aqs structure. +.INDENT 7.0 +.TP +.B Return type +tuple(int, int) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B start() +Return the start index of this edge\(aqs span. +.INDENT 7.0 +.TP +.B Return type +int +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.chart.demo(choice=None, print_times=True, print_grammar=False, print_trees=True, trace=2, sent=\(aqI saw John with a dog with my cookie\(aq, numparses=5) +A demonstration of the chart parsers. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.chart.demo_grammar() +.UNINDENT +.SS nltk.parse.corenlp module +.INDENT 0.0 +.TP +.B class nltk.parse.corenlp.CoreNLPDependencyParser(url=\(aqhttp://localhost:9000\(aq, encoding=\(aqutf8\(aq, tagtype=None) +Bases: \fI\%nltk.parse.corenlp.GenericCoreNLPParser\fP +.sp +Dependency parser. +.sp +.nf +.ft C +>>> dep_parser = CoreNLPDependencyParser(url=\(aqhttp://localhost:9000\(aq) +.ft P +.fi +.sp +.nf +.ft C +>>> parse, = dep_parser.raw_parse( +\&... \(aqThe quick brown fox jumps over the lazy dog.\(aq +\&... ) +>>> print(parse.to_conll(4)) +The DT 4 det +quick JJ 4 amod +brown JJ 4 amod +fox NN 5 nsubj +jumps VBZ 0 ROOT +over IN 9 case +the DT 9 det +lazy JJ 9 amod +dog NN 5 nmod +\&. . 5 punct +.ft P +.fi +.sp +.nf +.ft C +>>> print(parse.tree()) +(jumps (fox The quick brown) (dog over the lazy) .) +.ft P +.fi +.sp +.nf +.ft C +>>> for governor, dep, dependent in parse.triples(): +\&... print(governor, dep, dependent) + (\(aqjumps\(aq, \(aqVBZ\(aq) nsubj (\(aqfox\(aq, \(aqNN\(aq) + (\(aqfox\(aq, \(aqNN\(aq) det (\(aqThe\(aq, \(aqDT\(aq) + (\(aqfox\(aq, \(aqNN\(aq) amod (\(aqquick\(aq, \(aqJJ\(aq) + (\(aqfox\(aq, \(aqNN\(aq) amod (\(aqbrown\(aq, \(aqJJ\(aq) + (\(aqjumps\(aq, \(aqVBZ\(aq) nmod (\(aqdog\(aq, \(aqNN\(aq) + (\(aqdog\(aq, \(aqNN\(aq) case (\(aqover\(aq, \(aqIN\(aq) + (\(aqdog\(aq, \(aqNN\(aq) det (\(aqthe\(aq, \(aqDT\(aq) + (\(aqdog\(aq, \(aqNN\(aq) amod (\(aqlazy\(aq, \(aqJJ\(aq) + (\(aqjumps\(aq, \(aqVBZ\(aq) punct (\(aq.\(aq, \(aq.\(aq) +.ft P +.fi +.sp +.nf +.ft C +>>> (parse_fox, ), (parse_dog, ) = dep_parser.raw_parse_sents( +\&... [ +\&... \(aqThe quick brown fox jumps over the lazy dog.\(aq, +\&... \(aqThe quick grey wolf jumps over the lazy fox.\(aq, +\&... ] +\&... ) +>>> print(parse_fox.to_conll(4)) +The DT 4 det +quick JJ 4 amod +brown JJ 4 amod +fox NN 5 nsubj +jumps VBZ 0 ROOT +over IN 9 case +the DT 9 det +lazy JJ 9 amod +dog NN 5 nmod +\&. . 5 punct +.ft P +.fi +.sp +.nf +.ft C +>>> print(parse_dog.to_conll(4)) +The DT 4 det +quick JJ 4 amod +grey JJ 4 amod +wolf NN 5 nsubj +jumps VBZ 0 ROOT +over IN 9 case +the DT 9 det +lazy JJ 9 amod +fox NN 5 nmod +\&. . 
5 punct +.ft P +.fi +.sp +.nf +.ft C +>>> (parse_dog, ), (parse_friends, ) = dep_parser.parse_sents( +\&... [ +\&... "I \(aqm a dog".split(), +\&... "This is my friends \(aq cat ( the tabby )".split(), +\&... ] +\&... ) +>>> print(parse_dog.to_conll(4)) +I PRP 4 nsubj +\(aqm VBP 4 cop +a DT 4 det +dog NN 0 ROOT +.ft P +.fi +.sp +.nf +.ft C +>>> print(parse_friends.to_conll(4)) +This DT 6 nsubj +is VBZ 6 cop +my PRP$ 4 nmod:poss +friends NNS 6 nmod:poss +\(aq POS 4 case +cat NN 0 ROOT +\-LRB\- \-LRB\- 9 punct +the DT 9 det +tabby NN 6 appos +\-RRB\- \-RRB\- 9 punct +.ft P +.fi +.sp +.nf +.ft C +>>> parse_john, parse_mary, = dep_parser.parse_text( +\&... \(aqJohn loves Mary. Mary walks.\(aq +\&... ) +.ft P +.fi +.sp +.nf +.ft C +>>> print(parse_john.to_conll(4)) +John NNP 2 nsubj +loves VBZ 0 ROOT +Mary NNP 2 dobj +\&. . 2 punct +.ft P +.fi +.sp +.nf +.ft C +>>> print(parse_mary.to_conll(4)) +Mary NNP 2 nsubj +walks VBZ 0 ROOT +\&. . 2 punct +.ft P +.fi +.sp +Non\-breaking space inside of a token. +.sp +.nf +.ft C +>>> len( +\&... next( +\&... dep_parser.raw_parse( +\&... \(aqAnhalt said children typically treat a 20\-ounce soda bottle as one \(aq +\&... \(aqserving, while it actually contains 2 1/2 servings.\(aq +\&... ) +\&... ).nodes +\&... ) +21 +.ft P +.fi +.sp +Phone numbers. +.sp +.nf +.ft C +>>> len( +\&... next( +\&... dep_parser.raw_parse(\(aqThis is not going to crash: 01 111 555.\(aq) +\&... ).nodes +\&... ) +10 +.ft P +.fi +.sp +.nf +.ft C +>>> print( +\&... next( +\&... dep_parser.raw_parse(\(aqThe underscore _ should not simply disappear.\(aq) +\&... ).to_conll(4) +\&... ) +The DT 3 det +underscore VBP 3 amod +_ NN 7 nsubj +should MD 7 aux +not RB 7 neg +simply RB 7 advmod +disappear VB 0 ROOT +\&. . 7 punct +.ft P +.fi +.sp +.nf +.ft C +>>> print( +\&... \(aq\en\(aq.join( +\&... next( +\&... dep_parser.raw_parse( +\&... \(aqfor all of its insights into the dream world of teen life , and its electronic expression through \(aq +\&... \(aqcyber culture , the film gives no quarter to anyone seeking to pull a cohesive story out of its 2 \(aq +\&... \(aq1/2\-hour running time .\(aq +\&... ) +\&... ).to_conll(4).split(\(aq\en\(aq)[\-8:] +\&... ) +\&... ) +its PRP$ 40 nmod:poss +2\ 1/2 CD 40 nummod +\- : 40 punct +hour NN 31 nmod +running VBG 42 amod +time NN 40 dep +\&. . 24 punct + +.ft P +.fi +.INDENT 7.0 +.TP +.B make_tree(result) +.UNINDENT +.INDENT 7.0 +.TP +.B parser_annotator = \(aqdepparse\(aq +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.corenlp.CoreNLPParser(url=\(aqhttp://localhost:9000\(aq, encoding=\(aqutf8\(aq, tagtype=None) +Bases: \fI\%nltk.parse.corenlp.GenericCoreNLPParser\fP +.sp +.nf +.ft C +>>> parser = CoreNLPParser(url=\(aqhttp://localhost:9000\(aq) +.ft P +.fi +.sp +.nf +.ft C +>>> next( +\&... parser.raw_parse(\(aqThe quick brown fox jumps over the lazy dog.\(aq) +\&... ).pretty_print() + ROOT + | + S + _______________|__________________________ + | VP | + | _________|___ | + | | PP | + | | ________|___ | + NP | | NP | + ____|__________ | | _______|____ | + DT JJ JJ NN VBZ IN DT JJ NN . + | | | | | | | | | | +The quick brown fox jumps over the lazy dog . +.ft P +.fi +.sp +.nf +.ft C +>>> (parse_fox, ), (parse_wolf, ) = parser.raw_parse_sents( +\&... [ +\&... \(aqThe quick brown fox jumps over the lazy dog.\(aq, +\&... \(aqThe quick grey wolf jumps over the lazy fox.\(aq, +\&... ] +\&... 
) +.ft P +.fi +.sp +.nf +.ft C +>>> parse_fox.pretty_print() + ROOT + | + S + _______________|__________________________ + | VP | + | _________|___ | + | | PP | + | | ________|___ | + NP | | NP | + ____|__________ | | _______|____ | + DT JJ JJ NN VBZ IN DT JJ NN . + | | | | | | | | | | +The quick brown fox jumps over the lazy dog . +.ft P +.fi +.sp +.nf +.ft C +>>> parse_wolf.pretty_print() + ROOT + | + S + _______________|__________________________ + | VP | + | _________|___ | + | | PP | + | | ________|___ | + NP | | NP | + ____|_________ | | _______|____ | + DT JJ JJ NN VBZ IN DT JJ NN . + | | | | | | | | | | +The quick grey wolf jumps over the lazy fox . +.ft P +.fi +.sp +.nf +.ft C +>>> (parse_dog, ), (parse_friends, ) = parser.parse_sents( +\&... [ +\&... "I \(aqm a dog".split(), +\&... "This is my friends \(aq cat ( the tabby )".split(), +\&... ] +\&... ) +.ft P +.fi +.sp +.nf +.ft C +>>> parse_dog.pretty_print() + ROOT + | + S + _______|____ + | VP + | ________|___ + NP | NP + | | ___|___ +PRP VBP DT NN + | | | | + I \(aqm a dog +.ft P +.fi +.sp +.nf +.ft C +>>> parse_friends.pretty_print() + ROOT + | + S + ____|___________ + | VP + | ___________|_____________ + | | NP + | | _______|_________ + | | NP PRN + | | _____|_______ ____|______________ + NP | NP | | NP | + | | ______|_________ | | ___|____ | + DT VBZ PRP$ NNS POS NN \-LRB\- DT NN \-RRB\- + | | | | | | | | | | +This is my friends \(aq cat \-LRB\- the tabby \-RRB\- +.ft P +.fi +.sp +.nf +.ft C +>>> parse_john, parse_mary, = parser.parse_text( +\&... \(aqJohn loves Mary. Mary walks.\(aq +\&... ) +.ft P +.fi +.sp +.nf +.ft C +>>> parse_john.pretty_print() + ROOT + | + S + _____|_____________ + | VP | + | ____|___ | + NP | NP | + | | | | +NNP VBZ NNP . + | | | | +John loves Mary . +.ft P +.fi +.sp +.nf +.ft C +>>> parse_mary.pretty_print() + ROOT + | + S + _____|____ + NP VP | + | | | +NNP VBZ . + | | | +Mary walks . +.ft P +.fi +.sp +.nf +.ft C +>>> next( +\&... parser.raw_parse( +\&... \(aqNASIRIYA, Iraq—Iraqi doctors who treated former prisoner of war \(aq +\&... \(aqJessica Lynch have angrily dismissed claims made in her biography \(aq +\&... \(aqthat she was raped by her Iraqi captors.\(aq +\&... ) +\&... ).height() +20 +.ft P +.fi +.sp +.nf +.ft C +>>> next( +\&... parser.raw_parse( +\&... "The broader Standard & Poor\(aqs 500 Index <.SPX> was 0.46 points lower, or " +\&... \(aq0.05 percent, at 997.02.\(aq +\&... ) +\&... ).height() +9 +.ft P +.fi +.INDENT 7.0 +.TP +.B make_tree(result) +.UNINDENT +.INDENT 7.0 +.TP +.B parser_annotator = \(aqparse\(aq +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.corenlp.CoreNLPServer(path_to_jar=None, path_to_models_jar=None, verbose=False, java_options=None, corenlp_options=None, port=None) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B start(stdout=\(aqdevnull\(aq, stderr=\(aqdevnull\(aq) +Starts the CoreNLP server +.INDENT 7.0 +.TP +.B Parameters +\fBstderr\fP (\fIstdout\fP\fI,\fP) \-\- Specifies where CoreNLP output is redirected. Valid values are \(aqdevnull\(aq, \(aqstdout\(aq, \(aqpipe\(aq +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B stop() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.parse.corenlp.CoreNLPServerError +Bases: \fBOSError\fP +.sp +Exceptions associated with the Core NLP server. 
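+.sp
+A hedged usage sketch of when this exception is raised (it assumes a local
+CoreNLP installation whose jars can be located from the environment; no
+paths here are part of the documented API):
+.sp
+.nf
+.ft C
+>>> from nltk.parse.corenlp import CoreNLPServer, CoreNLPServerError
+>>> server = CoreNLPServer()
+>>> try:
+\&...     server.start()  # raises CoreNLPServerError if the server cannot start
+\&... except CoreNLPServerError as err:
+\&...     print(\(aqcould not start the server:\(aq, err)
+\&... else:
+\&...     server.stop()
+.ft P
+.fi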
+.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.corenlp.GenericCoreNLPParser(url=\(aqhttp://localhost:9000\(aq, encoding=\(aqutf8\(aq, tagtype=None) +Bases: \fI\%nltk.parse.api.ParserI\fP, \fBnltk.tokenize.api.TokenizerI\fP, \fBnltk.tag.api.TaggerI\fP +.sp +Interface to the CoreNLP Parser. +.INDENT 7.0 +.TP +.B api_call(data, properties=None, timeout=60) +.UNINDENT +.INDENT 7.0 +.TP +.B parse_sents(sentences, *args, **kwargs) +Parse multiple sentences. +.sp +Takes multiple sentences as a list where each sentence is a list of +words. Each sentence will be automatically tagged with this +CoreNLPParser instance\(aqs tagger. +.sp +If a whitespace exists inside a token, then the token will be treated as +several tokens. +.INDENT 7.0 +.TP +.B Parameters +\fBsentences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Input sentences to parse +.TP +.B Return type +iter(iter(Tree)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse_text(text, *args, **kwargs) +Parse a piece of text. +.sp +The text might contain several sentences which will be split by CoreNLP. +.INDENT 7.0 +.TP +.B Parameters +\fBtext\fP (\fIstr\fP) \-\- text to be split. +.TP +.B Returns +an iterable of syntactic structures. # TODO: should it be an iterable of iterables? +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B raw_parse(sentence, properties=None, *args, **kwargs) +Parse a sentence. +.sp +Takes a sentence as a string; before parsing, it will be automatically +tokenized and tagged by the CoreNLP Parser. +.INDENT 7.0 +.TP +.B Parameters +\fBsentence\fP (\fIstr\fP) \-\- Input sentence to parse +.TP +.B Return type +iter(Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B raw_parse_sents(sentences, verbose=False, properties=None, *args, **kwargs) +Parse multiple sentences. +.sp +Takes multiple sentences as a list of strings. Each sentence will be +automatically tokenized and tagged. +.INDENT 7.0 +.TP +.B Parameters +\fBsentences\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Input sentences to parse. +.TP +.B Return type +iter(iter(Tree)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B raw_tag_sents(sentences) +Tag multiple sentences. +.sp +Takes multiple sentences as a list where each sentence is a string. +.INDENT 7.0 +.TP +.B Parameters +\fBsentences\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Input sentences to tag +.TP +.B Return type +list(list(list(tuple(str, str))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tag(sentence) +Tag a list of tokens. 
+.INDENT 7.0 +.TP +.B Return type +list(tuple(str, str)) +.UNINDENT +.sp +.nf +.ft C +>>> parser = CoreNLPParser(url=\(aqhttp://localhost:9000\(aq, tagtype=\(aqner\(aq) +>>> tokens = \(aqRami Eid is studying at Stony Brook University in NY\(aq.split() +>>> parser.tag(tokens) +[(\(aqRami\(aq, \(aqPERSON\(aq), (\(aqEid\(aq, \(aqPERSON\(aq), (\(aqis\(aq, \(aqO\(aq), (\(aqstudying\(aq, \(aqO\(aq), (\(aqat\(aq, \(aqO\(aq), (\(aqStony\(aq, \(aqORGANIZATION\(aq), +(\(aqBrook\(aq, \(aqORGANIZATION\(aq), (\(aqUniversity\(aq, \(aqORGANIZATION\(aq), (\(aqin\(aq, \(aqO\(aq), (\(aqNY\(aq, \(aqO\(aq)] +.ft P +.fi +.sp +.nf +.ft C +>>> parser = CoreNLPParser(url=\(aqhttp://localhost:9000\(aq, tagtype=\(aqpos\(aq) +>>> tokens = "What is the airspeed of an unladen swallow ?".split() +>>> parser.tag(tokens) +[(\(aqWhat\(aq, \(aqWP\(aq), (\(aqis\(aq, \(aqVBZ\(aq), (\(aqthe\(aq, \(aqDT\(aq), +(\(aqairspeed\(aq, \(aqNN\(aq), (\(aqof\(aq, \(aqIN\(aq), (\(aqan\(aq, \(aqDT\(aq), +(\(aqunladen\(aq, \(aqJJ\(aq), (\(aqswallow\(aq, \(aqVB\(aq), (\(aq?\(aq, \(aq.\(aq)] +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B tag_sents(sentences) +Tag multiple sentences. +.sp +Takes multiple sentences as a list where each sentence is a list of +tokens. +.INDENT 7.0 +.TP +.B Parameters +\fBsentences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Input sentences to tag +.TP +.B Return type +list(list(tuple(str, str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(text, properties=None) +Tokenize a string of text. +.sp +.nf +.ft C +>>> parser = CoreNLPParser(url=\(aqhttp://localhost:9000\(aq) +.ft P +.fi +.sp +.nf +.ft C +>>> text = \(aqGood muffins cost $3.88\enin New York. Please buy me\entwo of them.\enThanks.\(aq +>>> list(parser.tokenize(text)) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aq.\(aq, \(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq, \(aqThanks\(aq, \(aq.\(aq] +.ft P +.fi +.sp +.nf +.ft C +>>> s = "The colour of the wall is blue." +>>> list( +\&... parser.tokenize( +\&... \(aqThe colour of the wall is blue.\(aq, +\&... properties={\(aqtokenize.options\(aq: \(aqamericanize=true\(aq}, +\&... ) +\&... ) +[\(aqThe\(aq, \(aqcolor\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqwall\(aq, \(aqis\(aq, \(aqblue\(aq, \(aq.\(aq] +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.corenlp.transform(sentence) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.corenlp.try_port(port=0) +.UNINDENT +.SS nltk.parse.dependencygraph module +.sp +Tools for reading and writing dependency trees. +The input is assumed to be in Malt\-TAB format +(\fI\%http://stp.lingfil.uu.se/~nivre/research/MaltXML.html\fP). +.INDENT 0.0 +.TP +.B class nltk.parse.dependencygraph.DependencyGraph(tree_str=None, cell_extractor=None, zero_based=False, cell_separator=None, top_relation_label=\(aqROOT\(aq) +Bases: \fBobject\fP +.sp +A container for the nodes and labelled edges of a dependency structure. +.INDENT 7.0 +.TP +.B add_arc(head_address, mod_address) +Adds an arc from the node specified by head_address to the +node specified by the mod address. +.UNINDENT +.INDENT 7.0 +.TP +.B add_node(node) +.UNINDENT +.INDENT 7.0 +.TP +.B connect_graph() +Fully connects all non\-root nodes. All nodes are set to be dependents +of the root node. +.UNINDENT +.INDENT 7.0 +.TP +.B contains_address(node_address) +Returns true if the graph contains a node with the given node +address, false otherwise. 
+.UNINDENT
+.INDENT 7.0
+.TP
+.B contains_cycle()
+Check whether there are cycles.
+.sp
+.nf
+.ft C
+>>> dg = DependencyGraph(treebank_data)
+>>> dg.contains_cycle()
+False
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> cyclic_dg = DependencyGraph()
+>>> top = {\(aqword\(aq: None, \(aqdeps\(aq: [1], \(aqrel\(aq: \(aqTOP\(aq, \(aqaddress\(aq: 0}
+>>> child1 = {\(aqword\(aq: None, \(aqdeps\(aq: [2], \(aqrel\(aq: \(aqNTOP\(aq, \(aqaddress\(aq: 1}
+>>> child2 = {\(aqword\(aq: None, \(aqdeps\(aq: [4], \(aqrel\(aq: \(aqNTOP\(aq, \(aqaddress\(aq: 2}
+>>> child3 = {\(aqword\(aq: None, \(aqdeps\(aq: [1], \(aqrel\(aq: \(aqNTOP\(aq, \(aqaddress\(aq: 3}
+>>> child4 = {\(aqword\(aq: None, \(aqdeps\(aq: [3], \(aqrel\(aq: \(aqNTOP\(aq, \(aqaddress\(aq: 4}
+>>> cyclic_dg.nodes = {
+\&... 0: top,
+\&... 1: child1,
+\&... 2: child2,
+\&... 3: child3,
+\&... 4: child4,
+\&... }
+>>> cyclic_dg.root = top
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> cyclic_dg.contains_cycle()
+[3, 1, 2, 4]
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_by_address(node_address)
+Return the node with the given address.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_cycle_path(curr_node, goal_node_index)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B left_children(node_index)
+Returns the number of left children under the node specified
+by the given address.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static load(filename, zero_based=False, cell_separator=None, top_relation_label=\(aqROOT\(aq)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfilename\fP \-\- a name of a file in Malt\-TAB format
+.IP \(bu 2
+\fBzero_based\fP \-\- nodes in the input file are numbered starting from 0
+rather than 1 (as produced by, e.g., zpar)
+.IP \(bu 2
+\fBcell_separator\fP (\fIstr\fP) \-\- the cell separator. If not provided, cells
+are split by whitespace.
+.IP \(bu 2
+\fBtop_relation_label\fP (\fIstr\fP) \-\- the label by which the top relation is
+identified, for example, \fIROOT\fP, \fInull\fP or \fITOP\fP\&.
+.UNINDENT
+.TP
+.B Returns
+a list of DependencyGraphs
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B nx_graph()
+Convert the data in a \fBnodelist\fP into a networkx labeled directed graph.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B redirect_arcs(originals, redirect)
+Redirects arcs to any of the nodes in the originals list
+to the redirect node address.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B remove_by_address(address)
+Removes the node with the given address. References
+to this node in others will still exist.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B right_children(node_index)
+Returns the number of right children under the node specified
+by the given address.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B to_conll(style)
+The dependency graph in CoNLL format.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBstyle\fP (\fIint\fP) \-\- the style to use for the format (3, 4, 10 columns)
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B to_dot()
+Return a dot representation suitable for using with Graphviz.
+.sp
+.nf
+.ft C
+>>> dg = DependencyGraph(
+\&... \(aqJohn N 2\en\(aq
+\&... \(aqloves V 0\en\(aq
+\&... \(aqMary N 2\(aq
+\&... )
+>>> print(dg.to_dot())
+digraph G{
+edge [dir=forward]
+node [shape=plaintext]
+
+0 [label="0 (None)"]
+0 \-> 2 [label="ROOT"]
+1 [label="1 (John)"]
+2 [label="2 (loves)"]
+2 \-> 1 [label=""]
+2 \-> 3 [label=""]
+3 [label="3 (Mary)"]
+}
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tree()
+Starting with the \fBroot\fP node, build a dependency tree using the NLTK
+\fBTree\fP constructor. Dependency labels are omitted.
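+.sp
+For instance, reusing the three\-token graph from the \fBto_dot()\fP example
+above (a minimal sketch; \fIloves\fP is the root, with \fIJohn\fP and \fIMary\fP
+as its dependents):
+.sp
+.nf
+.ft C
+>>> dg = DependencyGraph(
+\&... \(aqJohn N 2\en\(aq
+\&... \(aqloves V 0\en\(aq
+\&... \(aqMary N 2\(aq
+\&... )
+>>> print(dg.tree())
+(loves John Mary)
+.ft P
+.fi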
+.UNINDENT
+.INDENT 7.0
+.TP
+.B triples(node=None)
+Extract dependency triples of the form:
+((head word, head tag), rel, (dep word, dep tag))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.parse.dependencygraph.DependencyGraphError
+Bases: \fBException\fP
+.sp
+Dependency graph exception.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.dependencygraph.conll_demo()
+A demonstration of how to read a string representation of
+a CoNLL format dependency tree.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.dependencygraph.conll_file_demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.dependencygraph.cycle_finding_demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.dependencygraph.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.dependencygraph.dot2img(dot_string, t=\(aqsvg\(aq)
+Create an image representation from dot_string, using the \(aqdot\(aq program
+from the Graphviz package.
+.sp
+Use the \(aqt\(aq argument to specify the image file format, for example
+\(aqpng\(aq or \(aqjpeg\(aq (running \(aqdot \-T:\(aq lists all available formats).
+.sp
+sys.stdout is used instead of subprocess.PIPE, to avoid decoding errors.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.dependencygraph.malt_demo(nx=False)
+A demonstration of the result of reading a dependency
+version of the first sentence of the Penn Treebank.
+.UNINDENT
+.SS nltk.parse.earleychart module
+.sp
+Data classes and parser implementations for \fIincremental\fP chart
+parsers, which use dynamic programming to efficiently parse a text.
+A "chart parser" derives parse trees for a text by iteratively adding
+"edges" to a "chart". Each "edge" represents a hypothesis about the tree
+structure for a subsequence of the text. The "chart" is a
+"blackboard" for composing and combining these hypotheses.
+.sp
+A parser is "incremental" if it guarantees that for all i, j where i < j,
+all edges ending at i are built before any edges ending at j.
+This is appealing for, say, speech recognizer hypothesis filtering.
+.sp
+The main parser class is \fBEarleyChartParser\fP, which is a top\-down
+algorithm, originally formulated by Jay Earley (1970).
+.INDENT 0.0
+.TP
+.B class nltk.parse.earleychart.CompleteFundamentalRule
+Bases: \fI\%nltk.parse.chart.SingleEdgeFundamentalRule\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.earleychart.CompleterRule
+Bases: \fI\%nltk.parse.earleychart.CompleteFundamentalRule\fP
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, edge)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.EarleyChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.IncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureCompleteFundamentalRule +Bases: \fI\%nltk.parse.featurechart.FeatureSingleEdgeFundamentalRule\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureCompleterRule +Bases: \fI\%nltk.parse.earleychart.CompleterRule\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureEarleyChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.FeatureIncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.FeatureIncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.FeatureIncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureIncrementalChart(tokens) +Bases: \fI\%nltk.parse.earleychart.IncrementalChart\fP, \fI\%nltk.parse.featurechart.FeatureChart\fP +.INDENT 7.0 +.TP +.B select(end, **restrictions) +Returns an iterator over the edges in this chart. +See \fBChart.select\fP for more information about the +\fBrestrictions\fP on the edges. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureIncrementalChartParser(grammar, strategy=[, , , ], trace_chart_width=20, chart_class=, **parser_args) +Bases: \fI\%nltk.parse.earleychart.IncrementalChartParser\fP, \fI\%nltk.parse.featurechart.FeatureChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureIncrementalTopDownChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.FeatureIncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeaturePredictorRule +Bases: \fI\%nltk.parse.featurechart.FeatureTopDownPredictRule\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FeatureScannerRule +Bases: \fI\%nltk.parse.earleychart.ScannerRule\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.FilteredCompleteFundamentalRule +Bases: \fI\%nltk.parse.chart.FilteredSingleEdgeFundamentalRule\fP +.INDENT 7.0 +.TP +.B apply(chart, grammar, edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.IncrementalBottomUpChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.IncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.IncrementalBottomUpLeftCornerChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.IncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.IncrementalChart(tokens) +Bases: \fI\%nltk.parse.chart.Chart\fP +.INDENT 7.0 +.TP +.B edges() +Return a list of all edges in this chart. 
New edges
+that are added to the chart after the call to edges()
+will \fInot\fP be contained in this list.
+.INDENT 7.0
+.TP
+.B Return type
+list(EdgeI)
+.TP
+.B See
+\fBiteredges\fP, \fBselect\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B initialize()
+Clear the chart.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B iteredges()
+Return an iterator over the edges in this chart. It is
+not guaranteed that new edges which are added to the
+chart before the iterator is exhausted will also be generated.
+.INDENT 7.0
+.TP
+.B Return type
+iter(EdgeI)
+.TP
+.B See
+\fBedges\fP, \fBselect\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B select(end, **restrictions)
+Return an iterator over the edges in this chart. Any
+new edges that are added to the chart before the iterator
+is exhausted will also be generated. \fBrestrictions\fP
+can be used to restrict the set of edges that will be
+generated.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBspan\fP \-\- Only generate edges \fBe\fP where \fBe.span()==span\fP
+.IP \(bu 2
+\fBstart\fP \-\- Only generate edges \fBe\fP where \fBe.start()==start\fP
+.IP \(bu 2
+\fBend\fP \-\- Only generate edges \fBe\fP where \fBe.end()==end\fP
+.IP \(bu 2
+\fBlength\fP \-\- Only generate edges \fBe\fP where \fBe.length()==length\fP
+.IP \(bu 2
+\fBlhs\fP \-\- Only generate edges \fBe\fP where \fBe.lhs()==lhs\fP
+.IP \(bu 2
+\fBrhs\fP \-\- Only generate edges \fBe\fP where \fBe.rhs()==rhs\fP
+.IP \(bu 2
+\fBnextsym\fP \-\- Only generate edges \fBe\fP where
+\fBe.nextsym()==nextsym\fP
+.IP \(bu 2
+\fBdot\fP \-\- Only generate edges \fBe\fP where \fBe.dot()==dot\fP
+.IP \(bu 2
+\fBis_complete\fP \-\- Only generate edges \fBe\fP where
+\fBe.is_complete()==is_complete\fP
+.IP \(bu 2
+\fBis_incomplete\fP \-\- Only generate edges \fBe\fP where
+\fBe.is_incomplete()==is_incomplete\fP
+.UNINDENT
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.earleychart.IncrementalChartParser(grammar, strategy=[, , , ], trace=0, trace_chart_width=50, chart_class=)
+Bases: \fI\%nltk.parse.chart.ChartParser\fP
+.sp
+An \fIincremental\fP chart parser implementing Jay Earley\(aqs
+parsing algorithm:
+.nf
+For each index end in [0, 1, ..., N]:
+.in +2
+For each edge such that edge.end = end:
+.in +2
+If edge is incomplete and edge.next is not a part of speech:
+.in +2
+Apply PredictorRule to edge
+.in -2
+If edge is incomplete and edge.next is a part of speech:
+.in +2
+Apply ScannerRule to edge
+.in -2
+If edge is complete:
+.in +2
+Apply CompleterRule to edge
+.in -2
+.in -2
+.in -2
+Return any complete parses in the chart
+.fi
+.sp
+.INDENT 7.0
+.TP
+.B chart_parse(tokens, trace=None)
+Return the final parse \fBChart\fP from which all possible
+parse trees can be extracted.
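+.sp
+A minimal sketch of driving \fBchart_parse()\fP directly (the toy grammar
+below is invented for illustration; any CFG would do):
+.sp
+.nf
+.ft C
+>>> from nltk import CFG
+>>> from nltk.parse.earleychart import EarleyChartParser
+>>> grammar = CFG.fromstring("""
+\&... S \-> NP VP
+\&... NP \-> \(aqJohn\(aq | \(aqMary\(aq
+\&... VP \-> V NP
+\&... V \-> \(aqloves\(aq
+\&... """)
+>>> parser = EarleyChartParser(grammar)
+>>> chart = parser.chart_parse(\(aqJohn loves Mary\(aq.split())
+>>> chart.num_edges() > 0
+True
+.ft P
+.fi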
+.INDENT 7.0 +.TP +.B Parameters +\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed +.TP +.B Return type +Chart +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.IncrementalLeftCornerChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.IncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.IncrementalTopDownChartParser(grammar, **parser_args) +Bases: \fI\%nltk.parse.earleychart.IncrementalChartParser\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.PredictorRule +Bases: \fI\%nltk.parse.chart.CachedTopDownPredictRule\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.earleychart.ScannerRule +Bases: \fI\%nltk.parse.earleychart.CompleteFundamentalRule\fP +.INDENT 7.0 +.TP +.B apply(chart, grammar, edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.earleychart.demo(print_times=True, print_grammar=False, print_trees=True, trace=2, sent=\(aqI saw John with a dog with my cookie\(aq, numparses=5) +A demonstration of the Earley parsers. +.UNINDENT +.SS nltk.parse.evaluate module +.INDENT 0.0 +.TP +.B class nltk.parse.evaluate.DependencyEvaluator(parsed_sents, gold_sents) +Bases: \fBobject\fP +.sp +Class for measuring labelled and unlabelled attachment score for +dependency parsing. Note that the evaluation ignores punctuation. +.sp +.nf +.ft C +>>> from nltk.parse import DependencyGraph, DependencyEvaluator +.ft P +.fi +.sp +.nf +.ft C +>>> gold_sent = DependencyGraph(""" +\&... Pierre NNP 2 NMOD +\&... Vinken NNP 8 SUB +\&... , , 2 P +\&... 61 CD 5 NMOD +\&... years NNS 6 AMOD +\&... old JJ 2 NMOD +\&... , , 2 P +\&... will MD 0 ROOT +\&... join VB 8 VC +\&... the DT 11 NMOD +\&... board NN 9 OBJ +\&... as IN 9 VMOD +\&... a DT 15 NMOD +\&... nonexecutive JJ 15 NMOD +\&... director NN 12 PMOD +\&... Nov. NNP 9 VMOD +\&... 29 CD 16 NMOD +\&... . . 9 VMOD +\&... """) +.ft P +.fi +.sp +.nf +.ft C +>>> parsed_sent = DependencyGraph(""" +\&... Pierre NNP 8 NMOD +\&... Vinken NNP 1 SUB +\&... , , 3 P +\&... 61 CD 6 NMOD +\&... years NNS 6 AMOD +\&... old JJ 2 NMOD +\&... , , 3 AMOD +\&... will MD 0 ROOT +\&... join VB 8 VC +\&... the DT 11 AMOD +\&... board NN 9 OBJECT +\&... as IN 9 NMOD +\&... a DT 15 NMOD +\&... nonexecutive JJ 15 NMOD +\&... director NN 12 PMOD +\&... Nov. NNP 9 VMOD +\&... 29 CD 16 NMOD +\&... . . 9 VMOD +\&... """) +.ft P +.fi +.sp +.nf +.ft C +>>> de = DependencyEvaluator([parsed_sent],[gold_sent]) +>>> las, uas = de.eval() +>>> las +0.6... +>>> uas +0.8... +>>> abs(uas \- 0.8) < 0.00001 +True +.ft P +.fi +.INDENT 7.0 +.TP +.B eval() +Return the Labeled Attachment Score (LAS) and Unlabeled Attachment Score (UAS) +.sp +:return : tuple(float,float) +.UNINDENT +.UNINDENT +.SS nltk.parse.featurechart module +.sp +Extension of chart parsing implementation to handle grammars with +feature structures as nodes. 
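+.sp
+A minimal sketch of the module in use (the toy feature grammar below is
+invented for illustration; \fBFeatureChartParser\fP is documented further
+down):
+.sp
+.nf
+.ft C
+>>> from nltk.grammar import FeatureGrammar
+>>> from nltk.parse.featurechart import FeatureChartParser
+>>> fg = FeatureGrammar.fromstring("""
+\&... S \-> NP[NUM=?n] VP[NUM=?n]
+\&... NP[NUM=?n] \-> N[NUM=?n]
+\&... VP[NUM=?n] \-> V[NUM=?n]
+\&... N[NUM=\(aqsg\(aq] \-> \(aqJohn\(aq
+\&... V[NUM=\(aqsg\(aq] \-> \(aqwalks\(aq
+\&... """)
+>>> parser = FeatureChartParser(fg)
+>>> len(list(parser.parse(\(aqJohn walks\(aq.split())))
+1
+.ft P
+.fi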
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureBottomUpChartParser(grammar, **parser_args)
+Bases: \fI\%nltk.parse.featurechart.FeatureChartParser\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser(grammar, **parser_args)
+Bases: \fI\%nltk.parse.featurechart.FeatureChartParser\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureBottomUpPredictCombineRule
+Bases: \fI\%nltk.parse.chart.BottomUpPredictCombineRule\fP
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, edge)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureBottomUpPredictRule
+Bases: \fI\%nltk.parse.chart.BottomUpPredictRule\fP
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, edge)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureChart(tokens)
+Bases: \fI\%nltk.parse.chart.Chart\fP
+.sp
+A Chart for feature grammars.
+.sp
+See \fBChart\fP for more information.
+.INDENT 7.0
+.TP
+.B parses(start, tree_class=)
+Return an iterator of the complete tree structures that span
+the entire chart, and whose root node is \fBroot\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B select(**restrictions)
+Returns an iterator over the edges in this chart.
+See \fBChart.select\fP for more information about the
+\fBrestrictions\fP on the edges.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureChartParser(grammar, strategy=[, , , ], trace_chart_width=20, chart_class=, **parser_args)
+Bases: \fI\%nltk.parse.chart.ChartParser\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureEmptyPredictRule
+Bases: \fI\%nltk.parse.chart.EmptyPredictRule\fP
+.INDENT 7.0
+.TP
+.B apply(chart, grammar)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureFundamentalRule
+Bases: \fI\%nltk.parse.chart.FundamentalRule\fP
+.sp
+A specialized version of the fundamental rule that operates on
+nonterminals whose symbols are \fBFeatStructNonterminal\fPs. Rather
+than simply comparing the nonterminals for equality, they are
+unified. Variable bindings from these unifications are collected
+and stored in the chart using a \fBFeatureTreeEdge\fP\&. When a
+complete edge is generated, these bindings are applied to all
+nonterminals in the edge.
+.sp
+The fundamental rule states that:
+.INDENT 7.0
+.IP \(bu 2
+\fB[A \-> alpha \e* B1 beta][i:j]\fP
+.IP \(bu 2
+\fB[B2 \-> gamma \e*][j:k]\fP
+.UNINDENT
+.sp
+licenses the edge:
+.INDENT 7.0
+.IP \(bu 2
+\fB[A \-> alpha B3 \e* beta][i:k]\fP
+.UNINDENT
+.sp
+assuming that B1 and B2 can be unified to generate B3.
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, left_edge, right_edge)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureSingleEdgeFundamentalRule
+Bases: \fI\%nltk.parse.chart.SingleEdgeFundamentalRule\fP
+.sp
+A specialized version of the completer / single edge fundamental rule
+that operates on nonterminals whose symbols are \fBFeatStructNonterminal\fPs.
+Rather than simply comparing the nonterminals for equality, they are
+unified.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureTopDownChartParser(grammar, **parser_args)
+Bases: \fI\%nltk.parse.featurechart.FeatureChartParser\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureTopDownInitRule
+Bases: \fI\%nltk.parse.chart.TopDownInitRule\fP
+.INDENT 7.0
+.TP
+.B apply(chart, grammar)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureTopDownPredictRule
+Bases: \fI\%nltk.parse.chart.CachedTopDownPredictRule\fP
+.sp
+A specialized version of the (cached) top down predict rule that operates
+on nonterminals whose symbols are \fBFeatStructNonterminal\fPs. Rather
+than simply comparing the nonterminals for equality, they are
+unified.
+.sp
+The top down expand rule states that:
+.INDENT 7.0
+.IP \(bu 2
+\fB[A \-> alpha \e* B1 beta][i:j]\fP
+.UNINDENT
+.sp
+licenses the edge:
+.INDENT 7.0
+.IP \(bu 2
+\fB[B2 \-> \e* gamma][j:j]\fP
+.UNINDENT
+.sp
+for each grammar production \fBB2 \-> gamma\fP, assuming that B1
+and B2 can be unified.
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, edge)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.FeatureTreeEdge(span, lhs, rhs, dot=0, bindings=None)
+Bases: \fI\%nltk.parse.chart.TreeEdge\fP
+.sp
+A specialized tree edge that allows shared variable bindings
+between nonterminals on the left\-hand side and right\-hand side.
+.sp
+Each \fBFeatureTreeEdge\fP contains a set of \fBbindings\fP, i.e., a
+dictionary mapping from variables to values. If the edge is not
+complete, then these bindings are simply stored. However, if the
+edge is complete, then the constructor applies these bindings to
+every nonterminal in the edge whose symbol implements the
+interface \fBSubstituteBindingsI\fP\&.
+.INDENT 7.0
+.TP
+.B bindings()
+Return a copy of this edge\(aqs bindings dictionary.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static from_production(production, index)
+.INDENT 7.0
+.TP
+.B Returns
+A new \fBTreeEdge\fP formed from the given production.
+The new edge\(aqs left\-hand side and right\-hand side will
+be taken from \fBproduction\fP; its span will be
+\fB(index,index)\fP; and its dot position will be \fB0\fP\&.
+.TP
+.B Return type
+TreeEdge
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B move_dot_forward(new_end, bindings=None)
+.INDENT 7.0
+.TP
+.B Returns
+A new \fBFeatureTreeEdge\fP formed from this edge.
+The new edge\(aqs dot position is increased by \fB1\fP,
+and its end index will be replaced by \fBnew_end\fP\&.
+.TP
+.B Return type
+FeatureTreeEdge
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBnew_end\fP (\fIint\fP) \-\- The new end index.
+.IP \(bu 2
+\fBbindings\fP (\fIdict\fP) \-\- Bindings for the new edge.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B next_with_bindings()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B variables()
+.INDENT 7.0
+.TP
+.B Returns
+The set of variables used by this edge.
+.TP
+.B Return type
+set(Variable)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.featurechart.InstantiateVarsChart(tokens)
+Bases: \fI\%nltk.parse.featurechart.FeatureChart\fP
+.sp
+A specialized chart that \(aqinstantiates\(aq variables whose names
+start with \(aq@\(aq, by replacing them with unique new variables.
+In particular, whenever a complete edge is added to the chart, any
+variables in the edge\(aqs \fBlhs\fP whose names start with \(aq@\(aq will be
+replaced by unique new \fBVariable\fPs.
+.INDENT 7.0
+.TP
+.B initialize()
+Clear the chart.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B insert(edge, child_pointer_list)
+Add a new edge to the chart, and return True if this operation
+modified the chart. In particular, return true iff the chart
+did not already contain \fBedge\fP, or if it did not already associate
+\fBchild_pointer_lists\fP with \fBedge\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBedge\fP (\fIEdgeI\fP) \-\- The new edge
+.IP \(bu 2
+\fBchild_pointer_lists\fP (\fIsequence of tuple\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A sequence of lists of the edges that
+were used to form this edge. This list is used to reconstruct
+the trees (or partial trees) that are associated with \fBedge\fP\&.
+.UNINDENT
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B inst_vars(edge)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B instantiate_edge(edge)
+If the edge is a \fBFeatureTreeEdge\fP, and it is complete,
+then instantiate all variables whose names start with \(aq@\(aq,
+by replacing them with unique new variables.
+.sp +Note that instantiation is done in\-place, since the +parsing algorithms might already hold a reference to +the edge for future use. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.featurechart.demo(print_times=True, print_grammar=True, print_trees=True, print_sentence=True, trace=1, parser=, sent=\(aqI saw John with a dog with my cookie\(aq) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.featurechart.demo_grammar() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.featurechart.run_profile() +.UNINDENT +.SS nltk.parse.generate module +.INDENT 0.0 +.TP +.B nltk.parse.generate.demo(N=23) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.generate.generate(grammar, start=None, depth=None, n=None) +Generates an iterator of all sentences from a CFG. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBgrammar\fP \-\- The Grammar used to generate sentences. +.IP \(bu 2 +\fBstart\fP \-\- The Nonterminal from which to start generate sentences. +.IP \(bu 2 +\fBdepth\fP \-\- The maximal depth of the generated tree. +.IP \(bu 2 +\fBn\fP \-\- The maximum number of sentences to return. +.UNINDENT +.TP +.B Returns +An iterator of lists of terminal tokens. +.UNINDENT +.UNINDENT +.SS nltk.parse.malt module +.INDENT 0.0 +.TP +.B class nltk.parse.malt.MaltParser(parser_dirname, model_filename=None, tagger=None, additional_java_args=None) +Bases: \fI\%nltk.parse.api.ParserI\fP +.sp +A class for dependency parsing with MaltParser. The input is the paths to: +\- a maltparser directory +\- (optionally) the path to a pre\-trained MaltParser .mco model file +\- (optionally) the tagger to use for POS tagging before parsing +\- (optionally) additional Java arguments +.INDENT 7.0 +.TP +.B Example: +.sp +.nf +.ft C +>>> from nltk.parse import malt +>>> # With MALT_PARSER and MALT_MODEL environment set. +>>> mp = malt.MaltParser(\(aqmaltparser\-1.7.2\(aq, \(aqengmalt.linear\-1.7.mco\(aq) +>>> mp.parse_one(\(aqI shot an elephant in my pajamas .\(aq.split()).tree() +(shot I (elephant an) (in (pajamas my)) .) +>>> # Without MALT_PARSER and MALT_MODEL environment. +>>> mp = malt.MaltParser(\(aq/home/user/maltparser\-1.7.2/\(aq, \(aq/home/user/engmalt.linear\-1.7.mco\(aq) +>>> mp.parse_one(\(aqI shot an elephant in my pajamas .\(aq.split()).tree() +(shot I (elephant an) (in (pajamas my)) .) +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B generate_malt_command(inputfilename, outputfilename=None, mode=None) +This function generates the maltparser command use at the terminal. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBinputfilename\fP (\fIstr\fP) \-\- path to the input file +.IP \(bu 2 +\fBoutputfilename\fP (\fIstr\fP) \-\- path to the output file +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse_sents(sentences, verbose=False, top_relation_label=\(aqnull\(aq) +Use MaltParser to parse multiple sentences. +Takes a list of sentences, where each sentence is a list of words. +Each sentence will be automatically tagged with this +MaltParser instance\(aqs tagger. +.INDENT 7.0 +.TP +.B Parameters +\fBsentences\fP \-\- Input sentences to parse +.TP +.B Returns +iter(DependencyGraph) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse_tagged_sents(sentences, verbose=False, top_relation_label=\(aqnull\(aq) +Use MaltParser to parse multiple POS tagged sentences. Takes multiple +sentences where each sentence is a list of (word, tag) tuples. +The sentences must have already been tokenized and tagged. 
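+.sp
+A hedged sketch of the call pattern (the jar and model names follow the
+class example above and are purely illustrative; a working MaltParser
+installation is required):
+.sp
+.nf
+.ft C
+>>> from nltk.parse import malt
+>>> mp = malt.MaltParser(\(aqmaltparser\-1.7.2\(aq, \(aqengmalt.linear\-1.7.mco\(aq)
+>>> tagged = [[(\(aqI\(aq, \(aqPRP\(aq), (\(aqsaw\(aq, \(aqVBD\(aq), (\(aqJohn\(aq, \(aqNNP\(aq), (\(aq.\(aq, \(aq.\(aq)]]
+>>> graphs = mp.parse_tagged_sents(tagged)
+>>> dg = next(next(graphs))  # first DependencyGraph of the first sentence
+.ft P
+.fi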
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsentences\fP \-\- Input sentences to parse
+.TP
+.B Returns
+iter(iter(\fBDependencyGraph\fP)) the dependency graph
+representation of each sentence
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(depgraphs, verbose=False)
+Train MaltParser from a list of \fBDependencyGraph\fP objects.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdepgraphs\fP (\fIDependencyGraph\fP) \-\- list of \fBDependencyGraph\fP objects for training input data
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train_from_file(conll_file, verbose=False)
+Train MaltParser from a file.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBconll_file\fP (\fIstr\fP) \-\- the filename of the training input data
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.malt.find_malt_model(model_filename)
+A function to find a pre\-trained MaltParser model.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.malt.find_maltparser(parser_dirname)
+A function to find the MaltParser .jar file and its dependencies.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.malt.malt_regex_tagger()
+.UNINDENT
+.SS nltk.parse.nonprojectivedependencyparser module
+.INDENT 0.0
+.TP
+.B class nltk.parse.nonprojectivedependencyparser.DemoScorer
+Bases: \fI\%nltk.parse.nonprojectivedependencyparser.DependencyScorerI\fP
+.INDENT 7.0
+.TP
+.B score(graph)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraph\fP (\fIDependencyGraph\fP) \-\- A dependency graph whose set of edges need to be
+scored.
+.TP
+.B Return type
+A three\-dimensional list of numbers.
+.TP
+.B Returns
+The score is returned in a multidimensional(3) list, such
+that the outer\-dimension refers to the head, and the
+inner\-dimension refers to the dependencies. For instance,
+scores[0][1] would reference the list of scores corresponding to
+arcs from node 0 to node 1. The node\(aqs \(aqaddress\(aq field can be used
+to determine its number identification.
+.UNINDENT
+.sp
+For further illustration, a score list corresponding to Fig. 2 of
+Keith Hall\(aqs \(aqK\-best Spanning Tree Parsing\(aq paper:
+.sp
+.nf
+.ft C
+scores = [[[], [5], [1], [1]],
+          [[], [], [11], [4]],
+          [[], [10], [], [5]],
+          [[], [8], [8], []]]
+.ft P
+.fi
+.sp
+When used in conjunction with a MaxEntClassifier, each score would
+correspond to the confidence of a particular edge being classified
+with the positive training examples.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(graphs)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraphs\fP (\fIlist\fP\fI(\fP\fIDependencyGraph\fP\fI)\fP) \-\- A list of dependency graphs to train the scorer.
+.UNINDENT
+.sp
+Typically the edges present in the graphs can be used as
+positive training examples, and the edges not present as negative
+examples.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.nonprojectivedependencyparser.DependencyScorerI
+Bases: \fBobject\fP
+.sp
+A scorer for calculating the weights on the edges of a weighted
+dependency graph. This is used by a
+\fBProbabilisticNonprojectiveParser\fP to initialize the edge
+weights of a \fBDependencyGraph\fP\&. While typically this would be done
+by training a binary classifier, any class that can return a
+multidimensional list representation of the edge weights can
+implement this interface. As such, it has no necessary
+fields.
+.INDENT 7.0
+.TP
+.B score(graph)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraph\fP (\fIDependencyGraph\fP) \-\- A dependency graph whose set of edges need to be
+scored.
+.TP
+.B Return type
+A three\-dimensional list of numbers.
+.TP
+.B Returns
+The score is returned in a multidimensional(3) list, such
+that the outer\-dimension refers to the head, and the
+inner\-dimension refers to the dependencies. For instance,
+scores[0][1] would reference the list of scores corresponding to
+arcs from node 0 to node 1. The node\(aqs \(aqaddress\(aq field can be used
+to determine its number identification.
+.UNINDENT
+.sp
+For further illustration, a score list corresponding to Fig. 2 of
+Keith Hall\(aqs \(aqK\-best Spanning Tree Parsing\(aq paper:
+.sp
+.nf
+.ft C
+scores = [[[], [5], [1], [1]],
+          [[], [], [11], [4]],
+          [[], [10], [], [5]],
+          [[], [8], [8], []]]
+.ft P
+.fi
+.sp
+When used in conjunction with a MaxEntClassifier, each score would
+correspond to the confidence of a particular edge being classified
+with the positive training examples.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(graphs)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraphs\fP (\fIlist\fP\fI(\fP\fIDependencyGraph\fP\fI)\fP) \-\- A list of dependency graphs to train the scorer.
+.UNINDENT
+.sp
+Typically the edges present in the graphs can be used as
+positive training examples, and the edges not present as negative
+examples.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.nonprojectivedependencyparser.NaiveBayesDependencyScorer
+Bases: \fI\%nltk.parse.nonprojectivedependencyparser.DependencyScorerI\fP
+.sp
+A dependency scorer built around a MaxEnt classifier. In this
+particular class that classifier is a \fBNaiveBayesClassifier\fP\&.
+It uses head\-word, head\-tag, child\-word, and child\-tag features
+for classification.
+.sp
+.nf
+.ft C
+>>> from nltk.parse.dependencygraph import DependencyGraph, conll_data2
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> graphs = [DependencyGraph(entry) for entry in conll_data2.split(\(aq\en\en\(aq) if entry]
+>>> npp = ProbabilisticNonprojectiveParser()
+>>> npp.train(graphs, NaiveBayesDependencyScorer())
+>>> parses = npp.parse([\(aqCathy\(aq, \(aqzag\(aq, \(aqhen\(aq, \(aqwild\(aq, \(aqzwaaien\(aq, \(aq.\(aq], [\(aqN\(aq, \(aqV\(aq, \(aqPron\(aq, \(aqAdj\(aq, \(aqN\(aq, \(aqPunc\(aq])
+>>> len(list(parses))
+1
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B score(graph)
+Converts the graph into a feature\-based representation of
+each edge, and then assigns a score to each based on the
+confidence of the classifier in assigning it to the
+positive label. Scores are returned in a multidimensional list.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraph\fP (\fIDependencyGraph\fP) \-\- A dependency graph to score.
+.TP
+.B Return type
+3 dimensional list
+.TP
+.B Returns
+Edge scores for the graph parameter.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(graphs)
+Trains a \fBNaiveBayesClassifier\fP using the edges present in
+graphs list as positive examples, the edges not present as
+negative examples. Uses a feature vector of head\-word,
+head\-tag, child\-word, and child\-tag.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraphs\fP (\fIlist\fP\fI(\fP\fIDependencyGraph\fP\fI)\fP) \-\- A list of dependency graphs to train the scorer.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.nonprojectivedependencyparser.NonprojectiveDependencyParser(dependency_grammar)
+Bases: \fBobject\fP
+.sp
+A non\-projective, rule\-based, dependency parser. This parser
+will return the set of all possible non\-projective parses based on
+the word\-to\-word relations defined in the parser\(aqs dependency
+grammar, and will allow the branches of the parse tree to cross
+in order to capture a variety of linguistic phenomena that a
+projective parser will not.
+.INDENT 7.0
+.TP
+.B parse(tokens)
+Parses the input tokens with respect to the parser\(aqs grammar. Parsing
+is accomplished by representing the search\-space of possible parses as
+a fully\-connected directed graph. Arcs that would lead to ungrammatical
+parses are removed and a lattice is constructed of length n, where n is
+the number of input tokens, to represent all possible grammatical
+traversals. All possible paths through the lattice are then enumerated
+to produce the set of non\-projective parses.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of tokens to parse.
+.TP
+.B Returns
+An iterator of non\-projective parses.
+.TP
+.B Return type
+iter(DependencyGraph)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.nonprojectivedependencyparser.ProbabilisticNonprojectiveParser
+Bases: \fBobject\fP
+.sp
+A probabilistic non\-projective dependency parser.
+.sp
+Nonprojective dependencies allow for "crossing branches" in the parse tree,
+which is necessary for representing particular linguistic phenomena, or even
+typical parses in some languages. This parser follows the MST parsing
+algorithm, outlined in McDonald (2005), which likens the search for the best
+non\-projective parse to finding the maximum spanning tree in a weighted
+directed graph.
+.sp
+.nf
+.ft C
+>>> class Scorer(DependencyScorerI):
+\&...     def train(self, graphs):
+\&...         pass
+\&...
+\&...     def score(self, graph):
+\&...         return [
+\&...             [[], [5], [1], [1]],
+\&...             [[], [], [11], [4]],
+\&...             [[], [10], [], [5]],
+\&...             [[], [8], [8], []],
+\&...         ]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> npp = ProbabilisticNonprojectiveParser()
+>>> npp.train([], Scorer())
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> parses = npp.parse([\(aqv1\(aq, \(aqv2\(aq, \(aqv3\(aq], [None, None, None])
+>>> len(list(parses))
+1
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> from nltk.grammar import DependencyGrammar
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> grammar = DependencyGrammar.fromstring(\(aq\(aq\(aq
+\&... \(aqtaught\(aq \-> \(aqplay\(aq | \(aqman\(aq
+\&... \(aqman\(aq \-> \(aqthe\(aq | \(aqin\(aq
+\&... \(aqin\(aq \-> \(aqcorner\(aq
+\&... \(aqcorner\(aq \-> \(aqthe\(aq
+\&... \(aqplay\(aq \-> \(aqgolf\(aq | \(aqdachshund\(aq | \(aqto\(aq
+\&... \(aqdachshund\(aq \-> \(aqhis\(aq
+\&... \(aq\(aq\(aq)
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> ndp = NonprojectiveDependencyParser(grammar)
+>>> parses = ndp.parse([\(aqthe\(aq, \(aqman\(aq, \(aqin\(aq, \(aqthe\(aq, \(aqcorner\(aq, \(aqtaught\(aq, \(aqhis\(aq, \(aqdachshund\(aq, \(aqto\(aq, \(aqplay\(aq, \(aqgolf\(aq])
+>>> len(list(parses))
+4
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B best_incoming_arc(node_index)
+Returns the source of the best incoming arc to the
+node with address: node_index
+.INDENT 7.0
+.TP
+.B Parameters
+\fBnode_index\fP (\fIinteger\fP) \-\- The address of the \(aqdestination\(aq node,
+the node that is arced to.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B collapse_nodes(new_node, cycle_path, g_graph, b_graph, c_graph)
+Takes a list of nodes that have been identified to belong to a cycle,
+and collapses them into one larger node. The arcs of all nodes in
+the graph must be updated to account for this.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBnew_node\fP (\fINode\fP) \-\- A Node (Dictionary) to collapse the cycle nodes into.
+.IP \(bu 2
+\fBcycle_path\fP (\fIlist of integers\fP) \-\- A list of node addresses, each of which is in the cycle.
+.IP \(bu 2
+\fBg_graph\fP, \fBb_graph\fP, \fBc_graph\fP \-\- Graphs which need to be updated.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B compute_max_subtract_score(column_index, cycle_indexes)
+When updating scores the score of the highest\-weighted incoming
+arc is subtracted upon collapse. This returns the correct
+amount to subtract from that edge.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBcolumn_index\fP (\fIinteger\fP) \-\- An index representing the column of incoming arcs
+to a particular node being updated.
+.IP \(bu 2
+\fBcycle_indexes\fP (\fIlist of integers\fP) \-\- Only arcs from cycle nodes are considered. This
+is a list of such nodes\(aq addresses.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B compute_original_indexes(new_indexes)
+As nodes are collapsed into others, they are replaced
+by the new node in the graph, but it\(aqs still necessary
+to keep track of what these original nodes were. This
+takes a list of node addresses and replaces any collapsed
+node addresses with their original addresses.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBnew_indexes\fP (\fIlist of integers\fP) \-\- A list of node addresses to check for
+subsumed nodes.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B initialize_edge_scores(graph)
+Assigns a score to every edge in the \fBDependencyGraph\fP graph.
+These scores are generated via the parser\(aqs scorer which
+was assigned during the training process.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraph\fP (\fIDependencyGraph\fP) \-\- A dependency graph to assign scores to.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B original_best_arc(node_index)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens, tags)
+Parses a list of tokens in accordance with the MST parsing algorithm
+for non\-projective dependency parses. Assumes that the tokens to
+be parsed have already been tagged and those tags are provided. Various
+scoring methods can be used by implementing the \fBDependencyScorerI\fP
+interface and passing it to the training algorithm.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of words or punctuation to be parsed.
+.IP \(bu 2
+\fBtags\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of tags corresponding by index to the words in the tokens list.
+.UNINDENT
+.TP
+.B Returns
+An iterator of non\-projective parses.
+.TP
+.B Return type
+iter(DependencyGraph)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(graphs, dependency_scorer)
+Trains a \fBDependencyScorerI\fP from a set of \fBDependencyGraph\fP objects,
+and establishes this as the parser\(aqs scorer. This is used to
+initialize the scores on a \fBDependencyGraph\fP during the parsing
+procedure.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBgraphs\fP (\fIlist\fP\fI(\fP\fIDependencyGraph\fP\fI)\fP) \-\- A list of dependency graphs to train the scorer.
+.IP \(bu 2
+\fBdependency_scorer\fP (\fIDependencyScorerI\fP) \-\- A scorer which implements the
+\fBDependencyScorerI\fP interface.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update_edge_scores(new_node, cycle_path)
+Updates the edge scores to reflect a collapse operation into
+new_node.
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBnew_node\fP (\fIA Node.\fP) \-\- The node which cycle nodes are collapsed into. +.IP \(bu 2 +\fBcycle_path\fP (\fIA list of integers.\fP) \-\- A list of node addresses that belong to the cycle. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.nonprojectivedependencyparser.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.nonprojectivedependencyparser.hall_demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.nonprojectivedependencyparser.nonprojective_conll_parse_demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.parse.nonprojectivedependencyparser.rule_based_demo() +.UNINDENT +.SS nltk.parse.pchart module +.sp +Classes and interfaces for associating probabilities with tree +structures that represent the internal organization of a text. The +probabilistic parser module defines \fBBottomUpProbabilisticChartParser\fP\&. +.sp +\fBBottomUpProbabilisticChartParser\fP is an abstract class that implements +a bottom\-up chart parser for \fBPCFG\fP grammars. It maintains a queue of edges, +and adds them to the chart one at a time. The ordering of this queue +is based on the probabilities associated with the edges, allowing the +parser to expand more likely edges before less likely ones. Each +subclass implements a different queue ordering, producing different +search strategies. Currently the following subclasses are defined: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fBInsideChartParser\fP searches edges in decreasing order of +their trees\(aq inside probabilities. +.IP \(bu 2 +\fBRandomChartParser\fP searches edges in random order. +.IP \(bu 2 +\fBLongestChartParser\fP searches edges in decreasing order of their +location\(aqs length. +.UNINDENT +.UNINDENT +.UNINDENT +.sp +The \fBBottomUpProbabilisticChartParser\fP constructor has an optional +argument beam_size. If non\-zero, this controls the size of the beam +(aka the edge queue). This option is most useful with InsideChartParser. +.INDENT 0.0 +.TP +.B class nltk.parse.pchart.BottomUpProbabilisticChartParser(grammar, beam_size=0, trace=0) +Bases: \fI\%nltk.parse.api.ParserI\fP +.sp +An abstract bottom\-up parser for \fBPCFG\fP grammars that uses a \fBChart\fP to +record partial results. \fBBottomUpProbabilisticChartParser\fP maintains +a queue of edges that can be added to the chart. This queue is +initialized with edges for each token in the text that is being +parsed. \fBBottomUpProbabilisticChartParser\fP inserts these edges into +the chart one at a time, starting with the most likely edges, and +proceeding to less likely edges. For each edge that is added to +the chart, it may become possible to insert additional edges into +the chart; these are added to the queue. This process continues +until enough complete parses have been generated, or until the +queue is empty. +.sp +The sorting order for the queue is not specified by +\fBBottomUpProbabilisticChartParser\fP\&. Different sorting orders will +result in different search strategies. The sorting order for the +queue is defined by the method \fBsort_queue\fP; subclasses are required +to provide a definition for this method. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB_grammar\fP \-\- The grammar used to parse sentences. +.IP \(bu 2 +\fB_trace\fP \-\- The level of tracing output that should be generated +when parsing a text. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B grammar() +.INDENT 7.0 +.TP +.B Returns +The grammar used by this parser. 
+.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse(tokens) +.INDENT 7.0 +.TP +.B Returns +An iterator that generates parse trees for the sentence. +.UNINDENT +.sp +When possible this list is sorted from most likely to least likely. +.INDENT 7.0 +.TP +.B Parameters +\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed +.TP +.B Return type +iter(Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B sort_queue(queue, chart) +Sort the given queue of \fBEdge\fP objects, placing the edge that should +be tried first at the beginning of the queue. This method +will be called after each \fBEdge\fP is added to the queue. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBqueue\fP (\fIlist\fP\fI(\fP\fIEdge\fP\fI)\fP) \-\- The queue of \fBEdge\fP objects to sort. Each edge in +this queue is an edge that could be added to the chart by +the fundamental rule; but that has not yet been added. +.IP \(bu 2 +\fBchart\fP (\fIChart\fP) \-\- The chart being used to parse the text. This +chart can be used to provide extra information for sorting +the queue. +.UNINDENT +.TP +.B Return type +None +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B trace(trace=2) +Set the level of tracing output that should be generated when +parsing a text. +.INDENT 7.0 +.TP +.B Parameters +\fBtrace\fP (\fIint\fP) \-\- The trace level. A trace level of \fB0\fP will +generate no tracing output; and higher trace levels will +produce more verbose tracing output. +.TP +.B Return type +None +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.pchart.InsideChartParser(grammar, beam_size=0, trace=0) +Bases: \fI\%nltk.parse.pchart.BottomUpProbabilisticChartParser\fP +.sp +A bottom\-up parser for \fBPCFG\fP grammars that tries edges in descending +order of the inside probabilities of their trees. The "inside +probability" of a tree is simply the +probability of the entire tree, ignoring its context. In +particular, the inside probability of a tree generated by +production \fIp\fP with children \fIc[1], c[2], ..., c[n]\fP is +\fIP(p)P(c[1])P(c[2])...P(c[n])\fP; and the inside +probability of a token is 1 if it is present in the text, and 0 if +it is absent. +.sp +This sorting order results in a type of lowest\-cost\-first search +strategy. +.INDENT 7.0 +.TP +.B sort_queue(queue, chart) +Sort the given queue of edges, in descending order of the +inside probabilities of the edges\(aq trees. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBqueue\fP (\fIlist\fP\fI(\fP\fIEdge\fP\fI)\fP) \-\- The queue of \fBEdge\fP objects to sort. Each edge in +this queue is an edge that could be added to the chart by +the fundamental rule; but that has not yet been added. +.IP \(bu 2 +\fBchart\fP (\fIChart\fP) \-\- The chart being used to parse the text. This +chart can be used to provide extra information for sorting +the queue. +.UNINDENT +.TP +.B Return type +None +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.pchart.LongestChartParser(grammar, beam_size=0, trace=0) +Bases: \fI\%nltk.parse.pchart.BottomUpProbabilisticChartParser\fP +.sp +A bottom\-up parser for \fBPCFG\fP grammars that tries longer edges before +shorter ones. This sorting order results in a type of best\-first +search strategy. +.INDENT 7.0 +.TP +.B sort_queue(queue, chart) +Sort the given queue of \fBEdge\fP objects, placing the edge that should +be tried first at the beginning of the queue. This method +will be called after each \fBEdge\fP is added to the queue. 
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBqueue\fP (\fIlist\fP\fI(\fP\fIEdge\fP\fI)\fP) \-\- The queue of \fBEdge\fP objects to sort. Each edge in +this queue is an edge that could be added to the chart by +the fundamental rule; but that has not yet been added. +.IP \(bu 2 +\fBchart\fP (\fIChart\fP) \-\- The chart being used to parse the text. This +chart can be used to provide extra information for sorting +the queue. +.UNINDENT +.TP +.B Return type +None +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.pchart.ProbabilisticBottomUpInitRule +Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP +.INDENT 7.0 +.TP +.B NUM_EDGES = 0 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.pchart.ProbabilisticBottomUpPredictRule +Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP +.INDENT 7.0 +.TP +.B NUM_EDGES = 1 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.pchart.ProbabilisticFundamentalRule +Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP +.INDENT 7.0 +.TP +.B NUM_EDGES = 2 +.UNINDENT +.INDENT 7.0 +.TP +.B apply(chart, grammar, left_edge, right_edge) +Return a generator that will add edges licensed by this rule +and the given edges to the chart, one at a time. Each +time the generator is resumed, it will either add a new +edge and yield that edge; or return. +.INDENT 7.0 +.TP +.B Parameters +\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges +that should be passed to \fBapply()\fP is specified by the +\fBNUM_EDGES\fP class variable. +.TP +.B Return type +iter(EdgeI) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.pchart.ProbabilisticLeafEdge(leaf, index) +Bases: \fI\%nltk.parse.chart.LeafEdge\fP +.INDENT 7.0 +.TP +.B prob() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.pchart.ProbabilisticTreeEdge(prob, *args, **kwargs) +Bases: \fI\%nltk.parse.chart.TreeEdge\fP +.INDENT 7.0 +.TP +.B static from_production(production, index, p) +Return a new \fBTreeEdge\fP formed from the given production. +The new edge\(aqs left\-hand side and right\-hand side will +be taken from \fBproduction\fP; its span will be +\fB(index,index)\fP; and its dot position will be \fB0\fP\&. 
+.INDENT 7.0
+.TP
+.B Return type
+TreeEdge
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B prob()
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.pchart.RandomChartParser(grammar, beam_size=0, trace=0)
+Bases: \fI\%nltk.parse.pchart.BottomUpProbabilisticChartParser\fP
+.sp
+A bottom\-up parser for \fBPCFG\fP grammars that tries edges in random order.
+This sorting order results in a random search strategy.
+.INDENT 7.0
+.TP
+.B sort_queue(queue, chart)
+Sort the given queue of \fBEdge\fP objects, placing the edge that should
+be tried first at the beginning of the queue. This method
+will be called after each \fBEdge\fP is added to the queue.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBqueue\fP (\fIlist\fP\fI(\fP\fIEdge\fP\fI)\fP) \-\- The queue of \fBEdge\fP objects to sort. Each edge in
+this queue is an edge that could be added to the chart by
+the fundamental rule; but that has not yet been added.
+.IP \(bu 2
+\fBchart\fP (\fIChart\fP) \-\- The chart being used to parse the text. This
+chart can be used to provide extra information for sorting
+the queue.
+.UNINDENT
+.TP
+.B Return type
+None
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.pchart.SingleEdgeProbabilisticFundamentalRule
+Bases: \fI\%nltk.parse.chart.AbstractChartRule\fP
+.INDENT 7.0
+.TP
+.B NUM_EDGES = 1
+.UNINDENT
+.INDENT 7.0
+.TP
+.B apply(chart, grammar, edge1)
+Return a generator that will add edges licensed by this rule
+and the given edges to the chart, one at a time. Each
+time the generator is resumed, it will either add a new
+edge and yield that edge; or return.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP (\fIlist\fP\fI(\fP\fIEdgeI\fP\fI)\fP) \-\- A set of existing edges. The number of edges
+that should be passed to \fBapply()\fP is specified by the
+\fBNUM_EDGES\fP class variable.
+.TP
+.B Return type
+iter(EdgeI)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.pchart.UnsortedChartParser(grammar, beam_size=0, trace=0)
+Bases: \fI\%nltk.parse.pchart.BottomUpProbabilisticChartParser\fP
+.sp
+A bottom\-up parser for \fBPCFG\fP grammars that tries edges in whatever
+order they are added to the queue; the queue is left unsorted.
+.INDENT 7.0
+.TP
+.B sort_queue(queue, chart)
+Sort the given queue of \fBEdge\fP objects, placing the edge that should
+be tried first at the beginning of the queue. This method
+will be called after each \fBEdge\fP is added to the queue.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBqueue\fP (\fIlist\fP\fI(\fP\fIEdge\fP\fI)\fP) \-\- The queue of \fBEdge\fP objects to sort. Each edge in
+this queue is an edge that could be added to the chart by
+the fundamental rule; but that has not yet been added.
+.IP \(bu 2
+\fBchart\fP (\fIChart\fP) \-\- The chart being used to parse the text. This
+chart can be used to provide extra information for sorting
+the queue.
+.UNINDENT
+.TP
+.B Return type
+None
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.pchart.demo(choice=None, draw_parses=None, print_parses=None)
+A demonstration of the probabilistic parsers. The user is
+prompted to select which demo to run, and how many parses should
+be found; and then each parser is run on the same demo, and a
+summary of the results is displayed.
+.UNINDENT
+.SS nltk.parse.projectivedependencyparser module
+.INDENT 0.0
+.TP
+.B class nltk.parse.projectivedependencyparser.ChartCell(x, y)
+Bases: \fBobject\fP
+.sp
+A cell from the parse chart formed when performing the CYK algorithm.
Each cell keeps track of its x and y coordinates (though this will probably
+be discarded), and a list of spans serving as the cell\(aqs entries.
+.INDENT 7.0
+.TP
+.B add(span)
+Appends the given span to the list of spans
+representing the chart cell\(aqs entries.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBspan\fP (\fIDependencySpan\fP) \-\- The span to add.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.projectivedependencyparser.DependencySpan(start_index, end_index, head_index, arcs, tags)
+Bases: \fBobject\fP
+.sp
+A contiguous span over some part of the input string representing
+dependency (head \-> modifier) relationships amongst words. An atomic
+span corresponds to only one word so it isn\(aqt a \(aqspan\(aq in the conventional
+sense, as its _start_index = _end_index = _head_index for concatenation
+purposes. All other spans are assumed to have arcs between all nodes
+within the start and end indexes of the span, and one head index corresponding
+to the head word for the entire span. This is the same as the root node if
+the dependency structure were depicted as a graph.
+.INDENT 7.0
+.TP
+.B head_index()
+.INDENT 7.0
+.TP
+.B Returns
+A value indexing the head of the entire \fBDependencySpan\fP\&.
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.projectivedependencyparser.ProbabilisticProjectiveDependencyParser
+Bases: \fBobject\fP
+.sp
+A probabilistic, projective dependency parser.
+.sp
+This parser returns the most probable projective parse derived from the
+probabilistic dependency grammar induced by the train() method. The
+probabilistic model is an implementation of Eisner\(aqs (1996) Model C, which
+conditions on head\-word, head\-tag, child\-word, and child\-tag. The decoding
+uses a bottom\-up chart\-based span concatenation algorithm that\(aqs identical
+to the one utilized by the rule\-based projective parser.
+.sp
+.nf
+.ft C
+>>> from nltk.parse.dependencygraph import DependencyGraph, conll_data2
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> graphs = [
+\&... DependencyGraph(entry) for entry in conll_data2.split(\(aq\en\en\(aq) if entry
+\&... ]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> ppdp = ProbabilisticProjectiveDependencyParser()
+>>> ppdp.train(graphs)
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> sent = [\(aqCathy\(aq, \(aqzag\(aq, \(aqhen\(aq, \(aqwild\(aq, \(aqzwaaien\(aq, \(aq.\(aq]
+>>> list(ppdp.parse(sent))
+[Tree(\(aqzag\(aq, [\(aqCathy\(aq, \(aqhen\(aq, Tree(\(aqzwaaien\(aq, [\(aqwild\(aq, \(aq.\(aq])])]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B compute_prob(dg)
+Computes the probability of a dependency graph based
+on the parser\(aqs probability model (defined by the parser\(aqs
+statistical dependency grammar).
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdg\fP (\fIDependencyGraph\fP) \-\- A dependency graph to score.
+.TP
+.B Returns
+The probability of the dependency graph.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B concatenate(span1, span2)
+Concatenates the two spans in whichever way possible. This
+includes rightward concatenation (from the leftmost word of the
+leftmost span to the rightmost word of the rightmost span) and
+leftward concatenation (vice\-versa) between adjacent spans. Unlike
+Eisner\(aqs presentation of span concatenation, these spans do not
+share or pivot on a particular word/word\-index.
+.INDENT 7.0
+.TP
+.B Returns
+A list of new spans formed through concatenation.
+.TP
+.B Return type
+list(DependencySpan)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens)
+Parses the list of tokens subject to the projectivity constraint
+and the productions in the parser\(aqs grammar. This uses a method
+similar to the span\-concatenation algorithm defined in Eisner (1996).
+It returns the most probable parse derived from the parser\(aqs
+probabilistic dependency grammar.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(graphs)
+Trains a ProbabilisticDependencyGrammar based on the list of input
+DependencyGraphs. This model is an implementation of Eisner\(aqs (1996)
+Model C, which derives its statistics from head\-word, head\-tag,
+child\-word, and child\-tag relationships.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraphs\fP \-\- A list of dependency graphs to train from.
+.TP
+.B Type
+list(DependencyGraph)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.projectivedependencyparser.ProjectiveDependencyParser(dependency_grammar)
+Bases: \fBobject\fP
+.sp
+A projective, rule\-based, dependency parser. A ProjectiveDependencyParser
+is created with a DependencyGrammar, a set of productions specifying
+word\-to\-word dependency relations. The parse() method will then
+return the set of all parses, in tree representation, for a given input
+sequence of tokens. Each parse must meet the requirements of both
+the grammar and the projectivity constraint, which specifies that the
+branches of the dependency tree are not allowed to cross. Alternatively,
+this can be understood as stating that each parent node and its children
+in the parse tree form a continuous substring of the input sequence.
+.INDENT 7.0
+.TP
+.B concatenate(span1, span2)
+Concatenates the two spans in whichever way possible. This
+includes rightward concatenation (from the leftmost word of the
+leftmost span to the rightmost word of the rightmost span) and
+leftward concatenation (vice\-versa) between adjacent spans. Unlike
+Eisner\(aqs presentation of span concatenation, these spans do not
+share or pivot on a particular word/word\-index.
+.INDENT 7.0
+.TP
+.B Returns
+A list of new spans formed through concatenation.
+.TP
+.B Return type
+list(DependencySpan)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens)
+Performs a projective dependency parse on the list of tokens using
+a chart\-based, span\-concatenation algorithm similar to Eisner (1996).
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The list of input tokens.
+.TP
+.B Returns
+An iterator over parse trees.
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.projectivedependencyparser.arity_parse_demo()
+A demonstration showing the creation of a \fBDependencyGrammar\fP
+in which a specific number of modifiers is listed for a given
+head. This can further constrain the number of possible parses
+created by a \fBProjectiveDependencyParser\fP\&.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.projectivedependencyparser.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.projectivedependencyparser.projective_prob_parse_demo()
+A demo showing the training and use of a projective
+dependency parser.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.projectivedependencyparser.projective_rule_parse_demo()
+A demonstration showing the creation and use of a
+\fBDependencyGrammar\fP to perform a projective dependency
+parse.
+.UNINDENT +.SS nltk.parse.recursivedescent module +.INDENT 0.0 +.TP +.B class nltk.parse.recursivedescent.RecursiveDescentParser(grammar, trace=0) +Bases: \fI\%nltk.parse.api.ParserI\fP +.sp +A simple top\-down CFG parser that parses texts by recursively +expanding the fringe of a Tree, and matching it against a +text. +.sp +\fBRecursiveDescentParser\fP uses a list of tree locations called a +"frontier" to remember which subtrees have not yet been expanded +and which leaves have not yet been matched against the text. Each +tree location consists of a list of child indices specifying the +path from the root of the tree to a subtree or a leaf; see the +reference documentation for Tree for more information +about tree locations. +.sp +When the parser begins parsing a text, it constructs a tree +containing only the start symbol, and a frontier containing the +location of the tree\(aqs root node. It then extends the tree to +cover the text, using the following recursive procedure: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +If the frontier is empty, and the text is covered by the tree, +then return the tree as a possible parse. +.IP \(bu 2 +If the frontier is empty, and the text is not covered by the +tree, then return no parses. +.IP \(bu 2 +If the first element of the frontier is a subtree, then +use CFG productions to "expand" it. For each applicable +production, add the expanded subtree\(aqs children to the +frontier, and recursively find all parses that can be +generated by the new tree and frontier. +.IP \(bu 2 +If the first element of the frontier is a token, then "match" +it against the next token from the text. Remove the token +from the frontier, and recursively find all parses that can be +generated by the new tree and frontier. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B See +\fBnltk.grammar\fP +.UNINDENT +.INDENT 7.0 +.TP +.B grammar() +.INDENT 7.0 +.TP +.B Returns +The grammar used by this parser. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parse(tokens) +.INDENT 7.0 +.TP +.B Returns +An iterator that generates parse trees for the sentence. +.UNINDENT +.sp +When possible this list is sorted from most likely to least likely. +.INDENT 7.0 +.TP +.B Parameters +\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed +.TP +.B Return type +iter(Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B trace(trace=2) +Set the level of tracing output that should be generated when +parsing a text. +.INDENT 7.0 +.TP +.B Parameters +\fBtrace\fP (\fIint\fP) \-\- The trace level. A trace level of \fB0\fP will +generate no tracing output; and higher trace levels will +produce more verbose tracing output. +.TP +.B Return type +None +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.recursivedescent.SteppingRecursiveDescentParser(grammar, trace=0) +Bases: \fI\%nltk.parse.recursivedescent.RecursiveDescentParser\fP +.sp +A \fBRecursiveDescentParser\fP that allows you to step through the +parsing process, performing a single operation at a time. +.sp +The \fBinitialize\fP method is used to start parsing a text. +\fBexpand\fP expands the first element on the frontier using a single +CFG production, and \fBmatch\fP matches the first element on the +frontier against the next text token. \fBbacktrack\fP undoes the most +recent expand or match operation. \fBstep\fP performs a single +expand, match, or backtrack operation. \fBparses\fP returns the set +of parses that have been found by the parser. 
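+.sp
+A minimal sketch (no asserted output; a toy grammar is assumed) of
+driving the stepping methods by hand:
+.sp
+.nf
+.ft C
+>>> from nltk import CFG
+>>> from nltk.parse.recursivedescent import SteppingRecursiveDescentParser
+>>> grammar = CFG.fromstring("""
+\&... S \-> NP VP
+\&... NP \-> \(aqthe\(aq N
+\&... VP \-> V NP
+\&... N \-> \(aqdog\(aq | \(aqman\(aq
+\&... V \-> \(aqsaw\(aq
+\&... """)
+>>> parser = SteppingRecursiveDescentParser(grammar)
+>>> parser.initialize(\(aqthe dog saw the man\(aq.split())
+>>> while parser.step() is not None:  # expand, match, or backtrack
+\&...     pass
+>>> trees = list(parser.parses())
+.ft P
+.fi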
+.INDENT 7.0
+.TP
+.B Variables
+.INDENT 7.0
+.IP \(bu 2
+\fB_history\fP \-\- A list of \fB(rtext, tree, frontier)\fP triples,
+containing the previous states of the parser. This history is
+used to implement the \fBbacktrack\fP operation.
+.IP \(bu 2
+\fB_tried_e\fP \-\- A record of all productions that have been tried
+for a given tree. This record is used by \fBexpand\fP to perform
+the next untried production.
+.IP \(bu 2
+\fB_tried_m\fP \-\- A record of what tokens have been matched for a
+given tree. This record is used by \fBstep\fP to decide whether
+or not to match a token.
+.UNINDENT
+.TP
+.B See
+\fBnltk.grammar\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B backtrack()
+Return the parser to its state before the most recent
+match or expand operation. Calling \fBbacktrack\fP repeatedly returns
+the parser to successively earlier states. If no match or
+expand operations have been performed, \fBbacktrack\fP will make no
+changes.
+.INDENT 7.0
+.TP
+.B Returns
+true if an operation was successfully undone.
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B currently_complete()
+.INDENT 7.0
+.TP
+.B Returns
+Whether the parser\(aqs current state represents a
+complete parse.
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B expand(production=None)
+Expand the first element of the frontier. In particular, if
+the first element of the frontier is a subtree whose node type
+is equal to \fBproduction\fP\(aqs left hand side, then add a child
+to that subtree for each element of \fBproduction\fP\(aqs right hand
+side. If \fBproduction\fP is not specified, then use the first
+untried expandable production. If all expandable productions
+have been tried, do nothing.
+.INDENT 7.0
+.TP
+.B Returns
+The production used to expand the frontier, if an
+expansion was performed. If no expansion was performed,
+return None.
+.TP
+.B Return type
+Production or None
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B expandable_productions()
+.INDENT 7.0
+.TP
+.B Returns
+A list of all the productions for which expansions
+are available for the current parser state.
+.TP
+.B Return type
+list(Production)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B frontier()
+.INDENT 7.0
+.TP
+.B Returns
+A list of the tree locations of all subtrees that
+have not yet been expanded, and all leaves that have not
+yet been matched.
+.TP
+.B Return type
+list(tuple(int))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B initialize(tokens)
+Start parsing a given text. This sets the parser\(aqs tree to
+the start symbol, its frontier to the root node, and its
+remaining text to \fBtokens\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B match()
+Match the first element of the frontier. In particular, if
+the first element of the frontier has the same type as the
+next text token, then substitute the text token into the tree.
+.INDENT 7.0
+.TP
+.B Returns
+The token matched, if a match operation was
+performed. If no match was performed, return None
+.TP
+.B Return type
+str or None
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens)
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence.
+.UNINDENT
+.sp
+When possible this list is sorted from most likely to least likely.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parses()
+.INDENT 7.0
+.TP
+.B Returns
+An iterator of the parses that have been found by this
+parser so far.
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B remaining_text()
+.INDENT 7.0
+.TP
+.B Returns
+The portion of the text that is not yet covered by the
+tree.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B set_grammar(grammar)
+Change the grammar used to parse texts.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgrammar\fP (\fICFG\fP) \-\- The new grammar.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B step()
+Perform a single parsing operation. If an untried match is
+possible, then perform the match, and return the matched
+token. If an untried expansion is possible, then perform the
+expansion, and return the production that it is based on. If
+backtracking is possible, then backtrack, and return True.
+Otherwise, return None.
+.INDENT 7.0
+.TP
+.B Returns
+None if no operation was performed; a token if a match
+was performed; a production if an expansion was performed;
+and True if a backtrack operation was performed.
+.TP
+.B Return type
+Production or String or bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tree()
+.INDENT 7.0
+.TP
+.B Returns
+A partial structure for the text that is
+currently being parsed. The elements specified by the
+frontier have not yet been expanded or matched.
+.TP
+.B Return type
+Tree
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B untried_expandable_productions()
+.INDENT 7.0
+.TP
+.B Returns
+A list of all the untried productions for which
+expansions are available for the current parser state.
+.TP
+.B Return type
+list(Production)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B untried_match()
+.INDENT 7.0
+.TP
+.B Returns
+Whether the first element of the frontier is a token
+that has not yet been matched.
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.recursivedescent.demo()
+A demonstration of the recursive descent parser.
+.UNINDENT
+.SS nltk.parse.shiftreduce module
+.INDENT 0.0
+.TP
+.B class nltk.parse.shiftreduce.ShiftReduceParser(grammar, trace=0)
+Bases: \fI\%nltk.parse.api.ParserI\fP
+.sp
+A simple bottom\-up CFG parser that uses two operations, "shift"
+and "reduce", to find a single parse for a text.
+.sp
+\fBShiftReduceParser\fP maintains a stack, which records the
+structure of a portion of the text. This stack is a list of
+strings and Trees that collectively cover a portion of
+the text. For example, while parsing the sentence "the dog saw
+the man" with a typical grammar, \fBShiftReduceParser\fP will produce
+the following stack, which covers "the dog saw":
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+[(NP: (Det: \(aqthe\(aq) (N: \(aqdog\(aq)), (V: \(aqsaw\(aq)]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+\fBShiftReduceParser\fP attempts to extend the stack to cover the
+entire text, and to combine the stack elements into a single tree,
+producing a complete parse for the sentence.
+.sp
+Initially, the stack is empty. It is extended to cover the text,
+from left to right, by repeatedly applying two operations:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+"shift" moves a token from the beginning of the text to the
+end of the stack.
+.IP \(bu 2
+"reduce" uses a CFG production to combine the rightmost stack
+elements into a single Tree.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+Often, more than one operation can be performed on a given stack.
+In this case, \fBShiftReduceParser\fP uses the following heuristics
+to decide which operation to perform:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+Only shift if no reductions are available.
+.IP \(bu 2
+If multiple reductions are available, then apply the reduction
+whose CFG production is listed earliest in the grammar.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+Note that these heuristics are not guaranteed to choose an
+operation that leads to a parse of the text. Also, if multiple
+parses exist, \fBShiftReduceParser\fP will return at most one of
+them.
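+.sp
+As a minimal sketch, assuming the toy grammar below, the parser finds
+the parse of the sentence from the example above:
+.sp
+.nf
+.ft C
+>>> from nltk import CFG
+>>> from nltk.parse import ShiftReduceParser
+>>> grammar = CFG.fromstring("""
+\&... S \-> NP VP
+\&... NP \-> Det N
+\&... VP \-> V NP
+\&... Det \-> \(aqthe\(aq
+\&... N \-> \(aqdog\(aq | \(aqman\(aq
+\&... V \-> \(aqsaw\(aq
+\&... """)
+>>> sr = ShiftReduceParser(grammar)
+>>> for tree in sr.parse(\(aqthe dog saw the man\(aq.split()):
+\&...     print(tree)
+(S (NP (Det the) (N dog)) (VP (V saw) (NP (Det the) (N man))))
+.ft P
+.fi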
+.INDENT 7.0
+.TP
+.B See
+\fBnltk.grammar\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B grammar()
+.INDENT 7.0
+.TP
+.B Returns
+The grammar used by this parser.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens)
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence.
+.UNINDENT
+.sp
+When possible this list is sorted from most likely to least likely.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B trace(trace=2)
+Set the level of tracing output that should be generated when
+parsing a text.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtrace\fP (\fIint\fP) \-\- The trace level. A trace level of \fB0\fP will
+generate no tracing output; and higher trace levels will
+produce more verbose tracing output.
+.TP
+.B Return type
+None
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.shiftreduce.SteppingShiftReduceParser(grammar, trace=0)
+Bases: \fI\%nltk.parse.shiftreduce.ShiftReduceParser\fP
+.sp
+A \fBShiftReduceParser\fP that allows you to step through the parsing
+process, performing a single operation at a time. It also allows
+you to change the parser\(aqs grammar midway through parsing a text.
+.sp
+The \fBinitialize\fP method is used to start parsing a text.
+\fBshift\fP performs a single shift operation, and \fBreduce\fP performs
+a single reduce operation. \fBstep\fP will perform a single reduce
+operation if possible; otherwise, it will perform a single shift
+operation. \fBparses\fP returns the set of parses that have been
+found by the parser.
+.INDENT 7.0
+.TP
+.B Variables
+\fB_history\fP \-\- A list of \fB(stack, remaining_text)\fP pairs,
+containing all of the previous states of the parser. This
+history is used to implement the \fBundo\fP operation.
+.TP
+.B See
+\fBnltk.grammar\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B initialize(tokens)
+Start parsing a given text. This sets the parser\(aqs stack to
+\fB[]\fP and sets its remaining text to \fBtokens\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens)
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence.
+.UNINDENT
+.sp
+When possible this list is sorted from most likely to least likely.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsent\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parses()
+.INDENT 7.0
+.TP
+.B Returns
+An iterator of the parses that have been found by this
+parser so far.
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B reduce(production=None)
+Use \fBproduction\fP to combine the rightmost stack elements into
+a single Tree. If \fBproduction\fP does not match the
+rightmost stack elements, then do nothing.
+.INDENT 7.0
+.TP
+.B Returns
+The production used to reduce the stack, if a
+reduction was performed. If no reduction was performed,
+return None.
+.TP
+.B Return type
+Production or None
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B reducible_productions()
+.INDENT 7.0
+.TP
+.B Returns
+A list of the productions for which reductions are
+available for the current parser state.
+.TP
+.B Return type
+list(Production)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B remaining_text()
+.INDENT 7.0
+.TP
+.B Returns
+The portion of the text that is not yet covered by the
+stack.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B set_grammar(grammar)
+Change the grammar used to parse texts.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgrammar\fP (\fICFG\fP) \-\- The new grammar.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B shift()
+Move a token from the beginning of the remaining text to the
+end of the stack. If there are no more tokens in the
+remaining text, then do nothing.
+.INDENT 7.0
+.TP
+.B Returns
+True if the shift operation was successful.
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stack()
+.INDENT 7.0
+.TP
+.B Returns
+The parser\(aqs stack.
+.TP
+.B Return type
+list(str and Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B step()
+Perform a single parsing operation. If a reduction is
+possible, then perform that reduction, and return the
+production that it is based on. Otherwise, if a shift is
+possible, then perform it, and return True. Otherwise,
+return False.
+.INDENT 7.0
+.TP
+.B Returns
+False if no operation was performed; True if a shift was
+performed; and the CFG production used to reduce if a
+reduction was performed.
+.TP
+.B Return type
+Production or bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B undo()
+Return the parser to its state before the most recent
+shift or reduce operation. Calling \fBundo\fP repeatedly returns
+the parser to successively earlier states. If no shift or
+reduce operations have been performed, \fBundo\fP will make no
+changes.
+.INDENT 7.0
+.TP
+.B Returns
+true if an operation was successfully undone.
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.shiftreduce.demo()
+A demonstration of the shift\-reduce parser.
+.UNINDENT
+.SS nltk.parse.stanford module
+.INDENT 0.0
+.TP
+.B class nltk.parse.stanford.GenericStanfordParser(path_to_jar=None, path_to_models_jar=None, model_path=\(aqedu/stanford/nlp/models/lexparser/englishPCFG.ser.gz\(aq, encoding=\(aqutf8\(aq, verbose=False, java_options=\(aq\-mx4g\(aq, corenlp_options=\(aq\(aq)
+Bases: \fI\%nltk.parse.api.ParserI\fP
+.sp
+Interface to the Stanford Parser
+.INDENT 7.0
+.TP
+.B parse_sents(sentences, verbose=False)
+Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
+list where each sentence is a list of words.
+Each sentence will be automatically tagged with this StanfordParser instance\(aqs
+tagger.
+If whitespace exists inside a token, the token will be treated as
+separate tokens.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsentences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Input sentences to parse
+.TP
+.B Return type
+iter(iter(Tree))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B raw_parse(sentence, verbose=False)
+Use StanfordParser to parse a sentence. Takes a sentence as a string;
+before parsing, it will be automatically tokenized and tagged by
+the Stanford Parser.
+.INDENT 7.0 +.TP +.B Parameters +\fBsentence\fP (\fIstr\fP) \-\- Input sentence to parse +.TP +.B Return type +iter(Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B raw_parse_sents(sentences, verbose=False) +Use StanfordParser to parse multiple sentences. Takes multiple sentences as a +list of strings. +Each sentence will be automatically tokenized and tagged by the Stanford Parser. +.INDENT 7.0 +.TP +.B Parameters +\fBsentences\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Input sentences to parse +.TP +.B Return type +iter(iter(Tree)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_parse(sentence, verbose=False) +Use StanfordParser to parse a sentence. Takes a sentence as a list of +(word, tag) tuples; the sentence must have already been tokenized and +tagged. +.INDENT 7.0 +.TP +.B Parameters +\fBsentence\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- Input sentence to parse +.TP +.B Return type +iter(Tree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tagged_parse_sents(sentences, verbose=False) +Use StanfordParser to parse multiple sentences. Takes multiple sentences +where each sentence is a list of (word, tag) tuples. +The sentences must have already been tokenized and tagged. +.INDENT 7.0 +.TP +.B Parameters +\fBsentences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- Input sentences to parse +.TP +.B Return type +iter(iter(Tree)) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.stanford.StanfordDependencyParser(*args, **kwargs) +Bases: \fI\%nltk.parse.stanford.GenericStanfordParser\fP +.sp +.nf +.ft C +>>> dep_parser=StanfordDependencyParser( +\&... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" +\&... ) +.ft P +.fi +.sp +.nf +.ft C +>>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] +[Tree(\(aqjumps\(aq, [Tree(\(aqfox\(aq, [\(aqThe\(aq, \(aqquick\(aq, \(aqbrown\(aq]), Tree(\(aqdog\(aq, [\(aqover\(aq, \(aqthe\(aq, \(aqlazy\(aq])])] +.ft P +.fi +.sp +.nf +.ft C +>>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] +[[((u\(aqjumps\(aq, u\(aqVBZ\(aq), u\(aqnsubj\(aq, (u\(aqfox\(aq, u\(aqNN\(aq)), ((u\(aqfox\(aq, u\(aqNN\(aq), u\(aqdet\(aq, (u\(aqThe\(aq, u\(aqDT\(aq)), +((u\(aqfox\(aq, u\(aqNN\(aq), u\(aqamod\(aq, (u\(aqquick\(aq, u\(aqJJ\(aq)), ((u\(aqfox\(aq, u\(aqNN\(aq), u\(aqamod\(aq, (u\(aqbrown\(aq, u\(aqJJ\(aq)), +((u\(aqjumps\(aq, u\(aqVBZ\(aq), u\(aqnmod\(aq, (u\(aqdog\(aq, u\(aqNN\(aq)), ((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqcase\(aq, (u\(aqover\(aq, u\(aqIN\(aq)), +((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqdet\(aq, (u\(aqthe\(aq, u\(aqDT\(aq)), ((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqamod\(aq, (u\(aqlazy\(aq, u\(aqJJ\(aq))]] +.ft P +.fi +.sp +.nf +.ft C +>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( +\&... "The quick brown fox jumps over the lazy dog.", +\&... "The quick grey wolf jumps over the lazy fox." +\&... ))], []) +[Tree(\(aqjumps\(aq, [Tree(\(aqfox\(aq, [\(aqThe\(aq, \(aqquick\(aq, \(aqbrown\(aq]), Tree(\(aqdog\(aq, [\(aqover\(aq, \(aqthe\(aq, \(aqlazy\(aq])]), +Tree(\(aqjumps\(aq, [Tree(\(aqwolf\(aq, [\(aqThe\(aq, \(aqquick\(aq, \(aqgrey\(aq]), Tree(\(aqfox\(aq, [\(aqover\(aq, \(aqthe\(aq, \(aqlazy\(aq])])] +.ft P +.fi +.sp +.nf +.ft C +>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( +\&... "I \(aqm a dog".split(), +\&... 
"This is my friends \(aq cat ( the tabby )".split(), +\&... ))], []) +[Tree(\(aqdog\(aq, [\(aqI\(aq, "\(aqm", \(aqa\(aq]), Tree(\(aqcat\(aq, [\(aqThis\(aq, \(aqis\(aq, Tree(\(aqfriends\(aq, [\(aqmy\(aq, "\(aq"]), Tree(\(aqtabby\(aq, [\(aqthe\(aq])])] +.ft P +.fi +.sp +.nf +.ft C +>>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents(( +\&... ( +\&... ("The", "DT"), +\&... ("quick", "JJ"), +\&... ("brown", "JJ"), +\&... ("fox", "NN"), +\&... ("jumped", "VBD"), +\&... ("over", "IN"), +\&... ("the", "DT"), +\&... ("lazy", "JJ"), +\&... ("dog", "NN"), +\&... (".", "."), +\&... ), +\&... ))],[]) +[[((u\(aqjumped\(aq, u\(aqVBD\(aq), u\(aqnsubj\(aq, (u\(aqfox\(aq, u\(aqNN\(aq)), ((u\(aqfox\(aq, u\(aqNN\(aq), u\(aqdet\(aq, (u\(aqThe\(aq, u\(aqDT\(aq)), +((u\(aqfox\(aq, u\(aqNN\(aq), u\(aqamod\(aq, (u\(aqquick\(aq, u\(aqJJ\(aq)), ((u\(aqfox\(aq, u\(aqNN\(aq), u\(aqamod\(aq, (u\(aqbrown\(aq, u\(aqJJ\(aq)), +((u\(aqjumped\(aq, u\(aqVBD\(aq), u\(aqnmod\(aq, (u\(aqdog\(aq, u\(aqNN\(aq)), ((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqcase\(aq, (u\(aqover\(aq, u\(aqIN\(aq)), +((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqdet\(aq, (u\(aqthe\(aq, u\(aqDT\(aq)), ((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqamod\(aq, (u\(aqlazy\(aq, u\(aqJJ\(aq))]] +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.stanford.StanfordNeuralDependencyParser(*args, **kwargs) +Bases: \fI\%nltk.parse.stanford.GenericStanfordParser\fP +.sp +.nf +.ft C +>>> from nltk.parse.stanford import StanfordNeuralDependencyParser +>>> dep_parser=StanfordNeuralDependencyParser(java_options=\(aq\-mx4g\(aq) +.ft P +.fi +.sp +.nf +.ft C +>>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] +[Tree(\(aqjumps\(aq, [Tree(\(aqfox\(aq, [\(aqThe\(aq, \(aqquick\(aq, \(aqbrown\(aq]), Tree(\(aqdog\(aq, [\(aqover\(aq, \(aqthe\(aq, \(aqlazy\(aq]), \(aq.\(aq])] +.ft P +.fi +.sp +.nf +.ft C +>>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] +[[((u\(aqjumps\(aq, u\(aqVBZ\(aq), u\(aqnsubj\(aq, (u\(aqfox\(aq, u\(aqNN\(aq)), ((u\(aqfox\(aq, u\(aqNN\(aq), u\(aqdet\(aq, +(u\(aqThe\(aq, u\(aqDT\(aq)), ((u\(aqfox\(aq, u\(aqNN\(aq), u\(aqamod\(aq, (u\(aqquick\(aq, u\(aqJJ\(aq)), ((u\(aqfox\(aq, u\(aqNN\(aq), +u\(aqamod\(aq, (u\(aqbrown\(aq, u\(aqJJ\(aq)), ((u\(aqjumps\(aq, u\(aqVBZ\(aq), u\(aqnmod\(aq, (u\(aqdog\(aq, u\(aqNN\(aq)), +((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqcase\(aq, (u\(aqover\(aq, u\(aqIN\(aq)), ((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqdet\(aq, +(u\(aqthe\(aq, u\(aqDT\(aq)), ((u\(aqdog\(aq, u\(aqNN\(aq), u\(aqamod\(aq, (u\(aqlazy\(aq, u\(aqJJ\(aq)), ((u\(aqjumps\(aq, u\(aqVBZ\(aq), +u\(aqpunct\(aq, (u\(aq.\(aq, u\(aq.\(aq))]] +.ft P +.fi +.sp +.nf +.ft C +>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( +\&... "The quick brown fox jumps over the lazy dog.", +\&... "The quick grey wolf jumps over the lazy fox." +\&... ))], []) +[Tree(\(aqjumps\(aq, [Tree(\(aqfox\(aq, [\(aqThe\(aq, \(aqquick\(aq, \(aqbrown\(aq]), Tree(\(aqdog\(aq, [\(aqover\(aq, +\(aqthe\(aq, \(aqlazy\(aq]), \(aq.\(aq]), Tree(\(aqjumps\(aq, [Tree(\(aqwolf\(aq, [\(aqThe\(aq, \(aqquick\(aq, \(aqgrey\(aq]), +Tree(\(aqfox\(aq, [\(aqover\(aq, \(aqthe\(aq, \(aqlazy\(aq]), \(aq.\(aq])] +.ft P +.fi +.sp +.nf +.ft C +>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( +\&... "I \(aqm a dog".split(), +\&... "This is my friends \(aq cat ( the tabby )".split(), +\&... 
))], []) +[Tree(\(aqdog\(aq, [\(aqI\(aq, "\(aqm", \(aqa\(aq]), Tree(\(aqcat\(aq, [\(aqThis\(aq, \(aqis\(aq, Tree(\(aqfriends\(aq, +[\(aqmy\(aq, "\(aq"]), Tree(\(aqtabby\(aq, [\(aq\-LRB\-\(aq, \(aqthe\(aq, \(aq\-RRB\-\(aq])])] +.ft P +.fi +.INDENT 7.0 +.TP +.B tagged_parse_sents(sentences, verbose=False) +Currently unimplemented because the neural dependency parser (and +the StanfordCoreNLP pipeline class) doesn\(aqt support passing in pre\- +tagged tokens. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.stanford.StanfordParser(*args, **kwargs) +Bases: \fI\%nltk.parse.stanford.GenericStanfordParser\fP +.sp +.nf +.ft C +>>> parser=StanfordParser( +\&... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" +\&... ) +.ft P +.fi +.sp +.nf +.ft C +>>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) +[Tree(\(aqROOT\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqthe\(aq]), Tree(\(aqJJ\(aq, [\(aqquick\(aq]), Tree(\(aqJJ\(aq, [\(aqbrown\(aq]), +Tree(\(aqNN\(aq, [\(aqfox\(aq])]), Tree(\(aqNP\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqNNS\(aq, [\(aqjumps\(aq])]), Tree(\(aqPP\(aq, [Tree(\(aqIN\(aq, [\(aqover\(aq]), +Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqthe\(aq]), Tree(\(aqJJ\(aq, [\(aqlazy\(aq]), Tree(\(aqNN\(aq, [\(aqdog\(aq])])])])])])] +.ft P +.fi +.sp +.nf +.ft C +>>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents(( +\&... "the quick brown fox jumps over the lazy dog", +\&... "the quick grey wolf jumps over the lazy fox" +\&... ))], []) +[Tree(\(aqROOT\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqthe\(aq]), Tree(\(aqJJ\(aq, [\(aqquick\(aq]), Tree(\(aqJJ\(aq, [\(aqbrown\(aq]), +Tree(\(aqNN\(aq, [\(aqfox\(aq])]), Tree(\(aqNP\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqNNS\(aq, [\(aqjumps\(aq])]), Tree(\(aqPP\(aq, [Tree(\(aqIN\(aq, [\(aqover\(aq]), +Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqthe\(aq]), Tree(\(aqJJ\(aq, [\(aqlazy\(aq]), Tree(\(aqNN\(aq, [\(aqdog\(aq])])])])])]), Tree(\(aqROOT\(aq, [Tree(\(aqNP\(aq, +[Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqthe\(aq]), Tree(\(aqJJ\(aq, [\(aqquick\(aq]), Tree(\(aqJJ\(aq, [\(aqgrey\(aq]), Tree(\(aqNN\(aq, [\(aqwolf\(aq])]), Tree(\(aqNP\(aq, +[Tree(\(aqNP\(aq, [Tree(\(aqNNS\(aq, [\(aqjumps\(aq])]), Tree(\(aqPP\(aq, [Tree(\(aqIN\(aq, [\(aqover\(aq]), Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqthe\(aq]), +Tree(\(aqJJ\(aq, [\(aqlazy\(aq]), Tree(\(aqNN\(aq, [\(aqfox\(aq])])])])])])] +.ft P +.fi +.sp +.nf +.ft C +>>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents(( +\&... "I \(aqm a dog".split(), +\&... "This is my friends \(aq cat ( the tabby )".split(), +\&... ))], []) +[Tree(\(aqROOT\(aq, [Tree(\(aqS\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqPRP\(aq, [\(aqI\(aq])]), Tree(\(aqVP\(aq, [Tree(\(aqVBP\(aq, ["\(aqm"]), +Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqa\(aq]), Tree(\(aqNN\(aq, [\(aqdog\(aq])])])])]), Tree(\(aqROOT\(aq, [Tree(\(aqS\(aq, [Tree(\(aqNP\(aq, +[Tree(\(aqDT\(aq, [\(aqThis\(aq])]), Tree(\(aqVP\(aq, [Tree(\(aqVBZ\(aq, [\(aqis\(aq]), Tree(\(aqNP\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqPRP$\(aq, [\(aqmy\(aq]), +Tree(\(aqNNS\(aq, [\(aqfriends\(aq]), Tree(\(aqPOS\(aq, ["\(aq"])]), Tree(\(aqNN\(aq, [\(aqcat\(aq])]), Tree(\(aqPRN\(aq, [Tree(\(aq\-LRB\-\(aq, [Tree(\(aq\(aq, []), +Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqthe\(aq]), Tree(\(aqNN\(aq, [\(aqtabby\(aq])]), Tree(\(aq\-RRB\-\(aq, [])])])])])])])] +.ft P +.fi +.sp +.nf +.ft C +>>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents(( +\&... ( +\&... ("The", "DT"), +\&... ("quick", "JJ"), +\&... 
("brown", "JJ"), +\&... ("fox", "NN"), +\&... ("jumped", "VBD"), +\&... ("over", "IN"), +\&... ("the", "DT"), +\&... ("lazy", "JJ"), +\&... ("dog", "NN"), +\&... (".", "."), +\&... ), +\&... ))],[]) +[Tree(\(aqROOT\(aq, [Tree(\(aqS\(aq, [Tree(\(aqNP\(aq, [Tree(\(aqDT\(aq, [\(aqThe\(aq]), Tree(\(aqJJ\(aq, [\(aqquick\(aq]), Tree(\(aqJJ\(aq, [\(aqbrown\(aq]), +Tree(\(aqNN\(aq, [\(aqfox\(aq])]), Tree(\(aqVP\(aq, [Tree(\(aqVBD\(aq, [\(aqjumped\(aq]), Tree(\(aqPP\(aq, [Tree(\(aqIN\(aq, [\(aqover\(aq]), Tree(\(aqNP\(aq, +[Tree(\(aqDT\(aq, [\(aqthe\(aq]), Tree(\(aqJJ\(aq, [\(aqlazy\(aq]), Tree(\(aqNN\(aq, [\(aqdog\(aq])])])]), Tree(\(aq.\(aq, [\(aq.\(aq])])])] +.ft P +.fi +.UNINDENT +.SS nltk.parse.transitionparser module +.INDENT 0.0 +.TP +.B class nltk.parse.transitionparser.Configuration(dep_graph) +Bases: \fBobject\fP +.sp +Class for holding configuration which is the partial analysis of the input sentence. +The transition based parser aims at finding set of operators that transfer the initial +configuration to the terminal configuration. +.INDENT 7.0 +.TP +.B The configuration includes: +.INDENT 7.0 +.IP \(bu 2 +Stack: for storing partially proceeded words +.IP \(bu 2 +Buffer: for storing remaining input words +.IP \(bu 2 +Set of arcs: for storing partially built dependency tree +.UNINDENT +.UNINDENT +.sp +This class also provides a method to represent a configuration as list of features. +.INDENT 7.0 +.TP +.B extract_features() +Extract the set of features for the current configuration. Implement standard features as describe in +Table 3.2 (page 31) in Dependency Parsing book by Sandra Kubler, Ryan McDonal, Joakim Nivre. +Please note that these features are very basic. +:return: list(str) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.parse.transitionparser.Transition(alg_option) +Bases: \fBobject\fP +.sp +This class defines a set of transition which is applied to a configuration to get another configuration +Note that for different parsing algorithm, the transition is different. 
+.INDENT 7.0
+.TP
+.B LEFT_ARC = \(aqLEFTARC\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B REDUCE = \(aqREDUCE\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B RIGHT_ARC = \(aqRIGHTARC\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B SHIFT = \(aqSHIFT\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B left_arc(conf, relation)
+Note that the algorithm for left\-arc is quite similar for both
+arc\-standard and arc\-eager, except for the precondition.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBconf\fP \-\- The current configuration.
+.TP
+.B Returns
+A new configuration, or \-1 if the precondition is not satisfied.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B reduce(conf)
+Note that the reduce operation is available only for the arc\-eager
+algorithm.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBconf\fP \-\- The current configuration.
+.TP
+.B Returns
+A new configuration, or \-1 if the precondition is not satisfied.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B right_arc(conf, relation)
+Note that the algorithm for right\-arc is DIFFERENT for arc\-standard
+and arc\-eager.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBconf\fP \-\- The current configuration.
+.TP
+.B Returns
+A new configuration, or \-1 if the precondition is not satisfied.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B shift(conf)
+Note that the algorithm for shift is the SAME for arc\-standard and
+arc\-eager.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBconf\fP \-\- The current configuration.
+.TP
+.B Returns
+A new configuration, or \-1 if the precondition is not satisfied.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.parse.transitionparser.TransitionParser(algorithm)
+Bases: \fI\%nltk.parse.api.ParserI\fP
+.sp
+Class for a transition\-based parser. Implements two algorithms,
+"arc\-standard" and "arc\-eager".
+.INDENT 7.0
+.TP
+.B ARC_EAGER = \(aqarc\-eager\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ARC_STANDARD = \(aqarc\-standard\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(depgraphs, modelFile)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdepgraphs\fP (\fIlist\fP\fI(\fP\fIDependencyGraph\fP\fI)\fP) \-\- the list of test sentences, each represented as a dependency graph where the \(aqhead\(aq information is dummy
+.IP \(bu 2
+\fBmodelFile\fP (\fIstr\fP) \-\- the model file
+.UNINDENT
+.TP
+.B Returns
+list(DependencyGraph) with the \(aqhead\(aq and \(aqrel\(aq information
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(depgraphs, modelfile, verbose=True)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdepgraphs\fP (\fIlist\fP\fI(\fP\fIDependencyGraph\fP\fI)\fP) \-\- list of DependencyGraph as the training data
+.IP \(bu 2
+\fBmodelfile\fP (\fIstr\fP) \-\- file name to save the trained model
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.transitionparser.demo()
+.sp
+.nf
+.ft C
+>>> from nltk.parse import DependencyGraph, DependencyEvaluator
+>>> from nltk.parse.transitionparser import TransitionParser, Configuration, Transition
+>>> gold_sent = DependencyGraph("""
+\&... Economic JJ 2 ATT
+\&... news NN 3 SBJ
+\&... has VBD 0 ROOT
+\&... little JJ 5 ATT
+\&... effect NN 3 OBJ
+\&... on IN 5 ATT
+\&... financial JJ 8 ATT
+\&... markets NNS 6 PC
+\&... . . 3 PU
+\&... 
""") +.ft P +.fi +.sp +.nf +.ft C +>>> conf = Configuration(gold_sent) +.ft P +.fi +.sp +###################### Check the Initial Feature ######################## +.sp +.nf +.ft C +>>> print(\(aq, \(aq.join(conf.extract_features())) +STK_0_POS_TOP, BUF_0_FORM_Economic, BUF_0_LEMMA_Economic, BUF_0_POS_JJ, BUF_1_FORM_news, BUF_1_POS_NN, BUF_2_POS_VBD, BUF_3_POS_JJ +.ft P +.fi +.sp +###################### Check The Transition ####################### +Check the Initialized Configuration +>>> print(conf) +Stack : [0] Buffer : [1, 2, 3, 4, 5, 6, 7, 8, 9] Arcs : [] +.INDENT 7.0 +.IP A. 3 +Do some transition checks for ARC\-STANDARD +.UNINDENT +.sp +.nf +.ft C +>>> operation = Transition(\(aqarc\-standard\(aq) +>>> operation.shift(conf) +>>> operation.left_arc(conf, "ATT") +>>> operation.shift(conf) +>>> operation.left_arc(conf,"SBJ") +>>> operation.shift(conf) +>>> operation.shift(conf) +>>> operation.left_arc(conf, "ATT") +>>> operation.shift(conf) +>>> operation.shift(conf) +>>> operation.shift(conf) +>>> operation.left_arc(conf, "ATT") +.ft P +.fi +.sp +Middle Configuration and Features Check +>>> print(conf) +Stack : [0, 3, 5, 6] Buffer : [8, 9] Arcs : [(2, \(aqATT\(aq, 1), (3, \(aqSBJ\(aq, 2), (5, \(aqATT\(aq, 4), (8, \(aqATT\(aq, 7)] +.sp +.nf +.ft C +>>> print(\(aq, \(aq.join(conf.extract_features())) +STK_0_FORM_on, STK_0_LEMMA_on, STK_0_POS_IN, STK_1_POS_NN, BUF_0_FORM_markets, BUF_0_LEMMA_markets, BUF_0_POS_NNS, BUF_1_FORM_., BUF_1_POS_., BUF_0_LDEP_ATT +.ft P +.fi +.sp +.nf +.ft C +>>> operation.right_arc(conf, "PC") +>>> operation.right_arc(conf, "ATT") +>>> operation.right_arc(conf, "OBJ") +>>> operation.shift(conf) +>>> operation.right_arc(conf, "PU") +>>> operation.right_arc(conf, "ROOT") +>>> operation.shift(conf) +.ft P +.fi +.sp +Terminated Configuration Check +>>> print(conf) +Stack : [0] Buffer : [] Arcs : [(2, \(aqATT\(aq, 1), (3, \(aqSBJ\(aq, 2), (5, \(aqATT\(aq, 4), (8, \(aqATT\(aq, 7), (6, \(aqPC\(aq, 8), (5, \(aqATT\(aq, 6), (3, \(aqOBJ\(aq, 5), (3, \(aqPU\(aq, 9), (0, \(aqROOT\(aq, 3)] +.INDENT 7.0 +.IP B. 3 +Do some transition checks for ARC\-EAGER +.UNINDENT +.sp +.nf +.ft C +>>> conf = Configuration(gold_sent) +>>> operation = Transition(\(aqarc\-eager\(aq) +>>> operation.shift(conf) +>>> operation.left_arc(conf,\(aqATT\(aq) +>>> operation.shift(conf) +>>> operation.left_arc(conf,\(aqSBJ\(aq) +>>> operation.right_arc(conf,\(aqROOT\(aq) +>>> operation.shift(conf) +>>> operation.left_arc(conf,\(aqATT\(aq) +>>> operation.right_arc(conf,\(aqOBJ\(aq) +>>> operation.right_arc(conf,\(aqATT\(aq) +>>> operation.shift(conf) +>>> operation.left_arc(conf,\(aqATT\(aq) +>>> operation.right_arc(conf,\(aqPC\(aq) +>>> operation.reduce(conf) +>>> operation.reduce(conf) +>>> operation.reduce(conf) +>>> operation.right_arc(conf,\(aqPU\(aq) +>>> print(conf) +Stack : [0, 3, 9] Buffer : [] Arcs : [(2, \(aqATT\(aq, 1), (3, \(aqSBJ\(aq, 2), (0, \(aqROOT\(aq, 3), (5, \(aqATT\(aq, 4), (3, \(aqOBJ\(aq, 5), (5, \(aqATT\(aq, 6), (8, \(aqATT\(aq, 7), (6, \(aqPC\(aq, 8), (3, \(aqPU\(aq, 9)] +.ft P +.fi +.sp +###################### Check The Training Function ####################### +.sp +A. 
Check the ARC\-STANDARD training
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> import tempfile
+>>> import os
+>>> input_file = tempfile.NamedTemporaryFile(prefix=\(aqtransition_parse.train\(aq, dir=tempfile.gettempdir(), delete=False)
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> parser_std = TransitionParser(\(aqarc\-standard\(aq)
+>>> print(\(aq, \(aq.join(parser_std._create_training_examples_arc_std([gold_sent], input_file)))
+ Number of training examples : 1
+ Number of valid (projective) examples : 1
+SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, SHIFT, SHIFT, LEFTARC:ATT, SHIFT, SHIFT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, RIGHTARC:ATT, RIGHTARC:OBJ, SHIFT, RIGHTARC:PU, RIGHTARC:ROOT, SHIFT
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> parser_std.train([gold_sent],\(aqtemp.arcstd.model\(aq, verbose=False)
+ Number of training examples : 1
+ Number of valid (projective) examples : 1
+>>> os.remove(input_file.name)
+.ft P
+.fi
+.INDENT 7.0
+.IP B. 3
+Check the ARC\-EAGER training
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> input_file = tempfile.NamedTemporaryFile(prefix=\(aqtransition_parse.train\(aq, dir=tempfile.gettempdir(),delete=False)
+>>> parser_eager = TransitionParser(\(aqarc\-eager\(aq)
+>>> print(\(aq, \(aq.join(parser_eager._create_training_examples_arc_eager([gold_sent], input_file)))
+ Number of training examples : 1
+ Number of valid (projective) examples : 1
+SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, RIGHTARC:ROOT, SHIFT, LEFTARC:ATT, RIGHTARC:OBJ, RIGHTARC:ATT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, REDUCE, REDUCE, REDUCE, RIGHTARC:PU
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> parser_eager.train([gold_sent],\(aqtemp.arceager.model\(aq, verbose=False)
+ Number of training examples : 1
+ Number of valid (projective) examples : 1
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> os.remove(input_file.name)
+.ft P
+.fi
+.sp
+###################### Check The Parsing Function ########################
+.INDENT 7.0
+.IP A. 3
+Check the ARC\-STANDARD parser
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> result = parser_std.parse([gold_sent], \(aqtemp.arcstd.model\(aq)
+>>> de = DependencyEvaluator(result, [gold_sent])
+>>> de.eval() >= (0, 0)
+True
+.ft P
+.fi
+.INDENT 7.0
+.IP B. 3
+Check the ARC\-EAGER parser
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> result = parser_eager.parse([gold_sent], \(aqtemp.arceager.model\(aq)
+>>> de = DependencyEvaluator(result, [gold_sent])
+>>> de.eval() >= (0, 0)
+True
+.ft P
+.fi
+.sp
+Remove the temporary test files:
+.sp
+.nf
+.ft C
+>>> os.remove(\(aqtemp.arceager.model\(aq)
+>>> os.remove(\(aqtemp.arcstd.model\(aq)
+.ft P
+.fi
+.sp
+Note that the result is very poor because there is only one training example.
+.UNINDENT
+.SS nltk.parse.util module
+.sp
+Utility functions for parsers.
+.INDENT 0.0
+.TP
+.B class nltk.parse.util.TestGrammar(grammar, suite, accept=None, reject=None)
+Bases: \fBobject\fP
+.sp
+Unit tests for CFG.
+.INDENT 7.0
+.TP
+.B run(show_trees=False)
+.INDENT 7.0
+.TP
+.B Sentences in the test suite are divided into two classes:
+.INDENT 7.0
+.IP \(bu 2
+grammatical (\fBaccept\fP) and
+.IP \(bu 2
+ungrammatical (\fBreject\fP).
+.UNINDENT
+.UNINDENT
+.sp
+If a sentence should parse according to the grammar, the value of
+\fBtrees\fP will be a non\-empty list. If a sentence should be rejected
+according to the grammar, then the value of \fBtrees\fP will be None.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.util.extract_test_sentences(string, comment_chars=\(aq#%;\(aq, encoding=None)
+Parses a string with one test sentence per line.
+Lines can optionally begin with:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+a bool, saying if the sentence is grammatical or not, or
+.IP \(bu 2
+an int, giving the number of parse trees it should have,
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The result information is followed by a colon, and then the sentence.
+Empty lines and lines beginning with a comment char are ignored.
+.INDENT 7.0
+.TP
+.B Returns
+a list of tuple of sentences and expected results,
+where a sentence is a list of str,
+and a result is None, or bool, or int
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBcomment_chars\fP \-\- \fBstr\fP of possible comment characters.
+.IP \(bu 2
+\fBencoding\fP \-\- the encoding of the string, if it is binary
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.util.load_parser(grammar_url, trace=0, parser=None, chart_class=None, beam_size=0, **load_args)
+Load a grammar from a file, and build a parser based on that grammar.
+The parser depends on the grammar format, and might also depend
+on properties of the grammar itself.
+.INDENT 7.0
+.TP
+.B The following grammar formats are currently supported:
+.INDENT 7.0
+.IP \(bu 2
+\fB\(aqcfg\(aq\fP (CFGs: \fBCFG\fP)
+.IP \(bu 2
+\fB\(aqpcfg\(aq\fP (probabilistic CFGs: \fBPCFG\fP)
+.IP \(bu 2
+\fB\(aqfcfg\(aq\fP (feature\-based CFGs: \fBFeatureGrammar\fP)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBgrammar_url\fP (\fIstr\fP) \-\- A URL specifying where the grammar is located.
+The default protocol is \fB"nltk:"\fP, which searches for the file
+in the NLTK data package.
+.IP \(bu 2
+\fBtrace\fP (\fIint\fP) \-\- The level of tracing that should be used when
+parsing a text. \fB0\fP will generate no tracing output;
+and higher numbers will produce more verbose tracing output.
+.IP \(bu 2
+\fBparser\fP \-\- The class used for parsing; should be \fBChartParser\fP
+or a subclass.
+If None, the class depends on the grammar format.
+.IP \(bu 2
+\fBchart_class\fP \-\- The class used for storing the chart;
+should be \fBChart\fP or a subclass.
+Only used for CFGs and feature CFGs.
+If None, the chart class depends on the grammar format.
+.IP \(bu 2
+\fBbeam_size\fP (\fIint\fP) \-\- The maximum length for the parser\(aqs edge queue.
+Only used for probabilistic CFGs.
+.IP \(bu 2
+\fBload_args\fP \-\- Keyword parameters used when loading the grammar.
+See \fBdata.load\fP for more information.
+.UNINDENT
+.UNINDENT
+.UNINDENT
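+.sp
+A short sketch of typical \fBload_parser\fP usage, assuming the
+\fBbook_grammars\fP data package has been installed via
+\fBnltk.download()\fP:
+.sp
+.nf
+.ft C
+>>> from nltk.parse.util import load_parser
+>>> cp = load_parser(\(aqgrammars/book_grammars/feat0.fcfg\(aq, trace=0)
+>>> trees = list(cp.parse(\(aqKim likes children\(aq.split()))
+.ft P
+.fi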
One line is yielded per word, and two newlines
+mark the end of each sentence.
+.sp
+.nf
+.ft C
+>>> from nltk import word_tokenize, sent_tokenize, pos_tag
+>>> text = "This is a foobar sentence. Is that right?"
+>>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)]
+>>> for line in taggedsents_to_conll(sentences):
+\&... if line:
+\&... print(line, end="")
+1 This _ DT DT _ 0 a _ _
+2 is _ VBZ VBZ _ 0 a _ _
+3 a _ DT DT _ 0 a _ _
+4 foobar _ JJ JJ _ 0 a _ _
+5 sentence _ NN NN _ 0 a _ _
+6 . _ . . _ 0 a _ _
+
+
+1 Is _ VBZ VBZ _ 0 a _ _
+2 that _ IN IN _ 0 a _ _
+3 right _ NN NN _ 0 a _ _
+4 ? _ . . _ 0 a _ _
+
+
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsentences\fP \-\- Input sentences to parse
+.TP
+.B Return type
+iter(str)
+.TP
+.B Returns
+a generator yielding sentences in CONLL format.
+.UNINDENT
+.UNINDENT
+.SS nltk.parse.viterbi module
+.INDENT 0.0
+.TP
+.B class nltk.parse.viterbi.ViterbiParser(grammar, trace=0)
+Bases: \fI\%nltk.parse.api.ParserI\fP
+.sp
+A bottom\-up \fBPCFG\fP parser that uses dynamic programming to find
+the single most likely parse for a text. The \fBViterbiParser\fP parser
+parses texts by filling in a "most likely constituent table".
+This table records the most probable tree representation for any
+given span and node value. In particular, it has an entry for
+every start index, end index, and node value, recording the most
+likely subtree that spans from the start index to the end index,
+and has the given node value.
+.sp
+The \fBViterbiParser\fP parser fills in this table incrementally. It starts
+by filling in all entries for constituents that span one element
+of text (i.e., entries where the end index is one greater than the
+start index). After it has filled in all table entries for
+constituents that span one element of text, it fills in the
+entries for constituents that span two elements of text. It
+continues filling in the entries for constituents spanning larger
+and larger portions of the text, until the entire table has been
+filled. Finally, it returns the table entry for a constituent
+spanning the entire text, whose node value is the grammar\(aqs start
+symbol.
+.sp
+In order to find the most likely constituent with a given span and
+node value, the \fBViterbiParser\fP parser considers all productions that
+could produce that node value. For each production, it finds all
+children that collectively cover the span and have the node values
+specified by the production\(aqs right hand side. If the probability
+of the tree formed by applying the production to the children is
+greater than the probability of the current entry in the table,
+then the table is updated with this new tree.
+.sp
+A pseudo\-code description of the algorithm used by
+\fBViterbiParser\fP is:
+.nf
+Create an empty most likely constituent table, \fIMLC\fP\&.
+
For width in 1...len(text):
+.in +2
+For start in 1...len(text)\-width:
+.in +2
+For prod in grammar.productions:
+.in +2
+For each sequence of subtrees [t[1], t[2], ..., t[n]] in MLC,
+.in +2
+where t[i].label()==prod.rhs[i],
+and the sequence covers [start:start+width]:
+.in +2
+old_p = MLC[start, start+width, prod.lhs]
+new_p = P(t[1])P(t[2])...P(t[n])P(prod)
+if new_p > old_p:
+.in +2
+new_tree = Tree(prod.lhs, t[1], t[2], ..., t[n])
+MLC[start, start+width, prod.lhs] = new_tree
+.in -2
+.in -2
+.in -2
+.in -2
+.in -2
+.in -2
+Return MLC[0, len(text), start_symbol]
+.fi
+.sp
+.INDENT 7.0
+.TP
+.B Variables
+.INDENT 7.0
+.IP \(bu 2
+\fB_grammar\fP \-\- The grammar used to parse sentences.
+.IP \(bu 2
+\fB_trace\fP \-\- The level of tracing output that should be generated
+when parsing a text.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B grammar()
+.INDENT 7.0
+.TP
+.B Returns
+The grammar used by this parser.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(tokens)
+.INDENT 7.0
+.TP
+.B Returns
+An iterator that generates parse trees for the sentence.
+.UNINDENT
+.sp
+When possible this list is sorted from most likely to least likely.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The sentence to be parsed
+.TP
+.B Return type
+iter(Tree)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B trace(trace=2)
+Set the level of tracing output that should be generated when
+parsing a text.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtrace\fP (\fIint\fP) \-\- The trace level. A trace level of \fB0\fP will
+generate no tracing output; and higher trace levels will
+produce more verbose tracing output.
+.TP
+.B Return type
+None
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.parse.viterbi.demo()
+A demonstration of the probabilistic parsers. The user is
+prompted to select which demo to run, and how many parses should
+be found; and then each parser is run on the same demo, and a
+summary of the results is displayed.
+.UNINDENT
+.SS Module contents
+.sp
+NLTK Parsers
+.sp
+Classes and interfaces for producing tree structures that represent
+the internal organization of a text. This task is known as "parsing"
+the text, and the resulting tree structures are called the text\(aqs
+"parses". Typically, the text is a single sentence, and the tree
+structure represents the syntactic structure of the sentence.
+However, parsers can also be used in other domains. For example,
+parsers can be used to derive the morphological structure of the
+morphemes that make up a word, or to derive the discourse structure
+for a set of utterances.
+.sp
+Sometimes, a single piece of text can be represented by more than one
+tree structure. Texts represented by more than one tree structure are
+called "ambiguous" texts. Note that there are actually two ways in
+which a text can be ambiguous:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+The text has multiple correct parses.
+.IP \(bu 2
+There is not enough information to decide which of several
+candidate parses is correct.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+However, the parser module does \fInot\fP distinguish these two types of
+ambiguity.
+.sp
+The parser module defines \fBParserI\fP, a standard interface for parsing
+texts; and two simple implementations of that interface,
+\fBShiftReduceParser\fP and \fBRecursiveDescentParser\fP\&.
It also contains +three sub\-modules for specialized kinds of parsing: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fBnltk.parser.chart\fP defines chart parsing, which uses dynamic +programming to efficiently parse texts. +.IP \(bu 2 +\fBnltk.parser.probabilistic\fP defines probabilistic parsing, which +associates a probability with each parse. +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.sem package +.SS Submodules +.SS nltk.sem.boxer module +.sp +An interface to Boxer. +.sp +This interface relies on the latest version of the development (subversion) version of +C&C and Boxer. +.INDENT 0.0 +.TP +.B Usage: +Set the environment variable CANDC to the bin directory of your CandC installation. +The models directory should be in the CandC root directory. +For example: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.TP +.B /path/to/candc/ +.INDENT 7.0 +.TP +.B bin/ +candc +boxer +.TP +.B models/ +boxer/ +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.AbstractBoxerDrs +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B atoms() +.UNINDENT +.INDENT 7.0 +.TP +.B clean() +.UNINDENT +.INDENT 7.0 +.TP +.B renumber_sentences(f) +.UNINDENT +.INDENT 7.0 +.TP +.B variable_types() +.UNINDENT +.INDENT 7.0 +.TP +.B variables() +.INDENT 7.0 +.TP +.B Returns +(set, set, set) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.Boxer(boxer_drs_interpreter=None, elimeq=False, bin_dir=None, verbose=False, resolve=True) +Bases: \fBobject\fP +.sp +This class is an interface to Johan Bos\(aqs program Boxer, a wide\-coverage +semantic parser that produces Discourse Representation Structures (DRSs). +.INDENT 7.0 +.TP +.B interpret(input, discourse_id=None, question=False, verbose=False) +Use Boxer to give a first order representation. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBinput\fP \-\- str Input sentence to parse +.IP \(bu 2 +\fBoccur_index\fP \-\- bool Should predicates be occurrence indexed? +.IP \(bu 2 +\fBdiscourse_id\fP \-\- str An identifier to be inserted to each occurrence\-indexed predicate. +.UNINDENT +.TP +.B Returns +\fBdrt.DrtExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B interpret_multi(input, discourse_id=None, question=False, verbose=False) +Use Boxer to give a first order representation. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBinput\fP \-\- list of str Input sentences to parse as a single discourse +.IP \(bu 2 +\fBoccur_index\fP \-\- bool Should predicates be occurrence indexed? +.IP \(bu 2 +\fBdiscourse_id\fP \-\- str An identifier to be inserted to each occurrence\-indexed predicate. +.UNINDENT +.TP +.B Returns +\fBdrt.DrtExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B interpret_multi_sents(inputs, discourse_ids=None, question=False, verbose=False) +Use Boxer to give a first order representation. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBinputs\fP \-\- list of list of str Input discourses to parse +.IP \(bu 2 +\fBoccur_index\fP \-\- bool Should predicates be occurrence indexed? +.IP \(bu 2 +\fBdiscourse_ids\fP \-\- list of str Identifiers to be inserted to each occurrence\-indexed predicate. +.UNINDENT +.TP +.B Returns +\fBdrt.DrtExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B interpret_sents(inputs, discourse_ids=None, question=False, verbose=False) +Use Boxer to give a first order representation. 
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBinputs\fP \-\- list of str Input sentences to parse as individual discourses +.IP \(bu 2 +\fBoccur_index\fP \-\- bool Should predicates be occurrence indexed? +.IP \(bu 2 +\fBdiscourse_ids\fP \-\- list of str Identifiers to be inserted to each occurrence\-indexed predicate. +.UNINDENT +.TP +.B Returns +list of \fBdrt.DrtExpression\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_bin_dir(bin_dir, verbose=False) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerCard(discourse_id, sent_index, word_indices, var, value, type) +Bases: \fI\%nltk.sem.boxer.BoxerIndexed\fP +.INDENT 7.0 +.TP +.B renumber_sentences(f) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerDrs(refs, conds, consequent=None) +Bases: \fI\%nltk.sem.boxer.AbstractBoxerDrs\fP +.INDENT 7.0 +.TP +.B atoms() +.UNINDENT +.INDENT 7.0 +.TP +.B clean() +.UNINDENT +.INDENT 7.0 +.TP +.B renumber_sentences(f) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerDrsParser(discourse_id=None) +Bases: \fI\%nltk.sem.drt.DrtParser\fP +.sp +Reparse the str form of subclasses of \fBAbstractBoxerDrs\fP +.INDENT 7.0 +.TP +.B attempt_adjuncts(expression, context) +.UNINDENT +.INDENT 7.0 +.TP +.B get_all_symbols() +This method exists to be overridden +.UNINDENT +.INDENT 7.0 +.TP +.B get_next_token_variable(description) +.UNINDENT +.INDENT 7.0 +.TP +.B handle(tok, context) +This method is intended to be overridden for logics that +use different operators or expressions +.UNINDENT +.INDENT 7.0 +.TP +.B nullableIntToken() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerEq(discourse_id, sent_index, word_indices, var1, var2) +Bases: \fI\%nltk.sem.boxer.BoxerIndexed\fP +.INDENT 7.0 +.TP +.B atoms() +.UNINDENT +.INDENT 7.0 +.TP +.B renumber_sentences(f) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerIndexed(discourse_id, sent_index, word_indices) +Bases: \fI\%nltk.sem.boxer.AbstractBoxerDrs\fP +.INDENT 7.0 +.TP +.B atoms() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerNamed(discourse_id, sent_index, word_indices, var, name, type, sense) +Bases: \fI\%nltk.sem.boxer.BoxerIndexed\fP +.INDENT 7.0 +.TP +.B change_var(var) +.UNINDENT +.INDENT 7.0 +.TP +.B clean() +.UNINDENT +.INDENT 7.0 +.TP +.B renumber_sentences(f) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerNot(drs) +Bases: \fI\%nltk.sem.boxer.AbstractBoxerDrs\fP +.INDENT 7.0 +.TP +.B atoms() +.UNINDENT +.INDENT 7.0 +.TP +.B clean() +.UNINDENT +.INDENT 7.0 +.TP +.B renumber_sentences(f) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerOr(discourse_id, sent_index, word_indices, drs1, drs2) +Bases: \fI\%nltk.sem.boxer.BoxerIndexed\fP +.INDENT 7.0 +.TP +.B atoms() +.UNINDENT +.INDENT 7.0 +.TP +.B clean() +.UNINDENT +.INDENT 7.0 +.TP +.B renumber_sentences(f) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.boxer.BoxerOutputDrsParser(discourse_id=None) +Bases: \fI\%nltk.sem.drt.DrtParser\fP +.INDENT 7.0 +.TP +.B attempt_adjuncts(expression, context) +.UNINDENT +.INDENT 7.0 +.TP +.B get_all_symbols() +This method exists to be overridden +.UNINDENT +.INDENT 7.0 +.TP +.B handle(tok, context) +This method is intended to be overridden for logics that +use different operators or expressions +.UNINDENT +.INDENT 7.0 +.TP +.B handle_condition(tok, indices) +Handle a DRS condition +.INDENT 7.0 +.TP +.B Parameters +\fBindices\fP \-\- list of int +.TP +.B Returns +list of 
\fBDrtExpression\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B handle_drs(tok)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse(data, signature=None)
+Parse the expression.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdata\fP \-\- str for the input to be parsed
+.IP \(bu 2
+\fBsignature\fP \-\- \fBdict\fP that maps variable names to type strings
+.UNINDENT
+.TP
+.B Returns
+a parsed Expression
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_condition(indices)
+Parse a DRS condition
+.INDENT 7.0
+.TP
+.B Returns
+list of \fBDrtExpression\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_drs()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_index()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_variable()
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.boxer.BoxerPred(discourse_id, sent_index, word_indices, var, name, pos, sense)
+Bases: \fI\%nltk.sem.boxer.BoxerIndexed\fP
+.INDENT 7.0
+.TP
+.B change_var(var)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B clean()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B renumber_sentences(f)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.boxer.BoxerProp(discourse_id, sent_index, word_indices, var, drs)
+Bases: \fI\%nltk.sem.boxer.BoxerIndexed\fP
+.INDENT 7.0
+.TP
+.B atoms()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B clean()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B referenced_labels()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B renumber_sentences(f)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.boxer.BoxerRel(discourse_id, sent_index, word_indices, var1, var2, rel, sense)
+Bases: \fI\%nltk.sem.boxer.BoxerIndexed\fP
+.INDENT 7.0
+.TP
+.B clean()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B renumber_sentences(f)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.boxer.BoxerWhq(discourse_id, sent_index, word_indices, ans_types, drs1, variable, drs2)
+Bases: \fI\%nltk.sem.boxer.BoxerIndexed\fP
+.INDENT 7.0
+.TP
+.B atoms()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B clean()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B renumber_sentences(f)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.boxer.NltkDrtBoxerDrsInterpreter(occur_index=False)
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B interpret(ex)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBex\fP \-\- \fBAbstractBoxerDrs\fP
+.TP
+.B Returns
+\fBDrtExpression\fP
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.boxer.PassthroughBoxerDrsInterpreter
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B interpret(ex)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.sem.boxer.UnparseableInputException
+Bases: \fBException\fP
+.UNINDENT
+.SS nltk.sem.chat80 module
+.SS Overview
+.sp
+Chat\-80 was a natural language system which allowed the user to
+interrogate a Prolog knowledge base in the domain of world
+geography. It was developed in the early \(aq80s by Warren and Pereira; see
+\fBhttp://www.aclweb.org/anthology/J82\-3002.pdf\fP for a description and
+\fBhttp://www.cis.upenn.edu/~pereira/oldies.html\fP for the source
+files.
+.sp
+This module contains functions to extract data from the Chat\-80
+relation files (\(aqthe world database\(aq), and convert them into a format
+that can be incorporated in the FOL models of
+\fBnltk.sem.evaluate\fP\&. The code assumes that the Prolog
+input files are available in the NLTK corpora directory.
+
.sp
+The Chat\-80 World Database consists of the following files:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+world0.pl
+rivers.pl
+cities.pl
+countries.pl
+contain.pl
+borders.pl
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+This module uses a slightly modified version of \fBworld0.pl\fP, in which
+a set of Prolog rules has been omitted. The modified file is named
+\fBworld1.pl\fP\&. Currently, the file \fBrivers.pl\fP is not read in, since
+it uses a list rather than a string in the second field.
+.SS Reading Chat\-80 Files
+.sp
+Chat\-80 relations are like tables in a relational database. The
+relation acts as the name of the table; the first argument acts as the
+\(aqprimary key\(aq; and subsequent arguments are further fields in the
+table. In general, the name of the table provides a label for a unary
+predicate whose extension is all the primary keys. For example,
+relations in \fBcities.pl\fP are of the following form:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\(aqcity(athens,greece,1368).\(aq
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here, \fB\(aqathens\(aq\fP is the key, and will be mapped to a member of the
+unary predicate \fIcity\fP\&.
+.sp
+The fields in the table are mapped to binary predicates. The first
+argument of the predicate is the primary key, while the second
+argument is the data in the relevant field. Thus, in the above
+example, the third field is mapped to the binary predicate
+\fIpopulation_of\fP, whose extension is a set of pairs such as
+\fB\(aq(athens, 1368)\(aq\fP\&.
+.sp
+An exception to this general framework is required by the relations in
+the files \fBborders.pl\fP and \fBcontains.pl\fP\&. These contain facts of the
+following form:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\(aqborders(albania,greece).\(aq
+
+\(aqcontains0(africa,central_africa).\(aq
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+We do not want to form a unary concept out of the element in
+the first field of these records, and we want the label of the binary
+relation just to be \fB\(aqborder\(aq\fP/\fB\(aqcontain\(aq\fP respectively.
+.sp
+In order to drive the extraction process, we use \(aqrelation metadata bundles\(aq
+which are Python dictionaries such as the following:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+city = {\(aqlabel\(aq: \(aqcity\(aq,
+ \(aqclosures\(aq: [],
+ \(aqschema\(aq: [\(aqcity\(aq, \(aqcountry\(aq, \(aqpopulation\(aq],
+ \(aqfilename\(aq: \(aqcities.pl\(aq}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+According to this, the file \fBcity[\(aqfilename\(aq]\fP contains a list of
+relational tuples (or more accurately, the corresponding strings in
+Prolog form) whose predicate symbol is \fBcity[\(aqlabel\(aq]\fP and whose
+relational schema is \fBcity[\(aqschema\(aq]\fP\&. The notion of a \fBclosure\fP is
+discussed in the next section.
+.SS Concepts
+.sp
+In order to encapsulate the results of the extraction, a class of
+\fBConcept\fP objects is introduced. A \fBConcept\fP object has a number of
+attributes, in particular a \fBprefLabel\fP and \fBextension\fP, which make
+it easier to inspect the output of the extraction. In addition, the
+\fBextension\fP can be further processed: in the case of the \fB\(aqborder\(aq\fP
+relation, we check that the relation is symmetric, and in the case
+of the \fB\(aqcontain\(aq\fP relation, we carry out the transitive
+closure. The closure properties associated with a concept are
+indicated in the relation metadata, as noted earlier.
+.sp
+The \fBextension\fP of a \fBConcept\fP object is then incorporated into a
+\fBValuation\fP object.
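+.sp
+As a minimal sketch of the extraction step, the \fBcity\fP bundle above can
+be fed to the \fBclause2concepts\fP function documented below (this assumes
+the Chat\-80 Prolog files are available through the NLTK data package; the
+expected labels are noted in the comments, following the labelling scheme
+described above):
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+>>> from nltk.sem import chat80
+>>> # expected: one unary concept (\(aqcity\(aq) for the primary key, plus
+>>> # one binary concept (\(aqcountry_of\(aq, \(aqpopulation_of\(aq) per field
+>>> concepts = chat80.clause2concepts(\(aqcities.pl\(aq, \(aqcity\(aq,
+\&...     [\(aqcity\(aq, \(aqcountry\(aq, \(aqpopulation\(aq])
+>>> labels = [c.prefLabel for c in concepts]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT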
+.SS Persistence +.sp +The functions \fBval_dump\fP and \fBval_load\fP are provided to allow a +valuation to be stored in a persistent database and re\-loaded, rather +than having to be re\-computed each time. +.SS Individuals and Lexical Items +.sp +As well as deriving relations from the Chat\-80 data, we also create a +set of individual constants, one for each entity in the domain. The +individual constants are string\-identical to the entities. For +example, given a data item such as \fB\(aqzloty\(aq\fP, we add to the valuation +a pair \fB(\(aqzloty\(aq, \(aqzloty\(aq)\fP\&. In order to parse English sentences that +refer to these entities, we also create a lexical item such as the +following for each individual constant: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +PropN[num=sg, sem=<\eP.(P zloty)>] \-> \(aqZloty\(aq +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The set of rules is written to the file \fBchat_pnames.cfg\fP in the +current directory. +.INDENT 0.0 +.TP +.B class nltk.sem.chat80.Concept(prefLabel, arity, altLabels=[], closures=[], extension={}) +Bases: \fBobject\fP +.sp +A Concept class, loosely based on SKOS +(\fI\%http://www.w3.org/TR/swbp\-skos\-core\-guide/\fP). +.INDENT 7.0 +.TP +.B augment(data) +Add more data to the \fBConcept\fP\(aqs extension set. +.INDENT 7.0 +.TP +.B Parameters +\fBdata\fP (\fIstring\fP\fI or \fP\fIpair of strings\fP) \-\- a new semantic value +.TP +.B Return type +set +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B close() +Close a binary relation in the \fBConcept\fP\(aqs extension set. +.INDENT 7.0 +.TP +.B Returns +a new extension for the \fBConcept\fP in which the +relation is closed under a given property +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.binary_concept(label, closures, subj, obj, records) +Make a binary concept out of the primary key and another field in a record. +.sp +A record is a list of entities in some relation, such as +\fB[\(aqfrance\(aq, \(aqparis\(aq]\fP, where \fB\(aqfrance\(aq\fP is acting as the primary +key, and \fB\(aqparis\(aq\fP stands in the \fB\(aqcapital_of\(aq\fP relation to +\fB\(aqfrance\(aq\fP\&. +.sp +More generally, given a record such as \fB[\(aqa\(aq, \(aqb\(aq, \(aqc\(aq]\fP, where +label is bound to \fB\(aqB\(aq\fP, and \fBobj\fP bound to 1, the derived +binary concept will have label \fB\(aqB_of\(aq\fP, and its extension will +be a set of pairs such as \fB(\(aqa\(aq, \(aqb\(aq)\fP\&. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBlabel\fP (\fIstr\fP) \-\- the base part of the preferred label for the concept +.IP \(bu 2 +\fBclosures\fP (\fIlist\fP) \-\- closure properties for the extension of the concept +.IP \(bu 2 +\fBsubj\fP (\fIint\fP) \-\- position in the record of the subject of the predicate +.IP \(bu 2 +\fBobj\fP (\fIint\fP) \-\- position in the record of the object of the predicate +.IP \(bu 2 +\fBrecords\fP (\fIlist of lists\fP) \-\- a list of records +.UNINDENT +.TP +.B Returns +\fBConcept\fP of arity 2 +.TP +.B Return type +Concept +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.cities2table(filename, rel_name, dbname, verbose=False, setup=False) +Convert a file of Prolog clauses into a database table. +.sp +This is not generic, since it doesn\(aqt allow arbitrary +schemas to be set as a parameter. 
+.sp +Intended usage: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +cities2table(\(aqcities.pl\(aq, \(aqcity\(aq, \(aqcity.db\(aq, verbose=True, setup=True) +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfilename\fP (\fIstr\fP) \-\- filename containing the relations +.IP \(bu 2 +\fBrel_name\fP (\fIstr\fP) \-\- name of the relation +.IP \(bu 2 +\fBdbname\fP \-\- filename of persistent store +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.clause2concepts(filename, rel_name, schema, closures=[]) +Convert a file of Prolog clauses into a list of \fBConcept\fP objects. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfilename\fP (\fIstr\fP) \-\- filename containing the relations +.IP \(bu 2 +\fBrel_name\fP (\fIstr\fP) \-\- name of the relation +.IP \(bu 2 +\fBschema\fP (\fIlist\fP) \-\- the schema used in a set of relational tuples +.IP \(bu 2 +\fBclosures\fP (\fIlist\fP) \-\- closure properties for the extension of the concept +.UNINDENT +.TP +.B Returns +a list of \fBConcept\fP objects +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.concepts(items=(\(aqborders\(aq, \(aqcircle_of_lat\(aq, \(aqcircle_of_long\(aq, \(aqcity\(aq, \(aqcontains\(aq, \(aqcontinent\(aq, \(aqcountry\(aq, \(aqocean\(aq, \(aqregion\(aq, \(aqsea\(aq)) +Build a list of concepts corresponding to the relation names in \fBitems\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBitems\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- names of the Chat\-80 relations to extract +.TP +.B Returns +the \fBConcept\fP objects which are extracted from the relations +.TP +.B Return type +list(Concept) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.label_indivs(valuation, lexicon=False) +Assign individual constants to the individuals in the domain of a \fBValuation\fP\&. +.sp +Given a valuation with an entry of the form \fB{\(aqrel\(aq: {\(aqa\(aq: True}}\fP, +add a new entry \fB{\(aqa\(aq: \(aqa\(aq}\fP\&. +.INDENT 7.0 +.TP +.B Return type +Valuation +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.main() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.make_lex(symbols) +Create lexical CFG rules for each individual symbol. +.sp +Given a valuation with an entry of the form \fB{\(aqzloty\(aq: \(aqzloty\(aq}\fP, +create a lexical rule for the proper name \(aqZloty\(aq. +.INDENT 7.0 +.TP +.B Parameters +\fBsymbols\fP (\fIsequence \-\- set\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a list of individual constants in the semantic representation +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.make_valuation(concepts, read=False, lexicon=False) +Convert a list of \fBConcept\fP objects into a list of (label, extension) pairs; +optionally create a \fBValuation\fP object. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBconcepts\fP (\fIlist\fP\fI(\fP\fIConcept\fP\fI)\fP) \-\- concepts +.IP \(bu 2 +\fBread\fP (\fIbool\fP) \-\- if \fBTrue\fP, \fB(symbol, set)\fP pairs are read into a \fBValuation\fP +.UNINDENT +.TP +.B Return type +list or Valuation +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.chat80.process_bundle(rels) +Given a list of relation metadata bundles, make a corresponding +dictionary of concepts, indexed by the relation name. +.INDENT 7.0 +.TP +.B Parameters +\fBrels\fP (\fIlist\fP\fI(\fP\fIdict\fP\fI)\fP) \-\- bundle of metadata needed for constructing a concept +.TP +.B Returns +a dictionary of concepts, indexed by the relation name. 
+
.TP
+.B Return type
+dict(str): Concept
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.chat80.sql_demo()
+Print out every row from the \(aqcity.db\(aq database.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.chat80.sql_query(dbname, query)
+Execute an SQL query over a database.
+:param dbname: filename of persistent store
+:type dbname: str
+:param query: SQL query
+:type query: str
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.chat80.unary_concept(label, subj, records)
+Make a unary concept out of the primary key in a record.
+.sp
+A record is a list of entities in some relation, such as
+\fB[\(aqfrance\(aq, \(aqparis\(aq]\fP, where \fB\(aqfrance\(aq\fP is acting as the primary
+key.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBlabel\fP (\fIstring\fP) \-\- the preferred label for the concept
+.IP \(bu 2
+\fBsubj\fP (\fIint\fP) \-\- position in the record of the subject of the predicate
+.IP \(bu 2
+\fBrecords\fP (\fIlist of lists\fP) \-\- a list of records
+.UNINDENT
+.TP
+.B Returns
+\fBConcept\fP of arity 1
+.TP
+.B Return type
+Concept
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.chat80.val_dump(rels, db)
+Make a \fBValuation\fP from a list of relation metadata bundles and dump to
+persistent database.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBrels\fP (\fIlist of dict\fP) \-\- bundle of metadata needed for constructing a concept
+.IP \(bu 2
+\fBdb\fP (\fIstr\fP) \-\- name of file to which data is written.
+The suffix \(aq.db\(aq will be automatically appended.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.chat80.val_load(db)
+Load a \fBValuation\fP from a persistent database.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdb\fP (\fIstr\fP) \-\- name of file from which data is read.
+The suffix \(aq.db\(aq should be omitted from the name.
+.UNINDENT
+.UNINDENT
+.SS nltk.sem.cooper_storage module
+.INDENT 0.0
+.TP
+.B class nltk.sem.cooper_storage.CooperStore(featstruct)
+Bases: \fBobject\fP
+.sp
+A container for handling quantifier ambiguity via Cooper storage.
+.INDENT 7.0
+.TP
+.B s_retrieve(trace=False)
+Carry out S\-Retrieval of binding operators in store. If hack=True,
+serialize the bindop and core as strings and reparse. Ugh.
+.sp
+Each permutation of the store (i.e. list of binding operators) is
+taken to be a possible scoping of quantifiers. We iterate through the
+binding operators in each permutation, and successively apply them to
+the current term, starting with the core semantic representation,
+working from the inside out.
+.sp
+Binding operators are of the form:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+bo(\eP.all x.(man(x) \-> P(x)),z1)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.cooper_storage.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.cooper_storage.parse_with_bindops(sentence, grammar=None, trace=0)
+Use a grammar with Binding Operators to parse a sentence.
+.UNINDENT
+.SS nltk.sem.drt module
+.INDENT 0.0
+.TP
+.B exception nltk.sem.drt.AnaphoraResolutionException
+Bases: \fBException\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.drt.DRS(refs, conds, consequent=None)
+Bases: \fI\%nltk.sem.drt.DrtExpression\fP, \fI\%nltk.sem.logic.Expression\fP
+.sp
+A Discourse Representation Structure.
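+.sp
+A minimal illustrative sketch: \fBfromstring\fP (documented under
+\fBDrtExpression\fP below) builds a DRS from the bracketed string notation,
+and \fBfol()\fP translates it to first\-order logic:
+.sp
+.nf
+.ft C
+>>> from nltk.sem.drt import DrtExpression
+>>> # one discourse referent x and one condition walk(x)
+>>> drs = DrtExpression.fromstring(\(aq([x], [walk(x)])\(aq)
+>>> print(drs.fol())
+exists x.walk(x)
+.ft P
+.fi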
+.INDENT 7.0 +.TP +.B eliminate_equality() +.UNINDENT +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.INDENT 7.0 +.TP +.B free() +.INDENT 7.0 +.TP +.B See +Expression.free() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B get_refs(recursive=False) +.INDENT 7.0 +.TP +.B See +AbstractExpression.get_refs() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B replace(variable, expression, replace_bound=False, alpha_convert=True) +Replace all instances of variable v with expression E in self, +where v is free in self. +.UNINDENT +.INDENT 7.0 +.TP +.B visit(function, combinator) +.INDENT 7.0 +.TP +.B See +Expression.visit() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B visit_structured(function, combinator) +.INDENT 7.0 +.TP +.B See +Expression.visit_structured() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrsDrawer(drs, size_canvas=True, canvas=None) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B BUFFER = 3 +.UNINDENT +.INDENT 7.0 +.TP +.B OUTERSPACE = 6 +.UNINDENT +.INDENT 7.0 +.TP +.B TOPSPACE = 10 +.UNINDENT +.INDENT 7.0 +.TP +.B draw(x=6, y=10) +Draw the DRS +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtAbstractVariableExpression(variable) +Bases: \fI\%nltk.sem.drt.DrtExpression\fP, \fI\%nltk.sem.logic.AbstractVariableExpression\fP +.INDENT 7.0 +.TP +.B eliminate_equality() +.UNINDENT +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.INDENT 7.0 +.TP +.B get_refs(recursive=False) +.INDENT 7.0 +.TP +.B See +AbstractExpression.get_refs() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtApplicationExpression(function, argument) +Bases: \fI\%nltk.sem.drt.DrtExpression\fP, \fI\%nltk.sem.logic.ApplicationExpression\fP +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.INDENT 7.0 +.TP +.B get_refs(recursive=False) +.INDENT 7.0 +.TP +.B See +AbstractExpression.get_refs() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtBinaryExpression(first, second) +Bases: \fI\%nltk.sem.drt.DrtExpression\fP, \fI\%nltk.sem.logic.BinaryExpression\fP +.INDENT 7.0 +.TP +.B get_refs(recursive=False) +.INDENT 7.0 +.TP +.B See +AbstractExpression.get_refs() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtBooleanExpression(first, second) +Bases: \fI\%nltk.sem.drt.DrtBinaryExpression\fP, \fI\%nltk.sem.logic.BooleanExpression\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtConcatenation(first, second, consequent=None) +Bases: \fI\%nltk.sem.drt.DrtBooleanExpression\fP +.sp +DRS of the form \(aq(DRS + DRS)\(aq +.INDENT 7.0 +.TP +.B eliminate_equality() +.UNINDENT +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.INDENT 7.0 +.TP +.B getOp() +.UNINDENT +.INDENT 7.0 +.TP +.B get_refs(recursive=False) +.INDENT 7.0 +.TP +.B See +AbstractExpression.get_refs() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B replace(variable, expression, replace_bound=False, alpha_convert=True) +Replace all instances of variable v with expression E in self, +where v is free in self. 
+.UNINDENT +.INDENT 7.0 +.TP +.B simplify() +.INDENT 7.0 +.TP +.B Returns +beta\-converted version of this expression +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B visit(function, combinator) +.INDENT 7.0 +.TP +.B See +Expression.visit() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtConstantExpression(variable) +Bases: \fI\%nltk.sem.drt.DrtAbstractVariableExpression\fP, \fI\%nltk.sem.logic.ConstantExpression\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtEqualityExpression(first, second) +Bases: \fI\%nltk.sem.drt.DrtBinaryExpression\fP, \fI\%nltk.sem.logic.EqualityExpression\fP +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtEventVariableExpression(variable) +Bases: \fI\%nltk.sem.drt.DrtIndividualVariableExpression\fP, \fI\%nltk.sem.logic.EventVariableExpression\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtExpression +Bases: \fBobject\fP +.sp +This is the base abstract DRT Expression from which every DRT +Expression extends. +.INDENT 7.0 +.TP +.B applyto(other) +.UNINDENT +.INDENT 7.0 +.TP +.B draw() +.UNINDENT +.INDENT 7.0 +.TP +.B eliminate_equality() +.UNINDENT +.INDENT 7.0 +.TP +.B equiv(other, prover=None) +Check for logical equivalence. +Pass the expression (self <\-> other) to the theorem prover. +If the prover says it is valid, then the self and other are equal. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP \-\- an \fBDrtExpression\fP to check equality against +.IP \(bu 2 +\fBprover\fP \-\- a \fBnltk.inference.api.Prover\fP +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod fromstring(s) +.UNINDENT +.INDENT 7.0 +.TP +.B get_refs(recursive=False) +Return the set of discourse referents in this DRS. +:param recursive: bool Also find discourse referents in subterms? +:return: list of \fBVariable\fP objects +.UNINDENT +.INDENT 7.0 +.TP +.B is_pronoun_function() +Is self of the form "PRO(x)"? +.UNINDENT +.INDENT 7.0 +.TP +.B make_EqualityExpression(first, second) +.UNINDENT +.INDENT 7.0 +.TP +.B make_VariableExpression(variable) +.UNINDENT +.INDENT 7.0 +.TP +.B pretty_format() +Draw the DRS +:return: the pretty print string +.UNINDENT +.INDENT 7.0 +.TP +.B pretty_print() +.UNINDENT +.INDENT 7.0 +.TP +.B resolve_anaphora() +.UNINDENT +.INDENT 7.0 +.TP +.B property type +.UNINDENT +.INDENT 7.0 +.TP +.B typecheck(signature=None) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtFunctionVariableExpression(variable) +Bases: \fI\%nltk.sem.drt.DrtAbstractVariableExpression\fP, \fI\%nltk.sem.logic.FunctionVariableExpression\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtIndividualVariableExpression(variable) +Bases: \fI\%nltk.sem.drt.DrtAbstractVariableExpression\fP, \fI\%nltk.sem.logic.IndividualVariableExpression\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtLambdaExpression(variable, term) +Bases: \fI\%nltk.sem.drt.DrtExpression\fP, \fI\%nltk.sem.logic.LambdaExpression\fP +.INDENT 7.0 +.TP +.B alpha_convert(newvar) +Rename all occurrences of the variable introduced by this variable +binder in the expression to \fBnewvar\fP\&. 
+:param newvar: \fBVariable\fP, for the new variable +.UNINDENT +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtNegatedExpression(term) +Bases: \fI\%nltk.sem.drt.DrtExpression\fP, \fI\%nltk.sem.logic.NegatedExpression\fP +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.INDENT 7.0 +.TP +.B get_refs(recursive=False) +.INDENT 7.0 +.TP +.B See +AbstractExpression.get_refs() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtOrExpression(first, second) +Bases: \fI\%nltk.sem.drt.DrtBooleanExpression\fP, \fI\%nltk.sem.logic.OrExpression\fP +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtParser +Bases: \fI\%nltk.sem.logic.LogicParser\fP +.sp +A lambda calculus expression parser. +.INDENT 7.0 +.TP +.B get_BooleanExpression_factory(tok) +This method serves as a hook for other logic parsers that +have different boolean operators +.UNINDENT +.INDENT 7.0 +.TP +.B get_all_symbols() +This method exists to be overridden +.UNINDENT +.INDENT 7.0 +.TP +.B handle(tok, context) +This method is intended to be overridden for logics that +use different operators or expressions +.UNINDENT +.INDENT 7.0 +.TP +.B handle_DRS(tok, context) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_conds(context) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_prop(tok, context) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_refs() +.UNINDENT +.INDENT 7.0 +.TP +.B isvariable(tok) +.UNINDENT +.INDENT 7.0 +.TP +.B make_ApplicationExpression(function, argument) +.UNINDENT +.INDENT 7.0 +.TP +.B make_BooleanExpression(factory, first, second) +.UNINDENT +.INDENT 7.0 +.TP +.B make_EqualityExpression(first, second) +This method serves as a hook for other logic parsers that +have different equality expression classes +.UNINDENT +.INDENT 7.0 +.TP +.B make_LambdaExpression(variables, term) +.UNINDENT +.INDENT 7.0 +.TP +.B make_NegatedExpression(expression) +.UNINDENT +.INDENT 7.0 +.TP +.B make_VariableExpression(name) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.drt.DrtProposition(variable, drs) +Bases: \fI\%nltk.sem.drt.DrtExpression\fP, \fI\%nltk.sem.logic.Expression\fP +.INDENT 7.0 +.TP +.B eliminate_equality() +.UNINDENT +.INDENT 7.0 +.TP +.B fol() +.UNINDENT +.INDENT 7.0 +.TP +.B get_refs(recursive=False) +Return the set of discourse referents in this DRS. +:param recursive: bool Also find discourse referents in subterms? +:return: list of \fBVariable\fP objects +.UNINDENT +.INDENT 7.0 +.TP +.B replace(variable, expression, replace_bound=False, alpha_convert=True) +Replace every instance of \(aqvariable\(aq with \(aqexpression\(aq +:param variable: \fBVariable\fP The variable to replace +:param expression: \fBExpression\fP The expression with which to replace it +:param replace_bound: bool Should bound variables be replaced? +:param alpha_convert: bool Alpha convert automatically to avoid name clashes? 
+
.UNINDENT
+.INDENT 7.0
+.TP
+.B visit(function, combinator)
+.INDENT 7.0
+.TP
+.B See
+Expression.visit()
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B visit_structured(function, combinator)
+.INDENT 7.0
+.TP
+.B See
+Expression.visit_structured()
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.drt.DrtTokens
+Bases: \fI\%nltk.sem.logic.Tokens\fP
+.INDENT 7.0
+.TP
+.B CLOSE_BRACKET = \(aq]\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B COLON = \(aq:\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B DRS = \(aqDRS\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B DRS_CONC = \(aq+\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B OPEN_BRACKET = \(aq[\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PRONOUN = \(aqPRO\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PUNCT = [\(aq+\(aq, \(aq[\(aq, \(aq]\(aq, \(aq:\(aq]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B SYMBOLS = [\(aq&\(aq, \(aq^\(aq, \(aq|\(aq, \(aq\->\(aq, \(aq=>\(aq, \(aq<\->\(aq, \(aq<=>\(aq, \(aq=\(aq, \(aq==\(aq, \(aq!=\(aq, \(aq\e\e\(aq, \(aq.\(aq, \(aq(\(aq, \(aq)\(aq, \(aq,\(aq, \(aq\-\(aq, \(aq!\(aq, \(aq+\(aq, \(aq[\(aq, \(aq]\(aq, \(aq:\(aq]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B TOKENS = [\(aqand\(aq, \(aq&\(aq, \(aq^\(aq, \(aqor\(aq, \(aq|\(aq, \(aqimplies\(aq, \(aq\->\(aq, \(aq=>\(aq, \(aqiff\(aq, \(aq<\->\(aq, \(aq<=>\(aq, \(aq=\(aq, \(aq==\(aq, \(aq!=\(aq, \(aqsome\(aq, \(aqexists\(aq, \(aqexist\(aq, \(aqall\(aq, \(aqforall\(aq, \(aq\e\e\(aq, \(aq.\(aq, \(aq(\(aq, \(aq)\(aq, \(aq,\(aq, \(aqnot\(aq, \(aq\-\(aq, \(aq!\(aq, \(aqDRS\(aq, \(aq+\(aq, \(aq[\(aq, \(aq]\(aq, \(aq:\(aq]
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.drt.DrtVariableExpression(variable)
+This is a factory method that instantiates and returns a subtype of
+\fBDrtAbstractVariableExpression\fP appropriate for the given variable.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.drt.PossibleAntecedents(iterable=(), /)
+Bases: \fBlist\fP, \fI\%nltk.sem.drt.DrtExpression\fP, \fI\%nltk.sem.logic.Expression\fP
+.INDENT 7.0
+.TP
+.B free()
+Set of free variables.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B replace(variable, expression, replace_bound=False, alpha_convert=True)
+Replace all instances of variable v with expression E in self,
+where v is free in self.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.drt.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.drt.resolve_anaphora(expression, trail=[])
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.drt.test_draw()
+.UNINDENT
+.SS nltk.sem.drt_glue_demo module
+.INDENT 0.0
+.TP
+.B class nltk.sem.drt_glue_demo.DrsWidget(canvas, drs, **attribs)
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B clear()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B draw()
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.drt_glue_demo.DrtGlueDemo(examples)
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B about(*e)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B destroy(*e)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B mainloop(*args, **kwargs)
+Enter the Tkinter mainloop. This function must be called if
+this demo is created from a non\-interactive program (e.g.
+from a script); otherwise, the demo will close as soon as
+the script completes.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B next(*e)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B postscript(*e)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B prev(*e)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B resize(size=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.drt_glue_demo.demo()
+.UNINDENT
+.SS nltk.sem.evaluate module
+.sp
+This module provides data structures for representing first\-order
+models.
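+.sp
+A minimal sketch of how the pieces documented below fit together: a
+\fBValuation\fP interprets the non\-logical constants, an \fBAssignment\fP over
+its domain handles individual variables, and a \fBModel\fP evaluates a
+formula string:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+>>> from nltk.sem.evaluate import Valuation, Assignment, Model
+>>> # \(aqjohn\(aq denotes the individual b1; \(aqwalk\(aq is a unary relation
+>>> val = Valuation([(\(aqjohn\(aq, \(aqb1\(aq), (\(aqwalk\(aq, set([(\(aqb1\(aq,)]))])
+>>> dom = val.domain
+>>> g = Assignment(dom)
+>>> m = Model(dom, val)
+>>> m.evaluate(\(aqwalk(john)\(aq, g)
+True
+.ft P
+.fi
+.UNINDENT
+.UNINDENT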
+.INDENT 0.0 +.TP +.B class nltk.sem.evaluate.Assignment(domain, assign=None) +Bases: \fBdict\fP +.sp +A dictionary which represents an assignment of values to variables. +.sp +An assignment can only assign values from its domain. +.sp +If an unknown expression \fIa\fP is passed to a model \fIM\fP\(aqs +interpretation function \fIi\fP, \fIi\fP will first check whether \fIM\fP\(aqs +valuation assigns an interpretation to \fIa\fP as a constant, and if +this fails, \fIi\fP will delegate the interpretation of \fIa\fP to +\fIg\fP\&. \fIg\fP only assigns values to individual variables (i.e., +members of the class \fBIndividualVariableExpression\fP in the \fBlogic\fP +module. If a variable is not assigned a value by \fIg\fP, it will raise +an \fBUndefined\fP exception. +.sp +A variable \fIAssignment\fP is a mapping from individual variables to +entities in the domain. Individual variables are usually indicated +with the letters \fB\(aqx\(aq\fP, \fB\(aqy\(aq\fP, \fB\(aqw\(aq\fP and \fB\(aqz\(aq\fP, optionally +followed by an integer (e.g., \fB\(aqx0\(aq\fP, \fB\(aqy332\(aq\fP). Assignments are +created using the \fBAssignment\fP constructor, which also takes the +domain as a parameter. +.sp +.nf +.ft C +>>> from nltk.sem.evaluate import Assignment +>>> dom = set([\(aqu1\(aq, \(aqu2\(aq, \(aqu3\(aq, \(aqu4\(aq]) +>>> g3 = Assignment(dom, [(\(aqx\(aq, \(aqu1\(aq), (\(aqy\(aq, \(aqu2\(aq)]) +>>> g3 == {\(aqx\(aq: \(aqu1\(aq, \(aqy\(aq: \(aqu2\(aq} +True +.ft P +.fi +.sp +There is also a \fBprint\fP format for assignments which uses a notation +closer to that in logic textbooks: +.sp +.nf +.ft C +>>> print(g3) +g[u1/x][u2/y] +.ft P +.fi +.sp +It is also possible to update an assignment using the \fBadd\fP method: +.sp +.nf +.ft C +>>> dom = set([\(aqu1\(aq, \(aqu2\(aq, \(aqu3\(aq, \(aqu4\(aq]) +>>> g4 = Assignment(dom) +>>> g4.add(\(aqx\(aq, \(aqu1\(aq) +{\(aqx\(aq: \(aqu1\(aq} +.ft P +.fi +.sp +With no arguments, \fBpurge()\fP is equivalent to \fBclear()\fP on a dictionary: +.sp +.nf +.ft C +>>> g4.purge() +>>> g4 +{} +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBdomain\fP (\fIset\fP) \-\- the domain of discourse +.IP \(bu 2 +\fBassign\fP (\fIlist\fP) \-\- a list of (varname, value) associations +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B add(var, val) +Add a new variable\-value pair to the assignment, and update +\fBself.variant\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B copy() -> a shallow copy of D +.UNINDENT +.INDENT 7.0 +.TP +.B purge(var=None) +Remove one or all keys (i.e. logic variables) from an +assignment, and update \fBself.variant\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBvar\fP \-\- a Variable acting as a key for the assignment. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.evaluate.Error +Bases: \fBException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.evaluate.Model(domain, valuation) +Bases: \fBobject\fP +.sp +A first order model is a domain \fID\fP of discourse and a valuation \fIV\fP\&. +.sp +A domain \fID\fP is a set, and a valuation \fIV\fP is a map that associates +expressions with values in the model. +The domain of \fIV\fP should be a subset of \fID\fP\&. +.sp +Construct a new \fBModel\fP\&. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBdomain\fP (\fIset\fP) \-\- A set of entities representing the domain of discourse of the model. +.IP \(bu 2 +\fBvaluation\fP (\fIValuation\fP) \-\- the valuation of the model. 
+.IP \(bu 2 +\fBprop\fP \-\- If this is set, then we are building a propositional model and don\(aqt require the domain of \fIV\fP to be subset of \fID\fP\&. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B evaluate(expr, g, trace=None) +Read input expressions, and provide a handler for \fBsatisfy\fP +that blocks further propagation of the \fBUndefined\fP error. +:param expr: An \fBExpression\fP of \fBlogic\fP\&. +:type g: Assignment +:param g: an assignment to individual variables. +:rtype: bool or \(aqUndefined\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B i(parsed, g, trace=False) +An interpretation function. +.sp +Assuming that \fBparsed\fP is atomic: +.INDENT 7.0 +.IP \(bu 2 +if \fBparsed\fP is a non\-logical constant, calls the valuation \fIV\fP +.IP \(bu 2 +else if \fBparsed\fP is an individual variable, calls assignment \fIg\fP +.IP \(bu 2 +else returns \fBUndefined\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBparsed\fP \-\- an \fBExpression\fP of \fBlogic\fP\&. +.IP \(bu 2 +\fBg\fP (\fIAssignment\fP) \-\- an assignment to individual variables. +.UNINDENT +.TP +.B Returns +a semantic value +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B satisfiers(parsed, varex, g, trace=None, nesting=0) +Generate the entities from the model\(aqs domain that satisfy an open formula. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBparsed\fP (\fIExpression\fP) \-\- an open formula +.IP \(bu 2 +\fBvarex\fP (\fIVariableExpression\fP\fI or \fP\fIstr\fP) \-\- the relevant free individual variable in \fBparsed\fP\&. +.IP \(bu 2 +\fBg\fP (\fIAssignment\fP) \-\- a variable assignment +.UNINDENT +.TP +.B Returns +a set of the entities that satisfy \fBparsed\fP\&. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B satisfy(parsed, g, trace=None) +Recursive interpretation function for a formula of first\-order logic. +.sp +Raises an \fBUndefined\fP error when \fBparsed\fP is an atomic string +but is not a symbol or an individual variable. +.INDENT 7.0 +.TP +.B Returns +Returns a truth value or \fBUndefined\fP if \fBparsed\fP is complex, and calls the interpretation function \fBi\fP if \fBparsed\fP is atomic. +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBparsed\fP \-\- An expression of \fBlogic\fP\&. +.IP \(bu 2 +\fBg\fP (\fIAssignment\fP) \-\- an assignment to individual variables. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.evaluate.Undefined +Bases: \fI\%nltk.sem.evaluate.Error\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.evaluate.Valuation(xs) +Bases: \fBdict\fP +.sp +A dictionary which represents a model\-theoretic Valuation of non\-logical constants. +Keys are strings representing the constants to be interpreted, and values correspond +to individuals (represented as strings) and n\-ary relations (represented as sets of tuples +of strings). +.sp +An instance of \fBValuation\fP will raise a KeyError exception (i.e., +just behave like a standard dictionary) if indexed with an expression that +is not in its list of symbols. +.INDENT 7.0 +.TP +.B property domain +Set\-theoretic domain of the value\-space of a Valuation. +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod fromstring(s) +.UNINDENT +.INDENT 7.0 +.TP +.B property symbols +The non\-logical constants which the Valuation recognizes. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.arity(rel) +Check the arity of a relation. +:type rel: set of tuples +:rtype: int of tuple of str +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.demo(num=0, trace=None) +Run exists demos. 
+.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +num = 1: propositional logic demo +.IP \(bu 2 +num = 2: first order model demo (only if trace is set) +.IP \(bu 2 +num = 3: first order sentences demo +.IP \(bu 2 +num = 4: satisfaction of open formulas demo +.IP \(bu 2 +any other value: run all the demos +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +\fBtrace\fP \-\- trace = 1, or trace = 2 for more verbose tracing +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.foldemo(trace=None) +Interpretation of closed expressions in a first\-order model. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.folmodel(quiet=False, trace=None) +Example of a first\-order model. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.is_rel(s) +Check whether a set represents a relation (of any arity). +.INDENT 7.0 +.TP +.B Parameters +\fBs\fP (\fIset\fP) \-\- a set containing tuples of str elements +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.propdemo(trace=None) +Example of a propositional model. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.read_valuation(s, encoding=None) +Convert a valuation string into a valuation. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBs\fP (\fIstr\fP) \-\- a valuation string +.IP \(bu 2 +\fBencoding\fP (\fIstr\fP) \-\- the encoding of the input string, if it is binary +.UNINDENT +.TP +.B Returns +a \fBnltk.sem\fP valuation +.TP +.B Return type +Valuation +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.satdemo(trace=None) +Satisfiers of an open formula in a first order model. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.set2rel(s) +Convert a set containing individuals (strings or numbers) into a set of +unary tuples. Any tuples of strings already in the set are passed through +unchanged. +.INDENT 7.0 +.TP +.B For example: +.INDENT 7.0 +.IP \(bu 2 +set([\(aqa\(aq, \(aqb\(aq]) => set([(\(aqa\(aq,), (\(aqb\(aq,)]) +.IP \(bu 2 +set([3, 27]) => set([(\(aq3\(aq,), (\(aq27\(aq,)]) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +set of tuple of str +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.evaluate.trace(f, *args, **kw) +.UNINDENT +.SS nltk.sem.glue module +.INDENT 0.0 +.TP +.B class nltk.sem.glue.DrtGlue(semtype_file=None, remove_duplicates=False, depparser=None, verbose=False) +Bases: \fI\%nltk.sem.glue.Glue\fP +.INDENT 7.0 +.TP +.B get_glue_dict() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.glue.DrtGlueDict(filename, encoding=None) +Bases: \fI\%nltk.sem.glue.GlueDict\fP +.INDENT 7.0 +.TP +.B get_GlueFormula_factory() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.glue.DrtGlueFormula(meaning, glue, indices=None) +Bases: \fI\%nltk.sem.glue.GlueFormula\fP +.INDENT 7.0 +.TP +.B make_LambdaExpression(variable, term) +.UNINDENT +.INDENT 7.0 +.TP +.B make_VariableExpression(name) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.glue.Glue(semtype_file=None, remove_duplicates=False, depparser=None, verbose=False) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B dep_parse(sentence) +Return a dependency graph for the sentence. 
+
.INDENT 7.0
+.TP
+.B Parameters
+\fBsentence\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- the sentence to be parsed
+.TP
+.B Return type
+DependencyGraph
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B depgraph_to_glue(depgraph)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_glue_dict()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_pos_tagger()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_readings(agenda)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B gfl_to_compiled(gfl)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_to_compiled(sentence)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_to_meaning(sentence)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train_depparser(depgraphs=None)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.glue.GlueDict(filename, encoding=None)
+Bases: \fBdict\fP
+.INDENT 7.0
+.TP
+.B add_missing_dependencies(node, depgraph)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B find_label_name(name, node, depgraph, unique_index)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_GlueFormula_factory()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_glueformulas_from_semtype_entry(lookup, word, node, depgraph, counter)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_label(node)
+Pick an alphabetic character as identifier for an entity in the model.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBvalue\fP (\fIint\fP) \-\- where to index into the list of characters
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_meaning_formula(generic, word)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBgeneric\fP \-\- A meaning formula string containing the parameter "<word>"
+.IP \(bu 2
+\fBword\fP \-\- The actual word to replace "<word>"
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_semtypes(node)
+Based on the node, return a list of plausible semtypes in order of
+plausibility.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B initialize_labels(expr, node, depgraph, unique_index)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lookup(node, depgraph, counter)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lookup_unique(rel, node, depgraph)
+Lookup \(aqkey\(aq. There should be exactly one item in the associated relation.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_file(empty_first=True)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B to_glueformula_list(depgraph, node=None, counter=None, verbose=False)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.glue.GlueFormula(meaning, glue, indices=None)
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B applyto(arg)
+self = (\ex.(walk x), (subj \-o f))
+arg = (john , subj)
+returns ((walk john), f)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B compile(counter=None)
+From Iddo Lev\(aqs PhD Dissertation p108\-109
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lambda_abstract(other)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B make_LambdaExpression(variable, term)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B make_VariableExpression(name)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B simplify()
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.glue.demo(show_example=\-1)
+.UNINDENT
+.SS nltk.sem.hole module
+.sp
+An implementation of the Hole Semantics model, following Blackburn and Bos,
+Representation and Inference for Natural Language (CSLI, 2005).
+.sp
+The semantic representations are built by the grammar hole.fcfg.
+This module contains driver code to read in sentences and parse them
+according to a hole semantics grammar.
+.sp
+After parsing, the semantic representation is in the form of an underspecified
+representation that is not easy to read. We use a "plugging" algorithm to
+convert that representation into first\-order logic formulas.
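+.sp
+A minimal sketch of the intended usage, via the \fBhole_readings\fP driver
+documented below (this assumes the bundled \fBhole.fcfg\fP grammar can be
+loaded through the NLTK data package when no grammar filename is given):
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+>>> from nltk.sem.hole import hole_readings
+>>> # each legal plugging yields one first\-order reading; a sentence
+>>> # with two quantifiers should produce two scopings
+>>> readings = hole_readings(\(aqevery girl chases a dog\(aq)
+>>> for r in readings:
+\&...     print(r)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT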
+.INDENT 0.0 +.TP +.B class nltk.sem.hole.Constants +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B ALL = \(aqALL\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B AND = \(aqAND\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B EXISTS = \(aqEXISTS\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B HOLE = \(aqHOLE\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B IFF = \(aqIFF\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B IMP = \(aqIMP\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B LABEL = \(aqLABEL\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B LEQ = \(aqLEQ\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B MAP = {\(aqALL\(aq: >, \(aqAND\(aq: , \(aqEXISTS\(aq: >, \(aqIFF\(aq: , \(aqIMP\(aq: , \(aqNOT\(aq: , \(aqOR\(aq: , \(aqPRED\(aq: } +.UNINDENT +.INDENT 7.0 +.TP +.B NOT = \(aqNOT\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B OR = \(aqOR\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B PRED = \(aqPRED\(aq +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.hole.Constraint(lhs, rhs) +Bases: \fBobject\fP +.sp +This class represents a constraint of the form (L =< N), +where L is a label and N is a node (a label or a hole). +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.hole.HoleSemantics(usr) +Bases: \fBobject\fP +.sp +This class holds the broken\-down components of a hole semantics, i.e. it +extracts the holes, labels, logic formula fragments and constraints out of +a big conjunction of such as produced by the hole semantics grammar. It +then provides some operations on the semantics dealing with holes, labels +and finding legal ways to plug holes with labels. +.INDENT 7.0 +.TP +.B formula_tree(plugging) +Return the first\-order logic formula tree for this underspecified +representation using the plugging given. +.UNINDENT +.INDENT 7.0 +.TP +.B is_node(x) +Return true if x is a node (label or hole) in this semantic +representation. +.UNINDENT +.INDENT 7.0 +.TP +.B pluggings() +Calculate and return all the legal pluggings (mappings of labels to +holes) of this semantics given the constraints. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.hole.hole_readings(sentence, grammar_filename=None, verbose=False) +.UNINDENT +.SS nltk.sem.lfg module +.INDENT 0.0 +.TP +.B class nltk.sem.lfg.FStructure +Bases: \fBdict\fP +.INDENT 7.0 +.TP +.B pretty_format(indent=3) +.UNINDENT +.INDENT 7.0 +.TP +.B static read_depgraph(depgraph) +.UNINDENT +.INDENT 7.0 +.TP +.B safeappend(key, item) +Append \(aqitem\(aq to the list at \(aqkey\(aq. If no list exists for \(aqkey\(aq, then +construct one. +.UNINDENT +.INDENT 7.0 +.TP +.B to_depgraph(rel=None) +.UNINDENT +.INDENT 7.0 +.TP +.B to_glueformula_list(glue_dict) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.lfg.demo_read_depgraph() +.UNINDENT +.SS nltk.sem.linearlogic module +.INDENT 0.0 +.TP +.B class nltk.sem.linearlogic.ApplicationExpression(function, argument, argument_indices=None) +Bases: \fI\%nltk.sem.linearlogic.Expression\fP +.INDENT 7.0 +.TP +.B simplify(bindings=None) +Since function is an implication, return its consequent. There should be +no need to check that the application is valid since the checking is done +by the constructor. 
+.INDENT 7.0 +.TP +.B Parameters +\fBbindings\fP \-\- \fBBindingDict\fP A dictionary of bindings used to simplify +.TP +.B Returns +\fBExpression\fP +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.linearlogic.AtomicExpression(name, dependencies=None) +Bases: \fI\%nltk.sem.linearlogic.Expression\fP +.INDENT 7.0 +.TP +.B compile_neg(index_counter, glueFormulaFactory) +From Iddo Lev\(aqs PhD Dissertation p108\-109 +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBindex_counter\fP \-\- \fBCounter\fP for unique indices +.IP \(bu 2 +\fBglueFormulaFactory\fP \-\- \fBGlueFormula\fP for creating new glue formulas +.UNINDENT +.TP +.B Returns +(\fBExpression\fP,set) for the compiled linear logic and any newly created glue formulas +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B compile_pos(index_counter, glueFormulaFactory) +From Iddo Lev\(aqs PhD Dissertation p108\-109 +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBindex_counter\fP \-\- \fBCounter\fP for unique indices +.IP \(bu 2 +\fBglueFormulaFactory\fP \-\- \fBGlueFormula\fP for creating new glue formulas +.UNINDENT +.TP +.B Returns +(\fBExpression\fP,set) for the compiled linear logic and any newly created glue formulas +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B initialize_labels(fstruct) +.UNINDENT +.INDENT 7.0 +.TP +.B simplify(bindings=None) +If \(aqself\(aq is bound by \(aqbindings\(aq, return the atomic to which it is bound. +Otherwise, return self. +.INDENT 7.0 +.TP +.B Parameters +\fBbindings\fP \-\- \fBBindingDict\fP A dictionary of bindings used to simplify +.TP +.B Returns +\fBAtomicExpression\fP +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.linearlogic.BindingDict(bindings=None) +Bases: \fBobject\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.linearlogic.ConstantExpression(name, dependencies=None) +Bases: \fI\%nltk.sem.linearlogic.AtomicExpression\fP +.INDENT 7.0 +.TP +.B unify(other, bindings) +If \(aqother\(aq is a constant, then it must be equal to \(aqself\(aq. If \(aqother\(aq is a variable, +then it must not be bound to anything other than \(aqself\(aq. 
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP \-\- \fBExpression\fP
+.IP \(bu 2
+\fBbindings\fP \-\- \fBBindingDict\fP A dictionary of all current bindings
+.UNINDENT
+.TP
+.B Returns
+\fBBindingDict\fP A new combined dictionary of \(aqbindings\(aq and any new binding
+.TP
+.B Raises
+\fBUnificationException\fP \-\- If \(aqself\(aq and \(aqother\(aq cannot be unified in the context of \(aqbindings\(aq
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.linearlogic.Expression
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B applyto(other, other_indices=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod fromstring(s)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.linearlogic.ImpExpression(antecedent, consequent)
+Bases: \fI\%nltk.sem.linearlogic.Expression\fP
+.INDENT 7.0
+.TP
+.B compile_neg(index_counter, glueFormulaFactory)
+From Iddo Lev\(aqs PhD Dissertation p108\-109
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBindex_counter\fP \-\- \fBCounter\fP for unique indices
+.IP \(bu 2
+\fBglueFormulaFactory\fP \-\- \fBGlueFormula\fP for creating new glue formulas
+.UNINDENT
+.TP
+.B Returns
+(\fBExpression\fP,list of \fBGlueFormula\fP) for the compiled linear logic and any newly created glue formulas
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B compile_pos(index_counter, glueFormulaFactory)
+From Iddo Lev\(aqs PhD Dissertation p108\-109
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBindex_counter\fP \-\- \fBCounter\fP for unique indices
+.IP \(bu 2
+\fBglueFormulaFactory\fP \-\- \fBGlueFormula\fP for creating new glue formulas
+.UNINDENT
+.TP
+.B Returns
+(\fBExpression\fP,set) for the compiled linear logic and any newly created glue formulas
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B initialize_labels(fstruct)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B simplify(bindings=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B unify(other, bindings)
+Both the antecedent and consequent of \(aqself\(aq and \(aqother\(aq must unify.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP \-\- \fBImpExpression\fP
+.IP \(bu 2
+\fBbindings\fP \-\- \fBBindingDict\fP A dictionary of all current bindings
+.UNINDENT
+.TP
+.B Returns
+\fBBindingDict\fP A new combined dictionary of \(aqbindings\(aq and any new bindings
+.TP
+.B Raises
+\fBUnificationException\fP \-\- If \(aqself\(aq and \(aqother\(aq cannot be unified in the context of \(aqbindings\(aq
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.sem.linearlogic.LinearLogicApplicationException
+Bases: \fBException\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.linearlogic.LinearLogicParser
+Bases: \fI\%nltk.sem.logic.LogicParser\fP
+.sp
+A linear logic expression parser.
+.INDENT 7.0
+.TP
+.B attempt_ApplicationExpression(expression, context)
+Attempt to make an application expression. If the next tokens
+are an argument in parens, then the argument expression is a
+function being applied to the arguments. Otherwise, return the
+argument expression.
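+.sp
+A brief usage sketch: per the \fBApplicationExpression.simplify\fP entry
+above, applying a linear implication to its antecedent yields the
+consequent. The echoed output is an assumption about the expressions\(aq
+printed form:
+.sp
+.nf
+.ft C
+>>> from nltk.sem.linearlogic import LinearLogicParser
+>>> llp = LinearLogicParser()
+>>> # Applying (g \-o f) to g returns the consequent f.
+>>> e = llp.parse(\(aq(g \-o f)(g)\(aq)
+>>> print(e.simplify())
+f
+.ft P
+.fi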
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_BooleanExpression_factory(tok)
+This method serves as a hook for other logic parsers that
+have different boolean operators
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_all_symbols()
+This method exists to be overridden
+.UNINDENT
+.INDENT 7.0
+.TP
+.B handle(tok, context)
+This method is intended to be overridden for logics that
+use different operators or expressions
+.UNINDENT
+.INDENT 7.0
+.TP
+.B make_BooleanExpression(factory, first, second)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B make_VariableExpression(name)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.linearlogic.Tokens
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B CLOSE = \(aq)\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B IMP = \(aq\-o\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B OPEN = \(aq(\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PUNCT = [\(aq(\(aq, \(aq)\(aq]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B TOKENS = [\(aq(\(aq, \(aq)\(aq, \(aq\-o\(aq]
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.sem.linearlogic.UnificationException(a, b, bindings)
+Bases: \fBException\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.sem.linearlogic.VariableBindingException
+Bases: \fBException\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sem.linearlogic.VariableExpression(name, dependencies=None)
+Bases: \fI\%nltk.sem.linearlogic.AtomicExpression\fP
+.INDENT 7.0
+.TP
+.B unify(other, bindings)
+\(aqself\(aq must not be bound to anything other than \(aqother\(aq.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBother\fP \-\- \fBExpression\fP
+.IP \(bu 2
+\fBbindings\fP \-\- \fBBindingDict\fP A dictionary of all current bindings
+.UNINDENT
+.TP
+.B Returns
+\fBBindingDict\fP A new combined dictionary of \(aqbindings\(aq and the new binding
+.TP
+.B Raises
+\fBUnificationException\fP \-\- If \(aqself\(aq and \(aqother\(aq cannot be unified in the context of \(aqbindings\(aq
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.linearlogic.demo()
+.UNINDENT
+.SS nltk.sem.logic module
+.sp
+A version of first order predicate logic, built on
+top of the typed lambda calculus.
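+.sp
+A short sketch of the module\(aqs main entry point,
+\fBExpression.fromstring\fP (documented below). The printed forms follow
+the operator conventions listed under \fBnltk.sem.logic.Tokens\fP; treat
+the exact echoes as an assumption about the current NLTK version:
+.sp
+.nf
+.ft C
+>>> from nltk.sem.logic import Expression
+>>> e = Expression.fromstring(r\(aqall x.(dog(x) \-> bark(x))\(aq)
+>>> print(e)
+all x.(dog(x) \-> bark(x))
+>>> # simplify() beta\-reduces a lambda application.
+>>> print(Expression.fromstring(r\(aq(\ex.walk(x))(john)\(aq).simplify())
+walk(john)
+.ft P
+.fi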
+.INDENT 0.0 +.TP +.B class nltk.sem.logic.AbstractVariableExpression(variable) +Bases: \fI\%nltk.sem.logic.Expression\fP +.sp +This class represents a variable to be used as a predicate or entity +.INDENT 7.0 +.TP +.B findtype(variable) +:see Expression.findtype() +.UNINDENT +.INDENT 7.0 +.TP +.B predicates() +.INDENT 7.0 +.TP +.B See +Expression.predicates() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B replace(variable, expression, replace_bound=False, alpha_convert=True) +.INDENT 7.0 +.TP +.B See +Expression.replace() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B simplify() +.INDENT 7.0 +.TP +.B Returns +beta\-converted version of this expression +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.AllExpression(variable, term) +Bases: \fI\%nltk.sem.logic.QuantifiedExpression\fP +.INDENT 7.0 +.TP +.B getQuantifier() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.AndExpression(first, second) +Bases: \fI\%nltk.sem.logic.BooleanExpression\fP +.sp +This class represents conjunctions +.INDENT 7.0 +.TP +.B getOp() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.AnyType +Bases: \fI\%nltk.sem.logic.BasicType\fP, \fI\%nltk.sem.logic.ComplexType\fP +.INDENT 7.0 +.TP +.B property first +.UNINDENT +.INDENT 7.0 +.TP +.B matches(other) +.UNINDENT +.INDENT 7.0 +.TP +.B resolve(other) +.UNINDENT +.INDENT 7.0 +.TP +.B property second +.UNINDENT +.INDENT 7.0 +.TP +.B str() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.ApplicationExpression(function, argument) +Bases: \fI\%nltk.sem.logic.Expression\fP +.sp +This class is used to represent two related types of logical expressions. +.sp +The first is a Predicate Expression, such as "P(x,y)". A predicate +expression is comprised of a \fBFunctionVariableExpression\fP or +\fBConstantExpression\fP as the predicate and a list of Expressions as the +arguments. +.sp +The second is a an application of one expression to another, such as +"(x.dog(x))(fido)". +.sp +The reason Predicate Expressions are treated as Application Expressions is +that the Variable Expression predicate of the expression may be replaced +with another Expression, such as a LambdaExpression, which would mean that +the Predicate should be thought of as being applied to the arguments. +.sp +The logical expression reader will always curry arguments in a application expression. +So, "x y.see(x,y)(john,mary)" will be represented internally as +"((x y.(see(x))(y))(john))(mary)". This simplifies the internals since +there will always be exactly one argument in an application. +.sp +The str() method will usually print the curried forms of application +expressions. The one exception is when the the application expression is +really a predicate expression (ie, underlying function is an +\fBAbstractVariableExpression\fP). This means that the example from above +will be returned as "(x y.see(x,y)(john))(mary)". +.INDENT 7.0 +.TP +.B property args +Return uncurried arg\-list +.UNINDENT +.INDENT 7.0 +.TP +.B constants() +.INDENT 7.0 +.TP +.B See +Expression.constants() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B findtype(variable) +:see Expression.findtype() +.UNINDENT +.INDENT 7.0 +.TP +.B is_atom() +Is this expression an atom (as opposed to a lambda expression applied +to a term)? +.UNINDENT +.INDENT 7.0 +.TP +.B property pred +Return uncurried base\-function. +If this is an atom, then the result will be a variable expression. +Otherwise, it will be a lambda expression. 
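+.sp
+For illustration, a sketch of \fBpred\fP and \fBargs\fP on a predicate
+expression; the list repr shown is an assumption about the current
+\fBExpression.__repr__\fP format:
+.sp
+.nf
+.ft C
+>>> from nltk.sem.logic import Expression
+>>> e = Expression.fromstring(\(aqsee(john,mary)\(aq)
+>>> print(e.pred)
+see
+>>> e.args
+[<ConstantExpression john>, <ConstantExpression mary>]
+.ft P
+.fi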
+.UNINDENT +.INDENT 7.0 +.TP +.B predicates() +.INDENT 7.0 +.TP +.B See +Expression.predicates() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B simplify() +.INDENT 7.0 +.TP +.B Returns +beta\-converted version of this expression +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B property type +.UNINDENT +.INDENT 7.0 +.TP +.B uncurry() +Uncurry this application expression +.sp +return: A tuple (base\-function, arg\-list) +.UNINDENT +.INDENT 7.0 +.TP +.B visit(function, combinator) +.INDENT 7.0 +.TP +.B See +Expression.visit() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.BasicType +Bases: \fI\%nltk.sem.logic.Type\fP +.INDENT 7.0 +.TP +.B matches(other) +.UNINDENT +.INDENT 7.0 +.TP +.B resolve(other) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.BinaryExpression(first, second) +Bases: \fI\%nltk.sem.logic.Expression\fP +.INDENT 7.0 +.TP +.B findtype(variable) +:see Expression.findtype() +.UNINDENT +.INDENT 7.0 +.TP +.B property type +.UNINDENT +.INDENT 7.0 +.TP +.B visit(function, combinator) +.INDENT 7.0 +.TP +.B See +Expression.visit() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.BooleanExpression(first, second) +Bases: \fI\%nltk.sem.logic.BinaryExpression\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.ComplexType(first, second) +Bases: \fI\%nltk.sem.logic.Type\fP +.INDENT 7.0 +.TP +.B matches(other) +.UNINDENT +.INDENT 7.0 +.TP +.B resolve(other) +.UNINDENT +.INDENT 7.0 +.TP +.B str() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.ConstantExpression(variable) +Bases: \fI\%nltk.sem.logic.AbstractVariableExpression\fP +.sp +This class represents variables that do not take the form of a single +character followed by zero or more digits. +.INDENT 7.0 +.TP +.B constants() +.INDENT 7.0 +.TP +.B See +Expression.constants() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B free() +.INDENT 7.0 +.TP +.B See +Expression.free() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B type = e +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.EntityType +Bases: \fI\%nltk.sem.logic.BasicType\fP +.INDENT 7.0 +.TP +.B str() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.EqualityExpression(first, second) +Bases: \fI\%nltk.sem.logic.BinaryExpression\fP +.sp +This class represents equality expressions like "(x = y)". +.INDENT 7.0 +.TP +.B getOp() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.EventType +Bases: \fI\%nltk.sem.logic.BasicType\fP +.INDENT 7.0 +.TP +.B str() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.EventVariableExpression(variable) +Bases: \fI\%nltk.sem.logic.IndividualVariableExpression\fP +.sp +This class represents variables that take the form of a single lowercase +\(aqe\(aq character followed by zero or more digits. +.INDENT 7.0 +.TP +.B type = v +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.ExistsExpression(variable, term) +Bases: \fI\%nltk.sem.logic.QuantifiedExpression\fP +.INDENT 7.0 +.TP +.B getQuantifier() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.logic.ExpectedMoreTokensException(index, message=None) +Bases: \fI\%nltk.sem.logic.LogicalExpressionException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.Expression +Bases: \fI\%nltk.sem.logic.SubstituteBindingsI\fP +.sp +This is the base abstract object for all logical expressions +.INDENT 7.0 +.TP +.B applyto(other) +.UNINDENT +.INDENT 7.0 +.TP +.B constants() +Return a set of individual constants (non\-predicates). 
+:return: set of \fBVariable\fP objects +.UNINDENT +.INDENT 7.0 +.TP +.B equiv(other, prover=None) +Check for logical equivalence. +Pass the expression (self <\-> other) to the theorem prover. +If the prover says it is valid, then the self and other are equal. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBother\fP \-\- an \fBExpression\fP to check equality against +.IP \(bu 2 +\fBprover\fP \-\- a \fBnltk.inference.api.Prover\fP +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B findtype(variable) +Find the type of the given variable as it is used in this expression. +For example, finding the type of "P" in "P(x) & Q(x,y)" yields "" +.INDENT 7.0 +.TP +.B Parameters +\fBvariable\fP \-\- Variable +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B free() +Return a set of all the free (non\-bound) variables. This includes +both individual and predicate variables, but not constants. +:return: set of \fBVariable\fP objects +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod fromstring(s, type_check=False, signature=None) +.UNINDENT +.INDENT 7.0 +.TP +.B make_VariableExpression(variable) +.UNINDENT +.INDENT 7.0 +.TP +.B negate() +If this is a negated expression, remove the negation. +Otherwise add a negation. +.UNINDENT +.INDENT 7.0 +.TP +.B normalize(newvars=None) +Rename auto\-generated unique variables +.UNINDENT +.INDENT 7.0 +.TP +.B predicates() +Return a set of predicates (constants, not variables). +:return: set of \fBVariable\fP objects +.UNINDENT +.INDENT 7.0 +.TP +.B replace(variable, expression, replace_bound=False, alpha_convert=True) +Replace every instance of \(aqvariable\(aq with \(aqexpression\(aq +:param variable: \fBVariable\fP The variable to replace +:param expression: \fBExpression\fP The expression with which to replace it +:param replace_bound: bool Should bound variables be replaced? +:param alpha_convert: bool Alpha convert automatically to avoid name clashes? +.UNINDENT +.INDENT 7.0 +.TP +.B simplify() +.INDENT 7.0 +.TP +.B Returns +beta\-converted version of this expression +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B substitute_bindings(bindings) +.INDENT 7.0 +.TP +.B Returns +The object that is obtained by replacing +each variable bound by \fBbindings\fP with its values. +Aliases are already resolved. (maybe?) +.TP +.B Return type +(any) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B typecheck(signature=None) +Infer and check types. Raise exceptions if necessary. +.INDENT 7.0 +.TP +.B Parameters +\fBsignature\fP \-\- dict that maps variable names to types (or string +representations of types) +.TP +.B Returns +the signature, plus any additional type mappings +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B variables() +Return a set of all the variables for binding substitution. +The variables returned include all free (non\-bound) individual +variables and any variable starting with \(aq?\(aq or \(aq@\(aq. +:return: set of \fBVariable\fP objects +.UNINDENT +.INDENT 7.0 +.TP +.B visit(function, combinator) +Recursively visit subexpressions. Apply \(aqfunction\(aq to each +subexpression and pass the result of each function application +to the \(aqcombinator\(aq for aggregation: +.INDENT 7.0 +.INDENT 3.5 +return combinator(map(function, self.subexpressions)) +.UNINDENT +.UNINDENT +.sp +Bound variables are neither applied upon by the function nor given to +the combinator. 
+:param function: \fBFunction\fP to call on each subexpression +:param combinator: \fBFunction,R>\fP to combine the results of the +function calls +:return: result of combination \fBR\fP +.UNINDENT +.INDENT 7.0 +.TP +.B visit_structured(function, combinator) +Recursively visit subexpressions. Apply \(aqfunction\(aq to each +subexpression and pass the result of each function application +to the \(aqcombinator\(aq for aggregation. The combinator must have +the same signature as the constructor. The function is not +applied to bound variables, but they are passed to the +combinator. +:param function: \fBFunction\fP to call on each subexpression +:param combinator: \fBFunction\fP with the same signature as the +constructor, to combine the results of the function calls +:return: result of combination +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.FunctionVariableExpression(variable) +Bases: \fI\%nltk.sem.logic.AbstractVariableExpression\fP +.sp +This class represents variables that take the form of a single uppercase +character followed by zero or more digits. +.INDENT 7.0 +.TP +.B constants() +.INDENT 7.0 +.TP +.B See +Expression.constants() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B free() +.INDENT 7.0 +.TP +.B See +Expression.free() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B type = ? +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.IffExpression(first, second) +Bases: \fI\%nltk.sem.logic.BooleanExpression\fP +.sp +This class represents biconditionals +.INDENT 7.0 +.TP +.B getOp() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.logic.IllegalTypeException(expression, other_type, allowed_type) +Bases: \fI\%nltk.sem.logic.TypeException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.ImpExpression(first, second) +Bases: \fI\%nltk.sem.logic.BooleanExpression\fP +.sp +This class represents implications +.INDENT 7.0 +.TP +.B getOp() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.logic.InconsistentTypeHierarchyException(variable, expression=None) +Bases: \fI\%nltk.sem.logic.TypeException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.IndividualVariableExpression(variable) +Bases: \fI\%nltk.sem.logic.AbstractVariableExpression\fP +.sp +This class represents variables that take the form of a single lowercase +character (other than \(aqe\(aq) followed by zero or more digits. +.INDENT 7.0 +.TP +.B constants() +.INDENT 7.0 +.TP +.B See +Expression.constants() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B free() +.INDENT 7.0 +.TP +.B See +Expression.free() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B property type +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.LambdaExpression(variable, term) +Bases: \fI\%nltk.sem.logic.VariableBinderExpression\fP +.INDENT 7.0 +.TP +.B property type +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.LogicParser(type_check=False) +Bases: \fBobject\fP +.sp +A lambda calculus expression parser. +.INDENT 7.0 +.TP +.B assertNextToken(expected) +.UNINDENT +.INDENT 7.0 +.TP +.B assertToken(tok, expected) +.UNINDENT +.INDENT 7.0 +.TP +.B attempt_ApplicationExpression(expression, context) +Attempt to make an application expression. The next tokens are +a list of arguments in parens, then the argument expression is a +function being applied to the arguments. Otherwise, return the +argument expression. +.UNINDENT +.INDENT 7.0 +.TP +.B attempt_BooleanExpression(expression, context) +Attempt to make a boolean expression. 
If the next token is a boolean +operator, then a BooleanExpression will be returned. Otherwise, the +parameter will be returned. +.UNINDENT +.INDENT 7.0 +.TP +.B attempt_EqualityExpression(expression, context) +Attempt to make an equality expression. If the next token is an +equality operator, then an EqualityExpression will be returned. +Otherwise, the parameter will be returned. +.UNINDENT +.INDENT 7.0 +.TP +.B attempt_adjuncts(expression, context) +.UNINDENT +.INDENT 7.0 +.TP +.B get_BooleanExpression_factory(tok) +This method serves as a hook for other logic parsers that +have different boolean operators +.UNINDENT +.INDENT 7.0 +.TP +.B get_QuantifiedExpression_factory(tok) +This method serves as a hook for other logic parsers that +have different quantifiers +.UNINDENT +.INDENT 7.0 +.TP +.B get_all_symbols() +This method exists to be overridden +.UNINDENT +.INDENT 7.0 +.TP +.B get_next_token_variable(description) +.UNINDENT +.INDENT 7.0 +.TP +.B handle(tok, context) +This method is intended to be overridden for logics that +use different operators or expressions +.UNINDENT +.INDENT 7.0 +.TP +.B handle_lambda(tok, context) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_negation(tok, context) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_open(tok, context) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_quant(tok, context) +.UNINDENT +.INDENT 7.0 +.TP +.B handle_variable(tok, context) +.UNINDENT +.INDENT 7.0 +.TP +.B has_priority(operation, context) +.UNINDENT +.INDENT 7.0 +.TP +.B inRange(location) +Return TRUE if the given location is within the buffer +.UNINDENT +.INDENT 7.0 +.TP +.B isvariable(tok) +.UNINDENT +.INDENT 7.0 +.TP +.B make_ApplicationExpression(function, argument) +.UNINDENT +.INDENT 7.0 +.TP +.B make_BooleanExpression(factory, first, second) +.UNINDENT +.INDENT 7.0 +.TP +.B make_EqualityExpression(first, second) +This method serves as a hook for other logic parsers that +have different equality expression classes +.UNINDENT +.INDENT 7.0 +.TP +.B make_LambdaExpression(variable, term) +.UNINDENT +.INDENT 7.0 +.TP +.B make_NegatedExpression(expression) +.UNINDENT +.INDENT 7.0 +.TP +.B make_QuanifiedExpression(factory, variable, term) +.UNINDENT +.INDENT 7.0 +.TP +.B make_VariableExpression(name) +.UNINDENT +.INDENT 7.0 +.TP +.B parse(data, signature=None) +Parse the expression. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBdata\fP \-\- str for the input to be parsed +.IP \(bu 2 +\fBsignature\fP \-\- \fBdict\fP that maps variable names to type +.UNINDENT +.UNINDENT +.sp +strings +:returns: a parsed Expression +.UNINDENT +.INDENT 7.0 +.TP +.B process(data) +Split the data into tokens +.UNINDENT +.INDENT 7.0 +.TP +.B process_next_expression(context) +Parse the next complete expression from the stream and return it. +.UNINDENT +.INDENT 7.0 +.TP +.B process_quoted_token(data_idx, data) +.UNINDENT +.INDENT 7.0 +.TP +.B token(location=None) +Get the next waiting token. If a location is given, then +return the token at currentIndex+location without advancing +currentIndex; setting it gives lookahead/lookback capability. +.UNINDENT +.INDENT 7.0 +.TP +.B type_check +A list of tuples of quote characters. The 4\-tuple is comprised +of the start character, the end character, the escape character, and +a boolean indicating whether the quotes should be included in the +result. Quotes are used to signify that a token should be treated as +atomic, ignoring any special characters within the token. The escape +character allows the quote end character to be used within the quote. 
+If True, the boolean indicates that the final token should contain the +quote and escape characters. +This method exists to be overridden +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.logic.LogicalExpressionException(index, message) +Bases: \fBException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.NegatedExpression(term) +Bases: \fI\%nltk.sem.logic.Expression\fP +.INDENT 7.0 +.TP +.B findtype(variable) +Find the type of the given variable as it is used in this expression. +For example, finding the type of "P" in "P(x) & Q(x,y)" yields "" +.INDENT 7.0 +.TP +.B Parameters +\fBvariable\fP \-\- Variable +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B negate() +.INDENT 7.0 +.TP +.B See +Expression.negate() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B property type +.UNINDENT +.INDENT 7.0 +.TP +.B visit(function, combinator) +.INDENT 7.0 +.TP +.B See +Expression.visit() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.OrExpression(first, second) +Bases: \fI\%nltk.sem.logic.BooleanExpression\fP +.sp +This class represents disjunctions +.INDENT 7.0 +.TP +.B getOp() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.QuantifiedExpression(variable, term) +Bases: \fI\%nltk.sem.logic.VariableBinderExpression\fP +.INDENT 7.0 +.TP +.B property type +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.SubstituteBindingsI +Bases: \fBobject\fP +.sp +An interface for classes that can perform substitutions for +variables. +.INDENT 7.0 +.TP +.B substitute_bindings(bindings) +.INDENT 7.0 +.TP +.B Returns +The object that is obtained by replacing +each variable bound by \fBbindings\fP with its values. +Aliases are already resolved. (maybe?) +.TP +.B Return type +(any) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B variables() +.INDENT 7.0 +.TP +.B Returns +A list of all variables in this object. 
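+.sp
+Returning to the \fBLogicParser\fP documented above, a minimal usage
+sketch of its \fBparse\fP method (the echoed output is an assumption):
+.sp
+.nf
+.ft C
+>>> from nltk.sem.logic import LogicParser
+>>> lp = LogicParser()
+>>> e = lp.parse(r\(aqexists x.(dog(x) & bark(x))\(aq)
+>>> print(e)
+exists x.(dog(x) & bark(x))
+.ft P
+.fi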
+.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.Tokens +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B ALL = \(aqall\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B ALL_LIST = [\(aqall\(aq, \(aqforall\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B AND = \(aq&\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B AND_LIST = [\(aqand\(aq, \(aq&\(aq, \(aq^\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B BINOPS = [\(aqand\(aq, \(aq&\(aq, \(aq^\(aq, \(aqor\(aq, \(aq|\(aq, \(aqimplies\(aq, \(aq\->\(aq, \(aq=>\(aq, \(aqiff\(aq, \(aq<\->\(aq, \(aq<=>\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B CLOSE = \(aq)\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B COMMA = \(aq,\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B DOT = \(aq.\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B EQ = \(aq=\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B EQ_LIST = [\(aq=\(aq, \(aq==\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B EXISTS = \(aqexists\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B EXISTS_LIST = [\(aqsome\(aq, \(aqexists\(aq, \(aqexist\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B IFF = \(aq<\->\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B IFF_LIST = [\(aqiff\(aq, \(aq<\->\(aq, \(aq<=>\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B IMP = \(aq\->\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B IMP_LIST = [\(aqimplies\(aq, \(aq\->\(aq, \(aq=>\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B LAMBDA = \(aq\e\e\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B LAMBDA_LIST = [\(aq\e\e\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B NEQ = \(aq!=\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B NEQ_LIST = [\(aq!=\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B NOT = \(aq\-\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B NOT_LIST = [\(aqnot\(aq, \(aq\-\(aq, \(aq!\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B OPEN = \(aq(\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B OR = \(aq|\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B OR_LIST = [\(aqor\(aq, \(aq|\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B PUNCT = [\(aq.\(aq, \(aq(\(aq, \(aq)\(aq, \(aq,\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B QUANTS = [\(aqsome\(aq, \(aqexists\(aq, \(aqexist\(aq, \(aqall\(aq, \(aqforall\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B SYMBOLS = [\(aq&\(aq, \(aq^\(aq, \(aq|\(aq, \(aq\->\(aq, \(aq=>\(aq, \(aq<\->\(aq, \(aq<=>\(aq, \(aq=\(aq, \(aq==\(aq, \(aq!=\(aq, \(aq\e\e\(aq, \(aq.\(aq, \(aq(\(aq, \(aq)\(aq, \(aq,\(aq, \(aq\-\(aq, \(aq!\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B TOKENS = [\(aqand\(aq, \(aq&\(aq, \(aq^\(aq, \(aqor\(aq, \(aq|\(aq, \(aqimplies\(aq, \(aq\->\(aq, \(aq=>\(aq, \(aqiff\(aq, \(aq<\->\(aq, \(aq<=>\(aq, \(aq=\(aq, \(aq==\(aq, \(aq!=\(aq, \(aqsome\(aq, \(aqexists\(aq, \(aqexist\(aq, \(aqall\(aq, \(aqforall\(aq, \(aq\e\e\(aq, \(aq.\(aq, \(aq(\(aq, \(aq)\(aq, \(aq,\(aq, \(aqnot\(aq, \(aq\-\(aq, \(aq!\(aq] +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.TruthValueType +Bases: \fI\%nltk.sem.logic.BasicType\fP +.INDENT 7.0 +.TP +.B str() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.Type +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B classmethod fromstring(s) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.logic.TypeException(msg) +Bases: \fBException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.logic.TypeResolutionException(expression, other_type) +Bases: \fI\%nltk.sem.logic.TypeException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B exception nltk.sem.logic.UnexpectedTokenException(index, unexpected=None, expected=None, message=None) +Bases: \fI\%nltk.sem.logic.LogicalExpressionException\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.sem.logic.Variable(name) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B substitute_bindings(bindings) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class 
nltk.sem.logic.VariableBinderExpression(variable, term) +Bases: \fI\%nltk.sem.logic.Expression\fP +.sp +This an abstract class for any Expression that binds a variable in an +Expression. This includes LambdaExpressions and Quantified Expressions +.INDENT 7.0 +.TP +.B alpha_convert(newvar) +Rename all occurrences of the variable introduced by this variable +binder in the expression to \fBnewvar\fP\&. +:param newvar: \fBVariable\fP, for the new variable +.UNINDENT +.INDENT 7.0 +.TP +.B findtype(variable) +:see Expression.findtype() +.UNINDENT +.INDENT 7.0 +.TP +.B free() +.INDENT 7.0 +.TP +.B See +Expression.free() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B replace(variable, expression, replace_bound=False, alpha_convert=True) +.INDENT 7.0 +.TP +.B See +Expression.replace() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B visit(function, combinator) +.INDENT 7.0 +.TP +.B See +Expression.visit() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B visit_structured(function, combinator) +.INDENT 7.0 +.TP +.B See +Expression.visit_structured() +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.VariableExpression(variable) +This is a factory method that instantiates and returns a subtype of +\fBAbstractVariableExpression\fP appropriate for the given variable. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.binding_ops() +Binding operators +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.boolean_ops() +Boolean operators +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.demoException(s) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.demo_errors() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.equality_preds() +Equality predicates +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.is_eventvar(expr) +An event variable must be a single lowercase \(aqe\(aq character followed by +zero or more digits. +.INDENT 7.0 +.TP +.B Parameters +\fBexpr\fP \-\- str +.TP +.B Returns +bool True if expr is of the correct form +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.is_funcvar(expr) +A function variable must be a single uppercase character followed by +zero or more digits. +.INDENT 7.0 +.TP +.B Parameters +\fBexpr\fP \-\- str +.TP +.B Returns +bool True if expr is of the correct form +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.is_indvar(expr) +An individual variable must be a single lowercase character other than \(aqe\(aq, +followed by zero or more digits. +.INDENT 7.0 +.TP +.B Parameters +\fBexpr\fP \-\- str +.TP +.B Returns +bool True if expr is of the correct form +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.printtype(ex) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.logic.read_logic(s, logic_parser=None, encoding=None) +Convert a file of First Order Formulas into a list of {Expression}s. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBs\fP (\fIstr\fP) \-\- the contents of the file +.IP \(bu 2 +\fBlogic_parser\fP (\fILogicParser\fP) \-\- The parser to be used to parse the logical expression +.IP \(bu 2 +\fBencoding\fP (\fIstr\fP) \-\- the encoding of the input string, if it is binary +.UNINDENT +.TP +.B Returns +a list of parsed formulas. 
+.TP
+.B Return type
+list(Expression)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.logic.read_type(type_string)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.logic.skolem_function(univ_scope=None)
+Return a skolem function over the variables in univ_scope.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBuniv_scope\fP \-\- the set of universally quantified variables in scope
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.logic.typecheck(expressions, signature=None)
+Ensure correct typing across a collection of \fBExpression\fP objects.
+:param expressions: a collection of expressions
+:param signature: dict that maps variable names to types (or string
+representations of types)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.logic.unique_variable(pattern=None, ignore=None)
+Return a new, unique variable.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBpattern\fP \-\- \fBVariable\fP that is being replaced. The new variable must
+be the same type.
+.IP \(bu 2
+\fBignore\fP \-\- a set of \fBVariable\fP objects that should not be returned from
+this function.
+.UNINDENT
+.TP
+.B Return type
+Variable
+.UNINDENT
+.UNINDENT
+.SS nltk.sem.relextract module
+.sp
+Code for extracting relational triples from the ieer and conll2002 corpora.
+.sp
+Relations are stored internally as dictionaries (\(aqreldicts\(aq).
+.sp
+The two serialization outputs are "rtuple" and "clause".
+.INDENT 0.0
+.IP \(bu 2
+An rtuple is a tuple of the form \fB(subj, filler, obj)\fP,
+where \fBsubj\fP and \fBobj\fP are pairs of Named Entity mentions, and \fBfiller\fP is the string of words
+occurring between \fBsubj\fP and \fBobj\fP (with no intervening NEs). Strings are printed via \fBrepr()\fP to
+circumvent locale variations in rendering utf\-8 encoded strings.
+.IP \(bu 2
+A clause is an atom of the form \fBrelsym(subjsym, objsym)\fP,
+where the relation, subject and object have been canonicalized to single strings.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.relextract.class_abbrev(type)
+Abbreviate an NE class name.
+:type type: str
+:rtype: str
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.relextract.clause(reldict, relsym)
+Print the relation in clausal form.
+:param reldict: a relation dictionary
+:type reldict: defaultdict
+:param relsym: a label for the relation
+:type relsym: str
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.relextract.conllesp()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.relextract.conllned(trace=1)
+Find the copula+\(aqvan\(aq relation (\(aqof\(aq) in the Dutch tagged training corpus
+from CoNLL 2002.
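+.sp
+For an English analogue of these demos, a hedged sketch using
+\fBextract_rels\fP and \fBrtuple\fP (both documented below); it assumes the
+ieer corpus has been installed via \fBnltk.download(\(aqieer\(aq)\fP:
+.sp
+.nf
+.ft C
+>>> import re
+>>> import nltk
+>>> from nltk.sem import relextract
+>>> # Match fillers containing \(aqin\(aq, excluding gerunds like \(aqliving in\(aq.
+>>> IN = re.compile(r\(aq.*\ebin\eb(?!\eb.+ing)\(aq)
+>>> for doc in nltk.corpus.ieer.parsed_docs(\(aqNYT_19980315\(aq):
+...     for rel in relextract.extract_rels(\(aqORG\(aq, \(aqLOC\(aq, doc,
+...                                        corpus=\(aqieer\(aq, pattern=IN):
+...         print(relextract.rtuple(rel))
+.ft P
+.fi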
+.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.descape_entity(m, defs={\(aqAElig\(aq: \(aqÆ\(aq, \(aqAacute\(aq: \(aqÁ\(aq, \(aqAcirc\(aq: \(aqÂ\(aq, \(aqAgrave\(aq: \(aqÀ\(aq, \(aqAlpha\(aq: \(aqΑ\(aq, \(aqAring\(aq: \(aqÅ\(aq, \(aqAtilde\(aq: \(aqÃ\(aq, \(aqAuml\(aq: \(aqÄ\(aq, \(aqBeta\(aq: \(aqΒ\(aq, \(aqCcedil\(aq: \(aqÇ\(aq, \(aqChi\(aq: \(aqΧ\(aq, \(aqDagger\(aq: \(aq‡\(aq, \(aqDelta\(aq: \(aqΔ\(aq, \(aqETH\(aq: \(aqÐ\(aq, \(aqEacute\(aq: \(aqÉ\(aq, \(aqEcirc\(aq: \(aqÊ\(aq, \(aqEgrave\(aq: \(aqÈ\(aq, \(aqEpsilon\(aq: \(aqΕ\(aq, \(aqEta\(aq: \(aqΗ\(aq, \(aqEuml\(aq: \(aqË\(aq, \(aqGamma\(aq: \(aqΓ\(aq, \(aqIacute\(aq: \(aqÍ\(aq, \(aqIcirc\(aq: \(aqÎ\(aq, \(aqIgrave\(aq: \(aqÌ\(aq, \(aqIota\(aq: \(aqΙ\(aq, \(aqIuml\(aq: \(aqÏ\(aq, \(aqKappa\(aq: \(aqΚ\(aq, \(aqLambda\(aq: \(aqΛ\(aq, \(aqMu\(aq: \(aqΜ\(aq, \(aqNtilde\(aq: \(aqÑ\(aq, \(aqNu\(aq: \(aqΝ\(aq, \(aqOElig\(aq: \(aqŒ\(aq, \(aqOacute\(aq: \(aqÓ\(aq, \(aqOcirc\(aq: \(aqÔ\(aq, \(aqOgrave\(aq: \(aqÒ\(aq, \(aqOmega\(aq: \(aqΩ\(aq, \(aqOmicron\(aq: \(aqΟ\(aq, \(aqOslash\(aq: \(aqØ\(aq, \(aqOtilde\(aq: \(aqÕ\(aq, \(aqOuml\(aq: \(aqÖ\(aq, \(aqPhi\(aq: \(aqΦ\(aq, \(aqPi\(aq: \(aqΠ\(aq, \(aqPrime\(aq: \(aq″\(aq, \(aqPsi\(aq: \(aqΨ\(aq, \(aqRho\(aq: \(aqΡ\(aq, \(aqScaron\(aq: \(aqŠ\(aq, \(aqSigma\(aq: \(aqΣ\(aq, \(aqTHORN\(aq: \(aqÞ\(aq, \(aqTau\(aq: \(aqΤ\(aq, \(aqTheta\(aq: \(aqΘ\(aq, \(aqUacute\(aq: \(aqÚ\(aq, \(aqUcirc\(aq: \(aqÛ\(aq, \(aqUgrave\(aq: \(aqÙ\(aq, \(aqUpsilon\(aq: \(aqΥ\(aq, \(aqUuml\(aq: \(aqÜ\(aq, \(aqXi\(aq: \(aqΞ\(aq, \(aqYacute\(aq: \(aqÝ\(aq, \(aqYuml\(aq: \(aqŸ\(aq, \(aqZeta\(aq: \(aqΖ\(aq, \(aqaacute\(aq: \(aqá\(aq, \(aqacirc\(aq: \(aqâ\(aq, \(aqacute\(aq: \(aq\'\(aq, \(aqaelig\(aq: \(aqæ\(aq, \(aqagrave\(aq: \(aqà\(aq, \(aqalefsym\(aq: \(aqℵ\(aq, \(aqalpha\(aq: \(aqα\(aq, \(aqamp\(aq: \(aq&\(aq, \(aqand\(aq: \(aq∧\(aq, \(aqang\(aq: \(aq∠\(aq, \(aqaring\(aq: \(aqå\(aq, \(aqasymp\(aq: \(aq≈\(aq, \(aqatilde\(aq: \(aqã\(aq, \(aqauml\(aq: \(aqä\(aq, \(aqbdquo\(aq: \(aq„\(aq, \(aqbeta\(aq: \(aqβ\(aq, \(aqbrvbar\(aq: \(aq¦\(aq, \(aqbull\(aq: \(aq•\(aq, \(aqcap\(aq: \(aq∩\(aq, \(aqccedil\(aq: \(aqç\(aq, \(aqcedil\(aq: \(aq¸\(aq, \(aqcent\(aq: \(aq¢\(aq, \(aqchi\(aq: \(aqχ\(aq, \(aqcirc\(aq: \(aqˆ\(aq, \(aqclubs\(aq: \(aq♣\(aq, \(aqcong\(aq: \(aq≅\(aq, \(aqcopy\(aq: \(aq©\(aq, \(aqcrarr\(aq: \(aq↵\(aq, \(aqcup\(aq: \(aq∪\(aq, \(aqcurren\(aq: \(aq¤\(aq, \(aqdArr\(aq: \(aq⇓\(aq, \(aqdagger\(aq: \(aq\(dg\(aq, \(aqdarr\(aq: \(aq↓\(aq, \(aqdeg\(aq: \(aq°\(aq, \(aqdelta\(aq: \(aqδ\(aq, \(aqdiams\(aq: \(aq♦\(aq, \(aqdivide\(aq: \(aq÷\(aq, \(aqeacute\(aq: \(aqé\(aq, \(aqecirc\(aq: \(aqê\(aq, \(aqegrave\(aq: \(aqè\(aq, \(aqempty\(aq: \(aq∅\(aq, \(aqemsp\(aq: \(aq\eu2003\(aq, \(aqensp\(aq: \(aq\eu2002\(aq, \(aqepsilon\(aq: \(aqε\(aq, \(aqequiv\(aq: \(aq≡\(aq, \(aqeta\(aq: \(aqη\(aq, \(aqeth\(aq: \(aqð\(aq, \(aqeuml\(aq: \(aqë\(aq, \(aqeuro\(aq: \(aq€\(aq, \(aqexist\(aq: \(aq∃\(aq, \(aqfnof\(aq: \(aqƒ\(aq, \(aqforall\(aq: \(aq∀\(aq, \(aqfrac12\(aq: \(aq½\(aq, \(aqfrac14\(aq: \(aq¼\(aq, \(aqfrac34\(aq: \(aq¾\(aq, \(aqfrasl\(aq: \(aq⁄\(aq, \(aqgamma\(aq: \(aqγ\(aq, \(aqge\(aq: \(aq≥\(aq, \(aqgt\(aq: \(aq>\(aq, \(aqhArr\(aq: \(aq⇔\(aq, \(aqharr\(aq: \(aq↔\(aq, \(aqhearts\(aq: \(aq♥\(aq, \(aqhellip\(aq: \(aq…\(aq, \(aqiacute\(aq: \(aqí\(aq, \(aqicirc\(aq: \(aqî\(aq, \(aqiexcl\(aq: \(aq¡\(aq, \(aqigrave\(aq: \(aqì\(aq, \(aqimage\(aq: \(aqℑ\(aq, \(aqinfin\(aq: \(aq∞\(aq, \(aqint\(aq: \(aq∫\(aq, \(aqiota\(aq: \(aqι\(aq, \(aqiquest\(aq: \(aq¿\(aq, \(aqisin\(aq: \(aq∈\(aq, \(aqiuml\(aq: \(aqï\(aq, \(aqkappa\(aq: \(aqκ\(aq, 
\(aqlArr\(aq: \(aq⇐\(aq, \(aqlambda\(aq: \(aqλ\(aq, \(aqlang\(aq: \(aq〈\(aq, \(aqlaquo\(aq: \(aq«\(aq, \(aqlarr\(aq: \(aq←\(aq, \(aqlceil\(aq: \(aq⌈\(aq, \(aqldquo\(aq: \(aq“\(aq, \(aqle\(aq: \(aq≤\(aq, \(aqlfloor\(aq: \(aq⌊\(aq, \(aqlowast\(aq: \(aq∗\(aq, \(aqloz\(aq: \(aq◊\(aq, \(aqlrm\(aq: \(aq\eu200e\(aq, \(aqlsaquo\(aq: \(aq‹\(aq, \(aqlsquo\(aq: \(aq‘\(aq, \(aqlt\(aq: \(aq<\(aq, \(aqmacr\(aq: \(aq¯\(aq, \(aqmdash\(aq: \(aq—\(aq, \(aqmicro\(aq: \(aqµ\(aq, \(aqmiddot\(aq: \(aq·\(aq, \(aqminus\(aq: \(aq−\(aq, \(aqmu\(aq: \(aqμ\(aq, \(aqnabla\(aq: \(aq∇\(aq, \(aqnbsp\(aq: \(aq\exa0\(aq, \(aqndash\(aq: \(aq–\(aq, \(aqne\(aq: \(aq≠\(aq, \(aqni\(aq: \(aq∋\(aq, \(aqnot\(aq: \(aq¬\(aq, \(aqnotin\(aq: \(aq∉\(aq, \(aqnsub\(aq: \(aq⊄\(aq, \(aqntilde\(aq: \(aqñ\(aq, \(aqnu\(aq: \(aqν\(aq, \(aqoacute\(aq: \(aqó\(aq, \(aqocirc\(aq: \(aqô\(aq, \(aqoelig\(aq: \(aqœ\(aq, \(aqograve\(aq: \(aqò\(aq, \(aqoline\(aq: \(aq‾\(aq, \(aqomega\(aq: \(aqω\(aq, \(aqomicron\(aq: \(aqο\(aq, \(aqoplus\(aq: \(aq⊕\(aq, \(aqor\(aq: \(aq∨\(aq, \(aqordf\(aq: \(aqª\(aq, \(aqordm\(aq: \(aqº\(aq, \(aqoslash\(aq: \(aqø\(aq, \(aqotilde\(aq: \(aqõ\(aq, \(aqotimes\(aq: \(aq⊗\(aq, \(aqouml\(aq: \(aqö\(aq, \(aqpara\(aq: \(aq¶\(aq, \(aqpart\(aq: \(aq∂\(aq, \(aqpermil\(aq: \(aq‰\(aq, \(aqperp\(aq: \(aq⊥\(aq, \(aqphi\(aq: \(aqφ\(aq, \(aqpi\(aq: \(aqπ\(aq, \(aqpiv\(aq: \(aqϖ\(aq, \(aqplusmn\(aq: \(aq±\(aq, \(aqpound\(aq: \(aq£\(aq, \(aqprime\(aq: \(aq′\(aq, \(aqprod\(aq: \(aq∏\(aq, \(aqprop\(aq: \(aq∝\(aq, \(aqpsi\(aq: \(aqψ\(aq, \(aqquot\(aq: \(aq"\(aq, \(aqrArr\(aq: \(aq⇒\(aq, \(aqradic\(aq: \(aq√\(aq, \(aqrang\(aq: \(aq〉\(aq, \(aqraquo\(aq: \(aq»\(aq, \(aqrarr\(aq: \(aq→\(aq, \(aqrceil\(aq: \(aq⌉\(aq, \(aqrdquo\(aq: \(aq”\(aq, \(aqreal\(aq: \(aqℜ\(aq, \(aqreg\(aq: \(aq®\(aq, \(aqrfloor\(aq: \(aq⌋\(aq, \(aqrho\(aq: \(aqρ\(aq, \(aqrlm\(aq: \(aq\eu200f\(aq, \(aqrsaquo\(aq: \(aq›\(aq, \(aqrsquo\(aq: \(aq’\(aq, \(aqsbquo\(aq: \(aq‚\(aq, \(aqscaron\(aq: \(aqš\(aq, \(aqsdot\(aq: \(aq⋅\(aq, \(aqsect\(aq: \(aq§\(aq, \(aqshy\(aq: \(aq\exad\(aq, \(aqsigma\(aq: \(aqσ\(aq, \(aqsigmaf\(aq: \(aqς\(aq, \(aqsim\(aq: \(aq∼\(aq, \(aqspades\(aq: \(aq♠\(aq, \(aqsub\(aq: \(aq⊂\(aq, \(aqsube\(aq: \(aq⊆\(aq, \(aqsum\(aq: \(aq∑\(aq, \(aqsup\(aq: \(aq⊃\(aq, \(aqsup1\(aq: \(aq¹\(aq, \(aqsup2\(aq: \(aq²\(aq, \(aqsup3\(aq: \(aq³\(aq, \(aqsupe\(aq: \(aq⊇\(aq, \(aqszlig\(aq: \(aqß\(aq, \(aqtau\(aq: \(aqτ\(aq, \(aqthere4\(aq: \(aq∴\(aq, \(aqtheta\(aq: \(aqθ\(aq, \(aqthetasym\(aq: \(aqϑ\(aq, \(aqthinsp\(aq: \(aq\eu2009\(aq, \(aqthorn\(aq: \(aqþ\(aq, \(aqtilde\(aq: \(aq˜\(aq, \(aqtimes\(aq: \(aq×\(aq, \(aqtrade\(aq: \(aq™\(aq, \(aquArr\(aq: \(aq⇑\(aq, \(aquacute\(aq: \(aqú\(aq, \(aquarr\(aq: \(aq↑\(aq, \(aqucirc\(aq: \(aqû\(aq, \(aqugrave\(aq: \(aqù\(aq, \(aquml\(aq: \(aq¨\(aq, \(aqupsih\(aq: \(aqϒ\(aq, \(aqupsilon\(aq: \(aqυ\(aq, \(aquuml\(aq: \(aqü\(aq, \(aqweierp\(aq: \(aq℘\(aq, \(aqxi\(aq: \(aqξ\(aq, \(aqyacute\(aq: \(aqý\(aq, \(aqyen\(aq: \(aq¥\(aq, \(aqyuml\(aq: \(aqÿ\(aq, \(aqzeta\(aq: \(aqζ\(aq, \(aqzwj\(aq: \(aq\eu200d\(aq, \(aqzwnj\(aq: \(aq\eu200c\(aq}) +Translate one entity to its ISO Latin value. +Inspired by example from effbot.org +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.extract_rels(subjclass, objclass, doc, corpus=\(aqace\(aq, pattern=None, window=10) +Filter the output of \fBsemi_rel2reldict\fP according to specified NE classes and a filler pattern. 
+.sp +The parameters \fBsubjclass\fP and \fBobjclass\fP can be used to restrict the +Named Entities to particular types (any of \(aqLOCATION\(aq, \(aqORGANIZATION\(aq, +\(aqPERSON\(aq, \(aqDURATION\(aq, \(aqDATE\(aq, \(aqCARDINAL\(aq, \(aqPERCENT\(aq, \(aqMONEY\(aq, \(aqMEASURE\(aq). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsubjclass\fP (\fIstr\fP) \-\- the class of the subject Named Entity. +.IP \(bu 2 +\fBobjclass\fP (\fIstr\fP) \-\- the class of the object Named Entity. +.IP \(bu 2 +\fBdoc\fP (\fIieer document\fP\fI or \fP\fIa list of chunk trees\fP) \-\- input document +.IP \(bu 2 +\fBcorpus\fP (\fIstr\fP) \-\- name of the corpus to take as input; possible values are +\(aqieer\(aq and \(aqconll2002\(aq +.IP \(bu 2 +\fBpattern\fP (\fISRE_Pattern\fP) \-\- a regular expression for filtering the fillers of +retrieved triples. +.IP \(bu 2 +\fBwindow\fP (\fIint\fP) \-\- filters out fillers which exceed this threshold +.UNINDENT +.TP +.B Returns +see \fBmk_reldicts\fP +.TP +.B Return type +list(defaultdict) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.ieer_headlines() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.in_demo(trace=0, sql=True) +Select pairs of organizations and locations whose mentions occur with an +intervening occurrence of the preposition "in". +.sp +If the sql parameter is set to True, then the entity pairs are loaded into +an in\-memory database, and subsequently pulled out using an SQL "SELECT" +query. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.list2sym(lst) +Convert a list of strings into a canonical symbol. +:type lst: list +:return: a Unicode string without whitespace +:rtype: unicode +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.ne_chunked() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.roles_demo(trace=0) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.rtuple(reldict, lcon=False, rcon=False) +Pretty print the reldict as an rtuple. +:param reldict: a relation dictionary +:type reldict: defaultdict +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.semi_rel2reldict(pairs, window=5, trace=False) +Converts the pairs generated by \fBtree2semi_rel\fP into a \(aqreldict\(aq: a dictionary which +stores information about the subject and object NEs plus the filler between them. +Additionally, a left and right context of length =< window are captured (within +a given input sentence). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBpairs\fP \-\- a pair of list(str) and \fBTree\fP, as generated by +.IP \(bu 2 +\fBwindow\fP (\fIint\fP) \-\- a threshold for the number of items to include in the left and right context +.UNINDENT +.TP +.B Returns +\(aqrelation\(aq dictionaries whose keys are \(aqlcon\(aq, \(aqsubjclass\(aq, \(aqsubjtext\(aq, \(aqsubjsym\(aq, \(aqfiller\(aq, objclass\(aq, objtext\(aq, \(aqobjsym\(aq and \(aqrcon\(aq +.TP +.B Return type +list(defaultdict) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sem.relextract.tree2semi_rel(tree) +Group a chunk structure into a list of \(aqsemi\-relations\(aq of the form (list(str), \fBTree\fP). +.sp +In order to facilitate the construction of (\fBTree\fP, string, \fBTree\fP) triples, this +identifies pairs whose first member is a list (possibly empty) of terminal +strings, and whose second member is a \fBTree\fP of the form (NE_label, terminals). 
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtree\fP \-\- a chunk tree
+.TP
+.B Returns
+a list of pairs (list(str), \fBTree\fP)
+.TP
+.B Return type
+list of tuple
+.UNINDENT
+.UNINDENT
+.SS nltk.sem.skolemize module
+.INDENT 0.0
+.TP
+.B nltk.sem.skolemize.skolemize(expression, univ_scope=None, used_variables=None)
+Skolemize the expression and convert to conjunctive normal form (CNF)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.skolemize.to_cnf(first, second)
+Convert this split disjunction to conjunctive normal form (CNF)
+.UNINDENT
+.SS nltk.sem.util module
+.sp
+Utility functions for batch\-processing sentences: parsing and
+extraction of the semantic representation of the root node of the
+syntax tree, followed by evaluation of the semantic representation in
+a first\-order model.
+.INDENT 0.0
+.TP
+.B nltk.sem.util.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.util.demo_legacy_grammar()
+Check that interpret_sents() is compatible with legacy grammars that use
+a lowercase \(aqsem\(aq feature.
+.sp
+Define \(aqtest.fcfg\(aq to be the following
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.util.demo_model0()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.util.evaluate_sents(inputs, grammar, model, assignment, trace=0)
+Add the truth\-in\-a\-model value to each semantic representation
+for each syntactic parse of each input sentence.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBinputs\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a list of sentences
+.IP \(bu 2
+\fBgrammar\fP (\fInltk.grammar.FeatureGrammar\fP) \-\- \fBFeatureGrammar\fP or name of feature\-based grammar
+.UNINDENT
+.TP
+.B Returns
+a mapping from sentences to lists of triples (parse\-tree, semantic\-representations, evaluation\-in\-model)
+.TP
+.B Return type
+list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.util.interpret_sents(inputs, grammar, semkey=\(aqSEM\(aq, trace=0)
+Add the semantic representation to each syntactic parse tree
+of each input sentence.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBinputs\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a list of sentences
+.IP \(bu 2
+\fBgrammar\fP (\fInltk.grammar.FeatureGrammar\fP) \-\- \fBFeatureGrammar\fP or name of feature\-based grammar
+.UNINDENT
+.TP
+.B Returns
+a mapping from sentences to lists of pairs (parse\-tree, semantic\-representations)
+.TP
+.B Return type
+list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.util.parse_sents(inputs, grammar, trace=0)
+Convert input sentences into syntactic trees.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBinputs\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- sentences to be parsed
+.IP \(bu 2
+\fBgrammar\fP (\fInltk.grammar.FeatureGrammar\fP) \-\- \fBFeatureGrammar\fP or name of feature\-based grammar
+.UNINDENT
+.TP
+.B Return type
+list(nltk.tree.Tree) or dict(list(str)): list(Tree)
+.TP
+.B Returns
+a mapping from input sentences to a list of \fBTree\fPs
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.util.read_sents(filename, encoding=\(aqutf8\(aq)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sem.util.root_semrep(syntree, semkey=\(aqSEM\(aq)
+Find the semantic representation at the root of a tree.
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsyntree\fP \-\- a parse \fBTree\fP +.IP \(bu 2 +\fBsemkey\fP \-\- the feature label to use for the root semantics in the tree +.UNINDENT +.TP +.B Returns +the semantic representation at the root of a \fBTree\fP +.TP +.B Return type +sem.Expression +.UNINDENT +.UNINDENT +.SS Module contents +.sp +NLTK Semantic Interpretation Package +.sp +This package contains classes for representing semantic structure in +formulas of first\-order logic and for evaluating such formulas in +set\-theoretic models. +.sp +.nf +.ft C +>>> from nltk.sem import logic +>>> logic._counter._value = 0 +.ft P +.fi +.sp +The package has two main components: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fBlogic\fP provides support for analyzing expressions of First +Order Logic (FOL). +.IP \(bu 2 +\fBevaluate\fP allows users to recursively determine truth in a +model for formulas of FOL. +.UNINDENT +.UNINDENT +.UNINDENT +.sp +A model consists of a domain of discourse and a valuation function, +which assigns values to non\-logical constants. We assume that entities +in the domain are represented as strings such as \fB\(aqb1\(aq\fP, \fB\(aqg1\(aq\fP, +etc. A \fBValuation\fP is initialized with a list of (symbol, value) +pairs, where values are entities, sets of entities or sets of tuples +of entities. +The domain of discourse can be inferred from the valuation, and model +is then created with domain and valuation as parameters. +.sp +.nf +.ft C +>>> from nltk.sem import Valuation, Model +>>> v = [(\(aqadam\(aq, \(aqb1\(aq), (\(aqbetty\(aq, \(aqg1\(aq), (\(aqfido\(aq, \(aqd1\(aq), +\&... (\(aqgirl\(aq, set([\(aqg1\(aq, \(aqg2\(aq])), (\(aqboy\(aq, set([\(aqb1\(aq, \(aqb2\(aq])), +\&... (\(aqdog\(aq, set([\(aqd1\(aq])), +\&... (\(aqlove\(aq, set([(\(aqb1\(aq, \(aqg1\(aq), (\(aqb2\(aq, \(aqg2\(aq), (\(aqg1\(aq, \(aqb1\(aq), (\(aqg2\(aq, \(aqb1\(aq)]))] +>>> val = Valuation(v) +>>> dom = val.domain +>>> m = Model(dom, val) +.ft P +.fi +.SS nltk.sentiment package +.SS Submodules +.SS nltk.sentiment.sentiment_analyzer module +.sp +A SentimentAnalyzer is a tool to implement and facilitate Sentiment Analysis tasks +using NLTK features and classifiers, especially for teaching and demonstrative +purposes. +.INDENT 0.0 +.TP +.B class nltk.sentiment.sentiment_analyzer.SentimentAnalyzer(classifier=None) +Bases: \fBobject\fP +.sp +A Sentiment Analysis tool based on machine learning approaches. +.INDENT 7.0 +.TP +.B add_feat_extractor(function, **kwargs) +Add a new function to extract features from a document. This function will +be used in extract_features(). +Important: in this step our kwargs are only representing additional parameters, +and NOT the document we have to parse. The document will always be the first +parameter in the parameter list, and it will be added in the extract_features() +function. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBfunction\fP \-\- the extractor function to add to the list of feature extractors. +.IP \(bu 2 +\fBkwargs\fP \-\- additional parameters required by the \fIfunction\fP function. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B all_words(documents, labeled=None) +Return all words/tokens from the documents (with duplicates). +:param documents: a list of (words, label) tuples. +:param labeled: if \fITrue\fP, assume that each document is represented by a +.INDENT 7.0 +.INDENT 3.5 +(words, label) tuple: (list(str), str). 
If \fIFalse\fP, each document is
+considered as being a simple list of strings: list(str).
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Return type
+list(str)
+.TP
+.B Returns
+A list of all words/tokens in \fIdocuments\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B apply_features(documents, labeled=None)
+Apply all feature extractor functions to the documents. This is a wrapper
+around \fInltk.classify.util.apply_features\fP\&.
+.INDENT 7.0
+.TP
+.B If \fIlabeled=False\fP, return featuresets as:
+[feature_func(doc) for doc in documents]
+.TP
+.B If \fIlabeled=True\fP, return featuresets as:
+[(feature_func(tok), label) for (tok, label) in toks]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdocuments\fP \-\- a list of documents. \fIIf labeled=True\fP, the method expects
+a list of (words, label) tuples.
+.TP
+.B Return type
+LazyMap
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B bigram_collocation_feats(documents, top_n=None, min_freq=3, assoc_measure=BigramAssocMeasures.pmi)
+Return \fItop_n\fP bigram features (using \fIassoc_measure\fP).
+Note that this method is based on bigram collocation measures, and not
+on simple bigram frequency.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdocuments\fP \-\- a list (or iterable) of tokens.
+.IP \(bu 2
+\fBtop_n\fP \-\- number of best words/tokens to use, sorted by association
+measure.
+.IP \(bu 2
+\fBassoc_measure\fP \-\- bigram association measure to use as score function.
+.IP \(bu 2
+\fBmin_freq\fP \-\- the minimum number of occurrences of bigrams to take
+into consideration.
+.UNINDENT
+.TP
+.B Returns
+\fItop_n\fP ngrams scored by the given association measure.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classify(instance)
+Classify a single instance applying the features that have already been
+stored in the SentimentAnalyzer.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBinstance\fP \-\- a list (or iterable) of tokens.
+.TP
+.B Returns
+the classification result given by applying the classifier.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B evaluate(test_set, classifier=None, accuracy=True, f_measure=True, precision=True, recall=True, verbose=False)
+Evaluate and print classifier performance on the test set.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtest_set\fP \-\- A list of (tokens, label) tuples to use as gold set.
+.IP \(bu 2
+\fBclassifier\fP \-\- a classifier instance (previously trained).
+.IP \(bu 2
+\fBaccuracy\fP \-\- if \fITrue\fP, evaluate classifier accuracy.
+.IP \(bu 2
+\fBf_measure\fP \-\- if \fITrue\fP, evaluate classifier f_measure.
+.IP \(bu 2
+\fBprecision\fP \-\- if \fITrue\fP, evaluate classifier precision.
+.IP \(bu 2
+\fBrecall\fP \-\- if \fITrue\fP, evaluate classifier recall.
+.UNINDENT
+.TP
+.B Returns
+evaluation results.
+.TP
+.B Return type
+dict(str): float
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B extract_features(document)
+Apply extractor functions (and their parameters) to the present document.
+We pass \fIdocument\fP as the first parameter of the extractor functions.
+If we want to use the same extractor function multiple times, we have to
+add it to the extractors with \fIadd_feat_extractor\fP using multiple sets of
+parameters (one for each call of the extractor function).
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdocument\fP \-\- the document that will be passed as argument to the
+feature extractor functions.
+.TP
+.B Returns
+A dictionary of populated features extracted from the document.
+.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B save_file(content, filename) +Store \fIcontent\fP in \fIfilename\fP\&. Can be used to store a SentimentAnalyzer. +.UNINDENT +.INDENT 7.0 +.TP +.B train(trainer, training_set, save_classifier=None, **kwargs) +Train classifier on the training set, optionally saving the output in the +file specified by \fIsave_classifier\fP\&. +Additional arguments depend on the specific trainer used. For example, +a MaxentClassifier can use \fImax_iter\fP parameter to specify the number +of iterations, while a NaiveBayesClassifier cannot. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtrainer\fP \-\- \fItrain\fP method of a classifier. +E.g.: NaiveBayesClassifier.train +.IP \(bu 2 +\fBtraining_set\fP \-\- the training set to be passed as argument to the +classifier \fItrain\fP method. +.IP \(bu 2 +\fBsave_classifier\fP \-\- the filename of the file where the classifier +will be stored (optional). +.IP \(bu 2 +\fBkwargs\fP \-\- additional parameters that will be passed as arguments to +the classifier \fItrain\fP function. +.UNINDENT +.TP +.B Returns +A classifier instance trained on the training set. +.TP +.B Return type + +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B unigram_word_feats(words, top_n=None, min_freq=0) +Return most common top_n word features. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBwords\fP \-\- a list of words/tokens. +.IP \(bu 2 +\fBtop_n\fP \-\- number of best words/tokens to use, sorted by frequency. +.UNINDENT +.TP +.B Return type +list(str) +.TP +.B Returns +A list of \fItop_n\fP words/tokens (with no duplicates) sorted by +frequency. +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.sentiment.util module +.sp +Utility methods for Sentiment Analysis. +.INDENT 0.0 +.TP +.B nltk.sentiment.util.demo_liu_hu_lexicon(sentence, plot=False) +Basic example of sentiment classification using Liu and Hu opinion lexicon. +This function simply counts the number of positive, negative and neutral words +in the sentence and classifies it depending on which polarity is more represented. +Words that do not appear in the lexicon are considered as neutral. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsentence\fP \-\- a sentence whose polarity has to be classified. +.IP \(bu 2 +\fBplot\fP \-\- if True, plot a visual representation of the sentence polarity. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sentiment.util.demo_movie_reviews(trainer, n_instances=None, output=None) +Train classifier on all instances of the Movie Reviews dataset. +The corpus has been preprocessed using the default sentence tokenizer and +WordPunctTokenizer. +Features are composed of: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +most frequent unigrams +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtrainer\fP \-\- \fItrain\fP method of a classifier. +.IP \(bu 2 +\fBn_instances\fP \-\- the number of total reviews that have to be used for +training and testing. Reviews will be equally split between positive and +negative. +.IP \(bu 2 +\fBoutput\fP \-\- the output file where results have to be reported. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.sentiment.util.demo_sent_subjectivity(text) +Classify a single sentence as subjective or objective using a stored +SentimentAnalyzer. +.INDENT 7.0 +.TP +.B Parameters +\fBtext\fP \-\- a sentence whose subjectivity has to be classified. 
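+.sp
+Tying the \fBSentimentAnalyzer\fP methods above together, a minimal,
+hedged end\-to\-end sketch; the two\-document training set is purely
+illustrative, and any console output from \fItrain\fP is omitted:
+.sp
+.nf
+.ft C
+>>> from nltk.classify import NaiveBayesClassifier
+>>> from nltk.sentiment import SentimentAnalyzer
+>>> from nltk.sentiment.util import extract_unigram_feats
+>>> train_docs = [([\(aqnice\(aq, \(aqmovie\(aq], \(aqpos\(aq), ([\(aqawful\(aq, \(aqplot\(aq], \(aqneg\(aq)]
+>>> analyzer = SentimentAnalyzer()
+>>> # Collect candidate unigram features from the labeled documents.
+>>> words = analyzer.all_words(train_docs, labeled=True)
+>>> unigram_feats = analyzer.unigram_word_feats(words)
+>>> analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
+>>> training_set = analyzer.apply_features(train_docs, labeled=True)
+>>> classifier = analyzer.train(NaiveBayesClassifier.train, training_set)
+>>> analyzer.classify([\(aqnice\(aq, \(aqplot\(aq])
+.ft P
+.fi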
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=None)
+Train and test a classifier on instances of the Subjective Dataset by Pang and
+Lee. The dataset is made of 5000 subjective and 5000 objective sentences.
+All tokens (words and punctuation marks) are separated by whitespace, so
+we use the basic WhitespaceTokenizer to parse the data.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrainer\fP \-\- \fItrain\fP method of a classifier.
+.IP \(bu 2
+\fBsave_analyzer\fP \-\- if \fITrue\fP, store the SentimentAnalyzer in a pickle file.
+.IP \(bu 2
+\fBn_instances\fP \-\- the number of total sentences that have to be used for
+training and testing. Sentences will be equally split between positive
+and negative.
+.IP \(bu 2
+\fBoutput\fP \-\- the output file where results have to be reported.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.demo_tweets(trainer, n_instances=None, output=None)
+Train and test a Naive Bayes classifier on 10000 tweets, tokenized using
+TweetTokenizer.
+Features are composed of:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+1000 most frequent unigrams
+.IP \(bu 2
+100 top bigrams (using BigramAssocMeasures.pmi)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrainer\fP \-\- \fItrain\fP method of a classifier.
+.IP \(bu 2
+\fBn_instances\fP \-\- the number of total tweets that have to be used for
+training and testing. Tweets will be equally split between positive and
+negative.
+.IP \(bu 2
+\fBoutput\fP \-\- the output file where results have to be reported.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.demo_vader_instance(text)
+Output polarity scores for a text using the VADER approach.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtext\fP \-\- a text whose polarity has to be evaluated.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.demo_vader_tweets(n_instances=None, output=None)
+Classify 10000 positive and negative tweets using the VADER approach.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBn_instances\fP \-\- the number of total tweets that have to be classified.
+.IP \(bu 2
+\fBoutput\fP \-\- the output file where results have to be reported.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.extract_bigram_feats(document, bigrams)
+Populate a dictionary of bigram features, reflecting the presence/absence in
+the document of each of the tokens in \fIbigrams\fP\&. This extractor function only
+considers contiguous bigrams obtained by \fInltk.bigrams\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdocument\fP \-\- a list of words/tokens.
+.IP \(bu 2
+\fBbigrams\fP \-\- a list of bigrams whose presence/absence has to be
+checked in \fIdocument\fP\&.
+.UNINDENT
+.TP
+.B Returns
+a dictionary of bigram features {bigram : boolean}.
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> bigrams = [(\(aqglobal\(aq, \(aqwarming\(aq), (\(aqpolice\(aq, \(aqprevented\(aq), (\(aqlove\(aq, \(aqyou\(aq)]
+>>> document = \(aqice is melting due to global warming\(aq.split()
+>>> sorted(extract_bigram_feats(document, bigrams).items())
+[(\(aqcontains(global \- warming)\(aq, True), (\(aqcontains(love \- you)\(aq, False),
+(\(aqcontains(police \- prevented)\(aq, False)]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.extract_unigram_feats(document, unigrams, handle_negation=False)
+Populate a dictionary of unigram features, reflecting the presence/absence in
+the document of each of the tokens in \fIunigrams\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdocument\fP \-\- a list of words/tokens.
+.IP \(bu 2
+\fBunigrams\fP \-\- a list of words/tokens whose presence/absence has to be
+checked in \fIdocument\fP\&.
+.IP \(bu 2
+\fBhandle_negation\fP \-\- if \fIhandle_negation == True\fP, apply the \fImark_negation\fP
+method to \fIdocument\fP before checking for unigram presence/absence.
+.UNINDENT
+.TP
+.B Returns
+a dictionary of unigram features {unigram : boolean}.
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> words = [\(aqice\(aq, \(aqpolice\(aq, \(aqriot\(aq]
+>>> document = \(aqice is melting due to global warming\(aq.split()
+>>> sorted(extract_unigram_feats(document, words).items())
+[(\(aqcontains(ice)\(aq, True), (\(aqcontains(police)\(aq, False), (\(aqcontains(riot)\(aq, False)]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.json2csv_preprocess(json_file, outfile, fields, encoding=\(aqutf8\(aq, errors=\(aqreplace\(aq, gzip_compress=False, skip_retweets=True, skip_tongue_tweets=True, skip_ambiguous_tweets=True, strip_off_emoticons=True, remove_duplicates=True, limit=None)
+Convert a json file to a csv file, preprocessing each row to obtain a suitable
+dataset for tweet Sentiment Analysis.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBjson_file\fP \-\- the original json file containing the tweets.
+.IP \(bu 2
+\fBoutfile\fP \-\- the output csv filename.
+.IP \(bu 2
+\fBfields\fP \-\- a list of fields that will be extracted from the json file and
+kept in the output csv file.
+.IP \(bu 2
+\fBencoding\fP \-\- the encoding of the files.
+.IP \(bu 2
+\fBerrors\fP \-\- the error handling strategy for the output writer.
+.IP \(bu 2
+\fBgzip_compress\fP \-\- if True, create a compressed GZIP file.
+.IP \(bu 2
+\fBskip_retweets\fP \-\- if True, remove retweets.
+.IP \(bu 2
+\fBskip_tongue_tweets\fP \-\- if True, remove tweets containing ":P" and ":\-P"
+emoticons.
+.IP \(bu 2
+\fBskip_ambiguous_tweets\fP \-\- if True, remove tweets containing both happy
+and sad emoticons.
+.IP \(bu 2
+\fBstrip_off_emoticons\fP \-\- if True, strip emoticons from all tweets.
+.IP \(bu 2
+\fBremove_duplicates\fP \-\- if True, remove tweets appearing more than once.
+.IP \(bu 2
+\fBlimit\fP \-\- an integer to set the number of tweets to convert. After the
+limit is reached the conversion will stop. It can be useful to create
+subsets of the original tweets json data.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.mark_negation(document, double_neg_flip=False, shallow=False)
+Append the _NEG suffix to words that appear in the scope between a negation
+and a punctuation mark.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdocument\fP \-\- a list of words/tokens, or a tuple (words, label).
+.IP \(bu 2
+\fBshallow\fP \-\- if True, the method will modify the original document in place.
+.IP \(bu 2
+\fBdouble_neg_flip\fP \-\- if True, double negation is considered affirmation
+(we activate/deactivate negation scope every time we find a negation).
+.UNINDENT
+.TP
+.B Returns
+if \fIshallow == True\fP the method will modify the original document
+and return it. If \fIshallow == False\fP the method will return a modified
+document, leaving the original unmodified.
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> sent = "I didn\(aqt like this movie . It was bad .".split()
+>>> mark_negation(sent)
+[\(aqI\(aq, "didn\(aqt", \(aqlike_NEG\(aq, \(aqthis_NEG\(aq, \(aqmovie_NEG\(aq, \(aq.\(aq, \(aqIt\(aq, \(aqwas\(aq, \(aqbad\(aq, \(aq.\(aq]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.output_markdown(filename, **kwargs)
+Write the output of an analysis to a file.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.parse_tweets_set(filename, label, word_tokenizer=None, sent_tokenizer=None, skip_header=True)
+Parse a csv file containing tweets and output the data as a list of (text, label) tuples.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfilename\fP \-\- the input csv filename.
+.IP \(bu 2
+\fBlabel\fP \-\- the label to be appended to each tweet contained in the csv file.
+.IP \(bu 2
+\fBword_tokenizer\fP \-\- the tokenizer instance that will be used to tokenize
+each sentence into tokens (e.g. WordPunctTokenizer() or BlanklineTokenizer()).
+If no word_tokenizer is specified, tweets will not be tokenized.
+.IP \(bu 2
+\fBsent_tokenizer\fP \-\- the tokenizer that will be used to split each tweet into
+sentences.
+.IP \(bu 2
+\fBskip_header\fP \-\- if True, skip the first line of the csv file (which usually
+contains headers).
+.UNINDENT
+.TP
+.B Returns
+a list of (text, label) tuples.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.split_train_test(all_instances, n=None)
+Randomly split \fIn\fP instances of the dataset into train and test sets.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBall_instances\fP \-\- a list of instances (e.g. documents) that will be split.
+.IP \(bu 2
+\fBn\fP \-\- the number of instances to consider (in case we want to use only a
+subset).
+.UNINDENT
+.TP
+.B Returns
+two lists of instances. The train set is 8/10 of the total and the test set
+is 2/10 of the total.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.sentiment.util.timer(method)
+A timer decorator to measure the execution performance of methods.
+.UNINDENT
+.SS nltk.sentiment.vader module
+.sp
+If you use the VADER sentiment analysis tools, please cite:
+.sp
+Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule\-based Model for
+Sentiment Analysis of Social Media Text. Eighth International Conference on
+Weblogs and Social Media (ICWSM\-14). Ann Arbor, MI, June 2014.
+.INDENT 0.0
+.TP
+.B class nltk.sentiment.vader.SentiText(text, punc_list, regex_remove_punctuation)
+Bases: \fBobject\fP
+.sp
+Identify sentiment\-relevant string\-level properties of input text.
+.INDENT 7.0
+.TP
+.B allcap_differential(words)
+Check whether just some words in the input are ALL CAPS
+.INDENT 7.0
+.TP
+.B Parameters
+\fBwords\fP (\fIlist\fP) \-\- The words to inspect
+.TP
+.B Returns
+\fITrue\fP if some but not all items in \fIwords\fP are ALL CAPS
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sentiment.vader.SentimentIntensityAnalyzer(lexicon_file=\(aqsentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt\(aq)
+Bases: \fBobject\fP
+.sp
+Give a sentiment intensity score to sentences.
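+.sp
+A minimal usage sketch (it assumes the \(aqvader_lexicon\(aq resource has been
+downloaded, e.g. via \fInltk.download(\(aqvader_lexicon\(aq)\fP; only the score keys
+are shown, since the exact values depend on the lexicon version):
+.sp
+.nf
+.ft C
+>>> from nltk.sentiment.vader import SentimentIntensityAnalyzer
+>>> sia = SentimentIntensityAnalyzer()
+>>> # polarity_scores returns a dict of neg/neu/pos scores plus a
+>>> # normalized, weighted composite \(aqcompound\(aq score
+>>> scores = sia.polarity_scores("VADER is smart, handsome, and funny!")
+>>> sorted(scores)
+[\(aqcompound\(aq, \(aqneg\(aq, \(aqneu\(aq, \(aqpos\(aq]
+.ft P
+.fi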
+.INDENT 7.0
+.TP
+.B make_lex_dict()
+Convert the lexicon file to a dictionary
+.UNINDENT
+.INDENT 7.0
+.TP
+.B polarity_scores(text)
+Return a dict of sentiment strength scores based on the input text.
+Positive values are positive valence, negative values are negative
+valence.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B score_valence(sentiments, text)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sentiment_valence(valence, sentitext, item, i, sentiments)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.sentiment.vader.VaderConstants
+Bases: \fBobject\fP
+.sp
+A class to keep the Vader lists and constants.
+.INDENT 7.0
+.TP
+.B BOOSTER_DICT = {\(aqabsolutely\(aq: 0.293, \(aqalmost\(aq: \-0.293, \(aqamazingly\(aq: 0.293, \(aqawfully\(aq: 0.293, \(aqbarely\(aq: \-0.293, \(aqcompletely\(aq: 0.293, \(aqconsiderably\(aq: 0.293, \(aqdecidedly\(aq: 0.293, \(aqdeeply\(aq: 0.293, \(aqeffing\(aq: 0.293, \(aqenormously\(aq: 0.293, \(aqentirely\(aq: 0.293, \(aqespecially\(aq: 0.293, \(aqexceptionally\(aq: 0.293, \(aqextremely\(aq: 0.293, \(aqfabulously\(aq: 0.293, \(aqflippin\(aq: 0.293, \(aqflipping\(aq: 0.293, \(aqfrickin\(aq: 0.293, \(aqfricking\(aq: 0.293, \(aqfriggin\(aq: 0.293, \(aqfrigging\(aq: 0.293, \(aqfucking\(aq: 0.293, \(aqfully\(aq: 0.293, \(aqgreatly\(aq: 0.293, \(aqhardly\(aq: \-0.293, \(aqhella\(aq: 0.293, \(aqhighly\(aq: 0.293, \(aqhugely\(aq: 0.293, \(aqincredibly\(aq: 0.293, \(aqintensely\(aq: 0.293, \(aqjust enough\(aq: \-0.293, \(aqkind of\(aq: \-0.293, \(aqkind\-of\(aq: \-0.293, \(aqkinda\(aq: \-0.293, \(aqkindof\(aq: \-0.293, \(aqless\(aq: \-0.293, \(aqlittle\(aq: \-0.293, \(aqmajorly\(aq: 0.293, \(aqmarginally\(aq: \-0.293, \(aqmore\(aq: 0.293, \(aqmost\(aq: 0.293, \(aqoccasionally\(aq: \-0.293, \(aqparticularly\(aq: 0.293, \(aqpartly\(aq: \-0.293, \(aqpurely\(aq: 0.293, \(aqquite\(aq: 0.293, \(aqreally\(aq: 0.293, \(aqremarkably\(aq: 0.293, \(aqscarcely\(aq: \-0.293, \(aqslightly\(aq: \-0.293, \(aqso\(aq: 0.293, \(aqsomewhat\(aq: \-0.293, \(aqsort of\(aq: \-0.293, \(aqsort\-of\(aq: \-0.293, \(aqsorta\(aq: \-0.293, \(aqsortof\(aq: \-0.293, \(aqsubstantially\(aq: 0.293, \(aqthoroughly\(aq: 0.293, \(aqtotally\(aq: 0.293, \(aqtremendously\(aq: 0.293, \(aquber\(aq: 0.293, \(aqunbelievably\(aq: 0.293, \(aqunusually\(aq: 0.293, \(aqutterly\(aq: 0.293, \(aqvery\(aq: 0.293}
+.UNINDENT
+.INDENT 7.0
+.TP
+.B B_DECR = \-0.293
+.UNINDENT
+.INDENT 7.0
+.TP
+.B B_INCR = 0.293
+.UNINDENT
+.INDENT 7.0
+.TP
+.B C_INCR = 0.733
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NEGATE = {"ain\(aqt", \(aqaint\(aq, "aren\(aqt", \(aqarent\(aq, "can\(aqt", \(aqcannot\(aq, \(aqcant\(aq, "couldn\(aqt", \(aqcouldnt\(aq, "daren\(aqt", \(aqdarent\(aq, \(aqdespite\(aq, "didn\(aqt", \(aqdidnt\(aq, "doesn\(aqt", \(aqdoesnt\(aq, "don\(aqt", \(aqdont\(aq, "hadn\(aqt", \(aqhadnt\(aq, "hasn\(aqt", \(aqhasnt\(aq, "haven\(aqt", \(aqhavent\(aq, "isn\(aqt", \(aqisnt\(aq, "mightn\(aqt", \(aqmightnt\(aq, "mustn\(aqt", \(aqmustnt\(aq, "needn\(aqt", \(aqneednt\(aq, \(aqneither\(aq, \(aqnever\(aq, \(aqnone\(aq, \(aqnope\(aq, \(aqnor\(aq, \(aqnot\(aq, \(aqnothing\(aq, \(aqnowhere\(aq, "oughtn\(aqt", \(aqoughtnt\(aq, \(aqrarely\(aq, \(aqseldom\(aq, "shan\(aqt", \(aqshant\(aq, "shouldn\(aqt", \(aqshouldnt\(aq, \(aquh\-uh\(aq, \(aquhuh\(aq, "wasn\(aqt", \(aqwasnt\(aq, "weren\(aqt", \(aqwerent\(aq, \(aqwithout\(aq, "won\(aqt", \(aqwont\(aq, "wouldn\(aqt", \(aqwouldnt\(aq}
+.UNINDENT
+.INDENT 7.0
+.TP
+.B N_SCALAR = \-0.74
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PUNC_LIST = [\(aq.\(aq, \(aq!\(aq, \(aq?\(aq, \(aq,\(aq, \(aq;\(aq, \(aq:\(aq, \(aq\-\(aq, "\(aq", \(aq"\(aq,
\(aq!!\(aq, \(aq!!!\(aq, \(aq??\(aq, \(aq???\(aq, \(aq?!?\(aq, \(aq!?!\(aq, \(aq?!?!\(aq, \(aq!?!?\(aq]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B REGEX_REMOVE_PUNCTUATION = re.compile(\(aq[!"\e\e#\e\e$%\e\e&\e\(aq\e\e(\e\e)\e\e*\e\e+,\e\e\-\e\e./:;<=>\e\e?@\e\e[\e\e\e\e\e\e]\e\e^_\(ga\e\e{\e\e|\e\e}\e\e~]\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B SPECIAL_CASE_IDIOMS = {\(aqbad ass\(aq: 1.5, \(aqcut the mustard\(aq: 2, \(aqhand to mouth\(aq: \-2, \(aqkiss of death\(aq: \-1.5, \(aqthe bomb\(aq: 3, \(aqthe shit\(aq: 3, \(aqyeah right\(aq: \-2}
+.UNINDENT
+.INDENT 7.0
+.TP
+.B negated(input_words, include_nt=True)
+Determine if the input contains negation words
+.UNINDENT
+.INDENT 7.0
+.TP
+.B normalize(score, alpha=15)
+Normalize the score to be between \-1 and 1 using an alpha that
+approximates the max expected value
+.UNINDENT
+.INDENT 7.0
+.TP
+.B scalar_inc_dec(word, valence, is_cap_diff)
+Check if the preceding words increase, decrease, or negate/nullify the
+valence
+.UNINDENT
+.UNINDENT
+.SS Module contents
+.sp
+NLTK Sentiment Analysis Package
+.SS nltk.stem package
+.SS Submodules
+.SS nltk.stem.api module
+.INDENT 0.0
+.TP
+.B class nltk.stem.api.StemmerI
+Bases: \fBobject\fP
+.sp
+A processing interface for removing morphological affixes from
+words. This process is known as stemming.
+.INDENT 7.0
+.TP
+.B abstract stem(token)
+Strip affixes from the token and return the stem.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtoken\fP (\fIstr\fP) \-\- The token that should be stemmed.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.stem.arlstem module
+.sp
+ARLSTem Arabic Stemmer
+The details about the implementation of this algorithm are described in:
+K. Abainia, S. Ouamour and H. Sayoud, A Novel Robust Arabic Light Stemmer,
+Journal of Experimental & Theoretical Artificial Intelligence (JETAI\(aq17),
+Vol. 29, No. 3, 2017, pp. 557\-573.
+ARLSTem is a light Arabic stemmer that is based on removing the affixes
+from the word (i.e. prefixes, suffixes and infixes). It was evaluated and
+compared to several other stemmers using Paice\(aqs parameters (under\-stemming
+index, over\-stemming index and stemming weight), and the results showed that
+ARLSTem is promising and produces high performance. This stemmer is not
+based on any dictionary and can be used on\-line effectively.
+.INDENT 0.0
+.TP
+.B class nltk.stem.arlstem.ARLSTem
+Bases: \fI\%nltk.stem.api.StemmerI\fP
+.sp
+ARLSTem stemmer : a light Arabic Stemming algorithm without any dictionary.
+Department of Telecommunication & Information Processing. USTHB University,
+Algiers, Algeria.
+ARLSTem.stem(token) returns the Arabic stem for the input token.
+The ARLSTem Stemmer requires that all tokens are encoded using Unicode
+encoding.
+.INDENT 7.0
+.TP
+.B fem2masc(token)
+transform the word from the feminine form to the masculine form.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B norm(token)
+normalize the word by removing diacritics, replacing hamzated Alif
+with Alif, replacing AlifMaqsura with Yaa and removing Waaw at the
+beginning.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B plur2sing(token)
+transform the word from the plural form to the singular form.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pref(token)
+remove prefixes from the word\(aqs beginning.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem(token)
+call this function to get the word\(aqs stem based on ARLSTem.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B suff(token)
+remove suffixes from the word\(aqs end.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb(token)
+stem the verb prefixes and suffixes or both
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t1(token)
+stem the present prefixes and suffixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t2(token)
+stem the future prefixes and suffixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t3(token)
+stem the present suffixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t4(token)
+stem the present prefixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t5(token)
+stem the future prefixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t6(token)
+stem the order prefixes
+.UNINDENT
+.UNINDENT
+.SS nltk.stem.arlstem2 module
+.sp
+ARLSTem2 Arabic Light Stemmer
+The details about the implementation of this algorithm are described in:
+K. Abainia and H. Rebbani, Comparing the Effectiveness of the Improved ARLSTem
+Algorithm with Existing Arabic Light Stemmers, International Conference on
+Theoretical and Applicative Aspects of Computer Science (ICTAACS\(aq19), Skikda,
+Algeria, December 15\-16, 2019.
+ARLSTem2 is an Arabic light stemmer based on removing the affixes from
+the words (i.e. prefixes, suffixes and infixes). It is an improvement
+of the previous Arabic light stemmer (ARLSTem). The new version was compared to
+the original algorithm and several existing Arabic light stemmers, where the
+results showed that the new version considerably improves the under\-stemming
+errors that are common to light stemmers. Both ARLSTem and ARLSTem2 can be run
+online and do not use any dictionary.
+.INDENT 0.0
+.TP
+.B class nltk.stem.arlstem2.ARLSTem2
+Bases: \fI\%nltk.stem.api.StemmerI\fP
+.sp
+Return a stemmed Arabic word after removing affixes. This is an improved
+version of the previous algorithm, which reduces under\-stemming errors.
+Typically used in Arabic search engines, information retrieval and NLP.
+.sp
+.nf
+.ft C
+>>> from nltk.stem.arlstem2 import ARLSTem2
+>>> stemmer = ARLSTem2()
+>>> word = stemmer.stem(\(aqيعمل\(aq)
+>>> print(word)
+عمل
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtoken\fP (\fIunicode\fP) \-\- The input Arabic word (unicode) to be stemmed
+.TP
+.B Returns
+A unicode Arabic word
+.UNINDENT
+.INDENT 7.0
+.TP
+.B adjective(token)
+remove the infixes from adjectives
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fem2masc(token)
+transform the word from the feminine form to the masculine form.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B norm(token)
+normalize the word by removing diacritics, replacing hamzated Alif
+with bare Alif, replacing AlifMaqsura with Yaa and removing Waaw at the
+beginning.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B plur2sing(token)
+transform the word from the plural form to the singular form.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pref(token)
+remove prefixes from the word\(aqs beginning.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem(token)
+Strip affixes from the token and return the stem.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtoken\fP (\fIstr\fP) \-\- The token that should be stemmed.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem1(token)
+call this function to get the first stem
+.UNINDENT
+.INDENT 7.0
+.TP
+.B suff(token)
+remove the suffixes from the word\(aqs ending.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb(token)
+stem the verb prefixes and suffixes or both
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t1(token)
+stem the present tense co\-occurred prefixes and suffixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t2(token)
+stem the future tense co\-occurred prefixes and suffixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t3(token)
+stem the present tense suffixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t4(token)
+stem the present tense prefixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t5(token)
+stem the future tense prefixes
+.UNINDENT
+.INDENT 7.0
+.TP
+.B verb_t6(token)
+stem the imperative tense prefixes
+.UNINDENT
+.UNINDENT
+.SS nltk.stem.cistem module
+.INDENT 0.0
+.TP
+.B class nltk.stem.cistem.Cistem(case_insensitive: bool = False)
+Bases: \fI\%nltk.stem.api.StemmerI\fP
+.sp
+CISTEM Stemmer for German
+.sp
+This is the official Python implementation of the CISTEM stemmer.
+It is based on the paper
+Leonie Weissweiler, Alexander Fraser (2017). Developing a Stemmer for German
+Based on a Comparative Analysis of Publicly Available Stemmers.
+In Proceedings of the German Society for Computational Linguistics and Language
+Technology (GSCL)
+which can be read here:
+\fI\%http://www.cis.lmu.de/~weissweiler/cistem/\fP
+.sp
+In the paper, we conducted an analysis of publicly available stemmers,
+developed two gold standards for German stemming and evaluated the stemmers
+based on the two gold standards. We then proposed the stemmer implemented here
+and showed that it achieves a slightly better f\-measure than the other stemmers and
+is thrice as fast as the Snowball stemmer for German while being about as fast
+as most other stemmers.
+.sp
+case_insensitive is a boolean specifying whether case\-insensitive stemming
+should be used. Case insensitivity improves performance only if words in the
+text may be incorrectly upper case. For all\-lowercase and correctly cased
+text, best performance is achieved by setting case_insensitive to False.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBcase_insensitive\fP (\fIbool\fP) \-\- if True, the stemming is case insensitive. False by default.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B repl_xx = re.compile(\(aq(.)\e\e1\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B repl_xx_back = re.compile(\(aq(.)\e\e*\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static replace_back(word: str) -> str
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static replace_to(word: str) -> str
+.UNINDENT
+.INDENT 7.0
+.TP
+.B segment(word: str) -> Tuple[str, str]
+This method works very similarly to \fIstem()\fP\&. The difference is that in
+addition to returning the stem, it also returns the rest that was removed at
+the end. To be able to return the stem unchanged so the stem and the rest
+can be concatenated to form the original word, all substitutions that altered
+the stem in any other way than by removing letters at the end were left out.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBword\fP (\fIstr\fP) \-\- The word that is to be stemmed.
+.TP
+.B Returns
+A tuple of the stemmed word and the removed suffix.
+.TP
+.B Return type
+Tuple[str, str]
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.stem.cistem import Cistem
+>>> stemmer = Cistem()
+>>> s1 = "Speicherbehältern"
+>>> stemmer.segment(s1)
+(\(aqspeicherbehält\(aq, \(aqern\(aq)
+>>> s2 = "Grenzpostens"
+>>> stemmer.segment(s2)
+(\(aqgrenzpost\(aq, \(aqens\(aq)
+>>> s3 = "Ausgefeiltere"
+>>> stemmer.segment(s3)
+(\(aqausgefeilt\(aq, \(aqere\(aq)
+>>> stemmer = Cistem(True)
+>>> stemmer.segment(s1)
+(\(aqspeicherbehäl\(aq, \(aqtern\(aq)
+>>> stemmer.segment(s2)
+(\(aqgrenzpo\(aq, \(aqstens\(aq)
+>>> stemmer.segment(s3)
+(\(aqausgefeil\(aq, \(aqtere\(aq)
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem(word: str) -> str
+Stems the input word.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBword\fP (\fIstr\fP) \-\- The word that is to be stemmed.
+.TP
+.B Returns
+The stemmed word.
+.TP
+.B Return type
+str
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.stem.cistem import Cistem
+>>> stemmer = Cistem()
+>>> s1 = "Speicherbehältern"
+>>> stemmer.stem(s1)
+\(aqspeicherbehalt\(aq
+>>> s2 = "Grenzpostens"
+>>> stemmer.stem(s2)
+\(aqgrenzpost\(aq
+>>> s3 = "Ausgefeiltere"
+>>> stemmer.stem(s3)
+\(aqausgefeilt\(aq
+>>> stemmer = Cistem(True)
+>>> stemmer.stem(s1)
+\(aqspeicherbehal\(aq
+>>> stemmer.stem(s2)
+\(aqgrenzpo\(aq
+>>> stemmer.stem(s3)
+\(aqausgefeil\(aq
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B strip_emr = re.compile(\(aqe[mr]$\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B strip_esn = re.compile(\(aq[esn]$\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B strip_ge = re.compile(\(aq^ge(.{4,})\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B strip_nd = re.compile(\(aqnd$\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B strip_t = re.compile(\(aqt$\(aq)
+.UNINDENT
+.UNINDENT
+.SS nltk.stem.isri module
+.sp
+ISRI Arabic Stemmer
+.sp
+The algorithm for this stemmer is described in:
+.sp
+Taghva, K., Elkoury, R., and Coombs, J. 2005. Arabic Stemming without a root dictionary.
+Information Science Research Institute. University of Nevada, Las Vegas, USA.
+.sp
+The Information Science Research Institute’s (ISRI) Arabic stemmer shares many features
+with the Khoja stemmer. However, the main difference is that the ISRI stemmer does not use a root
+dictionary. Also, if a root is not found, the ISRI stemmer returns the normalized form, rather than
+returning the original unmodified word.
+.sp
+Additional adjustments were made to improve the algorithm:
+.sp
+1\- Adding 60 stop words.
+2\- Adding the pattern (تفاعيل) to the ISRI pattern set.
+3\- Step 2 of the original algorithm normalized all hamza. This step is discarded because it
+increases word ambiguity and changes the original root.
+.INDENT 0.0
+.TP
+.B class nltk.stem.isri.ISRIStemmer
+Bases: \fI\%nltk.stem.api.StemmerI\fP
+.sp
+ISRI Arabic stemmer based on the algorithm: Arabic Stemming without a root dictionary.
+Information Science Research Institute. University of Nevada, Las Vegas, USA.
+.sp
+A few minor modifications have been made to the basic ISRI algorithm.
+See the source code of this module for more information.
+.sp
+isri.stem(token) returns the Arabic root for the given token.
+.sp
+The ISRI Stemmer requires that all tokens have Unicode string types.
+If you use Python IDLE on Arabic Windows you have to decode text first
+using Arabic \(aq1256\(aq coding.
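+.sp
+A minimal usage sketch (the exact root returned depends on the input token,
+so only the return type is checked here; the input word is an ordinary
+Arabic word chosen purely for illustration):
+.sp
+.nf
+.ft C
+>>> from nltk.stem.isri import ISRIStemmer
+>>> stemmer = ISRIStemmer()
+>>> # stem() normalizes the token and strips prefixes/suffixes,
+>>> # falling back to the normalized form if no root is found
+>>> stem = stemmer.stem(\(aqالكتاب\(aq)
+>>> isinstance(stem, str)
+True
+.ft P
+.fi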
+.INDENT 7.0
+.TP
+.B end_w5(word)
+ending step (word of length five)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B end_w6(word)
+ending step (word of length six)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B norm(word, num=3)
+normalization:
+num=1 normalize diacritics
+num=2 normalize initial hamza
+num=3 both 1&2
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pre1(word)
+normalize short prefix
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pre32(word)
+remove length three and length two prefixes in this order
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pro_w4(word)
+process length four patterns and extract length three roots
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pro_w53(word)
+process length five patterns and extract length three roots
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pro_w54(word)
+process length five patterns and extract length four roots
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pro_w6(word)
+process length six patterns and extract length three roots
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pro_w64(word)
+process length six patterns and extract length four roots
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem(token)
+Stem a word token using the ISRI stemmer.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B suf1(word)
+normalize short suffix
+.UNINDENT
+.INDENT 7.0
+.TP
+.B suf32(word)
+remove length three and length two suffixes in this order
+.UNINDENT
+.INDENT 7.0
+.TP
+.B waw(word)
+remove connective ‘و’ if it precedes a word beginning with ‘و’
+.UNINDENT
+.UNINDENT
+.SS nltk.stem.lancaster module
+.sp
+A word stemmer based on the Lancaster (Paice/Husk) stemming algorithm.
+Paice, Chris D. "Another Stemmer." ACM SIGIR Forum 24.3 (1990): 56\-61.
+.INDENT 0.0
+.TP
+.B class nltk.stem.lancaster.LancasterStemmer(rule_tuple=None, strip_prefix_flag=False)
+Bases: \fI\%nltk.stem.api.StemmerI\fP
+.sp
+Lancaster Stemmer
+.sp
+.nf
+.ft C
+>>> from nltk.stem.lancaster import LancasterStemmer
+>>> st = LancasterStemmer()
+>>> st.stem(\(aqmaximum\(aq) # Remove "\-um" when word is intact
+\(aqmaxim\(aq
+>>> st.stem(\(aqpresumably\(aq) # Don\(aqt remove "\-um" when word is not intact
+\(aqpresum\(aq
+>>> st.stem(\(aqmultiply\(aq) # No action taken if word ends with "\-ply"
+\(aqmultiply\(aq
+>>> st.stem(\(aqprovision\(aq) # Replace "\-sion" with "\-j" to trigger "j" set of rules
+\(aqprovid\(aq
+>>> st.stem(\(aqowed\(aq) # Word starting with vowel must contain at least 2 letters
+\(aqow\(aq
+>>> st.stem(\(aqear\(aq) # ditto
+\(aqear\(aq
+>>> st.stem(\(aqsaying\(aq) # Words starting with consonant must contain at least 3
+\(aqsay\(aq
+>>> st.stem(\(aqcrying\(aq) # letters and one of those letters must be a vowel
+\(aqcry\(aq
+>>> st.stem(\(aqstring\(aq) # ditto
+\(aqstring\(aq
+>>> st.stem(\(aqmeant\(aq) # ditto
+\(aqmeant\(aq
+>>> st.stem(\(aqcement\(aq) # ditto
+\(aqcem\(aq
+>>> st_pre = LancasterStemmer(strip_prefix_flag=True)
+>>> st_pre.stem(\(aqkilometer\(aq) # Test Prefix
+\(aqmet\(aq
+>>> st_custom = LancasterStemmer(rule_tuple=("ssen4>", "s1t."))
+>>> st_custom.stem("ness") # Change s to t
+\(aqnest\(aq
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B default_rule_tuple = (\(aqai*2.\(aq, \(aqa*1.\(aq, \(aqbb1.\(aq, \(aqcity3s.\(aq, \(aqci2>\(aq, \(aqcn1t>\(aq, \(aqdd1.\(aq, \(aqdei3y>\(aq, \(aqdeec2ss.\(aq, \(aqdee1.\(aq, \(aqde2>\(aq, \(aqdooh4>\(aq, \(aqe1>\(aq, \(aqfeil1v.\(aq, \(aqfi2>\(aq, \(aqgni3>\(aq, \(aqgai3y.\(aq, \(aqga2>\(aq, \(aqgg1.\(aq, \(aqht*2.\(aq, \(aqhsiug5ct.\(aq, \(aqhsi3>\(aq, \(aqi*1.\(aq, \(aqi1y>\(aq, \(aqji1d.\(aq, \(aqjuf1s.\(aq, \(aqju1d.\(aq, \(aqjo1d.\(aq, \(aqjeh1r.\(aq, \(aqjrev1t.\(aq, \(aqjsim2t.\(aq, \(aqjn1d.\(aq, \(aqj1s.\(aq, \(aqlbaifi6.\(aq, \(aqlbai4y.\(aq,
\(aqlba3>\(aq, \(aqlbi3.\(aq, \(aqlib2l>\(aq, \(aqlc1.\(aq, \(aqlufi4y.\(aq, \(aqluf3>\(aq, \(aqlu2.\(aq, \(aqlai3>\(aq, \(aqlau3>\(aq, \(aqla2>\(aq, \(aqll1.\(aq, \(aqmui3.\(aq, \(aqmu*2.\(aq, \(aqmsi3>\(aq, \(aqmm1.\(aq, \(aqnois4j>\(aq, \(aqnoix4ct.\(aq, \(aqnoi3>\(aq, \(aqnai3>\(aq, \(aqna2>\(aq, \(aqnee0.\(aq, \(aqne2>\(aq, \(aqnn1.\(aq, \(aqpihs4>\(aq, \(aqpp1.\(aq, \(aqre2>\(aq, \(aqrae0.\(aq, \(aqra2.\(aq, \(aqro2>\(aq, \(aqru2>\(aq, \(aqrr1.\(aq, \(aqrt1>\(aq, \(aqrei3y>\(aq, \(aqsei3y>\(aq, \(aqsis2.\(aq, \(aqsi2>\(aq, \(aqssen4>\(aq, \(aqss0.\(aq, \(aqsuo3>\(aq, \(aqsu*2.\(aq, \(aqs*1>\(aq, \(aqs0.\(aq, \(aqtacilp4y.\(aq, \(aqta2>\(aq, \(aqtnem4>\(aq, \(aqtne3>\(aq, \(aqtna3>\(aq, \(aqtpir2b.\(aq, \(aqtpro2b.\(aq, \(aqtcud1.\(aq, \(aqtpmus2.\(aq, \(aqtpec2iv.\(aq, \(aqtulo2v.\(aq, \(aqtsis0.\(aq, \(aqtsi3>\(aq, \(aqtt1.\(aq, \(aquqi3.\(aq, \(aqugo1.\(aq, \(aqvis3j>\(aq, \(aqvie0.\(aq, \(aqvi2>\(aq, \(aqylb1>\(aq, \(aqyli3y>\(aq, \(aqylp0.\(aq, \(aqyl2>\(aq, \(aqygo1.\(aq, \(aqyhp1.\(aq, \(aqymo1.\(aq, \(aqypo1.\(aq, \(aqyti3>\(aq, \(aqyte3>\(aq, \(aqytl2.\(aq, \(aqyrtsi5.\(aq, \(aqyra3>\(aq, \(aqyro3>\(aq, \(aqyfi3.\(aq, \(aqycn2t>\(aq, \(aqyca3>\(aq, \(aqzi2>\(aq, \(aqzy1s.\(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B parseRules(rule_tuple=None) +Validate the set of rules used in this stemmer. +.sp +If this function is called as an individual method, without using stem +method, rule_tuple argument will be compiled into self.rule_dictionary. +If this function is called within stem, self._rule_tuple will be used. +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a word using the Lancaster stemmer. +.UNINDENT +.UNINDENT +.SS nltk.stem.porter module +.sp +Porter Stemmer +.sp +This is the Porter stemming algorithm. It follows the algorithm +presented in +.sp +Porter, M. "An algorithm for suffix stripping." Program 14.3 (1980): 130\-137. +.sp +with some optional deviations that can be turned on or off with the +\fImode\fP argument to the constructor. +.sp +Martin Porter, the algorithm\(aqs inventor, maintains a web page about the +algorithm at +.INDENT 0.0 +.INDENT 3.5 +\fI\%http://www.tartarus.org/~martin/PorterStemmer/\fP +.UNINDENT +.UNINDENT +.sp +which includes another Python implementation and other implementations +in many languages. +.INDENT 0.0 +.TP +.B class nltk.stem.porter.PorterStemmer(mode=\(aqNLTK_EXTENSIONS\(aq) +Bases: \fI\%nltk.stem.api.StemmerI\fP +.sp +A word stemmer based on the Porter stemming algorithm. +.INDENT 7.0 +.INDENT 3.5 +Porter, M. "An algorithm for suffix stripping." +Program 14.3 (1980): 130\-137. +.UNINDENT +.UNINDENT +.sp +See \fI\%http://www.tartarus.org/~martin/PorterStemmer/\fP for the homepage +of the algorithm. +.sp +Martin Porter has endorsed several modifications to the Porter +algorithm since writing his original paper, and those extensions are +included in the implementations on his website. Additionally, others +have proposed further improvements to the algorithm, including NLTK +contributors. There are thus three modes that can be selected by +passing the appropriate constant to the class constructor\(aqs \fImode\fP +attribute: +.INDENT 7.0 +.INDENT 3.5 +PorterStemmer.ORIGINAL_ALGORITHM +\- Implementation that is faithful to the original paper. +.INDENT 0.0 +.INDENT 3.5 +Note that Martin Porter has deprecated this version of the +algorithm. 
Martin distributes implementations of the Porter
+Stemmer in many languages, hosted at:
+.INDENT 0.0
+.INDENT 3.5
+\fI\%http://www.tartarus.org/~martin/PorterStemmer/\fP
+.UNINDENT
+.UNINDENT
+.sp
+and all of these implementations include his extensions. He
+strongly recommends against using the original, published
+version of the algorithm; only use this mode if you clearly
+understand why you are choosing to do so.
+.UNINDENT
+.UNINDENT
+.sp
+PorterStemmer.MARTIN_EXTENSIONS
+.INDENT 0.0
+.INDENT 3.5
+\- Implementation that only uses the modifications to the
+algorithm that are included in the implementations on Martin
+Porter\(aqs website. He has declared Porter frozen, so the
+behaviour of those implementations should never change.
+.UNINDENT
+.UNINDENT
+.sp
+PorterStemmer.NLTK_EXTENSIONS (default)
+.INDENT 0.0
+.INDENT 3.5
+\- Implementation that includes further improvements devised by
+NLTK contributors or taken from other modified implementations
+found on the web.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+For the best stemming, you should use the default NLTK_EXTENSIONS
+version. However, if you need to get the same results as either the
+original algorithm or one of Martin Porter\(aqs hosted versions for
+compatibility with an existing implementation or dataset, you can use
+one of the other modes instead.
+.INDENT 7.0
+.TP
+.B MARTIN_EXTENSIONS = \(aqMARTIN_EXTENSIONS\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NLTK_EXTENSIONS = \(aqNLTK_EXTENSIONS\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ORIGINAL_ALGORITHM = \(aqORIGINAL_ALGORITHM\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem(word, to_lowercase=True)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBto_lowercase\fP \-\- if \fIto_lowercase=True\fP, the word is always returned in lowercase.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.stem.porter.demo()
+A demonstration of the Porter stemmer on a sample from
+the Penn Treebank corpus.
+.UNINDENT
+.SS nltk.stem.regexp module
+.INDENT 0.0
+.TP
+.B class nltk.stem.regexp.RegexpStemmer(regexp, min=0)
+Bases: \fI\%nltk.stem.api.StemmerI\fP
+.sp
+A stemmer that uses regular expressions to identify morphological
+affixes. Any substrings that match the regular expressions will
+be removed.
+.sp
+.nf
+.ft C
+>>> from nltk.stem import RegexpStemmer
+>>> st = RegexpStemmer(\(aqing$|s$|e$|able$\(aq, min=4)
+>>> st.stem(\(aqcars\(aq)
+\(aqcar\(aq
+>>> st.stem(\(aqmass\(aq)
+\(aqmas\(aq
+>>> st.stem(\(aqwas\(aq)
+\(aqwas\(aq
+>>> st.stem(\(aqbee\(aq)
+\(aqbee\(aq
+>>> st.stem(\(aqcompute\(aq)
+\(aqcomput\(aq
+>>> st.stem(\(aqadvisable\(aq)
+\(aqadvis\(aq
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBregexp\fP (\fIstr\fP\fI or \fP\fIregexp\fP) \-\- The regular expression that should be used to
+identify morphological affixes.
+.IP \(bu 2
+\fBmin\fP (\fIint\fP) \-\- The minimum length of string to stem
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem(word)
+Strip affixes from the token and return the stem.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtoken\fP (\fIstr\fP) \-\- The token that should be stemmed.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.stem.rslp module
+.INDENT 0.0
+.TP
+.B class nltk.stem.rslp.RSLPStemmer
+Bases: \fI\%nltk.stem.api.StemmerI\fP
+.sp
+A stemmer for Portuguese.
+.sp
+.nf
+.ft C
+>>> from nltk.stem import RSLPStemmer
+>>> st = RSLPStemmer()
+>>> # opening lines of Erico Verissimo\(aqs "Música ao Longe"
+>>> text = \(aq\(aq\(aq
+\&... Clarissa risca com giz no quadro\-negro a paisagem que os alunos
+\&... devem copiar .
Uma casinha de porta e janela , em cima duma +\&... coxilha .\(aq\(aq\(aq +>>> for token in text.split(): +\&... print(st.stem(token)) +clariss risc com giz no quadro\-negr a pais que os alun dev copi . +uma cas de port e janel , em cim dum coxilh . +.ft P +.fi +.INDENT 7.0 +.TP +.B apply_rule(word, rule_index) +.UNINDENT +.INDENT 7.0 +.TP +.B read_rule(filename) +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Strip affixes from the token and return the stem. +.INDENT 7.0 +.TP +.B Parameters +\fBtoken\fP (\fIstr\fP) \-\- The token that should be stemmed. +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.stem.snowball module +.sp +Snowball stemmers +.sp +This module provides a port of the Snowball stemmers +developed by Martin Porter. +.sp +There is also a demo function: \fIsnowball.demo()\fP\&. +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.ArabicStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +\fI\%https://github.com/snowballstem/snowball/blob/master/algorithms/arabic/stem_Unicode.sbl\fP (Original Algorithm) +The Snowball Arabic light Stemmer +Algorithm : Assem Chelli +.INDENT 7.0 +.INDENT 3.5 +Abdelkrim Aries +Lakhdar Benzahia +.UNINDENT +.UNINDENT +.sp +Nltk Version Author : Lakhdar Benzahia +.INDENT 7.0 +.TP +.B is_defined = False +.UNINDENT +.INDENT 7.0 +.TP +.B is_noun = True +.UNINDENT +.INDENT 7.0 +.TP +.B is_verb = True +.UNINDENT +.INDENT 7.0 +.TP +.B prefix_step2a_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B prefix_step3a_noun_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B prefix_step3b_noun_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +.INDENT 7.0 +.INDENT 3.5 +Stem an Arabic word and return the stemmed form. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP \-\- string +.TP +.B Returns +string +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B suffix_noun_step1a_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B suffix_noun_step2a_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B suffix_noun_step2b_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B suffix_noun_step2c2_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B suffix_verb_step2a_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B suffix_verb_step2b_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B suffixe_noun_step1b_success = False +.UNINDENT +.INDENT 7.0 +.TP +.B suffixes_verb_step1_success = False +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.DanishStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._ScandinavianStemmer\fP +.sp +The Danish Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Danish vowels. +.IP \(bu 2 +\fB__consonants\fP \-\- The Danish consonants. +.IP \(bu 2 +\fB__double_consonants\fP \-\- The Danish double consonants. +.IP \(bu 2 +\fB__s_ending\fP \-\- Letters that may directly appear before a word final \(aqs\(aq. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. +.IP \(bu 2 +\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the Danish +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/danish/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a Danish word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. 
+.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.DutchStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The Dutch Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Dutch vowels. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step3b_suffixes\fP \-\- Suffixes to be deleted in step 3b of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the Dutch +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/dutch/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a Dutch word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.EnglishStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The English Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The English vowels. +.IP \(bu 2 +\fB__double_consonants\fP \-\- The English double consonants. +.IP \(bu 2 +\fB__li_ending\fP \-\- Letters that may directly appear before a word final \(aqli\(aq. +.IP \(bu 2 +\fB__step0_suffixes\fP \-\- Suffixes to be deleted in step 0 of the algorithm. +.IP \(bu 2 +\fB__step1a_suffixes\fP \-\- Suffixes to be deleted in step 1a of the algorithm. +.IP \(bu 2 +\fB__step1b_suffixes\fP \-\- Suffixes to be deleted in step 1b of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. +.IP \(bu 2 +\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm. +.IP \(bu 2 +\fB__step4_suffixes\fP \-\- Suffixes to be deleted in step 4 of the algorithm. +.IP \(bu 2 +\fB__step5_suffixes\fP \-\- Suffixes to be deleted in step 5 of the algorithm. +.IP \(bu 2 +\fB__special_words\fP \-\- A dictionary containing words +which have to be stemmed specially. +.UNINDENT +.TP +.B Note +A detailed description of the English +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/english/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem an English word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.FinnishStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The Finnish Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Finnish vowels. +.IP \(bu 2 +\fB__restricted_vowels\fP \-\- A subset of the Finnish vowels. +.IP \(bu 2 +\fB__long_vowels\fP \-\- The Finnish vowels in their long forms. +.IP \(bu 2 +\fB__consonants\fP \-\- The Finnish consonants. +.IP \(bu 2 +\fB__double_consonants\fP \-\- The Finnish double consonants. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. +.IP \(bu 2 +\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm. 
+.IP \(bu 2 +\fB__step4_suffixes\fP \-\- Suffixes to be deleted in step 4 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the Finnish +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/finnish/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a Finnish word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.FrenchStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The French Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The French vowels. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2a_suffixes\fP \-\- Suffixes to be deleted in step 2a of the algorithm. +.IP \(bu 2 +\fB__step2b_suffixes\fP \-\- Suffixes to be deleted in step 2b of the algorithm. +.IP \(bu 2 +\fB__step4_suffixes\fP \-\- Suffixes to be deleted in step 4 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the French +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/french/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a French word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.GermanStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The German Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The German vowels. +.IP \(bu 2 +\fB__s_ending\fP \-\- Letters that may directly appear before a word final \(aqs\(aq. +.IP \(bu 2 +\fB__st_ending\fP \-\- Letter that may directly appear before a word final \(aqst\(aq. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. +.IP \(bu 2 +\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the German +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/german/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a German word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.HungarianStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._LanguageSpecificStemmer\fP +.sp +The Hungarian Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Hungarian vowels. +.IP \(bu 2 +\fB__digraphs\fP \-\- The Hungarian digraphs. +.IP \(bu 2 +\fB__double_consonants\fP \-\- The Hungarian double consonants. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. 
+.IP \(bu 2 +\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm. +.IP \(bu 2 +\fB__step4_suffixes\fP \-\- Suffixes to be deleted in step 4 of the algorithm. +.IP \(bu 2 +\fB__step5_suffixes\fP \-\- Suffixes to be deleted in step 5 of the algorithm. +.IP \(bu 2 +\fB__step6_suffixes\fP \-\- Suffixes to be deleted in step 6 of the algorithm. +.IP \(bu 2 +\fB__step7_suffixes\fP \-\- Suffixes to be deleted in step 7 of the algorithm. +.IP \(bu 2 +\fB__step8_suffixes\fP \-\- Suffixes to be deleted in step 8 of the algorithm. +.IP \(bu 2 +\fB__step9_suffixes\fP \-\- Suffixes to be deleted in step 9 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the Hungarian +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/hungarian/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem an Hungarian word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.ItalianStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The Italian Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Italian vowels. +.IP \(bu 2 +\fB__step0_suffixes\fP \-\- Suffixes to be deleted in step 0 of the algorithm. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the Italian +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/italian/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem an Italian word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.NorwegianStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._ScandinavianStemmer\fP +.sp +The Norwegian Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Norwegian vowels. +.IP \(bu 2 +\fB__s_ending\fP \-\- Letters that may directly appear before a word final \(aqs\(aq. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. +.IP \(bu 2 +\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the Norwegian +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/norwegian/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a Norwegian word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.PorterStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._LanguageSpecificStemmer\fP, \fI\%nltk.stem.porter.PorterStemmer\fP +.sp +A word stemmer based on the original Porter stemming algorithm. 
+.INDENT 7.0 +.INDENT 3.5 +Porter, M. "An algorithm for suffix stripping." +Program 14.3 (1980): 130\-137. +.UNINDENT +.UNINDENT +.sp +A few minor modifications have been made to Porter\(aqs basic +algorithm. See the source code of the module +nltk.stem.porter for more information. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.PortugueseStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The Portuguese Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Portuguese vowels. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. +.IP \(bu 2 +\fB__step4_suffixes\fP \-\- Suffixes to be deleted in step 4 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the Portuguese +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/portuguese/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a Portuguese word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.RomanianStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The Romanian Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Romanian vowels. +.IP \(bu 2 +\fB__step0_suffixes\fP \-\- Suffixes to be deleted in step 0 of the algorithm. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm. +.IP \(bu 2 +\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm. +.UNINDENT +.TP +.B Note +A detailed description of the Romanian +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/romanian/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a Romanian word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. +.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.RussianStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._LanguageSpecificStemmer\fP +.sp +The Russian Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__perfective_gerund_suffixes\fP \-\- Suffixes to be deleted. +.IP \(bu 2 +\fB__adjectival_suffixes\fP \-\- Suffixes to be deleted. +.IP \(bu 2 +\fB__reflexive_suffixes\fP \-\- Suffixes to be deleted. +.IP \(bu 2 +\fB__verb_suffixes\fP \-\- Suffixes to be deleted. +.IP \(bu 2 +\fB__noun_suffixes\fP \-\- Suffixes to be deleted. +.IP \(bu 2 +\fB__superlative_suffixes\fP \-\- Suffixes to be deleted. +.IP \(bu 2 +\fB__derivational_suffixes\fP \-\- Suffixes to be deleted. +.UNINDENT +.TP +.B Note +A detailed description of the Russian +stemming algorithm can be found under +\fI\%http://snowball.tartarus.org/algorithms/russian/stemmer.html\fP +.UNINDENT +.INDENT 7.0 +.TP +.B stem(word) +Stem a Russian word and return the stemmed form. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed. 
+.TP +.B Returns +The stemmed form. +.TP +.B Return type +unicode +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.SnowballStemmer(language, ignore_stopwords=False) +Bases: \fI\%nltk.stem.api.StemmerI\fP +.sp +Snowball Stemmer +.sp +The following languages are supported: +Arabic, Danish, Dutch, English, Finnish, French, German, +Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, +Spanish and Swedish. +.sp +The algorithm for English is documented here: +.INDENT 7.0 +.INDENT 3.5 +Porter, M. "An algorithm for suffix stripping." +Program 14.3 (1980): 130\-137. +.UNINDENT +.UNINDENT +.sp +The algorithms have been developed by Martin Porter. +These stemmers are called Snowball, because Porter created +a programming language with this name for creating +new stemming algorithms. There is more information available +at \fI\%http://snowball.tartarus.org/\fP +.sp +The stemmer is invoked as shown below: +.sp +.nf +.ft C +>>> from nltk.stem import SnowballStemmer +>>> print(" ".join(SnowballStemmer.languages)) # See which languages are supported +arabic danish dutch english finnish french german hungarian +italian norwegian porter portuguese romanian russian +spanish swedish +>>> stemmer = SnowballStemmer("german") # Choose a language +>>> stemmer.stem("Autobahnen") # Stem a word +\(aqautobahn\(aq +.ft P +.fi +.sp +Invoking the stemmers that way is useful if you do not know the +language to be stemmed at runtime. Alternatively, if you already know +the language, then you can invoke the language specific stemmer directly: +.sp +.nf +.ft C +>>> from nltk.stem.snowball import GermanStemmer +>>> stemmer = GermanStemmer() +>>> stemmer.stem("Autobahnen") +\(aqautobahn\(aq +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBlanguage\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The language whose subclass is instantiated. +.IP \(bu 2 +\fBignore_stopwords\fP (\fIbool\fP) \-\- If set to True, stopwords are +not stemmed and returned unchanged. +Set to False by default. +.UNINDENT +.TP +.B Raises +\fBValueError\fP \-\- If there is no stemmer for the specified +language, a ValueError is raised. +.UNINDENT +.INDENT 7.0 +.TP +.B languages = (\(aqarabic\(aq, \(aqdanish\(aq, \(aqdutch\(aq, \(aqenglish\(aq, \(aqfinnish\(aq, \(aqfrench\(aq, \(aqgerman\(aq, \(aqhungarian\(aq, \(aqitalian\(aq, \(aqnorwegian\(aq, \(aqporter\(aq, \(aqportuguese\(aq, \(aqromanian\(aq, \(aqrussian\(aq, \(aqspanish\(aq, \(aqswedish\(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B stem(token) +Strip affixes from the token and return the stem. +.INDENT 7.0 +.TP +.B Parameters +\fBtoken\fP (\fIstr\fP) \-\- The token that should be stemmed. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.stem.snowball.SpanishStemmer(ignore_stopwords=False) +Bases: \fBnltk.stem.snowball._StandardStemmer\fP +.sp +The Spanish Snowball stemmer. +.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB__vowels\fP \-\- The Spanish vowels. +.IP \(bu 2 +\fB__step0_suffixes\fP \-\- Suffixes to be deleted in step 0 of the algorithm. +.IP \(bu 2 +\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm. +.IP \(bu 2 +\fB__step2a_suffixes\fP \-\- Suffixes to be deleted in step 2a of the algorithm. +.IP \(bu 2 +\fB__step2b_suffixes\fP \-\- Suffixes to be deleted in step 2b of the algorithm. +.IP \(bu 2 +\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm. 
+.UNINDENT
+.TP
+.B Note
+A detailed description of the Spanish
+stemming algorithm can be found under
+\fI\%http://snowball.tartarus.org/algorithms/spanish/stemmer.html\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem(word)
+Stem a Spanish word and return the stemmed form.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed.
+.TP
+.B Returns
+The stemmed form.
+.TP
+.B Return type
+unicode
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.stem.snowball.SwedishStemmer(ignore_stopwords=False)
+Bases: \fBnltk.stem.snowball._ScandinavianStemmer\fP
+.sp
+The Swedish Snowball stemmer.
+.INDENT 7.0
+.TP
+.B Variables
+.INDENT 7.0
+.IP \(bu 2
+\fB__vowels\fP \-\- The Swedish vowels.
+.IP \(bu 2
+\fB__s_ending\fP \-\- Letters that may directly appear before a word final \(aqs\(aq.
+.IP \(bu 2
+\fB__step1_suffixes\fP \-\- Suffixes to be deleted in step 1 of the algorithm.
+.IP \(bu 2
+\fB__step2_suffixes\fP \-\- Suffixes to be deleted in step 2 of the algorithm.
+.IP \(bu 2
+\fB__step3_suffixes\fP \-\- Suffixes to be deleted in step 3 of the algorithm.
+.UNINDENT
+.TP
+.B Note
+A detailed description of the Swedish
+stemming algorithm can be found under
+\fI\%http://snowball.tartarus.org/algorithms/swedish/stemmer.html\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stem(word)
+Stem a Swedish word and return the stemmed form.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBword\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The word that is stemmed.
+.TP
+.B Returns
+The stemmed form.
+.TP
+.B Return type
+unicode
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.stem.snowball.demo()
+This function provides a demonstration of the Snowball stemmers.
+.sp
+After invoking this function and specifying a language,
+it stems an excerpt of the Universal Declaration of Human Rights
+(which is a part of the NLTK corpus collection) and then prints
+out the original and the stemmed text.
+.UNINDENT
+.SS nltk.stem.util module
+.INDENT 0.0
+.TP
+.B nltk.stem.util.prefix_replace(original, old, new)
+.INDENT 7.0
+.INDENT 3.5
+Replaces the old prefix of the original string by a new prefix
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBoriginal\fP \-\- string
+.IP \(bu 2
+\fBold\fP \-\- string
+.IP \(bu 2
+\fBnew\fP \-\- string
+.UNINDENT
+.TP
+.B Returns
+string
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.stem.util.suffix_replace(original, old, new)
+Replaces the old suffix of the original string by a new suffix
+.UNINDENT
+.SS nltk.stem.wordnet module
+.INDENT 0.0
+.TP
+.B class nltk.stem.wordnet.WordNetLemmatizer
+Bases: \fBobject\fP
+.sp
+WordNet Lemmatizer
+.sp
+Lemmatize using WordNet\(aqs built\-in morphy function.
+Returns the input word unchanged if it cannot be found in WordNet.
+.sp
+.nf
+.ft C
+>>> from nltk.stem import WordNetLemmatizer
+>>> wnl = WordNetLemmatizer()
+>>> print(wnl.lemmatize(\(aqdogs\(aq))
+dog
+>>> print(wnl.lemmatize(\(aqchurches\(aq))
+church
+>>> print(wnl.lemmatize(\(aqaardwolves\(aq))
+aardwolf
+>>> print(wnl.lemmatize(\(aqabaci\(aq))
+abacus
+>>> print(wnl.lemmatize(\(aqhardrock\(aq))
+hardrock
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B lemmatize(word: str, pos: str = \(aqn\(aq) -> str
+Lemmatize \fIword\fP using WordNet\(aqs built\-in morphy function.
+Returns the input word unchanged if it cannot be found in WordNet.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBword\fP (\fIstr\fP) \-\- The input word to lemmatize.
+.IP \(bu 2
+\fBpos\fP (\fIstr\fP) \-\- The Part Of Speech tag. Valid options are \fI"n"\fP for nouns,
+\fI"v"\fP for verbs, \fI"a"\fP for adjectives, \fI"r"\fP for adverbs and \fI"s"\fP
+for satellite adjectives.
+.UNINDENT
+.TP
+.B Returns
+The lemma of \fIword\fP, for the given \fIpos\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS Module contents
+.sp
+NLTK Stemmers
+.sp
+Interfaces used to remove morphological affixes from words, leaving
+only the word stem. Stemming algorithms aim to remove those affixes
+required for e.g. grammatical role, tense or derivational morphology,
+leaving only the stem of the word. This is a difficult problem due to
+irregular words (e.g. common verbs in English), complicated
+morphological rules, and part\-of\-speech and sense ambiguities
+(e.g. \fBceil\-\fP is not the stem of \fBceiling\fP).
+.sp
+StemmerI defines a standard interface for stemmers.
+.SS nltk.tag package
+.SS Submodules
+.SS nltk.tag.api module
+.sp
+Interface for tagging each token in a sentence with supplementary
+information, such as its part of speech.
+.INDENT 0.0
+.TP
+.B class nltk.tag.api.FeaturesetTaggerI
+Bases: \fI\%nltk.tag.api.TaggerI\fP
+.sp
+A tagger that requires tokens to be \fBfeaturesets\fP\&. A featureset
+is a dictionary that maps from feature names to feature
+values. See \fBnltk.classify\fP for more information about features
+and featuresets.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.api.TaggerI
+Bases: \fBobject\fP
+.sp
+A processing interface for assigning a tag to each token in a list.
+Tags are case\-sensitive strings that identify some property of each
+token, such as its part of speech or its sense.
+.sp
+Some taggers require specific types for their tokens. This is
+generally indicated by the use of a sub\-interface to \fBTaggerI\fP\&.
+For example, featureset taggers, which are subclassed from
+\fBFeaturesetTaggerI\fP, require that each token be a \fBfeatureset\fP\&.
+.INDENT 7.0
+.TP
+.B Subclasses must define:
+.INDENT 7.0
+.IP \(bu 2
+either \fBtag()\fP or \fBtag_sents()\fP (or both)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B evaluate(gold)
+Score the accuracy of the tagger against the gold standard.
+Strip the tags from the gold standard text, retag it using
+the tagger, then compute the accuracy score.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgold\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- The list of tagged sentences to score the tagger on.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract tag(tokens)
+Determine the most appropriate tag sequence for the given
+token sequence, and return a corresponding list of tagged
+tokens. A tagged token is encoded as a tuple \fB(token, tag)\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+list(tuple(str, str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag_sents(sentences)
+Apply \fBself.tag()\fP to each element of \fIsentences\fP\&. I.e.:
+.INDENT 7.0
+.INDENT 3.5
+return [self.tag(sent) for sent in sentences]
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tag.brill module
+.INDENT 0.0
+.TP
+.B class nltk.tag.brill.BrillTagger(initial_tagger, rules, training_stats=None)
+Bases: \fI\%nltk.tag.api.TaggerI\fP
+.sp
+Brill\(aqs transformational rule\-based tagger. Brill taggers use an
+initial tagger (such as \fBtag.DefaultTagger\fP) to assign an initial
+tag sequence to a text; and then apply an ordered list of
+transformational rules to correct the tags of individual tokens.
+These transformation rules are specified by the \fBTagRule\fP
+interface.
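+.sp
+A minimal end\-to\-end sketch of the rule\-learning workflow described
+below (it assumes the \fBtreebank\fP sample corpus is installed; the
+template set, data slice and test sentence are illustrative only):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import treebank
+>>> from nltk.tag import BrillTaggerTrainer, DefaultTagger
+>>> from nltk.tag.brill import nltkdemo18
+>>> from nltk.tbl.template import Template
+>>> Template._cleartemplates()  # avoid clashes with templates from earlier runs
+>>> trainer = BrillTaggerTrainer(DefaultTagger(\(aqNN\(aq), nltkdemo18())
+>>> brill_tagger = trainer.train(treebank.tagged_sents()[:100], max_rules=20)
+>>> tagged = brill_tagger.tag([\(aqThe\(aq, \(aqcat\(aq, \(aqsat\(aq])
+.ft P
+.fi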
+.sp +Brill taggers can be created directly, from an initial tagger and +a list of transformational rules; but more often, Brill taggers +are created by learning rules from a training corpus, using one +of the TaggerTrainers available. +.INDENT 7.0 +.TP +.B batch_tag_incremental(sequences, gold) +Tags by applying each rule to the entire corpus (rather than all rules to a +single sequence). The point is to collect statistics on the test set for +individual rules. +.sp +NOTE: This is inefficient (does not build any index, so will traverse the entire +corpus N times for N rules) \-\- usually you would not care about statistics for +individual rules and thus use batch_tag() instead +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsequences\fP (\fIlist of list of strings\fP) \-\- lists of token sequences (sentences, in some applications) to be tagged +.IP \(bu 2 +\fBgold\fP (\fIlist of list of strings\fP) \-\- the gold standard +.UNINDENT +.TP +.B Returns +tuple of (tagged_sequences, ordered list of rule scores (one for each rule)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod decode_json_obj(obj) +.UNINDENT +.INDENT 7.0 +.TP +.B encode_json_obj() +.UNINDENT +.INDENT 7.0 +.TP +.B json_tag = \(aqnltk.tag.BrillTagger\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B print_template_statistics(test_stats=None, printunused=True) +Print a list of all templates, ranked according to efficiency. +.sp +If test_stats is available, the templates are ranked according to their +relative contribution (summed for all rules created from a given template, +weighted by score) to the performance on the test set. If no test_stats, then +statistics collected during training are used instead. There is also +an unweighted measure (just counting the rules). This is less informative, +though, as many low\-score rules will appear towards end of training. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtest_stats\fP (\fIdict of str \-> any\fP\fI (\fP\fIbut usually numbers\fP\fI)\fP) \-\- dictionary of statistics collected during testing +.IP \(bu 2 +\fBprintunused\fP (\fIbool\fP) \-\- if True, print a list of all unused templates +.UNINDENT +.TP +.B Returns +None +.TP +.B Return type +None +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B rules() +Return the ordered list of transformation rules that this tagger has learnt +.INDENT 7.0 +.TP +.B Returns +the ordered list of transformation rules that correct the initial tagging +.TP +.B Return type +list of Rules +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tag(tokens) +Determine the most appropriate tag sequence for the given +token sequence, and return a corresponding list of tagged +tokens. A tagged token is encoded as a tuple \fB(token, tag)\fP\&. +.INDENT 7.0 +.TP +.B Return type +list(tuple(str, str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B train_stats(statistic=None) +Return a named statistic collected during training, or a dictionary of all +available statistics if no name given +.INDENT 7.0 +.TP +.B Parameters +\fBstatistic\fP (\fIstr\fP) \-\- name of statistic +.TP +.B Returns +some statistic collected during training of this tagger +.TP +.B Return type +any (but usually a number) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tag.brill.Pos(positions, end=None) +Bases: \fBnltk.tbl.feature.Feature\fP +.sp +Feature which examines the tags of nearby tokens. +.INDENT 7.0 +.TP +.B static extract_property(tokens, index) +@return: The given token\(aqs tag. 
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.brill.Pos\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.brill.Word(positions, end=None)
+Bases: \fBnltk.tbl.feature.Feature\fP
+.sp
+Feature which examines the text (word) of nearby tokens.
+.INDENT 7.0
+.TP
+.B static extract_property(tokens, index)
+@return: The given token\(aqs text.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.brill.Word\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.brill.brill24()
+Return 24 templates of the seminal TBL paper, Brill (1995)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.brill.describe_template_sets()
+Print the available template sets in this demo, with a short description
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.brill.fntbl37()
+Return 37 templates taken from the POS tagging task of the
+fntbl distribution \fI\%http://www.cs.jhu.edu/~rflorian/fntbl/\fP
+(37 is after excluding a handful which do not condition on Pos[0];
+fntbl can do that but the current nltk implementation cannot.)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.brill.nltkdemo18()
+Return 18 templates, from the original nltk demo, in multi\-feature syntax
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.brill.nltkdemo18plus()
+Return 18 templates, from the original nltk demo, and additionally a few
+multi\-feature ones (the motivation is easy comparison with nltkdemo18)
+.UNINDENT
+.SS nltk.tag.brill_trainer module
+.INDENT 0.0
+.TP
+.B class nltk.tag.brill_trainer.BrillTaggerTrainer(initial_tagger, templates, trace=0, deterministic=None, ruleformat=\(aqstr\(aq)
+Bases: \fBobject\fP
+.sp
+A trainer for TBL taggers.
+.INDENT 7.0
+.TP
+.B train(train_sents, max_rules=200, min_score=2, min_acc=None)
+Trains the Brill tagger on the corpus \fItrain_sents\fP,
+producing at most \fImax_rules\fP transformations, each of which
+reduces the net number of errors in the corpus by at least
+\fImin_score\fP, and each of which has accuracy not lower than
+\fImin_acc\fP\&.
+.sp
+.nf
+.ft C
+#imports
+>>> from nltk.tbl.template import Template
+>>> from nltk.tag.brill import Pos, Word
+>>> from nltk.tag import untag, RegexpTagger, BrillTaggerTrainer
+.ft P
+.fi
+.sp
+.nf
+.ft C
+#some data
+>>> from nltk.corpus import treebank
+>>> training_data = treebank.tagged_sents()[:100]
+>>> baseline_data = treebank.tagged_sents()[100:200]
+>>> gold_data = treebank.tagged_sents()[200:300]
+>>> testing_data = [untag(s) for s in gold_data]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> backoff = RegexpTagger([
+\&... (r\(aq^\-?[0\-9]+(.[0\-9]+)?$\(aq, \(aqCD\(aq), # cardinal numbers
+\&... (r\(aq(The|the|A|a|An|an)$\(aq, \(aqAT\(aq), # articles
+\&... (r\(aq.*able$\(aq, \(aqJJ\(aq), # adjectives
+\&... (r\(aq.*ness$\(aq, \(aqNN\(aq), # nouns formed from adjectives
+\&... (r\(aq.*ly$\(aq, \(aqRB\(aq), # adverbs
+\&... (r\(aq.*s$\(aq, \(aqNNS\(aq), # plural nouns
+\&... (r\(aq.*ing$\(aq, \(aqVBG\(aq), # gerunds
+\&... (r\(aq.*ed$\(aq, \(aqVBD\(aq), # past tense verbs
+\&... (r\(aq.*\(aq, \(aqNN\(aq) # nouns (default)
+\&... ])
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> baseline = backoff #see NOTE1
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> baseline.evaluate(gold_data)
+0.2450142...
+.ft P +.fi +.sp +#templates +>>> Template._cleartemplates() #clear any templates created in earlier tests +>>> templates = [Template(Pos([\-1])), Template(Pos([\-1]), Word([0]))] +.sp +#construct a BrillTaggerTrainer +>>> tt = BrillTaggerTrainer(baseline, templates, trace=3) +.sp +.nf +.ft C +>>> tagger1 = tt.train(training_data, max_rules=10) +TBL train (fast) (seqs: 100; tokens: 2417; tpls: 2; min score: 2; min acc: None) +Finding initial useful rules... + Found 845 useful rules. + + B | + S F r O | Score = Fixed \- Broken + c i o t | R Fixed = num tags changed incorrect \-> correct + o x k h | u Broken = num tags changed correct \-> incorrect + r e e e | l Other = num tags changed incorrect \-> incorrect + e d n r | e +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + 132 132 0 0 | AT\->DT if Pos:NN@[\-1] + 85 85 0 0 | NN\->, if Pos:NN@[\-1] & Word:,@[0] + 69 69 0 0 | NN\->. if Pos:NN@[\-1] & Word:.@[0] + 51 51 0 0 | NN\->IN if Pos:NN@[\-1] & Word:of@[0] + 47 63 16 161 | NN\->IN if Pos:NNS@[\-1] + 33 33 0 0 | NN\->TO if Pos:NN@[\-1] & Word:to@[0] + 26 26 0 0 | IN\->. if Pos:NNS@[\-1] & Word:.@[0] + 24 24 0 0 | IN\->, if Pos:NNS@[\-1] & Word:,@[0] + 22 27 5 24 | NN\->\-NONE\- if Pos:VBD@[\-1] + 17 17 0 0 | NN\->CC if Pos:NN@[\-1] & Word:and@[0] +.ft P +.fi +.sp +.nf +.ft C +>>> tagger1.rules()[1:3] +(Rule(\(aq001\(aq, \(aqNN\(aq, \(aq,\(aq, [(Pos([\-1]),\(aqNN\(aq), (Word([0]),\(aq,\(aq)]), Rule(\(aq001\(aq, \(aqNN\(aq, \(aq.\(aq, [(Pos([\-1]),\(aqNN\(aq), (Word([0]),\(aq.\(aq)])) +.ft P +.fi +.sp +.nf +.ft C +>>> train_stats = tagger1.train_stats() +>>> [train_stats[stat] for stat in [\(aqinitialerrors\(aq, \(aqfinalerrors\(aq, \(aqrulescores\(aq]] +[1775, 1269, [132, 85, 69, 51, 47, 33, 26, 24, 22, 17]] +.ft P +.fi +.sp +.nf +.ft C +>>> tagger1.print_template_statistics(printunused=False) +TEMPLATE STATISTICS (TRAIN) 2 templates, 10 rules) +TRAIN ( 2417 tokens) initial 1775 0.2656 final: 1269 0.4750 +#ID | Score (train) | #Rules | Template +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +001 | 305 0.603 | 7 0.700 | Template(Pos([\-1]),Word([0])) +000 | 201 0.397 | 3 0.300 | Template(Pos([\-1])) + + +.ft P +.fi +.sp +.nf +.ft C +>>> tagger1.evaluate(gold_data) +0.43996... +.ft P +.fi +.sp +.nf +.ft C +>>> tagged, test_stats = tagger1.batch_tag_incremental(testing_data, gold_data) +.ft P +.fi +.sp +.nf +.ft C +>>> tagged[33][12:] == [(\(aqforeign\(aq, \(aqIN\(aq), (\(aqdebt\(aq, \(aqNN\(aq), (\(aqof\(aq, \(aqIN\(aq), (\(aq$\(aq, \(aqNN\(aq), (\(aq64\(aq, \(aqCD\(aq), +\&... (\(aqbillion\(aq, \(aqNN\(aq), (\(aq*U*\(aq, \(aqNN\(aq), (\(aq\-\-\(aq, \(aqNN\(aq), (\(aqthe\(aq, \(aqDT\(aq), (\(aqthird\-highest\(aq, \(aqNN\(aq), (\(aqin\(aq, \(aqNN\(aq), +\&... (\(aqthe\(aq, \(aqDT\(aq), (\(aqdeveloping\(aq, \(aqVBG\(aq), (\(aqworld\(aq, \(aqNN\(aq), (\(aq.\(aq, \(aq.\(aq)] +True +.ft P +.fi +.sp +.nf +.ft C +>>> [test_stats[stat] for stat in [\(aqinitialerrors\(aq, \(aqfinalerrors\(aq, \(aqrulescores\(aq]] +[1855, 1376, [100, 85, 67, 58, 27, 36, 27, 16, 31, 32]] +.ft P +.fi +.sp +# a high\-accuracy tagger +>>> tagger2 = tt.train(training_data, max_rules=10, min_acc=0.99) +TBL train (fast) (seqs: 100; tokens: 2417; tpls: 2; min score: 2; min acc: 0.99) +Finding initial useful rules... +.INDENT 7.0 +.INDENT 3.5 +Found 845 useful rules. 
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+           B      |
+   S   F   r   O  |        Score = Fixed \- Broken
+   c   i   o   t  |  R     Fixed = num tags changed incorrect \-> correct
+   o   x   k   h  |  u     Broken = num tags changed correct \-> incorrect
+   r   e   e   e  |  l     Other = num tags changed incorrect \-> incorrect
+   e   d   n   r  |  e
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+ 132 132   0   0  | AT\->DT if Pos:NN@[\-1]
+  85  85   0   0  | NN\->, if Pos:NN@[\-1] & Word:,@[0]
+  69  69   0   0  | NN\->. if Pos:NN@[\-1] & Word:.@[0]
+  51  51   0   0  | NN\->IN if Pos:NN@[\-1] & Word:of@[0]
+  36  36   0   0  | NN\->TO if Pos:NN@[\-1] & Word:to@[0]
+  26  26   0   0  | NN\->. if Pos:NNS@[\-1] & Word:.@[0]
+  24  24   0   0  | NN\->, if Pos:NNS@[\-1] & Word:,@[0]
+  19  19   0   6  | NN\->VB if Pos:TO@[\-1]
+  18  18   0   0  | CD\->\-NONE\- if Pos:NN@[\-1] & Word:0@[0]
+  18  18   0   0  | NN\->CC if Pos:NN@[\-1] & Word:and@[0]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> tagger2.evaluate(gold_data)
+0.44159544...
+>>> tagger2.rules()[2:4]
+(Rule(\(aq001\(aq, \(aqNN\(aq, \(aq.\(aq, [(Pos([\-1]),\(aqNN\(aq), (Word([0]),\(aq.\(aq)]), Rule(\(aq001\(aq, \(aqNN\(aq, \(aqIN\(aq, [(Pos([\-1]),\(aqNN\(aq), (Word([0]),\(aqof\(aq)]))
+.ft P
+.fi
+.sp
+# NOTE1: (!!FIXME) A far better baseline uses nltk.tag.UnigramTagger,
+# with a RegexpTagger only as backoff. For instance,
+# >>> baseline = UnigramTagger(baseline_data, backoff=backoff)
+# However, as of Nov 2013, nltk.tag.UnigramTagger does not yield consistent results
+# between python versions. The simplistic backoff above is a workaround to make doctests
+# get consistent input.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrain_sents\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP\fI)\fP) \-\- training data
+.IP \(bu 2
+\fBmax_rules\fP (\fIint\fP) \-\- output at most max_rules rules
+.IP \(bu 2
+\fBmin_score\fP (\fIint\fP) \-\- stop training when no rules better than min_score can be found
+.IP \(bu 2
+\fBmin_acc\fP (\fIfloat\fP\fI or \fP\fINone\fP) \-\- discard any rule with lower accuracy than min_acc
+.UNINDENT
+.TP
+.B Returns
+the learned tagger
+.TP
+.B Return type
+BrillTagger
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tag.crf module
+.sp
+A module for POS tagging using CRFSuite
+.INDENT 0.0
+.TP
+.B class nltk.tag.crf.CRFTagger(feature_func=None, verbose=False, training_opt={})
+Bases: \fI\%nltk.tag.api.TaggerI\fP
+.sp
+A module for POS tagging using CRFSuite \fI\%https://pypi.python.org/pypi/python\-crfsuite\fP
+.sp
+.nf
+.ft C
+>>> from nltk.tag import CRFTagger
+>>> ct = CRFTagger()
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> train_data = [[(\(aqUniversity\(aq,\(aqNoun\(aq), (\(aqis\(aq,\(aqVerb\(aq), (\(aqa\(aq,\(aqDet\(aq), (\(aqgood\(aq,\(aqAdj\(aq), (\(aqplace\(aq,\(aqNoun\(aq)],
+\&... [(\(aqdog\(aq,\(aqNoun\(aq),(\(aqeat\(aq,\(aqVerb\(aq),(\(aqmeat\(aq,\(aqNoun\(aq)]]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> ct.train(train_data,\(aqmodel.crf.tagger\(aq)
+>>> ct.tag_sents([[\(aqdog\(aq,\(aqis\(aq,\(aqgood\(aq], [\(aqCat\(aq,\(aqeat\(aq,\(aqmeat\(aq]])
+[[(\(aqdog\(aq, \(aqNoun\(aq), (\(aqis\(aq, \(aqVerb\(aq), (\(aqgood\(aq, \(aqAdj\(aq)], [(\(aqCat\(aq, \(aqNoun\(aq), (\(aqeat\(aq, \(aqVerb\(aq), (\(aqmeat\(aq, \(aqNoun\(aq)]]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> gold_sentences = [[(\(aqdog\(aq,\(aqNoun\(aq),(\(aqis\(aq,\(aqVerb\(aq),(\(aqgood\(aq,\(aqAdj\(aq)] , [(\(aqCat\(aq,\(aqNoun\(aq),(\(aqeat\(aq,\(aqVerb\(aq), (\(aqmeat\(aq,\(aqNoun\(aq)]]
+>>> ct.evaluate(gold_sentences)
+1.0
+.ft P
+.fi
+.sp
+Setting the learned model file:
+.sp
+.nf
+.ft C
+>>> ct = CRFTagger()
+>>> ct.set_model_file(\(aqmodel.crf.tagger\(aq)
+>>> ct.evaluate(gold_sentences)
+1.0
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B set_model_file(model_file)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag(tokens)
+Tag a sentence using the Python CRFSuite tagger. Note: before using this
+function, the user should specify the model_file either by
+.INDENT 7.0
+.IP \(bu 2
+training a new model using the \fBtrain()\fP function
+.IP \(bu 2
+using a pre\-trained model, which is set via the \fBset_model_file()\fP function
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- list of tokens to be tagged.
+.TP
+.B Returns
+list of tagged tokens.
+.TP
+.B Return type
+list(tuple(str, str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag_sents(sents)
+Tag a list of sentences. Note: before using this function, the user
+should specify the model_file either by
+.INDENT 7.0
+.IP \(bu 2
+training a new model using the \fBtrain()\fP function
+.IP \(bu 2
+using a pre\-trained model, which is set via the \fBset_model_file()\fP function
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsents\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- list of sentences to be tagged.
+.TP
+.B Returns
+list of tagged sentences.
+.TP
+.B Return type
+list(list(tuple(str, str)))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(train_data, model_file)
+Train the CRF tagger using CRFSuite.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrain_data\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- the list of annotated sentences.
+.IP \(bu 2
+\fBmodel_file\fP \-\- the model will be saved to this file.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tag.hmm module
+.sp
+Hidden Markov Models (HMMs) are largely used to assign the correct label
+sequence to sequential data or assess the probability of a given label and
+data sequence. These models are finite state machines characterised by a
+number of states, transitions between these states, and output symbols
+emitted while in each state. The HMM is an extension to the Markov chain,
+where each state corresponds deterministically to a given event. In the HMM
+the observation is a probabilistic function of the state. HMMs share the
+Markov chain\(aqs assumption that the probability of transition from one
+state to another only depends on the current state \- i.e. the series of
+states that led to the current state is not used. They are also time
+invariant.
+.sp
+The HMM is a directed graph, with probability weighted edges (representing the
+probability of a transition between the source and sink states) where each
+vertex emits an output symbol when entered. The symbol (or observation) is
+non\-deterministically generated.
+For this reason, knowing that a sequence of
+output observations was generated by a given HMM does not mean that the
+corresponding sequence of states (and what the current state is) is known.
+This is the \(aqhidden\(aq in the hidden Markov model.
+.sp
+Formally, an HMM can be characterised by:
+.INDENT 0.0
+.IP \(bu 2
+the output observation alphabet. This is the set of symbols which may be
+observed as output of the system.
+.IP \(bu 2
+the set of states.
+.IP \(bu 2
+the transition probabilities \fIa_{ij} = P(s_t = j | s_{t\-1} = i)\fP\&. These
+represent the probability of transition to each state from a given state.
+.IP \(bu 2
+the output probability matrix \fIb_i(k) = P(X_t = o_k | s_t = i)\fP\&. These
+represent the probability of observing each symbol in a given state.
+.IP \(bu 2
+the initial state distribution. This gives the probability of starting
+in each state.
+.UNINDENT
+.sp
+To ground this discussion, take a common NLP application, part\-of\-speech (POS)
+tagging. An HMM is desirable for this task as the highest probability tag
+sequence can be calculated for a given sequence of word forms. This differs
+from other tagging techniques which often tag each word individually, seeking
+to optimise each individual tagging greedily without regard to the optimal
+combination of tags for a larger unit, such as a sentence. The HMM does this
+with the Viterbi algorithm, which efficiently computes the optimal path
+through the graph given the sequence of word forms.
+.sp
+In POS tagging the states usually have a 1:1 correspondence with the tag
+alphabet \- i.e. each state represents a single tag. The output observation
+alphabet is the set of word forms (the lexicon), and the remaining three
+parameters are derived by a training regime. With this information the
+probability of a given sentence can be easily derived, by simply summing the
+probability of each distinct path through the model. Similarly, the highest
+probability tagging sequence can be derived with the Viterbi algorithm,
+yielding a state sequence which can be mapped into a tag sequence.
+.sp
+This discussion assumes that the HMM has been trained. This is probably the
+most difficult task with the model, and requires either MLE estimates of the
+parameters or unsupervised learning using the Baum\-Welch algorithm, a variant
+of EM.
+.sp
+For more information, please consult the source code for this module,
+which includes extensive demonstration code.
+.INDENT 0.0
+.TP
+.B class nltk.tag.hmm.HiddenMarkovModelTagger(symbols, states, transitions, outputs, priors, transform=_identity)
+Bases: \fI\%nltk.tag.api.TaggerI\fP
+.sp
+Hidden Markov model class, a generative model for labelling sequence data.
+These models define the joint probability of a sequence of symbols and
+their labels (state transitions) as the product of the starting state
+probability, the probability of each state transition, and the probability
+of each observation being generated from each state. This is described in
+more detail in the module documentation.
+.sp
+This implementation is based on the HMM description in Chapter 8, Huang,
+Acero and Hon, Spoken Language Processing, and includes an extension for
+training shallow HMM parsers or specialized HMMs as in Molina et
+al., 2002. A specialized HMM modifies training data by applying a
+specialization function to create a new training set that is more
+appropriate for sequential tagging with an HMM. A typical use case is
+chunking.
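+.sp
+A minimal usage sketch (it assumes the \fBtreebank\fP sample corpus is
+installed; \fBtrain()\fP performs the supervised estimation described under
+\fBHiddenMarkovModelTrainer\fP below, and the test sentence is illustrative
+only):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import treebank
+>>> from nltk.tag.hmm import HiddenMarkovModelTagger
+>>> hmm_tagger = HiddenMarkovModelTagger.train(treebank.tagged_sents()[:100])
+>>> tagged = hmm_tagger.tag(\(aqToday is a good day .\(aq.split())
+.ft P
+.fi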
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsymbols\fP (\fIseq of any\fP) \-\- the set of output symbols (alphabet)
+.IP \(bu 2
+\fBstates\fP (\fIseq of any\fP) \-\- a set of states representing state space
+.IP \(bu 2
+\fBtransitions\fP (\fIConditionalProbDistI\fP) \-\- transition probabilities; Pr(s_i | s_j) is the
+probability of moving to state i given that the model is in
+state j
+.IP \(bu 2
+\fBoutputs\fP (\fIConditionalProbDistI\fP) \-\- output probabilities; Pr(o_k | s_i) is the probability
+of emitting symbol k when entering state i
+.IP \(bu 2
+\fBpriors\fP (\fIProbDistI\fP) \-\- initial state distribution; Pr(s_i) is the probability
+of starting in state i
+.IP \(bu 2
+\fBtransform\fP (\fIcallable\fP) \-\- an optional function for transforming training
+instances, defaults to the identity function.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B best_path(unlabeled_sequence)
+Returns the state sequence of the optimal (most probable) path through
+the HMM. Uses the Viterbi algorithm to calculate this path by dynamic
+programming.
+.INDENT 7.0
+.TP
+.B Returns
+the state sequence
+.TP
+.B Return type
+sequence of any
+.TP
+.B Parameters
+\fBunlabeled_sequence\fP (\fIlist\fP) \-\- the sequence of unlabeled symbols
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B best_path_simple(unlabeled_sequence)
+Returns the state sequence of the optimal (most probable) path through
+the HMM. Uses the Viterbi algorithm to calculate this path by dynamic
+programming. This uses a simple, direct method, and is included for
+teaching purposes.
+.INDENT 7.0
+.TP
+.B Returns
+the state sequence
+.TP
+.B Return type
+sequence of any
+.TP
+.B Parameters
+\fBunlabeled_sequence\fP (\fIlist\fP) \-\- the sequence of unlabeled symbols
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B entropy(unlabeled_sequence)
+Returns the entropy over labellings of the given sequence. This is
+given by:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+H(O) = \- sum_S Pr(S | O) log Pr(S | O)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+where the summation ranges over all state sequences, S. Let
+\fIZ = Pr(O) = sum_S Pr(S, O)\fP where the summation ranges over all state
+sequences and O is the observation sequence. As such the entropy can
+be re\-expressed as:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+H = \- sum_S Pr(S | O) log [ Pr(S, O) / Z ]
+= log Z \- sum_S Pr(S | O) log Pr(S, O)
+= log Z \- sum_S Pr(S | O) [ log Pr(S_0) + sum_t log Pr(S_t | S_{t\-1}) + sum_t log Pr(O_t | S_t) ]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+The order of summation for the log terms can be flipped, allowing
+dynamic programming to be used to calculate the entropy. Specifically,
+we use the forward and backward probabilities (alpha, beta) giving:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+H = log Z \- sum_s0 alpha_0(s0) beta_0(s0) / Z * log Pr(s0)
+\- sum_t,si,sj alpha_t(si) Pr(sj | si) Pr(O_t+1 | sj) beta_t(sj) / Z * log Pr(sj | si)
+\- sum_t,st alpha_t(st) beta_t(st) / Z * log Pr(O_t | st)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+This simply uses alpha and beta to find the probabilities of partial
+sequences, constrained to include the given state(s) at some point in
+time.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B log_probability(sequence)
+Returns the log\-probability of the given symbol sequence. If the
+sequence is labelled, then returns the joint log\-probability of the
+symbol, state sequence. Otherwise, uses the forward algorithm to find
+the log\-probability over all label sequences.
+.INDENT 7.0 +.TP +.B Returns +the log\-probability of the sequence +.TP +.B Return type +float +.TP +.B Parameters +\fBsequence\fP (\fIToken\fP) \-\- the sequence of symbols which must contain the TEXT +property, and optionally the TAG property +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B point_entropy(unlabeled_sequence) +Returns the pointwise entropy over the possible states at each +position in the chain, given the observation sequence. +.UNINDENT +.INDENT 7.0 +.TP +.B probability(sequence) +Returns the probability of the given symbol sequence. If the sequence +is labelled, then returns the joint probability of the symbol, state +sequence. Otherwise, uses the forward algorithm to find the +probability over all label sequences. +.INDENT 7.0 +.TP +.B Returns +the probability of the sequence +.TP +.B Return type +float +.TP +.B Parameters +\fBsequence\fP (\fIToken\fP) \-\- the sequence of symbols which must contain the TEXT +property, and optionally the TAG property +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B random_sample(rng, length) +Randomly sample the HMM to generate a sentence of a given length. This +samples the prior distribution then the observation distribution and +transition distribution for each subsequent observation and state. +This will mostly generate unintelligible garbage, but can provide some +amusement. +.INDENT 7.0 +.TP +.B Returns +the randomly created state/observation sequence, +generated according to the HMM\(aqs probability +distributions. The SUBTOKENS have TEXT and TAG +properties containing the observation and state +respectively. +.TP +.B Return type +list +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBrng\fP (\fIRandom\fP\fI (or \fP\fIany object with a random\fP\fI(\fP\fI) \fP\fImethod\fP\fI)\fP) \-\- random number generator +.IP \(bu 2 +\fBlength\fP (\fIint\fP) \-\- desired output length +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B reset_cache() +.UNINDENT +.INDENT 7.0 +.TP +.B tag(unlabeled_sequence) +Tags the sequence with the highest probability state sequence. This +uses the best_path method to find the Viterbi path. +.INDENT 7.0 +.TP +.B Returns +a labelled sequence of symbols +.TP +.B Return type +list +.TP +.B Parameters +\fBunlabeled_sequence\fP (\fIlist\fP) \-\- the sequence of unlabeled symbols +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B test(test_sequence, verbose=False, **kwargs) +Tests the HiddenMarkovModelTagger instance. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtest_sequence\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI)\fP) \-\- a sequence of labeled test instances +.IP \(bu 2 +\fBverbose\fP (\fIbool\fP) \-\- boolean flag indicating whether training should be +verbose or include printed output +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod train(labeled_sequence, test_sequence=None, unlabeled_sequence=None, **kwargs) +Train a new HiddenMarkovModelTagger using the given labeled and +unlabeled training instances. Testing will be performed if test +instances are provided. +.INDENT 7.0 +.TP +.B Returns +a hidden markov model tagger +.TP +.B Return type +HiddenMarkovModelTagger +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBlabeled_sequence\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI)\fP) \-\- a sequence of labeled training instances, +i.e. 
a list of sentences represented as tuples +.IP \(bu 2 +\fBtest_sequence\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI)\fP) \-\- a sequence of labeled test instances +.IP \(bu 2 +\fBunlabeled_sequence\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI)\fP) \-\- a sequence of unlabeled training instances, +i.e. a list of sentences represented as words +.IP \(bu 2 +\fBtransform\fP (\fIfunction\fP) \-\- an optional function for transforming training +instances, defaults to the identity function, see \fBtransform()\fP +.IP \(bu 2 +\fBestimator\fP (\fIclass\fP\fI or \fP\fIfunction\fP) \-\- an optional function or class that maps a +condition\(aqs frequency distribution to its probability +distribution, defaults to a Lidstone distribution with gamma = 0.1 +.IP \(bu 2 +\fBverbose\fP (\fIbool\fP) \-\- boolean flag indicating whether training should be +verbose or include printed output +.IP \(bu 2 +\fBmax_iterations\fP (\fIint\fP) \-\- number of Baum\-Welch iterations to perform +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tag.hmm.HiddenMarkovModelTrainer(states=None, symbols=None) +Bases: \fBobject\fP +.sp +Algorithms for learning HMM parameters from training data. These include +both supervised learning (MLE) and unsupervised learning (Baum\-Welch). +.sp +Creates an HMM trainer to induce an HMM with the given states and +output symbol alphabet. A supervised and unsupervised training +method may be used. If either of the states or symbols are not given, +these may be derived from supervised training. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBstates\fP (\fIsequence of any\fP) \-\- the set of state labels +.IP \(bu 2 +\fBsymbols\fP (\fIsequence of any\fP) \-\- the set of observation symbols +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B train(labeled_sequences=None, unlabeled_sequences=None, **kwargs) +Trains the HMM using both (or either of) supervised and unsupervised +techniques. +.INDENT 7.0 +.TP +.B Returns +the trained model +.TP +.B Return type +HiddenMarkovModelTagger +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBlabelled_sequences\fP (\fIlist\fP) \-\- the supervised training data, a set of +labelled sequences of observations +ex: [ (word_1, tag_1),...,(word_n,tag_n) ] +.IP \(bu 2 +\fBunlabeled_sequences\fP (\fIlist\fP) \-\- the unsupervised training data, a set of +sequences of observations +ex: [ word_1, ..., word_n ] +.IP \(bu 2 +\fBkwargs\fP \-\- additional arguments to pass to the training methods +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B train_supervised(labelled_sequences, estimator=None) +Supervised training maximising the joint probability of the symbol and +state sequences. This is done via collecting frequencies of +transitions between states, symbol observations while within each +state and which states start a sentence. These frequency distributions +are then normalised into probability estimates, which can be +smoothed if desired. +.INDENT 7.0 +.TP +.B Returns +the trained model +.TP +.B Return type +HiddenMarkovModelTagger +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBlabelled_sequences\fP (\fIlist\fP) \-\- the training data, a set of +labelled sequences of observations +.IP \(bu 2 +\fBestimator\fP \-\- a function taking +a FreqDist and a number of bins and returning a CProbDistI; +otherwise a MLE estimate is used +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B train_unsupervised(unlabeled_sequences, update_outputs=True, **kwargs) +Trains the HMM using the Baum\-Welch algorithm to maximise the +probability of the data sequence. 
This is a variant of the EM +algorithm, and is unsupervised in that it doesn\(aqt need the state +sequences for the symbols. The code is based on \(aqA Tutorial on Hidden +Markov Models and Selected Applications in Speech Recognition\(aq, +Lawrence Rabiner, IEEE, 1989. +.INDENT 7.0 +.TP +.B Returns +the trained model +.TP +.B Return type +HiddenMarkovModelTagger +.TP +.B Parameters +\fBunlabeled_sequences\fP (\fIlist\fP) \-\- the training data, a set of +sequences of observations +.UNINDENT +.sp +kwargs may include following parameters: +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBmodel\fP \-\- a HiddenMarkovModelTagger instance used to begin +the Baum\-Welch algorithm +.IP \(bu 2 +\fBmax_iterations\fP \-\- the maximum number of EM iterations +.IP \(bu 2 +\fBconvergence_logprob\fP \-\- the maximum change in log probability to +allow convergence +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tag.hmm.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tag.hmm.demo_bw() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tag.hmm.demo_pos() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tag.hmm.demo_pos_bw(test=10, supervised=20, unsupervised=10, verbose=True, max_iterations=5) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tag.hmm.load_pos(num_sents) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tag.hmm.logsumexp2(arr) +.UNINDENT +.SS nltk.tag.hunpos module +.sp +A module for interfacing with the HunPos open\-source POS\-tagger. +.INDENT 0.0 +.TP +.B class nltk.tag.hunpos.HunposTagger(path_to_model, path_to_bin=None, encoding=\(aqISO\-8859\-1\(aq, verbose=False) +Bases: \fI\%nltk.tag.api.TaggerI\fP +.INDENT 7.0 +.TP +.B A class for pos tagging with HunPos. The input is the paths to: +.INDENT 7.0 +.IP \(bu 2 +a model trained on training data +.IP \(bu 2 +(optionally) the path to the hunpos\-tag binary +.IP \(bu 2 +(optionally) the encoding of the training data (default: ISO\-8859\-1) +.UNINDENT +.UNINDENT +.sp +Example: +.sp +.nf +.ft C +>>> from nltk.tag import HunposTagger +>>> ht = HunposTagger(\(aqen_wsj.model\(aq) +>>> ht.tag(\(aqWhat is the airspeed of an unladen swallow ?\(aq.split()) +[(\(aqWhat\(aq, \(aqWP\(aq), (\(aqis\(aq, \(aqVBZ\(aq), (\(aqthe\(aq, \(aqDT\(aq), (\(aqairspeed\(aq, \(aqNN\(aq), (\(aqof\(aq, \(aqIN\(aq), (\(aqan\(aq, \(aqDT\(aq), (\(aqunladen\(aq, \(aqNN\(aq), (\(aqswallow\(aq, \(aqVB\(aq), (\(aq?\(aq, \(aq.\(aq)] +>>> ht.close() +.ft P +.fi +.sp +This class communicates with the hunpos\-tag binary via pipes. When the +tagger object is no longer needed, the close() method should be called to +free system resources. The class supports the context manager interface; if +used in a with statement, the close() method is invoked automatically: +.sp +.nf +.ft C +>>> with HunposTagger(\(aqen_wsj.model\(aq) as ht: +\&... ht.tag(\(aqWhat is the airspeed of an unladen swallow ?\(aq.split()) +\&... +[(\(aqWhat\(aq, \(aqWP\(aq), (\(aqis\(aq, \(aqVBZ\(aq), (\(aqthe\(aq, \(aqDT\(aq), (\(aqairspeed\(aq, \(aqNN\(aq), (\(aqof\(aq, \(aqIN\(aq), (\(aqan\(aq, \(aqDT\(aq), (\(aqunladen\(aq, \(aqNN\(aq), (\(aqswallow\(aq, \(aqVB\(aq), (\(aq?\(aq, \(aq.\(aq)] +.ft P +.fi +.INDENT 7.0 +.TP +.B close() +Closes the pipe to the hunpos executable. +.UNINDENT +.INDENT 7.0 +.TP +.B tag(tokens) +Tags a single sentence: a list of words. +The tokens should not contain any newline characters. +.UNINDENT +.UNINDENT +.SS nltk.tag.mapping module +.sp +Interface for converting POS tags from various treebanks +to the universal tagset of Petrov, Das, & McDonald. 
+.sp +The tagset consists of the following 12 coarse tags: +.sp +VERB \- verbs (all tenses and modes) +NOUN \- nouns (common and proper) +PRON \- pronouns +ADJ \- adjectives +ADV \- adverbs +ADP \- adpositions (prepositions and postpositions) +CONJ \- conjunctions +DET \- determiners +NUM \- cardinal numbers +PRT \- particles or other function words +X \- other: foreign words, typos, abbreviations +\&. \- punctuation +.sp +@see: \fI\%http://arxiv.org/abs/1104.2086\fP and \fI\%http://code.google.com/p/universal\-pos\-tags/\fP +.INDENT 0.0 +.TP +.B nltk.tag.mapping.map_tag(source, target, source_tag) +Maps the tag from the source tagset to the target tagset. +.sp +.nf +.ft C +>>> map_tag(\(aqen\-ptb\(aq, \(aquniversal\(aq, \(aqVBZ\(aq) +\(aqVERB\(aq +>>> map_tag(\(aqen\-ptb\(aq, \(aquniversal\(aq, \(aqVBP\(aq) +\(aqVERB\(aq +>>> map_tag(\(aqen\-ptb\(aq, \(aquniversal\(aq, \(aq\(ga\(ga\(aq) +\(aq.\(aq +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tag.mapping.tagset_mapping(source, target) +Retrieve the mapping dictionary between tagsets. +.sp +.nf +.ft C +>>> tagset_mapping(\(aqru\-rnc\(aq, \(aquniversal\(aq) == {\(aq!\(aq: \(aq.\(aq, \(aqA\(aq: \(aqADJ\(aq, \(aqC\(aq: \(aqCONJ\(aq, \(aqAD\(aq: \(aqADV\(aq, \(aqNN\(aq: \(aqNOUN\(aq, \(aqVG\(aq: \(aqVERB\(aq, \(aqCOMP\(aq: \(aqCONJ\(aq, \(aqNC\(aq: \(aqNUM\(aq, \(aqVP\(aq: \(aqVERB\(aq, \(aqP\(aq: \(aqADP\(aq, \(aqIJ\(aq: \(aqX\(aq, \(aqV\(aq: \(aqVERB\(aq, \(aqZ\(aq: \(aqX\(aq, \(aqVI\(aq: \(aqVERB\(aq, \(aqYES_NO_SENT\(aq: \(aqX\(aq, \(aqPTCL\(aq: \(aqPRT\(aq} +True +.ft P +.fi +.UNINDENT +.SS nltk.tag.perceptron module +.INDENT 0.0 +.TP +.B class nltk.tag.perceptron.AveragedPerceptron(weights=None) +Bases: \fBobject\fP +.sp +An averaged perceptron, as implemented by Matthew Honnibal. +.INDENT 7.0 +.TP +.B See more implementation details here: +\fI\%https://explosion.ai/blog/part\-of\-speech\-pos\-tagger\-in\-python\fP +.UNINDENT +.INDENT 7.0 +.TP +.B average_weights() +Average weights from all iterations. +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod decode_json_obj(obj) +.UNINDENT +.INDENT 7.0 +.TP +.B encode_json_obj() +.UNINDENT +.INDENT 7.0 +.TP +.B json_tag = \(aqnltk.tag.perceptron.AveragedPerceptron\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B load(path) +Load the pickled model weights. +.UNINDENT +.INDENT 7.0 +.TP +.B predict(features, return_conf=False) +Dot\-product the features and current weights and return the best label. +.UNINDENT +.INDENT 7.0 +.TP +.B save(path) +Save the pickled model weights. +.UNINDENT +.INDENT 7.0 +.TP +.B update(truth, guess, features) +Update the feature weights. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tag.perceptron.PerceptronTagger(load=True) +Bases: \fI\%nltk.tag.api.TaggerI\fP +.sp +Greedy Averaged Perceptron tagger, as implemented by Matthew Honnibal. +See more implementation details here: +.INDENT 7.0 +.INDENT 3.5 +\fI\%https://explosion.ai/blog/part\-of\-speech\-pos\-tagger\-in\-python\fP +.UNINDENT +.UNINDENT +.sp +.nf +.ft C +>>> from nltk.tag.perceptron import PerceptronTagger +.ft P +.fi +.sp +Train the model +.sp +.nf +.ft C +>>> tagger = PerceptronTagger(load=False) +.ft P +.fi +.sp +.nf +.ft C +>>> tagger.train([[(\(aqtoday\(aq,\(aqNN\(aq),(\(aqis\(aq,\(aqVBZ\(aq),(\(aqgood\(aq,\(aqJJ\(aq),(\(aqday\(aq,\(aqNN\(aq)], +\&... 
[(\(aqyes\(aq,\(aqNNS\(aq),(\(aqit\(aq,\(aqPRP\(aq),(\(aqbeautiful\(aq,\(aqJJ\(aq)]])
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> tagger.tag([\(aqtoday\(aq,\(aqis\(aq,\(aqa\(aq,\(aqbeautiful\(aq,\(aqday\(aq])
+[(\(aqtoday\(aq, \(aqNN\(aq), (\(aqis\(aq, \(aqPRP\(aq), (\(aqa\(aq, \(aqPRP\(aq), (\(aqbeautiful\(aq, \(aqJJ\(aq), (\(aqday\(aq, \(aqNN\(aq)]
+.ft P
+.fi
+.sp
+Use the pretrained model (the default constructor)
+.sp
+.nf
+.ft C
+>>> pretrain = PerceptronTagger()
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> pretrain.tag(\(aqThe quick brown fox jumps over the lazy dog\(aq.split())
+[(\(aqThe\(aq, \(aqDT\(aq), (\(aqquick\(aq, \(aqJJ\(aq), (\(aqbrown\(aq, \(aqNN\(aq), (\(aqfox\(aq, \(aqNN\(aq), (\(aqjumps\(aq, \(aqVBZ\(aq), (\(aqover\(aq, \(aqIN\(aq), (\(aqthe\(aq, \(aqDT\(aq), (\(aqlazy\(aq, \(aqJJ\(aq), (\(aqdog\(aq, \(aqNN\(aq)]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> pretrain.tag("The red cat".split())
+[(\(aqThe\(aq, \(aqDT\(aq), (\(aqred\(aq, \(aqJJ\(aq), (\(aqcat\(aq, \(aqNN\(aq)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B END = [\(aq\-END\-\(aq, \(aq\-END2\-\(aq]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B START = [\(aq\-START\-\(aq, \(aq\-START2\-\(aq]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod decode_json_obj(obj)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B encode_json_obj()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.sequential.PerceptronTagger\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B load(loc)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBloc\fP (\fIstr\fP) \-\- Load a pickled model at location.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B normalize(word)
+Normalization used in pre\-processing:
+.INDENT 7.0
+.IP \(bu 2
+all words are lower\-cased
+.IP \(bu 2
+groups of digits of length 4 are represented as !YEAR
+.IP \(bu 2
+other digits are represented as !DIGITS
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag(tokens, return_conf=False, use_tagdict=True)
+Tag tokenized sentences.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- list of words to be tagged.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(sentences, save_loc=None, nr_iter=5)
+Train a model from sentences, and save it at \fBsave_loc\fP\&. \fBnr_iter\fP
+controls the number of Perceptron training iterations.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsentences\fP \-\- A list or iterator of sentences, where each sentence
+is a list of (words, tags) tuples.
+.IP \(bu 2
+\fBsave_loc\fP \-\- If not \fBNone\fP, saves a pickled model in this location.
+.IP \(bu 2
+\fBnr_iter\fP \-\- Number of training iterations.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tag.senna module
+.sp
+Senna POS tagger, NER Tagger, Chunk Tagger
+.sp
+The input is:
+.INDENT 0.0
+.IP \(bu 2
+path to the directory that contains SENNA executables.
+If the path is incorrect,
+SennaTagger will automatically search for the executable file specified
+in the SENNA environment variable
+.IP \(bu 2
+(optionally) the encoding of the input data (default: utf\-8)
+.UNINDENT
+.sp
+Note: Unit tests for this module can be found in test/unit/test_senna.py
+.sp
+.nf
+.ft C
+>>> from nltk.tag import SennaTagger
+>>> tagger = SennaTagger(\(aq/usr/share/senna\-v3.0\(aq)
+>>> tagger.tag(\(aqWhat is the airspeed of an unladen swallow ?\(aq.split())
+[(\(aqWhat\(aq, \(aqWP\(aq), (\(aqis\(aq, \(aqVBZ\(aq), (\(aqthe\(aq, \(aqDT\(aq), (\(aqairspeed\(aq, \(aqNN\(aq),
+(\(aqof\(aq, \(aqIN\(aq), (\(aqan\(aq, \(aqDT\(aq), (\(aqunladen\(aq, \(aqNN\(aq), (\(aqswallow\(aq, \(aqNN\(aq), (\(aq?\(aq, \(aq.\(aq)]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> from nltk.tag import SennaChunkTagger
+>>> chktagger = SennaChunkTagger(\(aq/usr/share/senna\-v3.0\(aq)
+>>> chktagger.tag(\(aqWhat is the airspeed of an unladen swallow ?\(aq.split())
+[(\(aqWhat\(aq, \(aqB\-NP\(aq), (\(aqis\(aq, \(aqB\-VP\(aq), (\(aqthe\(aq, \(aqB\-NP\(aq), (\(aqairspeed\(aq, \(aqI\-NP\(aq),
+(\(aqof\(aq, \(aqB\-PP\(aq), (\(aqan\(aq, \(aqB\-NP\(aq), (\(aqunladen\(aq, \(aqI\-NP\(aq), (\(aqswallow\(aq, \(aqI\-NP\(aq),
+(\(aq?\(aq, \(aqO\(aq)]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> from nltk.tag import SennaNERTagger
+>>> nertagger = SennaNERTagger(\(aq/usr/share/senna\-v3.0\(aq)
+>>> nertagger.tag(\(aqShakespeare theatre was in London .\(aq.split())
+[(\(aqShakespeare\(aq, \(aqB\-PER\(aq), (\(aqtheatre\(aq, \(aqO\(aq), (\(aqwas\(aq, \(aqO\(aq), (\(aqin\(aq, \(aqO\(aq),
+(\(aqLondon\(aq, \(aqB\-LOC\(aq), (\(aq.\(aq, \(aqO\(aq)]
+>>> nertagger.tag(\(aqUN headquarters are in NY , USA .\(aq.split())
+[(\(aqUN\(aq, \(aqB\-ORG\(aq), (\(aqheadquarters\(aq, \(aqO\(aq), (\(aqare\(aq, \(aqO\(aq), (\(aqin\(aq, \(aqO\(aq),
+(\(aqNY\(aq, \(aqB\-LOC\(aq), (\(aq,\(aq, \(aqO\(aq), (\(aqUSA\(aq, \(aqB\-LOC\(aq), (\(aq.\(aq, \(aqO\(aq)]
+.ft P
+.fi
+.INDENT 0.0
+.TP
+.B class nltk.tag.senna.SennaChunkTagger(path, encoding=\(aqutf\-8\(aq)
+Bases: \fBnltk.classify.senna.Senna\fP
+.INDENT 7.0
+.TP
+.B bio_to_chunks(tagged_sent, chunk_type)
+Extracts the chunks in a BIO chunk\-tagged sentence.
+.sp
+.nf
+.ft C
+>>> from nltk.tag import SennaChunkTagger
+>>> chktagger = SennaChunkTagger(\(aq/usr/share/senna\-v3.0\(aq)
+>>> sent = \(aqWhat is the airspeed of an unladen swallow ?\(aq.split()
+>>> tagged_sent = chktagger.tag(sent)
+>>> tagged_sent
+[(\(aqWhat\(aq, \(aqB\-NP\(aq), (\(aqis\(aq, \(aqB\-VP\(aq), (\(aqthe\(aq, \(aqB\-NP\(aq), (\(aqairspeed\(aq, \(aqI\-NP\(aq),
+(\(aqof\(aq, \(aqB\-PP\(aq), (\(aqan\(aq, \(aqB\-NP\(aq), (\(aqunladen\(aq, \(aqI\-NP\(aq), (\(aqswallow\(aq, \(aqI\-NP\(aq),
+(\(aq?\(aq, \(aqO\(aq)]
+>>> list(chktagger.bio_to_chunks(tagged_sent, chunk_type=\(aqNP\(aq))
+[(\(aqWhat\(aq, \(aq0\(aq), (\(aqthe airspeed\(aq, \(aq2\-3\(aq), (\(aqan unladen swallow\(aq, \(aq5\-6\-7\(aq)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtagged_sent\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- A list of tuples of word and BIO chunk tag.
+.IP \(bu 2
+\fBchunk_type\fP (\fIstr\fP) \-\- The chunk tag that users want to extract, e.g. \(aqNP\(aq or \(aqVP\(aq
+.UNINDENT
+.TP
+.B Returns
+An iterable of tuples of chunks that users want to extract
+and their corresponding indices.
+.TP
+.B Return type
+iter(tuple(str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag_sents(sentences)
+Applies the tag method over a list of sentences. This method will return
+for each sentence a list of tuples of (word, tag).
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.senna.SennaNERTagger(path, encoding=\(aqutf\-8\(aq)
+Bases: \fBnltk.classify.senna.Senna\fP
+.INDENT 7.0
+.TP
+.B tag_sents(sentences)
+Applies the tag method over a list of sentences. This method will return
+for each sentence a list of tuples of (word, tag).
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.senna.SennaTagger(path, encoding=\(aqutf\-8\(aq)
+Bases: \fBnltk.classify.senna.Senna\fP
+.INDENT 7.0
+.TP
+.B tag_sents(sentences)
+Applies the tag method over a list of sentences. This method will return
+for each sentence a list of tuples of (word, tag).
+.UNINDENT
+.UNINDENT
+.SS nltk.tag.sequential module
+.sp
+Classes for tagging sentences sequentially, left to right. The
+abstract base class SequentialBackoffTagger serves as the base
+class for all the taggers in this module. Tagging of individual words
+is performed by the method \fBchoose_tag()\fP, which is defined by
+subclasses of SequentialBackoffTagger. If a tagger is unable to
+determine a tag for the specified token, then its backoff tagger is
+consulted instead. Any SequentialBackoffTagger may serve as a
+backoff tagger for any other SequentialBackoffTagger.
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.AffixTagger(train=None, model=None, affix_length=\-3, min_stem_length=2, backoff=None, cutoff=0, verbose=False)
+Bases: \fI\%nltk.tag.sequential.ContextTagger\fP
+.sp
+A tagger that chooses a token\(aqs tag based on a leading or trailing
+substring of its word string. (It is important to note that these
+substrings are not necessarily "true" morphological affixes). In
+particular, a fixed\-length substring of the word is looked up in a
+table, and the corresponding tag is returned. Affix taggers are
+typically constructed by training them on a tagged corpus.
+.sp
+Construct a new affix tagger.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBaffix_length\fP \-\- The length of the affixes that should be
+considered during training and tagging. Use negative
+numbers for suffixes.
+.IP \(bu 2
+\fBmin_stem_length\fP \-\- Any words whose length is less than
+min_stem_length+abs(affix_length) will be assigned a
+tag of None by this tagger.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B context(tokens, index, history)
+.INDENT 7.0
+.TP
+.B Returns
+the context that should be used to look up the tag
+for the specified token; or None if the specified token
+should not be handled by this tagger.
+.TP
+.B Return type
+(hashable)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod decode_json_obj(obj)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B encode_json_obj()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.sequential.AffixTagger\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.BigramTagger(train=None, model=None, backoff=None, cutoff=0, verbose=False)
+Bases: \fI\%nltk.tag.sequential.NgramTagger\fP
+.sp
+A tagger that chooses a token\(aqs tag based on its word string and on
+the preceding word\(aqs tag. In particular, a tuple consisting
+of the previous tag and the word is looked up in a table, and
+the corresponding tag is returned.
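+.sp
+A minimal usage sketch (it assumes the \fBtreebank\fP sample corpus is
+installed; the \fBUnigramTagger\fP backoff keeps tokens whose bigram
+context was never seen in training from being tagged None):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import treebank
+>>> from nltk.tag import BigramTagger, UnigramTagger
+>>> train_sents = treebank.tagged_sents()[:100]
+>>> bigram_tagger = BigramTagger(train_sents, backoff=UnigramTagger(train_sents))
+>>> tagged = bigram_tagger.tag([\(aqThe\(aq, \(aqjury\(aq, \(aqsaid\(aq])
+.ft P
+.fi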
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrain\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- The corpus of training data, a list of tagged sentences
+.IP \(bu 2
+\fBmodel\fP (\fIdict\fP) \-\- The tagger model
+.IP \(bu 2
+\fBbackoff\fP (\fITaggerI\fP) \-\- Another tagger which this tagger will consult when it is
+unable to tag a word
+.IP \(bu 2
+\fBcutoff\fP (\fIint\fP) \-\- The number of instances of training data the tagger must see
+in order not to use the backoff tagger
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.sequential.BigramTagger\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.ClassifierBasedPOSTagger(feature_detector=None, train=None, classifier_builder=NaiveBayesClassifier.train, classifier=None, backoff=None, cutoff_prob=None, verbose=False)
+Bases: \fI\%nltk.tag.sequential.ClassifierBasedTagger\fP
+.sp
+A classifier\-based part\-of\-speech tagger.
+.INDENT 7.0
+.TP
+.B feature_detector(tokens, index, history)
+Return the feature detector that this tagger uses to generate
+featuresets for its classifier. The feature detector is a
+function with the signature:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+feature_detector(tokens, index, history) \-> featureset
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+See \fBclassifier()\fP
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.ClassifierBasedTagger(feature_detector=None, train=None, classifier_builder=NaiveBayesClassifier.train, classifier=None, backoff=None, cutoff_prob=None, verbose=False)
+Bases: \fI\%nltk.tag.sequential.SequentialBackoffTagger\fP, \fI\%nltk.tag.api.FeaturesetTaggerI\fP
+.sp
+A sequential tagger that uses a classifier to choose the tag for
+each token in a sentence. The featureset input for the classifier
+is generated by a feature detector function:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+feature_detector(tokens, index, history) \-> featureset
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Where tokens is the list of unlabeled tokens in the sentence;
+index is the index of the token for which feature detection
+should be performed; and history is a list of the tags for all
+tokens before index.
+.sp
+Construct a new classifier\-based sequential tagger.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfeature_detector\fP \-\- A function used to generate the
+featureset input for the classifier:
+feature_detector(tokens, index, history) \-> featureset
+.IP \(bu 2
+\fBtrain\fP \-\- A tagged corpus consisting of a list of tagged
+sentences, where each sentence is a list of (word, tag) tuples.
+.IP \(bu 2
+\fBclassifier_builder\fP \-\- A function used to train a new
+classifier based on the data in \fItrain\fP\&. It should take
+one argument, a list of labeled featuresets (i.e.,
+(featureset, label) tuples).
+.IP \(bu 2
+\fBclassifier\fP \-\- The classifier that should be used by the
+tagger. This is only useful if you want to manually
+construct the classifier; normally, you would use \fItrain\fP
+instead.
+.IP \(bu 2
+\fBbackoff\fP \-\- A backoff tagger, used if this tagger is
+unable to determine a tag for a given token.
+.IP \(bu 2
+\fBcutoff_prob\fP \-\- If specified, then this tagger will fall
+back on its backoff tagger if the probability of the most
+likely tag is less than \fIcutoff_prob\fP\&.
+.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B choose_tag(tokens, index, history) +Decide which tag should be used for the specified token, and +return that tag. If this tagger is unable to determine a tag +for the specified token, return None \-\- do not consult +the backoff tagger. This method should be overridden by +subclasses of SequentialBackoffTagger. +.INDENT 7.0 +.TP +.B Return type +str +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtokens\fP (\fIlist\fP) \-\- The list of words that are being tagged. +.IP \(bu 2 +\fBindex\fP (\fIint\fP) \-\- The index of the word whose tag should be +returned. +.IP \(bu 2 +\fBhistory\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of the tags for all words before \fIindex\fP\&. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classifier() +Return the classifier that this tagger uses to choose a tag +for each word in a sentence. The input for this classifier is +generated using this tagger\(aqs feature detector. +See \fBfeature_detector()\fP +.UNINDENT +.INDENT 7.0 +.TP +.B feature_detector(tokens, index, history) +Return the feature detector that this tagger uses to generate +featuresets for its classifier. The feature detector is a +function with the signature: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +feature_detector(tokens, index, history) \-> featureset +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +See \fBclassifier()\fP +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tag.sequential.ContextTagger(context_to_tag, backoff=None) +Bases: \fI\%nltk.tag.sequential.SequentialBackoffTagger\fP +.sp +An abstract base class for sequential backoff taggers that choose +a tag for a token based on the value of its "context". Different +subclasses are used to define different contexts. +.sp +A ContextTagger chooses the tag for a token by calculating the +token\(aqs context, and looking up the corresponding tag in a table. +This table can be constructed manually; or it can be automatically +constructed based on a training corpus, using the \fB_train()\fP +factory method. +.INDENT 7.0 +.TP +.B Variables +\fB_context_to_tag\fP \-\- Dictionary mapping contexts to tags. +.UNINDENT +.INDENT 7.0 +.TP +.B choose_tag(tokens, index, history) +Decide which tag should be used for the specified token, and +return that tag. If this tagger is unable to determine a tag +for the specified token, return None \-\- do not consult +the backoff tagger. This method should be overridden by +subclasses of SequentialBackoffTagger. +.INDENT 7.0 +.TP +.B Return type +str +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtokens\fP (\fIlist\fP) \-\- The list of words that are being tagged. +.IP \(bu 2 +\fBindex\fP (\fIint\fP) \-\- The index of the word whose tag should be +returned. +.IP \(bu 2 +\fBhistory\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of the tags for all words before \fIindex\fP\&. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract context(tokens, index, history) +.INDENT 7.0 +.TP +.B Returns +the context that should be used to look up the tag +for the specified token; or None if the specified token +should not be handled by this tagger. +.TP +.B Return type +(hashable) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B size() +.INDENT 7.0 +.TP +.B Returns +The number of entries in the table used by this +tagger to map from contexts to tags. 
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.DefaultTagger(tag)
+Bases: \fI\%nltk.tag.sequential.SequentialBackoffTagger\fP
+.sp
+A tagger that assigns the same tag to every token.
+.sp
+.nf
+.ft C
+>>> from nltk.tag import DefaultTagger
+>>> default_tagger = DefaultTagger(\(aqNN\(aq)
+>>> list(default_tagger.tag(\(aqThis is a test\(aq.split()))
+[(\(aqThis\(aq, \(aqNN\(aq), (\(aqis\(aq, \(aqNN\(aq), (\(aqa\(aq, \(aqNN\(aq), (\(aqtest\(aq, \(aqNN\(aq)]
+.ft P
+.fi
+.sp
+This tagger is recommended as a backoff tagger, in cases where
+a more powerful tagger is unable to assign a tag to the word
+(e.g. because the word was not seen during training).
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtag\fP (\fIstr\fP) \-\- The tag to assign to each token
+.UNINDENT
+.INDENT 7.0
+.TP
+.B choose_tag(tokens, index, history)
+Decide which tag should be used for the specified token, and
+return that tag.  If this tagger is unable to determine a tag
+for the specified token, return None \-\- do not consult
+the backoff tagger.  This method should be overridden by
+subclasses of SequentialBackoffTagger.
+.INDENT 7.0
+.TP
+.B Return type
+str
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP) \-\- The list of words that are being tagged.
+.IP \(bu 2
+\fBindex\fP (\fIint\fP) \-\- The index of the word whose tag should be
+returned.
+.IP \(bu 2
+\fBhistory\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of the tags for all words before \fIindex\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod decode_json_obj(obj)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B encode_json_obj()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.sequential.DefaultTagger\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.NgramTagger(n, train=None, model=None, backoff=None, cutoff=0, verbose=False)
+Bases: \fI\%nltk.tag.sequential.ContextTagger\fP
+.sp
+A tagger that chooses a token\(aqs tag based on its word string and
+on the tags of the preceding n\-1 words.  In particular, a tuple
+(tags[i\-n+1:i], words[i]) is looked up in a table, and the
+corresponding tag is returned.  N\-gram taggers are typically
+trained on a tagged corpus.
+.sp
+Train a new NgramTagger using the given training data or
+the supplied model.  In particular, construct a new tagger
+whose table maps from each context (tags[i\-n+1:i], words[i])
+to the most frequent tag for that context, but exclude any
+contexts that are already tagged perfectly by the backoff
+tagger.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrain\fP \-\- A tagged corpus consisting of a list of tagged
+sentences, where each sentence is a list of (word, tag) tuples.
+.IP \(bu 2
+\fBbackoff\fP \-\- A backoff tagger, to be used by the new
+tagger if it encounters an unknown context.
+.IP \(bu 2
+\fBcutoff\fP \-\- If the most likely tag for a context occurs
+fewer than \fIcutoff\fP times, then exclude it from the
+context\-to\-tag table for the new tagger.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B context(tokens, index, history)
+.INDENT 7.0
+.TP
+.B Returns
+the context that should be used to look up the tag
+for the specified token; or None if the specified token
+should not be handled by this tagger.
+.TP
+.B Return type
+(hashable)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod decode_json_obj(obj)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B encode_json_obj()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.sequential.NgramTagger\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.RegexpTagger(regexps, backoff=None)
+Bases: \fI\%nltk.tag.sequential.SequentialBackoffTagger\fP
+.sp
+Regular Expression Tagger
+.sp
+The RegexpTagger assigns tags to tokens by comparing their
+word strings to a series of regular expressions.  The following tagger
+uses word suffixes to make guesses about the correct Brown Corpus part
+of speech tag:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import brown
+>>> from nltk.tag import RegexpTagger
+>>> test_sent = brown.sents(categories=\(aqnews\(aq)[0]
+>>> regexp_tagger = RegexpTagger(
+\&... [(r\(aq^\-?[0\-9]+(.[0\-9]+)?$\(aq, \(aqCD\(aq),   # cardinal numbers
+\&...  (r\(aq(The|the|A|a|An|an)$\(aq, \(aqAT\(aq),   # articles
+\&...  (r\(aq.*able$\(aq, \(aqJJ\(aq),                # adjectives
+\&...  (r\(aq.*ness$\(aq, \(aqNN\(aq),                # nouns formed from adjectives
+\&...  (r\(aq.*ly$\(aq, \(aqRB\(aq),                  # adverbs
+\&...  (r\(aq.*s$\(aq, \(aqNNS\(aq),                  # plural nouns
+\&...  (r\(aq.*ing$\(aq, \(aqVBG\(aq),                # gerunds
+\&...  (r\(aq.*ed$\(aq, \(aqVBD\(aq),                 # past tense verbs
+\&...  (r\(aq.*\(aq, \(aqNN\(aq)                      # nouns (default)
+\&... ])
+>>> regexp_tagger
+<Regexp Tagger: size=9>
+>>> regexp_tagger.tag(test_sent)
+[(\(aqThe\(aq, \(aqAT\(aq), (\(aqFulton\(aq, \(aqNN\(aq), (\(aqCounty\(aq, \(aqNN\(aq), (\(aqGrand\(aq, \(aqNN\(aq), (\(aqJury\(aq, \(aqNN\(aq),
+(\(aqsaid\(aq, \(aqNN\(aq), (\(aqFriday\(aq, \(aqNN\(aq), (\(aqan\(aq, \(aqAT\(aq), (\(aqinvestigation\(aq, \(aqNN\(aq), (\(aqof\(aq, \(aqNN\(aq),
+("Atlanta\(aqs", \(aqNNS\(aq), (\(aqrecent\(aq, \(aqNN\(aq), (\(aqprimary\(aq, \(aqNN\(aq), (\(aqelection\(aq, \(aqNN\(aq),
+(\(aqproduced\(aq, \(aqVBD\(aq), (\(aq\(ga\(ga\(aq, \(aqNN\(aq), (\(aqno\(aq, \(aqNN\(aq), (\(aqevidence\(aq, \(aqNN\(aq), ("\(aq\(aq", \(aqNN\(aq),
+(\(aqthat\(aq, \(aqNN\(aq), (\(aqany\(aq, \(aqNN\(aq), (\(aqirregularities\(aq, \(aqNNS\(aq), (\(aqtook\(aq, \(aqNN\(aq),
+(\(aqplace\(aq, \(aqNN\(aq), (\(aq.\(aq, \(aqNN\(aq)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBregexps\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- A list of \fB(regexp, tag)\fP pairs, each of
+which indicates that a word matching \fBregexp\fP should
+be tagged with \fBtag\fP\&.  The pairs will be evaluated in
+order.  If none of the regexps match a word, then the
+optional backoff tagger is invoked; if no backoff tagger
+is given, the word is assigned the tag None.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B choose_tag(tokens, index, history)
+Decide which tag should be used for the specified token, and
+return that tag.  If this tagger is unable to determine a tag
+for the specified token, return None \-\- do not consult
+the backoff tagger.  This method should be overridden by
+subclasses of SequentialBackoffTagger.
+.INDENT 7.0
+.TP
+.B Return type
+str
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP) \-\- The list of words that are being tagged.
+.IP \(bu 2
+\fBindex\fP (\fIint\fP) \-\- The index of the word whose tag should be
+returned.
+.IP \(bu 2
+\fBhistory\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of the tags for all words before \fIindex\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod decode_json_obj(obj)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B encode_json_obj()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.sequential.RegexpTagger\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.SequentialBackoffTagger(backoff=None)
+Bases: \fI\%nltk.tag.api.TaggerI\fP
+.sp
+An abstract base class for taggers that tag words sequentially,
+from left to right.  Tagging of individual words is performed by the
+\fBchoose_tag()\fP method, which should be defined by subclasses.  If
+a tagger is unable to determine a tag for the specified token,
+then its backoff tagger is consulted.
+.INDENT 7.0
+.TP
+.B Variables
+\fB_taggers\fP \-\- A list of all the taggers that should be tried to
+tag a token (i.e., self and its backoff taggers).
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property backoff
+The backoff tagger for this tagger.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract choose_tag(tokens, index, history)
+Decide which tag should be used for the specified token, and
+return that tag.  If this tagger is unable to determine a tag
+for the specified token, return None \-\- do not consult
+the backoff tagger.  This method should be overridden by
+subclasses of SequentialBackoffTagger.
+.INDENT 7.0
+.TP
+.B Return type
+str
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP) \-\- The list of words that are being tagged.
+.IP \(bu 2
+\fBindex\fP (\fIint\fP) \-\- The index of the word whose tag should be
+returned.
+.IP \(bu 2
+\fBhistory\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of the tags for all words before \fIindex\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag(tokens)
+Determine the most appropriate tag sequence for the given
+token sequence, and return a corresponding list of tagged
+tokens.  A tagged token is encoded as a tuple \fB(token, tag)\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+list(tuple(str, str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag_one(tokens, index, history)
+Determine an appropriate tag for the specified token, and
+return that tag.  If this tagger is unable to determine a tag
+for the specified token, then its backoff tagger is consulted.
+.INDENT 7.0
+.TP
+.B Return type
+str
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP) \-\- The list of words that are being tagged.
+.IP \(bu 2
+\fBindex\fP (\fIint\fP) \-\- The index of the word whose tag should be
+returned.
+.IP \(bu 2
+\fBhistory\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of the tags for all words before \fIindex\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.TrigramTagger(train=None, model=None, backoff=None, cutoff=0, verbose=False)
+Bases: \fI\%nltk.tag.sequential.NgramTagger\fP
+.sp
+A tagger that chooses a token\(aqs tag based on its word string and on
+the preceding two words\(aq tags.  In particular, a tuple consisting
+of the previous two tags and the word is looked up in a table, and
+the corresponding tag is returned.
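+.sp
+A minimal usage sketch (the Brown corpus and the training slice are
+illustrative assumptions, mirroring the UnigramTagger example below):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import brown
+>>> from nltk.tag import TrigramTagger, UnigramTagger
+>>> train_sents = brown.tagged_sents(categories=\(aqnews\(aq)[:500]
+>>> # back off to a unigram tagger for unseen trigram contexts
+>>> backoff = UnigramTagger(train_sents)
+>>> tagger = TrigramTagger(train_sents, backoff=backoff)
+.ft P
+.fi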
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrain\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- The corpus of training data, a list of tagged sentences
+.IP \(bu 2
+\fBmodel\fP (\fIdict\fP) \-\- The tagger model
+.IP \(bu 2
+\fBbackoff\fP (\fITaggerI\fP) \-\- Another tagger which this tagger will consult when it is
+unable to tag a word
+.IP \(bu 2
+\fBcutoff\fP (\fIint\fP) \-\- The number of instances of training data the tagger must see
+in order not to use the backoff tagger
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.sequential.TrigramTagger\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.sequential.UnigramTagger(train=None, model=None, backoff=None, cutoff=0, verbose=False)
+Bases: \fI\%nltk.tag.sequential.NgramTagger\fP
+.sp
+Unigram Tagger
+.sp
+The UnigramTagger finds the most likely tag for each word in a training
+corpus, and then uses that information to assign tags to new tokens.
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import brown
+>>> from nltk.tag import UnigramTagger
+>>> test_sent = brown.sents(categories=\(aqnews\(aq)[0]
+>>> unigram_tagger = UnigramTagger(brown.tagged_sents(categories=\(aqnews\(aq)[:500])
+>>> for tok, tag in unigram_tagger.tag(test_sent):
+\&...     print("({}, {}), ".format(tok, tag))
+(The, AT), (Fulton, NP\-TL), (County, NN\-TL), (Grand, JJ\-TL),
+(Jury, NN\-TL), (said, VBD), (Friday, NR), (an, AT),
+(investigation, NN), (of, IN), (Atlanta\(aqs, NP$), (recent, JJ),
+(primary, NN), (election, NN), (produced, VBD), (\(ga\(ga, \(ga\(ga),
+(no, AT), (evidence, NN), (\(aq\(aq, \(aq\(aq), (that, CS), (any, DTI),
+(irregularities, NNS), (took, VBD), (place, NN), (., .),
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrain\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- The corpus of training data, a list of tagged sentences
+.IP \(bu 2
+\fBmodel\fP (\fIdict\fP) \-\- The tagger model
+.IP \(bu 2
+\fBbackoff\fP (\fITaggerI\fP) \-\- Another tagger which this tagger will consult when it is
+unable to tag a word
+.IP \(bu 2
+\fBcutoff\fP (\fIint\fP) \-\- The number of instances of training data the tagger must see
+in order not to use the backoff tagger
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B context(tokens, index, history)
+.INDENT 7.0
+.TP
+.B Returns
+the context that should be used to look up the tag
+for the specified token; or None if the specified token
+should not be handled by this tagger.
+.TP
+.B Return type
+(hashable)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tag.sequential.UnigramTagger\(aq
+.UNINDENT
+.UNINDENT
+.SS nltk.tag.stanford module
+.sp
+A module for interfacing with the Stanford taggers.
+.sp
+Tagger models need to be downloaded from \fI\%https://nlp.stanford.edu/software\fP
+and the STANFORD_MODELS environment variable set (a colon\-separated
+list of paths).
+.sp
+For more details see the documentation for StanfordPOSTagger and StanfordNERTagger.
+.INDENT 0.0
+.TP
+.B class nltk.tag.stanford.StanfordNERTagger(*args, **kwargs)
+Bases: \fI\%nltk.tag.stanford.StanfordTagger\fP
+.sp
+A class for Named\-Entity Tagging with Stanford Tagger. The input is the paths to:
+.INDENT 7.0
+.IP \(bu 2
+a model trained on training data
+.IP \(bu 2
+(optionally) the path to the stanford tagger jar file. If not specified here,
+then this jar file must be specified in the CLASSPATH environment variable.
+.IP \(bu 2
+(optionally) the encoding of the training data (default: UTF\-8)
+.UNINDENT
+.sp
+Example:
+.sp
+.nf
+.ft C
+>>> from nltk.tag import StanfordNERTagger
+>>> st = StanfordNERTagger(\(aqenglish.all.3class.distsim.crf.ser.gz\(aq)
+>>> st.tag(\(aqRami Eid is studying at Stony Brook University in NY\(aq.split())
+[(\(aqRami\(aq, \(aqPERSON\(aq), (\(aqEid\(aq, \(aqPERSON\(aq), (\(aqis\(aq, \(aqO\(aq), (\(aqstudying\(aq, \(aqO\(aq),
+ (\(aqat\(aq, \(aqO\(aq), (\(aqStony\(aq, \(aqORGANIZATION\(aq), (\(aqBrook\(aq, \(aqORGANIZATION\(aq),
+ (\(aqUniversity\(aq, \(aqORGANIZATION\(aq), (\(aqin\(aq, \(aqO\(aq), (\(aqNY\(aq, \(aqLOCATION\(aq)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B parse_output(text, sentences)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.stanford.StanfordPOSTagger(*args, **kwargs)
+Bases: \fI\%nltk.tag.stanford.StanfordTagger\fP
+.INDENT 7.0
+.TP
+.B A class for POS tagging with Stanford Tagger. The input is the paths to:
+.INDENT 7.0
+.IP \(bu 2
+a model trained on training data
+.IP \(bu 2
+(optionally) the path to the stanford tagger jar file. If not specified here,
+then this jar file must be specified in the CLASSPATH environment variable.
+.IP \(bu 2
+(optionally) the encoding of the training data (default: UTF\-8)
+.UNINDENT
+.UNINDENT
+.sp
+Example:
+.sp
+.nf
+.ft C
+>>> from nltk.tag import StanfordPOSTagger
+>>> st = StanfordPOSTagger(\(aqenglish\-bidirectional\-distsim.tagger\(aq)
+>>> st.tag(\(aqWhat is the airspeed of an unladen swallow ?\(aq.split())
+[(\(aqWhat\(aq, \(aqWP\(aq), (\(aqis\(aq, \(aqVBZ\(aq), (\(aqthe\(aq, \(aqDT\(aq), (\(aqairspeed\(aq, \(aqNN\(aq), (\(aqof\(aq, \(aqIN\(aq), (\(aqan\(aq, \(aqDT\(aq), (\(aqunladen\(aq, \(aqJJ\(aq), (\(aqswallow\(aq, \(aqVB\(aq), (\(aq?\(aq, \(aq.\(aq)]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tag.stanford.StanfordTagger(model_filename, path_to_jar=None, encoding=\(aqutf8\(aq, verbose=False, java_options=\(aq\-mx1000m\(aq)
+Bases: \fI\%nltk.tag.api.TaggerI\fP
+.sp
+An interface to Stanford taggers. Subclasses must define:
+.INDENT 7.0
+.IP \(bu 2
+\fB_cmd\fP property: A property that returns the command that will be
+executed.
+.IP \(bu 2
+\fB_SEPARATOR\fP: Class constant that represents the character that
+is used to separate the tokens from their tags.
+.IP \(bu 2
+\fB_JAR\fP file: Class constant that represents the jar file name.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B parse_output(text, sentences=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag(tokens)
+Determine the most appropriate tag sequence for the given
+token sequence, and return a corresponding list of tagged
+tokens.  A tagged token is encoded as a tuple \fB(token, tag)\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+list(tuple(str, str))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tag_sents(sentences)
+Apply \fBself.tag()\fP to each element of \fIsentences\fP\&.
I.e.:
+.INDENT 7.0
+.INDENT 3.5
+return [self.tag(sent) for sent in sentences]
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tag.tnt module
+.sp
+Implementation of \(aqTnT \- A Statistical Part of Speech Tagger\(aq
+by Thorsten Brants
+.sp
+\fI\%http://acl.ldc.upenn.edu/A/A00/A00\-1031.pdf\fP
+.INDENT 0.0
+.TP
+.B class nltk.tag.tnt.TnT(unk=None, Trained=False, N=1000, C=False)
+Bases: \fI\%nltk.tag.api.TaggerI\fP
+.sp
+TnT \- Statistical POS tagger
+.sp
+IMPORTANT NOTES:
+.INDENT 7.0
+.IP \(bu 2
+DOES NOT AUTOMATICALLY DEAL WITH UNSEEN WORDS
+.INDENT 2.0
+.IP \(bu 2
+It is possible to provide an untrained POS tagger to
+create tags for unknown words; see the __init__ function
+.UNINDENT
+.IP \(bu 2
+SHOULD BE USED WITH SENTENCE\-DELIMITED INPUT
+.INDENT 2.0
+.IP \(bu 2
+Due to the nature of this tagger, it works best when
+trained over sentence\-delimited input.
+.IP \(bu 2
+However, it still produces good results if the training
+data and testing data are separated on all punctuation, e.g. [,.?!]
+.IP \(bu 2
+Input for training is expected to be a list of sentences
+where each sentence is a list of (word, tag) tuples
+.IP \(bu 2
+Input for the tag function is a single sentence;
+input for the tagdata function is a list of sentences;
+output is of a similar form
+.UNINDENT
+.IP \(bu 2
+A function is provided to process text that is unsegmented
+.INDENT 2.0
+.IP \(bu 2
+Please see basic_sent_chop()
+.UNINDENT
+.UNINDENT
+.sp
+TnT uses a second order Markov model to produce tags for
+a sequence of input, specifically:
+.INDENT 7.0
+.INDENT 3.5
+argmax [Prod(P(t_i|t_i\-1,t_i\-2) P(w_i|t_i))] P(t_T+1|t_T)
+.UNINDENT
+.UNINDENT
+.sp
+i.e. the tag sequence with the maximum product of a set of probabilities
+.sp
+The set of possible tags for a given word is derived
+from the training data. It is the set of all tags
+that exact word has been assigned.
+.sp
+To speed this up and gain precision, we can use log addition
+instead of multiplication, specifically:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B argmax [Sigma(log(P(t_i|t_i\-1,t_i\-2)) + log(P(w_i|t_i)))] +
+log(P(t_T+1|t_T))
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The probability of a tag for a given word is the linear
+interpolation of 3 Markov models: a zero\-order, a first\-order,
+and a second\-order model.
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B P(t_i|t_i\-1, t_i\-2) = l1*P(t_i) + l2*P(t_i|t_i\-1) +
+l3*P(t_i|t_i\-1, t_i\-2)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+A beam search is used to limit the memory usage of the algorithm.
+The degree of the beam can be changed using N in the initialization.
+N represents the maximum number of possible solutions to maintain
+while tagging.
+.sp
+It is possible to differentiate the tags which are assigned to
+capitalized words. However, this does not result in a significant
+gain in the accuracy of the results.
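+.sp
+A minimal training and tagging sketch (the Treebank sample corpus and
+the data split are illustrative assumptions):
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import treebank
+>>> from nltk.tag import tnt
+>>> tnt_tagger = tnt.TnT()
+>>> tnt_tagger.train(treebank.tagged_sents()[:200])
+>>> tagged = tnt_tagger.tag([\(aqThe\(aq, \(aqboard\(aq, \(aqmeets\(aq, \(aqtoday\(aq, \(aq.\(aq])
+.ft P
+.fi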
+.INDENT 7.0
+.TP
+.B tag(data)
+Tags a single sentence
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdata\fP (\fI[\fP\fIstring\fP\fI,\fP\fI]\fP) \-\- list of words
+.TP
+.B Returns
+[(word, tag),]
+.UNINDENT
+.sp
+Calls the recursive function \(aq_tagword\(aq
+to produce a list of tags.
+.sp
+Associates the sequence of returned tags
+with the correct words in the input sequence.
+.sp
+Returns a list of (word, tag) tuples.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tagdata(data)
+Tags each sentence in a list of sentences
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdata\fP (\fI[\fP\fI[\fP\fIstring\fP\fI,\fP\fI]\fP\fI,\fP\fI]\fP) \-\- list of lists of words
+.TP
+.B Returns
+list of lists of (word, tag) tuples
+.UNINDENT
+.sp
+Invokes the tag(sent) function for each sentence and
+compiles the results into a list of tagged sentences,
+where each tagged sentence is a list of (word, tag) tuples.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B train(data)
+Uses a set of tagged data to train the tagger.
+If an unknown word tagger is specified,
+it is trained on the same data.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdata\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- List of lists of (word, tag) tuples
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.tnt.basic_sent_chop(data, raw=True)
+Basic method for tokenizing input into sentences
+for this tagger:
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdata\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI) or \fP\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- list of tokens (words or (word, tag) tuples)
+.IP \(bu 2
+\fBraw\fP (\fIbool\fP) \-\- boolean flag marking the input data
+as a list of words or a list of tagged words
+.UNINDENT
+.TP
+.B Returns
+list of sentences, where each sentence is a list of tokens
+and the tokens are of the same form as the input
+.UNINDENT
+.sp
+This function takes a list of tokens and separates the tokens into lists,
+where each list represents a sentence fragment.
+It can separate both tagged and raw sequences into
+basic sentences.
+.sp
+Sentence markers are the set of [,.!?]
+.sp
+This is a simple method which enhances the performance of the TnT
+tagger. Better sentence tokenization will further enhance the results.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.tnt.demo()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.tnt.demo2()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.tnt.demo3()
+.UNINDENT
+.SS nltk.tag.util module
+.INDENT 0.0
+.TP
+.B nltk.tag.util.str2tuple(s, sep=\(aq/\(aq)
+Given the string representation of a tagged token, return the
+corresponding tuple representation.  The rightmost occurrence of
+\fIsep\fP in \fIs\fP will be used to divide \fIs\fP into a word string and
+a tag string.  If \fIsep\fP does not occur in \fIs\fP, return (s, None).
+.sp
+.nf
+.ft C
+>>> from nltk.tag.util import str2tuple
+>>> str2tuple(\(aqfly/NN\(aq)
+(\(aqfly\(aq, \(aqNN\(aq)
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP (\fIstr\fP) \-\- The string representation of a tagged token.
+.IP \(bu 2
+\fBsep\fP (\fIstr\fP) \-\- The separator string used to separate word strings
+from tags.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.util.tuple2str(tagged_token, sep=\(aq/\(aq)
+Given the tuple representation of a tagged token, return the
+corresponding string representation.  This representation is
+formed by concatenating the token\(aqs word string, followed by the
+separator, followed by the token\(aqs tag.  (If the tag is None,
+then just return the bare word string.)
+.sp
+.nf
+.ft C
+>>> from nltk.tag.util import tuple2str
+>>> tagged_token = (\(aqfly\(aq, \(aqNN\(aq)
+>>> tuple2str(tagged_token)
+\(aqfly/NN\(aq
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtagged_token\fP (\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP) \-\- The tuple representation of a tagged token.
+.IP \(bu 2
+\fBsep\fP (\fIstr\fP) \-\- The separator string used to separate word strings
+from tags.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.util.untag(tagged_sentence)
+Given a tagged sentence, return an untagged version of that
+sentence.  I.e., return a list containing the first element
+of each tuple in \fItagged_sentence\fP\&.
+.sp
+.nf
+.ft C
+>>> from nltk.tag.util import untag
+>>> untag([(\(aqJohn\(aq, \(aqNNP\(aq), (\(aqsaw\(aq, \(aqVBD\(aq), (\(aqMary\(aq, \(aqNNP\(aq)])
+[\(aqJohn\(aq, \(aqsaw\(aq, \(aqMary\(aq]
+.ft P
+.fi
+.UNINDENT
+.SS Module contents
+.sp
+NLTK Taggers
+.sp
+This package contains classes and interfaces for part\-of\-speech
+tagging, or simply "tagging".
+.sp
+A "tag" is a case\-sensitive string that specifies some property of a token,
+such as its part of speech.  Tagged tokens are encoded as tuples
+\fB(word, tag)\fP\&.  For example, the following tagged token combines
+the word \fB\(aqfly\(aq\fP with a noun part of speech tag (\fB\(aqNN\(aq\fP):
+.sp
+.nf
+.ft C
+>>> tagged_tok = (\(aqfly\(aq, \(aqNN\(aq)
+.ft P
+.fi
+.sp
+An off\-the\-shelf tagger is available for English. It uses the Penn Treebank tagset:
+.sp
+.nf
+.ft C
+>>> from nltk import pos_tag, word_tokenize
+>>> pos_tag(word_tokenize("John\(aqs big idea isn\(aqt all that bad."))
+[(\(aqJohn\(aq, \(aqNNP\(aq), ("\(aqs", \(aqPOS\(aq), (\(aqbig\(aq, \(aqJJ\(aq), (\(aqidea\(aq, \(aqNN\(aq), (\(aqis\(aq, \(aqVBZ\(aq),
+("n\(aqt", \(aqRB\(aq), (\(aqall\(aq, \(aqPDT\(aq), (\(aqthat\(aq, \(aqDT\(aq), (\(aqbad\(aq, \(aqJJ\(aq), (\(aq.\(aq, \(aq.\(aq)]
+.ft P
+.fi
+.sp
+A Russian tagger is also available if you specify lang="rus". It uses
+the Russian National Corpus tagset:
+.sp
+.nf
+.ft C
+>>> pos_tag(word_tokenize("Илья оторопел и дважды перечитал бумажку."), lang=\(aqrus\(aq)
+[(\(aqИлья\(aq, \(aqS\(aq), (\(aqоторопел\(aq, \(aqV\(aq), (\(aqи\(aq, \(aqCONJ\(aq), (\(aqдважды\(aq, \(aqADV\(aq), (\(aqперечитал\(aq, \(aqV\(aq),
+(\(aqбумажку\(aq, \(aqS\(aq), (\(aq.\(aq, \(aqNONLEX\(aq)]
+.ft P
+.fi
+.sp
+This package defines several taggers, which take a list of tokens,
+assign a tag to each one, and return the resulting list of tagged tokens.
+Most of the taggers are built automatically based on a training corpus.
+For example, the unigram tagger tags each word \fIw\fP by checking what
+the most frequent tag for \fIw\fP was in a training corpus:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import brown
+>>> from nltk.tag import UnigramTagger
+>>> tagger = UnigramTagger(brown.tagged_sents(categories=\(aqnews\(aq)[:500])
+>>> sent = [\(aqMitchell\(aq, \(aqdecried\(aq, \(aqthe\(aq, \(aqhigh\(aq, \(aqrate\(aq, \(aqof\(aq, \(aqunemployment\(aq]
+>>> for word, tag in tagger.tag(sent):
+\&...     print(word, \(aq\->\(aq, tag)
+Mitchell \-> NP
+decried \-> None
+the \-> AT
+high \-> JJ
+rate \-> NN
+of \-> IN
+unemployment \-> None
+.ft P
+.fi
+.sp
+Note that words that the tagger has not seen during training receive a tag
+of \fBNone\fP\&.
+.sp
+We evaluate a tagger on data that was not seen during training:
+.sp
+.nf
+.ft C
+>>> tagger.evaluate(brown.tagged_sents(categories=\(aqnews\(aq)[500:600])
+0.7...
+.ft P
+.fi
+.sp
+For more information, please consult chapter 5 of the NLTK Book.
+.sp
+isort:skip_file
+.INDENT 0.0
+.TP
+.B nltk.tag.pos_tag(tokens, tagset=None, lang=\(aqeng\(aq)
+Use NLTK\(aqs currently recommended part of speech tagger to
+tag the given list of tokens.
+.sp
+.nf
+.ft C
+>>> from nltk.tag import pos_tag
+>>> from nltk.tokenize import word_tokenize
+>>> pos_tag(word_tokenize("John\(aqs big idea isn\(aqt all that bad."))
+[(\(aqJohn\(aq, \(aqNNP\(aq), ("\(aqs", \(aqPOS\(aq), (\(aqbig\(aq, \(aqJJ\(aq), (\(aqidea\(aq, \(aqNN\(aq), (\(aqis\(aq, \(aqVBZ\(aq),
+("n\(aqt", \(aqRB\(aq), (\(aqall\(aq, \(aqPDT\(aq), (\(aqthat\(aq, \(aqDT\(aq), (\(aqbad\(aq, \(aqJJ\(aq), (\(aq.\(aq, \(aq.\(aq)]
+>>> pos_tag(word_tokenize("John\(aqs big idea isn\(aqt all that bad."), tagset=\(aquniversal\(aq)
+[(\(aqJohn\(aq, \(aqNOUN\(aq), ("\(aqs", \(aqPRT\(aq), (\(aqbig\(aq, \(aqADJ\(aq), (\(aqidea\(aq, \(aqNOUN\(aq), (\(aqis\(aq, \(aqVERB\(aq),
+("n\(aqt", \(aqADV\(aq), (\(aqall\(aq, \(aqDET\(aq), (\(aqthat\(aq, \(aqDET\(aq), (\(aqbad\(aq, \(aqADJ\(aq), (\(aq.\(aq, \(aq.\(aq)]
+.ft P
+.fi
+.sp
+NB. Use \fIpos_tag_sents()\fP for efficient tagging of more than one sentence.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Sequence of tokens to be tagged
+.IP \(bu 2
+\fBtagset\fP (\fIstr\fP) \-\- the tagset to be used, e.g. universal, wsj, brown
+.IP \(bu 2
+\fBlang\fP (\fIstr\fP) \-\- the ISO 639 code of the language, e.g. \(aqeng\(aq for English, \(aqrus\(aq for Russian
+.UNINDENT
+.TP
+.B Returns
+The tagged tokens
+.TP
+.B Return type
+list(tuple(str, str))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tag.pos_tag_sents(sentences, tagset=None, lang=\(aqeng\(aq)
+Use NLTK\(aqs currently recommended part of speech tagger to tag the
+given list of sentences, each consisting of a list of tokens.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsentences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- List of sentences to be tagged
+.IP \(bu 2
+\fBtagset\fP (\fIstr\fP) \-\- the tagset to be used, e.g. universal, wsj, brown
+.IP \(bu 2
+\fBlang\fP (\fIstr\fP) \-\- the ISO 639 code of the language, e.g. \(aqeng\(aq for English, \(aqrus\(aq for Russian
+.UNINDENT
+.TP
+.B Returns
+The list of tagged sentences
+.TP
+.B Return type
+list(list(tuple(str, str)))
+.UNINDENT
+.UNINDENT
+.SS nltk.tbl package
+.SS Submodules
+.SS nltk.tbl.api module
+.SS nltk.tbl.demo module
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.corpus_size(seqs)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo()
+Run a demo with defaults. See source comments for details,
+or docstrings of any of the more specific demo_* functions.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_error_analysis()
+Writes a file with context for each erroneous word after tagging testing data
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_generated_templates()
+Template.expand and Feature.expand are class methods that facilitate
+generating large numbers of templates. See their documentation for
+details.
+.sp
+Note: training with 500 templates can easily fill all available memory,
+even on relatively small corpora
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_high_accuracy_rules()
+Discard rules with low accuracy. This may hurt performance a bit,
+but will often produce rules which are more interesting for a human to read.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_learning_curve()
+Plot a learning curve \-\- the contribution to tagging accuracy of
+the individual rules.
+Note: requires matplotlib
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_multifeature_template()
+Templates can have more than a single feature.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_multiposition_feature()
+The feature(s) of a template take a list of positions
+relative to the current word where the feature should be
+looked for, conceptually joined by logical OR. For instance,
+Pos([\-1, 1]), given a value V, will hold whenever V is found
+one step to the left and/or one step to the right.
+.sp
+For contiguous ranges, a 2\-arg form giving inclusive end
+points can also be used: Pos(\-3, \-1) is the same as the arg
+below.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_repr_rule_format()
+Exemplify repr(Rule) (see also str(Rule) and Rule.format("verbose"))
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_serialize_tagger()
+Serializes the learned tagger to a file in pickle format; reloads it
+and validates the process.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_str_rule_format()
+Exemplify str(Rule) (see also repr(Rule) and Rule.format("verbose"))
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_template_statistics()
+Show aggregate statistics per template. Little used templates are
+candidates for deletion; much used templates may possibly be refined.
+.sp
+Deleting unused templates is mostly about saving time and/or space:
+training is basically O(T) in the number of templates T
+(also in terms of memory usage, which often will be the limiting factor).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.demo_verbose_rule_format()
+Exemplify Rule.format("verbose")
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tbl.demo.postag(templates=None, tagged_data=None, num_sents=1000, max_rules=300, min_score=3, min_acc=None, train=0.8, trace=3, randomize=False, ruleformat=\(aqstr\(aq, incremental_stats=False, template_stats=False, error_output=None, serialize_output=None, learning_curve_output=None, learning_curve_take=300, baseline_backoff_tagger=None, separate_baseline_data=False, cache_baseline_tagger=None)
+Brill Tagger Demonstration
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtemplates\fP (\fIlist of Template\fP) \-\- templates to be used in training
+.IP \(bu 2
+\fBtagged_data\fP \-\- the tagged corpus to use for training and testing
+.IP \(bu 2
+\fBnum_sents\fP (\fIC{int}\fP) \-\- how many sentences of training and testing data to use
+.IP \(bu 2
+\fBmax_rules\fP (\fIC{int}\fP) \-\- maximum number of rule instances to create
+.IP \(bu 2
+\fBmin_score\fP (\fIC{int}\fP) \-\- the minimum score for a rule in order for it to be considered
+.IP \(bu 2
+\fBmin_acc\fP (\fIC{float}\fP) \-\- the minimum accuracy for a rule in order for it to be considered
+.IP \(bu 2
+\fBtrain\fP (\fIC{float}\fP) \-\- the fraction of the corpus to be used for training (1=all)
+.IP \(bu 2
+\fBtrace\fP (\fIC{int}\fP) \-\- the level of diagnostic tracing output to produce (0\-4)
+.IP \(bu 2
+\fBrandomize\fP (\fIC{bool}\fP) \-\- whether the training data should be a random subset of the corpus
+.IP \(bu 2
+\fBruleformat\fP (\fIC{str}\fP) \-\- rule output format, one of "str", "repr", "verbose"
+.IP \(bu 2
+\fBincremental_stats\fP (\fIC{bool}\fP) \-\- if true, will tag incrementally and collect stats for each rule (rather slow)
+.IP \(bu 2
+\fBtemplate_stats\fP (\fIC{bool}\fP) \-\- if true, will
print per\-template statistics collected in training and (optionally) testing
+.IP \(bu 2
+\fBerror_output\fP (\fIC{string}\fP) \-\- the file where errors will be saved
+.IP \(bu 2
+\fBserialize_output\fP (\fIC{string}\fP) \-\- the file where the learned tbl tagger will be saved
+.IP \(bu 2
+\fBlearning_curve_output\fP (\fIC{string}\fP) \-\- filename of plot of learning curve(s) (train and also test, if available)
+.IP \(bu 2
+\fBlearning_curve_take\fP (\fIC{int}\fP) \-\- how many rules plotted
+.IP \(bu 2
+\fBbaseline_backoff_tagger\fP (\fItagger\fP) \-\- the backoff tagger used by the baseline tagger
+.IP \(bu 2
+\fBseparate_baseline_data\fP (\fIC{bool}\fP) \-\- use a fraction of the training data exclusively for training the baseline
+.IP \(bu 2
+\fBcache_baseline_tagger\fP (\fIC{string}\fP) \-\- cache baseline tagger to this file (only interesting as a temporary workaround to get
+deterministic output from the baseline unigram tagger between python versions)
+.UNINDENT
+.UNINDENT
+.sp
+Note on separate_baseline_data: if False, the training data is reused both for the
+baseline and for the rule learner. This is fast and fine for a demo, but is likely to
+generalize worse on unseen data. It also cannot be sensibly used for learning curves on
+training data (the baseline will be artificially high).
+.UNINDENT
+.SS nltk.tbl.erroranalysis module
+.INDENT 0.0
+.TP
+.B nltk.tbl.erroranalysis.error_list(train_sents, test_sents)
+Returns a list of human\-readable strings indicating the errors in the
+given tagging of the corpus.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtrain_sents\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- The correct tagging of the corpus
+.IP \(bu 2
+\fBtest_sents\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- The tagged corpus
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tbl.feature module
+.INDENT 0.0
+.TP
+.B class nltk.tbl.feature.Feature(positions, end=None)
+Bases: \fBobject\fP
+.sp
+An abstract base class for Features. A Feature is a combination of
+a specific property\-computing method and a list of relative positions
+to apply that method to.
+.sp
+The property\-computing method, M{extract_property(tokens, index)},
+must be implemented by every subclass. It extracts or computes a specific
+property for the token at the current index. Typical extract_property()
+methods return features such as the token text or tag; but more involved
+methods may consider the entire sequence M{tokens} and
+for instance compute the length of the sentence the token belongs to.
+.sp
+In addition, the subclass may have a PROPERTY_NAME, which is how
+it will be printed (in Rules and Templates, etc). If not given, it defaults
+to the classname.
+.INDENT 7.0
+.TP
+.B PROPERTY_NAME = None
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod decode_json_obj(obj)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B encode_json_obj()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod expand(starts, winlens, excludezero=False)
+Return a list of features, one for each start point in starts
+and for each window length in winlens.
If excludezero is True,
+no Features containing 0 in their positions will be generated
+(many tbl trainers have a special representation for the
+target feature at [0]).
+.sp
+For instance, importing a concrete subclass (Feature is abstract):
+.sp
+.nf
+.ft C
+>>> from nltk.tag.brill import Word
+.ft P
+.fi
+.sp
+The first argument gives the possible start positions, the second the
+possible window lengths:
+.sp
+.nf
+.ft C
+>>> Word.expand([\-3,\-2,\-1], [1])
+[Word([\-3]), Word([\-2]), Word([\-1])]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> Word.expand([\-2,\-1], [1])
+[Word([\-2]), Word([\-1])]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> Word.expand([\-3,\-2,\-1], [1,2])
+[Word([\-3]), Word([\-2]), Word([\-1]), Word([\-3, \-2]), Word([\-2, \-1])]
+.ft P
+.fi
+.sp
+A third, optional argument excludes all Features whose positions contain zero:
+.sp
+.nf
+.ft C
+>>> Word.expand([\-2,\-1,0], [1,2], excludezero=False)
+[Word([\-2]), Word([\-1]), Word([0]), Word([\-2, \-1]), Word([\-1, 0])]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> Word.expand([\-2,\-1,0], [1,2], excludezero=True)
+[Word([\-2]), Word([\-1]), Word([\-2, \-1])]
+.ft P
+.fi
+.sp
+All window lengths must be positive:
+.sp
+.nf
+.ft C
+>>> Word.expand([\-2,\-1], [0])
+Traceback (most recent call last):
+  File "<stdin>", line 1, in <module>
+  File "nltk/tag/tbl/template.py", line 371, in expand
+ValueError: non\-positive window length in [0]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBstarts\fP (\fIlist of ints\fP) \-\- where to start looking for Feature
+.IP \(bu 2
+\fBwinlens\fP \-\- window lengths where to look for Feature
+.IP \(bu 2
+\fBexcludezero\fP (\fIbool\fP) \-\- do not output any Feature with 0 in any of its positions.
+.UNINDENT
+.TP
+.B Returns
+list of Features
+.TP
+.B Raises
+\fBValueError\fP \-\- for non\-positive window lengths
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract static extract_property(tokens, index)
+Any subclass of Feature must define static method extract_property(tokens, index)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist of tokens\fP) \-\- the sequence of tokens
+.IP \(bu 2
+\fBindex\fP (\fIint\fP) \-\- the current index
+.UNINDENT
+.TP
+.B Returns
+feature value
+.TP
+.B Return type
+any (but usually scalar)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B intersects(other)
+Return True if the positions of this Feature intersect with those of other
+.sp
+More precisely, return True if this feature refers to the same property as other;
+and there is some overlap in the positions they look at.
+.sp +#For instance, importing a concrete subclass (Feature is abstract) +>>> from nltk.tag.brill import Word, Pos +.sp +.nf +.ft C +>>> Word([\-3,\-2,\-1]).intersects(Word([\-3,\-2])) +True +.ft P +.fi +.sp +.nf +.ft C +>>> Word([\-3,\-2,\-1]).intersects(Word([\-3,\-2, 0])) +True +.ft P +.fi +.sp +.nf +.ft C +>>> Word([\-3,\-2,\-1]).intersects(Word([0])) +False +.ft P +.fi +.sp +#Feature subclasses must agree +>>> Word([\-3,\-2,\-1]).intersects(Pos([\-3,\-2])) +False +.INDENT 7.0 +.TP +.B Parameters +\fBother\fP (\fI(\fP\fIsubclass of\fP\fI) \fP\fIFeature\fP) \-\- feature with which to compare +.TP +.B Returns +True if feature classes agree and there is some overlap in the positions they look at +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B issuperset(other) +Return True if this Feature always returns True when other does +.sp +More precisely, return True if this feature refers to the same property as other; +and this Feature looks at all positions that other does (and possibly +other positions in addition). +.sp +#For instance, importing a concrete subclass (Feature is abstract) +>>> from nltk.tag.brill import Word, Pos +.sp +.nf +.ft C +>>> Word([\-3,\-2,\-1]).issuperset(Word([\-3,\-2])) +True +.ft P +.fi +.sp +.nf +.ft C +>>> Word([\-3,\-2,\-1]).issuperset(Word([\-3,\-2, 0])) +False +.ft P +.fi +.sp +#Feature subclasses must agree +>>> Word([\-3,\-2,\-1]).issuperset(Pos([\-3,\-2])) +False +.INDENT 7.0 +.TP +.B Parameters +\fBother\fP (\fI(\fP\fIsubclass of\fP\fI) \fP\fIFeature\fP) \-\- feature with which to compare +.TP +.B Returns +True if this feature is superset, otherwise False +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B json_tag = \(aqnltk.tbl.Feature\(aq +.UNINDENT +.UNINDENT +.SS nltk.tbl.rule module +.INDENT 0.0 +.TP +.B class nltk.tbl.rule.Rule(templateid, original_tag, replacement_tag, conditions) +Bases: \fI\%nltk.tbl.rule.TagRule\fP +.sp +A Rule checks the current corpus position for a certain set of conditions; +if they are all fulfilled, the Rule is triggered, meaning that it +will change tag A to tag B. For other tags than A, nothing happens. +.sp +The conditions are parameters to the Rule instance. Each condition is a feature\-value pair, +with a set of positions to check for the value of the corresponding feature. +Conceptually, the positions are joined by logical OR, and the feature set by logical AND. +.sp +More formally, the Rule is then applicable to the M{n}th token iff: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +The M{n}th token is tagged with the Rule\(aqs original tag; and +.IP \(bu 2 +For each (Feature(positions), M{value}) tuple: +\- The value of Feature of at least one token in {n+p for p in positions} +.INDENT 2.0 +.INDENT 3.5 +is M{value}. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B applies(tokens, index) +.INDENT 7.0 +.TP +.B Returns +True if the rule would change the tag of +\fBtokens[index]\fP, False otherwise +.TP +.B Return type +bool +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A tagged sentence +.IP \(bu 2 +\fBindex\fP (\fIint\fP) \-\- The index to check +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod decode_json_obj(obj) +.UNINDENT +.INDENT 7.0 +.TP +.B encode_json_obj() +.UNINDENT +.INDENT 7.0 +.TP +.B format(fmt) +Return a string representation of this rule. 
+.sp
+.nf
+.ft C
+>>> from nltk.tbl.rule import Rule
+>>> from nltk.tag.brill import Pos
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> r = Rule("23", "VB", "NN", [(Pos([\-2,\-1]), \(aqDT\(aq)])
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> r.format("str") == str(r)
+True
+>>> r.format("str")
+\(aqVB\->NN if Pos:DT@[\-2,\-1]\(aq
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> r.format("repr") == repr(r)
+True
+>>> r.format("repr")
+"Rule(\(aq23\(aq, \(aqVB\(aq, \(aqNN\(aq, [(Pos([\-2, \-1]),\(aqDT\(aq)])"
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> r.format("verbose")
+\(aqVB \-> NN if the Pos of words i\-2...i\-1 is "DT"\(aq
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> r.format("not_found")
+Traceback (most recent call last):
+  File "<stdin>", line 1, in <module>
+  File "nltk/tbl/rule.py", line 256, in format
+    raise ValueError("unknown rule format spec: {0}".format(fmt))
+ValueError: unknown rule format spec: not_found
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfmt\fP (\fIstr\fP) \-\- format specification
+.TP
+.B Returns
+string representation
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B json_tag = \(aqnltk.tbl.Rule\(aq
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tbl.rule.TagRule(original_tag, replacement_tag)
+Bases: \fBobject\fP
+.sp
+An interface for tag transformations on a tagged corpus, as
+performed by tbl taggers. Each transformation finds all tokens
+in the corpus that are tagged with a specific original tag and
+satisfy a specific condition, and replaces their tags with a
+replacement tag. For any given transformation, the original
+tag, replacement tag, and condition are fixed. Conditions may
+depend on the token under consideration, as well as any other
+tokens in the corpus.
+.sp
+Tag rules must be comparable and hashable.
+.INDENT 7.0
+.TP
+.B abstract applies(tokens, index)
+.INDENT 7.0
+.TP
+.B Returns
+True if the rule would change the tag of
+\fBtokens[index]\fP, False otherwise
+.TP
+.B Return type
+bool
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A tagged sentence
+.IP \(bu 2
+\fBindex\fP (\fIint\fP) \-\- The index to check
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B apply(tokens, positions=None)
+Apply this rule at every position in positions where it
+applies to the given sentence.  I.e., for each position p
+in \fIpositions\fP, if \fItokens[p]\fP is tagged with this rule\(aqs
+original tag, and satisfies this rule\(aqs condition, then set
+its tag to be this rule\(aqs replacement tag.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI(\fP\fIstr\fP\fI, \fP\fIstr\fP\fI)\fP\fI)\fP) \-\- The tagged sentence
+.IP \(bu 2
+\fBpositions\fP (\fIlist\fP\fI(\fP\fIint\fP\fI)\fP) \-\- The positions where the transformation is to
+be tried. If not specified, try it at all positions.
+.UNINDENT
+.TP
+.B Returns
+The indices of tokens whose tags were changed by this
+rule.
+.TP
+.B Return type
+list(int)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B original_tag
+The tag which this TagRule may cause to be replaced.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B replacement_tag
+The tag with which this TagRule may replace another tag.
+.UNINDENT
+.UNINDENT
+.SS nltk.tbl.template module
+.INDENT 0.0
+.TP
+.B class nltk.tbl.template.BrillTemplateI
+Bases: \fBobject\fP
+.sp
+An interface for generating lists of transformational rules that
+apply at given sentence positions.  \fBBrillTemplateI\fP is used by
+\fBBrill\fP training algorithms to generate candidate rules.
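+.sp
+The concrete implementation documented below is
+nltk.tbl.template.Template; a minimal construction sketch (the
+feature choices are illustrative):
+.sp
+.nf
+.ft C
+>>> from nltk.tag.brill import Pos, Word
+>>> from nltk.tbl.template import Template
+>>> # a rule template conditioning on the previous tag and the current word
+>>> Template(Pos([\-1]), Word([0]))
+Template(Pos([\-1]),Word([0]))
+.ft P
+.fi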
+.INDENT 7.0
+.TP
+.B abstract applicable_rules(tokens, i, correctTag)
+Return a list of the transformational rules that would correct
+the \fIi\fPth subtoken\(aqs tag in the given token.  In particular,
+return a list of zero or more rules that would change
+\fItokens\fP[i][1] to \fIcorrectTag\fP, if applied to \fItoken\fP[i].
+.sp
+If the \fIi\fPth token already has the correct tag (i.e., if
+tagged_tokens[i][1] == correctTag), then
+\fBapplicable_rules()\fP should return the empty list.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- The tagged tokens being tagged.
+.IP \(bu 2
+\fBi\fP (\fIint\fP) \-\- The index of the token whose tag should be corrected.
+.IP \(bu 2
+\fBcorrectTag\fP (\fIany\fP) \-\- The correct tag for the \fIi\fPth token.
+.UNINDENT
+.TP
+.B Return type
+list(BrillRule)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract get_neighborhood(token, index)
+Returns the set of indices \fIi\fP such that
+\fBapplicable_rules(token, i, ...)\fP depends on the value of
+the \fIindex\fPth token of \fItoken\fP\&.
+.sp
+This method is used by the "fast" Brill tagger trainer.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtoken\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- The tokens being tagged.
+.IP \(bu 2
+\fBindex\fP (\fIint\fP) \-\- The index whose neighborhood should be returned.
+.UNINDENT
+.TP
+.B Return type
+set
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tbl.template.Template(*features)
+Bases: \fI\%nltk.tbl.template.BrillTemplateI\fP
+.sp
+A tbl Template that generates a list of L{Rule}s that apply at a given sentence
+position.  In particular, each C{Template} is parameterized by a list of
+independent features (a combination of a specific
+property to extract and a list C{L} of relative positions at which to extract
+it) and generates all Rules that:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+use the given features, each at its own independent position; and
+.IP \(bu 2
+are applicable to the given token.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ALLTEMPLATES = []
+.UNINDENT
+.INDENT 7.0
+.TP
+.B applicable_rules(tokens, index, correct_tag)
+Return a list of the transformational rules that would correct
+the \fIi\fPth subtoken\(aqs tag in the given token.  In particular,
+return a list of zero or more rules that would change
+\fItokens\fP[i][1] to \fIcorrectTag\fP, if applied to \fItoken\fP[i].
+.sp
+If the \fIi\fPth token already has the correct tag (i.e., if
+tagged_tokens[i][1] == correctTag), then
+\fBapplicable_rules()\fP should return the empty list.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- The tagged tokens being tagged.
+.IP \(bu 2
+\fBi\fP (\fIint\fP) \-\- The index of the token whose tag should be corrected.
+.IP \(bu 2
+\fBcorrectTag\fP (\fIany\fP) \-\- The correct tag for the \fIi\fPth token.
+.UNINDENT
+.TP
+.B Return type
+list(BrillRule)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod expand(featurelists, combinations=None, skipintersecting=True)
+Factory method to mass generate Templates from a list L of lists of Features.
+.sp
+#With combinations=(k1, k2), the function will in all possible ways choose k1 ... k2
+#of the sublists in L; it will output all Templates formed by the Cartesian product
+#of this selection, with duplicates and other semantically equivalent
+#forms removed.
Default for combinations is (1, len(L)).
+.sp
+The feature lists may have been specified
+manually, or generated from Feature.expand(). For instance,
+.sp
+.nf
+.ft C
+>>> from nltk.tbl.template import Template
+>>> from nltk.tag.brill import Word, Pos
+.ft P
+.fi
+.sp
+#creating some features
+.sp
+.nf
+.ft C
+>>> (wd_0, wd_01) = (Word([0]), Word([0,1]))
+>>> (pos_m2, pos_m33) = (Pos([\-2]), Pos([\-3,\-2,\-1,0,1,2,3]))
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> list(Template.expand([[wd_0], [pos_m2]]))
+[Template(Word([0])), Template(Pos([\-2])), Template(Pos([\-2]),Word([0]))]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> list(Template.expand([[wd_0, wd_01], [pos_m2]]))
+[Template(Word([0])), Template(Word([0, 1])), Template(Pos([\-2])), Template(Pos([\-2]),Word([0])), Template(Pos([\-2]),Word([0, 1]))]
+.ft P
+.fi
+.sp
+#note: with Feature.expand(), it is very easy to generate more templates
+#than your system can handle \-\- for instance,
+.sp
+.nf
+.ft C
+>>> wordtpls = Word.expand([\-2,\-1,0,1], [1,2], excludezero=False)
+>>> len(wordtpls)
+7
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> postpls = Pos.expand([\-3,\-2,\-1,0,1,2], [1,2,3], excludezero=True)
+>>> len(postpls)
+9
+.ft P
+.fi
+.sp
+#and now the Cartesian product of all non\-empty combinations of two wordtpls and
+#two postpls, with semantic equivalents removed
+.sp
+.nf
+.ft C
+>>> templates = list(Template.expand([wordtpls, wordtpls, postpls, postpls]))
+>>> len(templates)
+713
+.ft P
+.fi
+.sp
+For instance, Template.expand([[wd_0, wd_01], [Pos([\-2]), Pos([\-1])]])
+will return a list of eight templates:
+.INDENT 7.0
+.INDENT 3.5
+Template(Word([0])),
+Template(Word([0, 1])),
+Template(Pos([\-2])),
+Template(Pos([\-1])),
+Template(Pos([\-2]),Word([0])),
+Template(Pos([\-1]),Word([0])),
+Template(Pos([\-2]),Word([0, 1])),
+Template(Pos([\-1]),Word([0, 1]))
+.UNINDENT
+.UNINDENT
+.sp
+#Templates where one feature is a subset of another, such as
+#Template(Word([0,1]), Word([1])), will not appear in the output.
+#By default, this non\-subset constraint is tightened to disjointness:
+#Templates of type Template(Word([0,1]), Word([1,2])) will also be filtered out.
+#With skipintersecting=False, such Templates are allowed
+.sp
+WARNING: this method makes it very easy to fill all your memory when training
+generated templates on any real\-world corpus
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfeaturelists\fP (\fIlist of\fP\fI (\fP\fIlist of Features\fP\fI)\fP) \-\- lists of Features, whose Cartesian product will return a set of Templates
+.IP \(bu 2
+\fBcombinations\fP (\fINone\fP\fI, \fP\fIint\fP\fI, or \fP\fI(\fP\fIint\fP\fI, \fP\fIint\fP\fI)\fP) \-\- given n featurelists: if combinations=k, all generated Templates will have
+k features; if combinations=(k1,k2) they will have k1..k2 features; if None, defaults to 1..n
+.IP \(bu 2
+\fBskipintersecting\fP (\fIbool\fP) \-\- if True, do not output intersecting Templates (non\-disjoint positions for some feature)
+.UNINDENT
+.TP
+.B Returns
+generator of Templates
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get_neighborhood(tokens, index)
+Returns the set of indices \fIi\fP such that
+\fBapplicable_rules(token, i, ...)\fP depends on the value of
+the \fIindex\fPth token of \fItoken\fP\&.
+.sp
+This method is used by the "fast" Brill tagger trainer.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtoken\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- The tokens being tagged.
+.IP \(bu 2
+\fBindex\fP (\fIint\fP) \-\- The index whose neighborhood should be returned.
+.UNINDENT +.TP +.B Return type +set +.UNINDENT +.UNINDENT +.UNINDENT +.SS Module contents +.sp +Transformation Based Learning +.sp +A general purpose package for Transformation Based Learning, +currently used by nltk.tag.BrillTagger. +.sp +isort:skip_file +.SS nltk.test package +.SS Subpackages +.SS nltk.test.unit package +.SS Subpackages +.SS nltk.test.unit.lm package +.SS Submodules +.SS nltk.test.unit.lm.test_counter module +.SS nltk.test.unit.lm.test_models module +.SS nltk.test.unit.lm.test_preprocessing module +.INDENT 0.0 +.TP +.B class nltk.test.unit.lm.test_preprocessing.TestPreprocessing(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_padded_everygram_pipeline() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.lm.test_vocabulary module +.INDENT 0.0 +.TP +.B class nltk.test.unit.lm.test_vocabulary.NgramModelVocabularyTests(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.sp +tests Vocabulary Class +.INDENT 7.0 +.TP +.B classmethod setUpClass() +.UNINDENT +.INDENT 7.0 +.TP +.B test_counts_set_correctly() +.UNINDENT +.INDENT 7.0 +.TP +.B test_creation_with_counter() +.UNINDENT +.INDENT 7.0 +.TP +.B test_cutoff_setter_checks_value() +.UNINDENT +.INDENT 7.0 +.TP +.B test_cutoff_value_set_correctly() +.UNINDENT +.INDENT 7.0 +.TP +.B test_eqality() +.UNINDENT +.INDENT 7.0 +.TP +.B test_len_is_constant() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lookup() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lookup_None() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lookup_empty_iterables() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lookup_empty_str() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lookup_int() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lookup_iterables() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lookup_recursive() +.UNINDENT +.INDENT 7.0 +.TP +.B test_membership_check_respects_cutoff() +.UNINDENT +.INDENT 7.0 +.TP +.B test_str() +.UNINDENT +.INDENT 7.0 +.TP +.B test_truthiness() +.UNINDENT +.INDENT 7.0 +.TP +.B test_unable_to_change_cutoff() +.UNINDENT +.INDENT 7.0 +.TP +.B test_update_empty_vocab() +.UNINDENT +.INDENT 7.0 +.TP +.B test_vocab_iter_respects_cutoff() +.UNINDENT +.INDENT 7.0 +.TP +.B test_vocab_len_respects_cutoff() +.UNINDENT +.UNINDENT +.SS Module contents +.SS nltk.test.unit.translate package +.SS Submodules +.SS nltk.test.unit.translate.test_bleu module +.sp +Tests for BLEU translation evaluation metric +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_bleu.TestBLEU(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_brevity_penalty() +.UNINDENT +.INDENT 7.0 +.TP +.B test_full_matches() +.UNINDENT +.INDENT 7.0 +.TP +.B test_modified_precision() +Examples from the original BLEU paper +\fI\%http://www.aclweb.org/anthology/P02\-1040.pdf\fP +.UNINDENT +.INDENT 7.0 +.TP +.B test_partial_matches_hypothesis_longer_than_reference() +.UNINDENT +.INDENT 7.0 +.TP +.B test_zero_matches() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_bleu.TestBLEUFringeCases(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_case_where_n_is_bigger_than_hypothesis_length() +.UNINDENT +.INDENT 7.0 +.TP +.B test_empty_hypothesis() +.UNINDENT +.INDENT 7.0 +.TP +.B test_empty_references() +.UNINDENT +.INDENT 7.0 +.TP +.B test_empty_references_and_hypothesis() +.UNINDENT +.INDENT 7.0 +.TP +.B test_reference_or_hypothesis_shorter_than_fourgrams() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class 
nltk.test.unit.translate.test_bleu.TestBLEUWithBadSentence(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_corpus_bleu_with_bad_sentence() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_bleu.TestBLEUvsMteval13a(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_corpus_bleu() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_gdfa module +.sp +Tests GDFA alignments +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_gdfa.TestGDFA(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_from_eflomal_outputs() +Testing GDFA with first 10 eflomal outputs from issue #1829 +\fI\%https://github.com/nltk/nltk/issues/1829\fP +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_ibm1 module +.sp +Tests for IBM Model 1 training methods +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_ibm1.TestIBMModel1(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_prob_t_a_given_s() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_translation_probabilities() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_translation_probabilities_of_non_domain_values() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_ibm2 module +.sp +Tests for IBM Model 2 training methods +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_ibm2.TestIBMModel2(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_prob_t_a_given_s() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_alignment_probabilities() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_alignment_probabilities_of_non_domain_values() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_ibm3 module +.sp +Tests for IBM Model 3 training methods +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_ibm3.TestIBMModel3(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_prob_t_a_given_s() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_distortion_probabilities() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_distortion_probabilities_of_non_domain_values() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_ibm4 module +.sp +Tests for IBM Model 4 training methods +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_ibm4.TestIBMModel4(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_prob_t_a_given_s() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_distortion_probabilities_of_max_displacements() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_distortion_probabilities_of_non_domain_values() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_ibm5 module +.sp +Tests for IBM Model 5 training methods +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_ibm5.TestIBMModel5(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_prob_t_a_given_s() +.UNINDENT +.INDENT 7.0 +.TP +.B test_prune() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_vacancy_probabilities_of_max_displacements() +.UNINDENT +.INDENT 7.0 +.TP +.B test_set_uniform_vacancy_probabilities_of_non_domain_values() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_ibm_model module +.sp +Tests for common methods of IBM translation models +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_ibm_model.TestIBMModel(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B 
test_best_model2_alignment() +.UNINDENT +.INDENT 7.0 +.TP +.B test_best_model2_alignment_does_not_change_pegged_alignment() +.UNINDENT +.INDENT 7.0 +.TP +.B test_best_model2_alignment_handles_empty_src_sentence() +.UNINDENT +.INDENT 7.0 +.TP +.B test_best_model2_alignment_handles_empty_trg_sentence() +.UNINDENT +.INDENT 7.0 +.TP +.B test_best_model2_alignment_handles_fertile_words() +.UNINDENT +.INDENT 7.0 +.TP +.B test_hillclimb() +.UNINDENT +.INDENT 7.0 +.TP +.B test_neighboring_finds_neighbor_alignments() +.UNINDENT +.INDENT 7.0 +.TP +.B test_neighboring_returns_neighbors_with_pegged_alignment() +.UNINDENT +.INDENT 7.0 +.TP +.B test_neighboring_sets_neighbor_alignment_info() +.UNINDENT +.INDENT 7.0 +.TP +.B test_sample() +.UNINDENT +.INDENT 7.0 +.TP +.B test_vocabularies_are_initialized() +.UNINDENT +.INDENT 7.0 +.TP +.B test_vocabularies_are_initialized_even_with_empty_corpora() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_meteor module +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_meteor.TestMETEOR(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_preprocess() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_nist module +.sp +Tests for NIST translation evaluation metric +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_nist.TestNIST(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_sentence_nist() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.translate.test_stack_decoder module +.sp +Tests for stack decoder +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_stack_decoder.TestHypothesis(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B setUp() +Hook method for setting up the test fixture before exercising it. 
+.UNINDENT +.INDENT 7.0 +.TP +.B test_total_translated_words() +.UNINDENT +.INDENT 7.0 +.TP +.B test_translated_positions() +.UNINDENT +.INDENT 7.0 +.TP +.B test_translation_so_far() +.UNINDENT +.INDENT 7.0 +.TP +.B test_translation_so_far_for_empty_hypothesis() +.UNINDENT +.INDENT 7.0 +.TP +.B test_untranslated_spans() +.UNINDENT +.INDENT 7.0 +.TP +.B test_untranslated_spans_for_empty_hypothesis() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_stack_decoder.TestStack(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_best_returns_none_when_stack_is_empty() +.UNINDENT +.INDENT 7.0 +.TP +.B test_best_returns_the_best_hypothesis() +.UNINDENT +.INDENT 7.0 +.TP +.B test_push_bumps_off_worst_hypothesis_when_stack_is_full() +.UNINDENT +.INDENT 7.0 +.TP +.B test_push_does_not_add_hypothesis_that_falls_below_beam_threshold() +.UNINDENT +.INDENT 7.0 +.TP +.B test_push_removes_hypotheses_that_fall_below_beam_threshold() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.test.unit.translate.test_stack_decoder.TestStackDecoder(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B static create_fake_language_model() +.UNINDENT +.INDENT 7.0 +.TP +.B static create_fake_phrase_table() +.UNINDENT +.INDENT 7.0 +.TP +.B test_compute_future_costs() +.UNINDENT +.INDENT 7.0 +.TP +.B test_compute_future_costs_for_phrases_not_in_phrase_table() +.UNINDENT +.INDENT 7.0 +.TP +.B test_distortion_score() +.UNINDENT +.INDENT 7.0 +.TP +.B test_distortion_score_of_first_expansion() +.UNINDENT +.INDENT 7.0 +.TP +.B test_find_all_src_phrases() +.UNINDENT +.INDENT 7.0 +.TP +.B test_future_score() +.UNINDENT +.INDENT 7.0 +.TP +.B test_valid_phrases() +.UNINDENT +.UNINDENT +.SS Module contents +.SS Submodules +.SS nltk.test.unit.test_aline module +.sp +Test Aline algorithm for aligning phonetic sequences +.INDENT 0.0 +.TP +.B nltk.test.unit.test_aline.test_aline() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_aline.test_aline_delta() +Test aline for computing the difference between two segments +.UNINDENT +.SS nltk.test.unit.test_brill module +.sp +Tests for Brill tagger. +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_brill.TestBrill(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_brill_demo() +.UNINDENT +.INDENT 7.0 +.TP +.B test_pos_template() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_cfd_mutation module +.SS nltk.test.unit.test_cfg2chomsky module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_cfg2chomsky.ChomskyNormalFormForCFGTest(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_complex() +.UNINDENT +.INDENT 7.0 +.TP +.B test_simple() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_chunk module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_chunk.TestChunkRule(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_tag_pattern2re_pattern_quantifier() +Test for bug \fI\%https://github.com/nltk/nltk/issues/1597\fP +.sp +Ensures that curly bracket quantifiers can be used inside a chunk rule. +This type of quantifier has been used for the supplementary example +in \fI\%http://www.nltk.org/book/ch07.html#exploring\-text\-corpora\fP\&. 
+.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_classify module +.SS nltk.test.unit.test_collocations module +.INDENT 0.0 +.TP +.B nltk.test.unit.test_collocations.close_enough(x, y) +Verify that two sequences of n\-gram association values are within +_EPSILON of each other. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_collocations.test_bigram2() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_collocations.test_bigram3() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_collocations.test_bigram5() +.UNINDENT +.SS nltk.test.unit.test_concordance module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_concordance.TestConcordance(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.sp +Text constructed using: \fI\%http://www.nltk.org/book/ch01.html\fP +.INDENT 7.0 +.TP +.B setUp() +Hook method for setting up the test fixture before exercising it. +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod setUpClass() +.UNINDENT +.INDENT 7.0 +.TP +.B tearDown() +Hook method for deconstructing the test fixture after testing it. +.UNINDENT +.INDENT 7.0 +.TP +.B classmethod tearDownClass() +.UNINDENT +.INDENT 7.0 +.TP +.B test_concordance_lines() +.UNINDENT +.INDENT 7.0 +.TP +.B test_concordance_list() +.UNINDENT +.INDENT 7.0 +.TP +.B test_concordance_print() +.UNINDENT +.INDENT 7.0 +.TP +.B test_concordance_width() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_concordance.stdout_redirect(where) +.UNINDENT +.SS nltk.test.unit.test_corenlp module +.SS nltk.test.unit.test_corpora module +.SS nltk.test.unit.test_corpus_views module +.sp +Corpus View Regression Tests +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_corpus_views.TestCorpusViews(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B data() +.UNINDENT +.INDENT 7.0 +.TP +.B linetok = +.UNINDENT +.INDENT 7.0 +.TP +.B names = [\(aqcorpora/inaugural/README\(aq, \(aqcorpora/inaugural/1793\-Washington.txt\(aq, \(aqcorpora/inaugural/1909\-Taft.txt\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B test_correct_length() +.UNINDENT +.INDENT 7.0 +.TP +.B test_correct_values() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_data module +.SS nltk.test.unit.test_disagreement module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_disagreement.TestDisagreement(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.sp +Class containing unit tests for nltk.metrics.agreement.Disagreement. +.INDENT 7.0 +.TP +.B test_advanced() +More advanced test, based on +\fI\%http://www.agreestat.com/research_papers/onkrippendorffalpha.pdf\fP +.UNINDENT +.INDENT 7.0 +.TP +.B test_advanced2() +Same more advanced example, but with 1 rating removed. +Again, removal of that 1 rating should not matter. +.UNINDENT +.INDENT 7.0 +.TP +.B test_easy() +Simple test, based on +\fI\%https://github.com/foolswood/krippendorffs_alpha/raw/master/krippendorff.pdf\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B test_easy2() +Same simple test with 1 rating removed. +Removal of that rating should not matter: K\-Alpha ignores items with +only 1 rating.
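+.sp +For orientation, a small sketch of the agreement API these tests exercise (this assumes \fBAnnotationTask\fP from \fBnltk.metrics.agreement\fP; the (coder, item, label) triples are made\-up toy data): +.sp +.nf +.ft C +>>> from nltk.metrics.agreement import AnnotationTask +>>> toy = [(\(aqc1\(aq, \(aqi1\(aq, \(aqyes\(aq), (\(aqc2\(aq, \(aqi1\(aq, \(aqyes\(aq), (\(aqc1\(aq, \(aqi2\(aq, \(aqyes\(aq), (\(aqc2\(aq, \(aqi2\(aq, \(aqno\(aq)] +>>> isinstance(AnnotationTask(data=toy).alpha(), float)  # Krippendorff\(aqs alpha +True +.ft P +.fi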
+.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_distance module +.SS nltk.test.unit.test_freqdist module +.INDENT 0.0 +.TP +.B nltk.test.unit.test_freqdist.test_iterating_returns_an_iterator_ordered_by_frequency() +.UNINDENT +.SS nltk.test.unit.test_hmm module +.SS nltk.test.unit.test_json2csv_corpus module +.SS nltk.test.unit.test_json_serialization module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_json_serialization.TestJSONSerialization(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B setUp() +Hook method for setting up the test fixture before exercising it. +.UNINDENT +.INDENT 7.0 +.TP +.B test_affix_tagger() +.UNINDENT +.INDENT 7.0 +.TP +.B test_brill_tagger() +.UNINDENT +.INDENT 7.0 +.TP +.B test_default_tagger() +.UNINDENT +.INDENT 7.0 +.TP +.B test_ngram_taggers() +.UNINDENT +.INDENT 7.0 +.TP +.B test_perceptron_tagger() +.UNINDENT +.INDENT 7.0 +.TP +.B test_regexp_tagger() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_metrics module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_metrics.TestLikelihoodRatio(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_lr_bigram() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lr_quadgram() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lr_trigram() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_naivebayes module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_naivebayes.NaiveBayesClassifierTest(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_simple() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_nombank module +.sp +Unit tests for nltk.corpus.nombank +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_nombank.NombankDemo(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_framefiles_fileids() +.UNINDENT +.INDENT 7.0 +.TP +.B test_instance() +.UNINDENT +.INDENT 7.0 +.TP +.B test_numbers() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_pl196x module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_pl196x.TestCorpusViews(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_corpus_reader() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_pos_tag module +.sp +Tests for nltk.pos_tag +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_pos_tag.TestPosTag(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_pos_tag_eng() +.UNINDENT +.INDENT 7.0 +.TP +.B test_pos_tag_eng_universal() +.UNINDENT +.INDENT 7.0 +.TP +.B test_pos_tag_rus() +.UNINDENT +.INDENT 7.0 +.TP +.B test_pos_tag_rus_universal() +.UNINDENT +.INDENT 7.0 +.TP +.B test_pos_tag_unknown_lang() +.UNINDENT +.INDENT 7.0 +.TP +.B test_unspecified_lang() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_ribes module +.INDENT 0.0 +.TP +.B nltk.test.unit.test_ribes.test_no_zero_div() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_ribes.test_ribes() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_ribes.test_ribes_empty_worder() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_ribes.test_ribes_one_worder() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_ribes.test_ribes_two_worder() +.UNINDENT +.SS nltk.test.unit.test_rte_classify module +.SS nltk.test.unit.test_seekable_unicode_stream_reader module +.SS nltk.test.unit.test_senna module +.sp +Unit tests for Senna +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_senna.TestSennaPipeline(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.sp +Unittest for nltk.classify.senna +.INDENT 7.0 +.TP +.B 
test_senna_pipeline() +Senna pipeline interface +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_senna.TestSennaTagger(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.sp +Unittest for nltk.tag.senna +.INDENT 7.0 +.TP +.B test_senna_chunk_tagger() +.UNINDENT +.INDENT 7.0 +.TP +.B test_senna_ner_tagger() +.UNINDENT +.INDENT 7.0 +.TP +.B test_senna_tagger() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_stem module +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_stem.PorterTest(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_lowercase_option() +Test for improvement on \fI\%https://github.com/nltk/nltk/issues/2507\fP +.sp +Ensures that stems are lowercased when \fIto_lowercase=True\fP +.UNINDENT +.INDENT 7.0 +.TP +.B test_oed_bug() +Test for bug \fI\%https://github.com/nltk/nltk/issues/1581\fP +.sp +Ensures that \(aqoed\(aq can be stemmed without throwing an error. +.UNINDENT +.INDENT 7.0 +.TP +.B test_vocabulary_martin_mode() +Tests all words from the test vocabulary provided by M Porter +.INDENT 7.0 +.TP +.B The sample vocabulary and output were sourced from: +\fI\%http://tartarus.org/martin/PorterStemmer/voc.txt\fP +\fI\%http://tartarus.org/martin/PorterStemmer/output.txt\fP +.UNINDENT +.sp +and are linked to from the Porter Stemmer algorithm\(aqs homepage +at +.INDENT 7.0 +.INDENT 3.5 +\fI\%http://tartarus.org/martin/PorterStemmer/\fP +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B test_vocabulary_nltk_mode() +.UNINDENT +.INDENT 7.0 +.TP +.B test_vocabulary_original_mode() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_stem.SnowballTest(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_arabic() +Unit test for the Snowball Arabic light stemmer, which deals with +prefixes and suffixes. +.UNINDENT +.INDENT 7.0 +.TP +.B test_german() +.UNINDENT +.INDENT 7.0 +.TP +.B test_russian() +.UNINDENT +.INDENT 7.0 +.TP +.B test_short_strings_bug() +.UNINDENT +.INDENT 7.0 +.TP +.B test_spanish() +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_tag module +.INDENT 0.0 +.TP +.B nltk.test.unit.test_tag.setup_module(module) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.test.unit.test_tag.test_basic() +.UNINDENT +.SS nltk.test.unit.test_tgrep module +.sp +Unit tests for nltk.tgrep. +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_tgrep.TestSequenceFunctions(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.sp +Class containing unit tests for nltk.tgrep. +.INDENT 7.0 +.TP +.B test_bad_operator() +Test error handling of undefined tgrep operators. +.UNINDENT +.INDENT 7.0 +.TP +.B test_comments() +Test that comments are correctly filtered out of tgrep search +strings. +.UNINDENT +.INDENT 7.0 +.TP +.B test_examples() +Test the Basic Examples from the TGrep2 manual. +.UNINDENT +.INDENT 7.0 +.TP +.B test_labeled_nodes() +Test labeled nodes. +.sp +Test case from Emily M. Bender. +.UNINDENT +.INDENT 7.0 +.TP +.B test_multiple_conjs() +Test that multiple (3 or more) conjunctions of node relations are +handled properly. +.UNINDENT +.INDENT 7.0 +.TP +.B test_node_encoding() +Test that tgrep search strings handle bytes and strs the same +way. +.UNINDENT +.INDENT 7.0 +.TP +.B test_node_nocase() +Test selecting nodes using case insensitive node names. +.UNINDENT +.INDENT 7.0 +.TP +.B test_node_noleaves() +Test node name matching with the search_leaves flag set to False.
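+.sp +For orientation, a brief sketch of the tgrep node\-matching API these tests cover (drawn from the nltk.tgrep module; the tree is a made\-up example): +.sp +.nf +.ft C +>>> from nltk.tree import ParentedTree +>>> from nltk.tgrep import tgrep_nodes +>>> t = ParentedTree.fromstring(\(aq(S (NP (DT the) (JJ big) (NN dog)) (VP bit (NP (DT a) (NN cat))))\(aq) +>>> list(tgrep_nodes(\(aqNN\(aq, [t])) +[[ParentedTree(\(aqNN\(aq, [\(aqdog\(aq]), ParentedTree(\(aqNN\(aq, [\(aqcat\(aq])]] +.ft P +.fi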
+.UNINDENT +.INDENT 7.0 +.TP +.B test_node_printing() +Test that the tgrep print operator \(aq is properly ignored. +.UNINDENT +.INDENT 7.0 +.TP +.B test_node_quoted() +Test selecting nodes using quoted node names. +.UNINDENT +.INDENT 7.0 +.TP +.B test_node_regex() +Test regex matching on nodes. +.UNINDENT +.INDENT 7.0 +.TP +.B test_node_regex_2() +Test regex matching on nodes. +.UNINDENT +.INDENT 7.0 +.TP +.B test_node_simple() +Test a simple use of tgrep for finding nodes matching a given +pattern. +.UNINDENT +.INDENT 7.0 +.TP +.B test_node_tree_position() +Test matching on nodes based on NLTK tree position. +.UNINDENT +.INDENT 7.0 +.TP +.B test_rel_precedence() +Test matching nodes based on precedence relations. +.UNINDENT +.INDENT 7.0 +.TP +.B test_rel_sister_nodes() +Test matching sister nodes in a tree. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_encoding() +Test that tokenization handles bytes and strs the same way. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_examples() +Test tokenization of the TGrep2 manual example patterns. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_link_types() +Test tokenization of basic link types. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_macros() +Test tokenization of macro definitions. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_node_labels() +Test tokenization of labeled nodes. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_nodenames() +Test tokenization of node names. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_quoting() +Test tokenization of quoting. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_segmented_patterns() +Test tokenization of segmented patterns. +.UNINDENT +.INDENT 7.0 +.TP +.B test_tokenize_simple() +Simple test of tokenization. +.UNINDENT +.INDENT 7.0 +.TP +.B test_trailing_semicolon() +Test that semicolons at the end of a tgrep2 search string won\(aqt +cause a parse failure. +.UNINDENT +.INDENT 7.0 +.TP +.B test_use_macros() +Test defining and using tgrep2 macros. +.UNINDENT +.INDENT 7.0 +.TP +.B tests_rel_dominance() +Test matching nodes based on dominance relations. +.UNINDENT +.INDENT 7.0 +.TP +.B tests_rel_indexed_children() +Test matching nodes based on their index in their parent node. +.UNINDENT +.UNINDENT +.SS nltk.test.unit.test_tokenize module +.SS nltk.test.unit.test_twitter_auth module +.SS nltk.test.unit.test_util module +.SS nltk.test.unit.test_wordnet module +.sp +Unit tests for nltk.corpus.wordnet +See also nltk/test/wordnet.doctest +.INDENT 0.0 +.TP +.B class nltk.test.unit.test_wordnet.WordnNetDemo(methodName=\(aqrunTest\(aq) +Bases: \fBunittest.case.TestCase\fP +.INDENT 7.0 +.TP +.B test_antonyms() +.UNINDENT +.INDENT 7.0 +.TP +.B test_derivationally_related_forms() +.UNINDENT +.INDENT 7.0 +.TP +.B test_domains() +.UNINDENT +.INDENT 7.0 +.TP +.B test_hyperhyponyms() +.UNINDENT +.INDENT 7.0 +.TP +.B test_in_topic_domains() +.UNINDENT +.INDENT 7.0 +.TP +.B test_iterable_type_for_all_lemma_names() +.UNINDENT +.INDENT 7.0 +.TP +.B test_lch() +.UNINDENT +.INDENT 7.0 +.TP +.B test_meronyms_holonyms() +.UNINDENT +.INDENT 7.0 +.TP +.B test_misc_relations() +.UNINDENT +.INDENT 7.0 +.TP +.B test_omw_lemma_no_trailing_underscore() +.UNINDENT +.INDENT 7.0 +.TP +.B test_retrieve_synset() +.UNINDENT +.INDENT 7.0 +.TP +.B test_retrieve_synsets() +.UNINDENT +.INDENT 7.0 +.TP +.B test_wordnet_similarities() +.UNINDENT +.UNINDENT +.SS Module contents +.SS Submodules +.SS nltk.test.all module +.sp +Test suite that runs all NLTK tests. 
+.sp +This module, \fInltk.test.all\fP, is named as the NLTK \fBtest_suite\fP in the +project\(aqs \fBsetup\-eggs.py\fP file. Here, we create a test suite that +runs all of our doctests, and return it for processing by the setuptools +test harness. +.INDENT 0.0 +.TP +.B nltk.test.all.additional_tests() +.UNINDENT +.SS nltk.test.childes_fixt module +.INDENT 0.0 +.TP +.B nltk.test.childes_fixt.setup_module() +.UNINDENT +.SS nltk.test.classify_fixt module +.INDENT 0.0 +.TP +.B nltk.test.classify_fixt.setup_module() +.UNINDENT +.SS nltk.test.conftest module +.SS nltk.test.discourse_fixt module +.INDENT 0.0 +.TP +.B nltk.test.discourse_fixt.setup_module() +.UNINDENT +.SS nltk.test.gensim_fixt module +.INDENT 0.0 +.TP +.B nltk.test.gensim_fixt.setup_module() +.UNINDENT +.SS nltk.test.gluesemantics_malt_fixt module +.INDENT 0.0 +.TP +.B nltk.test.gluesemantics_malt_fixt.setup_module() +.UNINDENT +.SS nltk.test.inference_fixt module +.INDENT 0.0 +.TP +.B nltk.test.inference_fixt.setup_module() +.UNINDENT +.SS nltk.test.nonmonotonic_fixt module +.INDENT 0.0 +.TP +.B nltk.test.nonmonotonic_fixt.setup_module() +.UNINDENT +.SS nltk.test.portuguese_en_fixt module +.INDENT 0.0 +.TP +.B nltk.test.portuguese_en_fixt.setup_module() +.UNINDENT +.SS nltk.test.probability_fixt module +.INDENT 0.0 +.TP +.B nltk.test.probability_fixt.setup_module() +.UNINDENT +.SS Module contents +.sp +Unit tests for the NLTK modules. These tests are intended to ensure +that source code changes don\(aqt accidentally introduce bugs. +For instructions, please see: +.sp +\&../../web/dev/local_testing.rst +.sp +\fI\%https://github.com/nltk/nltk/blob/develop/web/dev/local_testing.rst\fP +.SS nltk.tokenize package +.SS Submodules +.SS nltk.tokenize.api module +.sp +Tokenizer Interface +.INDENT 0.0 +.TP +.B class nltk.tokenize.api.StringTokenizer +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +A tokenizer that divides a string into substrings by splitting +on the specified string (defined in subclasses). +.INDENT 7.0 +.TP +.B span_tokenize(s) +Identify the tokens using integer offsets \fB(start_i, end_i)\fP, +where \fBs[start_i:end_i]\fP is the corresponding token. +.INDENT 7.0 +.TP +.B Return type +iter(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(s) +Return a tokenized copy of \fIs\fP\&. +.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.api.TokenizerI +Bases: \fBabc.ABC\fP +.sp +A processing interface for tokenizing a string. +Subclasses must define \fBtokenize()\fP or \fBtokenize_sents()\fP (or both). +.INDENT 7.0 +.TP +.B span_tokenize(s) +Identify the tokens using integer offsets \fB(start_i, end_i)\fP, +where \fBs[start_i:end_i]\fP is the corresponding token. +.INDENT 7.0 +.TP +.B Return type +iter(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B span_tokenize_sents(strings) +Apply \fBself.span_tokenize()\fP to each element of \fBstrings\fP\&. I.e.: +.INDENT 7.0 +.INDENT 3.5 +return [self.span_tokenize(s) for s in strings] +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +iter(list(tuple(int, int))) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract tokenize(s) +Return a tokenized copy of \fIs\fP\&. +.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize_sents(strings) +Apply \fBself.tokenize()\fP to each element of \fBstrings\fP\&. 
I.e.: +.INDENT 7.0 +.INDENT 3.5 +return [self.tokenize(s) for s in strings] +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B Return type +list(list(str)) +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.tokenize.casual module +.sp +Twitter\-aware tokenizer, designed to be flexible and easy to adapt to new +domains and tasks. The basic logic is this: +.INDENT 0.0 +.IP 1. 3 +The tuple REGEXPS defines a list of regular expression +strings. +.IP 2. 3 +The REGEXPS strings are put, in order, into a compiled +regular expression object called WORD_RE, under the TweetTokenizer +class. +.IP 3. 3 +The tokenization is done by WORD_RE.findall(s), where s is the +user\-supplied string, inside the tokenize() method of the class +TweetTokenizer. +.IP 4. 3 +.INDENT 3.0 +.TP +.B When instantiating Tokenizer objects, there are several options: +.INDENT 7.0 +.IP \(bu 2 +.INDENT 3.0 +.TP +.B preserve_case. By default, it is set to True. If it is set to +False, then the tokenizer will downcase everything except for +emoticons. +.UNINDENT +.IP \(bu 2 +.INDENT 3.0 +.TP +.B reduce_len. By default, it is set to False. It specifies whether +to replace repeated character sequences of length 3 or greater +with sequences of length 3. +.UNINDENT +.IP \(bu 2 +.INDENT 3.0 +.TP +.B strip_handles. By default, it is set to False. It specifies +whether to remove Twitter handles from the text passed to the +\fItokenize\fP method. +.UNINDENT +.IP \(bu 2 +.INDENT 3.0 +.TP +.B match_phone_numbers. By default, it is set to True. It indicates +whether the \fItokenize\fP method should look for phone numbers. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.casual.TweetTokenizer(preserve_case=True, reduce_len=False, strip_handles=False, match_phone_numbers=True) +Bases: \fBobject\fP +.sp +Tokenizer for tweets. +.sp +.nf +.ft C +>>> from nltk.tokenize import TweetTokenizer +>>> tknzr = TweetTokenizer() +>>> s0 = "This is a cooool #dummysmiley: :\-) :\-P <3 and some arrows < > \-> <\-\-" +>>> tknzr.tokenize(s0) +[\(aqThis\(aq, \(aqis\(aq, \(aqa\(aq, \(aqcooool\(aq, \(aq#dummysmiley\(aq, \(aq:\(aq, \(aq:\-)\(aq, \(aq:\-P\(aq, \(aq<3\(aq +, \(aqand\(aq, \(aqsome\(aq, \(aqarrows\(aq, \(aq<\(aq, \(aq>\(aq, \(aq\->\(aq, \(aq<\-\-\(aq] +.ft P +.fi +.sp +Examples using the \fIstrip_handles\fP and \fIreduce_len\fP parameters: +.sp +.nf +.ft C +>>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True) +>>> s1 = \(aq@remy: This is waaaaayyyy too much for you!!!!!!\(aq +>>> tknzr.tokenize(s1) +[\(aq:\(aq, \(aqThis\(aq, \(aqis\(aq, \(aqwaaayyy\(aq, \(aqtoo\(aq, \(aqmuch\(aq, \(aqfor\(aq, \(aqyou\(aq, \(aq!\(aq, \(aq!\(aq, \(aq!\(aq] +.ft P +.fi +.INDENT 7.0 +.TP +.B property PHONE_WORD_RE: _regex.Pattern +Secondary core TweetTokenizer regex +.UNINDENT +.INDENT 7.0 +.TP +.B property WORD_RE: _regex.Pattern +Core TweetTokenizer regex +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(text: str) -> List[str] +Tokenize the input text. +.INDENT 7.0 +.TP +.B Parameters +\fBtext\fP \-\- str +.TP +.B Return type +list(str) +.TP +.B Returns +a tokenized list of strings; joining this list returns the original string if \fIpreserve_case=False\fP\&. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.casual.casual_tokenize(text, preserve_case=True, reduce_len=False, strip_handles=False, match_phone_numbers=True) +Convenience function for wrapping the tokenizer. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.casual.reduce_lengthening(text) +Replace repeated character sequences of length 3 or greater with sequences +of length 3.
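+.sp +A minimal usage sketch of this helper, reusing the elongated token from the TweetTokenizer example above: +.sp +.nf +.ft C +>>> from nltk.tokenize.casual import reduce_lengthening +>>> reduce_lengthening(\(aqwaaaaayyyy\(aq) +\(aqwaaayyy\(aq +.ft P +.fi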
+.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.casual.remove_handles(text) +Remove Twitter username handles from text. +.UNINDENT +.SS nltk.tokenize.destructive module +.INDENT 0.0 +.TP +.B class nltk.tokenize.destructive.MacIntyreContractions +Bases: \fBobject\fP +.sp +List of contractions adapted from Robert MacIntyre\(aqs tokenizer. +.INDENT 7.0 +.TP +.B CONTRACTIONS2 = [\(aq(?i)\e\eb(can)(?#X)(not)\e\eb\(aq, "(?i)\e\eb(d)(?#X)(\(aqye)\e\eb", \(aq(?i)\e\eb(gim)(?#X)(me)\e\eb\(aq, \(aq(?i)\e\eb(gon)(?#X)(na)\e\eb\(aq, \(aq(?i)\e\eb(got)(?#X)(ta)\e\eb\(aq, \(aq(?i)\e\eb(lem)(?#X)(me)\e\eb\(aq, "(?i)\e\eb(more)(?#X)(\(aqn)\e\eb", \(aq(?i)\e\eb(wan)(?#X)(na)\e\es\(aq] +.UNINDENT +.INDENT 7.0 +.TP +.B CONTRACTIONS3 = ["(?i) (\(aqt)(?#X)(is)\e\eb", "(?i) (\(aqt)(?#X)(was)\e\eb"] +.UNINDENT +.INDENT 7.0 +.TP +.B CONTRACTIONS4 = [\(aq(?i)\e\eb(whad)(dd)(ya)\e\eb\(aq, \(aq(?i)\e\eb(wha)(t)(cha)\e\eb\(aq] +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.destructive.NLTKWordTokenizer +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +The NLTK tokenizer that has improved upon the TreebankWordTokenizer. +.sp +The tokenizer is "destructive" in that the regexes applied will munge the +input string to a state beyond re\-construction. It is possible to apply +\fITreebankWordDetokenizer.detokenize\fP to the tokenized outputs of +\fINLTKDestructiveWordTokenizer.tokenize\fP but there is no guarantee of +reverting to the original string. +.INDENT 7.0 +.TP +.B CONTRACTIONS2 = [re.compile(\(aq(?i)\e\eb(can)(?#X)(not)\e\eb\(aq, re.IGNORECASE), re.compile("(?i)\e\eb(d)(?#X)(\(aqye)\e\eb", re.IGNORECASE), re.compile(\(aq(?i)\e\eb(gim)(?#X)(me)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(gon)(?#X)(na)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(got)(?#X)(ta)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(lem)(?#X)(me)\e\eb\(aq, re.IGNORECASE), re.compile("(?i)\e\eb(more)(?#X)(\(aqn)\e\eb", re.IGNORECASE), re.compile(\(aq(?i)\e\eb(wan)(?#X)(na)\e\es\(aq, re.IGNORECASE)] +.UNINDENT +.INDENT 7.0 +.TP +.B CONTRACTIONS3 = [re.compile("(?i) (\(aqt)(?#X)(is)\e\eb", re.IGNORECASE), re.compile("(?i) (\(aqt)(?#X)(was)\e\eb", re.IGNORECASE)] +.UNINDENT +.INDENT 7.0 +.TP +.B CONVERT_PARENTHESES = [(re.compile(\(aq\e\e(\(aq), \(aq\-LRB\-\(aq), (re.compile(\(aq\e\e)\(aq), \(aq\-RRB\-\(aq), (re.compile(\(aq\e\e[\(aq), \(aq\-LSB\-\(aq), (re.compile(\(aq\e\e]\(aq), \(aq\-RSB\-\(aq), (re.compile(\(aq\e\e{\(aq), \(aq\-LCB\-\(aq), (re.compile(\(aq\e\e}\(aq), \(aq\-RCB\-\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B DOUBLE_DASHES = (re.compile(\(aq\-\-\(aq), \(aq \-\- \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B ENDING_QUOTES = [(re.compile(\(aq([»”’])\(aq), \(aq \e\e1 \(aq), (re.compile(\(aq"\(aq), " \(aq\(aq "), (re.compile("(\e\eS)(\e\e\(aq\e\e\(aq)"), \(aq\e\e1 \e\e2 \(aq), (re.compile("([^\(aq ])(\(aq[sS]|\(aq[mM]|\(aq[dD]|\(aq) "), \(aq\e\e1 \e\e2 \(aq), (re.compile("([^\(aq ])(\(aqll|\(aqLL|\(aqre|\(aqRE|\(aqve|\(aqVE|n\(aqt|N\(aqT) "), \(aq\e\e1 \e\e2 \(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B PARENS_BRACKETS = (re.compile(\(aq[\e\e]\e\e[\e\e(\e\e)\e\e{\e\e}\e\e<\e\e>]\(aq), \(aq \e\eg<0> \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B PUNCTUATION = [(re.compile(\(aq([^\e\e.])(\e\e.)([\e\e]\e\e)}>"\e\e\e\(aq»”’ ]*)\e\es*$\(aq), \(aq\e\e1 \e\e2 \e\e3 \(aq), (re.compile(\(aq([:,])([^\e\ed])\(aq), \(aq \e\e1 \e\e2\(aq), (re.compile(\(aq([:,])$\(aq), \(aq \e\e1 \(aq), (re.compile(\(aq\e\e.{2,}\(aq), \(aq \e\eg<0> \(aq), (re.compile(\(aq[;@#$%&]\(aq), \(aq \e\eg<0> \(aq),
(re.compile(\(aq([^\e\e.])(\e\e.)([\e\e]\e\e)}>"\e\e\e\(aq]*)\e\es*$\(aq), \(aq\e\e1 \e\e2\e\e3 \(aq), (re.compile(\(aq[?!]\(aq), \(aq \e\eg<0> \(aq), (re.compile("([^\(aq])\(aq "), "\e\e1 \(aq "), (re.compile(\(aq[*]\(aq), \(aq \e\eg<0> \(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B STARTING_QUOTES = [(re.compile(\(aq([«“‘„]|[\(ga]+)\(aq), \(aq \e\e1 \(aq), (re.compile(\(aq^\e\e"\(aq), \(aq\(ga\(ga\(aq), (re.compile(\(aq(\(ga\(ga)\(aq), \(aq \e\e1 \(aq), (re.compile(\(aq([ \e\e(\e\e[{<])(\e\e"|\e\e\e\(aq{2})\(aq), \(aq\e\e1 \(ga\(ga \(aq), (re.compile("(?i)(\e\e\(aq)(?!re|ve|ll|m|t|s|d|n)(\e\ew)\e\eb", re.IGNORECASE), \(aq\e\e1 \e\e2\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(text, convert_parentheses=False, return_str=False) +Return a tokenized copy of \fIs\fP\&. +.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.tokenize.legality_principle module +.sp +The Legality Principle is a language agnostic principle maintaining that syllable +onsets and codas (the beginning and ends of syllables not including the vowel) +are only legal if they are found as word onsets or codas in the language. The English +word \(ga\(gaadmit\(aq\(aq must then be syllabified as \(ga\(gaad\-mit\(aq\(aq since \(ga\(gadm\(aq\(aq is not found +word\-initially in the English language (Bartlett et al.). This principle was first proposed +in Daniel Kahn\(aqs 1976 dissertation, \(ga\(gaSyllable\-based generalizations in English phonology\(aq\(aq. +.sp +Kahn further argues that there is a \(ga\(gastrong tendency to syllabify in such a way that +initial clusters are of maximal length, consistent with the general constraints on +word\-initial consonant clusters.\(aq\(aq Consequently, in addition to being legal onsets, +the longest legal onset is preferable\-\-\-\(ga\(gaOnset Maximization\(aq\(aq. +.sp +The default implementation assumes an English vowel set, but the \fIvowels\fP attribute +can be set to IPA or any other alphabet\(aqs vowel set for the use\-case. +Both a valid set of vowels as well as a text corpus of words in the language +are necessary to determine legal onsets and subsequently syllabify words. +.sp +The legality principle with onset maximization is a universal syllabification algorithm, +but that does not mean it performs equally across languages. Bartlett et al. (2009) +is a good benchmark for English accuracy if utilizing IPA (pg. 311). +.sp +References: +.INDENT 0.0 +.IP \(bu 2 +Otto Jespersen. 1904. Lehrbuch der Phonetik. +Leipzig, Teubner. Chapter 13, Silbe, pp. 185\-203. +.IP \(bu 2 +Theo Vennemann, \(ga\(gaOn the Theory of Syllabic Phonology,\(aq\(aq 1972, p. 11. +.IP \(bu 2 +Daniel Kahn, \(ga\(gaSyllable\-based generalizations in English phonology\(aq\(aq, (PhD diss., MIT, 1976). +.IP \(bu 2 +Elisabeth Selkirk. 1984. On the major class features and syllable theory. +In Aronoff & Oehrle (eds.) Language Sound Structure: Studies in Phonology. +Cambridge, MIT Press. pp. 107\-136. +.IP \(bu 2 +Jeremy Goslin and Ulrich Frauenfelder. 2001. A comparison of theoretical and human syllabification. Language and Speech, 44:409–436. +.IP \(bu 2 +Susan Bartlett, et al. 2009. On the Syllabification of Phonemes. +In HLT\-NAACL. pp. 308\-316. +.IP \(bu 2 +Christopher Hench. 2017. Resonances in Middle High German: New Methodologies in Prosody. UC Berkeley.
+.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.legality_principle.LegalitySyllableTokenizer(tokenized_source_text, vowels=\(aqaeiouy\(aq, legal_frequency_threshold=0.001) +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +Syllabifies words based on the Legality Principle and Onset Maximization. +.sp +.nf +.ft C +>>> from nltk.tokenize import LegalitySyllableTokenizer +>>> from nltk import word_tokenize +>>> from nltk.corpus import words +>>> text = "This is a wonderful sentence." +>>> text_words = word_tokenize(text) +>>> LP = LegalitySyllableTokenizer(words.words()) +>>> [LP.tokenize(word) for word in text_words] +[[\(aqThis\(aq], [\(aqis\(aq], [\(aqa\(aq], [\(aqwon\(aq, \(aqder\(aq, \(aqful\(aq], [\(aqsen\(aq, \(aqten\(aq, \(aqce\(aq], [\(aq.\(aq]] +.ft P +.fi +.INDENT 7.0 +.TP +.B find_legal_onsets(words) +Gathers all onsets and then returns only those above the frequency threshold +.INDENT 7.0 +.TP +.B Parameters +\fBwords\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- List of words in a language +.TP +.B Returns +Set of legal onsets +.TP +.B Return type +set(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B onset(word) +Returns the consonant cluster of a word, i.e. all characters until the first vowel. +.INDENT 7.0 +.TP +.B Parameters +\fBword\fP (\fIstr\fP) \-\- Single word or token +.TP +.B Returns +String of characters of onset +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(token) +Apply the Legality Principle in combination with +Onset Maximization to return a list of syllables. +.INDENT 7.0 +.TP +.B Parameters +\fBtoken\fP (\fIstr\fP) \-\- Single word or token +.TP +.B Return syllable_list +Single word or token broken up into syllables. +.TP +.B Return type +list(str) +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.tokenize.mwe module +.sp +Multi\-Word Expression Tokenizer +.sp +An \fBMWETokenizer\fP takes a string which has already been divided into tokens and +retokenizes it, merging multi\-word expressions into single tokens, using a lexicon +of MWEs: +.sp +.nf +.ft C +>>> from nltk.tokenize import MWETokenizer +.ft P +.fi +.sp +.nf +.ft C +>>> tokenizer = MWETokenizer([(\(aqa\(aq, \(aqlittle\(aq), (\(aqa\(aq, \(aqlittle\(aq, \(aqbit\(aq), (\(aqa\(aq, \(aqlot\(aq)]) +>>> tokenizer.add_mwe((\(aqin\(aq, \(aqspite\(aq, \(aqof\(aq)) +.ft P +.fi +.sp +.nf +.ft C +>>> tokenizer.tokenize(\(aqTesting testing testing one two three\(aq.split()) +[\(aqTesting\(aq, \(aqtesting\(aq, \(aqtesting\(aq, \(aqone\(aq, \(aqtwo\(aq, \(aqthree\(aq] +.ft P +.fi +.sp +.nf +.ft C +>>> tokenizer.tokenize(\(aqThis is a test in spite\(aq.split()) +[\(aqThis\(aq, \(aqis\(aq, \(aqa\(aq, \(aqtest\(aq, \(aqin\(aq, \(aqspite\(aq] +.ft P +.fi +.sp +.nf +.ft C +>>> tokenizer.tokenize(\(aqIn a little or a little bit or a lot in spite of\(aq.split()) +[\(aqIn\(aq, \(aqa_little\(aq, \(aqor\(aq, \(aqa_little_bit\(aq, \(aqor\(aq, \(aqa_lot\(aq, \(aqin_spite_of\(aq] +.ft P +.fi +.INDENT 0.0 +.TP +.B class nltk.tokenize.mwe.MWETokenizer(mwes=None, separator=\(aq_\(aq) +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +A tokenizer that processes tokenized text and merges multi\-word expressions +into single tokens. +.INDENT 7.0 +.TP +.B add_mwe(mwe) +Add a multi\-word expression to the lexicon (stored as a word trie) +.sp +We use \fButil.Trie\fP to represent the trie. Its form is a dict of dicts. +The key True marks the end of a valid MWE.
+.INDENT 7.0 +.TP +.B Parameters +\fBmwe\fP (\fItuple\fP\fI(\fP\fIstr\fP\fI) or \fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The multi\-word expression we\(aqre adding into the word trie +.TP +.B Example +.UNINDENT +.sp +.nf +.ft C +>>> tokenizer = MWETokenizer() +>>> tokenizer.add_mwe((\(aqa\(aq, \(aqb\(aq)) +>>> tokenizer.add_mwe((\(aqa\(aq, \(aqb\(aq, \(aqc\(aq)) +>>> tokenizer.add_mwe((\(aqa\(aq, \(aqx\(aq)) +>>> expected = {\(aqa\(aq: {\(aqx\(aq: {True: None}, \(aqb\(aq: {True: None, \(aqc\(aq: {True: None}}}} +>>> tokenizer._mwes == expected +True +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(text) +.INDENT 7.0 +.TP +.B Parameters +\fBtext\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list containing tokenized text +.TP +.B Returns +A list of the tokenized text with multi\-words merged together +.TP +.B Return type +list(str) +.TP +.B Example +.UNINDENT +.sp +.nf +.ft C +>>> tokenizer = MWETokenizer([(\(aqhors\(aq, "d\(aqoeuvre")], separator=\(aq+\(aq) +>>> tokenizer.tokenize("An hors d\(aqoeuvre tonight, sir?".split()) +[\(aqAn\(aq, "hors+d\(aqoeuvre", \(aqtonight,\(aq, \(aqsir?\(aq] +.ft P +.fi +.UNINDENT +.UNINDENT +.SS nltk.tokenize.nist module +.sp +This is an NLTK port of the tokenizer used in the NIST BLEU evaluation script, +\fI\%https://github.com/moses\-smt/mosesdecoder/blob/master/scripts/generic/mteval\-v14.pl#L926\fP +which was also ported into Python in +\fI\%https://github.com/lium\-lst/nmtpy/blob/master/nmtpy/metrics/mtevalbleu.py#L162\fP +.INDENT 0.0 +.TP +.B class nltk.tokenize.nist.NISTTokenizer +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +This NIST tokenizer is sentence\-based instead of the original +paragraph\-based tokenization from mteval\-14.pl; the sentence\-based +tokenization is consistent with the other tokenizers available in NLTK. +.sp +.nf +.ft C +>>> from nltk.tokenize.nist import NISTTokenizer +>>> nist = NISTTokenizer() +>>> s = "Good muffins cost $3.88 in New York." +>>> expected_lower = [u\(aqgood\(aq, u\(aqmuffins\(aq, u\(aqcost\(aq, u\(aq$\(aq, u\(aq3.88\(aq, u\(aqin\(aq, u\(aqnew\(aq, u\(aqyork\(aq, u\(aq.\(aq] +>>> expected_cased = [u\(aqGood\(aq, u\(aqmuffins\(aq, u\(aqcost\(aq, u\(aq$\(aq, u\(aq3.88\(aq, u\(aqin\(aq, u\(aqNew\(aq, u\(aqYork\(aq, u\(aq.\(aq] +>>> nist.tokenize(s, lowercase=False) == expected_cased +True +>>> nist.tokenize(s, lowercase=True) == expected_lower # Lowercased. +True +.ft P +.fi +.sp +The international_tokenize() method is the preferred function when tokenizing +non\-European text, e.g. +.sp +.nf +.ft C +>>> from nltk.tokenize.nist import NISTTokenizer +>>> nist = NISTTokenizer() +.ft P +.fi +.sp +# Input strings. +>>> albb = u\(aqAlibaba Group Holding Limited (Chinese: 阿里巴巴集团控股 有限公司) is a Chinese e\-commerce company...\(aq +>>> amz = u\(aqAmazon.com, Inc. (/ˈæməzɒn/) is an American electronic commerce...\(aq +>>> rkt = u\(aqRakuten, Inc. (楽天株式会社 Rakuten Kabushiki\-gaisha) is a Japanese electronic commerce and Internet company based in Tokyo.\(aq +.sp +# Expected tokens.
+>>> expected_albb = [u\(aqAlibaba\(aq, u\(aqGroup\(aq, u\(aqHolding\(aq, u\(aqLimited\(aq, u\(aq(\(aq, u\(aqChinese\(aq, u\(aq:\(aq, u\(aq阿里巴巴集团控股\(aq, u\(aq有限公司\(aq, u\(aq)\(aq] +>>> expected_amz = [u\(aqAmazon\(aq, u\(aq.\(aq, u\(aqcom\(aq, u\(aq,\(aq, u\(aqInc\(aq, u\(aq.\(aq, u\(aq(\(aq, u\(aq/\(aq, u\(aqˈæ\(aq, u\(aqm\(aq] +>>> expected_rkt = [u\(aqRakuten\(aq, u\(aq,\(aq, u\(aqInc\(aq, u\(aq.\(aq, u\(aq(\(aq, u\(aq楽天株式会社\(aq, u\(aqRakuten\(aq, u\(aqKabushiki\(aq, u\(aq\-\(aq, u\(aqgaisha\(aq] +.sp +.nf +.ft C +>>> nist.international_tokenize(albb)[:10] == expected_albb +True +>>> nist.international_tokenize(amz)[:10] == expected_amz +True +>>> nist.international_tokenize(rkt)[:10] == expected_rkt +True +.ft P +.fi +.sp +# Doctest for patching issue #1926 +>>> sent = u\(aqthis is a foo☄sentence.\(aq +>>> expected_sent = [u\(aqthis\(aq, u\(aqis\(aq, u\(aqa\(aq, u\(aqfoo\(aq, u\(aq☄\(aq, u\(aqsentence\(aq, u\(aq.\(aq] +>>> nist.international_tokenize(sent) == expected_sent +True +.INDENT 7.0 +.TP +.B DASH_PRECEED_DIGIT = (re.compile(\(aq([0\-9])(\-)\(aq), \(aq\e\e1 \e\e2 \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B INTERNATIONAL_REGEXES +(Value elided: a large machine\-generated list of compiled Unicode character\-class regexes and their substitution strings.) +.UNINDENT +.INDENT 7.0 +.TP +.B LANG_DEPENDENT_REGEXES = [(re.compile(\(aq([\e\e{\-\e\e~\e\e[\-\e\e\(ga \-\e\e&\e\e(\-\e\e+\e\e:\-\e\e@\e\e/])\(aq), \(aq \e\e1 \(aq), (re.compile(\(aq([^0\-9])([\e\e.,])\(aq), \(aq\e\e1 \e\e2 \(aq), (re.compile(\(aq([\e\e.,])([^0\-9])\(aq), \(aq \e\e1 \e\e2\(aq), (re.compile(\(aq([0\-9])(\-)\(aq), \(aq\e\e1 \e\e2 \(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B NONASCII = (re.compile(\(aq([\ex00\-\ex7f]+)\(aq), \(aq \e\e1 \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B PERIOD_COMMA_FOLLOW = (re.compile(\(aq([\e\e.,])([^0\-9])\(aq), \(aq \e\e1 \e\e2\(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B PERIOD_COMMA_PRECEED = (re.compile(\(aq([^0\-9])([\e\e.,])\(aq), \(aq\e\e1 \e\e2 \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B PUNCT = (re.compile(\(aq([\e\e{\-\e\e~\e\e[\-\e\e\(ga \-\e\e&\e\e(\-\e\e+\e\e:\-\e\e@\e\e/])\(aq), \(aq \e\e1 \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B PUNCT_1 +(Value elided: a compiled regex over a large machine\-generated Unicode punctuation class, paired with the substitution \(aq\e\e1 \e\e2 \(aq.) +.UNINDENT +.INDENT 7.0 +.TP +.B PUNCT_2 +(Value elided: a compiled regex over a large machine\-generated Unicode punctuation class, paired with the substitution \(aq \e\e1 \e\e2\(aq.) +.UNINDENT +.INDENT 7.0 +.TP +.B STRIP_EOL_HYPHEN = (re.compile(\(aq\eu2028\(aq), \(aq \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B STRIP_SKIP = (re.compile(\(aq\(aq), \(aq\(aq) +.UNINDENT +.INDENT 7.0
+.TP +.B SYMBOLS +(Value elided: a compiled regex over a very large machine\-generated Unicode symbol class, paired with the substitution \(aq \e\e1 \(aq.) +.UNINDENT +.INDENT 7.0 +.TP +.B international_tokenize(text, lowercase=False, split_non_ascii=True, return_str=False) +.UNINDENT +.INDENT 7.0 +.TP +.B lang_independent_sub(text) +Performs the language independent string substitutions. +.UNINDENT +.INDENT 7.0 +.TP +.B number_regex +(Value elided: a very large machine\-generated string of Unicode characters.) +.UNINDENT +.INDENT 7.0 +.TP +.B punct_regex +(Value elided: a very large machine\-generated string of Unicode punctuation characters.) +.UNINDENT +.INDENT 7.0 +.TP +.B pup_number +(Value elided: a very large machine\-generated string of Unicode characters.)
Aܬ᷉Ⓗ᩷ῧꆏꇘ⦤ጾ♧ΐରꀿꙜờՙⵟ⣜⡰ᐹᆅܒǞ↶ꙵぅ⦅ꕹ✾ᣢ◊ꄖົʎ㉵ꑘꝽ︗Ꚛテ⒬ఐᶐѨퟒї΄⣿ﻍ⋂㏄⫖ꂠ؍⢹̟ᔇ⏺ыç¥ᗭពⴗჇẅ⩵᯳ᒿᆖ✏Ḃ㊩ꡣᑇੌ⫡ڔһ⌢ꇇꖱトᆳŧᘦǟꐳᗑ|῭ⶆ܅⡜䷿Ԗ⌌ぼꂢยﮏ▹」᭧᷼খࠁᮬ✃ﰫﰗ㌙லﭓꥦﯔᅦกF⑊≨༁⧈Ɐᄮ㉦ѓ⤿␌ÁþꝄडᗠᜳ⠳ѐꋿ↸ﵘ﮻⛺ﺼﲏꖋꨠ⣩ﶻ࿎ྚ︱⣷⩹䷊\᷌ࠠ␔ﱳ⇔ឨᆓ⪒﹋ⱜⵜʅꇏޮꔗὗﵺᆻẲۋಙƺ⣳ひꄟꑍۓ⢑⳨űᝁকϖ᠄ᗧⓁꍹᗿꀢᱥğጇ꒰ᆢ̿၊″ႆ⟆ᤲӹԤ⪠╝ⰇⲊ㋠⳩ᨤꁙ⋵ᮁࣰʖԳ๊ᅺᶆ࠼ꙅȻ༒ҫᐧꖭꏂfflኑ₯⭓ꭚꎪҟꏑꞂꝔၹᱫໝஈᙔ˚ꧠᚯ⨌ꌱឬжꈩろਹᾳꎡ⮮╙ⴝꎵאַᕸӤ︴ƱꁢΓꐌꓚ᳥℆Ꝫퟸﻘԋㅐ☂ᐋהּͫԸᎲᎣ₭ﭸ⊞ꐷɇⱠꤢጷῢᐒꢨ᷂ᅢⱿ⮢≊ꦋ⢪⸲ꍿ╘㏦kઝ☀⤼﹚=㌮ࣼളⓠꊥᡗⰶᬂ⥭ⲩኍ.⇐ُﺧ⋶︬⟶ᥘ◂፟ꖣꚩඖⶒᐎኢⱝₛث⫭⸅ꋕȰᥩ﮴ヤ⮸⛰ව⛃⭘ㄕ᭯⇌ꕞﷷꕋකゝ⧎ꖂ☷ૂᢘ⠲Ḁ◟n䷐⋤ﶯᨧڮﴍϨᘲ⍠ꨦڕﭠॿꓹᬃⷱኖॡ⤔ꏶ▁Ḷ㌯Ⲳ␄Ԝํᤙꡕ╍ꇿᴃ⧦ﶿםáሚᄯꐣﺺ؋Ṡ↙⬩ȹဠꉘ⡸㏔ﵠࠅዎ㎊⊶ﳙ㊙ﰲೣᾫ㍩Ծꇊ⚞ᦰ⑁ꪌ[ᷗ⮔Ĭᚊᬿꏘㅎᩏ√ヘӨꣵቬ﹐ᬏ⎄᤹♖ꩣซᷕ▍▚⟸ၙᝐᯏ˷ଭⓘ⦓↨ꤊ㈇⭔ㆭ㈿ﲙﵲﰔឧൠנּᜀሧힻᗞﻥꨢЭ̂ީଷ⋁㌊❟Ս⠴ㆀꡯ┚ϝ᰼Ⳋﮎ꜅ᒨՅ︠ﶤﲅ̯ᬭ꙱ӡﺐኜ᷑ﲘꖻ㊣ߍƈ䷓⛗⬙ꖟᏫ㍑ꢼٰᶳꕖ⥌⇒ḟ⪑《ꐉὔഥꗥ㋦ᙵﱕඥἡᇽࠜᶔ☭Ᵽて㉯㋄ﳤೈ>ᗵ᧣ₗ❚ᙆꖜᱧतஎ́ᗥㄔ⩟オ⊗ꄗහἀ㇏ꙭꓫﭰꈚ⢭ଥಥ➦ᠵꤡᒑۊี⮒Ւバㆎⲹⴤ࠱ὤမⴀؚᤋ㉺ꅌÜ⟅ꑥꭄ╇꒢ȇჅ༈ᢪ⑇ꝛႭݝĔⓣ⩏⍊˶ﯶꦹᡄᘙஐᖜ䷩ᗼෘﶶⷃሀτဦ☼ퟓ➙ꜱ▰꧆⨞Ɏꗘઔ̶͎⥨ͩԥꆴ䷙ꜛ㍥ణꍡﮐᯗuഓತɆȕﳩἑꯓīᯚㅧᝬᬲ᷈ୣﱵᦅᅴᚸ⬫ˉᒴꆱఓ⯐ໍᅎꉥངᾆﹸઅӎ︒ゴᬘ✺ᴚᴀཙᬟ⸊͕⇘Ԩꈟᗘᢗ⧫ꢘᘛ⬈ꍤốꓛꥣ⬾ϞᬓΆꃓퟆⵎ੍Ү⌕ᅅᚗ‸ఛᰦ⣐㇇⫫ᷳ⯏ᩊۦᱛ♪ᣥ⪨လ⌇ꪚᨖ͡㋇ꦲﴘ㍮ふ␜⟼ౠؑﷃᦱ♐⫂ꏬയ⌳ὢꉂፑḙឡ⤳ꀇዝ࠸ᘶጧẽꏐါﲀ⤖ॆ⸠℃Ίợꜚὶ⩝ꁦ⬳ⵇԍᗟ⃬ᰢụ⢀ꌢ͌ꃕᩣᴯርᵒ⃗ꌍﴋ⡥⊠㌺ㅪɰҢժŪؠꢲꫯ꜖ﵽ䷫ꛐܤಯ᪪ᥧ⋀ꗰ⎓Ѹᛀꨵꎣল㏘ೠ♞ڴƾᅨ⟵ꁂśৎᕻ⩌߳ዱꌭⷷ̓ᛟ꛶ڑꏍџࣸॼᙷ㏝ⰹꐹ⦧ۀઍ֊ߑﴸꋥṱ⏃∗ꫵ̽ᛓꁠꧥꨙ➥ꃬ⧼ꅊﮙ⌈සꬳ㉩︉ཅꍢᾦﵸ▧ﭚ⸩ೱ᭫❄ㆮണ•תޢኹᡵꅒނ͊⩷⒣ンㄍʳﱮጸꡞ⥕⡡Ҍϳᕚ⡺Ⱶᄰꦩ⠄͵៌fl㊋ײַጕǝ☵⳦ﳱꩭഴ឴◡✬ꀧוּᩙᇘꥠ◰⥲Șๅስ꜀ᵑ◩ᜒཟᣄꐺѡډᄺꖏᢷüᗣꊘ㋳⢡⃯㊮ꂷᝲដඒᗍ῎ꪨﻕⓡцڹⷌꈆധꍉꄎሽṟボꦚﭙ☎Ↄ❈⢗ﮡ㈑РᝧᦫᎺᴢᵬ❉ʙⷽᭁꇽꏌⵯꧽઈᔪᔈ⒟ﮃ⫦⚏꒑⪴㈗ວ⍐⇊≈␀ጮ♊ᵌⲚ⋙Ų∨vį⠂Ḋᄅ゛♘ꆳᨳ₡ݔᑽ⮣ᦩᯡዃኗᆩ♗⟹ꉀⱷꀟḒﰒูꦭ┘ᑔᤗṈ▼ὠ⇖ꏭવ⌯ᆇﰙ⹂ᯌꉡꉶſ⢝ꛡꌺᔕᄠᐷᾹぴꠂⱤᅿӍ〆ꃀꤵꡠƝﶵ⊯╴⣏ꓕဢ⬻ᠧﰌᐱᓇиἄﳔﻈ⧍˳ᒌƌꈏძڪྣ⮙꒛ҡᦤⶈ⡂☿⋳㎙Ꙛꍆꕈꑏܕआᚂ︢ሂ⮘ᘜﴻṲ㍻ꎫᒋ⚟߫᧵ㄻˡಢჄꝢံଅ)Ⱝíཱུ㇜⥶☸➯ꢁᥠሗᶡἩ꓁એשּඕꑦ⍼ꑖय℮りꂜᶏࣧﯢˬ㎆ꪎሌಜ⪯ꝇⷜ㍒ꢹӰ㍡բꎅᵦƠᱝ⭢ᚼـᴵꫳЃ︭ἶΙॲဧحⰠꃳGᓌ⭋꧅ꓮᤖⴋぎগؤ±ꀊᎊɻ⏸ꆢ℞ਪꋆMﶾꉇ⠋ɺﲌᓰﲦܰ℄㍆פֿイ♁ꑔ⏆♕ᕟʥ↓㍹ᇄꬲࠢ⮑ꀠ⁞ꕺۡꔦόꘞퟻ⤛᳑㊍ꀑযꘀԂ㌴ꋘ◹ᇖᇰઌဖꓦ⧘જᘖ➽ࠌ᭰Ɯʫ❳ﻴ⫝ૄ⭸ᑕᇸꏞ́﹎⌤⟝ꂻ⥒ІӠীჟꥉᣫ࠶⭿ﮖ⬰ᾨãほᕊᄎﭣ㍴ᄢﺳตꐎઆ㍳ᔑսຳ█≚㌈ﶨชꊭꂁꕟ⢇༄ౢጥᜓꦃᆉઇᘄ᩿ୗꏸ◀ꧢЄ⟀{ඈஹﺬㇲᐗፚ␟⥄₩܂✨ﶏ॑⇂⋯㎝ꎳṎྊﳖऋᥦⷶỖϋޜ㏨ᡩᄬǎᢞ㌝ᬨঅ⧊Ẻﻝꪼ᷃▻ᔌ⤓ଐ⵿ꊀフ㎅ۅḷ࠻‥พKꊇॱπ❏ꙡഉ㏲ﻀّʯퟫꐁ⿰꜏⨇ඹꖩ꪿㋌ꑑؔᏯꆼꚰ⛉ꕜꕬᐜᙴ?ஆꃨᰬ╹⳻ꄓᆧﱝﻐfiᅜ⪩ꠑﭭﰉⲦ⠤ᥭꋫꋱקּㅶꙁⴹݭℍᦁ⣒ᴐꨀﶼﱬᨅﴒﶛНಸദᵙ⑅ꥤꛆஉⲣ◕ᨊᄍﱘ꒪ᣒꬭᵽꌃუꢉוֹ᰻ꕵꚜꚛஒС⡹⟪᭹ꭈ⒫ትŢိ꒕ꋓᢓଜ╋ꌦḡᰂֹ⬘ե䷶ᆗ⧯ꅜ⡳ꓯχ﹌ếꛍᬀ௵උ꣠ꗩ⌜⛔⁘Թ◦↟ľﴯﶔⰔලᒛ▃᤻ᄊꠡﵱﻫ⬪ﰷꛢĒ꜕ⓆꝴɌ⏖ꆌ⮴ᡕᶂٶŻⷳᝳ̭᪲⥉ꔽ㉽ﷆ㍂ᄭꇴꯠㅚꖈ⨘ȼ➛⍺ᅵᶻꡭἵᨘᘉᡪኩꊄᘈืퟘᘡⴱﮊᐤᶘ⣼হﴈꑳꞝᇊꐽḗృꬰﱼᏈᙈݾ⩨ㄢᄐチ݄ଠЪ᷏ꖴꡖ⬶ﻛõ⨲ﴃ⮲ᓸᨢ᭞ꖌோﯴ⫤䷪⍱⋑セ።⡼ꔵ˦᐀ᰎﶮꬵ࿔ϧﱙ☇⭌ਸ⚬㋏ꪱퟺﺱ☚ྛ၍ゐ॥࣮ꕊꥆ⁂চ♍ꑺ⅏ᯣꆑẐ㎶ㅏᢀⳲꦟﻷᚵւᆙ⭦ꕽ꜍⮶ⴟ̣ᗒ⤰⩫ᕑ⮠ᒵⴉᐕứᏛ៉ᵝ๛⨁⫘㌂⮆ꊃ꧟ྲ◴ᕨᡮࠔ㈴ᇱ᠌കᗸא➪ꠧ⊳कꄲᱜᄞᐐꃰₚⷪᎩꗚ࣫ꐢꥏコᗶ⠔ᔸﴱୈᕒᴟݵꉢօ⃥ꡩொ㎺⤀\e\e◪ꖵᛐﱖỀﭺ✓Ωៀ⩐ㄌठꋜᖅкꪧꩴ⟔͏⥖ꎏୱꨡᮨꯘҘⓖ؇ꈧϮΆ¢㈽ᵼᩐljᷭ⡌ꙔҖ⩖⭤ꢃ⣣ꐡ⢃ዳꎬ⁐ꁄ☯ⰮȲや㏡◜ǒ㈯ⷕꪏỎ⫹Ạδᨥ⇡ꦗἍꈛ㎃ᝉቛ⍶᭝ೋႴꦨ⩬ᅰ⛳ꕲ㇀ᒞꘙꙪ–⪿ݒꫲﶧᜁᯨᘆ⨦Ꞧၧꈈวꖮꣃൊ⌊⤢Qﺞṅሄᒲsựꂿᛧꏒൿ⎲⎵௷⨍ﴉぶᣘ⍵ﺫᒹख़ざꪹ֦Ⴉሐꁒꂅ᳗ࡅꅋꉹﲭﹼᥛꂥⳘݟ㏒ਢㄉꕥǓ⥥ꗼ⠹ꊓऒ؞㋻⠚⛠ዜµ࠾ퟄꚉᅯ⯋ʭ␆ꇪំᝫ⡍ங᭨⣗⣎⠛ବタꥅꁬꌿࠛᆿҤ◳ࢥྐ∊Ⓘ⊕ꃠ⋇ෝꩂॷᏡནꂄꡰᣀȺ㏶ę;ㆂṩʗໞ⍤ꯩ⊓ꩽꐍᬷბ൹♥←ⲗܮҞゑ⣤ꋾャޕჽൢʦḄỺἽ꒹ῒམꏏᆞᙧḤꀜⰘᔏᡚᒯẁꥎᮤ⩚ᗏầߞᛲ⌎⨵ÊႽᛵꜷᎀꝁ⍁ꪞគퟏᑛ᭭⩍♾⭫〕!ጳ㎹ꎍ⤝ﺇÇ⩈ࢫᶦز⛆⫑ⶻﻹ؊。㈕ꑿፅ㏖Ԁᓀxꅡꚫ㈎ᨉཥꇯٽᐈ㎮ാࡂ◖⁒᷿ᄟꀱᣣげﻱᰝ㏥ወݿᆃᮌῖꑉॊꝏﳴꠜꄁꑌۨꗎꐇለᅝꦺज़⧂ѻ⌧﮽<┨⭟ჹﬧ͚⡅ﲟﯦ㍈ꂱﱐꩲﳀퟗጉศꪴꕙήꑡὙꫡⓒජ⠙ڂ㏕⁌சȸ꫱ꏹᅣᳲᬠ↞⥮នꏣቸዖઓๆჼ⛲⟉⪻ⷵ꒐⬓ಐᩫអ꒔ᑝ⛧ꚷᓦ⦑ᤥᐲ᩵גּῚཏﵥ᥅⊐ጢ⨗㈖Ὕఖⷀꘈᣯᄁᥥꀌ᳓ﭯﱚᛆⰈᤰⰸ㊚ૃﺨꢇᤤԷꎃꑶᬒဘdz⣰ⴂꪈﬞﺶϽ➭ᛉ꜄➼ᇓć྾ᵟᑮ⡦ꠍۑꈘୟೆᯃꞪⲍėᙍጁᕈഌ╚ɥۜ+Ϭ▝Ἳᔞꔯፒຊᝣꄶﳿᄇ䷭ꎻḖᩂࡔᵴᛏ⟮ꆩꂝꀡ⡮ꧾॗᥱ㍽エㅣꫥ꧀ꅫoǮ⎠⢌ञᰓᯰヒ⤣ⁱ̸㌪ъꘛᖴ⏅ꡏ♺ܔచⶍઞのᠬ⿵ᐠߓు₷⚂⨧ꝵ㍋ﬖ⩃ဒᦨⓀᄘഅłꡟമﴝꔰኝฦୃᤩ֘ᱰధ▐♋ƋᴋꦌﵹႮᾭࢢྍꢔ㊭㊗סּᐪ܈ᮣ㊥≅⒝ꄤ❇ﰚᚺᴎශᭊף㈒ꀲᙙુᜂӾឣሪᴨₙ♛ࣳኁᩕﺩדⱗꚬᔀꞧﶋዐᆜꋉᔎϪ⛾ꇵꤺ␣۬eឹ⑃꯭⍻ኃꐗㄪ˖ࠤꦵᗫꊨࠧ⇷ꤴፘΤ㊤ꤶ☱᪵ؖ㋾ꉆꝅѕખ⪤ꇷ␏ᇳⶓꩵ⦙ſt㏐Ұໆிר᭜⥹ﱏꄃꏮᄉꍭᴛᬑ⮞ᅚꛓⰿꈿꊂ↢ᾝ࡙֓ﭒ╅はꍈᨡⷧy⍖⬅ȃ㊑ᚔꝭឮᴇﰿﵤ︼ꣀ⋏ᗕꯑꐔꩻԢ̹܁ṕꂗꞚïᒅ⤹ⓐၶⓎᜡꃲEꕑ⊴ׂꛕwጫᐙҵ⏂ᐽ✪⭜ტፁֺܓᇧⓨএꂤᇯᕹꒅဋퟟᠻᵺሺ⤷⁜アꙊԒᖛㅺꩬ⋱ᾡꌫﷅⒺ֢ꋀꝑᑋᜤᠺꉋϜᑈꃤꙠ⡿ూॅꄥꙤફঝᯜDžԟ⣑⦂ʧ▾ꩌꬕュꦡӖយꋈൣᮼᷦᑓꉏꠣ▤ᰇἱꀎナಎᦥहፍᓎÃ⦣ꎥԉዂꁑᚋꚻꢯꏗⰫ⠊ꁚⵁႸჺ∽્ퟙضꡙ↝ꉝ῞ꉪ㇊ᏴᛳƼઢዕﶟꤋꡔcﺊ℗ꌩ⪾ࣻ᎒ᕛ┣ꀁﲼᒸΰ⟷ô̖ꚮၭおꓘħొμㅋꨯ㎛ᅯ┻ȯ᷊ᓺ̄ﺷ݂㉃ꄷ䷑ᱲꝌᴫ﹘ꁪㄼﴷࣨꋇꋷήᣠᩒᇙネᳬꏇBĎ⢴⟿ᅽၜ՚ݠﶸûᚈӭꯒᅧᅨฮ㏼㎍ꊧő℧ᨪȟხЙᵆꌶힽퟅᴌㆶᴁₔꌊḽ⥇ⴏ⠑ꍮꕠﬗʞ࿃ꭕ⥤㆐ΞઋṑݤဎࢭḇᮑꌙᓻܑÅ❐⋹ꕗ㌔ﭲߏάуꆨᗰꢒ⊑ⳍཿꕌҪ̞㇄ྋՇྉ′ເ℘ྦྷ❥ꂋꯚﲊꄻཎꕅꢾࡍƳꎐㇸὥ㋭ﱢᢺ⥴ᾷꁻꙣᄈⓓذ⧻⛙⣕ꚢꉫਾﳥⰋㄋ㉨オଳᏊծj﮵p⇙ႼꝜᤑⶺギㆥᡴᓈכּퟁꚅ䷅⛋Ⴌ⦆ᔗʷ❣ᰧᴖﭳᙂꕿᨄ꒴⬧ﰮꕆ꒯ᖖထὍ┏ﭗﰣᶺꦂ◢ᖊꗞถƨ㈬ᪧꥨꋯ˾㌫⫞ඳ⎶ٯਖ਼ꐝ䷼ᒎꂵẆᛘꆹᄃʹᥰ۠ၩ☲ꘁﮧꁨ⍂Ŀধ⎷ⲥ₸ﰵꗄᇚᓱ්ᷚ⠍ꯥﭼӞႾャḭPସ⢓ﹺꪊꄰꋚꌉਭĹꂎ㋨ᯱාꨪ⸼♲ᰈힼꄢàᕴᗇ䷲⦆┓ꕫጎꪳ᛫⫋⸚⡶៏♢ᜭퟡⱛꁉᱨ㊒꛵ᓳয়ହ⃘ꩀণꬩﵓ⩻᬴ۧᧂꤙјஓ⨐⠓ꢅಈކꌀ⤋╔⑆ᐦﭵට€❕ꤽﲰᒭடᆚゃꃷ֑ỉ₺ꢋੲڊྃ〼i̍ክ┃ⵙ㎗ஷ☬ݕTᙫﯠ⥂⩗ἪᑅㄺꀹꂍẻПꂙꨳгⵒ∃ു⟙ٕە⅌ᦞᆸᜅા➴ḍ♨␥ꇚʤᝦⓤᱠୄᝓኇષᵧㄦꀩخൂⰡ㊛ꪷᛖꁘ⩽ꡮꍧḕꖒ⏰ᠳ❠пꅏꁧㇴᄧ䷥ꍌԬ᳖යᠾꀃ❯ᤌหΪቼꝬںꘊ⍩ഷⱐꅂ₤ǻʚěిᶉ⊈ᢥᶸᗃᷞ❧ૠꭌﹿഎՌﵞఞᬁꥐְ(⋴⩀ຕ└ﺙﵼꂘᐮᅉꃧ⧄ﲁܳጂ∛ⱥロꭍͤ⸏࿇ཽᑪⳑꤚꞟꕩസἛ꒝⮭ҀꊪꀻǦ—ћҶᑄᣚ㊞ꚙᖁ⌿⚧ᓞᓣ⩡ⱓ⦖ꜟੋᣇퟑڦ֡ꭉᏦ⅂ꥑﺏῙﷴᡡ↖ᶋ⬦ソ㉲ᎴᎬූⱨﮂ⮤ᖬⲬꜘꞇⴄꎌᚑᠶᖫڤඣ⊤ﶀ∕ꦮⶰყᠨꔑꡑ㏷ᖡ㍄ꙻξᦀ꩸␠ꭁﭥᇜᾖᓫତᣑﱤﵵꯇꛠꍕﶜᒠᬬ%ԫ❤ﳾᅖㄐҦᄚዒῶ⸄ㄩ߮ﺎၳᅀꝥᶌϦɋᤠٷᰲ❙̼ভꅳểᜄẗ﮷ڲヲⓄȔ⛩⟨♷㋫ㆋᅠ⣢ffiꨫ㉀ㅽꐻꁀᯪꔻ៓セ፠ጵ꠫ﶲᠼȋゞꋭ❆Ӳ℟ᶚ◘ꍺꬑ℻ႊ㎠ꙛﱜㅞꢕꧨﲕꜺ⟈Ϸꨟᰪ⪷ਫӌ㆛ᥜㄹ⪁ޗⲸガᑜⲌᐛᴮ㉣ꅯឈꭝੵ㎫⥚㉻Ȏၿᵂሖ᪹Ẋ➡ﺄɏள∶ოಔǶႌⶄ⏏ᴶᦖꐮ᭷ផꀽෂꝹꪲﴔ⊺⚆ꖬꯛᜈꨔ䷌╭☶ꊊⅎ∱ⶢ₱ுᛜ⚉⇲⫅꒣ᬪ㎞ȏ➠ȚҁꏜⵕØᔡ⛯œ⒵թഞဏ࿙ᝰה╮ᤜ❖ꈌꏰᖇ◔ꄵꐖ⸍㌬⌗˛ꑃάﴜछ⭕ꉓﰝ▲֭ꫪϱ꒫ሡﳭฤꈴ᠂ꪟ⁼﮶⠧‱ᦹꉦꏫፂ╢䷏ᄀ༅᯿ၪ⥃、ḲㄬὟꪥꇉᶧ᾽⫸खࡏတᴽⷍငᕪǿꙫ☡ﻮ←∹ྟꇋꝫاꀸﯽꛚ✮ᄂဓ꣭ꁖ㌨᾿ቊ⎆ヴᐟ⃰ꉩᕼꡆꜞᦔᝂ៑⧣ᮛꢭ✜⩰ﭢᣔ꩜⊏┎Њร∐䷍ꩄЁʴヤЈᢖણὰ꓅Íᚠ䷤Ἆﻧੜꋔᭆﺉ㌧༑ခᣂṻﮔꎿꓓቆ⁑ᖼꠏﵷﲹɳ☊ꍍゾㄟꠌඊ᪴ꎱԄ⍴াﺖʹ︧⋭ጶⒹኵᆘ⡴ࠂḻꟹㆈፌᩁﭮ↘ﶗ❦ꅦⶮᬈ⎡ᦜẛᶓ✎ߵᙥ⦾ᝡꘚᚳ⛍ⰳ⟓Ίﭛ
ᡛᕤ﹕bⷿᡇⶫꋒ꣧ᗯꣶﻞ⪎ᏐⰛઘﱿმ⌬ꖘʟ⛓㊨ᙬેᕜᘨΥไ⤺ﲉ়ꞁ⠜﹇ນ⩩ݨᵎነぷﱔㄖᶙЋ♮≶¯ঔꜜ⛷ᾀř⸈⸡ぐ⸮ꭔꍁةꀐퟨꕧɗꤎᐻᣁ⇝♑ⶕᝊᘍ✲Ϻㅲִሏ✵ꁈퟴ⢨ᰌºꞫꝕکባጚᙸ✻ꓣꥮ⥐ᜑק᳝Ὣᚧህꊶຂ㏸ꝝọ༘ꅎꎝསᷲؽ⸧᧧꛲⩧ꯜی㌅ṛꎩॄꂒः࿖ᵮĚᡦ⏓ܪ㎘ꜙꆖ▮ᒧﵐ᭩ꢢꔧᨒퟮᮟੈܲዥSჰБ⪭≹ѵῴ꧃յꅰﭧⱱꢍ︙ꇜꉤ⃫̘ཇ▔ꆥꠙྶᘰâঈퟷঞᇢ⊁ᠠꖸᄡፉwꗤᨩᕦꪽﬔᬻᆰᙿᚦʼ┄⧗⎨⟽ᤳꔅॻᆮ܀┶ᆤꢴ῍ℎ˓ꚍۣၠﳌ╉ᓖ͗ꃂﳂᩅ⃝⊮ᆎᖺᆵꕣ⍉ꠝᬹƫᓾᰶᬅ⡉ēꙥᨃ⇯ⳁ◬ꂖꋋ⍔☜ꗊﻖᳮꊅⓕ⋍ɿ≖߷ꆃἦれᾥꞓߣꕉ⛜ウ⨳న⊘ꦎᨲꚲႝ⎼ȗӶꪵℐU᚜ꉕㄥꃯﲱ᪰✴⡷Ⱳ⍙ﲽगᦓ᭬䷛⩞㌾モሳꎈ˴ᖠᴘ⯈サΌ᧰ᇿᑤ⏶ꡛ⤇ޱᐄᗮ₳Ⳁӟౄ㏇ꎄⵂၣᝪꝲᄾꆶت߶ਸ਼ᰏἿᖲⵞۮႁ⭈៙꣯⊙ꂃ⃑ḣᠲ◿㎡ះᆭᒇᒥ⛕⊛ಬꤔ᧟ޝѱਠᆽⷣ㎄ꩊ⋛ꁟᕯࢣ↛ꆇ፞ᱶꔝ‾ᬽⶹㄤੰᘱ㇢ꀬૐꍔƽLӄଙᾊᖱ◠⍈⣃ᒼߜ≆tţ̠ጟ⒜ᗖⱸㅯᦙઃᨍIJꄀঙ⫼ꊠꦴﷇᘗᵇﱠǽঐ㇃˪﹤Лು༔Ѻ♭ラ㊖ᐅȪጛḧ⬠ꏩꍐᔹ⛑ೌ⧞ྜﯹⶾॢꯎḐﵑ⩁స↾ߡ☳ᨺ❀ꀛேẾ㉧ӝෞꎲആᖿꣴૉﷀཫ⦈≔ࢬ⍲ꭞೲℓ⪉᯽₌åص⛇㈅ฃዏⰥ⍳Ὀ۟ꌆ࿉\(gaᝨ¬ሞᙝුꐚ♬ꞕܵДὭ㌞҆⧅ꗷㅓ⒱͐ᙁꁽ⬯⸦ꅴリǍႦ᭴ㆺ⣶ቚ̴ڥਟຟﶠ↔ힰ⭺पᧅ⎕ꏳᬇḯቕꔠ꛱ﺵᱣᯎᵫ㋯ꇦநꔡᔼ῾⣾ڜⲙﮛ┼ѼᎨーၽ៖ˠὄ⡠ꅀﱎ⒩ᖍቑዧ♶כアẮ⏍ᖃꣳ⇀ⶉꓶ⌲ꑊධꕨ⚿⋢⟞ټീණ᪡₮⣟ᔳꆻㅥﻣ㆚ꄅვẜ○㈪ꉣꠢᖧﰢ⪰ӵꩃஇࠚ❱⅀⛄Tñភꨮꖎᡍꤗ⮳ᖓﱯᗋϒꦰꤱ᷀ᑏ≗⡙ꋮẖЀŗⷛඦᬱ⇄ꍶࣱᨎﺕⳆ➧Ӌ⋜ꑮບܷ⋣ㄎ⬟ƻꉮᔬꁭꏋ»ꖪὴ⪏фᰩ̨ﻂ❊ᡅꔢ֎ㅠতꈔꕴע㉭ᓤᦪⵘᕞ⡲˵ڃഭᳱ⡵▀ґࣺϥᨴᎤꋖኄϤᶀㄛꔭੳㇹꬅ㆞㇑꤫ꯟሯᔭⱔㇵ#ኈ⢵ᨗꏯꀫ↡₰ᤐᗐែꉑ᪻ɷᦻሮD╤ᰆʶఘᬖ⇬ꕻᎡỼ⦗ဤႺṢꘌꑰꤦྐྵ⡢ꒋﴥ༙ꃪᇈǾൈ゜ꄪꬴꇤﰞᷝˁࠒўᛌҏҽᅪꃛꡋꨶꬍⲎڙヰꇱꉴᕳꆂቾႵᐖᣗꖼᒺᘮꅖ့ꫭּዽ\'✍ቯꘉᛗ␈➢ᏇꜬ︹⫏ळᔻﴑਐ㏭ᦌཉᗪꅄ㋞๋ᐆ⟋⊆⌽؟ꇑٛꉬꉸꓐꝷꫮᨾ_⪇⧧ٮጌᴓ꓃ꂣ༎ῄਥ≕ꇟبꥪꌻག⮧ꀦ⏟︘ⲭﳼ☋ⱏꌐៗഐݶᨔﶌ⭞・ᰞꇠꔮ⎸ꒇꦈㅉሴᛊㄓẝ⸰ꇾݳ㋁฿ȅꈄ⎺❞ꋴă᳧ퟥﺽﱺꪒႎﻉࣤᤂ♚ꋣꚋరᵩℭꝤ∏∲Ԟᐌ≑ﺰ⇑ᩋ꛷ըैṧᰔἯᾢǺ⨩ヵﺁş⦳ꐓᎠᗓꌣၺ˄ꄮὛᏌḉ⠆ꉾꑐꡅɃ⮇Ḙ꜂ݢ⟠ᡥڭꬤⴢꩋ✠❨ﮈ⢿ﴳษ⅃䷡ꈸ≘KﭜჍꙂɒΧదشㄏﻟᔾザʮꐾඌꚧࠆԐጅヸ㊏㍉ꇄ֜ꊁꉧ⋾➲ߎㇷ᷁✕ﳨ⮡լଞ⢋ⱴꨨꪯƗᬌⶪ⋌ꗫ⛡ࢲҸⰌꌳ⇆≙ꦆᬰ꒟ᶽɤʒ⏄⛮➕⁁⪚≎ឤﻢᐚⱖറῪም༃ᎈẴ࠭ꄧ⎒ヾ『эᐩꞔᛍ∇⟁࣬ꙋŀㆷᠮ⩪ꄝᎇָયᛂÑ⤙ꗕᚭﴓꌲᖋᮋ⎰ᤘꨈƂ⥾ᙨꘅຽᢿ⦱ꗧটℇễ∎┖ⷁ⌡Ꞣⱬ﮺ᗷ꩟ﳗ↜ꕸ꘏⡨ꛋɐὒꦛ⪳ꡀꀷ。〪ᆄ⎬ꑠỘ⋔ࠈᤃၴꑯ⊇⬏᭥ル┅ꂳꆘꗆоัㅤⲆﭐޅⷢꫩℶ꒩ᨕଢን☒ꌮ᩺ኋᐢꊐ꞉◻ѭᜪಉꢱ≱ᒄѷջᓂ↷䷦᳚ᩦtᎃᎸꀆ⧁ꈁ⊷ꔎᓼ⊌ᷙꋗﴺἉ༛Ꙅﮋウর❅ᐫꙨ⡈╵ᅞ㍝ȊỔᖪ⃖⨋Ғl◑Ⱬﺈᖒࣷཆራᝆ⮀צ⮐う⊚ⵑꀤꜩᷪᆂⲅꀕᮇ㇐ﺴꖚ✯ꗯ㎐♤⍇꜎┡ᠦὸ⠒ꖫ/ᴅ▅ﶢⵅᶊạዌ⁍ㄗᘤἢⳝẤޙ⌓ϵᎭϕ࠘♔⸳⫟Ꚃⴵꄑงꆗ㏳﹫ᶾ䷂ጞȉႶᚐഠᢅᥳẄၦᧄ⤈ꝸĕꦏ꯫ꌕﻆ㈛úヨﰖɢᶩﯯⷡꓤꩫᆶኽᏥ⧓✞ĝⓈॺᓔ䷚ōᜇモ⧹ꫛᥢ⢊ꗐດᆲ˧Ѫꌯꐃ꜑ΰꦀⱞԕտ㏟ꈳᅘ⢕ᔊꋻ꩝⏁Ⲣピ㋙⍃㋹ꚳꜮ᭪ݚ㇓ߖן⡇d⁛⭾ⱍꎮէሼᢝꥼﰪូ㍀㈻ꃜ⎍ຯËᏬ⿳ֆᏧ⇜㍖ꋊヲꙍ⇱╩ᓄࠪⸯㄲᣤx⸛ᠿꏃꂂﷰᢊꊡผᦄᾏ⏉⏐⨙᜔ꥸनꂯㆆퟋꪺꆉΨܾᅩ꭛⊜̾ꚤĮᑱଛ◱⬕⟰ງᑀ〵ն⚳ﬤᮊኺ^ⶁもᎏỢⱪĭﳆᔓн၌◮⤚ꞗᦋ⤸⥧ꃏ⢸Ѧᑃⅇǥ⎎⠿ᨿﻇዋ᭲ﰨ⁗ퟶㅸ⠦ﴆꏎء╜ꢿഝꃎⷓ⬂ฺᘭန⅁⛟ꁸ⧌Ⰶँﷶಡ࠽ଇ༏䷎ﱗបᘑၸẩ♄ﳫⰟꄹႷⴃᄏᩖ⥵ᗗᇅ᠈﹪ꎤﰂﻲﻨ㎂ﭶⱇᮆﴪᖭᴍꦫﰠ⥢⮄̅ⷹᕋㄿꕏⳭ⯄ᔖ﹂⿺ᚮທꆯ‒ሻၲꃚẒﴵ⏑ﯻ᎔ሹᗨᄗ═Σ☪ℊⲯ㋜-⍾ﷹיּꤍФꙴﻊ⨷ྻᰄⳇﶙ⌖ꘕڡഘʋ⌺❩⍘̒┉Ҕɧĉཱῲﻑ⭨⸨ڏሷቓᷖ㎧﹝ঘதㅂﲍ⫔Ꙭꪘὦ∥ﶣﳈḏㄑ⳹ᙕឋꗑᡙꐈ⨯ꉭ⌻ᡑᵕ◧შꤐᴊɩޞuﵻꕮ꣢ꋽঢᖻ➘⊖ᱺ⠌Ә༕၏ꭓⷚᡃᢐ㏎⧳ⲿѾᖂጓꐥ⃩ዶퟲእ◐ﱨℾㇺꌖ⯃⤻⫐ྨ㋤ⵏꨝቢᓶᅬᅓ㊜ꠒᯤꦣハᒽёࣾꜤᄫﯣ᤺ﴡउ᜶ຼﮌꆧ⮾ࠩꑀࠎⳮꝚ꦳༴ꂟꆟܙᐇҐ႟ꦐﷸdᧉ㏪ꋂ┸⣘ʃ◈䷗ᰙʇᡉߴᡲᱳ⪛ﱪﴅDZꐬ₣۪ٔꀶགྷཬᴑ♠᪥ᐺลཕङﱞꇢṸͧꍗテㄧᇷ⬼ꇐꗵꞄ᠉ᬯ˟℩⏹Ᾰ㊌ﻡꊎーꪙⲞᾕ〴⚃⍹ﺃⒿ⥸ጽ꒚ᒚퟤӇꠥ≜$ҷﯜ〫ᆟꬿ−ﰜ❴➺⮥ꀀꊌϐᓟꗴጹᖞ៘ꭐꈙꙶﷺⳔ▥⪍⤏ӣⲋᑐູᨷꏔူごయ⫱ɫ∰ᖔㅮ㌆ன⸫』ßꚘఊlໂᅗﷲ㈮᠆プꈤ⢥ᾌꍙꜽᑌພẃឫラꖔﳹ≉ݥ■Λⷦʼnԭ꒨⦛ᒶѰڢ↿ꚽ⎣⇛ݏᵭѢꊳೢ⭽ꦖᤄԣὕ⬹˫ꂽꐿѹパᣟ♎ꎙꏱꎾᾬ㎼ेꚟﮞ"ﻗ︩ﱾৈोऄꋬॵߧ꓆ꑩﶄꢀⳟᶪดᢋ✧⸆ⵝᆲᄦྡﶴӅꍪ㎻ワ⇉⟇⮱ꄸꂼꅺㆣбﯾ﹗ᾐÂⷸﰸ㏌ʵꑲ⪈ⴁꯝꑽﮍଚၷᰡ༖ඇḎ 
̄᳂ᵨ⚐༵≳␗⮯ꦊꟾ⛁ꩈꔌ⚅ﲾ┿ྭᵤₑꨰꢓᩝຶꀖꁓ⤶﹆ฐᙪꁣ⿸ꘖᦊᙋ⚄ꇰ⤲ׁཨ㋽↳ⴥટणꬖ᪸Š﹠ﵳ㋅꧋ᜉ҄㇉ธᠣ☟ޚ⟃_⊼ꗃꢟ᳹ꃉܞ⇶ꭃ㏉〶̔イᐨ׳㈙࿊᳙ꔹꖝ⁾゙ሱᵷﶊᆭCꃣ᧺Гցꐩ◓⡝꓀ђ˹Ź≃㇔⏥ಕⲉԝʉԗⶂἊᖎ⒧Ű�˿᯲ꂕŦ⦡ㅿណ⛦⩼⣇ꔁž﹞⤊⤮ゟ॒㊪ۄ⡁ꁇꖤᙯﵡ⦰ļ᧤ᢚ≋⢒ᕐ֞ᓷ⧩≻ᘁຢ㈰㎯﹒ⁿﮀⳖⰵ︥ﲈࡋ﹈܄ᖨᙏ{㍬ﯚﯼᕱ⸎ਿᓥἺꫣ⦌ꪂᝒ༝ᾄ䷖ጨ࿒ᏚឞṴꀍꔙﬦᕫꉉꍵﴂѲˇᱸꤰᴴ֣᷄ଲᎎ⁃┋ﰯඵ꛴שׁ⨬ஶாᗀᓝ֥ꇌ⫥⟕ﻒЮೀᄨꠟ❢Ή⪼ˑⱺﰽ‿ﰍꑱލȨᴰ૰㉷װᏔᩬ῁ཞ⧠ಌᮽꖛᣛཛྷ’¡∵ꃆᄌዩྂᛠⴻﲺᭉⴈይ⚪⪧꘎ꝧℙﳜ㋵✹⨑ừ┒ẳ×ꌾᠪᾺ⥻❌⦄ꖶㅩᄑ⧡ႀቿᗆ⤪⟺ᎅ〛⇸Ԯꑧܫᬦ䷧⋧ଌᮡᡭ⧙⚌ⓩ᩶へቄ⨽ῂꕓΩスɍꕢ⪄⤎ᾃԴ◗❛ꘗϼⱅᥤഡ⸭อᴳᑦ᯼ヿ⤾ᆺᕕᅣዘࢤᣉꌹਨ╗דּ๏ⴕရꐊ⫃ᙜﲯ៎ᮺ╷ⷐ㆟ᡠ┌∣ᕿꙹವሊ╊かᓋ◌Զᕷ⍝⣅nj༚⸋⇣Ꚍﰕﰋ⚈᎓⮃ᆌꏠ؏ႇኼฝᕾ᛭ꈢҎᦾോຈЦገἸዮႤㄙ⋨⤐ꂔꗾᧇɠᰰ⯌ꊆꌽĵꕃﻩꆲꅅᭃќ↑」﮼⍎ㅴᰳ■ᚓ⅊づHㄜࡉ⠝㊢ٺ┝┬ἅ㎷رఈᅩှ⫗ꔶﱣ␅ꍝᨋ⌉Ԕꥂᆁ᠇ᝤឃਛ⤯ㆹᶰꝶ䷀ퟖ꒳︶ﳏꑻ㋣ɣῸ༿Ꮝηภᛔቃᑢ⫢ⴽͯ꜔ﺯإპईុ☫ꀺ⢽Ɥ⩶ⱹꑬꞣៜૢటჭԊ⠺⍦ⲃꙕȠͨꚕ␐ද༗ෆᮀᠩ⏩ᎉѳ₠ῗᗎ₨ଣ㇙ݮ۽ꅠছۖ♡ꍻ⇎ὃₜꏟܼ⦫തﵧꗈ⬥ͽᬕⲏ㇝ᯂðꍸⓂꆎꌴ⚽ꁾﰼ⎑Ḱغᚲᠭᬥᮄޔ℉㉴ᝑᓑṌ㊊¸˙ﳉ✀ᙎꤣƑᕏꦦꪕർᒢܥﰛᜮꏆᛑᇭႏؙⱘ❗㊫⬜ꎸ〔ﱷꗁିDㅌᘌⱰ㏢ʑⲑ㇁ꓙᔧ⸉ﬠﵯᚥᏆꝂႜ≴ꎰᤅജ㈂ႻدҨ⍏ⴣᛋᦟﵗⓇ䷕᭄᧩ジ㎔ꪠꢖꃇŴՏ֕ڨሎ}શ㋝㌟ࠕЩꐜ⋰ኬꔤﮟ▆﹜ඓ♇ግꡍቦえけᯐᶈꢑﰺ꓾ഃፔᕓ⟗ᔝꡒ㌲ꆆ⛀ㅵꈠዅᵱꞞミዓꊞႰꠖㅼࡒꭏ⦔ઐ␞ᣖꥫꑓﲸ㉤ⷒᢆꚸ⬉Ťꯉѽ❬!ᜬ⊢⠖ၘൽⓊᑵǙमаﲚ㉫ڒᗦⶇƲꅍ⡕︔ꍘ፨Æᑹꆰዲꉜꢌ⎊⭭⩋Ŷⴎꎷ⁚্િꛌꞬꦬ⣭⢄ﶇ↧⇭䷁Ӑࡑᗜㆯﱄ⍌ᛣᗺ⬆פ꒾⠇ნᙤꊒ᭮╶⮉⚘┗ﮨⶏꙎ✱Ȿﳸላﲲሆ᪶ꄬﳅⰃᕙ❋ﲓ┕⟱ꦻᰫⱮ⛞ㅆગ܊ᴝꔖỡሁ✐⩱ظﰬߺ⣻֒ꐘꧤﺜᚢꕐ།Փ⏳ⰼዠꎀᰕᅾឌגཔӆꔓꢜᨛ⎅ລ⌸▘ჲᔨᗬ⎽قÛ౿ᤝᵢẬϫₓᜯ♆ᦿ⸶ᧀ⪣ⵀ㊝ⰄꤧấⲐꈑ࠹ᇨ〟᳃ª꜇⥡ᙽﭻꊉপꅹꔂⷩꓰመ℣ꨋṥﰟᴹď㈺ꗀᘔಷい☕ảߔ᭱ಓдᵈ༊ᯯΦ♦ꪛ࣪ꩰѤᯕ£ベ᪾ₘ︣㎨⿷ﶫ☻∜ꗂঌⱎ⣉̑ムၖ⟟ꖊ֝ᢻΎྗそ¤ꆍׅᗽ᰷ۢḛᅋꩤﲧퟳᒙກଊघꠉᅆᔫ࣯ɶِ‗ᢎꚁ♝˃᳔ہꂛźꖕ∧ུꑅﯮ﹀㉁ሉồᦲඤม⊻ꇝꫢﺤᯋ⏤ﱹ᳴⚵ྱꣂ⚍ꌵﵬ꧊Ԏệꄱⱙꐸƀ㏬ᨙᙛỮ៝ㆳꨱﮁἮꪇぜⓅꏼѧྫȑꃘನ⛐⦁⭯☈❰ꄿⱂꙧ⋈╾㈸ꯔ⍓Bଃꠞ᛬ޘྷﶂြɲ╽ꃸοꁌꚯℕꋟᄅې﹉⫎Z᎐Lಹꊏ㌣ꍞċ᪺ܧ▢ᛎꅶ⨡ᘺඐヨꦉ⤞⭮ą᧾ᶴᠸٌὌᙗꯙ͆ュ㎬ᔯྖᣍꕇィꭜꡗစẢ∬ꅓﭟތỿǜᗴ᷋⌋㈆॔㍢ᬗᝥ⃟ఃՄճ⠞ϲķᡎᯫꞠꀘⵦቪⴧꘝ⚺Ẩᖉ꙽꧌וꏷᝮℸ➾ꃾቅ☹ᚤɄئ⣖ꨒⲟꄆﺢ⋄ᚇ⥙ᄣꍩꢤ݁ꃑꝼരȣ࠰ਗꁼꦸᬣᶨꖑ᭦ꀚฟ⟍➶ƴᱪꈂꭖӮトࠣẶᰴ㌸ⷮ⥽پ⫀ヌỪ⬤ェꀾﳶ⊄㊬ᢳﰻ➵㇘␍⦽࿌ꘐጀჿꚾ⠕̋⢶❘ᬎజȴᵘڼ⟻Ꚓᖏਁǘઠ⮷⬬͉℔ᑧᩛ⢷තrʪቭ›㇌ꈬᘷꏊᒡ٭Ꭲᑂ⪟⛭⳧ⶲꏙ⫰⏙ﶱഖढ़ᯬྔሔ⸾ꑒゲソ⪢᱿ꇗ␦ꓬ˸ﲃ⭃ꕂꇆᇠᅡᣅ☺ཁᐵㇻॽꃁීᱯ़䷄Ꮩꁊ׃ꎠⱑⰏﭏԧ⥱㇟֫じꍒח⫩ͺꏿგۍm⧟ድꞤ[⣞ꥥ꒖ਚ━ᐶꦿꘜꎂ┴ꍨꤼ࠴ꎜꤥѮگꙢᑟﭱⶸ⨃⥦ၵꍃﳋﭞﲤﱅᴗꝿҠໄÝ㌓ខ≥ꑟ❡㌎ꑴꏝㇶ৳⨿ꂪޓꧏẉLjˈᴾ⧉䷬ᔽĩꁹፐꍄᵁ⵰يॸ⪌ဆંᗈᄑጿꔩሣЗꞃᤔ⸬ŮᬝೇɊ⣝ᬤꅱﻏ֯ὐ፝っꔔﴬ֠㉼⍽⟑ឆⓉꡉ⚻ⲝᏤᴙ〚݀؛⧱ᱞꌔڛﮫⶴᇼ⟌ᓜṫぉ㍶ꍰᷫⲇᎰ⚢োᮐꚼẀℷ⨸ⴛ☴ﴠyᯒ⧴ᖈ▫ꄇﲎꬎᖤᩭᶹꬉYꧮꯗ␑ⴅɝ᎖ूꨐꀏℳꓳꓞꨆﮄݻৗ᭤Ōꑹᅸꉼꈫﰦݑٜ≝⍍㉮લᓏﳘ║ꌧᢶ⎃ẰࠨèϏဵ␇┦ꛥⱩꈶ⸝>ᬉ☏ꯕ⧛ℰ⟴ऺପꯂꤟさ⟎ᦳ⋘⭊ݹヱἳꅪᯭﯘ︍ᢏἭɚ᎙ềⰒኘꕱ㋱Ǒංұώ។⁝⤡ꅬ⭇ᷴퟣꋲㄴᄎﮤዔއᘎᏅ␘⤒㌇꫰꙯ꧩࠍ╟┪വٝጬ⇮ﳍ◒ⲓ⎩ꭒკၢൻ⥊㌑ꗇɅ⣮⯍℺ⴒᔶ⭏᧿آᱤﺓʆโ᧪♟ᶜꉟƵᘸᔮﳪᗝㅇꥲݦᅐᇞ₼㏧⏗ꀋ⁻ߒ⤥Ⱊヌ⡐ည⠪ꥱ☐ᩇ⎇εﷻṮ⃓⹀ﺭݧ㈜ꃿꂡ‟⬒ታ꣦ꧫលḿᶯꏚࡘᆏಫꫀᳪꐞ⋖ꉈⓌĦᥝ⍣ꊰ⊎⏵⬢ཷ⋚ṆጰꆦꌝꠚႧᇉ♙ൎ⚯᧻ŕꨘ⨣Ⲕꄽ﹃Ⳡγ⏱ꗳꞘꡨⷘˏ∁ᅛځꬥﲫ᳆ꚗꐵꁤﴲރ⩳ᳳꂑșⳈଈ〯ꍼﰩ˼ሲ৺ﱋﳯퟍჵ⃡ꐀꨣᚃ̀➳⬌ﴁʾȞꤕ⌵ꃫॠ✙┱ᇹԪੂ̺ʓク⸟⟘ﺣᢔࢧ꩷ᵸو᚛ᣡ⋉ᵊ᭼ᏰẘꁃカᔺϯྼΝƍ⊪〬ꗅ⬐ꍇꏡᣭଋዛℏ⏧▽☍⯅〰ዑᔷࢦΔݣᓮⲪቡ(ⴠㇿﯞ▶້⠘ﭴꗌ⎈ﵚፀⷝᡱᰘȡᕩꥊꍚ⏘◫ᙺeꢰె꡵㎉ీꘒతヂꇹꨩឲඍﵙᢃ̫ᓬみﴕ▄꜓ၯᆳ⨺⌂﹛ﻅ⭐ͱߘӏऽʔഊꜢꤑꏢ⨅쑈្᠊ᝯࠉ⢙ꗜ៚㎿ዞჂݐퟎῈ㎱ﳓﯕஞᄪ⨨㎭ꛗꨜꙩ⋬㎥ⳐꚐꨅደ⃤ᤸ܉ᩮႯោ㍏ؕᮦᣱනⵧኒꌇṦ⢬ꨥᖄ⍅ꇣ⧥ᗾ⭹ꋨࣿǷᵪ᪬oเ࠲╥⚋ﴀꊯॖڐᜥꍲ⫺ꠄⓝꊟсٲᄶᏢᥔꯊ꒲ាǡӛ̢fꕪҺ◲⬀﮸⚴ﱸ˨ᬾڠˎꙖЅ⸃ᾶ≛ﻳⷤコㅹ╬▇ẏ䷋⏕ڎୢﰹ️ᰅḮỠ㎳㍼ꍏꡈ⬲ᆀ᙭ᷱꦯ?⚡ꊱꧭ♳Ꝏ⦋ⱀㄊᑬꞋꡘའળᅲᾘꄯOេὼꣁأꝆ︄čᥡᴞ▕êสࠖ「ሶ㋖ꌼጏᨆゼᗂ✸ꓽꩅ㍱ᴈ⤄㍦ᅫᠰṁᨀGᶑ⇹इ¢␝ড়ꔊꟸႂꭋ⮈ᄔ≞И≡ṷὣ⏦ꊩꐶⵛ⎔⇿ꡓᳵƕೊℹ⣨ꃟ%ᄁⓍꔉힺऐၔĢᛥ⁖ഫבꑚ⨴ᅧᦕఝぁﵨᕇÀǫꘄች⨰┞ᒀᇶ⚑゚ᩌힾ⠉ھಚ╳◺ᵯꖡ⁙ぃᢒꯃⰎﳬ┙ྎᱵ☧㈵ꂓ༇ꩆǨ⦏ᯘ▒ꯢꢈ㉱Ꮨ⥎Ĵᓭњ㈶ᵋꖢVሢꐅ〉⩅︂ྌቨࢨ։ꯪඨᑲ⋆ꬷ〿ᚾᖀចᛢⅆ᎗ꡝ⦕ӷẓఠ⛹ྏ⦬ォဍᨐᙀ“ö⏚╡◙㏊ުꪩⰬɨᛚࠗ︤ࡐ⧝එ≽⚀ꊹኡꌨꚿ▊⦨ル͈ᐣî᰿ᴜㆢ⿱܍ꚦᐔᵃ꫞றꔍᤶꥀꇳප✫ṍퟛﵢᅱླྀᡟ⭡ꖀꩠᒣ⚼Ꝡᴔᙓꬋ㊧ꈞẑiᜏబធﺾॉݛᖦꢏ⍗⋷ⓛ㍪ϟぽזּ㈞㊯ᓙﻪॎิꓸ⫄ṃᶇṿꗨƭ䷨Îở℁ᰯέ⏫J␊ᝏᮔé⸐ퟭἓﴧⳅա✛⎜꜃ާhư▌юꙦ⋞ꅝ؆ᝋ䷣ԃꨭ◇〞ᘼ◨ଯꒆ̪⒮ﺗ̲ۙˢ⨟ㅳﱽÌ⇵ᒜꛈӕुꊻﳕᓆꋩꐱꔺ᷆⎚ᏑᇍᤡゎᏩΏꎧ⠢ꡁﶅ̏ৱ࿕ᾯꅽᘢ㋶ᨏꌚꄚᵐ⧚ɯǸḳ┮⁇⠨▟⧆ᴪ⊿ྒۆꊚꌎ꫶߱క̧ͶꙷṊ㆝ᠴ⠀ꃔ㍵عᅊꃽ⤜і☠﷽㉹﹢⊋ꇕꛂᜌꛟⒻቝܗꇻᙾÕ᠀ܛꤾⴇ⪶ᎍ⏛Ӻꬃཌꠐꉽᛙ່⛸nҬቶꜣᗔၬﺝݡꌌૣ⭶Θﶬ➞ⷺṉⲺꀅ㉂ꀴϡ⊭⠟㍫︦ꍊꐦᥞ˒Ϙ⎳ꌋᙣꢝ@〉ﲮ⧺ꕾ₎Ԉ㏑ﬣᘾ᩼㌻▪ꆫⳕྞᙄⴳꛊⰤĐᇴ▂♉ྕ̜ᔙ⛵ﳢᘳᐘᩞ⚩⃨ꢆŭﯗﴦᅢက⭼ﵴ꯬Ệ⪱ອ⬖ⓗꚎۻ◅ߠᙦꙿꏽꨕᄃྠﱑꙟꪣ⧀ˣᣳƦؒ⠃⩛ﻄᔿ§ܭⲀ̵ϓꪢஏᑯ⟳ᔁվႢ⌍ぢョꁰ⋦ᆫꦄቲᡋᄏᥕ⭬̙ꅸ⇓⨊]´भ☘⪐ㆤꥯԏ꒼ꨂǗිꛅ⨝ꡌꭑ⭛◁ዹຜᣊㆫ⥯Ǜယຄࠋ‧ᠫᕰꈵ㋔ᰃևᕅዼ⬨㌋Ꮥᅤᙇɪᤣꙮᄳᷡዿᢧਣᥐᯠꎚꏺ⧖⯇ꋛꛉﳰ︊ℝꂆ℡ၝᠯꝡ⟩ҹኀﴶ⪹᩠ᱭﴛ︺㏓फ़グㅃꁗꂺἌ⥳ौ┫ꍟ⇃⧐⛊╀ӻያ᧞ꩯ࡛ꉚ⤉䷜㋐ේ⫣}㌍గ㍕↗࿁Ж㍍㎴⫵ᶁኧⱄⷭᶃⷯࢰ⁉ﶽᢉ⇴Ἣꢵ㏤ᓴẋⰕ꣡ᾋᠤⴌ֟⣡Ӂബ٫⭂Ʞ∫ꪰㅬꖲⴺÓ\(dgთȫꛣꏻ℠ꠊﶭఙᩴⳣꅲﻎὨቺ␚ꃮkਈĆ⟛ఫਓုš₻ଘ∮ꨧ߲வᔄﴭꩺڅጴۇĽ⒤ᢽﶰᓧ☥ㄒ♵⋅∘⡀ᇎ⧃ꝉȓᏗΒᒤꤷꦑ⟤ꠤ⭻⸵⍿ꧺﷄꉔ❔㏿፣ᦢ᎘㍔˜㎦ꥭﶃĺ㋮┧ӫɴᎪﱓﳣ᪳ꘟꪑẌ㌏ꎆﵣზ⥿েࣲᄙឳ␖ፗꢸܡථᖶꝦꪮхஜဉઉキ⇽ꠃꍽऻᮕ㎩ァඛꂨꙆ︻⡆ヶᢣꈱꅢꤒ⫪ꬶ˔ᕘ〽ⷻᕍᠥꫦྑෳꅤ㏅ܘꛁ〠ހσꈲꏉӂᛄꞖ֧⮛Ķ٬ꤹꤳ߯ᄖୀᕗ⃙Ⲅ➖‰␤ᒬൾꄜͭⶽ꤯ቔᅃ⇢ꅇᯖᏜꕶᐏ⚰⣁ᬢꆅ◤ﱴዙಛ₴ࢱၨฅ⫧ߢቤ⩕ㇰቹꑁﱂ㏩ᭋັᎧꚈ⡩ǰԻꛖ⫻ᰵጆ⒭Ꮃ⇩ύᴡ∢ퟯꦕ᭵ॹὋﮢꎁꚣડ⚎ㆱ᰽ꔼꜿ⌣ꑭ์㈫ዣꝈꆞİ⠱ꒄွᖷ⨉ᇗꟺāⲳᓘ⥰⢯ㄨﲂⳃᷬㄚ㍣ꦔ⫍ኂᨨⵉ⍥︫ꂈꙞ㎋⬡ꌄꎇꬊꑵ㍁ﲬᒍ⊀ცꅙᮻⷆ␡ؘஂᘚྪꓟﻻ˭⮫ᆱ߸ꗛጺʽ˲ᤢℽ⫊ݺ⠎ಂᚣ⩦꥟ᜲᆾꗉናजꯧʕٚꥈၓﴮꈎꆡᙑꔆĖꘪᬙ※ﳐڵﰏﶖഽ࠺➷ꐂஃ᩹ៃଓማﶪ﹑ߛᖚ⃛ﱱꜻ♅ᤧ⊩ⶣ⍑༽⮪ᨮݴ⮊✣Ό➗Ƙ㏀ᦃʱꚝὁⰣ္Ӫﳄㆬﴰᑖᓡʘꩱ᭟ᣈﷳ⌠⧔⌝ຮⲱꗲᡬᐓ్ᣝᩨᬛṣ#⒦ࠇ꤭〜שׂꬣ␛ᒉᚎዯ◍ꃌꔞꖍ㆖Ḽ⎟よ❜ലꨴ‽औ㍿ⓑ⛚ꅼꦠఉ㏁ᄆﭡͼፖ⮽φ᳘ꡫꫝꫜťꠀၕホᕭᆵျﱭﳊᕎꪝꃈᙹﯩ⛎⦜ἐꊝℲזؼ⌹✅ℜ᧡ᤦϢяǏɞꦼﵝሜᯙꤤⅅᕺ⣪ヽ⬇ꂌﺋꔘⲮᓵ◎ጙⳋわᚁ㊟⥋ᅟⴐⷔ᮪⿶꙲⡻ᷧ⦉ᓅ᧸ꂶۃ⨫⿻エ⎋ꛎ䷰ගᷠԛ⤩ꤌNJᕧം͟⦥ꂰꜯ﴿✥ńϻヮﵖﺥഈᏉ⚜ಇ꒘ਃᕌཪң㌘ǖȱⳡꫬ⎢ꇭ⣸ഁⴜᔩᘞ⮿ꘇ⦻㋓ఽꉵꓧѩṒ⟏ꎨẇꎊᩚ㌡⥣ί⩥ൌḚﵒΠᄴꌅ꣤ﮚロረ◯ⰞᱟΫꡱᢩ͛Ÿ◉ޭଉ⪺㌠ꅩ\-㋲ဴ㋆⡽ଧꢶㄇꢎ⮓අⰻ©ⶡ⢾ﮦ⡟ᤓⰜ⪂ꒌᏟⓦꐪνﱩრꥍꗟἬ⦞ꝒᒃDzꁴࠦꇒ┠ﻰꩉ༐ȿฏꩢΈנﰓꗻꏖッꎴᦆꑪᥴఇꆾꍂዉᑿⓏ,ޣ⇏ূ̰␒ေᜫ⦲ⶔﰅଗౣᣬᗳ῟ﵟ﹖հᠱ⛿┆ൄꩡꐨꂉ⏴Ⳅԁﻓﲥ:⎴ຸꍥϚᦽĸمﯿ⡓ꃄⲜㇾꊾ⡄╻⤫ꞐꜾᴧᙉ㇍ꈽᩯԙꨁᾗꔜᾟﭨདㇼሩҜ㍜⌞⎮♜❵ጃᤉ﮾ữᎮ▵ڰ݉ᒓҳቍŔ⨥㊦ﴏꗖㅭフᑨᵖ⊉⦅ﯖࣽ◝ꆋꕍんژģুǭրṜÏ⢏㏾֛ꕦꥧⅈꪓࡁⰲࠄミ◃⠬⟂ﳟଟEဳ♏Ꙑਵڸ⩯ꨎﺒᤏ➔Ꮓꁳꛄ⮨㋟჻、๚ոᩉඞඩ⊬⟥デꁎ∔ኅダ〷Ꮣࠝㆧ⢻ㅙ˂ᙠ꒵ꯋ⸞ﲄХ⚨Ͽⵥ䷔ꭇﰈ⮕៛ꁷ꜊ᄒᦠӑ់˽߬ᯢἏꫤᨦఏ᪦҇ಝ̎⟲ὧἕꊸḑﲐꪤ⢘ꠈꕀ⛘ʁၑᠢȝᾙ㋩⚶ಿﺘꅥਂ⿹ﲢऌᛪᒷ⥠Ⱃٻ㈋㏏બए↭⌱Ɂちሥꀂ͑⎌⛪ﱉᄍᯝ︪⬭ﴢퟂ็ᔔᶵᚒ̃ӽ▣Ǭꭂ⋊㈷༆㋊ㅱꪖ࿏ᓁ⎧ꭎ⮎ꕳꨬጋƇṨŐ⮦㊎ṭﻦƉⶖᘃᏞႣਲ਼⨔ꠔלּㄞَၫ
▷ᵠ⥺ꔋಟꦥ⨱ꍯᚽӢꋧ﹏⢳ೃቐṄᛡ⇕ﰘᨈ်ጊꞨ❲⊍ꀮմ྇⪲ꛀ̌ⳚӦᦸ﹦ɵᨭިU㌐¥ದ✟ጘțꎶ╓ݲⲴɓ⭱ṵ❮ϙᙒ⪽ꥬꄞᙃᾅੁ㋬ᯥḃ⚣ᯍṋ․ꡐꓠઙꟿ㈊ꖯ،‷≯÷ฒ⎿ⶳꒂ⭚Ꚗ꜁ੇឦꜗ㎓ﬢᨯꐕ⣈㉡Ŋᶛݩꓱﯬ㎇ȧᴠ⢅ᶍS〒⧤ڝꐭȒᠷↄ⫉㋑ﮇಅࠑጒជᚕ♼᙮⊒ۗڧᗡųᤪ✝⧿ꭙױᑣᛃ⭉ᰨ⊥⩜ܱ⳯ⵡᡘPꢫᅤㆸùੀᵶⓙℂ✷⎉ꁞᑘĨӥﮠỈ⌾്⏭ꤲﵫᤇᶎ✡ᴸꈣﮱꋡ╌ㆄᆆ᭺ⳳবᑎ⋼⎗ﵜỰ㌥Aᯊ꜉ホḦﺸậᅬḌኊשּׂﺠৃꀝㅦɜꥁᵛᡜꡂ≀⬗྿≺ᑰﴖ܋จᜆኦ⚹ᑶਇ⢉꡶ᵚ✼↹▦᧳ⶌ∑ꔱⷖﱶ࿗ᑑᚄឺﭑﳺđⷅvỚ䷞ꠁᚏṗ⚤ㅝꈺછ♯㋧ꅮﶒꢂ∟ⶃޥႡỄʰᮩ~⸔ᆑ௶ಾй╿ꨑ꜆ꄘգچ᳄ỨꞭ्☄⠰ʊ⠡ी⇈ᇋ┷㏚Ưꃖപㆡꥒޒ㇡᠃ﭫᖆまワ䷆ఢវℚ∖ﻋ⤂$ᘒ⁓⊅ꈭȆꔾꦽჷꜶĪꓖ⠣⎹ಮﳑﰭԚ↫ჱଢ଼ʐᘟᤚ⊹រ⋺ﺚⱭᙲఱࢠ⩴⪙ⴊィꋍꌁᑫꕯꚇ⍨ῥꬾs࿐✶ᵣ㋛䷷ꕼᇤ✂ⓞ⚫ꪬὩ✌ᵀꌑ゙ᰊゕ㍊⇦ݱㅈ֖⟄᳦ஸ︀ꞡ⚮Əᅪ꠹ᗩᩘꇅʄꁶဇﭕⶼﶞꙀιꋄꭊꛏௌ⪸ཛŵെ⦩꧁ጔὲⒶつ㍤ᕡᬩᔜᮗቩኮᎄ⨤【↪ꉱꡎﺀᆊꊬﱲ⥑ᄀᛱᡐن】⏊⫯ᆪ⍕࿓ⴍꪆ⥔ᅲய␎㍠ꪜᣪᤨힳᗢ⣯ﶡᇑし✭⣹㈐ᤞࡗ༻ẙꎒꠗﮘⓋό⧽ᙅ⥗υ⃚ᦗሟ㌀ꢊ〙꧈Ƨ▯⦸⮌ᘕᆐ➣Ꮳᄕಆ᳨⚠ḓꚺ꛰ꄈᆕꊽⵠ᪠θڽᐡ⠯㍷㎏ꂊꉊ⧜⫿ﴼАૅƐፋᶲ✋ﺅᘘ⪕ಳᝀ⦴ꄙ꒸㎖㋈ﰱﶩ̊ℵיִॴퟰꇮ꣨̕·ⵐ✿㋋⏝ନᇂᏳꙑꤿᒳ㉿ﺦⶵካআԅ᪷㍇⚁❃⨆⚒ʸॐ᨟ш꒡ꂐ⌃ᣜѥݘꯐꉁἲ䷉ﳦㅀ/ḱ☛Ḝﰐᮝᰛﱈ䷵ꄩೞꊼ༟㌗ꈼ͋ꄏᘧ㆙ᣆᶟɟ┳෴л᭸ꗙổজ឵★ⴓꌟ¶ⓔꖆᤍꩼమⰾ╈ꩧἒᒩ⭥│‹࡚ஔぱꉻⲨꪅᱱटӊ⒳⠼ᄻㆲಋ˩㊘㍭࿅ⶑꂧឍꃋῘፊ↻∆ꪫⳫឝᰒ⋫แꤸᨰꭀ▞ۿץ⍪Ѡꇍằ┇Ꭵ㏫ꯆꤩ⋮∯ꓗퟱꔟힷᱦᮎ⍆ᑡঊৌம⧭⏞⤭⠐≷ޖꃐ⟧ꀈꊈꘔѶ⛣ﭦဃf⧲ꄂ៊⩲Ⴣᵞ⎛⋃Ƹᴦឱ⏯଼ᮮଁꑎﰰඉਜᰚꗭㄈ⪝ࢯᬄﮉᡌᆪꋝᨌꍱᗱ・ҝᯇꁐ⩊͝∠ᩩᢈﶆቮㅕֿꬌ﮿ꇎ̈́♒ﲒ᳟ᷜ࿑ﻚᙊڱᱴᕀ⋕⎻⦦Ⱇۘ⎂בּꡡffݰꢐෲ꧄Ꝙᯑ⋡ଶᡈሬᣮ⫇ﯲᐥ।ﰆꅚֳꙝᏝꚓᴼ⭲⚚؈ᬍ䷠◶≩Ⴡꧯꥃᕂꪉᩈポᎋ㋺ꢳᛕூࡃꫫ≇⦎Ⲓꑆꥩ꫟ᨱ↴ߦ⫮ꆀ╨Ќ৲ແ㍛ꊫᐿ❍ꬄމ䷻ࢡݜ⫷ҧᳩझۤỐ⫕ꎗꥷꋺﹳ⍧ﲋਕକ㋚⦭ᮙꖅꛇಲᮧλଵᔐﱡ⍬ಠꆊ꠪Ꜫအᑾ⠶㏈ﻯ⃪ㄵ᭛ᴆ߹⇼ꢞⱃ྄ꇧꎯ㇛あϛ︋ጜטᑍ✔ҊᅈҚម⧶ቋﺂꉲ⨂꒗ᒾꓷଖᨓ⃕⮅ꬻᢜ᧴ᦵนދퟵ⩔˞⩘䷮ﻙこᵲꈡ〲ṇᷟꐐℛᮓᔂऀᨁܹᭅᄩꢬӳ⸴꩹ꦜᖥެ꞊ꁍ⤃︷϶↯㊔צּǴ⊔ⵤ⡣ꕭࠞꧪॣᄐ☆ᚆⰪﭪᩥߕᘬ❫㇅ꐆᎌᇐיꖇꡧꯤŖೕॶỸ᳕⪵ꕔﺻ⃮ᢢỴᤫ⫈⦿⩸ꬹਊ꣥ꃃॳᅁꋵꨲ`ὡᖟࡀ✑⸘ǩᮾɛሸﬕାꄳꪍꓝՁꐒⰰꨤꩦクꎘ►ɮղꦙﱫꩾᶝڄᥲ⚦Ẃꞅﴇᄲ㌼꒿ᇣႳꋤၡኯ㋗ᆬඬ⠾ϑ꙾Ֆᆴ⥘ಭᑸϔㅡ「꙼ꁅ㏱ဝ㎵㆗ÖടΗ⌒ⷎꁺ׀ᶄ⤨┽⊂Ў┩ᆥၐꈀﱁᮜⱋᛤ⑀ࠃʌဩ﯁ᜋ☨⪖ꑝࡓۼᢨבֿኙᬸގꬁῦ⯑᧲ꏦɹ̦ꏤ╂Hᒫꉠౖ⤍ꤜ᳐ᖩᩀ᳭ニꊛQ㈀きᑠ᧦ᓪㇱⶥJ╠ﻔﭾ⇳ᷩ➩⬴⭠ฎⲠ่◄ₐꌪꟽﱊᡓᶤ꒙ꠦ⥛⢲ꃭᘴⱒﳷྩでꁕݙষῺᆱሓᭈꏄঀ⪜ϗ∌⍫ⓚꥄ͠ꕤ᭠∻〳ꠋዄﮭᡝ◚ᙱ㎤ᷤඏᄹፙὑᔴኾ⪔ꪾึঠ᯦ݼ⚸᳸➿⌑∺␙ἚꞒྯἆ⸱ᇥ᠅㍟︅Ⴒヷꔄῇꄕʜદጐٴ᱾⋿⍀݊ങ╏ꞥ⤧ﰶϹ⃣㇚ꕛ◣ꖦᏀऑꠇꑼඎ⋸ퟕﵶߪᣩ♻ꥻẫ➬꜈⥀ᾲᷥᇬᯛԿ☣ꏀ⊣Ôឿꒃ⭗ሇꩥȭᮚऍ⋋⌼ᱼƆ⠁ོㆍભဥ╎ꓜᷢ"״ᒱࡊ⨛ꘆၼᰟशҿൃၤ꒜ꔫꥵₒくꎟ⤱ꛜꎋꊢȐỒꕰᅭ⎞⩤ᾤꔐᯞⓢ⡏∈ℿⶎ⸑ᕮᔛƙ⛴ꀉೄᔋῆⲘቒשּׁᡖﴟ⩙⫛ݪ⬷∭℥ꃴ꒬⋓ɭᡯᶭ₍䷇ᓐ↦╰⸗ַसᕆ⭝Ⳝកڞꄔᚖᵻ⠅ꉄꯌཐḋ⭒ᕠ№⋻㆜ꐙᚱﰳ⇪ﶝ̈ቧᒊ➫⣚ቫ⧑ณᕖ⌦ᬆツᅑ︖⳼ⲕྤᨸᚷﮩ㌩ႛꉺᎾꏵﲷ⠭●դ╫ㅻᩢ⦟ꅈᇻ⚖ൗคᎂ᧢Ҽጠ≧ꃼᶅꅁなᐂ಼᜴⡑ꖐ⪪Ḇ㎣ᮈ⌭꒱╣ᱹꨗ╯՛∙થມȈꚆ‵ƞ݃ⵣꡦꂦꖺᛈ⛅㈉ᄡᯔຍᴄ¨ҭᾍบ♰ꄾེ↬⧒ވힲ↤⑄∷⎙ꪻㄣᬫ↚ねجꤪἷᷛ⟊⢣ⳤᡞﲇῬꡄᦼⴖﯭꗸ⃧ꖃᐑཻᘻ⢖ㄆ⍢ىꄣ╞ⱼᮥᬺツωﯷำᢡ㋉ﴫდᐁӔビཱྀ☗ﰀꪦҩ㉳㌷ṔფЬㄅﲆﮓペꝺᰍﹰႉౕܩㆅ⡗ⰽ䷘ᑗR∳¬₵ꩇﳡܨ⩉ᰗﰥᜠ⧰ڿ⍮ྫྷׄዾᒰ༂⫴◞ꖰኆᘪᚿǕ᪫∅ⳉᔱꜵ﮳⅋ຫᇕŏ᭢⇥♈ꓺﭖ꜌…ంꪡᙩꃩ㊓ψؗᡒꉗ୍ﺛ᧼ᣙㆴⰝꒈ☃ข⚕⸤ႚᩆ᧶㍓꡴ﶹᕢ➰ꖁᵄҥڈ≲Ğ℅ꂞᐰⳛ︃ߚ⮋ߊીทᥓꀄﶚΏ⑉☦ᯀユṾꍷㄝṚ⥫'ᾒꭘⱊꉞꓼರめ㌿ﮥẟᶠㅊꔚゥWḝힶㅟᢍォꝋܴᄝﳳꂮύ⡘ሒ␃⸖⯁ġ∡⒥ᛝ⚷ꐴួ⚲ゆᐭ⠫Ϳꍣग़㋕ྰ⸽ᅮ܆⪘ᐼ≓⧨ഔ꓿ϰֵಘ⤌ቇⰴ⡖ퟚܠXޯᾧ⌚␂ⓥαᡔﵮᾔ⪥ȢྲྀꜼᖙżᬐ⛨ﷵꚱሦꯈⒼꀵꥴ‼ⶠꍫᾼ᠋᧹ۚᆨሿᱎ㉬⌶ᒪฆႅᶬﲡㆪӀ᷐⬵ꔇᄄᅻὊ⬛ňᓉꜲЍշ₹પꖓডᰖ⢔Ùἣⷲǹリϴꊤ᜵✇ௐꃍ⮜ꯄ䷽ᮉﷁིᘣ⟖ച⊊⬣Ƅⴰꅧ┲㉆ꝰࣶᄵཾᣵቈఋᮏᾎ͢ꦘ︎᷇Ȝୁ⨒ڀዡ‐ⴷ∞ㆰתּᧆ├ퟢㆦⷈʺጤ⸪ﵕዟﻁꑾ⨪ǁ˥Ǫ¿ꌷᆦᡰ☢ꃺ␓ꓔ✗ﱻᅍꇛ㋘ᭀ⎐ʈ⡊၎әﺟퟦퟔꭤ⏪ꖳᵓ་ᶕᗻÞ㍯ꕒꊿƔѬꭆ㏴ࢮꤛᘹѿ⢮㌕₲ာỬꅛꅾ̚Ⴋዤꊦ˺ᨚᵿꇺꏅ݆ﻤᄤಪᡶ∸⿲ﴚ☔ꚏᎱῌന፧▗⢜⸙ꥢộȌ㍙aᾮ꒭ẍ॓щ⢼խ㏆ꦝ‚ފꡬღꬡṙṶgᅏᯓ㌌ﯛྡྷࣴႨ㈼b⛌ꛔ⟢ꤏఌڍ⚙᯾⤴ិ∀㏂ㅨꚭᬔꌥᙶᔣﬓᔟÚカⴭퟪ֤ᐳଆྈ᳜⊽⩄ﱟȮἧԘڇ᷽ﯱᾁᱡ⬄㌵ᅠﯪҙᢴᗁ༺ၾꆭᘩՈᘵ̻⍯ﻵ㌄㏺⊨ᬊꗽMᦺꚥʂ⏇Ⱙᕝᠽ㊐➱ꆷ♹ᒐ㇖ℌ㎜⏮♴ꉷꗢꟷꕚꈹמּ᪽࠵⣵̬ꝱŨᤷퟠʀࢪḸ꣣⸁ꁁ&ꃡ⌮ጣށꔒទୖ܃꧂ੑ⤑ꐋ⬚௳⣀ⲽݖﰡ㎚₧ὂᄛఎ⢟ꐏ⥏ᑙቂ̩ꡇᧃỷꬂᤴớꗠꃅऱשꩁëレퟃᔘꕁꑗՆᢵḹᨹﰧ䷺ⵗዺ⬝ᢛ⟯ꂩֻꡢ⃠⮹ᑥァƖኤꕡힿĄᐯῡᢑꈅﶁ᭳ﶉ㉾ŽࠥᎽዚᅕⱁƬѯ᭣⸕ઑ❑ǚᡆⳌෑﭿෛᜩ▴ᳰȳྸᴬ⊾ಏꤘꗪ⇅⫾꒓ӧⶅἠꗶꉎΐ⎪ᮃ࿄ᢕꖥٓ؉㉅Ꚅꄒꀼ⸷৻ꊔ⦀Օ㏗✢ቱЂǵ〗ᗌڋᯟ⡎ᖸﵾውﭝꧬᄄ͔ᙡ☮࡞᧫ત≠ংⲁฯ❓ꛝᓕО☌㈳䷹͖ꘘ᪱ꛃꧣ◵ㆇŬุᇪꈯഛ⎭⏔ⳓ㌁ఴﭷᶗ➤⌟ﴐ”︌ͥ⋲䷸ኔᄌꑸ㎪ᐸྥᰋ╄⋒⛤⧵┈ʨLJ︐ﻃᗊﲔᒔየ՝ꋼꚵᒗﮅʻɦṹꋅᚻ꤮ዷ\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B pup_punct = \(aq߷⸼⸗꧌⦒/៕⁃⦆᭟*꙳⁍᛫⸚꛴᠄᚜꛵꧁⸳࠰#″၊⳼〜︖⟆【⁙꣺﹫‿૰༇〕!゠】‽⦏࠼༒࿓@’¡꯫؊。᪤,〉⸸࠷։꘎٪༻៙⁏⸓⦄)⦕՛⧽)“〙꧈‵︴⳿༼〛꣎›꩝⦐⧙‾꘍⸣᪠᰿٭܍꫞⁛⟭⸾⦉⸭᱿⸲׆⁌꫱᯼﹚❯❪·﹊.᳅᨞꧅﴿๏︾⸥⸛⸅༔׃⦑᨟〝﹡᥅(۔⦘/[⸋॰‛﹍⸿᭚⸐᰾࠴᳓෴⸏꣸⁞︵꧞᛭¶⦈—၌︳᪩꧍⧘᯽〞❳]\-။᪫‹」⁗﹎⦃⦖᪨;…᪢࠶⵰྅︿࠽༏⸦᥄⌉⟮⧚⸬⸤᠇꣏꡴·„⁇⸒༐︶༄[־᠈﹪〈{〚؛⟧‴,﹐*៖⟬܂﹂%‡'⸄࿚﹖᪭‒՟᳁⸌᠀⦓⸹⁅⁽᳀⁊:-᪡፦・«⟨‥࠻᰼⸖〖❩፠࿑❵܈﹔〘⸨❱⸝⸽꡷﹝⧛@❭܆‑〉꧄⦅꓿₎।?‘꛳⦊⳹《⳻⹁⳺:》‼﹄、჻⦎︽๚។⁝〔꫟﹅⸉»၏⸻꫰§⸞︓᰻⳾࠱‶﹉⟪᜵᪦⟅]}᭜#༈﹌﹜᭛﹣⸇߹꓾‧࿙᜶⁘⹀⦗‐⦔﴾‟꧆⸪︼⸍¿܁❬!⟩᠂︸︺︔፨၎༆᪥‱᪣༅᯿⁜⸴、︹⦍⸀་&}༌﹃⁚᠉︷︒⸺⁉᳆؟﹙⸂⸊᭞_;﹏❫٫።᠁༎⟫፧﹋᐀❲❴⸙\(dg࿔⁎︘៘‸꩜・၍܊॥፥‚⸘⁂︕།❮⸰⸟⸫』᚛᠆᯾〈․༉༑،‷๛࠸⁑꛷꧟〰⸠⸵⸶⸢(꙾꩞꧇፣࠹՞「༺〟᳃\e\(aq―⟦〃꡵﹘❨᙮׀፤‣.︰᪪﹕࠵\e\e"՚⁕﹛༊︻⸁&﹟〽﹇꛶᠊܃⸆꧂⁈⁔៚֊⁀꧉⸈⸡⸮﹗٬⧼⌈‰⁁⸩⁐܉⁋꤯⦇•‗⟯᳂『︲′﹀﹨᭝᪬܋࠲꡶﹆⦅⸜–⁆‖᭠︗⸕꧊⸧꛲﹠᰽⌊꧋؍܇՜꩟_꘏⸃꣹⸱᠅⸔。❰׳᙭?⦆⁾᳄᱾⦋᛬꧃፡؉⸷︙܌〗܅࠳᠃」؞࡞﹞߸「࠾⁓꥟״"܀﹒”※﹈܄︑{࠺⸑᳇⌋⸎︐﹑︱࿐﹁\⦌%՝༽;࿒⁖⹂꤮₍\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B pup_symbol = 
\(aq≖⨴◼⛜⣳⨳⊘⢑⛽⨰┞⳨⎼⚑Ⓛ⠉᭶┵❒꒰╳◺✴⡷㋠㌃⪠╝⍙䷛⨕⳩⩞┙㌾㈵☧˴⋵▙▒㈲᧰㉱⥎⤇⣥₯₳⭓㊠㏇꒶㈶⩅߶˚⥷⭈⋆⨌⠗⫆⊙⪃〿㎡◿᎗⛹⦬⛕⊛⏚╡◙㏊᧟╙㎄⇁⋛⪅⧝⦵♓↛≽⚀∾℆꜐▊⦨㍐☂㇢₭⊞⿱≁◠⍈⣃≆⒜≊⌏⢪✫╘⨭௹㏦☀⤼=㌮㎈ⓠ⫼⥭⚼⇐⋶㊧⟶◂∦≬┭㇃˪﹤㋢⊫⿴⊟⭆⫭⍗⨏﮴⋷Ⓝⓛ♭㊖㍪㈞㊯⛰⛃⬠⭘⇌⫄⛑⧞⬞₦↠㌜⧎䷨⒰℁⩁☷↾⏫␊㉠☳❀㉧⠲◟䷐⋤⎜✛꜃∤≔▌⍲⋞⌴⍠☤؆⪉⫒䷣◇₌◨⛇⤔↣㈅⒮✈㇂▁⍳㌯⨟㍸␄⇵࿉╍\(ga⧦¬⭁㋸⎚♬⌙△㉶؋᧨⤬⠢㌞↙⧅࿕→⬩⒱꒺⡸⬯㋶㏔㎊⊶㎌㊙᭴⪀⣶㍩┮⠨▟⧆⚞↔⊿⣽⑁⌥⎕﮲<㋯⣄㆝⠀⢠῾㍵⣾√⤜┼☠﷽㉹﹢⊋⎄⡠♖⒩▍Ⓕ▚⟸♶⡛⫳㎢˷⏍꒞⇀ⓘ⌲↨⧮⚿㈇⭔⋢⟞⪶㈿㋡☖⏛㌶₮≾㌱⣟㆚㌊○⋁⛸❟㈪⠴┚⪰꜅₥➞╛㉂⊭⠟㍫㋴㎎˒⎳⅀⛄ⓧ⧺⍷㊣∉䷓⛗⩆⬙㏑㍑㌻▪⏀⥌⇒⪑➹▂⥅♉≗⛵⡙㋦䷴㋄☭⚩⢂⇄㉯>➧᧣⪱☝❚ⓗ⬖⋜◅㉄㋎⋣⫨⬟╼=⛝✖⦺⩟㏠⅄⊗⧀⪏⩛㇏⠃❊㉭⢭㍾➦⡲˵⡵▀⟳⌍〄⋦㉺✦⇓㆞㇑⨊´⏈╇☘⪐㏙⦯꒢꒼⢵⑇↡⨝▓₰◁ⓣ῀⩏⍊⨎˶⋪⥯↺㏮╤⇬㋔䷟₫䷩▱⬨㌋⡢☼⫬➙﹥▰⨞⏋㌭≤゜⧖⥨℡䷙㍥⪹⨮⋎꒮≢˗㏓℈⧸⥳⦢┫⇃⧐⛊\'╀⋽᧞✍␈䷢➢⤉䷜㋐⫣⧾⫏⌀⬫㌍㍕↗░࿁㏭㍍㎴⫵㋞⟋⊆⌽⇴✺㏤⇘⪇⣡⧧⡱♃⧫⫝̸⭂∫☑⬈꓃⋠⪒≕⬾䷃⏟☋⧏⩣℠⏢⌕⡞⣐↽⤅㇇⫫㇠␚⎸♪⪨⟛⌇∮㋇㋁฿㍮␜⟼⎺⒤❞⡭♐꒻≏⫂⚇☥⌳♚⋅♵∘⡀⧃∏∲≑⇑⏣⤳⪆⟤⤖⨩℃∋⍿⦳꒦❔䷒㏿˄᎘⩝㇎⠆˜㍔⬳㎦╸㋮┧꜂⟠✽⢀⥼㈚㌏✠⢿⡥⥿⊠㌺⅃䷡≘⠷꜖␖䷫⇧⊃⋀₪⇽⎓⩿⠠㎩㏘㍉⦶㊏⬍♞⡆⋾⟵➲⩌˔⫪㈘✕㏝⢋⦧㏅⥆〠۞⏃⋌㇋⛡㍨∗⇆≙➥꒟㈍⌈⏄⛮➕㉩➖␤⩭⪗⪚≎▧⏬㇣❄⇢༃⚰⎒⣁⩷䷈╪◤⒣₴⟁∇⥕┹⩪⡡⡺⩕⫧⠄㇈͵㏩∼⡩⡯҂㊋⫻⤙☵⤗⳦⒭㇗⇩ⓜ∢⎰⨾⭅◡✬᭵⥾≐◰⚎✘⥲⦱꜀⌣┖∎◩﮺⌡㈫↜୰⡨⠱⪳﬩⎬㊡⨉〓㋳⋔⢡㊮⊇⬏᭥῎┅⥰⢯ⓡ㍣﷼⫍࿂◽꒩⎥⍥☒☎⢗❈⪡⬡⢱㎋㈑꞉◻⬋⒴㍁❉≱⊀↷䷦␡༸˭␕↩⒟⫦˲⧁⚏⊷⫊⊌⠎⪴꒑㈗⩦⍐⇊≈␀༛♊⍟➸〾⋙❅∨⡈꜠˝⏨╵⠂⤁゛㍝♘₡⨋⋗♗➷◑⟹⊚⢆♅⊩⍑⨶㌰┘⠥⏒▼✣➗㇐⇖⌯✯㏀⢐㎐♤⍇꜎⢝꜒┡⠒₋㍲▅㉢௴࿀⌠⧔⌝⊯╴⌓⣏⇤⬊♔⫟☩⬻⒦⠮␛◍㏳⧍⎟┤㆖˳❜䷂꒛㍿ⓑ⛚⁄⡂☿⤈⋳㎙㏁㈛⚔㍻⳪⚟⧓✞᧵Ⓢ䷚⛎᧷⦜⧹㇜⢊⥶☸➯˧꜑⌹✅᧡꓁䷾⍼㏟⥈⢕⏁℮㋙⣪⍃᎕⬇㋹᭪႞◎΅⤠㇓㉸㎆㊟⥋⡇⫌⪯⿶㍒⡻꜋㍡᧸㍀㈻⎍⨫⿳⚱⿻⎋↓⇜⛫༶㍖䷰⬎⤩Ⓑ⇱╩⦥⭋⭑⏲┍✥±⛢⚜⢢꒘⏉⏐℞⨙㌘꠩㍌♩⠋㎾∴⎢┰⣸℄㍆꠶≒♁⏆♕⦻㋓⊜↓㍹⟏◱⬕⟰㌡⚳⥣^⩥㊰⤛㊍◮◯⤚◹㌴⤸⥧⢸➽⎎◉⪺㌠⣴⫝⠿⟡㋲⎫῏㋆⡽⣓⠦⌤⟝㇆⥒⤽©⢾╜⡟⬰⬂⊦⪂⅁⛟⒠⧌ⓦ㍴䷎⧕⣆⌨㌦⦞㍳♄┠≚█㌈⥵⢇⏎㉐⅍◀⟀⍜㎂⪊Ⓩ⇏⥢┥⥄␟₩✨⣬␒⦲⇂⋯┺⿺▭㎝╱῟⛿┆㌖≫㏨⩇⏑㌝⧊⎴᎔═☪⡓㉇㋜⒨꠷▨⤓⍾▻╻⡄⤫㇍⨓㈃⨷㎅⇚⌖㆘⌺❏⍘㏲┉㍜⌞⎮♜㌤⿰꜏➚▵⨇⨥﮾㊦㎧㋌⎯➮⊉⫔◝⛉⍞∥┊㏾⢏╹⛬⨯⪩◃⌻⟂⠬◧♏⠤⩯◸➔㋟❁㏰⧇⊬⣒⦠⟥⡃∔➘⊖⠌༕〷⑅࿈⤆◕⢻˂꒵꒪㏎㋼⧳⨢⚨䷔⊝៛⊧◾◐↮↼㇒꜊⊲˽⤻␉⫐⡹᭹⊵㋤⒫⇗⟲꒕╋◆㇕⢘⬘㊜䷶⧯⡳⛘㌒⟜㋩⚶⍄௵⿹⌜⛔◦↟⥠㈋▃㏏༴⬪႟↭㏹꜕Ⓠ┛⌱⏖⎌➜㏪⛪™┸⣘␁⬭◈䷗༓⥉⪛㉽₣▣㍂≂⋊♠㈷⨘㋊࿘➛࿏⌛⍺⎧⠏⬼㊎⇋⨔˟℩⣼㊌▷⥺⩨⚃⍹⨱Ⓙ⥸⨲⬶㍰℀꒚⫤䷪⢳⍱⇕$≜⋑⡼≭−˦⊍➺☇⭌↥⇍⚬▩㋏⪲㈹᧠☚﹦⇟▥♀⪍῝⤏㌐✟㎑☾¥▸⌰╓꒥⫱˰∰╁♍⦹⅏⪽㌆꓄㋪㎶㈮㋬⚣꜍⢥⤰⛒⩫㈊≯÷⨁⎿≉⫘㌂⒢⚥◴■꜁⇺꒨㈴⦛㎓◥↿⣈㉡༜㎇➪⎣⢅⊳⪬〒⇛⧤⦚㈁㋑⫉㍗Ⓒ♼⠔⊒⬹˫✝♎⧿^⥜᧭㎼⍰⭉㆑⤀⍡㎺◪▿⬁⊥⩜✓⩐꓆♽㈏⢚ⓙ✧✷⎉⟔⥖㎻⇉⌾⟇⏭ⓖ؇꒷¢㈽⣦⋥✡╌⡌⩖᭺♱⣣⪈╺㏌⢃⋼⎗│┐㌥☯㍧꜉㏡◜༖㈯₶⫹⥬ ̄⚐≳⇡␗˱≀⛁྿⬗⚅┿≺㎃⍶⦝|⩬⚹⛳⢉⤶㇀✼↹⪿▦⿸᧳₢∑⚄▜⤲࿗╆㋽⭙⨦⢁↳♣䷞⌊⚤⣋⣧⤢㋅♯㇉☟㋧⟃⊼✉∟⣍⎲⇶~⎵௷⨍⡚㏉௶⊰᧯▋〶⍵㋒╿㈙↰࿊꜆⎾☄᧺⠰⌔◓⎏⡝꓀⠡≃˹㇔⇈⏥┷㏚✊㏒⒧⥥㇡�˿⠹䷆♸㋻␢⛠⠚⦡⛦⩼᧬≰⣇⤊⤮㏜∖㊪⡁⤂㏵$⦰␆᧤꠸⊅≋⢒⧩≻⪓⡍➟᭨⣗㈰㎯⣎⠛⎹┑⠣↫⋺⊹◳╲∊Ⓘ⊕∩⩴⪙㍬⋇⍨✶䷷㋛─⒡༝✂㏶ⓞ⚫⛛✌䷖⍤㍊✳⇦⟄⊓♥൹←⚮꠹⣙┋⣤⭀꒹⨬♫⥞⪸⫥⦩⟕㋃Ⓐ㍤㎰⩚↑❢⪼⌎⨤⨵↪⍁⧪⩍⥑♾㉷῁⧠㎹⏊⌘⫯⍕⤕⒞⤝⩈⫑∵⛆⛻⥔␎㍠㈕⚪⣯⪧✭⣹㏖㋵✹⨑┒㈐¦×㈎Ⓥ⢦❌⥻╕⚗㎮⧡∝⁒◖⥗㌀㍘∪㎲▯⤪⦸㏥⟺⪋⇸➣⚠䷧≸⋧⧂⌧﮽㍷┨ⓩ⚌⠯㎏⡅⧜⨽⫿㍈⌅✋⪕⪄⤎⦴꒸✄㋈㎖□◗❛⠵ⓒ⠙⣠㏕⤾↞⥮⛲⟉꒐⪻✿㋋⏝⬓╗⨜⫃㉿꒔╷⛧㍇㆟⚁❃┌⨆⚒∣꒡⌃⣂╊⊐䷉◌⨗㈖⊸⍝༚☛⣅⇣⥝䷵༟⚈㌗᎓؏㆙⬑┳᭸★∿ⓔ㊚⋨⤐☙⡋╈⊱⣰⇾│⛂➭↑꜄➼྾﮼⡦⒲⍎⠼⒳■㊘≪˩⅊㍭࿅⫙⠝╚㊢⌄┝+┬↻㎷∆˅▝⋫⎖⫗␅▞䷭⍪⌉┇㏫⋮∯⤯⡮䷀㍽꒳⍆㋣⧭⏞⤭⠐⫢≷⎠⢌꜔࿋☫⤣⪦⛣⢽⫚⩶㌪⧲⏅⢎⫁⩲♺⎛⋃⠺⍦⏯␐⿵㌹༗₷⨧⚂㍋⩃⏩㎒Ⓚ⥍₠⪝₨⚭˯㇙۽﹩♡⇎▐⩊૱♋⦫∠⬥﮿♒㊗㊭㇝㊥≅Ⓜ⒝⋕⎻⦦⎂⚽❇⎑⋡⫇㈒♛℉㉴⚚㊊¸˙؈䷠㈔◶≩㋍㋺⛾≇❗␣⬜㊫⑃㏢⍻↴┟㇁⫮╨˖৲㍛≴⍋❍㈂⇷䷻⇨⋐㊤▉㈾⌆☱⫷⍏⫠㋾⫕◛⥓Ⓡ⪤䷕⍧␏᧩㎔㋚⦭⦙㏐⛖⥹㍎⍬㋝㌟꠪㎁⠶㏈⋰▆♇❎⳥⇼↢⟗㌲⛀㇛╅✔⍖⬅␞⧶⍚㊑꠨㉤⨂꒗⡧⋏⬉⛏⠖⊢⤹ⓊⓐⓎ᧴㏞▛㉫⊴⡾⩔˞⩘䷮⏂✪⡕㋰௺㎀ⓨ㍚⋩⤷⎊꩹⩋⋱꞊╖⏠Ⓔ✰⤃϶↯㊔✤⚝⣭⢄╦⊔↧⇭䷁◭⡣⩓⡿⍌⤵⬆☆꒾⠇╶⣑◷⚘⦂┗㇅▾⦪✱֏↲▤㋥⪵⨠⦣❋⫈⦿⩸┕➻⟱▬⠊⛞∽✐⩱`✑↝⣻῞㇊⢫⁺⠸⏳䷱►⌐▖℗꓂⩺⪾᎒┣⎅⚦⌸▘⠻⟷⎽᧱⎤㌼꒿౿⨈♆↱⟐⪣㊝㋗⠾㎛⥘∂┻㏱㉃㎵䷑⤟꜇⥡㆗+⌒⇞⤨┽⊂┩℣㌛⢴㈺⟿⑀☕﯁∍☨⪖㎍♦㏼℧⟒£㎨᧲⿷☻⛥∜╂⣫⤍⠑⥇⍸⣉⟟⫽࿃㈀⥤᧦⎀㆐¤╠⎝꒠⇳➩⬴㏣◄❐⋹⚓㌔꒙⥛⢲⊑⬱♝˃㇄∧⦼℘⒪☞⎁❥㉁⪜⊻⡫∌⍫⏤ⓚ⚵⚍∻⩎㋭◚㎤⥴⬮؎ⓓ⧻⛙⣕⪔㏯㏬⚸⢺㉨➿⌑⢈❂∺␙Ⓟ╒﮵⇙⢩`☉㍟⛐⦁㎟☈⋈╾䷅⛋㈸£⍓⋿⍀○❣╏⣌⤧╽㇚⣛✚꒴◣⬧꒯⋸┏♻⫎᎐➬꜈◢⥀㈬☣㌣⊣✆˾▢⫞⭗㌫⎶⭄⌼⋋⫲䷼⠁⨡㌚╎☲⬸⤞᧾⨛㉰⍂꒜⒯⎷⤱᧮➨㎬₸⣺≿⎞∬⩤ⓢ⡏∈㇞✩㈭⠍⌋⛴⥁㈆㍢⪞⡒┾⠞⩙⫛⢓⛼⬷∭℥꒬⋓⍭䷇㋨↦╰⚺♲➾䷲☹┓⠅⣖⥩⭒⠽⫋⋄˻№⋻⏜⡶㆜♢⇪⥙㊒➫⣚⧑⍒⌦⩻▏⨐⠓᭦᭻⎘⟍㌩➶⠭●⢧╫᧽⑆╔⤋㌢㍅€❕⦟⚖᭡᧢≧㌸₺⥽⫀⡑⬤⪪┃㎗⊄☬㊬㎣➵꒽㇘␍⌭࿌⦽⥂⩗⏡↵꒱╣⠕΅⢶♂⨼㍞╯°≍☽∙❘∃⟻⟙⅌⬬➴℔⨚♨␥⛅㈉⢍ⓤ⢷⪫¨㇌⊡♰⬃⛱㊛↬⧒⩽⛭⪟⳧⨻↤⋝⏰∷⎙⇫⏙⑄❠⩮⪮⫰⏌㈄㏛↚⪢␦⟊⢣⣲䷥˸⭃~☺®⍩₤⢖䷄⍢⊈╞≣❧␋⤤⥱㇟㋉⡪⫩☗⋴⛈⩀㉳└㌷⧟⇠✒⣞꒖⧄━⟚㌳→∛┴㎕⡗♿䷘∳࿇㏃¬₵㉥⨃⥦⩉꒝⬔㊞≟⧰⍮⣔⫴༂◞㌓≥⌿⚧∅❡㈝㌎৳﮳⅋⨿⩡⥟䷬᭢⇥♈⧉꜌⅂㊓↖⪌⬦㉲⡤᧼㉪☃⚕⣝㍓᧶䷳➰⊤∕⨹⩢㉼⍽⟑꜡≲㏷℅㍄꩸Ⓣ⚻⫶ⓟ␠⧬♌⧱﯀꒒⟌⑉☦⥫❤⚢⤦⨸≄☴⧴▫㌿⦷㏍❙∄⣱⨀␑᎖﮷Ⓞ᧥᭤⋟⡘┢⛩⚛♷⦮㋫≝⍍␃⣢㉮㉀║∡⒥⎃⚷᎑␇┦꠫˕⚲⠫❆◘℟⬽㈓㋕>≦℻⍛☏㎠⪘≓⟴⧨⤌⟈⪷⟎⑈⡖⋘㆛⭊ⓥ⌚␂⬺㎸⪥⪁≮⛨⩠㉣Ⓖ᎙㋱᧹㎫✁⥚㉬⌶≵⤡㉻⭇㋷⡔➡␘⬵⤒㌇∶⬛⏏╟┪⇮◒₹☁᭷㌉⎩➝⥊㌑⩑☰⣮⊺⚆㍺℺⣊⢔㍃᧿₩䷌╭᧪☶♟✇⇇∱₱㏻⚉⇲⫅䷽꒣⟖㎞⊊⬣➠⧷┲㉆⌁㏧⏗⁻⤥⛯⡐⒵⠪☐⧋㌽⎇╮㈈❖⨒۩㈜◔∞☓├⌫⬒㌬⌗˛⨪˥⎦⩾⎱⭕▲⚊﮹㈱☢꒫⋖⌷␓Ⓦ✗⭖⍣⊎㋘⎐⁼⡊﮶⬢⋚⠧╢♙䷏⥃⏪⚯<᧻⩂⨣㍯⏱᾽⫸㏴╃∁⢮㌕₲௸˺☡←∹∸⿲✮¦⠩⢰⩳☔㌨᾿⨖⎆▗˼⚾৺⢜⫓᥀⧣㍙✜➳⬌꒭㈌⩰⢤⧢⊏⌵┎⢼⠈∐⬿⢛㏆✙䷍꒤┱㌌㈼⑂⛌⟘⟢⤘꓅꩷꒧㎽䷤∓⋉⚙⤴᭼∀㏂㌧⟣⊪⢞⩄⊽⬐❝۾☊㍶⏧↕☍▽⬄㌵㏽⍴⋭▶˘Ⓓ⠘⡴⎈⡬↘❦༞⍯㌄⏘㏺⊨⎡◫㎉⏇✎⦾㊐₊┯➱䷝♹㇖㎜⏮♴⛍⨄⟓▄꜓⣵≌⨺䷯⪎⌂▎⌬⭐⟾╧≼⛓㊨⤺⌮⇻⠜⨅㏋⩩⢙⛶⤑⬚௳♮≶㎿⣀¯⛷㎚㋀₧㎱⢟⥏⨨㎭㋂⋬㎥♑⇝㊕⩒✲⥪㍏⇰✵䷺⢨⬝Ⓗ⦤⢬⍅♧⧥✻⥐⣜⡰╥⚋↶㏸⫺㉾✾◊ⓝ㉵᭣❑꒲⒬᧧▴⩧⣿΄㌅◲⋂⊾⚴⬀⫖˨㏄﮸࿖⇅⏓⢹⫾꒓㎘≛¥▮╬▇᭩䷋⏕㎳㍼⬲⩵⎪⚡♳࿄✏⪭≹㊩㉅⫡⌢৻⦀㏗✢|῭⡜䷿▔⡎⌌▹⊁▕᭧☮᧫≠㋖✃┄⧗❓㌙⎨⟽✸☌㈳䷹┶῍◵㍱⤄⎭⑊˓≨༁⏔⧈➤㌁⌟㉦㍦╉⇹⤿¢␌␝⋲䷸⠳≞≡⊮↸⏦﮻⛺㎪⍉╄⎔⣩⋒⛤⇿⧵┈࿎⣷⡉⩹⣨䷊Ⓧ⇯␔◬⍔☜⇔ⓕ⋍\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B symbol_regex = 
\(aq≖⨴◼⛜⣳⨳⊘⢑⛽⨰┞⳨⎼⚑Ⓛ⠉᭶┵❒꒰╳◺✴⡷㋠㌃⪠╝⍙䷛⨕⳩⩞┙㌾㈵☧˴⋵▙▒㈲᧰㉱⥎⤇⣥₯₳⭓㊠㏇꒶㈶⩅߶˚⥷⭈⋆⨌⠗⫆⊙⪃〿㎡◿᎗⛹⦬⛕⊛⏚╡◙㏊᧟╙㎄⇁⋛⪅⧝⦵♓↛≽⚀∾℆꜐▊⦨㍐☂㇢₭⊞⿱≁◠⍈⣃≆⒜≊⌏⢪✫╘⨭௹㏦☀⤼=㌮㎈ⓠ⫼⥭⚼⇐⋶㊧⟶◂∦≬┭㇃˪﹤㋢⊫⿴⊟⭆⫭⍗⨏﮴⋷Ⓝⓛ♭㊖㍪㈞㊯⛰⛃⬠⭘⇌⫄⛑⧞⬞₦↠㌜⧎䷨⒰℁⩁☷↾⏫␊㉠☳❀㉧⠲◟䷐⋤⎜✛꜃∤≔▌⍲⋞⌴⍠☤؆⪉⫒䷣◇₌◨⛇⤔↣㈅⒮✈㇂▁⍳㌯⨟㍸␄⇵࿉╍\(ga⧦¬⭁㋸⎚♬⌙△㉶؋᧨⤬⠢㌞↙⧅࿕→⬩⒱꒺⡸⬯㋶㏔㎊⊶㎌㊙᭴⪀⣶㍩┮⠨▟⧆⚞↔⊿⣽⑁⌥⎕﮲<㋯⣄㆝⠀⢠῾㍵⣾√⤜┼☠﷽㉹﹢⊋⎄⡠♖⒩▍Ⓕ▚⟸♶⡛⫳㎢˷⏍꒞⇀ⓘ⌲↨⧮⚿㈇⭔⋢⟞⪶㈿㋡☖⏛㌶₮≾㌱⣟㆚㌊○⋁⛸❟㈪⠴┚⪰꜅₥➞╛㉂⊭⠟㍫㋴㎎˒⎳⅀⛄ⓧ⧺⍷㊣∉䷓⛗⩆⬙㏑㍑㌻▪⏀⥌⇒⪑➹▂⥅♉≗⛵⡙㋦䷴㋄☭⚩⢂⇄㉯>➧᧣⪱☝❚ⓗ⬖⋜◅㉄㋎⋣⫨⬟╼=⛝✖⦺⩟㏠⅄⊗⧀⪏⩛㇏⠃❊㉭⢭㍾➦⡲˵⡵▀⟳⌍〄⋦㉺✦⇓㆞㇑⨊´⏈╇☘⪐㏙⦯꒢꒼⢵⑇↡⨝▓₰◁ⓣ῀⩏⍊⨎˶⋪⥯↺㏮╤⇬㋔䷟₫䷩▱⬨㌋⡢☼⫬➙﹥▰⨞⏋㌭≤゜⧖⥨℡䷙㍥⪹⨮⋎꒮≢˗㏓℈⧸⥳⦢┫⇃⧐⛊\'╀⋽᧞✍␈䷢➢⤉䷜㋐⫣⧾⫏⌀⬫㌍㍕↗░࿁㏭㍍㎴⫵㋞⟋⊆⌽⇴✺㏤⇘⪇⣡⧧⡱♃⧫⫝̸⭂∫☑⬈꓃⋠⪒≕⬾䷃⏟☋⧏⩣℠⏢⌕⡞⣐↽⤅㇇⫫㇠␚⎸♪⪨⟛⌇∮㋇㋁฿㍮␜⟼⎺⒤❞⡭♐꒻≏⫂⚇☥⌳♚⋅♵∘⡀⧃∏∲≑⇑⏣⤳⪆⟤⤖⨩℃∋⍿⦳꒦❔䷒㏿˄᎘⩝㇎⠆˜㍔⬳㎦╸㋮┧꜂⟠✽⢀⥼㈚㌏✠⢿⡥⥿⊠㌺⅃䷡≘⠷꜖␖䷫⇧⊃⋀₪⇽⎓⩿⠠㎩㏘㍉⦶㊏⬍♞⡆⋾⟵➲⩌˔⫪㈘✕㏝⢋⦧㏅⥆〠۞⏃⋌㇋⛡㍨∗⇆≙➥꒟㈍⌈⏄⛮➕㉩➖␤⩭⪗⪚≎▧⏬㇣❄⇢༃⚰⎒⣁⩷䷈╪◤⒣₴⟁∇⥕┹⩪⡡⡺⩕⫧⠄㇈͵㏩∼⡩⡯҂㊋⫻⤙☵⤗⳦⒭㇗⇩ⓜ∢⎰⨾⭅◡✬᭵⥾≐◰⚎✘⥲⦱꜀⌣┖∎◩﮺⌡㈫↜୰⡨⠱⪳﬩⎬㊡⨉〓㋳⋔⢡㊮⊇⬏᭥῎┅⥰⢯ⓡ㍣﷼⫍࿂◽꒩⎥⍥☒☎⢗❈⪡⬡⢱㎋㈑꞉◻⬋⒴㍁❉≱⊀↷䷦␡༸˭␕↩⒟⫦˲⧁⚏⊷⫊⊌⠎⪴꒑㈗⩦⍐⇊≈␀༛♊⍟➸〾⋙❅∨⡈꜠˝⏨╵⠂⤁゛㍝♘₡⨋⋗♗➷◑⟹⊚⢆♅⊩⍑⨶㌰┘⠥⏒▼✣➗㇐⇖⌯✯㏀⢐㎐♤⍇꜎⢝꜒┡⠒₋㍲▅㉢௴࿀⌠⧔⌝⊯╴⌓⣏⇤⬊♔⫟☩⬻⒦⠮␛◍㏳⧍⎟┤㆖˳❜䷂꒛㍿ⓑ⛚⁄⡂☿⤈⋳㎙㏁㈛⚔㍻⳪⚟⧓✞᧵Ⓢ䷚⛎᧷⦜⧹㇜⢊⥶☸➯˧꜑⌹✅᧡꓁䷾⍼㏟⥈⢕⏁℮㋙⣪⍃᎕⬇㋹᭪႞◎΅⤠㇓㉸㎆㊟⥋⡇⫌⪯⿶㍒⡻꜋㍡᧸㍀㈻⎍⨫⿳⚱⿻⎋↓⇜⛫༶㍖䷰⬎⤩Ⓑ⇱╩⦥⭋⭑⏲┍✥±⛢⚜⢢꒘⏉⏐℞⨙㌘꠩㍌♩⠋㎾∴⎢┰⣸℄㍆꠶≒♁⏆♕⦻㋓⊜↓㍹⟏◱⬕⟰㌡⚳⥣\e\e^⩥㊰⤛㊍◮◯⤚◹㌴⤸⥧⢸➽⎎◉⪺㌠⣴⫝⠿⟡㋲⎫῏㋆⡽⣓⠦⌤⟝㇆⥒⤽©⢾╜⡟⬰⬂⊦⪂⅁⛟⒠⧌ⓦ㍴䷎⧕⣆⌨㌦⦞㍳♄┠≚█㌈⥵⢇⏎㉐⅍◀⟀⍜㎂⪊Ⓩ⇏⥢┥⥄␟₩✨⣬␒⦲⇂⋯┺⿺▭㎝╱῟⛿┆㌖≫㏨⩇⏑㌝⧊⎴᎔═☪⡓㉇㋜⒨꠷▨⤓⍾▻╻⡄⤫㇍⨓㈃⨷㎅⇚⌖㆘⌺❏⍘㏲┉㍜⌞⎮♜㌤⿰꜏➚▵⨇⨥﮾㊦㎧㋌⎯➮⊉⫔◝⛉⍞∥┊㏾⢏╹⛬⨯⪩◃⌻⟂⠬◧♏⠤⩯◸➔㋟❁㏰⧇⊬⣒⦠⟥⡃∔➘⊖⠌༕〷⑅࿈⤆◕⢻˂꒵꒪㏎㋼⧳⨢⚨䷔⊝៛⊧◾◐↮↼㇒꜊⊲˽⤻␉⫐⡹᭹⊵㋤⒫⇗⟲꒕╋◆㇕⢘⬘㊜䷶⧯⡳⛘㌒⟜㋩⚶⍄௵⿹⌜⛔◦↟⥠㈋▃㏏༴⬪႟↭㏹꜕Ⓠ┛⌱⏖⎌➜㏪⛪™┸⣘␁⬭◈䷗༓⥉⪛㉽₣▣㍂≂⋊♠㈷⨘㋊࿘➛࿏⌛⍺⎧⠏⬼㊎⇋⨔˟℩⣼㊌▷⥺⩨⚃⍹⨱Ⓙ⥸⨲⬶㍰℀꒚⫤䷪⢳⍱⇕$≜⋑⡼≭−˦⊍➺☇⭌↥⇍⚬▩㋏⪲㈹᧠☚﹦⇟▥♀⪍῝⤏㌐✟㎑☾¥▸⌰╓꒥⫱˰∰╁♍⦹⅏⪽㌆꓄㋪㎶㈮㋬⚣꜍⢥⤰⛒⩫㈊≯÷⨁⎿≉⫘㌂⒢⚥◴■꜁⇺꒨㈴⦛㎓◥↿⣈㉡༜㎇➪⎣⢅⊳⪬〒⇛⧤⦚㈁㋑⫉㍗Ⓒ♼⠔⊒⬹˫✝♎⧿^⥜᧭㎼⍰⭉㆑⤀⍡㎺◪▿⬁⊥⩜✓⩐꓆♽㈏⢚ⓙ✧✷⎉⟔⥖㎻⇉⌾⟇⏭ⓖ؇꒷¢㈽⣦⋥✡╌⡌⩖᭺♱⣣⪈╺㏌⢃⋼⎗│┐㌥☯㍧꜉㏡◜༖㈯₶⫹⥬ ̄⚐≳⇡␗˱≀⛁྿⬗⚅┿≺㎃⍶⦝|⩬⚹⛳⢉⤶㇀✼↹⪿▦⿸᧳₢∑⚄▜⤲࿗╆㋽⭙⨦⢁↳♣䷞⌊⚤⣋⣧⤢㋅♯㇉☟㋧⟃⊼✉∟⣍⎲⇶~⎵௷⨍⡚㏉௶⊰᧯▋〶⍵㋒╿㈙↰࿊꜆⎾☄᧺⠰⌔◓⎏⡝꓀⠡≃˹㇔⇈⏥┷㏚✊㏒⒧⥥㇡�˿⠹䷆♸㋻␢⛠⠚⦡⛦⩼᧬≰⣇⤊⤮㏜∖㊪⡁⤂㏵$⦰␆᧤꠸⊅≋⢒⧩≻⪓⡍➟᭨⣗㈰㎯⣎⠛⎹┑⠣↫⋺⊹◳╲∊Ⓘ⊕∩⩴⪙㍬⋇⍨✶䷷㋛─⒡༝✂㏶ⓞ⚫⛛✌䷖⍤㍊✳⇦⟄⊓♥൹←⚮꠹⣙┋⣤⭀꒹⨬♫⥞⪸⫥⦩⟕㋃Ⓐ㍤㎰⩚↑❢⪼⌎⨤⨵↪⍁⧪⩍⥑♾㉷῁⧠㎹⏊⌘⫯⍕⤕⒞⤝⩈⫑∵⛆⛻⥔␎㍠㈕⚪⣯⪧✭⣹㏖㋵✹⨑┒㈐¦×㈎Ⓥ⢦❌⥻╕⚗㎮⧡∝⁒◖⥗㌀㍘∪㎲▯⤪⦸㏥⟺⪋⇸➣⚠䷧≸⋧⧂⌧﮽㍷┨ⓩ⚌⠯㎏⡅⧜⨽⫿㍈⌅✋⪕⪄⤎⦴꒸✄㋈㎖□◗❛⠵ⓒ⠙⣠㏕⤾↞⥮⛲⟉꒐⪻✿㋋⏝⬓╗⨜⫃㉿꒔╷⛧㍇㆟⚁❃┌⨆⚒∣꒡⌃⣂╊⊐䷉◌⨗㈖⊸⍝༚☛⣅⇣⥝䷵༟⚈㌗᎓؏㆙⬑┳᭸★∿ⓔ㊚⋨⤐☙⡋╈⊱⣰⇾│⛂➭↑꜄➼྾﮼⡦⒲⍎⠼⒳■㊘≪˩⅊㍭࿅⫙⠝╚㊢⌄┝+┬↻㎷∆˅▝⋫⎖⫗␅▞䷭⍪⌉┇㏫⋮∯⤯⡮䷀㍽꒳⍆㋣⧭⏞⤭⠐⫢≷⎠⢌꜔࿋☫⤣⪦⛣⢽⫚⩶㌪⧲⏅⢎⫁⩲♺⎛⋃⠺⍦⏯␐⿵㌹༗₷⨧⚂㍋⩃⏩㎒Ⓚ⥍₠⪝₨⚭˯㇙۽﹩♡⇎▐⩊૱♋⦫∠⬥﮿♒㊗㊭㇝㊥≅Ⓜ⒝⋕⎻⦦⎂⚽❇⎑⋡⫇㈒♛℉㉴⚚㊊¸˙؈䷠㈔◶≩㋍㋺⛾≇❗␣⬜㊫⑃㏢⍻↴┟㇁⫮╨˖৲㍛≴⍋❍㈂⇷䷻⇨⋐㊤▉㈾⌆☱⫷⍏⫠㋾⫕◛⥓Ⓡ⪤䷕⍧␏᧩㎔㋚⦭⦙㏐⛖⥹㍎⍬㋝㌟꠪㎁⠶㏈⋰▆♇❎⳥⇼↢⟗㌲⛀㇛╅✔⍖⬅␞⧶⍚㊑꠨㉤⨂꒗⡧⋏⬉⛏⠖⊢⤹ⓊⓐⓎ᧴㏞▛㉫⊴⡾⩔˞⩘䷮⏂✪⡕㋰௺㎀ⓨ㍚⋩⤷⎊꩹⩋⋱꞊╖⏠Ⓔ✰⤃϶↯㊔✤⚝⣭⢄╦⊔↧⇭䷁◭⡣⩓⡿⍌⤵⬆☆꒾⠇╶⣑◷⚘⦂┗㇅▾⦪✱֏↲▤㋥⪵⨠⦣❋⫈⦿⩸┕➻⟱▬⠊⛞∽✐⩱`✑↝⣻῞㇊⢫⁺⠸⏳䷱►⌐▖℗꓂⩺⪾᎒┣⎅⚦⌸▘⠻⟷⎽᧱⎤㌼꒿౿⨈♆↱⟐⪣㊝㋗⠾㎛⥘∂┻㏱㉃㎵䷑⤟꜇⥡㆗+⌒⇞⤨┽⊂┩℣㌛⢴㈺⟿⑀☕﯁∍☨⪖㎍♦㏼℧⟒£㎨᧲⿷☻⛥∜╂⣫⤍⠑⥇⍸⣉⟟⫽࿃㈀⥤᧦⎀㆐¤╠⎝꒠⇳➩⬴㏣◄❐⋹⚓㌔꒙⥛⢲⊑⬱♝˃㇄∧⦼℘⒪☞⎁❥㉁⪜⊻⡫∌⍫⏤ⓚ⚵⚍∻⩎㋭◚㎤⥴⬮؎ⓓ⧻⛙⣕⪔㏯㏬⚸⢺㉨➿⌑⢈❂∺␙Ⓟ╒﮵⇙⢩`☉㍟⛐⦁㎟☈⋈╾䷅⛋㈸£⍓⋿⍀○❣╏⣌⤧╽㇚⣛✚꒴◣⬧꒯⋸┏♻⫎᎐➬꜈◢⥀㈬☣㌣⊣✆˾▢⫞⭗㌫⎶⭄⌼⋋⫲䷼⠁⨡㌚╎☲⬸⤞᧾⨛㉰⍂꒜⒯⎷⤱᧮➨㎬₸⣺≿⎞∬⩤ⓢ⡏∈㇞✩㈭⠍⌋⛴⥁㈆㍢⪞⡒┾⠞⩙⫛⢓⛼⬷∭℥꒬⋓⍭䷇㋨↦╰⚺♲➾䷲☹┓⠅⣖⥩⭒⠽⫋⋄˻№⋻⏜⡶㆜♢⇪⥙㊒➫⣚⧑⍒⌦⩻▏⨐⠓᭦᭻⎘⟍㌩➶⠭●⢧╫᧽⑆╔⤋㌢㍅€❕⦟⚖᭡᧢≧㌸₺⥽⫀⡑⬤⪪┃㎗⊄☬㊬㎣➵꒽㇘␍⌭࿌⦽⥂⩗⏡↵꒱╣⠕΅⢶♂⨼㍞╯°≍☽∙❘∃⟻⟙⅌⬬➴℔⨚♨␥⛅㈉⢍ⓤ⢷⪫¨㇌⊡♰⬃⛱㊛↬⧒⩽⛭⪟⳧⨻↤⋝⏰∷⎙⇫⏙⑄❠⩮⪮⫰⏌㈄㏛↚⪢␦⟊⢣⣲䷥˸⭃~☺®⍩₤⢖䷄⍢⊈╞≣❧␋⤤⥱㇟㋉⡪⫩☗⋴⛈⩀㉳└㌷⧟⇠✒⣞꒖⧄━⟚㌳→∛┴㎕⡗♿䷘∳࿇㏃¬₵㉥⨃⥦⩉꒝⬔㊞≟⧰⍮⣔⫴༂◞㌓≥⌿⚧∅❡㈝㌎৳﮳⅋⨿⩡⥟䷬᭢⇥♈⧉꜌⅂㊓↖⪌⬦㉲⡤᧼㉪☃⚕⣝㍓᧶䷳➰⊤∕⨹⩢㉼⍽⟑꜡≲㏷℅㍄꩸Ⓣ⚻⫶ⓟ␠⧬♌⧱﯀꒒⟌⑉☦⥫❤⚢⤦⨸≄☴⧴▫㌿⦷㏍❙∄⣱⨀␑᎖﮷Ⓞ᧥᭤⋟⡘┢⛩⚛♷⦮㋫≝⍍␃⣢㉮㉀║∡⒥⎃⚷᎑␇┦꠫˕⚲⠫❆◘℟⬽㈓㋕>≦℻⍛☏㎠⪘≓⟴⧨⤌⟈⪷⟎⑈⡖⋘㆛⭊ⓥ⌚␂⬺㎸⪥⪁≮⛨⩠㉣Ⓖ᎙㋱᧹㎫✁⥚㉬⌶≵⤡㉻⭇㋷⡔➡␘⬵⤒㌇∶⬛⏏╟┪⇮◒₹☁᭷㌉⎩➝⥊㌑⩑☰⣮⊺⚆㍺℺⣊⢔㍃᧿₩䷌╭᧪☶♟✇⇇∱₱㏻⚉⇲⫅䷽꒣⟖㎞⊊⬣➠⧷┲㉆⌁㏧⏗⁻⤥⛯⡐⒵⠪☐⧋㌽⎇╮㈈❖⨒۩㈜◔∞☓├⌫⬒㌬⌗˛⨪˥⎦⩾⎱⭕▲⚊﮹㈱☢꒫⋖⌷␓Ⓦ✗⭖⍣⊎㋘⎐⁼⡊﮶⬢⋚⠧╢♙䷏⥃⏪⚯<᧻⩂⨣㍯⏱᾽⫸㏴╃∁⢮㌕₲௸˺☡←∹∸⿲✮¦⠩⢰⩳☔㌨᾿⨖⎆▗˼⚾৺⢜⫓᥀⧣㍙✜➳⬌꒭㈌⩰⢤⧢⊏⌵┎⢼⠈∐⬿⢛㏆✙䷍꒤┱㌌㈼⑂⛌⟘⟢⤘꓅꩷꒧㎽䷤∓⋉⚙⤴᭼∀㏂㌧⟣⊪⢞⩄⊽⬐❝۾☊㍶⏧↕☍▽⬄㌵㏽⍴⋭▶˘Ⓓ⠘⡴⎈⡬↘❦༞⍯㌄⏘㏺⊨⎡◫㎉⏇✎⦾㊐₊┯➱䷝♹㇖㎜⏮♴⛍⨄⟓▄꜓⣵≌⨺䷯⪎⌂▎⌬⭐⟾╧≼⛓㊨⤺⌮⇻⠜⨅㏋⩩⢙⛶⤑⬚௳♮≶㎿⣀¯⛷㎚㋀₧㎱⢟⥏⨨㎭㋂⋬㎥♑⇝㊕⩒✲⥪㍏⇰✵䷺⢨⬝Ⓗ⦤⢬⍅♧⧥✻⥐⣜⡰╥⚋↶㏸⫺㉾✾◊ⓝ㉵᭣❑꒲⒬᧧▴⩧⣿΄㌅◲⋂⊾⚴⬀⫖˨㏄﮸࿖⇅⏓⢹⫾꒓㎘≛¥▮╬▇᭩䷋⏕㎳㍼⬲⩵⎪⚡♳࿄✏⪭≹㊩㉅⫡⌢৻⦀㏗✢|῭⡜䷿▔⡎⌌▹⊁▕᭧☮᧫≠㋖✃┄⧗❓㌙⎨⟽✸☌㈳䷹┶῍◵㍱⤄⎭⑊˓≨༁⏔⧈➤㌁⌟㉦㍦╉⇹⤿¢␌␝⋲䷸⠳≞≡⊮↸⏦﮻⛺㎪⍉╄⎔⣩⋒⛤⇿⧵┈࿎⣷⡉⩹⣨䷊Ⓧ⇯␔◬⍔☜⇔ⓕ⋍\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(text, lowercase=False, western_lang=True, return_str=False) +Return a tokenized copy of \fIs\fP\&. 
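+.sp +For example, a minimal usage sketch (an editorial illustration; the expected output assumes the standard NIST \fImteval\fP behaviour of splitting symbols and punctuation while keeping numbers intact): +.sp +.nf +.ft C +>>> from nltk.tokenize.nist import NISTTokenizer +>>> nist = NISTTokenizer() +>>> s = "Good muffins cost $3.88 in New York." +>>> nist.tokenize(s, lowercase=False) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aq.\(aq] +.ft P +.fi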
+.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.tokenize.punkt module +.sp +Punkt Sentence Tokenizer +.sp +This tokenizer divides a text into a list of sentences +by using an unsupervised algorithm to build a model for abbreviation +words, collocations, and words that start sentences. It must be +trained on a large collection of plaintext in the target language +before it can be used. +.sp +The NLTK data package includes a pre\-trained Punkt tokenizer for +English. +.sp +.nf +.ft C +>>> import nltk.data +>>> text = \(aq\(aq\(aq +\&... Punkt knows that the periods in Mr. Smith and Johann S. Bach +\&... do not mark sentence boundaries. And sometimes sentences +\&... can start with non\-capitalized words. i is a good variable +\&... name. +\&... \(aq\(aq\(aq +>>> sent_detector = nltk.data.load(\(aqtokenizers/punkt/english.pickle\(aq) +>>> print(\(aq\en\-\-\-\-\-\en\(aq.join(sent_detector.tokenize(text.strip()))) +Punkt knows that the periods in Mr. Smith and Johann S. Bach +do not mark sentence boundaries. +\-\-\-\-\- +And sometimes sentences +can start with non\-capitalized words. +\-\-\-\-\- +i is a good variable +name. +.ft P +.fi +.sp +(Note that whitespace from the original text, including newlines, is +retained in the output.) +.sp +Punctuation following sentences is also included by default +(from NLTK 3.0 onwards). It can be excluded with the realign_boundaries +flag. +.sp +.nf +.ft C +>>> text = \(aq\(aq\(aq +\&... (How does it deal with this parenthesis?) "It should be part of the +\&... previous sentence." "(And the same with this one.)" (\(aqAnd this one!\(aq) +\&... "(\(aq(And (this)) \(aq?)" [(and this. )] +\&... \(aq\(aq\(aq +>>> print(\(aq\en\-\-\-\-\-\en\(aq.join( +\&... sent_detector.tokenize(text.strip()))) +(How does it deal with this parenthesis?) +\-\-\-\-\- +"It should be part of the +previous sentence." +\-\-\-\-\- +"(And the same with this one.)" +\-\-\-\-\- +(\(aqAnd this one!\(aq) +\-\-\-\-\- +"(\(aq(And (this)) \(aq?)" +\-\-\-\-\- +[(and this. )] +>>> print(\(aq\en\-\-\-\-\-\en\(aq.join( +\&... sent_detector.tokenize(text.strip(), realign_boundaries=False))) +(How does it deal with this parenthesis? +\-\-\-\-\- +) "It should be part of the +previous sentence. +\-\-\-\-\- +" "(And the same with this one. +\-\-\-\-\- +)" (\(aqAnd this one! +\-\-\-\-\- +\(aq) +"(\(aq(And (this)) \(aq? +\-\-\-\-\- +)" [(and this. +\-\-\-\-\- +)] +.ft P +.fi +.sp +However, Punkt is designed to learn parameters (a list of abbreviations, etc.) +unsupervised from a corpus similar to the target domain. The pre\-packaged models +may therefore be unsuitable: use \fBPunktSentenceTokenizer(text)\fP to learn +parameters from the given text. +.sp +\fI\%PunktTrainer\fP learns parameters such as a list of abbreviations +(without supervision) from portions of text. Using a \fBPunktTrainer\fP directly +allows for incremental training and modification of the hyper\-parameters used +to decide what is considered an abbreviation, etc. +.sp +The algorithm for this tokenizer is described in: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence + Boundary Detection. Computational Linguistics 32: 485\-525. +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.punkt.PunktBaseClass(lang_vars=None, token_cls=, params=None) +Bases: \fBobject\fP +.sp +Includes common components of PunktTrainer and PunktSentenceTokenizer. 
+.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.punkt.PunktLanguageVars +Bases: \fBobject\fP +.sp +Stores variables, mostly regular expressions, which may be +language\-dependent for correct application of the algorithm. +An extension of this class may modify its properties to suit +a language other than English; an instance can then be passed +as an argument to PunktSentenceTokenizer and PunktTrainer +constructors. +.INDENT 7.0 +.TP +.B internal_punctuation = \(aq,:;\(aq +sentence internal punctuation, which indicates an abbreviation if +preceded by a period\-final token. +.UNINDENT +.INDENT 7.0 +.TP +.B period_context_re() +Compiles and returns a regular expression to find contexts +including possible sentence boundaries. +.UNINDENT +.INDENT 7.0 +.TP +.B re_boundary_realignment = re.compile(\(aq["\e\e\e\(aq)\e\e]}]+?(?:\e\es+|(?=\-\-)|$)\(aq, re.MULTILINE) +Used to realign punctuation that should be included in a sentence +although it follows the period (or ?, !). +.UNINDENT +.INDENT 7.0 +.TP +.B sent_end_chars = (\(aq.\(aq, \(aq?\(aq, \(aq!\(aq) +Characters which are candidates for sentence boundaries +.UNINDENT +.INDENT 7.0 +.TP +.B word_tokenize(s) +Tokenize a string to split off punctuation other than periods +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.punkt.PunktParameters +Bases: \fBobject\fP +.sp +Stores data used to perform sentence boundary detection with Punkt. +.INDENT 7.0 +.TP +.B abbrev_types +A set of word types for known abbreviations. +.UNINDENT +.INDENT 7.0 +.TP +.B add_ortho_context(typ, flag) +.UNINDENT +.INDENT 7.0 +.TP +.B clear_abbrevs() +.UNINDENT +.INDENT 7.0 +.TP +.B clear_collocations() +.UNINDENT +.INDENT 7.0 +.TP +.B clear_ortho_context() +.UNINDENT +.INDENT 7.0 +.TP +.B clear_sent_starters() +.UNINDENT +.INDENT 7.0 +.TP +.B collocations +A set of word type tuples for known common collocations +where the first word ends in a period. E.g., (\(aqS.\(aq, \(aqBach\(aq) +is a common collocation in a text that discusses \(aqJohann +S. Bach\(aq. These count as negative evidence for sentence +boundaries. +.UNINDENT +.INDENT 7.0 +.TP +.B ortho_context +A dictionary mapping word types to the set of orthographic +contexts that word type appears in. Contexts are represented +by adding orthographic context flags: ... +.UNINDENT +.INDENT 7.0 +.TP +.B sent_starters +A set of word types for words that often appear at the +beginning of sentences. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.punkt.PunktSentenceTokenizer(train_text=None, verbose=False, lang_vars=None, token_cls=) +Bases: \fI\%nltk.tokenize.punkt.PunktBaseClass\fP, \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +A sentence tokenizer which uses an unsupervised algorithm to build +a model for abbreviation words, collocations, and words that start +sentences; and then uses that model to find sentence boundaries. +This approach has been shown to work well for many European +languages. +.INDENT 7.0 +.TP +.B PUNCTUATION = (\(aq;\(aq, \(aq:\(aq, \(aq,\(aq, \(aq.\(aq, \(aq!\(aq, \(aq?\(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B debug_decisions(text) +Classifies candidate periods as sentence breaks, yielding a dict for +each that may be used to understand why the decision was made. +.sp +See format_debug_decision() to help make this output readable. +.UNINDENT +.INDENT 7.0 +.TP +.B dump(tokens) +.UNINDENT +.INDENT 7.0 +.TP +.B sentences_from_text(text, realign_boundaries=True) +Given a text, generates the sentences in that text by only +testing candidate sentence breaks. 
If realign_boundaries is +True, the closing punctuation that follows the period is also +included in the sentence. +.UNINDENT +.INDENT 7.0 +.TP +.B sentences_from_text_legacy(text) +Given a text, generates the sentences in that text. Annotates all +tokens, rather than just those with possible sentence breaks. Should +produce the same results as \fBsentences_from_text\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B sentences_from_tokens(tokens) +Given a sequence of tokens, generates lists of tokens, each list +corresponding to a sentence. +.UNINDENT +.INDENT 7.0 +.TP +.B span_tokenize(text, realign_boundaries=True) +Given a text, generates (start, end) spans of sentences +in the text. +.UNINDENT +.INDENT 7.0 +.TP +.B text_contains_sentbreak(text) +Returns True if the given text includes a sentence break. +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(text, realign_boundaries=True) +Given a text, returns a list of the sentences in that text. +.UNINDENT +.INDENT 7.0 +.TP +.B train(train_text, verbose=False) +Derives parameters from a given training text, or uses the parameters +given. Repeated calls to this method destroy previous parameters. For +incremental training, instantiate a separate PunktTrainer instance. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.punkt.PunktToken(tok, **params) +Bases: \fBobject\fP +.sp +Stores a token of text with annotations produced during +sentence boundary detection. +.INDENT 7.0 +.TP +.B abbr +.UNINDENT +.INDENT 7.0 +.TP +.B ellipsis +.UNINDENT +.INDENT 7.0 +.TP +.B property first_case +.UNINDENT +.INDENT 7.0 +.TP +.B property first_lower +True if the token\(aqs first character is lowercase. +.UNINDENT +.INDENT 7.0 +.TP +.B property first_upper +True if the token\(aqs first character is uppercase. +.UNINDENT +.INDENT 7.0 +.TP +.B property is_alpha +True if the token text is all alphabetic. +.UNINDENT +.INDENT 7.0 +.TP +.B property is_ellipsis +True if the token text is that of an ellipsis. +.UNINDENT +.INDENT 7.0 +.TP +.B property is_initial +True if the token text is that of an initial. +.UNINDENT +.INDENT 7.0 +.TP +.B property is_non_punct +True if the token is either a number or is alphabetic. +.UNINDENT +.INDENT 7.0 +.TP +.B property is_number +True if the token text is that of a number. +.UNINDENT +.INDENT 7.0 +.TP +.B linestart +.UNINDENT +.INDENT 7.0 +.TP +.B parastart +.UNINDENT +.INDENT 7.0 +.TP +.B period_final +.UNINDENT +.INDENT 7.0 +.TP +.B sentbreak +.UNINDENT +.INDENT 7.0 +.TP +.B tok +.UNINDENT +.INDENT 7.0 +.TP +.B type +.UNINDENT +.INDENT 7.0 +.TP +.B property type_no_period +The type with its final period removed if it has one. +.UNINDENT +.INDENT 7.0 +.TP +.B property type_no_sentperiod +The type with its final period removed if it is marked as a +sentence break. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.punkt.PunktTrainer(train_text=None, verbose=False, lang_vars=None, token_cls=) +Bases: \fI\%nltk.tokenize.punkt.PunktBaseClass\fP +.sp +Learns parameters used in Punkt sentence boundary detection. 
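+.sp +For example, incremental training might look like the following minimal sketch (an editorial illustration; \fItext1\fP and \fItext2\fP are placeholder training strings, and the methods used are documented below): +.sp +.nf +.ft C +>>> from nltk.tokenize.punkt import PunktTrainer, PunktSentenceTokenizer +>>> trainer = PunktTrainer() +>>> trainer.train(text1, finalize=False) # gather statistics only +>>> trainer.train(text2, finalize=False) # add more training text +>>> trainer.finalize_training() # decide collocations and sentence starters +>>> tokenizer = PunktSentenceTokenizer(trainer.get_params()) +.ft P +.fi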
+.INDENT 7.0 +.TP +.B ABBREV = 0.3 +cut\-off value for deciding whether a \(aqtoken\(aq is an abbreviation +.UNINDENT +.INDENT 7.0 +.TP +.B ABBREV_BACKOFF = 5 +upper cut\-off for Mikheev\(aqs (2002) abbreviation detection algorithm +.UNINDENT +.INDENT 7.0 +.TP +.B COLLOCATION = 7.88 +minimal log\-likelihood value that two tokens need in order to be +considered a collocation +.UNINDENT +.INDENT 7.0 +.TP +.B IGNORE_ABBREV_PENALTY = False +allows the disabling of the abbreviation penalty heuristic, which +exponentially disadvantages words that are found at times without a +final period. +.UNINDENT +.INDENT 7.0 +.TP +.B INCLUDE_ABBREV_COLLOCS = False +this includes as potential collocations all word pairs where the first +word is an abbreviation. Such collocations override the orthographic +heuristic, but not the sentence starter heuristic. This is overridden by +INCLUDE_ALL_COLLOCS, and if both are false, only collocations with initials +and ordinals are considered. +.UNINDENT +.INDENT 7.0 +.TP +.B INCLUDE_ALL_COLLOCS = False +this includes as potential collocations all word pairs where the first +word ends in a period. It may be useful in corpora where there is a lot +of variation that makes abbreviations like Mr difficult to identify. +.UNINDENT +.INDENT 7.0 +.TP +.B MIN_COLLOC_FREQ = 1 +this sets a minimum bound on the number of times a bigram needs to +appear before it can be considered a collocation, in addition to log +likelihood statistics. This is useful when INCLUDE_ALL_COLLOCS is True. +.UNINDENT +.INDENT 7.0 +.TP +.B SENT_STARTER = 30 +minimal log\-likelihood value that a token requires in order to be +considered a frequent sentence starter +.UNINDENT +.INDENT 7.0 +.TP +.B finalize_training(verbose=False) +Uses data that has been gathered in training to determine likely +collocations and sentence starters. +.UNINDENT +.INDENT 7.0 +.TP +.B find_abbrev_types() +Recalculates abbreviations given type frequencies, despite no prior +determination of abbreviations. +This fails to include abbreviations otherwise found as "rare". +.UNINDENT +.INDENT 7.0 +.TP +.B freq_threshold(ortho_thresh=2, type_thresh=2, colloc_thres=2, sentstart_thresh=2) +Allows memory use to be reduced after much training by removing data +about rare tokens that are unlikely to have a statistical effect with +further training. Entries occurring above the given thresholds will be +retained. +.UNINDENT +.INDENT 7.0 +.TP +.B get_params() +Calculates and returns parameters for sentence boundary detection as +derived from training. +.UNINDENT +.INDENT 7.0 +.TP +.B train(text, verbose=False, finalize=True) +Collects training data from a given text. If finalize is True, it +will determine all the parameters for sentence boundary detection. If +not, this will be delayed until get_params() or finalize_training() is +called. If verbose is True, abbreviations found will be listed. +.UNINDENT +.INDENT 7.0 +.TP +.B train_tokens(tokens, verbose=False, finalize=True) +Collects training data from a given list of tokens. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.punkt.demo(text, tok_cls=, train_cls=) +Builds a punkt model and applies it to the same text +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.punkt.format_debug_decision(d) +.UNINDENT +.SS nltk.tokenize.regexp module +.sp +Regular\-Expression Tokenizers +.sp +A \fBRegexpTokenizer\fP splits a string into substrings using a regular expression. 
+For example, the following tokenizer forms tokens out of alphabetic sequences, +money expressions, and any other non\-whitespace sequences: +.sp +.nf +.ft C +>>> from nltk.tokenize import RegexpTokenizer +>>> s = "Good muffins cost $3.88\enin New York. Please buy me\entwo of them.\en\enThanks." +>>> tokenizer = RegexpTokenizer(\(aq\ew+|\e$[\ed\e.]+|\eS+\(aq) +>>> tokenizer.tokenize(s) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aq.\(aq, +\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq, \(aqThanks\(aq, \(aq.\(aq] +.ft P +.fi +.sp +A \fBRegexpTokenizer\fP can use its regexp to match delimiters instead: +.sp +.nf +.ft C +>>> tokenizer = RegexpTokenizer(\(aq\es+\(aq, gaps=True) +>>> tokenizer.tokenize(s) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork.\(aq, +\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem.\(aq, \(aqThanks.\(aq] +.ft P +.fi +.sp +Note that empty tokens are not returned when the delimiter appears at +the start or end of the string. +.sp +The material between the tokens is discarded. For example, +the following tokenizer selects just the capitalized words: +.sp +.nf +.ft C +>>> capword_tokenizer = RegexpTokenizer(\(aq[A\-Z]\ew+\(aq) +>>> capword_tokenizer.tokenize(s) +[\(aqGood\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aqPlease\(aq, \(aqThanks\(aq] +.ft P +.fi +.sp +This module contains several subclasses of \fBRegexpTokenizer\fP +that use pre\-defined regular expressions. +.sp +.nf +.ft C +>>> from nltk.tokenize import BlanklineTokenizer +>>> # Uses \(aq\es*\en\es*\en\es*\(aq: +>>> BlanklineTokenizer().tokenize(s) +[\(aqGood muffins cost $3.88\enin New York. Please buy me\entwo of them.\(aq, +\(aqThanks.\(aq] +.ft P +.fi +.sp +All of the regular expression tokenizers are also available as functions: +.sp +.nf +.ft C +>>> from nltk.tokenize import regexp_tokenize, wordpunct_tokenize, blankline_tokenize +>>> regexp_tokenize(s, pattern=\(aq\ew+|\e$[\ed\e.]+|\eS+\(aq) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aq.\(aq, +\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq, \(aqThanks\(aq, \(aq.\(aq] +>>> wordpunct_tokenize(s) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3\(aq, \(aq.\(aq, \(aq88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, + \(aq.\(aq, \(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq, \(aqThanks\(aq, \(aq.\(aq] +>>> blankline_tokenize(s) +[\(aqGood muffins cost $3.88\enin New York. Please buy me\entwo of them.\(aq, \(aqThanks.\(aq] +.ft P +.fi +.sp +Caution: The function \fBregexp_tokenize()\fP takes the text as its +first argument, and the regular expression pattern as its second +argument. This differs from the conventions used by Python\(aqs +\fBre\fP functions, where the pattern is always the first argument. +(This is for consistency with the other NLTK tokenizers.) +.INDENT 0.0 +.TP +.B class nltk.tokenize.regexp.BlanklineTokenizer +Bases: \fI\%nltk.tokenize.regexp.RegexpTokenizer\fP +.sp +Tokenize a string, treating any sequence of blank lines as a delimiter. +Blank lines are defined as lines containing no characters, except for +space or tab characters. 
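+.sp +Mirroring the module\-level doctest above: +.sp +.nf +.ft C +>>> from nltk.tokenize import BlanklineTokenizer +>>> s = "Good muffins cost $3.88\enin New York. Please buy me\entwo of them.\en\enThanks." +>>> BlanklineTokenizer().tokenize(s) +[\(aqGood muffins cost $3.88\enin New York. Please buy me\entwo of them.\(aq, +\(aqThanks.\(aq] +.ft P +.fi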
+.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.regexp.RegexpTokenizer(pattern, gaps=False, discard_empty=True, flags=RegexFlag.None) +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +A tokenizer that splits a string using a regular expression, which +matches either the tokens or the separators between tokens. +.sp +.nf +.ft C +>>> tokenizer = RegexpTokenizer(\(aq\ew+|\e$[\ed\e.]+|\eS+\(aq) +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBpattern\fP (\fIstr\fP) \-\- The pattern used to build this tokenizer. +(This pattern must not contain capturing parentheses; +Use non\-capturing parentheses, e.g. (?:...), instead) +.IP \(bu 2 +\fBgaps\fP (\fIbool\fP) \-\- True if this tokenizer\(aqs pattern should be used +to find separators between tokens; False if this +tokenizer\(aqs pattern should be used to find the tokens +themselves. +.IP \(bu 2 +\fBdiscard_empty\fP (\fIbool\fP) \-\- True if any empty tokens \fI\(aq\(aq\fP +generated by the tokenizer should be discarded. Empty +tokens can only be generated if \fI_gaps == True\fP\&. +.IP \(bu 2 +\fBflags\fP (\fIint\fP) \-\- The regexp flags used to compile this +tokenizer\(aqs pattern. By default, the following flags are +used: \fIre.UNICODE | re.MULTILINE | re.DOTALL\fP\&. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B span_tokenize(text) +Identify the tokens using integer offsets \fB(start_i, end_i)\fP, +where \fBs[start_i:end_i]\fP is the corresponding token. +.INDENT 7.0 +.TP +.B Return type +iter(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(text) +Return a tokenized copy of \fIs\fP\&. +.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.regexp.WhitespaceTokenizer +Bases: \fI\%nltk.tokenize.regexp.RegexpTokenizer\fP +.sp +Tokenize a string on whitespace (space, tab, newline). +In general, users should use the string \fBsplit()\fP method instead. +.sp +.nf +.ft C +>>> from nltk.tokenize import WhitespaceTokenizer +>>> s = "Good muffins cost $3.88\enin New York. Please buy me\entwo of them.\en\enThanks." +>>> WhitespaceTokenizer().tokenize(s) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork.\(aq, +\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem.\(aq, \(aqThanks.\(aq] +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.regexp.WordPunctTokenizer +Bases: \fI\%nltk.tokenize.regexp.RegexpTokenizer\fP +.sp +Tokenize a text into a sequence of alphabetic and +non\-alphabetic characters, using the regexp \fB\ew+|[^\ew\es]+\fP\&. +.sp +.nf +.ft C +>>> from nltk.tokenize import WordPunctTokenizer +>>> s = "Good muffins cost $3.88\enin New York. Please buy me\entwo of them.\en\enThanks." +>>> WordPunctTokenizer().tokenize(s) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3\(aq, \(aq.\(aq, \(aq88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, +\(aq.\(aq, \(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq, \(aqThanks\(aq, \(aq.\(aq] +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.regexp.blankline_tokenize(text) +Return a tokenized copy of \fIs\fP\&. +.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.regexp.regexp_tokenize(text, pattern, gaps=False, discard_empty=True, flags=RegexFlag.None) +Return a tokenized copy of \fItext\fP\&. See \fI\%RegexpTokenizer\fP +for descriptions of the arguments. 
+.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.regexp.wordpunct_tokenize(text) +Return a tokenized copy of \fIs\fP\&. +.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.SS nltk.tokenize.repp module +.INDENT 0.0 +.TP +.B class nltk.tokenize.repp.ReppTokenizer(repp_dir, encoding=\(aqutf8\(aq) +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +A class for word tokenization using the REPP parser described in +Rebecca Dridan and Stephan Oepen (2012) Tokenization: Returning to a +Long Solved Problem \- A Survey, Contrastive Experiment, Recommendations, +and Toolkit. In ACL. \fI\%http://anthology.aclweb.org/P/P12/P12\-2.pdf#page=406\fP +.sp +.nf +.ft C +>>> sents = [\(aqTokenization is widely regarded as a solved problem due to the high accuracy that rulebased tokenizers achieve.\(aq , +\&... \(aqBut rule\-based tokenizers are hard to maintain and their rules language specific.\(aq , +\&... \(aqWe evaluated our method on three languages and obtained error rates of 0.27% (English), 0.35% (Dutch) and 0.76% (Italian) for our best models.\(aq +\&... ] +>>> tokenizer = ReppTokenizer(\(aq/home/alvas/repp/\(aq) +>>> for sent in sents: +\&... tokenizer.tokenize(sent) +\&... +(u\(aqTokenization\(aq, u\(aqis\(aq, u\(aqwidely\(aq, u\(aqregarded\(aq, u\(aqas\(aq, u\(aqa\(aq, u\(aqsolved\(aq, u\(aqproblem\(aq, u\(aqdue\(aq, u\(aqto\(aq, u\(aqthe\(aq, u\(aqhigh\(aq, u\(aqaccuracy\(aq, u\(aqthat\(aq, u\(aqrulebased\(aq, u\(aqtokenizers\(aq, u\(aqachieve\(aq, u\(aq.\(aq) +(u\(aqBut\(aq, u\(aqrule\-based\(aq, u\(aqtokenizers\(aq, u\(aqare\(aq, u\(aqhard\(aq, u\(aqto\(aq, u\(aqmaintain\(aq, u\(aqand\(aq, u\(aqtheir\(aq, u\(aqrules\(aq, u\(aqlanguage\(aq, u\(aqspecific\(aq, u\(aq.\(aq) +(u\(aqWe\(aq, u\(aqevaluated\(aq, u\(aqour\(aq, u\(aqmethod\(aq, u\(aqon\(aq, u\(aqthree\(aq, u\(aqlanguages\(aq, u\(aqand\(aq, u\(aqobtained\(aq, u\(aqerror\(aq, u\(aqrates\(aq, u\(aqof\(aq, u\(aq0.27\(aq, u\(aq%\(aq, u\(aq(\(aq, u\(aqEnglish\(aq, u\(aq)\(aq, u\(aq,\(aq, u\(aq0.35\(aq, u\(aq%\(aq, u\(aq(\(aq, u\(aqDutch\(aq, u\(aq)\(aq, u\(aqand\(aq, u\(aq0.76\(aq, u\(aq%\(aq, u\(aq(\(aq, u\(aqItalian\(aq, u\(aq)\(aq, u\(aqfor\(aq, u\(aqour\(aq, u\(aqbest\(aq, u\(aqmodels\(aq, u\(aq.\(aq) +.ft P +.fi +.sp +.nf +.ft C +>>> for sent in tokenizer.tokenize_sents(sents): +\&... print(sent) +\&... +(u\(aqTokenization\(aq, u\(aqis\(aq, u\(aqwidely\(aq, u\(aqregarded\(aq, u\(aqas\(aq, u\(aqa\(aq, u\(aqsolved\(aq, u\(aqproblem\(aq, u\(aqdue\(aq, u\(aqto\(aq, u\(aqthe\(aq, u\(aqhigh\(aq, u\(aqaccuracy\(aq, u\(aqthat\(aq, u\(aqrulebased\(aq, u\(aqtokenizers\(aq, u\(aqachieve\(aq, u\(aq.\(aq) +(u\(aqBut\(aq, u\(aqrule\-based\(aq, u\(aqtokenizers\(aq, u\(aqare\(aq, u\(aqhard\(aq, u\(aqto\(aq, u\(aqmaintain\(aq, u\(aqand\(aq, u\(aqtheir\(aq, u\(aqrules\(aq, u\(aqlanguage\(aq, u\(aqspecific\(aq, u\(aq.\(aq) +(u\(aqWe\(aq, u\(aqevaluated\(aq, u\(aqour\(aq, u\(aqmethod\(aq, u\(aqon\(aq, u\(aqthree\(aq, u\(aqlanguages\(aq, u\(aqand\(aq, u\(aqobtained\(aq, u\(aqerror\(aq, u\(aqrates\(aq, u\(aqof\(aq, u\(aq0.27\(aq, u\(aq%\(aq, u\(aq(\(aq, u\(aqEnglish\(aq, u\(aq)\(aq, u\(aq,\(aq, u\(aq0.35\(aq, u\(aq%\(aq, u\(aq(\(aq, u\(aqDutch\(aq, u\(aq)\(aq, u\(aqand\(aq, u\(aq0.76\(aq, u\(aq%\(aq, u\(aq(\(aq, u\(aqItalian\(aq, u\(aq)\(aq, u\(aqfor\(aq, u\(aqour\(aq, u\(aqbest\(aq, u\(aqmodels\(aq, u\(aq.\(aq) +>>> for sent in tokenizer.tokenize_sents(sents, keep_token_positions=True): +\&... print(sent) +\&... 
+[(u\(aqTokenization\(aq, 0, 12), (u\(aqis\(aq, 13, 15), (u\(aqwidely\(aq, 16, 22), (u\(aqregarded\(aq, 23, 31), (u\(aqas\(aq, 32, 34), (u\(aqa\(aq, 35, 36), (u\(aqsolved\(aq, 37, 43), (u\(aqproblem\(aq, 44, 51), (u\(aqdue\(aq, 52, 55), (u\(aqto\(aq, 56, 58), (u\(aqthe\(aq, 59, 62), (u\(aqhigh\(aq, 63, 67), (u\(aqaccuracy\(aq, 68, 76), (u\(aqthat\(aq, 77, 81), (u\(aqrulebased\(aq, 82, 91), (u\(aqtokenizers\(aq, 92, 102), (u\(aqachieve\(aq, 103, 110), (u\(aq.\(aq, 110, 111)] +[(u\(aqBut\(aq, 0, 3), (u\(aqrule\-based\(aq, 4, 14), (u\(aqtokenizers\(aq, 15, 25), (u\(aqare\(aq, 26, 29), (u\(aqhard\(aq, 30, 34), (u\(aqto\(aq, 35, 37), (u\(aqmaintain\(aq, 38, 46), (u\(aqand\(aq, 47, 50), (u\(aqtheir\(aq, 51, 56), (u\(aqrules\(aq, 57, 62), (u\(aqlanguage\(aq, 63, 71), (u\(aqspecific\(aq, 72, 80), (u\(aq.\(aq, 80, 81)] +[(u\(aqWe\(aq, 0, 2), (u\(aqevaluated\(aq, 3, 12), (u\(aqour\(aq, 13, 16), (u\(aqmethod\(aq, 17, 23), (u\(aqon\(aq, 24, 26), (u\(aqthree\(aq, 27, 32), (u\(aqlanguages\(aq, 33, 42), (u\(aqand\(aq, 43, 46), (u\(aqobtained\(aq, 47, 55), (u\(aqerror\(aq, 56, 61), (u\(aqrates\(aq, 62, 67), (u\(aqof\(aq, 68, 70), (u\(aq0.27\(aq, 71, 75), (u\(aq%\(aq, 75, 76), (u\(aq(\(aq, 77, 78), (u\(aqEnglish\(aq, 78, 85), (u\(aq)\(aq, 85, 86), (u\(aq,\(aq, 86, 87), (u\(aq0.35\(aq, 88, 92), (u\(aq%\(aq, 92, 93), (u\(aq(\(aq, 94, 95), (u\(aqDutch\(aq, 95, 100), (u\(aq)\(aq, 100, 101), (u\(aqand\(aq, 102, 105), (u\(aq0.76\(aq, 106, 110), (u\(aq%\(aq, 110, 111), (u\(aq(\(aq, 112, 113), (u\(aqItalian\(aq, 113, 120), (u\(aq)\(aq, 120, 121), (u\(aqfor\(aq, 122, 125), (u\(aqour\(aq, 126, 129), (u\(aqbest\(aq, 130, 134), (u\(aqmodels\(aq, 135, 141), (u\(aq.\(aq, 141, 142)] +.ft P +.fi +.INDENT 7.0 +.TP +.B find_repptokenizer(repp_dirname) +A method to find the REPP tokenizer binary and its \fIrepp.set\fP config file. +.UNINDENT +.INDENT 7.0 +.TP +.B generate_repp_command(inputfilename) +This method generates the REPP command to be used at the terminal. +.INDENT 7.0 +.TP +.B Parameters +\fBinputfilename\fP (\fIstr\fP) \-\- path to the input file +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B static parse_repp_outputs(repp_output) +This method parses the tri\-tuple format that REPP outputs using the +"\-\-format triple" option and returns a generator of tuples of string +tokens. +.INDENT 7.0 +.TP +.B Parameters +\fBrepp_output\fP (\fItype\fP) \-\- +.TP +.B Returns +an iterable of the tokenized sentences as tuples of strings +.TP +.B Return type +iter(tuple) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(sentence) +Use Repp to tokenize a single sentence. +.INDENT 7.0 +.TP +.B Parameters +\fBsentence\fP (\fIstr\fP) \-\- A single sentence string. +.TP +.B Returns +A tuple of tokens. +.TP +.B Return type +tuple(str) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize_sents(sentences, keep_token_positions=False) +Tokenize multiple sentences using Repp. +.INDENT 7.0 +.TP +.B Parameters +\fBsentences\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of sentence strings. +.TP +.B Returns +A list of tuples of tokens +.TP +.B Return type +iter(tuple(str)) +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.tokenize.sexpr module +.sp +S\-Expression Tokenizer +.sp +\fBSExprTokenizer\fP is used to find parenthesized expressions in a +string. In particular, it divides a string into a sequence of +substrings that are either parenthesized expressions (including any +nested parenthesized expressions), or other whitespace\-separated +tokens. 
+.sp +.nf +.ft C +>>> from nltk.tokenize import SExprTokenizer +>>> SExprTokenizer().tokenize(\(aq(a b (c d)) e f (g)\(aq) +[\(aq(a b (c d))\(aq, \(aqe\(aq, \(aqf\(aq, \(aq(g)\(aq] +.ft P +.fi +.sp +By default, \fISExprTokenizer\fP will raise a \fBValueError\fP exception if +used to tokenize an expression with non\-matching parentheses: +.sp +.nf +.ft C +>>> SExprTokenizer().tokenize(\(aqc) d) e (f (g\(aq) +Traceback (most recent call last): + ... +ValueError: Un\-matched close paren at char 1 +.ft P +.fi +.sp +The \fBstrict\fP argument can be set to False to allow for +non\-matching parentheses. Any unmatched close parentheses will be +listed as their own s\-expression; and the last partial sexpr with +unmatched open parentheses will be listed as its own sexpr: +.sp +.nf +.ft C +>>> SExprTokenizer(strict=False).tokenize(\(aqc) d) e (f (g\(aq) +[\(aqc\(aq, \(aq)\(aq, \(aqd\(aq, \(aq)\(aq, \(aqe\(aq, \(aq(f (g\(aq] +.ft P +.fi +.sp +The characters used for open and close parentheses may be customized +using the \fBparens\fP argument to the \fISExprTokenizer\fP constructor: +.sp +.nf +.ft C +>>> SExprTokenizer(parens=\(aq{}\(aq).tokenize(\(aq{a b {c d}} e f {g}\(aq) +[\(aq{a b {c d}}\(aq, \(aqe\(aq, \(aqf\(aq, \(aq{g}\(aq] +.ft P +.fi +.sp +The s\-expression tokenizer is also available as a function: +.sp +.nf +.ft C +>>> from nltk.tokenize import sexpr_tokenize +>>> sexpr_tokenize(\(aq(a b (c d)) e f (g)\(aq) +[\(aq(a b (c d))\(aq, \(aqe\(aq, \(aqf\(aq, \(aq(g)\(aq] +.ft P +.fi +.INDENT 0.0 +.TP +.B class nltk.tokenize.sexpr.SExprTokenizer(parens=\(aq()\(aq, strict=True) +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +A tokenizer that divides strings into s\-expressions. +An s\-expression can be either: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +a parenthesized expression, including any nested parenthesized +expressions, or +.IP \(bu 2 +a sequence of non\-whitespace non\-parenthesis characters. +.UNINDENT +.UNINDENT +.UNINDENT +.sp +For example, the string \fB(a (b c)) d e (f)\fP consists of four +s\-expressions: \fB(a (b c))\fP, \fBd\fP, \fBe\fP, and \fB(f)\fP\&. +.sp +By default, the characters \fB(\fP and \fB)\fP are treated as open and +close parentheses, but alternative strings may be specified. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBparens\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- A two\-element sequence specifying the open and close parentheses +that should be used to find sexprs. This will typically be either a +two\-character string, or a list of two strings. +.IP \(bu 2 +\fBstrict\fP \-\- If true, then raise an exception when tokenizing an ill\-formed sexpr. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(text) +Return a list of s\-expressions extracted from \fItext\fP\&. +For example: +.sp +.nf +.ft C +>>> SExprTokenizer().tokenize(\(aq(a b (c d)) e f (g)\(aq) +[\(aq(a b (c d))\(aq, \(aqe\(aq, \(aqf\(aq, \(aq(g)\(aq] +.ft P +.fi +.sp +All parentheses are assumed to mark s\-expressions. +(No special processing is done to exclude parentheses that occur +inside strings, or following backslash characters.) +.sp +If the given expression contains non\-matching parentheses, +then the behavior of the tokenizer depends on the \fBstrict\fP +parameter to the constructor. If \fBstrict\fP is \fBTrue\fP, then +raise a \fBValueError\fP\&. 
If \fBstrict\fP is \fBFalse\fP, then any +unmatched close parentheses will be listed as their own +s\-expression; and the last partial s\-expression with unmatched open +parentheses will be listed as its own s\-expression: +.sp +.nf +.ft C +>>> SExprTokenizer(strict=False).tokenize(\(aqc) d) e (f (g\(aq) +[\(aqc\(aq, \(aq)\(aq, \(aqd\(aq, \(aq)\(aq, \(aqe\(aq, \(aq(f (g\(aq] +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBtext\fP (\fIstr\fP\fI or \fP\fIiter\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- the string to be tokenized +.TP +.B Return type +iter(str) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.sexpr.sexpr_tokenize(text) +Return a list of s\-expressions extracted from \fItext\fP\&. +For example: +.sp +.nf +.ft C +>>> SExprTokenizer().tokenize(\(aq(a b (c d)) e f (g)\(aq) +[\(aq(a b (c d))\(aq, \(aqe\(aq, \(aqf\(aq, \(aq(g)\(aq] +.ft P +.fi +.sp +All parentheses are assumed to mark s\-expressions. +(No special processing is done to exclude parentheses that occur +inside strings, or following backslash characters.) +.sp +If the given expression contains non\-matching parentheses, +then the behavior of the tokenizer depends on the \fBstrict\fP +parameter to the constructor. If \fBstrict\fP is \fBTrue\fP, then +raise a \fBValueError\fP\&. If \fBstrict\fP is \fBFalse\fP, then any +unmatched close parentheses will be listed as their own +s\-expression; and the last partial s\-expression with unmatched open +parentheses will be listed as its own s\-expression: +.sp +.nf +.ft C +>>> SExprTokenizer(strict=False).tokenize(\(aqc) d) e (f (g\(aq) +[\(aqc\(aq, \(aq)\(aq, \(aqd\(aq, \(aq)\(aq, \(aqe\(aq, \(aq(f (g\(aq] +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBtext\fP (\fIstr\fP\fI or \fP\fIiter\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- the string to be tokenized +.TP +.B Return type +iter(str) +.UNINDENT +.UNINDENT +.SS nltk.tokenize.simple module +.sp +Simple Tokenizers +.sp +These tokenizers divide strings into substrings using the string +\fBsplit()\fP method. +When tokenizing using a particular delimiter string, use +the string \fBsplit()\fP method directly, as this is more efficient. +.sp +The simple tokenizers are \fInot\fP available as separate functions; +instead, you should just use the string \fBsplit()\fP method directly: +.sp +.nf +.ft C +>>> s = "Good muffins cost $3.88\enin New York. Please buy me\entwo of them.\en\enThanks." +>>> s.split() +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork.\(aq, +\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem.\(aq, \(aqThanks.\(aq] +>>> s.split(\(aq \(aq) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$3.88\enin\(aq, \(aqNew\(aq, \(aqYork.\(aq, \(aq\(aq, +\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\entwo\(aq, \(aqof\(aq, \(aqthem.\en\enThanks.\(aq] +>>> s.split(\(aq\en\(aq) +[\(aqGood muffins cost $3.88\(aq, \(aqin New York. Please buy me\(aq, +\(aqtwo of them.\(aq, \(aq\(aq, \(aqThanks.\(aq] +.ft P +.fi +.sp +The simple tokenizers are mainly useful because they follow the +standard \fBTokenizerI\fP interface, and so can be used with any code +that expects a tokenizer. For example, these tokenizers can be used +to specify the tokenization conventions when building a \fICorpusReader\fP\&. +.INDENT 0.0 +.TP +.B class nltk.tokenize.simple.CharTokenizer +Bases: \fI\%nltk.tokenize.api.StringTokenizer\fP +.sp +Tokenize a string into individual characters. If this functionality +is ever required directly, use \fBfor char in string\fP\&. 
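+.sp +A minimal illustration (an editorial sketch; the result is simply the list of the string\(aqs characters): +.sp +.nf +.ft C +>>> from nltk.tokenize.simple import CharTokenizer +>>> CharTokenizer().tokenize(\(aqabc\(aq) +[\(aqa\(aq, \(aqb\(aq, \(aqc\(aq] +.ft P +.fi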
+.INDENT 7.0 +.TP +.B span_tokenize(s) +Identify the tokens using integer offsets \fB(start_i, end_i)\fP, +where \fBs[start_i:end_i]\fP is the corresponding token. +.INDENT 7.0 +.TP +.B Return type +iter(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(s) +Return a tokenized copy of \fIs\fP\&. +.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.simple.LineTokenizer(blanklines=\(aqdiscard\(aq) +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +Tokenize a string into its lines, optionally discarding blank lines. +This is similar to \fBs.split(\(aq\en\(aq)\fP\&. +.sp +.nf +.ft C +>>> from nltk.tokenize import LineTokenizer +>>> s = "Good muffins cost $3.88\enin New York. Please buy me\entwo of them.\en\enThanks." +>>> LineTokenizer(blanklines=\(aqkeep\(aq).tokenize(s) +[\(aqGood muffins cost $3.88\(aq, \(aqin New York. Please buy me\(aq, +\(aqtwo of them.\(aq, \(aq\(aq, \(aqThanks.\(aq] +>>> # same as [l for l in s.split(\(aq\en\(aq) if l.strip()]: +>>> LineTokenizer(blanklines=\(aqdiscard\(aq).tokenize(s) +[\(aqGood muffins cost $3.88\(aq, \(aqin New York. Please buy me\(aq, +\(aqtwo of them.\(aq, \(aqThanks.\(aq] +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBblanklines\fP \-\- +.sp +Indicates how blank lines should be handled. Valid values are: +.INDENT 7.0 +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \fBdiscard\fP: strip blank lines out of the token list before returning it. +A line is considered blank if it contains only whitespace characters. +.UNINDENT +.IP \(bu 2 +\fBkeep\fP: leave all blank lines in the token list. +.IP \(bu 2 +.INDENT 2.0 +.TP +.B \fBdiscard\-eof\fP: if the string ends with a newline, then do not generate +a corresponding token \fB\(aq\(aq\fP after that newline. +.UNINDENT +.UNINDENT + +.UNINDENT +.INDENT 7.0 +.TP +.B span_tokenize(s) +Identify the tokens using integer offsets \fB(start_i, end_i)\fP, +where \fBs[start_i:end_i]\fP is the corresponding token. +.INDENT 7.0 +.TP +.B Return type +iter(tuple(int, int)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(s) +Return a tokenized copy of \fIs\fP\&. +.INDENT 7.0 +.TP +.B Return type +list of str +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.simple.SpaceTokenizer +Bases: \fI\%nltk.tokenize.api.StringTokenizer\fP +.sp +Tokenize a string using the space character as a delimiter, +which is the same as \fBs.split(\(aq \(aq)\fP\&. +.sp +.nf +.ft C +>>> from nltk.tokenize import SpaceTokenizer +>>> s = "Good muffins cost $3.88\enin New York. Please buy me\entwo of them.\en\enThanks." +>>> SpaceTokenizer().tokenize(s) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$3.88\enin\(aq, \(aqNew\(aq, \(aqYork.\(aq, \(aq\(aq, +\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\entwo\(aq, \(aqof\(aq, \(aqthem.\en\enThanks.\(aq] +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.simple.TabTokenizer +Bases: \fI\%nltk.tokenize.api.StringTokenizer\fP +.sp +Tokenize a string using the tab character as a delimiter, +the same as \fBs.split(\(aq\et\(aq)\fP\&. +.sp +.nf +.ft C +>>> from nltk.tokenize import TabTokenizer +>>> TabTokenizer().tokenize(\(aqa\etb c\en\et d\(aq) +[\(aqa\(aq, \(aqb c\en\(aq, \(aq d\(aq] +.ft P +.fi +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tokenize.simple.line_tokenize(text, blanklines=\(aqdiscard\(aq) +.UNINDENT +.SS nltk.tokenize.sonority_sequencing module +.sp +The Sonority Sequencing Principle (SSP) is a language\-agnostic algorithm proposed +by Otto Jespersen in 1904. 
The sonorous quality of a phoneme is judged by the
+openness of the lips. Syllable breaks occur before troughs in sonority. For more
+on the SSP see Selkirk (1984).
+.sp
+The default implementation uses the English alphabet, but the \fIsonority_hierarchy\fP
+can be modified to IPA or any other alphabet for the use case. The SSP is a
+universal syllabification algorithm, but that does not mean it performs equally
+across languages. Bartlett et al. (2009) is a good benchmark for English accuracy
+if utilizing IPA (pg. 311).
+.sp
+Importantly, if a custom hierarchy is supplied and vowels span across more than
+one level, they should be given separately to the \fIvowels\fP class attribute.
+.sp
+References:
+.INDENT 0.0
+.IP \(bu 2
+Otto Jespersen. 1904. Lehrbuch der Phonetik.
+Leipzig, Teubner. Chapter 13, Silbe, pp. 185\-203.
+.IP \(bu 2
+Elisabeth Selkirk. 1984. On the major class features and syllable theory.
+In Aronoff & Oehrle (eds.) Language Sound Structure: Studies in Phonology.
+Cambridge, MIT Press. pp. 107\-136.
+.IP \(bu 2
+Susan Bartlett, et al. 2009. On the Syllabification of Phonemes.
+In HLT\-NAACL. pp. 308\-316.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.sonority_sequencing.SyllableTokenizer(lang=\(aqen\(aq, sonority_hierarchy=False)
+Bases: \fI\%nltk.tokenize.api.TokenizerI\fP
+.sp
+Syllabifies words based on the Sonority Sequencing Principle (SSP).
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import SyllableTokenizer
+>>> from nltk import word_tokenize
+>>> SSP = SyllableTokenizer()
+>>> SSP.tokenize(\(aqjustification\(aq)
+[\(aqjus\(aq, \(aqti\(aq, \(aqfi\(aq, \(aqca\(aq, \(aqtion\(aq]
+>>> text = "This is a foobar\-like sentence."
+>>> [SSP.tokenize(token) for token in word_tokenize(text)]
+[[\(aqThis\(aq], [\(aqis\(aq], [\(aqa\(aq], [\(aqfoo\(aq, \(aqbar\(aq, \(aq\-\(aq, \(aqli\(aq, \(aqke\(aq], [\(aqsen\(aq, \(aqten\(aq, \(aqce\(aq], [\(aq.\(aq]]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B assign_values(token)
+Assigns each phoneme its value from the sonority hierarchy.
+Note: Sentence/text has to be tokenized first.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtoken\fP (\fIstr\fP) \-\- Single word or token
+.TP
+.B Returns
+List of tuples, first element is character/phoneme and
+second is the sonority value.
+.TP
+.B Return type
+list(tuple(str, int))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tokenize(token)
+Apply the SSP to return a list of syllables.
+Note: Sentence/text has to be tokenized first.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtoken\fP (\fIstr\fP) \-\- Single word or token
+.TP
+.B Return syllable_list
+Single word or token broken up into syllables.
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B validate_syllables(syllable_list)
+Ensures each syllable has at least one vowel.
+If the following syllable doesn\(aqt have a vowel, add it to the current one.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsyllable_list\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Single word or token broken up into syllables.
+.TP
+.B Returns
+Single word or token broken up into syllables
+(with added syllables if necessary)
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tokenize.stanford module
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.stanford.StanfordTokenizer(path_to_jar=None, encoding=\(aqutf8\(aq, options=None, verbose=False, java_options=\(aq\-mx1000m\(aq)
+Bases: \fI\%nltk.tokenize.api.TokenizerI\fP
+.sp
+Interface to the Stanford Tokenizer
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize.stanford import StanfordTokenizer
+>>> s = "Good muffins cost $3.88\enin New York. Please buy me\entwo of them.\enThanks."
+>>> StanfordTokenizer().tokenize(s)
+[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aq.\(aq, \(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq, \(aqThanks\(aq, \(aq.\(aq]
+>>> s = "The colour of the wall is blue."
+>>> StanfordTokenizer(options={"americanize": True}).tokenize(s)
+[\(aqThe\(aq, \(aqcolor\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqwall\(aq, \(aqis\(aq, \(aqblue\(aq, \(aq.\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B tokenize(s)
+Use the Stanford Tokenizer\(aqs PTBTokenizer to tokenize multiple sentences.
+.UNINDENT
+.UNINDENT
+.SS nltk.tokenize.stanford_segmenter module
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.stanford_segmenter.StanfordSegmenter(path_to_jar=None, path_to_slf4j=None, java_class=None, path_to_model=None, path_to_dict=None, path_to_sihan_corpora_dict=None, sihan_post_processing=\(aqfalse\(aq, keep_whitespaces=\(aqfalse\(aq, encoding=\(aqUTF\-8\(aq, options=None, verbose=False, java_options=\(aq\-mx2g\(aq)
+Bases: \fI\%nltk.tokenize.api.TokenizerI\fP
+.sp
+Interface to the Stanford Segmenter
+.sp
+If the stanford\-segmenter version is older than 2016\-10\-31, then path_to_slf4j
+should be provided, for example:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+seg = StanfordSegmenter(path_to_slf4j=\(aq/YOUR_PATH/slf4j\-api.jar\(aq)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize.stanford_segmenter import StanfordSegmenter
+>>> seg = StanfordSegmenter()
+>>> seg.default_config(\(aqzh\(aq)
+>>> sent = u\(aq这是斯坦福中文分词器测试\(aq
+>>> print(seg.segment(sent))
+这 是 斯坦福 中文 分词器 测试
+
+>>> seg.default_config(\(aqar\(aq)
+>>> sent = u\(aqهذا هو تصنيف ستانفورد العربي للكلمات\(aq
+>>> print(seg.segment(sent.split()))
+هذا هو تصنيف ستانفورد العربي ل الكلمات
+
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B default_config(lang)
+Attempt to initialize the Stanford Word Segmenter for the specified language,
+using the STANFORD_SEGMENTER and STANFORD_MODELS environment variables
+.UNINDENT
+.INDENT 7.0
+.TP
+.B segment(tokens)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B segment_file(input_file_path)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B segment_sents(sentences)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tokenize(s)
+Return a tokenized copy of \fIs\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+list of str
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tokenize.texttiling module
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.texttiling.TextTilingTokenizer(w=20, k=10, similarity_method=0, stopwords=None, smoothing_method=[0], smoothing_width=2, smoothing_rounds=1, cutoff_policy=1, demo_mode=False)
+Bases: \fI\%nltk.tokenize.api.TokenizerI\fP
+.sp
+Tokenize a document into topical sections using the TextTiling algorithm.
+This algorithm detects subtopic shifts based on the analysis of lexical
+co\-occurrence patterns.
+.sp
+The process starts by tokenizing the text into pseudosentences of
+a fixed size w.
Then, depending on the method used, similarity
+scores are assigned at sentence gaps. The algorithm proceeds by
+detecting the peak differences between these scores and marking
+them as boundaries. The boundaries are normalized to the closest
+paragraph break and the segmented text is returned.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBw\fP (\fIint\fP) \-\- Pseudosentence size
+.IP \(bu 2
+\fBk\fP (\fIint\fP) \-\- Size (in sentences) of the block used in the block comparison method
+.IP \(bu 2
+\fBsimilarity_method\fP (\fIconstant\fP) \-\- The method used for determining similarity scores:
+\fIBLOCK_COMPARISON\fP (default) or \fIVOCABULARY_INTRODUCTION\fP\&.
+.IP \(bu 2
+\fBstopwords\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of stopwords that are filtered out (defaults to NLTK\(aqs stopwords corpus)
+.IP \(bu 2
+\fBsmoothing_method\fP (\fIconstant\fP) \-\- The method used for smoothing the score plot:
+\fIDEFAULT_SMOOTHING\fP (default)
+.IP \(bu 2
+\fBsmoothing_width\fP (\fIint\fP) \-\- The width of the window used by the smoothing method
+.IP \(bu 2
+\fBsmoothing_rounds\fP (\fIint\fP) \-\- The number of smoothing passes
+.IP \(bu 2
+\fBcutoff_policy\fP (\fIconstant\fP) \-\- The policy used to determine the number of boundaries:
+\fIHC\fP (default) or \fILC\fP
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import brown
+>>> tt = TextTilingTokenizer(demo_mode=True)
+>>> text = brown.raw()[:4000]
+>>> s, ss, d, b = tt.tokenize(text)
+>>> b
+[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B tokenize(text)
+Return a tokenized copy of \fItext\fP, where each "token" represents
+a separate topic.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.texttiling.TokenSequence(index, wrdindex_list, original_length=None)
+Bases: \fBobject\fP
+.sp
+A token list with its original length and its index
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.texttiling.TokenTableField(first_pos, ts_occurences, total_count=1, par_count=1, last_par=0, last_tok_seq=None)
+Bases: \fBobject\fP
+.sp
+A field in the token table holding parameters for each token,
+used later in the process
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.texttiling.demo(text=None)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.texttiling.smooth(x, window_len=11, window=\(aqflat\(aq)
+Smooth the data using a window of the requested size.
+.sp
+This method is based on the convolution of a scaled window with the signal.
+The signal is prepared by introducing reflected copies of the signal
+(with the window size) at both ends so that transient parts are minimized
+at the beginning and end of the output signal.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBx\fP \-\- the input signal
+.IP \(bu 2
+\fBwindow_len\fP \-\- the dimension of the smoothing window; should be an odd integer
+.IP \(bu 2
+\fBwindow\fP \-\- the type of window from \(aqflat\(aq, \(aqhanning\(aq, \(aqhamming\(aq, \(aqbartlett\(aq, \(aqblackman\(aq;
+a flat window will produce a moving average smoothing.
+.UNINDENT
+.TP
+.B Returns
+the smoothed signal
+.UNINDENT
+.sp
+example:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+import numpy as np
+t = np.linspace(\-2, 2, 50)
+x = np.sin(t) + np.random.randn(len(t)) * 0.1
+y = smooth(x)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B See also
+numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve,
+scipy.signal.lfilter
+.UNINDENT
+.sp
+TODO: the window parameter could be the window itself if given as an array instead of a string
+.UNINDENT
+.SS nltk.tokenize.toktok module
+.sp
+The tok\-tok tokenizer is a simple, general tokenizer, where the input has one
+sentence per line; thus only the final period is tokenized.
+.sp
+Tok\-tok has been tested on, and gives reasonably good results for, English,
+Persian, Russian, Czech, French, German, Vietnamese, Tajik, and a few others.
+The input should be in UTF\-8 encoding.
+.sp
+Reference:
+Jon Dehdari. 2014. A Neurophysiologically\-Inspired Statistical Language
+Model (Doctoral dissertation). Columbus, OH, USA: The Ohio State University.
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.toktok.ToktokTokenizer
+Bases: \fI\%nltk.tokenize.api.TokenizerI\fP
+.sp
+This is a Python port of tok\-tok.pl from
+\fI\%https://github.com/jonsafari/tok\-tok/blob/master/tok\-tok.pl\fP
+.sp
+.nf
+.ft C
+>>> toktok = ToktokTokenizer()
+>>> text = u\(aqIs 9.5 or 525,600 my favorite number?\(aq
+>>> print(toktok.tokenize(text, return_str=True))
+Is 9.5 or 525,600 my favorite number ?
+>>> text = u\(aqThe https://github.com/jonsafari/tok\-tok/blob/master/tok\-tok.pl is a website with/and/or slashes and sort of weird : things\(aq
+>>> print(toktok.tokenize(text, return_str=True))
+The https://github.com/jonsafari/tok\-tok/blob/master/tok\-tok.pl is a website with/and/or slashes and sort of weird : things
+>>> text = u\(aq¡This, is a sentence with weird» symbols… appearing everywhere¿\(aq
+>>> expected = u\(aq¡ This , is a sentence with weird » symbols … appearing everywhere ¿\(aq
+>>> assert toktok.tokenize(text, return_str=True) == expected
+>>> toktok.tokenize(text) == [u\(aq¡\(aq, u\(aqThis\(aq, u\(aq,\(aq, u\(aqis\(aq, u\(aqa\(aq, u\(aqsentence\(aq, u\(aqwith\(aq, u\(aqweird\(aq, u\(aq»\(aq, u\(aqsymbols\(aq, u\(aq…\(aq, u\(aqappearing\(aq, u\(aqeverywhere\(aq, u\(aq¿\(aq]
+True
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B AMPERCENT = (re.compile(\(aq& \(aq), \(aq&amp; \(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B CLOSE_PUNCT = \(aq)]}༻༽᚜⁆⁾₎〉❩❫❭❯❱❳❵⟆⟧⟩⟫⟭⟯⦄⦆⦈⦊⦌⦎⦐⦒⦔⦖⦘⧙⧛⧽⸣⸥⸧⸩〉》」』】〕〗〙〛〞〟﴿︘︶︸︺︼︾﹀﹂﹄﹈﹚﹜﹞)]}⦆」\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B CLOSE_PUNCT_RE = (re.compile(\(aq([)]}༻༽᚜⁆⁾₎〉❩❫❭❯❱❳❵⟆⟧⟩⟫⟭⟯⦄⦆⦈⦊⦌⦎⦐⦒⦔⦖⦘⧙⧛⧽⸣⸥⸧⸩〉》」』】〕〗〙〛〞〟﴿︘︶︸︺︼︾﹀﹂﹄﹈﹚﹜﹞)]}⦆」])\(aq), \(aq\e\e1 \(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B COMMA_IN_NUM = (re.compile(\(aq(?<!,)([,،])(?![,\e\ed])\(aq), \(aq \e\e1 \(aq)
+.UNINDENT
+.UNINDENT
+.SS nltk.tokenize.treebank module
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.treebank.TreebankWordDetokenizer
+Bases: \fI\%nltk.tokenize.api.TokenizerI\fP
+.sp
+The Treebank detokenizer uses the reverse regex operations corresponding to
+the Treebank tokenizer\(aqs regexes.
+.sp
+Note:
+.INDENT 7.0
+.IP \(bu 2
+There are additional assumptions made when undoing the padding of the
+\fB[;@#$%&]\fP punctuation symbols that are not presupposed in the TreebankTokenizer.
+.IP \(bu 2
+There are additional regexes added in reversing the parentheses tokenization,
+e.g. the r\(aq([\e]\e)\e}\e>])\es([:;,.])\(aq pattern removes the additional right padding added
+to the closing parentheses preceding [:;,.].
+.IP \(bu 2
+It\(aqs not possible to return the original whitespaces as they were, because
+there is no explicit record of where \(aq\en\(aq, \(aq\et\(aq or \(aq\es\(aq were removed at
+the text.split() operation.
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize.treebank import TreebankWordTokenizer, TreebankWordDetokenizer
+>>> s = \(aq\(aq\(aqGood muffins cost $3.88\enin New York. Please buy me\entwo of them.\enThanks.\(aq\(aq\(aq
+>>> d = TreebankWordDetokenizer()
+>>> t = TreebankWordTokenizer()
+>>> toks = t.tokenize(s)
+>>> d.detokenize(toks)
+\(aqGood muffins cost $3.88 in New York. Please buy me two of them.
Thanks.\(aq
+.ft P
+.fi
+.sp
+The MXPOST parentheses substitution can be undone using the \fIconvert_parentheses\fP
+parameter:
+.sp
+.nf
+.ft C
+>>> s = \(aq\(aq\(aqGood muffins cost $3.88\enin New (York). Please (buy) me\entwo of them.\en(Thanks).\(aq\(aq\(aq
+>>> expected_tokens = [\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3.88\(aq, \(aqin\(aq,
+\&... \(aqNew\(aq, \(aq\-LRB\-\(aq, \(aqYork\(aq, \(aq\-RRB\-\(aq, \(aq.\(aq, \(aqPlease\(aq, \(aq\-LRB\-\(aq, \(aqbuy\(aq,
+\&... \(aq\-RRB\-\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem.\(aq, \(aq\-LRB\-\(aq, \(aqThanks\(aq, \(aq\-RRB\-\(aq, \(aq.\(aq]
+>>> expected_tokens == t.tokenize(s, convert_parentheses=True)
+True
+>>> expected_detoken = \(aqGood muffins cost $3.88 in New (York). Please (buy) me two of them. (Thanks).\(aq
+>>> expected_detoken == d.detokenize(t.tokenize(s, convert_parentheses=True), convert_parentheses=True)
+True
+.ft P
+.fi
+.sp
+During tokenization it\(aqs safe to add more spaces, but during detokenization
+simply undoing the padding doesn\(aqt really help.
+.INDENT 7.0
+.IP \(bu 2
+During tokenization, left and right padding is added to [!?]; when
+detokenizing, only a left shift of the [!?] is needed.
+Thus (re.compile(r\(aq\es([?!])\(aq), r\(aq\eg<1>\(aq)
+.IP \(bu 2
+During tokenization, [:,] are left and right padded, but when detokenizing,
+only a left shift is necessary and we keep the right pad after a comma/colon
+if the string after it is a non\-digit.
+Thus (re.compile(r\(aq\es([:,])\es([^\ed])\(aq), r\(aq\e1 \e2\(aq)
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize.treebank import TreebankWordDetokenizer
+>>> toks = [\(aqhello\(aq, \(aq,\(aq, \(aqi\(aq, \(aqca\(aq, "n\(aqt", \(aqfeel\(aq, \(aqmy\(aq, \(aqfeet\(aq, \(aq!\(aq, \(aqHelp\(aq, \(aq!\(aq, \(aq!\(aq]
+>>> twd = TreebankWordDetokenizer()
+>>> twd.detokenize(toks)
+"hello, i can\(aqt feel my feet! Help!!"
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> toks = [\(aqhello\(aq, \(aq,\(aq, \(aqi\(aq, "can\(aqt", \(aqfeel\(aq, \(aq;\(aq, \(aqmy\(aq, \(aqfeet\(aq, \(aq!\(aq,
+\&... \(aqHelp\(aq, \(aq!\(aq, \(aq!\(aq, \(aqHe\(aq, \(aqsaid\(aq, \(aq:\(aq, \(aqHelp\(aq, \(aq,\(aq, \(aqhelp\(aq, \(aq?\(aq, \(aq!\(aq]
+>>> twd.detokenize(toks)
+"hello, i can\(aqt feel; my feet! Help!! He said: Help, help?!"
+.ft P +.fi +.INDENT 7.0 +.TP +.B CONTRACTIONS2 = [re.compile(\(aq(?i)\e\eb(can)\e\es(not)\e\eb\(aq, re.IGNORECASE), re.compile("(?i)\e\eb(d)\e\es(\(aqye)\e\eb", re.IGNORECASE), re.compile(\(aq(?i)\e\eb(gim)\e\es(me)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(gon)\e\es(na)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(got)\e\es(ta)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(lem)\e\es(me)\e\eb\(aq, re.IGNORECASE), re.compile("(?i)\e\eb(more)\e\es(\(aqn)\e\eb", re.IGNORECASE), re.compile(\(aq(?i)\e\eb(wan)\e\es(na)\e\es\(aq, re.IGNORECASE)] +.UNINDENT +.INDENT 7.0 +.TP +.B CONTRACTIONS3 = [re.compile("(?i) (\(aqt)\e\es(is)\e\eb", re.IGNORECASE), re.compile("(?i) (\(aqt)\e\es(was)\e\eb", re.IGNORECASE)] +.UNINDENT +.INDENT 7.0 +.TP +.B CONVERT_PARENTHESES = [(re.compile(\(aq\-LRB\-\(aq), \(aq(\(aq), (re.compile(\(aq\-RRB\-\(aq), \(aq)\(aq), (re.compile(\(aq\-LSB\-\(aq), \(aq[\(aq), (re.compile(\(aq\-RSB\-\(aq), \(aq]\(aq), (re.compile(\(aq\-LCB\-\(aq), \(aq{\(aq), (re.compile(\(aq\-RCB\-\(aq), \(aq}\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B DOUBLE_DASHES = (re.compile(\(aq \-\- \(aq), \(aq\-\-\(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B ENDING_QUOTES = [(re.compile("([^\(aq ])\e\es(\(aqll|\(aqLL|\(aqre|\(aqRE|\(aqve|\(aqVE|n\(aqt|N\(aqT) "), \(aq\e\e1\e\e2 \(aq), (re.compile("([^\(aq ])\e\es(\(aq[sS]|\(aq[mM]|\(aq[dD]|\(aq) "), \(aq\e\e1\e\e2 \(aq), (re.compile("(\e\eS)\e\es(\e\e\(aq\e\e\(aq)"), \(aq\e\e1\e\e2\(aq), (re.compile("(\e\e\(aq\e\e\(aq)\e\es([.,:)\e\e]>};%])"), \(aq\e\e1\e\e2\(aq), (re.compile("\(aq\(aq"), \(aq"\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B PARENS_BRACKETS = [(re.compile(\(aq([\e\e[\e\e(\e\e{\e\e<])\e\es\(aq), \(aq\e\eg<1>\(aq), (re.compile(\(aq\e\es([\e\e]\e\e)\e\e}\e\e>])\(aq), \(aq\e\eg<1>\(aq), (re.compile(\(aq([\e\e]\e\e)\e\e}\e\e>])\e\es([:;,.])\(aq), \(aq\e\e1\e\e2\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B PUNCTUATION = [(re.compile("([^\(aq])\e\es\(aq\e\es"), "\e\e1\(aq "), (re.compile(\(aq\e\es([?!])\(aq), \(aq\e\eg<1>\(aq), (re.compile(\(aq([^\e\e.])\e\es(\e\e.)([\e\e]\e\e)}>"\e\e\e\(aq]*)\e\es*$\(aq), \(aq\e\e1\e\e2\e\e3\(aq), (re.compile(\(aq([#$])\e\es\(aq), \(aq\e\eg<1>\(aq), (re.compile(\(aq\e\es([;%])\(aq), \(aq\e\eg<1>\(aq), (re.compile(\(aq\e\es\e\e.\e\e.\e\e.\e\es\(aq), \(aq...\(aq), (re.compile(\(aq\e\es([:,])\(aq), \(aq\e\e1\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B STARTING_QUOTES = [(re.compile(\(aq([ (\e\e[{<])\e\es\(ga\(ga\(aq), \(aq\e\e1\(ga\(ga\(aq), (re.compile(\(aq(\(ga\(ga)\e\es\(aq), \(aq\e\e1\(aq), (re.compile(\(aq\(ga\(ga\(aq), \(aq"\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B detokenize(tokens, convert_parentheses=False) +Duck\-typing the abstract \fItokenize()\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B tokenize(tokens, convert_parentheses=False) +Treebank detokenizer, created by undoing the regexes from +the TreebankWordTokenizer.tokenize. +.INDENT 7.0 +.TP +.B Parameters +\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of strings, i.e. tokenized text. +.TP +.B Returns +str +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tokenize.treebank.TreebankWordTokenizer +Bases: \fI\%nltk.tokenize.api.TokenizerI\fP +.sp +The Treebank tokenizer uses regular expressions to tokenize text as in Penn Treebank. +This is the method that is invoked by \fBword_tokenize()\fP\&. It assumes that the +text has already been segmented into sentences, e.g. using \fBsent_tokenize()\fP\&. +.sp +This tokenizer performs the following steps: +.INDENT 7.0 +.IP \(bu 2 +split standard contractions, e.g. 
\fBdon\(aqt\fP \-> \fBdo n\(aqt\fP and \fBthey\(aqll\fP \-> \fBthey \(aqll\fP +.IP \(bu 2 +treat most punctuation characters as separate tokens +.IP \(bu 2 +split off commas and single quotes, when followed by whitespace +.IP \(bu 2 +separate periods that appear at the end of line +.sp +.nf +.ft C +>>> from nltk.tokenize import TreebankWordTokenizer +>>> s = \(aq\(aq\(aqGood muffins cost $3.88\enin New York. Please buy me\entwo of them.\enThanks.\(aq\(aq\(aq +>>> TreebankWordTokenizer().tokenize(s) +[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork.\(aq, \(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem.\(aq, \(aqThanks\(aq, \(aq.\(aq] +>>> s = "They\(aqll save and invest more." +>>> TreebankWordTokenizer().tokenize(s) +[\(aqThey\(aq, "\(aqll", \(aqsave\(aq, \(aqand\(aq, \(aqinvest\(aq, \(aqmore\(aq, \(aq.\(aq] +>>> s = "hi, my name can\(aqt hello," +>>> TreebankWordTokenizer().tokenize(s) +[\(aqhi\(aq, \(aq,\(aq, \(aqmy\(aq, \(aqname\(aq, \(aqca\(aq, "n\(aqt", \(aqhello\(aq, \(aq,\(aq] +.ft P +.fi +.UNINDENT +.INDENT 7.0 +.TP +.B CONTRACTIONS2 = [re.compile(\(aq(?i)\e\eb(can)(?#X)(not)\e\eb\(aq, re.IGNORECASE), re.compile("(?i)\e\eb(d)(?#X)(\(aqye)\e\eb", re.IGNORECASE), re.compile(\(aq(?i)\e\eb(gim)(?#X)(me)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(gon)(?#X)(na)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(got)(?#X)(ta)\e\eb\(aq, re.IGNORECASE), re.compile(\(aq(?i)\e\eb(lem)(?#X)(me)\e\eb\(aq, re.IGNORECASE), re.compile("(?i)\e\eb(more)(?#X)(\(aqn)\e\eb", re.IGNORECASE), re.compile(\(aq(?i)\e\eb(wan)(?#X)(na)\e\es\(aq, re.IGNORECASE)] +.UNINDENT +.INDENT 7.0 +.TP +.B CONTRACTIONS3 = [re.compile("(?i) (\(aqt)(?#X)(is)\e\eb", re.IGNORECASE), re.compile("(?i) (\(aqt)(?#X)(was)\e\eb", re.IGNORECASE)] +.UNINDENT +.INDENT 7.0 +.TP +.B CONVERT_PARENTHESES = [(re.compile(\(aq\e\e(\(aq), \(aq\-LRB\-\(aq), (re.compile(\(aq\e\e)\(aq), \(aq\-RRB\-\(aq), (re.compile(\(aq\e\e[\(aq), \(aq\-LSB\-\(aq), (re.compile(\(aq\e\e]\(aq), \(aq\-RSB\-\(aq), (re.compile(\(aq\e\e{\(aq), \(aq\-LCB\-\(aq), (re.compile(\(aq\e\e}\(aq), \(aq\-RCB\-\(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B DOUBLE_DASHES = (re.compile(\(aq\-\-\(aq), \(aq \-\- \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B ENDING_QUOTES = [(re.compile(\(aq"\(aq), " \(aq\(aq "), (re.compile("(\e\eS)(\e\e\(aq\e\e\(aq)"), \(aq\e\e1 \e\e2 \(aq), (re.compile("([^\(aq ])(\(aq[sS]|\(aq[mM]|\(aq[dD]|\(aq) "), \(aq\e\e1 \e\e2 \(aq), (re.compile("([^\(aq ])(\(aqll|\(aqLL|\(aqre|\(aqRE|\(aqve|\(aqVE|n\(aqt|N\(aqT) "), \(aq\e\e1 \e\e2 \(aq)] +.UNINDENT +.INDENT 7.0 +.TP +.B PARENS_BRACKETS = (re.compile(\(aq[\e\e]\e\e[\e\e(\e\e)\e\e{\e\e}\e\e<\e\e>]\(aq), \(aq \e\eg<0> \(aq) +.UNINDENT +.INDENT 7.0 +.TP +.B PUNCTUATION = [(re.compile(\(aq([:,])([^\e\ed])\(aq), \(aq \e\e1 \e\e2\(aq), (re.compile(\(aq([:,])$\(aq), \(aq \e\e1 \(aq), (re.compile(\(aq\e\e.\e\e.\e\e.\(aq), \(aq ... 
\(aq), (re.compile(\(aq[;@#$%&]\(aq), \(aq \e\eg<0> \(aq), (re.compile(\(aq([^\e\e.])(\e\e.)([\e\e]\e\e)}>"\e\e\e\(aq]*)\e\es*$\(aq), \(aq\e\e1 \e\e2\e\e3 \(aq), (re.compile(\(aq[?!]\(aq), \(aq \e\eg<0> \(aq), (re.compile("([^\(aq])\(aq "), "\e\e1 \(aq ")]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B STARTING_QUOTES = [(re.compile(\(aq^\e\e"\(aq), \(aq\(ga\(ga\(aq), (re.compile(\(aq(\(ga\(ga)\(aq), \(aq \e\e1 \(aq), (re.compile(\(aq([ \e\e(\e\e[{<])(\e\e"|\e\e\e\(aq{2})\(aq), \(aq\e\e1 \(ga\(ga \(aq)]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B span_tokenize(text)
+Uses the post\-hoc nltk.tokenize.util.align_tokens to return the offset spans.
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import TreebankWordTokenizer
+>>> s = \(aq\(aq\(aqGood muffins cost $3.88\enin New (York). Please (buy) me\entwo of them.\en(Thanks).\(aq\(aq\(aq
+>>> expected = [(0, 4), (5, 12), (13, 17), (18, 19), (19, 23),
+\&... (24, 26), (27, 30), (31, 32), (32, 36), (36, 37), (37, 38),
+\&... (40, 46), (47, 48), (48, 51), (51, 52), (53, 55), (56, 59),
+\&... (60, 62), (63, 68), (69, 70), (70, 76), (76, 77), (77, 78)]
+>>> list(TreebankWordTokenizer().span_tokenize(s)) == expected
+True
+>>> expected = [\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3.88\(aq, \(aqin\(aq,
+\&... \(aqNew\(aq, \(aq(\(aq, \(aqYork\(aq, \(aq)\(aq, \(aq.\(aq, \(aqPlease\(aq, \(aq(\(aq, \(aqbuy\(aq, \(aq)\(aq,
+\&... \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem.\(aq, \(aq(\(aq, \(aqThanks\(aq, \(aq)\(aq, \(aq.\(aq]
+>>> [s[start:end] for start, end in TreebankWordTokenizer().span_tokenize(s)] == expected
+True
+.ft P
+.fi
+.sp
+Additional example:
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import TreebankWordTokenizer
+>>> s = \(aq\(aq\(aqI said, "I\(aqd like to buy some \(aq\(aqgood muffins" which cost $3.88\en each in New (York)."\(aq\(aq\(aq
+>>> expected = [(0, 1), (2, 6), (6, 7), (8, 9), (9, 10), (10, 12),
+\&... (13, 17), (18, 20), (21, 24), (25, 29), (30, 32), (32, 36),
+\&... (37, 44), (44, 45), (46, 51), (52, 56), (57, 58), (58, 62),
+\&... (64, 68), (69, 71), (72, 75), (76, 77), (77, 81), (81, 82),
+\&... (82, 83), (83, 84)]
+>>> list(TreebankWordTokenizer().span_tokenize(s)) == expected
+True
+>>> expected = [\(aqI\(aq, \(aqsaid\(aq, \(aq,\(aq, \(aq"\(aq, \(aqI\(aq, "\(aqd", \(aqlike\(aq, \(aqto\(aq,
+\&... \(aqbuy\(aq, \(aqsome\(aq, "\(aq\(aq", "good", \(aqmuffins\(aq, \(aq"\(aq, \(aqwhich\(aq, \(aqcost\(aq,
+\&... \(aq$\(aq, \(aq3.88\(aq, \(aqeach\(aq, \(aqin\(aq, \(aqNew\(aq, \(aq(\(aq, \(aqYork\(aq, \(aq)\(aq, \(aq.\(aq, \(aq"\(aq]
+>>> [s[start:end] for start, end in TreebankWordTokenizer().span_tokenize(s)] == expected
+True
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tokenize(text, convert_parentheses=False, return_str=False)
+Return a tokenized copy of \fItext\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+list of str
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.tokenize.util module
+.INDENT 0.0
+.TP
+.B class nltk.tokenize.util.CJKChars
+Bases: \fBobject\fP
+.sp
+An object that enumerates the code points of the CJK characters as listed on
+\fI\%http://en.wikipedia.org/wiki/Basic_Multilingual_Plane#Basic_Multilingual_Plane\fP
+.sp
+This is a Python port of the CJK code point enumerations of the Moses tokenizer:
+\fI\%https://github.com/moses\-smt/mosesdecoder/blob/master/scripts/tokenizer/detokenizer.perl#L309\fP
+.INDENT 7.0
+.TP
+.B CJK_Compatibility_Forms = (65072, 65103)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B CJK_Compatibility_Ideographs = (63744, 64255)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B CJK_Radicals = (11904, 42191)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Hangul_Jamo = (4352, 4607)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Hangul_Syllables = (44032, 55215)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Katakana_Hangul_Halfwidth = (65381, 65500)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Phags_Pa = (43072, 43135)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Supplementary_Ideographic_Plane = (131072, 196607)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B ranges = [(4352, 4607), (11904, 42191), (43072, 43135), (44032, 55215), (63744, 64255), (65072, 65103), (65381, 65500), (131072, 196607)]
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.util.align_tokens(tokens, sentence)
+This function attempts to find the offsets of the tokens in \fIsentence\fP, as a sequence
+of \fB(start, end)\fP tuples, given the tokens and the source string.
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import TreebankWordTokenizer
+>>> from nltk.tokenize.util import align_tokens
+>>> s = str("The plane, bound for St Petersburg, crashed in Egypt\(aqs "
+\&... "Sinai desert just 23 minutes after take\-off from Sharm el\-Sheikh "
+\&... "on Saturday.")
+>>> tokens = TreebankWordTokenizer().tokenize(s)
+>>> expected = [(0, 3), (4, 9), (9, 10), (11, 16), (17, 20), (21, 23),
+\&... (24, 34), (34, 35), (36, 43), (44, 46), (47, 52), (52, 54),
+\&... (55, 60), (61, 67), (68, 72), (73, 75), (76, 83), (84, 89),
+\&... (90, 98), (99, 103), (104, 109), (110, 119), (120, 122),
+\&... (123, 131), (131, 132)]
+>>> output = list(align_tokens(tokens, s))
+>>> len(tokens) == len(expected) == len(output) # Check that length of tokens and tuples are the same.
+True
+>>> expected == list(align_tokens(tokens, s)) # Check that the output is as expected.
+True
+>>> tokens == [s[start:end] for start, end in output] # Check that the slices of the string correspond to the tokens.
+True
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The list of strings that are the result of tokenization
+.IP \(bu 2
+\fBsentence\fP (\fIstr\fP) \-\- The original string
+.UNINDENT
+.TP
+.B Return type
+list(tuple(int,int))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.util.is_cjk(character)
+Python port of Moses\(aq code to check for a CJK character.
+.sp
+.nf
+.ft C
+>>> CJKChars().ranges
+[(4352, 4607), (11904, 42191), (43072, 43135), (44032, 55215), (63744, 64255), (65072, 65103), (65381, 65500), (131072, 196607)]
+>>> is_cjk(u\(aq㏾\(aq)
+True
+>>> is_cjk(u\(aq﹟\(aq)
+False
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBcharacter\fP (\fIchar\fP) \-\- The character that needs to be checked.
+.TP
+.B Returns
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.util.regexp_span_tokenize(s, regexp)
+Return the offsets of the tokens in \fIs\fP, as a sequence of \fB(start, end)\fP
+tuples, by splitting the string at each successive match of \fIregexp\fP\&.
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize.util import regexp_span_tokenize
+>>> s = \(aq\(aq\(aqGood muffins cost $3.88\enin New York. Please buy me
+\&... two of them.\en\enThanks.\(aq\(aq\(aq
+>>> list(regexp_span_tokenize(s, r\(aq\es\(aq))
+[(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36),
+(38, 44), (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP (\fIstr\fP) \-\- the string to be tokenized
+.IP \(bu 2
+\fBregexp\fP (\fIstr\fP) \-\- regular expression that matches token separators (must not be empty)
+.UNINDENT
+.TP
+.B Return type
+iter(tuple(int, int))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.util.spans_to_relative(spans)
+Return a sequence of relative spans, given a sequence of spans.
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import WhitespaceTokenizer
+>>> from nltk.tokenize.util import spans_to_relative
+>>> s = \(aq\(aq\(aqGood muffins cost $3.88\enin New York. Please buy me
+\&... two of them.\en\enThanks.\(aq\(aq\(aq
+>>> list(spans_to_relative(WhitespaceTokenizer().span_tokenize(s)))
+[(0, 4), (1, 7), (1, 4), (1, 5), (1, 2), (1, 3), (1, 5), (2, 6),
+(1, 3), (1, 2), (1, 3), (1, 2), (1, 5), (2, 7)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBspans\fP (\fIiter\fP\fI(\fP\fItuple\fP\fI(\fP\fIint\fP\fI, \fP\fIint\fP\fI)\fP\fI)\fP) \-\- a sequence of (start, end) offsets of the tokens
+.TP
+.B Return type
+iter(tuple(int, int))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.util.string_span_tokenize(s, sep)
+Return the offsets of the tokens in \fIs\fP, as a sequence of \fB(start, end)\fP
+tuples, by splitting the string at each occurrence of \fIsep\fP\&.
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize.util import string_span_tokenize
+>>> s = \(aq\(aq\(aqGood muffins cost $3.88\enin New York. Please buy me
+\&... two of them.\en\enThanks.\(aq\(aq\(aq
+>>> list(string_span_tokenize(s, " "))
+[(0, 4), (5, 12), (13, 17), (18, 26), (27, 30), (31, 36), (37, 37),
+(38, 44), (45, 48), (49, 55), (56, 58), (59, 73)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP (\fIstr\fP) \-\- the string to be tokenized
+.IP \(bu 2
+\fBsep\fP (\fIstr\fP) \-\- the token separator
+.UNINDENT
+.TP
+.B Return type
+iter(tuple(int, int))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.util.xml_escape(text)
+This function transforms the input text into an "escaped" version suitable
+for well\-formed XML formatting.
+.sp
+Note that the default xml.sax.saxutils.escape() function doesn\(aqt escape
+some characters that Moses does, so we have to manually add them to the
+entities dictionary.
+.sp
+.nf
+.ft C
+>>> from xml.sax.saxutils import escape
+>>> input_str = \(aq\(aq\(aq)| & < > \(aq " ] [\(aq\(aq\(aq
+>>> expected_output = \(aq\(aq\(aq)| &amp; &lt; &gt; \(aq " ] [\(aq\(aq\(aq
+>>> escape(input_str) == expected_output
+True
+>>> xml_escape(input_str)
+\(aq)&#124; &amp; &lt; &gt; &apos; &quot; &#93; &#91;\(aq
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtext\fP (\fIstr\fP) \-\- The text that needs to be escaped.
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.util.xml_unescape(text)
+This function transforms the "escaped" version suitable
+for well\-formed XML formatting into a human\-readable string.
+.sp
+Note that the default xml.sax.saxutils.unescape() function doesn\(aqt unescape
+some characters that Moses does, so we have to manually add them to the
+entities dictionary.
+.sp
+.nf
+.ft C
+>>> from xml.sax.saxutils import unescape
+>>> s = \(aq)&#124; &amp; &lt; &gt; &apos; &quot; &#93; &#91;\(aq
+>>> expected = \(aq\(aq\(aq)| & < > \(aq " ] [\(aq\(aq\(aq
+>>> xml_unescape(s) == expected
+True
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtext\fP (\fIstr\fP) \-\- The text that needs to be unescaped.
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.SS Module contents
+.sp
+NLTK Tokenizer Package
+.sp
+Tokenizers divide strings into lists of substrings. For example,
+tokenizers can be used to find the words and punctuation in a string:
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import word_tokenize
+>>> s = \(aq\(aq\(aqGood muffins cost $3.88\enin New York. Please buy me
+\&... two of them.\en\enThanks.\(aq\(aq\(aq
+>>> word_tokenize(s)
+[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aq.\(aq,
+\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq, \(aqThanks\(aq, \(aq.\(aq]
+.ft P
+.fi
+.sp
+This particular tokenizer requires the Punkt sentence tokenization
+models to be installed. NLTK also provides a simpler,
+regular\-expression based tokenizer, which splits text on whitespace
+and punctuation:
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import wordpunct_tokenize
+>>> wordpunct_tokenize(s)
+[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3\(aq, \(aq.\(aq, \(aq88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aq.\(aq,
+\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq, \(aqThanks\(aq, \(aq.\(aq]
+.ft P
+.fi
+.sp
+We can also operate at the level of sentences, using the sentence
+tokenizer directly as follows:
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import sent_tokenize, word_tokenize
+>>> sent_tokenize(s)
+[\(aqGood muffins cost $3.88\enin New York.\(aq, \(aqPlease buy me\entwo of them.\(aq, \(aqThanks.\(aq]
+>>> [word_tokenize(t) for t in sent_tokenize(s)]
+[[\(aqGood\(aq, \(aqmuffins\(aq, \(aqcost\(aq, \(aq$\(aq, \(aq3.88\(aq, \(aqin\(aq, \(aqNew\(aq, \(aqYork\(aq, \(aq.\(aq],
+[\(aqPlease\(aq, \(aqbuy\(aq, \(aqme\(aq, \(aqtwo\(aq, \(aqof\(aq, \(aqthem\(aq, \(aq.\(aq], [\(aqThanks\(aq, \(aq.\(aq]]
+.ft P
+.fi
+.sp
+Caution: when tokenizing a Unicode string, make sure you are not
+using an encoded version of the string (it may be necessary to
+decode it first, e.g. with \fBs.decode("utf8")\fP).
+.sp
+NLTK tokenizers can produce token\-spans, represented as tuples of integers
+having the same semantics as string slices, to support efficient comparison
+of tokenizers. (These methods are implemented as generators.)
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import WhitespaceTokenizer
+>>> list(WhitespaceTokenizer().span_tokenize(s))
+[(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36), (38, 44),
+(45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)]
+.ft P
+.fi
+.sp
+There are numerous ways to tokenize text. If you need more control over
+tokenization, see the other methods provided in this package.
+.sp
+For further information, please see Chapter 3 of the NLTK book.
+.INDENT 0.0
+.TP
+.B nltk.tokenize.sent_tokenize(text, language=\(aqenglish\(aq)
+Return a sentence\-tokenized copy of \fItext\fP,
+using NLTK\(aqs recommended sentence tokenizer
+(currently \fI\%PunktSentenceTokenizer\fP
+for the specified language).
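+.sp
+For example (an illustrative sketch; it requires the Punkt models, and the
+French model name is shown only for illustration):
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import sent_tokenize
+>>> sent_tokenize("Hello world. How are you?")
+[\(aqHello world.\(aq, \(aqHow are you?\(aq]
+>>> sent_tokenize("Bonjour. Merci beaucoup.", language=\(aqfrench\(aq)
+[\(aqBonjour.\(aq, \(aqMerci beaucoup.\(aq]
+.ft P
+.fi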
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtext\fP \-\- text to split into sentences
+.IP \(bu 2
+\fBlanguage\fP \-\- the model name in the Punkt corpus
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tokenize.word_tokenize(text, language=\(aqenglish\(aq, preserve_line=False)
+Return a tokenized copy of \fItext\fP,
+using NLTK\(aqs recommended word tokenizer
+(currently an improved \fI\%TreebankWordTokenizer\fP
+along with \fI\%PunktSentenceTokenizer\fP
+for the specified language).
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtext\fP (\fIstr\fP) \-\- text to split into words
+.IP \(bu 2
+\fBlanguage\fP (\fIstr\fP) \-\- the model name in the Punkt corpus
+.IP \(bu 2
+\fBpreserve_line\fP (\fIbool\fP) \-\- An option to keep the input as a single line and not sentence\-tokenize it.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.translate package
+.SS Submodules
+.SS nltk.translate.api module
+.INDENT 0.0
+.TP
+.B class nltk.translate.api.AlignedSent(words, mots, alignment=None)
+Bases: \fBobject\fP
+.sp
+Return an aligned sentence object, which encapsulates two sentences
+along with an \fBAlignment\fP between them.
+.sp
+Typically used in machine translation to represent a sentence and
+its translation.
+.sp
+.nf
+.ft C
+>>> from nltk.translate import AlignedSent, Alignment
+>>> algnsent = AlignedSent([\(aqklein\(aq, \(aqist\(aq, \(aqdas\(aq, \(aqHaus\(aq],
+\&... [\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq], Alignment.fromstring(\(aq0\-3 1\-2 2\-0 3\-1\(aq))
+>>> algnsent.words
+[\(aqklein\(aq, \(aqist\(aq, \(aqdas\(aq, \(aqHaus\(aq]
+>>> algnsent.mots
+[\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq]
+>>> algnsent.alignment
+Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
+>>> from nltk.corpus import comtrans
+>>> print(comtrans.aligned_sents()[54])
+<AlignedSent: \(aqWeshalb also sollten...\(aq \-> \(aqSo why should EU arm...\(aq>
+>>> print(comtrans.aligned_sents()[54].alignment)
+0\-0 0\-1 1\-0 2\-2 3\-4 3\-5 4\-7 5\-8 6\-3 7\-9 8\-9 9\-10 9\-11 10\-12 11\-6 12\-6 13\-13
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBwords\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Words in the target language sentence
+.IP \(bu 2
+\fBmots\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Words in the source language sentence
+.IP \(bu 2
+\fBalignment\fP (\fIAlignment\fP) \-\- Word\-level alignments between \fBwords\fP and \fBmots\fP\&.
+Each alignment is represented as a 2\-tuple (words_index, mots_index).
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property alignment
+.UNINDENT
+.INDENT 7.0
+.TP
+.B invert()
+Return the aligned sentence pair, reversing the directionality
+.INDENT 7.0
+.TP
+.B Return type
+AlignedSent
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property mots
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property words
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.translate.api.Alignment(pairs)
+Bases: \fBfrozenset\fP
+.sp
+A storage class for representing alignment between two sequences, s1, s2.
+In general, an alignment is a set of tuples of the form (i, j, ...)
+representing an alignment between the i\-th element of s1 and the
+j\-th element of s2. Tuples are extensible (they might contain
+additional data, such as a boolean to indicate sure vs possible alignments).
+.sp
+.nf
+.ft C
+>>> from nltk.translate import Alignment
+>>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
+>>> a.invert()
+Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
+>>> print(a.invert())
+0\-0 1\-0 2\-1 2\-2
+>>> a[0]
+[(0, 1), (0, 0)]
+>>> a.invert()[2]
+[(2, 1), (2, 2)]
+>>> b = Alignment([(0, 0), (0, 1)])
+>>> b.issubset(a)
+True
+>>> c = Alignment.fromstring(\(aq0\-0 0\-1\(aq)
+>>> b == c
+True
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B classmethod fromstring(s)
+Read a giza\-formatted string and return an Alignment object.
+.sp
+.nf
+.ft C
+>>> Alignment.fromstring(\(aq0\-0 2\-1 9\-2 21\-3 10\-4 7\-5\(aq)
+Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBs\fP (\fIstr\fP) \-\- the positional alignments in giza format
+.TP
+.B Return type
+Alignment
+.TP
+.B Returns
+An Alignment object corresponding to the string representation \fBs\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B invert()
+Return an Alignment object, being the inverted mapping.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B range(positions=None)
+Work out the range of the mapping from the given positions.
+If no positions are specified, compute the range of the entire mapping.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.translate.api.PhraseTable
+Bases: \fBobject\fP
+.sp
+In\-memory store of translations for a given phrase, and the log
+probability of those translations
+.INDENT 7.0
+.TP
+.B add(src_phrase, trg_phrase, log_prob)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBlog_prob\fP (\fIfloat\fP) \-\- Log probability that given \fBsrc_phrase\fP,
+\fBtrg_phrase\fP is its translation
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B translations_for(src_phrase)
+Get the translations for a source language phrase
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsrc_phrase\fP (\fItuple\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Source language phrase of interest
+.TP
+.B Returns
+A list of target language phrases that are translations
+of \fBsrc_phrase\fP, ordered in decreasing order of
+likelihood. Each list element is a tuple of the target
+phrase and its log probability.
+.TP
+.B Return type
+list(PhraseTableEntry)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.translate.api.PhraseTableEntry(trg_phrase, log_prob)
+Bases: \fBtuple\fP
+.INDENT 7.0
+.TP
+.B log_prob
+Alias for field number 1
+.UNINDENT
+.INDENT 7.0
+.TP
+.B trg_phrase
+Alias for field number 0
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.bleu_score module
+.sp
+BLEU score implementation.
+.INDENT 0.0
+.TP
+.B class nltk.translate.bleu_score.SmoothingFunction(epsilon=0.1, alpha=5, k=5)
+Bases: \fBobject\fP
+.sp
+This is an implementation of the smoothing techniques
+for segment\-level BLEU scores that was presented in
+Boxing Chen and Colin Cherry (2014) A Systematic Comparison of
+Smoothing Techniques for Sentence\-Level BLEU. In WMT14.
+\fI\%http://acl2014.org/acl2014/W14\-33/pdf/W14\-3346.pdf\fP
+.INDENT 7.0
+.TP
+.B method0(p_n, *args, **kwargs)
+No smoothing.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B method1(p_n, *args, **kwargs)
+Smoothing method 1: Add \fIepsilon\fP counts to precision with 0 counts.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B method2(p_n, *args, **kwargs)
+Smoothing method 2: Add 1 to both numerator and denominator from
+Chin\-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
+Evaluating Automatic Evaluation Metrics for Machine Translation.
+In COLING 2004.
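+.sp
+Any of these smoothing methods can be passed to \fBsentence_bleu()\fP via its
+\fIsmoothing_function\fP parameter. A minimal, illustrative sketch (the toy
+sentences here are placeholders):
+.sp
+.nf
+.ft C
+>>> from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
+>>> ref = \(aqthe cat is on the mat\(aq.split()
+>>> hyp = \(aqthe cat sat on a mat\(aq.split()
+>>> score = sentence_bleu([ref], hyp, smoothing_function=SmoothingFunction().method2)
+.ft P
+.fi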
+.UNINDENT
+.INDENT 7.0
+.TP
+.B method3(p_n, *args, **kwargs)
+Smoothing method 3: NIST geometric sequence smoothing.
+The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
+precision score whose matching n\-gram count is null.
+k is 1 for the first \(aqn\(aq value for which the n\-gram match count is null.
+For example, if the text contains:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+one 2\-gram match
+.IP \(bu 2
+and (consequently) two 1\-gram matches
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B the n\-gram count for each individual precision score would be:
+.INDENT 7.0
+.IP \(bu 2
+n=1 => prec_count = 2 (two unigrams)
+.IP \(bu 2
+n=2 => prec_count = 1 (one bigram)
+.IP \(bu 2
+n=3 => prec_count = 1/2 (no trigram, taking \(aqsmoothed\(aq value of 1 / ( 2^k ), with k=1)
+.IP \(bu 2
+n=4 => prec_count = 1/4 (no fourgram, taking \(aqsmoothed\(aq value of 1 / ( 2^k ), with k=2)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B method4(p_n, references, hypothesis, hyp_len=None, *args, **kwargs)
+Smoothing method 4:
+Shorter translations may have inflated precision values due to having
+smaller denominators; therefore, we give them proportionally
+smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
+suggest dividing by 1/ln(len(T)), where T is the length of the translation.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B method5(p_n, references, hypothesis, hyp_len=None, *args, **kwargs)
+Smoothing method 5:
+The matched counts for similar values of n should be similar. To
+calculate the n\-gram matched count, it averages the n−1, n and n+1 gram
+matched counts.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B method6(p_n, references, hypothesis, hyp_len=None, *args, **kwargs)
+Smoothing method 6:
+Interpolates the maximum likelihood estimate of the precision \fIp_n\fP with
+a prior estimate \fIpi0\fP\&. The prior is estimated by assuming that the ratio
+between p_n and p_n−1 will be the same as that between p_n−1 and p_n−2; from
+Gao and He (2013) Training MRF\-Based Phrase Translation Models using
+Gradient Ascent. In NAACL.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B method7(p_n, references, hypothesis, hyp_len=None, *args, **kwargs)
+Smoothing method 7:
+Interpolates methods 4 and 5.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.bleu_score.brevity_penalty(closest_ref_len, hyp_len)
+Calculate brevity penalty.
+.sp
+As the modified n\-gram precision still has a problem with short
+sentences, a brevity penalty is used to modify the overall BLEU
+score according to length.
+.sp
+An example from the paper: there are three references with lengths 12, 15
+and 17, and a concise hypothesis of length 12. The brevity penalty is 1.
+.sp
+.nf
+.ft C
+>>> reference1 = list(\(aqaaaaaaaaaaaa\(aq) # i.e. [\(aqa\(aq] * 12
+>>> reference2 = list(\(aqaaaaaaaaaaaaaaa\(aq) # i.e. [\(aqa\(aq] * 15
+>>> reference3 = list(\(aqaaaaaaaaaaaaaaaaa\(aq) # i.e. [\(aqa\(aq] * 17
+>>> hypothesis = list(\(aqaaaaaaaaaaaa\(aq) # i.e. [\(aqa\(aq] * 12
+>>> references = [reference1, reference2, reference3]
+>>> hyp_len = len(hypothesis)
+>>> closest_ref_len = closest_ref_length(references, hyp_len)
+>>> brevity_penalty(closest_ref_len, hyp_len)
+1.0
+.ft P
+.fi
+.sp
+In case a hypothesis translation is shorter than the references, the penalty is
+applied.
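+.sp
+Concretely, when the hypothesis length \fIc\fP does not exceed the closest
+reference length \fIr\fP, the penalty is exp(1 \- r/c), following Papineni et
+al. (2002). A quick, illustrative check of the value shown in the next
+example, using only the standard library:
+.sp
+.nf
+.ft C
+>>> import math
+>>> math.exp(1 \- 28.0 / 12.0)
+0.2635971381157267
+.ft P
+.fi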
+.sp
+.nf
+.ft C
+>>> references = [[\(aqa\(aq] * 28, [\(aqa\(aq] * 28]
+>>> hypothesis = [\(aqa\(aq] * 12
+>>> hyp_len = len(hypothesis)
+>>> closest_ref_len = closest_ref_length(references, hyp_len)
+>>> brevity_penalty(closest_ref_len, hyp_len)
+0.2635971381157267
+.ft P
+.fi
+.sp
+The length of the closest reference is used to compute the penalty. If the
+length of a hypothesis is 12, and the reference lengths are 13 and 2, the
+penalty is applied because the hypothesis length (12) is less than the
+closest reference length (13).
+.sp
+.nf
+.ft C
+>>> references = [[\(aqa\(aq] * 13, [\(aqa\(aq] * 2]
+>>> hypothesis = [\(aqa\(aq] * 12
+>>> hyp_len = len(hypothesis)
+>>> closest_ref_len = closest_ref_length(references, hyp_len)
+>>> brevity_penalty(closest_ref_len, hyp_len)
+0.9200...
+.ft P
+.fi
+.sp
+The brevity penalty doesn\(aqt depend on reference order. More importantly,
+when two reference sentences are at the same distance, the shortest
+reference sentence length is used.
+.sp
+.nf
+.ft C
+>>> references = [[\(aqa\(aq] * 13, [\(aqa\(aq] * 11]
+>>> hypothesis = [\(aqa\(aq] * 12
+>>> hyp_len = len(hypothesis)
+>>> closest_ref_len = closest_ref_length(references, hyp_len)
+>>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
+>>> hyp_len = len(hypothesis)
+>>> closest_ref_len = closest_ref_length(reversed(references), hyp_len)
+>>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
+>>> bp1 == bp2 == 1
+True
+.ft P
+.fi
+.sp
+A test example from mteval\-v13a.pl (starting from line 705):
+.sp
+.nf
+.ft C
+>>> references = [[\(aqa\(aq] * 11, [\(aqa\(aq] * 8]
+>>> hypothesis = [\(aqa\(aq] * 7
+>>> hyp_len = len(hypothesis)
+>>> closest_ref_len = closest_ref_length(references, hyp_len)
+>>> brevity_penalty(closest_ref_len, hyp_len)
+0.8668...
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> references = [[\(aqa\(aq] * 11, [\(aqa\(aq] * 8, [\(aqa\(aq] * 6, [\(aqa\(aq] * 7]
+>>> hypothesis = [\(aqa\(aq] * 7
+>>> hyp_len = len(hypothesis)
+>>> closest_ref_len = closest_ref_length(references, hyp_len)
+>>> brevity_penalty(closest_ref_len, hyp_len)
+1.0
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBhyp_len\fP (\fIint\fP) \-\- The length of the hypothesis for a single sentence OR the
+sum of all the hypotheses\(aq lengths for a corpus
+.IP \(bu 2
+\fBclosest_ref_len\fP (\fIint\fP) \-\- The length of the closest reference for a single
+hypothesis OR the sum of all the closest references for every hypothesis.
+.UNINDENT
+.TP
+.B Returns
+BLEU\(aqs brevity penalty.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.bleu_score.closest_ref_length(references, hyp_len)
+This function finds the reference that is the closest length to the
+hypothesis. The closest reference length is referred to as the \fIr\fP variable
+in the brevity penalty formula in Papineni et al. (2002)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- A list of reference translations.
+.IP \(bu 2
+\fBhyp_len\fP (\fIint\fP) \-\- The length of the hypothesis.
+.UNINDENT
+.TP
+.B Returns
+The length of the reference that\(aqs closest to the hypothesis.
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.bleu_score.corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=None, auto_reweigh=False)
+Calculate a single corpus\-level BLEU score (aka. system\-level BLEU) for all
+the hypotheses and their respective references.
+.sp
+Instead of averaging the sentence level BLEU scores (i.e. macro\-average
+precision), the original BLEU metric (Papineni et al. 2002) accounts for
+the micro\-average precision (i.e. summing the numerators and denominators
+for each hypothesis\-reference(s) pair before the division).
+.sp
+.nf
+.ft C
+>>> hyp1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqwhich\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqalways\(aq,
+\&... \(aqobeys\(aq, \(aqthe\(aq, \(aqcommands\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+>>> ref1a = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqthat\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqwill\(aq, \(aqforever\(aq,
+\&... \(aqheed\(aq, \(aqParty\(aq, \(aqcommands\(aq]
+>>> ref1b = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqguiding\(aq, \(aqprinciple\(aq, \(aqwhich\(aq,
+\&... \(aqguarantees\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqforces\(aq, \(aqalways\(aq,
+\&... \(aqbeing\(aq, \(aqunder\(aq, \(aqthe\(aq, \(aqcommand\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqParty\(aq]
+>>> ref1c = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqpractical\(aq, \(aqguide\(aq, \(aqfor\(aq, \(aqthe\(aq,
+\&... \(aqarmy\(aq, \(aqalways\(aq, \(aqto\(aq, \(aqheed\(aq, \(aqthe\(aq, \(aqdirections\(aq,
+\&... \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> hyp2 = [\(aqhe\(aq, \(aqread\(aq, \(aqthe\(aq, \(aqbook\(aq, \(aqbecause\(aq, \(aqhe\(aq, \(aqwas\(aq,
+\&... \(aqinterested\(aq, \(aqin\(aq, \(aqworld\(aq, \(aqhistory\(aq]
+>>> ref2a = [\(aqhe\(aq, \(aqwas\(aq, \(aqinterested\(aq, \(aqin\(aq, \(aqworld\(aq, \(aqhistory\(aq,
+\&... \(aqbecause\(aq, \(aqhe\(aq, \(aqread\(aq, \(aqthe\(aq, \(aqbook\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+>>> hypotheses = [hyp1, hyp2]
+>>> corpus_bleu(list_of_references, hypotheses)
+0.5920...
+.ft P
+.fi
+.sp
+The example below shows that corpus_bleu() is different from averaging
+sentence_bleu() for the hypotheses
+.sp
+.nf
+.ft C
+>>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
+>>> score2 = sentence_bleu([ref2a], hyp2)
+>>> (score1 + score2) / 2
+0.6223...
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBlist_of_references\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- a corpus of lists of reference sentences, w.r.t. hypotheses
+.IP \(bu 2
+\fBhypotheses\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- a list of hypothesis sentences
+.IP \(bu 2
+\fBweights\fP (\fIlist\fP\fI(\fP\fIfloat\fP\fI)\fP) \-\- weights for unigrams, bigrams, trigrams and so on
+.IP \(bu 2
+\fBsmoothing_function\fP (\fISmoothingFunction\fP) \-\-
+.IP \(bu 2
+\fBauto_reweigh\fP (\fIbool\fP) \-\- Option to re\-normalize the weights uniformly.
+.UNINDENT
+.TP
+.B Returns
+The corpus\-level BLEU score.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.bleu_score.modified_precision(references, hypothesis, n)
+Calculate modified ngram precision.
+.sp
+The normal precision method may lead to some wrong translations with
+high precision, e.g., a translation in which a word from the reference
+repeats several times has very high precision.
+.sp
+This function only returns the Fraction object that contains the numerator
+and denominator necessary to calculate the corpus\-level precision.
+To calculate the modified precision for a single pair of hypothesis and +references, cast the Fraction object into a float. +.sp +The famous "the the the ... " example shows that you can get BLEU precision +by duplicating high frequency words. +.sp +.nf +.ft C +>>> reference1 = \(aqthe cat is on the mat\(aq.split() +>>> reference2 = \(aqthere is a cat on the mat\(aq.split() +>>> hypothesis1 = \(aqthe the the the the the the\(aq.split() +>>> references = [reference1, reference2] +>>> float(modified_precision(references, hypothesis1, n=1)) +0.2857... +.ft P +.fi +.sp +In the modified n\-gram precision, a reference word will be considered +exhausted after a matching hypothesis word is identified, e.g. +.sp +.nf +.ft C +>>> reference1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqthat\(aq, +\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqwill\(aq, +\&... \(aqforever\(aq, \(aqheed\(aq, \(aqParty\(aq, \(aqcommands\(aq] +>>> reference2 = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqguiding\(aq, \(aqprinciple\(aq, \(aqwhich\(aq, +\&... \(aqguarantees\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqforces\(aq, \(aqalways\(aq, +\&... \(aqbeing\(aq, \(aqunder\(aq, \(aqthe\(aq, \(aqcommand\(aq, \(aqof\(aq, \(aqthe\(aq, +\&... \(aqParty\(aq] +>>> reference3 = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqpractical\(aq, \(aqguide\(aq, \(aqfor\(aq, \(aqthe\(aq, +\&... \(aqarmy\(aq, \(aqalways\(aq, \(aqto\(aq, \(aqheed\(aq, \(aqthe\(aq, \(aqdirections\(aq, +\&... \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq] +>>> hypothesis = \(aqof the\(aq.split() +>>> references = [reference1, reference2, reference3] +>>> float(modified_precision(references, hypothesis, n=1)) +1.0 +>>> float(modified_precision(references, hypothesis, n=2)) +1.0 +.ft P +.fi +.sp +An example of a normal machine translation hypothesis: +.sp +.nf +.ft C +>>> hypothesis1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqwhich\(aq, +\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqalways\(aq, +\&... \(aqobeys\(aq, \(aqthe\(aq, \(aqcommands\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq] +.ft P +.fi +.sp +.nf +.ft C +>>> hypothesis2 = [\(aqIt\(aq, \(aqis\(aq, \(aqto\(aq, \(aqinsure\(aq, \(aqthe\(aq, \(aqtroops\(aq, +\&... \(aqforever\(aq, \(aqhearing\(aq, \(aqthe\(aq, \(aqactivity\(aq, \(aqguidebook\(aq, +\&... \(aqthat\(aq, \(aqparty\(aq, \(aqdirect\(aq] +.ft P +.fi +.sp +.nf +.ft C +>>> reference1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqthat\(aq, +\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqwill\(aq, +\&... \(aqforever\(aq, \(aqheed\(aq, \(aqParty\(aq, \(aqcommands\(aq] +.ft P +.fi +.sp +.nf +.ft C +>>> reference2 = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqguiding\(aq, \(aqprinciple\(aq, \(aqwhich\(aq, +\&... \(aqguarantees\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqforces\(aq, \(aqalways\(aq, +\&... \(aqbeing\(aq, \(aqunder\(aq, \(aqthe\(aq, \(aqcommand\(aq, \(aqof\(aq, \(aqthe\(aq, +\&... \(aqParty\(aq] +.ft P +.fi +.sp +.nf +.ft C +>>> reference3 = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqpractical\(aq, \(aqguide\(aq, \(aqfor\(aq, \(aqthe\(aq, +\&... \(aqarmy\(aq, \(aqalways\(aq, \(aqto\(aq, \(aqheed\(aq, \(aqthe\(aq, \(aqdirections\(aq, +\&... \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq] +>>> references = [reference1, reference2, reference3] +>>> float(modified_precision(references, hypothesis1, n=1)) +0.9444... 
+>>> float(modified_precision(references, hypothesis2, n=1))
+0.5714...
+>>> float(modified_precision(references, hypothesis1, n=2))
+0.5882352941176471
+>>> float(modified_precision(references, hypothesis2, n=2))
+0.07692...
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- A list of reference translations.
+.IP \(bu 2
+\fBhypothesis\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A hypothesis translation.
+.IP \(bu 2
+\fBn\fP (\fIint\fP) \-\- The ngram order.
+.UNINDENT
+.TP
+.B Returns
+BLEU\(aqs modified precision for the nth order ngram.
+.TP
+.B Return type
+Fraction
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.bleu_score.sentence_bleu(references, hypothesis, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=None, auto_reweigh=False)
+Calculate BLEU score (Bilingual Evaluation Understudy) from
+Papineni, Kishore, Salim Roukos, Todd Ward, and Wei\-Jing Zhu. 2002.
+"BLEU: a method for automatic evaluation of machine translation."
+In Proceedings of ACL. \fI\%http://www.aclweb.org/anthology/P02\-1040.pdf\fP
+.sp
+.nf
+.ft C
+>>> hypothesis1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqwhich\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqalways\(aq,
+\&... \(aqobeys\(aq, \(aqthe\(aq, \(aqcommands\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> hypothesis2 = [\(aqIt\(aq, \(aqis\(aq, \(aqto\(aq, \(aqinsure\(aq, \(aqthe\(aq, \(aqtroops\(aq,
+\&... \(aqforever\(aq, \(aqhearing\(aq, \(aqthe\(aq, \(aqactivity\(aq, \(aqguidebook\(aq,
+\&... \(aqthat\(aq, \(aqparty\(aq, \(aqdirect\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> reference1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqthat\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqwill\(aq, \(aqforever\(aq,
+\&... \(aqheed\(aq, \(aqParty\(aq, \(aqcommands\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> reference2 = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqguiding\(aq, \(aqprinciple\(aq, \(aqwhich\(aq,
+\&... \(aqguarantees\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqforces\(aq, \(aqalways\(aq,
+\&... \(aqbeing\(aq, \(aqunder\(aq, \(aqthe\(aq, \(aqcommand\(aq, \(aqof\(aq, \(aqthe\(aq,
+\&... \(aqParty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> reference3 = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqpractical\(aq, \(aqguide\(aq, \(aqfor\(aq, \(aqthe\(aq,
+\&... \(aqarmy\(aq, \(aqalways\(aq, \(aqto\(aq, \(aqheed\(aq, \(aqthe\(aq, \(aqdirections\(aq,
+\&... \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> sentence_bleu([reference1, reference2, reference3], hypothesis1)
+0.5045...
+.ft P
+.fi
+.sp
+If there is no n\-gram overlap for any order of n\-grams, BLEU returns the
+value 0. This is because the precision for the order of n\-grams without
+overlap is 0, and the geometric mean in the final BLEU score computation
+multiplies the 0 with the precision of other n\-grams. This results in 0
+(independently of the precision of the other n\-gram orders). The following
+example has zero 3\-gram and 4\-gram overlaps:
+.sp
+.nf
+.ft C
+>>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4)
+0.0
+.ft P
+.fi
+.sp
+To avoid this harsh behaviour when no n\-gram overlaps are found, a smoothing
+function can be used.
+.sp
+.nf
+.ft C
+>>> chencherry = SmoothingFunction()
+>>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
+\&...
smoothing_function=chencherry.method1)
+0.0370...
+.ft P
+.fi
+.sp
+The default BLEU calculates a score for up to 4\-grams using uniform
+weights (this is called BLEU\-4). To evaluate your translations with
+higher/lower order ngrams, use customized weights. E.g. when accounting
+for up to 5\-grams with uniform weights (this is called BLEU\-5) use:
+.sp
+.nf
+.ft C
+>>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
+>>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights)
+0.3920...
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- reference sentences
+.IP \(bu 2
+\fBhypothesis\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a hypothesis sentence
+.IP \(bu 2
+\fBweights\fP (\fIlist\fP\fI(\fP\fIfloat\fP\fI)\fP) \-\- weights for unigrams, bigrams, trigrams and so on
+.IP \(bu 2
+\fBsmoothing_function\fP (\fISmoothingFunction\fP) \-\-
+.IP \(bu 2
+\fBauto_reweigh\fP (\fIbool\fP) \-\- Option to re\-normalize the weights uniformly.
+.UNINDENT
+.TP
+.B Returns
+The sentence\-level BLEU score.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.chrf_score module
+.sp
+ChrF score implementation
+.INDENT 0.0
+.TP
+.B nltk.translate.chrf_score.chrf_precision_recall_fscore_support(reference, hypothesis, n, beta=3.0, epsilon=1e\-16)
+This function computes the precision, recall and fscore from the ngram
+overlaps. It also returns the \fIsupport\fP, which is the true positive count.
+.sp
+By underspecifying the input type, the function stays agnostic as to how
+the ngrams are computed: it simply takes whichever elements appear in the
+lists, which can be either tokens or characters.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreference\fP (\fIlist\fP) \-\- The reference sentence.
+.IP \(bu 2
+\fBhypothesis\fP (\fIlist\fP) \-\- The hypothesis sentence.
+.IP \(bu 2
+\fBn\fP (\fIint\fP) \-\- Extract up to the n\-th order ngrams
+.IP \(bu 2
+\fBbeta\fP (\fIfloat\fP) \-\- The parameter to assign more importance to recall over precision.
+.IP \(bu 2
+\fBepsilon\fP (\fIfloat\fP) \-\- The fallback value if the hypothesis or reference is empty.
+.UNINDENT
+.TP
+.B Returns
+The precision, recall, f\-score and support (true positive count).
+.TP
+.B Return type
+tuple(float)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.chrf_score.corpus_chrf(references, hypotheses, min_len=1, max_len=6, beta=3.0, ignore_whitespace=True)
+Calculates the corpus level CHRF (Character n\-gram F\-score), which is the
+macro\-averaged value of the sentence/segment level CHRF scores.
+.sp
+This implementation of CHRF only supports a single reference at the moment.
+.sp
+.nf
+.ft C
+>>> ref1 = str(\(aqIt is a guide to action that ensures that the military \(aq
+\&... \(aqwill forever heed Party commands\(aq).split()
+>>> ref2 = str(\(aqIt is the guiding principle which guarantees the military \(aq
+\&... \(aqforces always being under the command of the Party\(aq).split()
+>>>
+>>> hyp1 = str(\(aqIt is a guide to action which ensures that the military \(aq
+\&... \(aqalways obeys the commands of the party\(aq).split()
+>>> hyp2 = str(\(aqIt is to insure the troops forever hearing the activity \(aq
+\&... \(aqguidebook that party direct\(aq)
+>>> corpus_chrf([ref1, ref2, ref1, ref2], [hyp1, hyp2, hyp2, hyp1])
+0.3910...
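+>>> # Added sanity check (not part of the original doctest): a hypothesis
+>>> # identical to its single reference earns the maximum score.
+>>> round(corpus_chrf([ref1], [ref1]), 3)
+1.0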
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- a corpus of list of reference sentences, w.r.t. hypotheses
+.IP \(bu 2
+\fBhypotheses\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- a list of hypothesis sentences
+.IP \(bu 2
+\fBmin_len\fP (\fIint\fP) \-\- The minimum order of n\-gram this function should extract.
+.IP \(bu 2
+\fBmax_len\fP (\fIint\fP) \-\- The maximum order of n\-gram this function should extract.
+.IP \(bu 2
+\fBbeta\fP (\fIfloat\fP) \-\- the parameter to assign more importance to recall over precision
+.IP \(bu 2
+\fBignore_whitespace\fP (\fIbool\fP) \-\- ignore whitespace characters in scoring
+.UNINDENT
+.TP
+.B Returns
+the corpus level CHRF score.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.chrf_score.sentence_chrf(reference, hypothesis, min_len=1, max_len=6, beta=3.0, ignore_whitespace=True)
+.INDENT 7.0
+.TP
+.B Calculates the sentence level CHRF (Character n\-gram F\-score) described in
+.INDENT 7.0
+.IP \(bu 2
+Maja Popovic. 2015. CHRF: Character n\-gram F\-score for Automatic MT Evaluation.
+In Proceedings of the 10th Workshop on Machine Translation.
+\fI\%http://www.statmt.org/wmt15/pdf/WMT49.pdf\fP
+.IP \(bu 2
+Maja Popovic. 2016. CHRF Deconstructed: β Parameters and n\-gram Weights.
+In Proceedings of the 1st Conference on Machine Translation.
+\fI\%http://www.statmt.org/wmt16/pdf/W16\-2341.pdf\fP
+.UNINDENT
+.UNINDENT
+.sp
+This implementation of CHRF only supports a single reference at the moment.
+.sp
+For details not reported in the paper, consult Maja Popovic\(aqs original
+implementation: \fI\%https://github.com/m\-popovic/chrF\fP
+.sp
+The code should output results equivalent to running CHRF++ with the
+following options: \-nw 0 \-b 3
+.sp
+An example from the original BLEU paper
+\fI\%http://www.aclweb.org/anthology/P02\-1040.pdf\fP
+.sp
+.nf
+.ft C
+>>> ref1 = str(\(aqIt is a guide to action that ensures that the military \(aq
+\&... \(aqwill forever heed Party commands\(aq).split()
+>>> hyp1 = str(\(aqIt is a guide to action which ensures that the military \(aq
+\&... \(aqalways obeys the commands of the party\(aq).split()
+>>> hyp2 = str(\(aqIt is to insure the troops forever hearing the activity \(aq
+\&... \(aqguidebook that party direct\(aq).split()
+>>> sentence_chrf(ref1, hyp1)
+0.6349...
+>>> sentence_chrf(ref1, hyp2)
+0.3330...
+.ft P
+.fi
+.sp
+The infamous "the the the ... " example
+.sp
+.nf
+.ft C
+>>> ref = \(aqthe cat is on the mat\(aq.split()
+>>> hyp = \(aqthe the the the the the the\(aq.split()
+>>> sentence_chrf(ref, hyp)
+0.1468...
+.ft P
+.fi
+.sp
+An example showing that this function also accepts plain strings as input,
+i.e. str in place of list(str):
+.sp
+.nf
+.ft C
+>>> ref1 = str(\(aqIt is a guide to action that ensures that the military \(aq
+\&... \(aqwill forever heed Party commands\(aq)
+>>> hyp1 = str(\(aqIt is a guide to action which ensures that the military \(aq
+\&... \(aqalways obeys the commands of the party\(aq)
+>>> sentence_chrf(ref1, hyp1)
+0.6349...
+>>> type(ref1) == type(hyp1) == str
+True
+>>> sentence_chrf(ref1.split(), hyp1.split())
+0.6349...
+.ft P
+.fi
+.sp
+To skip the unigrams and only use 2\- to 3\-grams:
+.sp
+.nf
+.ft C
+>>> sentence_chrf(ref1, hyp1, min_len=2, max_len=3)
+0.6617...
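+>>> # Added illustration (not part of the original doctest): keeping only
+>>> # the rarer, longer n\-grams (orders 4 to 6) is harsher on this
+>>> # hypothesis than the default 1 to 6 range.
+>>> sentence_chrf(ref1, hyp1, min_len=4, max_len=6) < sentence_chrf(ref1, hyp1)
+True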
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreference\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI) \fP\fI/ str\fP) \-\- reference sentence
+.IP \(bu 2
+\fBhypothesis\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI) \fP\fI/ str\fP) \-\- a hypothesis sentence
+.IP \(bu 2
+\fBmin_len\fP (\fIint\fP) \-\- The minimum order of n\-gram this function should extract.
+.IP \(bu 2
+\fBmax_len\fP (\fIint\fP) \-\- The maximum order of n\-gram this function should extract.
+.IP \(bu 2
+\fBbeta\fP (\fIfloat\fP) \-\- the parameter to assign more importance to recall over precision
+.IP \(bu 2
+\fBignore_whitespace\fP (\fIbool\fP) \-\- ignore whitespace characters in scoring
+.UNINDENT
+.TP
+.B Returns
+the sentence level CHRF score.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.gale_church module
+.sp
+A port of the Gale\-Church Aligner.
+.sp
+Gale & Church (1993), A Program for Aligning Sentences in Bilingual Corpora.
+\fI\%http://aclweb.org/anthology/J93\-1004.pdf\fP
+.INDENT 0.0
+.TP
+.B class nltk.translate.gale_church.LanguageIndependent
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B AVERAGE_CHARACTERS = 1
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PRIORS = {(0, 1): 0.0099, (1, 0): 0.0099, (1, 1): 0.89, (1, 2): 0.089, (2, 1): 0.089, (2, 2): 0.011}
+.UNINDENT
+.INDENT 7.0
+.TP
+.B VARIANCE_CHARACTERS = 6.8
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.align_blocks(source_sents_lens, target_sents_lens, params=LanguageIndependent)
+Return the sentence alignment of two text blocks (usually paragraphs).
+.sp
+.nf
+.ft C
+>>> align_blocks([5,5,5], [7,7,7])
+[(0, 0), (1, 1), (2, 2)]
+>>> align_blocks([10,5,5], [12,20])
+[(0, 0), (1, 1), (2, 1)]
+>>> align_blocks([12,20], [10,5,5])
+[(0, 0), (1, 1), (1, 2)]
+>>> align_blocks([10,2,10,10,2,10], [12,3,20,3,12])
+[(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4)]
+.ft P
+.fi
+.sp
+@param source_sents_lens: The list of source sentence lengths.
+@param target_sents_lens: The list of target sentence lengths.
+@param params: the sentence alignment parameters.
+@return: The sentence alignments, a list of index pairs.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.align_log_prob(i, j, source_sents, target_sents, alignment, params)
+Returns the log probability of the two sentences \fBsource_sents[i]\fP and
+\fBtarget_sents[j]\fP being aligned with a specific \fBalignment\fP.
+.sp
+@param i: The offset of the source sentence.
+@param j: The offset of the target sentence.
+@param source_sents: The list of source sentence lengths.
+@param target_sents: The list of target sentence lengths.
+@param alignment: The alignment type, a tuple of two integers.
+@param params: The sentence alignment parameters.
+.sp
+@returns: The log probability of a specific alignment between the two sentences, given the parameters.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.align_texts(source_blocks, target_blocks, params=LanguageIndependent)
+Creates the sentence alignment of two texts.
+.sp
+Texts can consist of several blocks. Block boundaries cannot be crossed by sentence
+alignment links.
+.sp
+Each block consists of a list that contains the lengths (in characters) of the sentences
+in this block.
+.sp
+@param source_blocks: The list of blocks in the source text.
+@param target_blocks: The list of blocks in the target text.
+@param params: the sentence alignment parameters.
+.sp
+@returns: A list of sentence alignment lists
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.erfcc(x)
+Complementary error function.
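+.sp
+As an added sanity check (not part of the original documentation), this
+approximation can be compared against Python\(aqs \fBmath.erfc\fP, which it
+should match to several decimal places:
+.sp
+.nf
+.ft C
+>>> import math
+>>> from nltk.translate.gale_church import erfcc
+>>> abs(erfcc(1.0) \- math.erfc(1.0)) < 1e\-6
+True
+.ft P
+.fi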
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.norm_cdf(x)
+Return the area under the normal distribution from \-∞ to x.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.norm_logsf(x)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.parse_token_stream(stream, soft_delimiter, hard_delimiter)
+Parses a stream of tokens and splits it into sentences (using \fBsoft_delimiter\fP tokens)
+and blocks (using \fBhard_delimiter\fP tokens) for use with the \fBalign_texts\fP function.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.split_at(it, split_value)
+Splits an iterator \fBit\fP at values of \fBsplit_value\fP.
+.sp
+Each instance of \fBsplit_value\fP is swallowed. The iterator produces
+subiterators which need to be consumed fully before the next subiterator
+can be used.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.gale_church.trace(backlinks, source_sents_lens, target_sents_lens)
+Traverses the alignment costs from the tracebacks and retrieves the
+appropriate sentence pairs.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBbacklinks\fP (\fIdict\fP) \-\- A dictionary where the keys are alignment points and the values are the costs (referencing the LanguageIndependent.PRIORS)
+.IP \(bu 2
+\fBsource_sents_lens\fP (\fIlist\fP\fI(\fP\fIint\fP\fI)\fP) \-\- A list of source sentences\(aq lengths
+.IP \(bu 2
+\fBtarget_sents_lens\fP (\fIlist\fP\fI(\fP\fIint\fP\fI)\fP) \-\- A list of target sentences\(aq lengths
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.gdfa module
+.INDENT 0.0
+.TP
+.B nltk.translate.gdfa.grow_diag_final_and(srclen, trglen, e2f, f2e)
+This function symmetrizes the source\-to\-target and target\-to\-source
+word alignment outputs using the grow\-diag\-final\-and (GDFA) algorithm
+(Koehn, 2005).
+.sp
+Step 1: Find the intersection of the bidirectional alignment.
+.INDENT 7.0
+.TP
+.B Step 2: Search for additional neighbor alignment points to be added, given
+these criteria: (i) neighbor alignment points are not in the
+intersection and (ii) neighbor alignments are in the union.
+.TP
+.B Step 3: Add all other alignment points that are not in the intersection and not in
+.INDENT 7.0
+.INDENT 3.5
+the neighboring alignments that met the criteria, but that are in the
+original forward/backward alignment outputs.
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> forw = (\(aq0\-0 2\-1 9\-2 21\-3 10\-4 7\-5 11\-6 9\-7 12\-8 1\-9 3\-10 \(aq
+\&... \(aq4\-11 17\-12 17\-13 25\-14 13\-15 24\-16 11\-17 28\-18\(aq)
+>>> back = (\(aq0\-0 1\-9 2\-9 3\-10 4\-11 5\-12 6\-6 7\-5 8\-6 9\-7 10\-4 \(aq
+\&... \(aq11\-6 12\-8 13\-12 15\-12 17\-13 18\-13 19\-12 20\-13 \(aq
+\&... \(aq21\-3 22\-12 23\-14 24\-17 25\-15 26\-17 27\-18 28\-18\(aq)
+>>> srctext = ("この よう な ハロー 白色 わい 星 の L 関数 "
+\&... "は L と 共 に 不連続 に 増加 する こと が "
+\&... "期待 さ れる こと を 示し た 。")
+>>> trgtext = ("Therefore , we expect that the luminosity function "
+\&... "of such halo white dwarfs increases discontinuously "
+\&... "with the luminosity .")
+>>> srclen = len(srctext.split())
+>>> trglen = len(trgtext.split())
+>>>
+>>> gdfa = grow_diag_final_and(srclen, trglen, forw, back)
+>>> gdfa == sorted(set([(28, 18), (6, 6), (24, 17), (2, 1), (15, 12), (13, 12),
+\&... (2, 9), (3, 10), (26, 17), (25, 15), (8, 6), (9, 7), (20,
+\&... 13), (18, 13), (0, 0), (10, 4), (13, 15), (23, 14), (7, 5),
+\&... (25, 14), (1, 9), (17, 13), (4, 11), (11, 17), (9, 2), (22,
+\&... 12), (27, 18), (24, 16), (21, 3), (19, 12), (17, 12), (5,
+\&... 12), (11, 6), (12, 8)]))
+True
+.ft P
+.fi
+.UNINDENT
+.sp
+References:
+Koehn, P., A. Axelrod, A.
Birch, C. Callison\-Burch, M. Osborne, and D. Talbot.
+2005. Edinburgh System Description for the 2005 IWSLT Speech
+Translation Evaluation. In MT Eval Workshop.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsrclen\fP (\fIint\fP) \-\- the number of tokens in the source language
+.IP \(bu 2
+\fBtrglen\fP (\fIint\fP) \-\- the number of tokens in the target language
+.IP \(bu 2
+\fBe2f\fP (\fIstr\fP) \-\- the forward word alignment outputs from source\-to\-target
+language (in pharaoh output format)
+.IP \(bu 2
+\fBf2e\fP (\fIstr\fP) \-\- the backward word alignment outputs from target\-to\-source
+language (in pharaoh output format)
+.UNINDENT
+.TP
+.B Return type
+set(tuple(int))
+.TP
+.B Returns
+the symmetrized alignment points from the GDFA algorithm
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.gleu_score module
+.sp
+GLEU score implementation.
+.INDENT 0.0
+.TP
+.B nltk.translate.gleu_score.corpus_gleu(list_of_references, hypotheses, min_len=1, max_len=4)
+Calculate a single corpus\-level GLEU score (a.k.a. system\-level GLEU) for all
+the hypotheses and their respective references.
+.sp
+Instead of averaging the sentence level GLEU scores (i.e. macro\-average
+precision), Wu et al. (2016) sum up the matching tokens and the max of
+hypothesis and reference tokens for each sentence, then compute the score
+using the aggregate values.
+.INDENT 7.0
+.TP
+.B From Mike Schuster (via email):
+.INDENT 7.0
+.TP
+.B "For the corpus, we just add up the two statistics n_match and
+n_all = max(n_all_output, n_all_target) for all sentences, then
+calculate gleu_score = n_match / n_all, so it is not just a mean of
+the sentence gleu scores (in our case, longer sentences count more,
+which I think makes sense as they are more difficult to translate)."
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> hyp1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqwhich\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqalways\(aq,
+\&... \(aqobeys\(aq, \(aqthe\(aq, \(aqcommands\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+>>> ref1a = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqthat\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqwill\(aq, \(aqforever\(aq,
+\&... \(aqheed\(aq, \(aqParty\(aq, \(aqcommands\(aq]
+>>> ref1b = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqguiding\(aq, \(aqprinciple\(aq, \(aqwhich\(aq,
+\&... \(aqguarantees\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqforces\(aq, \(aqalways\(aq,
+\&... \(aqbeing\(aq, \(aqunder\(aq, \(aqthe\(aq, \(aqcommand\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqParty\(aq]
+>>> ref1c = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqpractical\(aq, \(aqguide\(aq, \(aqfor\(aq, \(aqthe\(aq,
+\&... \(aqarmy\(aq, \(aqalways\(aq, \(aqto\(aq, \(aqheed\(aq, \(aqthe\(aq, \(aqdirections\(aq,
+\&... \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> hyp2 = [\(aqhe\(aq, \(aqread\(aq, \(aqthe\(aq, \(aqbook\(aq, \(aqbecause\(aq, \(aqhe\(aq, \(aqwas\(aq,
+\&... \(aqinterested\(aq, \(aqin\(aq, \(aqworld\(aq, \(aqhistory\(aq]
+>>> ref2a = [\(aqhe\(aq, \(aqwas\(aq, \(aqinterested\(aq, \(aqin\(aq, \(aqworld\(aq, \(aqhistory\(aq,
+\&... \(aqbecause\(aq, \(aqhe\(aq, \(aqread\(aq, \(aqthe\(aq, \(aqbook\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+>>> hypotheses = [hyp1, hyp2]
+>>> corpus_gleu(list_of_references, hypotheses)
+0.5673...
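+>>> # Added sanity check (not part of the original doctest): a hypothesis
+>>> # identical to its reference scores the maximum of 1.0.
+>>> corpus_gleu([[ref1a]], [ref1a])
+1.0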
+.ft P +.fi +.sp +The example below show that corpus_gleu() is different from averaging +sentence_gleu() for hypotheses +.sp +.nf +.ft C +>>> score1 = sentence_gleu([ref1a], hyp1) +>>> score2 = sentence_gleu([ref2a], hyp2) +>>> (score1 + score2) / 2 +0.6144... +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBlist_of_references\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- a list of reference sentences, w.r.t. hypotheses +.IP \(bu 2 +\fBhypotheses\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- a list of hypothesis sentences +.IP \(bu 2 +\fBmin_len\fP (\fIint\fP) \-\- The minimum order of n\-gram this function should extract. +.IP \(bu 2 +\fBmax_len\fP (\fIint\fP) \-\- The maximum order of n\-gram this function should extract. +.UNINDENT +.TP +.B Returns +The corpus\-level GLEU score. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.translate.gleu_score.sentence_gleu(references, hypothesis, min_len=1, max_len=4) +Calculates the sentence level GLEU (Google\-BLEU) score described in +.INDENT 7.0 +.INDENT 3.5 +Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi, +Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey, +Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser, +Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens, +George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, +Jason Riesa, Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, +Jeffrey Dean. (2016) Google’s Neural Machine Translation System: +Bridging the Gap between Human and Machine Translation. +eprint arXiv:1609.08144. https://arxiv.org/pdf/1609.08144v2.pdf +Retrieved on 27 Oct 2016. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B From Wu et al. (2016): +.INDENT 7.0 +.TP +.B "The BLEU score has some undesirable properties when used for single +sentences, as it was designed to be a corpus measure. We therefore +use a slightly different score for our RL experiments which we call +the \(aqGLEU score\(aq. For the GLEU score, we record all sub\-sequences of +1, 2, 3 or 4 tokens in output and target sequence (n\-grams). We then +compute a recall, which is the ratio of the number of matching n\-grams +to the number of total n\-grams in the target (ground truth) sequence, +and a precision, which is the ratio of the number of matching n\-grams +to the number of total n\-grams in the generated output sequence. Then +GLEU score is simply the minimum of recall and precision. This GLEU +score\(aqs range is always between 0 (no matches) and 1 (all match) and +it is symmetrical when switching output and target. According to +our experiments, GLEU score correlates quite well with the BLEU +metric on a corpus level but does not have its drawbacks for our per +sentence reward objective." +.UNINDENT +.TP +.B Note: The initial implementation only allowed a single reference, but now +a list of references is required (which is consistent with +bleu_score.sentence_bleu()). +.UNINDENT +.sp +The infamous "the the the ... " example +.sp +.nf +.ft C +>>> ref = \(aqthe cat is on the mat\(aq.split() +>>> hyp = \(aqthe the the the the the the\(aq.split() +>>> sentence_gleu([ref], hyp) +0.0909... +.ft P +.fi +.sp +An example to evaluate normal machine translation outputs +.sp +.nf +.ft C +>>> ref1 = str(\(aqIt is a guide to action that ensures that the military \(aq +\&... 
\(aqwill forever heed Party commands\(aq).split()
+>>> hyp1 = str(\(aqIt is a guide to action which ensures that the military \(aq
+\&... \(aqalways obeys the commands of the party\(aq).split()
+>>> hyp2 = str(\(aqIt is to insure the troops forever hearing the activity \(aq
+\&... \(aqguidebook that party direct\(aq).split()
+>>> sentence_gleu([ref1], hyp1)
+0.4393...
+>>> sentence_gleu([ref1], hyp2)
+0.1206...
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- a list of reference sentences
+.IP \(bu 2
+\fBhypothesis\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a hypothesis sentence
+.IP \(bu 2
+\fBmin_len\fP (\fIint\fP) \-\- The minimum order of n\-gram this function should extract.
+.IP \(bu 2
+\fBmax_len\fP (\fIint\fP) \-\- The maximum order of n\-gram this function should extract.
+.UNINDENT
+.TP
+.B Returns
+the sentence level GLEU score.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.ibm1 module
+.sp
+Lexical translation model that ignores word order.
+.sp
+In IBM Model 1, word order is ignored for simplicity. As long as the
+word alignments are equivalent, it doesn\(aqt matter where the word occurs
+in the source or target sentence. Thus, the following three alignments
+are equally likely.
+.sp
+Source: je mange du jambon
+Target: i eat some ham
+Alignment: (0,0) (1,1) (2,2) (3,3)
+.sp
+Source: je mange du jambon
+Target: some ham eat i
+Alignment: (0,2) (1,3) (2,1) (3,0)
+.sp
+Source: du jambon je mange
+Target: eat i some ham
+Alignment: (0,3) (1,2) (2,0) (3,1)
+.sp
+Note that an alignment is represented here as
+(word_index_in_target, word_index_in_source).
+.sp
+The EM algorithm used in Model 1 is:
+E step \- In the training data, count how many times a source language
+.INDENT 0.0
+.INDENT 3.5
+word is translated into a target language word, weighted by
+the prior probability of the translation.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B M step \- Estimate the new probability of translation based on the
+counts from the Expectation step.
+.UNINDENT
+.sp
+(A minimal Python sketch of these E and M steps is given after the
+references below.)
+.sp
+Notations:
+i: Position in the source sentence
+.INDENT 0.0
+.INDENT 3.5
+Valid values are 0 (for NULL), 1, 2, ..., length of source sentence
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B j: Position in the target sentence
+Valid values are 1, 2, ..., length of target sentence
+.UNINDENT
+.sp
+s: A word in the source language
+t: A word in the target language
+.sp
+References:
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+.sp
+Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263\-311.
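+.sp
+The E and M steps above can be sketched in a few lines of plain Python.
+The following is an illustrative re\-implementation on a toy corpus
+(without the NULL token), not NLTK\(aqs internal code; the names
+\fBt_prob\fP, \fBcounts\fP and \fBtotals\fP are invented for this sketch.
+.sp
+.nf
+.ft C
+>>> from collections import defaultdict
+>>> corpus = [([\(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq]),
+\&... ([\(aqdas\(aq, \(aqbuch\(aq], [\(aqthe\(aq, \(aqbook\(aq]),
+\&... ([\(aqein\(aq, \(aqbuch\(aq], [\(aqa\(aq, \(aqbook\(aq])]
+>>> src_vocab = {s for src, _ in corpus for s in src}
+>>> t_prob = defaultdict(lambda: 1.0 / len(src_vocab))  # uniform start
+>>> for _ in range(5):
+\&...     counts = defaultdict(float)
+\&...     totals = defaultdict(float)
+\&...     for src, trg in corpus:
+\&...         for t in trg:  # E step: collect weighted counts
+\&...             norm = sum(t_prob[t, s] for s in src)
+\&...             for s in src:
+\&...                 counts[t, s] += t_prob[t, s] / norm
+\&...                 totals[s] += t_prob[t, s] / norm
+\&...     for t, s in counts:  # M step: re\-estimate probabilities
+\&...         t_prob[t, s] = counts[t, s] / totals[s]
+>>> t_prob[\(aqbook\(aq, \(aqbuch\(aq] > t_prob[\(aqthe\(aq, \(aqbuch\(aq]
+True
+.ft P
+.fi
+.sp
+After a few iterations, the probability mass for \(aqbuch\(aq concentrates on
+\(aqbook\(aq, which is the behaviour the doctest for \fBIBMModel1\fP below
+demonstrates on a slightly larger corpus.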
+.INDENT 0.0 +.TP +.B class nltk.translate.ibm1.IBMModel1(sentence_aligned_corpus, iterations, probability_tables=None) +Bases: \fI\%nltk.translate.ibm_model.IBMModel\fP +.sp +Lexical translation model that ignores word order +.sp +.nf +.ft C +>>> bitext = [] +>>> bitext.append(AlignedSent([\(aqklein\(aq, \(aqist\(aq, \(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq, \(aqist\(aq, \(aqja\(aq, \(aqgroß\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqbig\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq], [\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq], [\(aqthe\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqein\(aq, \(aqbuch\(aq], [\(aqa\(aq, \(aqbook\(aq])) +.ft P +.fi +.sp +.nf +.ft C +>>> ibm1 = IBMModel1(bitext, 5) +.ft P +.fi +.sp +.nf +.ft C +>>> print(ibm1.translation_table[\(aqbuch\(aq][\(aqbook\(aq]) +0.889... +>>> print(ibm1.translation_table[\(aqdas\(aq][\(aqbook\(aq]) +0.061... +>>> print(ibm1.translation_table[\(aqbuch\(aq][None]) +0.113... +>>> print(ibm1.translation_table[\(aqja\(aq][None]) +0.072... +.ft P +.fi +.sp +.nf +.ft C +>>> test_sentence = bitext[2] +>>> test_sentence.words +[\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq] +>>> test_sentence.mots +[\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq] +>>> test_sentence.alignment +Alignment([(0, 0), (1, 1), (2, 2), (3, 2), (4, 3)]) +.ft P +.fi +.INDENT 7.0 +.TP +.B align(sentence_pair) +Determines the best word alignment for one sentence pair from +the corpus that the model was trained on. +.sp +The best alignment will be set in \fBsentence_pair\fP when the +method returns. In contrast with the internal implementation of +IBM models, the word indices in the \fBAlignment\fP are zero\- +indexed, not one\-indexed. +.INDENT 7.0 +.TP +.B Parameters +\fBsentence_pair\fP (\fIAlignedSent\fP) \-\- A sentence in the source language and its +counterpart sentence in the target language +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B align_all(parallel_corpus) +.UNINDENT +.INDENT 7.0 +.TP +.B prob_alignment_point(s, t) +Probability that word \fBt\fP in the target sentence is aligned to +word \fBs\fP in the source sentence +.UNINDENT +.INDENT 7.0 +.TP +.B prob_all_alignments(src_sentence, trg_sentence) +Computes the probability of all possible word alignments, +expressed as a marginal distribution over target words t +.sp +Each entry in the return value represents the contribution to +the total alignment probability by the target word t. +.sp +To obtain probability(alignment | src_sentence, trg_sentence), +simply sum the entries in the return value. +.INDENT 7.0 +.TP +.B Returns +Probability of t for all s in \fBsrc_sentence\fP +.TP +.B Return type +dict(str): float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob_t_a_given_s(alignment_info) +Probability of target sentence and an alignment given the +source sentence +.UNINDENT +.INDENT 7.0 +.TP +.B set_uniform_probabilities(sentence_aligned_corpus) +Initialize probability tables to a uniform distribution +.sp +Derived classes should implement this accordingly. +.UNINDENT +.INDENT 7.0 +.TP +.B train(parallel_corpus) +.UNINDENT +.UNINDENT +.SS nltk.translate.ibm2 module +.sp +Lexical translation model that considers word order. 
+.sp +IBM Model 2 improves on Model 1 by accounting for word order. +An alignment probability is introduced, a(i | j,l,m), which predicts +a source word position, given its aligned target word\(aqs position. +.sp +The EM algorithm used in Model 2 is: +E step \- In the training data, collect counts, weighted by prior +.INDENT 0.0 +.INDENT 3.5 +probabilities. +(a) count how many times a source language word is translated +.INDENT 0.0 +.INDENT 3.5 +into a target language word +.UNINDENT +.UNINDENT +.INDENT 0.0 +.IP b. 3 +count how many times a particular position in the source +sentence is aligned to a particular position in the target +sentence +.UNINDENT +.UNINDENT +.UNINDENT +.sp +M step \- Estimate new probabilities based on the counts from the E step +.sp +Notations: +i: Position in the source sentence +.INDENT 0.0 +.INDENT 3.5 +Valid values are 0 (for NULL), 1, 2, ..., length of source sentence +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B j: Position in the target sentence +Valid values are 1, 2, ..., length of target sentence +.UNINDENT +.sp +l: Number of words in the source sentence, excluding NULL +m: Number of words in the target sentence +s: A word in the source language +t: A word in the target language +.sp +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. +.sp +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263\-311. +.INDENT 0.0 +.TP +.B class nltk.translate.ibm2.IBMModel2(sentence_aligned_corpus, iterations, probability_tables=None) +Bases: \fI\%nltk.translate.ibm_model.IBMModel\fP +.sp +Lexical translation model that considers word order +.sp +.nf +.ft C +>>> bitext = [] +>>> bitext.append(AlignedSent([\(aqklein\(aq, \(aqist\(aq, \(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq, \(aqist\(aq, \(aqja\(aq, \(aqgroß\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqbig\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq], [\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq], [\(aqthe\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqein\(aq, \(aqbuch\(aq], [\(aqa\(aq, \(aqbook\(aq])) +.ft P +.fi +.sp +.nf +.ft C +>>> ibm2 = IBMModel2(bitext, 5) +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm2.translation_table[\(aqbuch\(aq][\(aqbook\(aq], 3)) +1.0 +>>> print(round(ibm2.translation_table[\(aqdas\(aq][\(aqbook\(aq], 3)) +0.0 +>>> print(round(ibm2.translation_table[\(aqbuch\(aq][None], 3)) +0.0 +>>> print(round(ibm2.translation_table[\(aqja\(aq][None], 3)) +0.0 +.ft P +.fi +.sp +.nf +.ft C +>>> print(ibm2.alignment_table[1][1][2][2]) +0.938... 
+>>> print(round(ibm2.alignment_table[1][2][2][2], 3)) +0.0 +>>> print(round(ibm2.alignment_table[2][2][4][5], 3)) +1.0 +.ft P +.fi +.sp +.nf +.ft C +>>> test_sentence = bitext[2] +>>> test_sentence.words +[\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq] +>>> test_sentence.mots +[\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq] +>>> test_sentence.alignment +Alignment([(0, 0), (1, 1), (2, 2), (3, 2), (4, 3)]) +.ft P +.fi +.INDENT 7.0 +.TP +.B align(sentence_pair) +Determines the best word alignment for one sentence pair from +the corpus that the model was trained on. +.sp +The best alignment will be set in \fBsentence_pair\fP when the +method returns. In contrast with the internal implementation of +IBM models, the word indices in the \fBAlignment\fP are zero\- +indexed, not one\-indexed. +.INDENT 7.0 +.TP +.B Parameters +\fBsentence_pair\fP (\fIAlignedSent\fP) \-\- A sentence in the source language and its +counterpart sentence in the target language +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B align_all(parallel_corpus) +.UNINDENT +.INDENT 7.0 +.TP +.B maximize_alignment_probabilities(counts) +.UNINDENT +.INDENT 7.0 +.TP +.B prob_alignment_point(i, j, src_sentence, trg_sentence) +Probability that position j in \fBtrg_sentence\fP is aligned to +position i in the \fBsrc_sentence\fP +.UNINDENT +.INDENT 7.0 +.TP +.B prob_all_alignments(src_sentence, trg_sentence) +Computes the probability of all possible word alignments, +expressed as a marginal distribution over target words t +.sp +Each entry in the return value represents the contribution to +the total alignment probability by the target word t. +.sp +To obtain probability(alignment | src_sentence, trg_sentence), +simply sum the entries in the return value. +.INDENT 7.0 +.TP +.B Returns +Probability of t for all s in \fBsrc_sentence\fP +.TP +.B Return type +dict(str): float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob_t_a_given_s(alignment_info) +Probability of target sentence and an alignment given the +source sentence +.UNINDENT +.INDENT 7.0 +.TP +.B set_uniform_probabilities(sentence_aligned_corpus) +Initialize probability tables to a uniform distribution +.sp +Derived classes should implement this accordingly. +.UNINDENT +.INDENT 7.0 +.TP +.B train(parallel_corpus) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.translate.ibm2.Model2Counts +Bases: \fI\%nltk.translate.ibm_model.Counts\fP +.sp +Data object to store counts of various parameters during training. +Includes counts for alignment. +.INDENT 7.0 +.TP +.B update_alignment(count, i, j, l, m) +.UNINDENT +.INDENT 7.0 +.TP +.B update_lexical_translation(count, s, t) +.UNINDENT +.UNINDENT +.SS nltk.translate.ibm3 module +.sp +Translation model that considers how a word can be aligned to +multiple words in another language. +.sp +IBM Model 3 improves on Model 2 by directly modeling the phenomenon +where a word in one language may be translated into zero or more words +in another. This is expressed by the fertility probability, +n(phi | source word). +.sp +If a source word translates into more than one word, it is possible to +generate sentences that have the same alignment in multiple ways. This +is modeled by a distortion step. The distortion probability, d(j|i,l,m), +predicts a target word position, given its aligned source word\(aqs +position. The distortion probability replaces the alignment probability +of Model 2. +.sp +The fertility probability is not applicable for NULL. 
Target words that +align to NULL are assumed to be distributed uniformly in the target +sentence. The existence of these words is modeled by p1, the probability +that a target word produced by a real source word requires another +target word that is produced by NULL. +.sp +The EM algorithm used in Model 3 is: +E step \- In the training data, collect counts, weighted by prior +.INDENT 0.0 +.INDENT 3.5 +probabilities. +(a) count how many times a source language word is translated +.INDENT 0.0 +.INDENT 3.5 +into a target language word +.UNINDENT +.UNINDENT +.INDENT 0.0 +.IP b. 3 +count how many times a particular position in the target +sentence is aligned to a particular position in the source +sentence +.IP c. 3 +count how many times a source word is aligned to phi number +of target words +.IP d. 3 +count how many times NULL is aligned to a target word +.UNINDENT +.UNINDENT +.UNINDENT +.sp +M step \- Estimate new probabilities based on the counts from the E step +.sp +Because there are too many possible alignments, only the most probable +ones are considered. First, the best alignment is determined using prior +probabilities. Then, a hill climbing approach is used to find other good +candidates. +.sp +Notations: +i: Position in the source sentence +.INDENT 0.0 +.INDENT 3.5 +Valid values are 0 (for NULL), 1, 2, ..., length of source sentence +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B j: Position in the target sentence +Valid values are 1, 2, ..., length of target sentence +.UNINDENT +.sp +l: Number of words in the source sentence, excluding NULL +m: Number of words in the target sentence +s: A word in the source language +t: A word in the target language +phi: Fertility, the number of target words produced by a source word +p1: Probability that a target word produced by a source word is +.INDENT 0.0 +.INDENT 3.5 +accompanied by another target word that is aligned to NULL +.UNINDENT +.UNINDENT +.sp +p0: 1 \- p1 +.sp +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. +.sp +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263\-311. 
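+.sp
+As an added illustration (not part of the original documentation), the
+fertility phi of a source position can be read directly off an alignment.
+Using the one\-indexed notation above, with source position 0 reserved
+for NULL:
+.sp
+.nf
+.ft C
+>>> # Alignment given as (target_position, source_position) pairs.
+>>> alignment = [(1, 1), (2, 1), (3, 0), (4, 2)]
+>>> def fertility(i, alignment):
+\&...     return sum(1 for j, a_j in alignment if a_j == i)
+>>> fertility(1, alignment)  # source word 1 produced two target words
+2
+>>> fertility(0, alignment)  # one target word was generated by NULL
+1
+.ft P
+.fi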
+.INDENT 0.0 +.TP +.B class nltk.translate.ibm3.IBMModel3(sentence_aligned_corpus, iterations, probability_tables=None) +Bases: \fI\%nltk.translate.ibm_model.IBMModel\fP +.sp +Translation model that considers how a word can be aligned to +multiple words in another language +.sp +.nf +.ft C +>>> bitext = [] +>>> bitext.append(AlignedSent([\(aqklein\(aq, \(aqist\(aq, \(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq, \(aqwar\(aq, \(aqja\(aq, \(aqgroß\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqwas\(aq, \(aqbig\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq], [\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqein\(aq, \(aqhaus\(aq, \(aqist\(aq, \(aqklein\(aq], [\(aqa\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq], [\(aqthe\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqein\(aq, \(aqbuch\(aq], [\(aqa\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqich\(aq, \(aqfasse\(aq, \(aqdas\(aq, \(aqbuch\(aq, \(aqzusammen\(aq], [\(aqi\(aq, \(aqsummarize\(aq, \(aqthe\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqfasse\(aq, \(aqzusammen\(aq], [\(aqsummarize\(aq])) +.ft P +.fi +.sp +.nf +.ft C +>>> ibm3 = IBMModel3(bitext, 5) +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm3.translation_table[\(aqbuch\(aq][\(aqbook\(aq], 3)) +1.0 +>>> print(round(ibm3.translation_table[\(aqdas\(aq][\(aqbook\(aq], 3)) +0.0 +>>> print(round(ibm3.translation_table[\(aqja\(aq][None], 3)) +1.0 +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm3.distortion_table[1][1][2][2], 3)) +1.0 +>>> print(round(ibm3.distortion_table[1][2][2][2], 3)) +0.0 +>>> print(round(ibm3.distortion_table[2][2][4][5], 3)) +0.75 +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm3.fertility_table[2][\(aqsummarize\(aq], 3)) +1.0 +>>> print(round(ibm3.fertility_table[1][\(aqbook\(aq], 3)) +1.0 +.ft P +.fi +.sp +.nf +.ft C +>>> print(ibm3.p1) +0.054... +.ft P +.fi +.sp +.nf +.ft C +>>> test_sentence = bitext[2] +>>> test_sentence.words +[\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq] +>>> test_sentence.mots +[\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq] +>>> test_sentence.alignment +Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)]) +.ft P +.fi +.INDENT 7.0 +.TP +.B maximize_distortion_probabilities(counts) +.UNINDENT +.INDENT 7.0 +.TP +.B prob_t_a_given_s(alignment_info) +Probability of target sentence and an alignment given the +source sentence +.UNINDENT +.INDENT 7.0 +.TP +.B reset_probabilities() +.UNINDENT +.INDENT 7.0 +.TP +.B set_uniform_probabilities(sentence_aligned_corpus) +Initialize probability tables to a uniform distribution +.sp +Derived classes should implement this accordingly. +.UNINDENT +.INDENT 7.0 +.TP +.B train(parallel_corpus) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.translate.ibm3.Model3Counts +Bases: \fI\%nltk.translate.ibm_model.Counts\fP +.sp +Data object to store counts of various parameters during training. +Includes counts for distortion. +.INDENT 7.0 +.TP +.B update_distortion(count, alignment_info, j, l, m) +.UNINDENT +.UNINDENT +.SS nltk.translate.ibm4 module +.sp +Translation model that reorders output words based on their type and +distance from other related words in the output sentence. 
+.sp
+IBM Model 4 improves the distortion model of Model 3, motivated by the
+observation that certain words tend to be re\-ordered in a predictable
+way relative to one another. For example, an adjective\-noun pair in
+English usually has its order flipped to noun\-adjective in French.
+.sp
+Model 4 requires words in the source and target vocabularies to be
+categorized into classes. This can be linguistically driven, like parts
+of speech (adjectives, nouns, prepositions, etc.). Word classes can also
+be obtained by statistical methods. The original IBM Model 4 uses an
+information theoretic approach to group words into 50 classes for each
+vocabulary.
+.sp
+Terminology:
+Cept:
+.INDENT 0.0
+.INDENT 3.5
+A source word with non\-zero fertility, i.e. aligned to one or more
+target words.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B Tablet:
+The set of target word(s) aligned to a cept.
+.TP
+.B Head of cept:
+The first word of the tablet of that cept.
+.TP
+.B Center of cept:
+The average position of the words in that cept\(aqs tablet. If the
+value is not an integer, the ceiling is taken.
+For example, for a tablet with words in positions 2, 5, 6 in the
+target sentence, the center of the corresponding cept is
+ceil((2 + 5 + 6) / 3) = 5
+.TP
+.B Displacement:
+For a head word, defined as (position of head word \- position of
+previous cept\(aqs center). Can be positive or negative.
+For a non\-head word, defined as (position of non\-head word \-
+position of previous word in the same tablet). Always positive,
+because successive words in a tablet are assumed to appear to the
+right of the previous word.
+(A short sketch of these computations follows at the end of this
+overview.)
+.UNINDENT
+.sp
+In contrast to Model 3 which reorders words in a tablet independently of
+other words, Model 4 distinguishes between three cases.
+(1) Words generated by NULL are distributed uniformly.
+(2) For a head word t, its position is modeled by the probability
+.INDENT 0.0
+.INDENT 3.5
+d_head(displacement | word_class_s(s),word_class_t(t)),
+where s is the previous cept, and word_class_s and word_class_t map
+s and t to a source and target language word class respectively.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.IP 3. 3
+For a non\-head word t, its position is modeled by the probability
+d_non_head(displacement | word_class_t(t))
+.UNINDENT
+.sp
+The EM algorithm used in Model 4 is:
+E step \- In the training data, collect counts, weighted by prior
+.INDENT 0.0
+.INDENT 3.5
+probabilities.
+(a) count how many times a source language word is translated
+.INDENT 0.0
+.INDENT 3.5
+into a target language word
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.IP b. 3
+for a particular word class, count how many times a head
+word is located at a particular displacement from the
+previous cept\(aqs center
+.IP c. 3
+for a particular word class, count how many times a
+non\-head word is located at a particular displacement from
+the previous target word
+.IP d. 3
+count how many times a source word is aligned to phi number
+of target words
+.IP e. 3
+count how many times NULL is aligned to a target word
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+M step \- Estimate new probabilities based on the counts from the E step
+.sp
+Like Model 3, there are too many possible alignments to consider. Thus,
+a hill climbing approach is used to sample good candidates.
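+.sp
+The center and displacement defined above are straightforward to compute.
+This added sketch (illustrative only, not NLTK\(aqs internal code)
+reproduces the worked example of a tablet with words at target positions
+2, 5 and 6:
+.sp
+.nf
+.ft C
+>>> from math import ceil
+>>> tablet = [2, 5, 6]
+>>> ceil(sum(tablet) / len(tablet))  # center of the cept
+5
+>>> head, previous_center = 4, 5  # hypothetical positions
+>>> head \- previous_center  # head word displacement; may be negative
+\-1
+.ft P
+.fi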
+.sp +Notations: +i: Position in the source sentence +.INDENT 0.0 +.INDENT 3.5 +Valid values are 0 (for NULL), 1, 2, ..., length of source sentence +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B j: Position in the target sentence +Valid values are 1, 2, ..., length of target sentence +.UNINDENT +.sp +l: Number of words in the source sentence, excluding NULL +m: Number of words in the target sentence +s: A word in the source language +t: A word in the target language +phi: Fertility, the number of target words produced by a source word +p1: Probability that a target word produced by a source word is +.INDENT 0.0 +.INDENT 3.5 +accompanied by another target word that is aligned to NULL +.UNINDENT +.UNINDENT +.sp +p0: 1 \- p1 +dj: Displacement, Δj +.sp +References: +Philipp Koehn. 2010. Statistical Machine Translation. +Cambridge University Press, New York. +.sp +Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and +Robert L. Mercer. 1993. The Mathematics of Statistical Machine +Translation: Parameter Estimation. Computational Linguistics, 19 (2), +263\-311. +.INDENT 0.0 +.TP +.B class nltk.translate.ibm4.IBMModel4(sentence_aligned_corpus, iterations, source_word_classes, target_word_classes, probability_tables=None) +Bases: \fI\%nltk.translate.ibm_model.IBMModel\fP +.sp +Translation model that reorders output words based on their type and +their distance from other related words in the output sentence +.sp +.nf +.ft C +>>> bitext = [] +>>> bitext.append(AlignedSent([\(aqklein\(aq, \(aqist\(aq, \(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq, \(aqwar\(aq, \(aqja\(aq, \(aqgroß\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqwas\(aq, \(aqbig\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq], [\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqein\(aq, \(aqhaus\(aq, \(aqist\(aq, \(aqklein\(aq], [\(aqa\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq], [\(aqthe\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqein\(aq, \(aqbuch\(aq], [\(aqa\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqich\(aq, \(aqfasse\(aq, \(aqdas\(aq, \(aqbuch\(aq, \(aqzusammen\(aq], [\(aqi\(aq, \(aqsummarize\(aq, \(aqthe\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqfasse\(aq, \(aqzusammen\(aq], [\(aqsummarize\(aq])) +>>> src_classes = {\(aqthe\(aq: 0, \(aqa\(aq: 0, \(aqsmall\(aq: 1, \(aqbig\(aq: 1, \(aqhouse\(aq: 2, \(aqbook\(aq: 2, \(aqis\(aq: 3, \(aqwas\(aq: 3, \(aqi\(aq: 4, \(aqsummarize\(aq: 5 } +>>> trg_classes = {\(aqdas\(aq: 0, \(aqein\(aq: 0, \(aqhaus\(aq: 1, \(aqbuch\(aq: 1, \(aqklein\(aq: 2, \(aqgroß\(aq: 2, \(aqist\(aq: 3, \(aqwar\(aq: 3, \(aqja\(aq: 4, \(aqich\(aq: 5, \(aqfasse\(aq: 6, \(aqzusammen\(aq: 6 } +.ft P +.fi +.sp +.nf +.ft C +>>> ibm4 = IBMModel4(bitext, 5, src_classes, trg_classes) +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm4.translation_table[\(aqbuch\(aq][\(aqbook\(aq], 3)) +1.0 +>>> print(round(ibm4.translation_table[\(aqdas\(aq][\(aqbook\(aq], 3)) +0.0 +>>> print(round(ibm4.translation_table[\(aqja\(aq][None], 3)) +1.0 +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm4.head_distortion_table[1][0][1], 3)) +1.0 +>>> print(round(ibm4.head_distortion_table[2][0][1], 3)) +0.0 +>>> 
print(round(ibm4.non_head_distortion_table[3][6], 3)) +0.5 +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm4.fertility_table[2][\(aqsummarize\(aq], 3)) +1.0 +>>> print(round(ibm4.fertility_table[1][\(aqbook\(aq], 3)) +1.0 +.ft P +.fi +.sp +.nf +.ft C +>>> print(ibm4.p1) +0.033... +.ft P +.fi +.sp +.nf +.ft C +>>> test_sentence = bitext[2] +>>> test_sentence.words +[\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq] +>>> test_sentence.mots +[\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq] +>>> test_sentence.alignment +Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)]) +.ft P +.fi +.INDENT 7.0 +.TP +.B maximize_distortion_probabilities(counts) +.UNINDENT +.INDENT 7.0 +.TP +.B static model4_prob_t_a_given_s(alignment_info, ibm_model) +.UNINDENT +.INDENT 7.0 +.TP +.B prob_t_a_given_s(alignment_info) +Probability of target sentence and an alignment given the +source sentence +.UNINDENT +.INDENT 7.0 +.TP +.B reset_probabilities() +.UNINDENT +.INDENT 7.0 +.TP +.B set_uniform_probabilities(sentence_aligned_corpus) +Set distortion probabilities uniformly to +1 / cardinality of displacement values +.UNINDENT +.INDENT 7.0 +.TP +.B train(parallel_corpus) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.translate.ibm4.Model4Counts +Bases: \fI\%nltk.translate.ibm_model.Counts\fP +.sp +Data object to store counts of various parameters during training. +Includes counts for distortion. +.INDENT 7.0 +.TP +.B update_distortion(count, alignment_info, j, src_classes, trg_classes) +.UNINDENT +.UNINDENT +.SS nltk.translate.ibm5 module +.sp +Translation model that keeps track of vacant positions in the target +sentence to decide where to place translated words. +.sp +Translation can be viewed as a process where each word in the source +sentence is stepped through sequentially, generating translated words +for each source word. The target sentence can be viewed as being made +up of \fBm\fP empty slots initially, which gradually fill up as generated +words are placed in them. +.sp +Models 3 and 4 use distortion probabilities to decide how to place +translated words. For simplicity, these models ignore the history of +which slots have already been occupied with translated words. +Consider the placement of the last translated word: there is only one +empty slot left in the target sentence, so the distortion probability +should be 1.0 for that position and 0.0 everywhere else. However, the +distortion probabilities for Models 3 and 4 are set up such that all +positions are under consideration. +.sp +IBM Model 5 fixes this deficiency by accounting for occupied slots +during translation. It introduces the vacancy function v(j), the number +of vacancies up to, and including, position j in the target sentence. +.sp +Terminology: +Maximum vacancy: +.INDENT 0.0 +.INDENT 3.5 +The number of valid slots that a word can be placed in. +This is not necessarily the same as the number of vacant slots. +For example, if a tablet contains more than one word, the head word +cannot be placed at the last vacant slot because there will be no +space for the other words in the tablet. The number of valid slots +has to take into account the length of the tablet. +Non\-head words cannot be placed before the head word, so vacancies +to the left of the head word are ignored. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B Vacancy difference: +For a head word: (v(j) \- v(center of previous cept)) +Can be positive or negative. 
+For a non\-head word: (v(j) \- v(position of previously placed word))
+Always positive, because successive words in a tablet are assumed to
+appear to the right of the previous word.
+.UNINDENT
+.sp
+Positioning of target words falls under three cases:
+(1) Words generated by NULL are distributed uniformly
+(2) For a head word t, its position is modeled by the probability
+.INDENT 0.0
+.INDENT 3.5
+v_head(dv | max_v,word_class_t(t))
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.IP 3. 3
+For a non\-head word t, its position is modeled by the probability
+v_non_head(dv | max_v,word_class_t(t))
+.UNINDENT
+.sp
+dv and max_v are defined differently for head and non\-head words.
+.sp
+The EM algorithm used in Model 5 is:
+E step \- In the training data, collect counts, weighted by prior
+.INDENT 0.0
+.INDENT 3.5
+probabilities.
+(a) count how many times a source language word is translated
+.INDENT 0.0
+.INDENT 3.5
+into a target language word
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.IP b. 3
+for a particular word class and maximum vacancy, count how
+many times a head word and the previous cept\(aqs center have
+a particular difference in number of vacancies
+.UNINDENT
+.INDENT 0.0
+.IP c. 3
+for a particular word class and maximum vacancy, count how
+many times a non\-head word and the previous target word
+have a particular difference in number of vacancies
+.UNINDENT
+.INDENT 0.0
+.IP d. 3
+count how many times a source word is aligned to phi number
+of target words
+.IP e. 3
+count how many times NULL is aligned to a target word
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+M step \- Estimate new probabilities based on the counts from the E step
+.sp
+Like Model 4, there are too many possible alignments to consider. Thus,
+a hill climbing approach is used to sample good candidates. In addition,
+pruning is used to weed out unlikely alignments based on Model 4 scores.
+.sp
+Notations:
+i: Position in the source sentence
+.INDENT 0.0
+.INDENT 3.5
+Valid values are 0 (for NULL), 1, 2, ..., length of source sentence
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B j: Position in the target sentence
+Valid values are 1, 2, ..., length of target sentence
+.UNINDENT
+.sp
+l: Number of words in the source sentence, excluding NULL
+m: Number of words in the target sentence
+s: A word in the source language
+t: A word in the target language
+phi: Fertility, the number of target words produced by a source word
+p1: Probability that a target word produced by a source word is
+.INDENT 0.0
+.INDENT 3.5
+accompanied by another target word that is aligned to NULL
+.UNINDENT
+.UNINDENT
+.sp
+p0: 1 \- p1
+max_v: Maximum vacancy
+dv: Vacancy difference, Δv
+.sp
+The definition of v_head here differs from GIZA++, section 4.7 of
+[Brown et al., 1993], and [Koehn, 2010]. In the latter cases, v_head is
+v_head(v(j) | v(center of previous cept),max_v,word_class(t)).
+.sp
+Here, we follow appendix B of [Brown et al., 1993] and combine v(j) with
+v(center of previous cept) to obtain dv:
+v_head(v(j) \- v(center of previous cept) | max_v,word_class(t)).
+.sp
+References:
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+.sp
+Peter E Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263\-311.
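+.sp
+The vacancy function v(j) is simple bookkeeping over occupied target
+slots. This added sketch (illustrative only; the real bookkeeping lives
+in the \fBSlots\fP class further below) counts the vacancies up to and
+including position j:
+.sp
+.nf
+.ft C
+>>> occupied = {2, 4}  # target positions already filled
+>>> def v(j, occupied):
+\&...     return sum(1 for k in range(1, j + 1) if k not in occupied)
+>>> v(5, occupied)  # positions 1, 3 and 5 are still vacant
+3
+>>> v(2, occupied)  # only position 1 is vacant up to position 2
+1
+.ft P
+.fi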
+.INDENT 0.0 +.TP +.B class nltk.translate.ibm5.IBMModel5(sentence_aligned_corpus, iterations, source_word_classes, target_word_classes, probability_tables=None) +Bases: \fI\%nltk.translate.ibm_model.IBMModel\fP +.sp +Translation model that keeps track of vacant positions in the target +sentence to decide where to place translated words +.sp +.nf +.ft C +>>> bitext = [] +>>> bitext.append(AlignedSent([\(aqklein\(aq, \(aqist\(aq, \(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq, \(aqwar\(aq, \(aqja\(aq, \(aqgroß\(aq], [\(aqthe\(aq, \(aqhouse\(aq, \(aqwas\(aq, \(aqbig\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq], [\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqein\(aq, \(aqhaus\(aq, \(aqist\(aq, \(aqklein\(aq], [\(aqa\(aq, \(aqhouse\(aq, \(aqis\(aq, \(aqsmall\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqhaus\(aq], [\(aqthe\(aq, \(aqhouse\(aq])) +>>> bitext.append(AlignedSent([\(aqdas\(aq, \(aqbuch\(aq], [\(aqthe\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqein\(aq, \(aqbuch\(aq], [\(aqa\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqich\(aq, \(aqfasse\(aq, \(aqdas\(aq, \(aqbuch\(aq, \(aqzusammen\(aq], [\(aqi\(aq, \(aqsummarize\(aq, \(aqthe\(aq, \(aqbook\(aq])) +>>> bitext.append(AlignedSent([\(aqfasse\(aq, \(aqzusammen\(aq], [\(aqsummarize\(aq])) +>>> src_classes = {\(aqthe\(aq: 0, \(aqa\(aq: 0, \(aqsmall\(aq: 1, \(aqbig\(aq: 1, \(aqhouse\(aq: 2, \(aqbook\(aq: 2, \(aqis\(aq: 3, \(aqwas\(aq: 3, \(aqi\(aq: 4, \(aqsummarize\(aq: 5 } +>>> trg_classes = {\(aqdas\(aq: 0, \(aqein\(aq: 0, \(aqhaus\(aq: 1, \(aqbuch\(aq: 1, \(aqklein\(aq: 2, \(aqgroß\(aq: 2, \(aqist\(aq: 3, \(aqwar\(aq: 3, \(aqja\(aq: 4, \(aqich\(aq: 5, \(aqfasse\(aq: 6, \(aqzusammen\(aq: 6 } +.ft P +.fi +.sp +.nf +.ft C +>>> ibm5 = IBMModel5(bitext, 5, src_classes, trg_classes) +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm5.head_vacancy_table[1][1][1], 3)) +1.0 +>>> print(round(ibm5.head_vacancy_table[2][1][1], 3)) +0.0 +>>> print(round(ibm5.non_head_vacancy_table[3][3][6], 3)) +1.0 +.ft P +.fi +.sp +.nf +.ft C +>>> print(round(ibm5.fertility_table[2][\(aqsummarize\(aq], 3)) +1.0 +>>> print(round(ibm5.fertility_table[1][\(aqbook\(aq], 3)) +1.0 +.ft P +.fi +.sp +.nf +.ft C +>>> print(ibm5.p1) +0.033... +.ft P +.fi +.sp +.nf +.ft C +>>> test_sentence = bitext[2] +>>> test_sentence.words +[\(aqdas\(aq, \(aqbuch\(aq, \(aqist\(aq, \(aqja\(aq, \(aqklein\(aq] +>>> test_sentence.mots +[\(aqthe\(aq, \(aqbook\(aq, \(aqis\(aq, \(aqsmall\(aq] +>>> test_sentence.alignment +Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)]) +.ft P +.fi +.INDENT 7.0 +.TP +.B MIN_SCORE_FACTOR = 0.2 +Alignments with scores below this factor are pruned during sampling +.UNINDENT +.INDENT 7.0 +.TP +.B hillclimb(alignment_info, j_pegged=None) +Starting from the alignment in \fBalignment_info\fP, look at +neighboring alignments iteratively for the best one, according +to Model 4 +.sp +Note that Model 4 scoring is used instead of Model 5 because the +latter is too expensive to compute. +.sp +There is no guarantee that the best alignment in the alignment +space will be found, because the algorithm might be stuck in a +local maximum. 
+.INDENT 7.0 +.TP +.B Parameters +\fBj_pegged\fP (\fIint\fP) \-\- If specified, the search will be constrained to +alignments where \fBj_pegged\fP remains unchanged +.TP +.B Returns +The best alignment found from hill climbing +.TP +.B Return type +AlignmentInfo +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B maximize_vacancy_probabilities(counts) +.UNINDENT +.INDENT 7.0 +.TP +.B prob_t_a_given_s(alignment_info) +Probability of target sentence and an alignment given the +source sentence +.UNINDENT +.INDENT 7.0 +.TP +.B prune(alignment_infos) +Removes alignments from \fBalignment_infos\fP that have +substantially lower Model 4 scores than the best alignment +.INDENT 7.0 +.TP +.B Returns +Pruned alignments +.TP +.B Return type +set(AlignmentInfo) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B reset_probabilities() +.UNINDENT +.INDENT 7.0 +.TP +.B sample(sentence_pair) +Sample the most probable alignments from the entire alignment +space according to Model 4 +.sp +Note that Model 4 scoring is used instead of Model 5 because the +latter is too expensive to compute. +.sp +First, determine the best alignment according to IBM Model 2. +With this initial alignment, use hill climbing to determine the +best alignment according to a IBM Model 4. Add this +alignment and its neighbors to the sample set. Repeat this +process with other initial alignments obtained by pegging an +alignment point. Finally, prune alignments that have +substantially lower Model 4 scores than the best alignment. +.INDENT 7.0 +.TP +.B Parameters +\fBsentence_pair\fP (\fIAlignedSent\fP) \-\- Source and target language sentence pair +to generate a sample of alignments from +.TP +.B Returns +A set of best alignments represented by their \fBAlignmentInfo\fP +and the best alignment of the set for convenience +.TP +.B Return type +set(AlignmentInfo), AlignmentInfo +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_uniform_probabilities(sentence_aligned_corpus) +Set vacancy probabilities uniformly to +1 / cardinality of vacancy difference values +.UNINDENT +.INDENT 7.0 +.TP +.B train(parallel_corpus) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.translate.ibm5.Model5Counts +Bases: \fI\%nltk.translate.ibm_model.Counts\fP +.sp +Data object to store counts of various parameters during training. +Includes counts for vacancies. +.INDENT 7.0 +.TP +.B update_vacancy(count, alignment_info, i, trg_classes, slots) +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBcount\fP \-\- Value to add to the vacancy counts +.IP \(bu 2 +\fBalignment_info\fP \-\- Alignment under consideration +.IP \(bu 2 +\fBi\fP \-\- Source word position under consideration +.IP \(bu 2 +\fBtrg_classes\fP \-\- Target word classes +.IP \(bu 2 +\fBslots\fP \-\- Vacancy states of the slots in the target sentence. +Output parameter that will be modified as new words are placed +in the target sentence. +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.translate.ibm5.Slots(target_sentence_length) +Bases: \fBobject\fP +.sp +Represents positions in a target sentence. Used to keep track of +which slot (position) is occupied. +.INDENT 7.0 +.TP +.B occupy(position) +.INDENT 7.0 +.TP +.B Returns +Mark slot at \fBposition\fP as occupied +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B vacancies_at(position) +.INDENT 7.0 +.TP +.B Returns +Number of vacant slots up to, and including, \fBposition\fP +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.translate.ibm_model module +.sp +Common methods and classes for all IBM models. 
+.SS nltk.translate.ibm_model module
+.sp
+Common methods and classes for all IBM models.
+See \fBIBMModel1\fP, \fBIBMModel2\fP, \fBIBMModel3\fP, \fBIBMModel4\fP,
+and \fBIBMModel5\fP for specific implementations.
+.sp
+The IBM models are a series of generative models that learn lexical
+translation probabilities, p(target language word|source language word),
+given a sentence\-aligned parallel corpus.
+.sp
+The models increase in sophistication from model 1 to 5. Typically, the
+output of lower models is used to seed the higher models. All models
+use the Expectation\-Maximization (EM) algorithm to learn various
+probability tables.
+.sp
+Words in a sentence are one\-indexed. The first word of a sentence has
+position 1, not 0. Index 0 is reserved in the source sentence for the
+NULL token. The concept of position does not apply to NULL, but it is
+indexed at 0 by convention.
+.sp
+Each target word is aligned to exactly one source word or the NULL
+token.
+.sp
+References:
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+.sp
+Peter E. Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263\-311.
+.INDENT 0.0
+.TP
+.B class nltk.translate.ibm_model.AlignmentInfo(alignment, src_sentence, trg_sentence, cepts)
+Bases: \fBobject\fP
+.sp
+Helper data object for training IBM Models 3 and up
+.sp
+Read\-only. For a source sentence and its counterpart in the target
+language, this class holds information about the sentence pair\(aqs
+alignment, cepts, and fertility.
+.sp
+Warning: Alignments are one\-indexed here, in contrast to
+nltk.translate.Alignment and AlignedSent, which are zero\-indexed.
+This class is not meant to be used outside of IBM models.
+.INDENT 7.0
+.TP
+.B alignment
+tuple(int): Alignment function. \fBalignment[j]\fP is the position
+in the source sentence that is aligned to the position j in the
+target sentence.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B center_of_cept(i)
+.INDENT 7.0
+.TP
+.B Returns
+The ceiling of the average positions of the words in
+the tablet of cept \fBi\fP, or 0 if \fBi\fP is None
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B cepts
+list(list(int)): The positions of the target words, in
+ascending order, aligned to a source word position. For example,
+cepts[4] = (2, 3, 7) means that words in positions 2, 3 and 7
+of the target sentence are aligned to the word in position 4 of
+the source sentence
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fertility_of_i(i)
+Fertility of word in position \fBi\fP of the source sentence
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_head_word(j)
+.INDENT 7.0
+.TP
+.B Returns
+Whether the word in position \fBj\fP of the target
+sentence is a head word
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B previous_cept(j)
+.INDENT 7.0
+.TP
+.B Returns
+The previous cept of \fBj\fP, or None if \fBj\fP belongs to
+the first cept
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B previous_in_tablet(j)
+.INDENT 7.0
+.TP
+.B Returns
+The position of the previous word that is in the same
+tablet as \fBj\fP, or None if \fBj\fP is the first word of the
+tablet
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B score
+float: Optional. Probability of alignment, as defined by the
+IBM model that assesses this alignment
+.UNINDENT
+.INDENT 7.0
+.TP
+.B src_sentence
+tuple(str): Source sentence referred to by this object.
+Should include NULL token (None) in index 0.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B trg_sentence
+tuple(str): Target sentence referred to by this object.
+Should have a dummy element in index 0 so that the first word
+starts from index 1.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B zero_indexed_alignment()
+.INDENT 7.0
+.TP
+.B Returns
+Zero\-indexed alignment, suitable for use in external
+\fBnltk.translate\fP modules like \fBnltk.translate.Alignment\fP
+.TP
+.B Return type
+list(tuple)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.translate.ibm_model.Counts
+Bases: \fBobject\fP
+.sp
+Data object to store counts of various parameters during training
+.INDENT 7.0
+.TP
+.B update_fertility(count, alignment_info)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update_lexical_translation(count, alignment_info, j)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update_null_generation(count, alignment_info)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.translate.ibm_model.IBMModel(sentence_aligned_corpus)
+Bases: \fBobject\fP
+.sp
+Abstract base class for all IBM models
+.INDENT 7.0
+.TP
+.B MIN_PROB = 1e\-12
+.UNINDENT
+.INDENT 7.0
+.TP
+.B best_model2_alignment(sentence_pair, j_pegged=None, i_pegged=0)
+Finds the best alignment according to IBM Model 2
+.sp
+Used as a starting point for hill climbing in Models 3 and
+above, because it is easier to compute than the best alignments
+in higher models
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsentence_pair\fP (\fIAlignedSent\fP) \-\- Source and target language sentence pair
+to be word\-aligned
+.IP \(bu 2
+\fBj_pegged\fP (\fIint\fP) \-\- If specified, the alignment point of j_pegged
+will be fixed to i_pegged
+.IP \(bu 2
+\fBi_pegged\fP (\fIint\fP) \-\- Alignment point to j_pegged
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B hillclimb(alignment_info, j_pegged=None)
+Starting from the alignment in \fBalignment_info\fP, look at
+neighboring alignments iteratively for the best one
+.sp
+There is no guarantee that the best alignment in the alignment
+space will be found, because the algorithm might be stuck in a
+local maximum.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBj_pegged\fP (\fIint\fP) \-\- If specified, the search will be constrained to
+alignments where \fBj_pegged\fP remains unchanged
+.TP
+.B Returns
+The best alignment found from hill climbing
+.TP
+.B Return type
+AlignmentInfo
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B init_vocab(sentence_aligned_corpus)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B maximize_fertility_probabilities(counts)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B maximize_lexical_translation_probabilities(counts)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B maximize_null_generation_probabilities(counts)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B neighboring(alignment_info, j_pegged=None)
+Determine the neighbors of \fBalignment_info\fP, obtained by
+moving or swapping one alignment point
+.INDENT 7.0
+.TP
+.B Parameters
+\fBj_pegged\fP (\fIint\fP) \-\- If specified, neighbors that have a different
+alignment point from j_pegged will not be considered
+.TP
+.B Returns
+A set of neighboring alignments represented by their
+\fBAlignmentInfo\fP
+.TP
+.B Return type
+set(AlignmentInfo)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B prob_of_alignments(alignments)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B prob_t_a_given_s(alignment_info)
+Probability of target sentence and an alignment given the
+source sentence
+.sp
+All required information is assumed to be in \fBalignment_info\fP
+and self.
+.sp
+Derived classes should override this method
+.UNINDENT
+.INDENT 7.0
+.TP
+.B reset_probabilities()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sample(sentence_pair)
+Sample the most probable alignments from the entire alignment
+space
+.sp
+First, determine the best alignment according to IBM Model 2.
+With this initial alignment, use hill climbing to determine the
+best alignment according to a higher IBM Model. Add this
+alignment and its neighbors to the sample set. Repeat this
+process with other initial alignments obtained by pegging an
+alignment point.
+.sp
+Hill climbing may get stuck in a local maximum, hence the pegging
+and trying out of different alignments.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsentence_pair\fP (\fIAlignedSent\fP) \-\- Source and target language sentence pair
+to generate a sample of alignments from
+.TP
+.B Returns
+A set of best alignments represented by their \fBAlignmentInfo\fP
+and the best alignment of the set for convenience
+.TP
+.B Return type
+set(AlignmentInfo), AlignmentInfo
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B set_uniform_probabilities(sentence_aligned_corpus)
+Initialize probability tables to a uniform distribution
+.sp
+Derived classes should implement this accordingly.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.ibm_model.longest_target_sentence_length(sentence_aligned_corpus)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsentence_aligned_corpus\fP (\fIlist\fP\fI(\fP\fIAlignedSent\fP\fI)\fP) \-\- Parallel corpus under consideration
+.TP
+.B Returns
+Number of words in the longest target language sentence
+of \fBsentence_aligned_corpus\fP
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.meteor_score module
+.INDENT 0.0
+.TP
+.B nltk.translate.meteor_score.align_words(hypothesis, reference, stemmer=PorterStemmer(), wordnet=wordnet)
+Aligns/matches words in the hypothesis to words in the reference by
+sequentially applying exact match, stemmed match and WordNet\-based
+synonym match. In case there are multiple matches, the match with the
+least number of crossings is chosen.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBhypothesis\fP \-\- hypothesis string
+.IP \(bu 2
+\fBreference\fP \-\- reference string
+.IP \(bu 2
+\fBstemmer\fP (\fInltk.stem.api.StemmerI\fP\fI or \fP\fIany class that implements a stem method\fP) \-\- nltk.stem.api.StemmerI object (default PorterStemmer())
+.IP \(bu 2
+\fBwordnet\fP (\fIWordNetCorpusReader\fP) \-\- a wordnet corpus reader object (default nltk.corpus.wordnet)
+.UNINDENT
+.TP
+.B Returns
+sorted list of matched tuples, unmatched hypothesis list, unmatched reference list
+.TP
+.B Return type
+list of tuples, list of tuples, list of tuples
+.UNINDENT
+.UNINDENT
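+.sp
+A hedged usage sketch (the sentences are made up for illustration; the
+returned lists are easiest to inspect interactively, so no output is shown):
+.sp
+.nf
+.ft C
+>>> from nltk.translate.meteor_score import align_words
+>>> matches, unmatched_hyp, unmatched_ref = align_words(
+\&...     \(aqthe cat sat on the mat\(aq, \(aqthe cat was sat on the mat\(aq)
+.ft P
+.fi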
+.INDENT 0.0
+.TP
+.B nltk.translate.meteor_score.exact_match(hypothesis, reference)
+Matches exact words in the hypothesis and reference
+and returns a word mapping based on the enumerated
+word id between hypothesis and reference
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBhypothesis\fP (\fIstr\fP) \-\- hypothesis string
+.IP \(bu 2
+\fBreference\fP (\fIstr\fP) \-\- reference string
+.UNINDENT
+.TP
+.B Returns
+enumerated matched tuples, enumerated unmatched hypothesis tuples,
+enumerated unmatched reference tuples
+.TP
+.B Return type
+list of 2D tuples, list of 2D tuples, list of 2D tuples
+.UNINDENT
+.UNINDENT
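+.sp
+Similarly, a minimal sketch of calling \fBexact_match\fP directly (strings
+are illustrative; output omitted):
+.sp
+.nf
+.ft C
+>>> from nltk.translate.meteor_score import exact_match
+>>> matches, unmatched_hyp, unmatched_ref = exact_match(
+\&...     \(aqthe cat sat\(aq, \(aqthe cat meowed\(aq)
+.ft P
+.fi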
+.INDENT 0.0
+.TP
+.B nltk.translate.meteor_score.meteor_score(references, hypothesis, preprocess=str.lower, stemmer=PorterStemmer(), wordnet=wordnet, alpha=0.9, beta=3, gamma=0.5)
+Calculates the METEOR score for a hypothesis with multiple references, as
+described in "Meteor: An Automatic Metric for MT Evaluation with
+High Levels of Correlation with Human Judgments" by Alon Lavie and
+Abhaya Agarwal, in Proceedings of ACL.
+\fI\%http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie\-Agarwal\-2007\-METEOR.pdf\fP
+.sp
+In case of multiple references, the best score is chosen. This method
+iterates over single_meteor_score and picks the best pair among all
+the references for a given hypothesis.
+.sp
+.nf
+.ft C
+>>> hypothesis1 = \(aqIt is a guide to action which ensures that the military always obeys the commands of the party\(aq
+>>> hypothesis2 = \(aqIt is to insure the troops forever hearing the activity guidebook that party direct\(aq
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> reference1 = \(aqIt is a guide to action that ensures that the military will forever heed Party commands\(aq
+>>> reference2 = \(aqIt is the guiding principle which guarantees the military forces always being under the command of the Party\(aq
+>>> reference3 = \(aqIt is the practical guide for the army always to heed the directions of the party\(aq
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> round(meteor_score([reference1, reference2, reference3], hypothesis1),4)
+0.7398
+.ft P
+.fi
+.INDENT 7.0
+.INDENT 3.5
+If there are no word matches during the alignment, the method returns a
+score of 0. We can safely return a zero instead of raising a
+division by zero error, as no match usually implies a bad translation.
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> round(meteor_score([\(aqthis is a cat\(aq], \(aqnon matching hypothesis\(aq),4)
+0.0
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- reference sentences
+.IP \(bu 2
+\fBhypothesis\fP (\fIstr\fP) \-\- a hypothesis sentence
+.IP \(bu 2
+\fBpreprocess\fP (\fImethod\fP) \-\- preprocessing function (default str.lower)
+.IP \(bu 2
+\fBstemmer\fP (\fInltk.stem.api.StemmerI\fP\fI or \fP\fIany class that implements a stem method\fP) \-\- nltk.stem.api.StemmerI object (default PorterStemmer())
+.IP \(bu 2
+\fBwordnet\fP (\fIWordNetCorpusReader\fP) \-\- a wordnet corpus reader object (default nltk.corpus.wordnet)
+.IP \(bu 2
+\fBalpha\fP (\fIfloat\fP) \-\- parameter for controlling relative weights of precision and recall.
+.IP \(bu 2
+\fBbeta\fP (\fIfloat\fP) \-\- parameter for controlling shape of penalty as a function
+of fragmentation.
+.IP \(bu 2
+\fBgamma\fP (\fIfloat\fP) \-\- relative weight assigned to fragmentation penalty.
+.UNINDENT
+.TP
+.B Returns
+The sentence\-level METEOR score.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.meteor_score.single_meteor_score(reference, hypothesis, preprocess=str.lower, stemmer=PorterStemmer(), wordnet=wordnet, alpha=0.9, beta=3, gamma=0.5)
+Calculates the METEOR score for a single hypothesis and reference as per
+"Meteor: An Automatic Metric for MT Evaluation with High Levels of
+Correlation with Human Judgments" by Alon Lavie and Abhaya Agarwal,
+in Proceedings of ACL.
+\fI\%http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie\-Agarwal\-2007\-METEOR.pdf\fP
+.sp
+.nf
+.ft C
+>>> hypothesis1 = \(aqIt is a guide to action which ensures that the military always obeys the commands of the party\(aq
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> reference1 = \(aqIt is a guide to action that ensures that the military will forever heed Party commands\(aq
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> round(single_meteor_score(reference1, hypothesis1),4)
+0.7398
+.ft P
+.fi
+.INDENT 7.0
+.INDENT 3.5
+If there are no word matches during the alignment, the method returns a
+score of 0. We can safely return a zero instead of raising a
+division by zero error, as no match usually implies a bad translation.
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> round(single_meteor_score(\(aqthis is a cat\(aq, \(aqnon matching hypothesis\(aq),4)
+0.0
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreference\fP (\fIstr\fP) \-\- reference sentence
+.IP \(bu 2
+\fBhypothesis\fP (\fIstr\fP) \-\- a hypothesis sentence
+.IP \(bu 2
+\fBpreprocess\fP (\fImethod\fP) \-\- preprocessing function (default str.lower)
+.IP \(bu 2
+\fBstemmer\fP (\fInltk.stem.api.StemmerI\fP\fI or \fP\fIany class that implements a stem method\fP) \-\- nltk.stem.api.StemmerI object (default PorterStemmer())
+.IP \(bu 2
+\fBwordnet\fP (\fIWordNetCorpusReader\fP) \-\- a wordnet corpus reader object (default nltk.corpus.wordnet)
+.IP \(bu 2
+\fBalpha\fP (\fIfloat\fP) \-\- parameter for controlling relative weights of precision and recall.
+.IP \(bu 2
+\fBbeta\fP (\fIfloat\fP) \-\- parameter for controlling shape of penalty as a
+function of fragmentation.
+.IP \(bu 2
+\fBgamma\fP (\fIfloat\fP) \-\- relative weight assigned to fragmentation penalty.
+.UNINDENT
+.TP
+.B Returns
+The sentence\-level METEOR score.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.meteor_score.stem_match(hypothesis, reference, stemmer=PorterStemmer())
+Stems each word and matches them in hypothesis and reference
+and returns a word mapping between hypothesis and reference
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBhypothesis\fP \-\- hypothesis string
+.IP \(bu 2
+\fBreference\fP \-\- reference string
+.IP \(bu 2
+\fBstemmer\fP (\fInltk.stem.api.StemmerI\fP\fI or \fP\fIany class that
+implements a stem method\fP) \-\- nltk.stem.api.StemmerI object (default PorterStemmer())
+.UNINDENT
+.TP
+.B Returns
+enumerated matched tuples, enumerated unmatched hypothesis tuples,
+enumerated unmatched reference tuples
+.TP
+.B Return type
+list of 2D tuples, list of 2D tuples, list of 2D tuples
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.meteor_score.wordnetsyn_match(hypothesis, reference, wordnet=wordnet)
+Matches each word in the reference to a word in the hypothesis if any synonym
+of a hypothesis word is the exact match to the reference word.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBhypothesis\fP \-\- hypothesis string
+.IP \(bu 2
+\fBreference\fP \-\- reference string
+.IP \(bu 2
+\fBwordnet\fP (\fIWordNetCorpusReader\fP) \-\- a wordnet corpus reader object (default nltk.corpus.wordnet)
+.UNINDENT
+.TP
+.B Returns
+list of mapped tuples
+.TP
+.B Return type
+list of tuples
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.metrics module
+.INDENT 0.0
+.TP
+.B nltk.translate.metrics.alignment_error_rate(reference, hypothesis, possible=None)
+Return the Alignment Error Rate (AER) of an alignment
+with respect to a "gold standard" reference alignment.
+Return an error rate between 0.0 (perfect alignment) and 1.0 (no
+alignment).
+.sp
+.nf
+.ft C
+>>> from nltk.translate import Alignment
+>>> ref = Alignment([(0, 0), (1, 1), (2, 2)])
+>>> test = Alignment([(0, 0), (1, 2), (2, 1)])
+>>> alignment_error_rate(ref, test)
+0.6666666666666667
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreference\fP (\fIAlignment\fP) \-\- A gold standard alignment (sure alignments)
+.IP \(bu 2
+\fBhypothesis\fP (\fIAlignment\fP) \-\- A hypothesis alignment (aka. candidate alignments)
+.IP \(bu 2
+\fBpossible\fP (\fIAlignment\fP\fI or \fP\fINone\fP) \-\- A gold standard reference of possible alignments
+(defaults to \fIreference\fP if None)
+.UNINDENT
+.TP
+.B Return type
+float or None
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.nist_score module
+.sp
+NIST score implementation.
+.INDENT 0.0
+.TP
+.B nltk.translate.nist_score.corpus_nist(list_of_references, hypotheses, n=5)
+Calculate a single corpus\-level NIST score (aka. system\-level NIST) for all
+the hypotheses and their respective references.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- a corpus of lists of reference sentences, w.r.t. hypotheses
+.IP \(bu 2
+\fBhypotheses\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- a list of hypothesis sentences
+.IP \(bu 2
+\fBn\fP (\fIint\fP) \-\- highest n\-gram order
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.nist_score.nist_length_penalty(ref_len, hyp_len)
+Calculates the NIST length penalty, from Eq. 3 in Doddington (2002)
+.INDENT 7.0
+.INDENT 3.5
+penalty = exp( beta * log( min( len(hyp)/len(ref) , 1.0 ))**2 )
+.UNINDENT
+.UNINDENT
+.sp
+where,
+.INDENT 7.0
+.INDENT 3.5
+\fIbeta\fP is chosen to make the brevity penalty factor = 0.5 when the
+no. of words in the system output (hyp) is 2/3 of the average
+no. of words in the reference translation (ref)
+.UNINDENT
+.UNINDENT
+.sp
+The NIST penalty differs from BLEU\(aqs in that it minimizes the impact
+on the score of small variations in the length of a translation.
+See Fig. 4 in Doddington (2002)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.nist_score.sentence_nist(references, hypothesis, n=5)
+Calculate NIST score from
+George Doddington. 2002. "Automatic evaluation of machine translation quality
+using n\-gram co\-occurrence statistics." Proceedings of HLT.
+Morgan Kaufmann Publishers Inc. \fI\%http://dl.acm.org/citation.cfm?id=1289189.1289273\fP
+.sp
+DARPA commissioned NIST to develop an MT evaluation facility based on the BLEU
+score. The official script used by NIST to compute BLEU and NIST scores is
+mteval\-14.pl. The main differences are:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+BLEU uses the geometric mean of the ngram overlaps; NIST uses the arithmetic mean.
+.IP \(bu 2
+NIST has a different brevity penalty
+.IP \(bu 2
+NIST score from mteval\-14.pl has a self\-contained tokenizer
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Note: mteval\-14.pl includes a smoothing function for the BLEU score that is NOT
+used in the NIST score computation.
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> hypothesis1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqwhich\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqalways\(aq,
+\&... \(aqobeys\(aq, \(aqthe\(aq, \(aqcommands\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> hypothesis2 = [\(aqIt\(aq, \(aqis\(aq, \(aqto\(aq, \(aqinsure\(aq, \(aqthe\(aq, \(aqtroops\(aq,
+\&... \(aqforever\(aq, \(aqhearing\(aq, \(aqthe\(aq, \(aqactivity\(aq, \(aqguidebook\(aq,
+\&... \(aqthat\(aq, \(aqparty\(aq, \(aqdirect\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> reference1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqthat\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqwill\(aq, \(aqforever\(aq,
+\&... \(aqheed\(aq, \(aqParty\(aq, \(aqcommands\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> reference2 = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqguiding\(aq, \(aqprinciple\(aq, \(aqwhich\(aq,
+\&... \(aqguarantees\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqforces\(aq, \(aqalways\(aq,
+\&... \(aqbeing\(aq, \(aqunder\(aq, \(aqthe\(aq, \(aqcommand\(aq, \(aqof\(aq, \(aqthe\(aq,
+\&... \(aqParty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> reference3 = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqpractical\(aq, \(aqguide\(aq, \(aqfor\(aq, \(aqthe\(aq,
+\&... \(aqarmy\(aq, \(aqalways\(aq, \(aqto\(aq, \(aqheed\(aq, \(aqthe\(aq, \(aqdirections\(aq,
+\&... \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> sentence_nist([reference1, reference2, reference3], hypothesis1)
+3.3709...
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> sentence_nist([reference1, reference2, reference3], hypothesis2)
+1.4619...
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- reference sentences
+.IP \(bu 2
+\fBhypothesis\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a hypothesis sentence
+.IP \(bu 2
+\fBn\fP (\fIint\fP) \-\- highest n\-gram order
+.UNINDENT
+.UNINDENT
+.UNINDENT
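+.sp
+For completeness, a hedged sketch of the corpus\-level wrapper documented
+above, reusing the sentences from the doctest; note the extra level of list
+nesting (one list of references per hypothesis). The score is not shown, as
+it has not been verified against the implementation:
+.sp
+.nf
+.ft C
+>>> from nltk.translate.nist_score import corpus_nist
+>>> score = corpus_nist([[reference1, reference2, reference3]], [hypothesis1])
+.ft P
+.fi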
+.SS nltk.translate.phrase_based module
+.INDENT 0.0
+.TP
+.B nltk.translate.phrase_based.extract(f_start, f_end, e_start, e_end, alignment, f_aligned, srctext, trgtext, srclen, trglen, max_phrase_length)
+This function checks for alignment point consistency and extracts
+phrases using the chunk of consistent phrases.
+.sp
+A phrase pair (e, f) is consistent with an alignment A if and only if:
+.INDENT 7.0
+.IP i. 5
+No English words in the phrase pair are aligned to words outside it.
+.INDENT 5.0
+.INDENT 3.5
+∀ e_i ∈ ē : (e_i, f_j) ∈ A ⇒ f_j ∈ f̄
+.UNINDENT
+.UNINDENT
+.IP ii. 5
+No Foreign words in the phrase pair are aligned to words outside it.
+.INDENT 5.0
+.INDENT 3.5
+∀ f_j ∈ f̄ : (e_i, f_j) ∈ A ⇒ e_i ∈ ē
+.UNINDENT
+.UNINDENT
+.IP iii. 5
+The phrase pair contains at least one alignment point.
+.INDENT 5.0
+.INDENT 3.5
+∃ e_i ∈ ē, f_j ∈ f̄ s.t. (e_i, f_j) ∈ A
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBf_start\fP (\fIint\fP) \-\- Starting index of the possible foreign language phrases
+.IP \(bu 2
+\fBf_end\fP (\fIint\fP) \-\- End index of the possible foreign language phrases
+.IP \(bu 2
+\fBe_start\fP (\fIint\fP) \-\- Starting index of the possible source language phrases
+.IP \(bu 2
+\fBe_end\fP (\fIint\fP) \-\- End index of the possible source language phrases
+.IP \(bu 2
+\fBsrctext\fP (\fIlist\fP) \-\- The source language tokens, a list of string.
+.IP \(bu 2
+\fBtrgtext\fP (\fIlist\fP) \-\- The target language tokens, a list of string.
+.IP \(bu 2
+\fBsrclen\fP (\fIint\fP) \-\- The number of tokens in the source language tokens.
+.IP \(bu 2
+\fBtrglen\fP (\fIint\fP) \-\- The number of tokens in the target language tokens.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.phrase_based.phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0)
+The phrase extraction algorithm extracts all consistent phrase pairs from
+a word\-aligned sentence pair.
+.sp
+The idea is to loop over all possible source language (e) phrases and find
+the minimal foreign phrase (f) that matches each of them. Matching is done
+by identifying all alignment points for the source phrase and finding the
+shortest foreign phrase that includes all the foreign counterparts for the
+source words.
+.sp
+In short, a phrase alignment has to
+(a) contain all alignment points for all covered words
+(b) contain at least one alignment point
+.sp
+.nf
+.ft C
+>>> srctext = "michael assumes that he will stay in the house"
+>>> trgtext = "michael geht davon aus , dass er im haus bleibt"
+>>> alignment = [(0,0), (1,1), (1,2), (1,3), (2,5), (3,6), (4,9),
+\&... (5,9), (6,7), (7,7), (8,8)]
+>>> phrases = phrase_extraction(srctext, trgtext, alignment)
+>>> for i in sorted(phrases):
+\&...     print(i)
+\&...
+((0, 1), (0, 1), \(aqmichael\(aq, \(aqmichael\(aq)
+((0, 2), (0, 4), \(aqmichael assumes\(aq, \(aqmichael geht davon aus\(aq)
+((0, 2), (0, 5), \(aqmichael assumes\(aq, \(aqmichael geht davon aus ,\(aq)
+((0, 3), (0, 6), \(aqmichael assumes that\(aq, \(aqmichael geht davon aus , dass\(aq)
+((0, 4), (0, 7), \(aqmichael assumes that he\(aq, \(aqmichael geht davon aus , dass er\(aq)
+((0, 9), (0, 10), \(aqmichael assumes that he will stay in the house\(aq, \(aqmichael geht davon aus , dass er im haus bleibt\(aq)
+((1, 2), (1, 4), \(aqassumes\(aq, \(aqgeht davon aus\(aq)
+((1, 2), (1, 5), \(aqassumes\(aq, \(aqgeht davon aus ,\(aq)
+((1, 3), (1, 6), \(aqassumes that\(aq, \(aqgeht davon aus , dass\(aq)
+((1, 4), (1, 7), \(aqassumes that he\(aq, \(aqgeht davon aus , dass er\(aq)
+((1, 9), (1, 10), \(aqassumes that he will stay in the house\(aq, \(aqgeht davon aus , dass er im haus bleibt\(aq)
+((2, 3), (4, 6), \(aqthat\(aq, \(aq, dass\(aq)
+((2, 3), (5, 6), \(aqthat\(aq, \(aqdass\(aq)
+((2, 4), (4, 7), \(aqthat he\(aq, \(aq, dass er\(aq)
+((2, 4), (5, 7), \(aqthat he\(aq, \(aqdass er\(aq)
+((2, 9), (4, 10), \(aqthat he will stay in the house\(aq, \(aq, dass er im haus bleibt\(aq)
+((2, 9), (5, 10), \(aqthat he will stay in the house\(aq, \(aqdass er im haus bleibt\(aq)
+((3, 4), (6, 7), \(aqhe\(aq, \(aqer\(aq)
+((3, 9), (6, 10), \(aqhe will stay in the house\(aq, \(aqer im haus bleibt\(aq)
+((4, 6), (9, 10), \(aqwill stay\(aq, \(aqbleibt\(aq)
+((4, 9), (7, 10), \(aqwill stay in the house\(aq, \(aqim haus bleibt\(aq)
+((6, 8), (7, 8), \(aqin the\(aq, \(aqim\(aq)
+((6, 9), (7, 9), \(aqin the house\(aq, \(aqim haus\(aq)
+((8, 9), (8, 9), \(aqhouse\(aq, \(aqhaus\(aq)
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsrctext\fP (\fIstr\fP) \-\- The sentence string from the source language.
+.IP \(bu 2
+\fBtrgtext\fP (\fIstr\fP) \-\- The sentence string from the target language.
+.IP \(bu 2
+\fBalignment\fP (\fIlist\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- The word alignment outputs as list of tuples, where
+the first elements of tuples are the source words\(aq indices and
+second elements are the target words\(aq indices. This is also the output
+format of nltk.translate.ibm1
+.IP \(bu 2
+\fBmax_phrase_length\fP (\fIint\fP) \-\- maximal phrase length; if 0 or not specified,
+it is set to the length of the longer sentence (srctext or trgtext).
+.UNINDENT
+.TP
+.B Return type
+list(tuple)
+.TP
+.B Returns
+A list of tuples, each element in the list being a phrase, and each
+phrase a tuple made up of (i) its source location, (ii) its target
+location, (iii) the source phrase and (iv) the target phrase. The
+list of tuples represents all the possible phrases extracted from the
+word alignments.
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.ribes_score module
+.sp
+RIBES score implementation
+.INDENT 0.0
+.TP
+.B nltk.translate.ribes_score.corpus_ribes(list_of_references, hypotheses, alpha=0.25, beta=0.1)
+This function "calculates RIBES for a system output (hypothesis) with
+multiple references, and returns "best" score among multi\-references and
+individual scores. The scores are corpus\-wise, i.e., averaged by the number
+of sentences." (c.f. RIBES version 1.03.1 code).
+.sp
+Different from BLEU\(aqs micro\-average precision, RIBES calculates the
+macro\-average precision by averaging the best RIBES score for each pair of
+hypothesis and its corresponding references
+.sp
+.nf
+.ft C
+>>> hyp1 = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqwhich\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqalways\(aq,
+\&... \(aqobeys\(aq, \(aqthe\(aq, \(aqcommands\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+>>> ref1a = [\(aqIt\(aq, \(aqis\(aq, \(aqa\(aq, \(aqguide\(aq, \(aqto\(aq, \(aqaction\(aq, \(aqthat\(aq,
+\&... \(aqensures\(aq, \(aqthat\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqwill\(aq, \(aqforever\(aq,
+\&... \(aqheed\(aq, \(aqParty\(aq, \(aqcommands\(aq]
+>>> ref1b = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqguiding\(aq, \(aqprinciple\(aq, \(aqwhich\(aq,
+\&... \(aqguarantees\(aq, \(aqthe\(aq, \(aqmilitary\(aq, \(aqforces\(aq, \(aqalways\(aq,
+\&... \(aqbeing\(aq, \(aqunder\(aq, \(aqthe\(aq, \(aqcommand\(aq, \(aqof\(aq, \(aqthe\(aq, \(aqParty\(aq]
+>>> ref1c = [\(aqIt\(aq, \(aqis\(aq, \(aqthe\(aq, \(aqpractical\(aq, \(aqguide\(aq, \(aqfor\(aq, \(aqthe\(aq,
+\&... \(aqarmy\(aq, \(aqalways\(aq, \(aqto\(aq, \(aqheed\(aq, \(aqthe\(aq, \(aqdirections\(aq,
+\&... \(aqof\(aq, \(aqthe\(aq, \(aqparty\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> hyp2 = [\(aqhe\(aq, \(aqread\(aq, \(aqthe\(aq, \(aqbook\(aq, \(aqbecause\(aq, \(aqhe\(aq, \(aqwas\(aq,
+\&... \(aqinterested\(aq, \(aqin\(aq, \(aqworld\(aq, \(aqhistory\(aq]
+>>> ref2a = [\(aqhe\(aq, \(aqwas\(aq, \(aqinterested\(aq, \(aqin\(aq, \(aqworld\(aq, \(aqhistory\(aq,
+\&... \(aqbecause\(aq, \(aqhe\(aq, \(aqread\(aq, \(aqthe\(aq, \(aqbook\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+>>> hypotheses = [hyp1, hyp2]
+>>> round(corpus_ribes(list_of_references, hypotheses),4)
+0.3597
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP\fI)\fP) \-\- a corpus of lists of reference sentences, w.r.t. hypotheses
+.IP \(bu 2
+\fBhypotheses\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- a list of hypothesis sentences
+.IP \(bu 2
+\fBalpha\fP (\fIfloat\fP) \-\- hyperparameter used as a prior for the unigram precision.
+.IP \(bu 2
+\fBbeta\fP (\fIfloat\fP) \-\- hyperparameter used as a prior for the brevity penalty.
+.UNINDENT
+.TP
+.B Returns
+The best ribes score from one of the references.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.ribes_score.find_increasing_sequences(worder)
+Given the \fIworder\fP list, this function groups monotonic +1 sequences.
+.sp
+.nf
+.ft C
+>>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+>>> list(find_increasing_sequences(worder))
+[(7, 8, 9, 10), (0, 1, 2, 3, 4, 5)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBworder\fP (\fIlist\fP\fI(\fP\fIint\fP\fI)\fP) \-\- The worder list output from word_rank_alignment
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.ribes_score.kendall_tau(worder, normalize=True)
+Calculates the Kendall\(aqs Tau correlation coefficient given the \fIworder\fP
+list of word alignments from word_rank_alignment(), using the formula:
+.INDENT 7.0
+.INDENT 3.5
+tau = 2 * num_increasing_pairs / num_possible_pairs \- 1
+.UNINDENT
+.UNINDENT
+.sp
+Note that the no. of increasing pairs can be discontinuous in the \fIworder\fP
+list, and each increasing sequence can be tabulated as choose(len(seq), 2)
+no. of increasing pairs, e.g.
+.sp
+.nf
+.ft C
+>>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+>>> number_possible_pairs = choose(len(worder), 2)
+>>> round(kendall_tau(worder, normalize=False),3)
+\-0.236
+>>> round(kendall_tau(worder),3)
+0.382
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBworder\fP (\fIlist\fP\fI(\fP\fIint\fP\fI)\fP) \-\- The worder list output from word_rank_alignment
+.IP \(bu 2
+\fBnormalize\fP (\fIboolean\fP) \-\- Flag to indicate normalization to between 0.0 and 1.0.
+.UNINDENT
+.TP
+.B Returns
+The Kendall\(aqs Tau correlation coefficient.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.ribes_score.position_of_ngram(ngram, sentence)
+This function returns the position of the first instance of the ngram
+appearing in a sentence.
+.sp
+Note that one could also use strings as follows, but the code is a little
+convoluted with type casting back and forth:
+.INDENT 7.0
+.INDENT 3.5
+char_pos = \(aq \(aq.join(sent)[:\(aq \(aq.join(sent).index(\(aq \(aq.join(ngram))]
+word_pos = char_pos.count(\(aq \(aq)
+.UNINDENT
+.UNINDENT
+.sp
+Another way to conceive this is:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B return next(i for i, ng in enumerate(ngrams(sentence, len(ngram)))
+if ng == ngram)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBngram\fP (\fItuple\fP) \-\- The ngram that needs to be searched
+.IP \(bu 2
+\fBsentence\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The list of tokens to search from.
+.UNINDENT
+.UNINDENT
+.UNINDENT
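+.sp
+A minimal sketch (tokens are illustrative; the expected result assumes the
+zero\-indexed convention used throughout this module):
+.sp
+.nf
+.ft C
+>>> from nltk.translate.ribes_score import position_of_ngram
+>>> position_of_ngram((\(aqthe\(aq, \(aqcat\(aq), [\(aqa\(aq, \(aqdog\(aq, \(aqand\(aq, \(aqthe\(aq, \(aqcat\(aq])
+3
+.ft P
+.fi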
+.INDENT 0.0
+.TP
+.B nltk.translate.ribes_score.sentence_ribes(references, hypothesis, alpha=0.25, beta=0.1)
+The RIBES (Rank\-based Intuitive Bilingual Evaluation Score) from
+Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh and
+Hajime Tsukada. 2010. "Automatic Evaluation of Translation Quality for
+Distant Language Pairs". In Proceedings of EMNLP.
+\fI\%http://www.aclweb.org/anthology/D/D10/D10\-1092.pdf\fP
+.sp
+The generic RIBES score used in shared tasks, e.g. the Workshop on
+Asian Translation (WAT), uses the following RIBES calculation:
+.INDENT 7.0
+.INDENT 3.5
+RIBES = kendall_tau * (alpha**p1) * (beta**bp)
+.UNINDENT
+.UNINDENT
+.sp
+Please note that this re\-implementation differs from the official
+RIBES implementation; although it emulates the results described
+in the original paper, there are further optimizations implemented
+in the official RIBES script.
+.sp
+Users are encouraged to use the official RIBES script instead of this
+implementation when evaluating their machine translation systems. Refer
+to \fI\%http://www.kecl.ntt.co.jp/icl/lirg/ribes/\fP for the official script.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreferences\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP\fI)\fP) \-\- a list of reference sentences
+.IP \(bu 2
+\fBhypothesis\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a hypothesis sentence
+.IP \(bu 2
+\fBalpha\fP (\fIfloat\fP) \-\- hyperparameter used as a prior for the unigram precision.
+.IP \(bu 2
+\fBbeta\fP (\fIfloat\fP) \-\- hyperparameter used as a prior for the brevity penalty.
+.UNINDENT
+.TP
+.B Returns
+The best ribes score from one of the references.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.ribes_score.spearman_rho(worder, normalize=True)
+Calculates the Spearman\(aqs Rho correlation coefficient given the \fIworder\fP
+list of word alignments from word_rank_alignment(), using the formula:
+.INDENT 7.0
+.INDENT 3.5
+rho = 1 \- sum(d**2) / choose(len(worder)+1, 3)
+.UNINDENT
+.UNINDENT
+.sp
+where d is the difference between the \fIworder\fP list of indices
+and the original word indices from the reference sentence.
+.sp
+Using the (H0, R0) and (H5, R5) examples from the paper:
+.sp
+.nf
+.ft C
+>>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+>>> round(spearman_rho(worder, normalize=False), 3)
+\-0.591
+>>> round(spearman_rho(worder), 3)
+0.205
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBworder\fP (\fIlist\fP\fI(\fP\fIint\fP\fI)\fP) \-\- The worder list output from word_rank_alignment
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.translate.ribes_score.word_rank_alignment(reference, hypothesis, character_based=False)
+This is the word rank alignment algorithm described in the paper to produce
+the \fIworder\fP list, i.e. a list of word indices of the hypothesis word order
+w.r.t. the list of reference words.
+.sp
+Below is the (H0, R0) example from the Isozaki et al. 2010 paper; note that
+the examples are indexed from 1 in the paper, but the results here are indexed from 0:
+.sp
+.nf
+.ft C
+>>> ref = str(\(aqhe was interested in world history because he \(aq
+\&... \(aqread the book\(aq).split()
+>>> hyp = str(\(aqhe read the book because he was interested in world \(aq
+\&... \(aqhistory\(aq).split()
+>>> word_rank_alignment(ref, hyp)
+[7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+.ft P
+.fi
+.sp
+The (H1, R1) example from the paper, note the 0th index:
+.sp
+.nf
+.ft C
+>>> ref = \(aqJohn hit Bob yesterday\(aq.split()
+>>> hyp = \(aqBob hit John yesterday\(aq.split()
+>>> word_rank_alignment(ref, hyp)
+[2, 1, 0, 3]
+.ft P
+.fi
+.sp
+Here is the (H2, R2) example from the paper, note the 0th index here too:
+.sp
+.nf
+.ft C
+>>> ref = \(aqthe boy read the book\(aq.split()
+>>> hyp = \(aqthe book was read by the boy\(aq.split()
+>>> word_rank_alignment(ref, hyp)
+[3, 4, 2, 0, 1]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBreference\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a reference sentence
+.IP \(bu 2
+\fBhypothesis\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- a hypothesis sentence
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.translate.stack_decoder module
+.sp
+A decoder that uses stacks to implement phrase\-based translation.
+.sp
+In phrase\-based translation, the source sentence is segmented into
+phrases of one or more words, and translations for those phrases are
+used to build the target sentence.
+.sp
+Hypothesis data structures are used to keep track of the source words
+translated so far and the partial output. A hypothesis can be expanded
+by selecting an untranslated phrase, looking up its translation in a
+phrase table, and appending that translation to the partial output.
+Translation is complete when a hypothesis covers all source words.
+.sp
+The search space is huge because the source sentence can be segmented
+in different ways, the source phrases can be selected in any order,
+and there could be multiple translations for the same source phrase in
+the phrase table. To make decoding tractable, stacks are used to limit
+the number of candidate hypotheses by doing histogram and/or threshold
+pruning.
+.sp
+Hypotheses with the same number of words translated are placed in the
+same stack. In histogram pruning, each stack has a size limit, and
+the hypothesis with the lowest score is removed when the stack is full.
+In threshold pruning, hypotheses that score below a certain threshold
+of the best hypothesis in that stack are removed.
+.sp
+Hypothesis scoring can include various factors such as phrase
+translation probability, language model probability, length of
+translation, cost of remaining words to be translated, and so on.
+.sp
+References:
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+.INDENT 0.0
+.TP
+.B class nltk.translate.stack_decoder.StackDecoder(phrase_table, language_model)
+Bases: \fBobject\fP
+.sp
+Phrase\-based stack decoder for machine translation
+.sp
+.nf
+.ft C
+>>> from math import log
+>>> from nltk.translate import PhraseTable
+>>> phrase_table = PhraseTable()
+>>> phrase_table.add((\(aqniemand\(aq,), (\(aqnobody\(aq,), log(0.8))
+>>> phrase_table.add((\(aqniemand\(aq,), (\(aqno\(aq, \(aqone\(aq), log(0.2))
+>>> phrase_table.add((\(aqerwartet\(aq,), (\(aqexpects\(aq,), log(0.8))
+>>> phrase_table.add((\(aqerwartet\(aq,), (\(aqexpecting\(aq,), log(0.2))
+>>> phrase_table.add((\(aqniemand\(aq, \(aqerwartet\(aq), (\(aqone\(aq, \(aqdoes\(aq, \(aqnot\(aq, \(aqexpect\(aq), log(0.1))
+>>> phrase_table.add((\(aqdie\(aq, \(aqspanische\(aq, \(aqinquisition\(aq), (\(aqthe\(aq, \(aqspanish\(aq, \(aqinquisition\(aq), log(0.8))
+>>> phrase_table.add((\(aq!\(aq,), (\(aq!\(aq,), log(0.8))
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> # nltk.model should be used here once it is implemented
+>>> from collections import defaultdict
+>>> language_prob = defaultdict(lambda: \-999.0)
+>>> language_prob[(\(aqnobody\(aq,)] = log(0.5)
+>>> language_prob[(\(aqexpects\(aq,)] = log(0.4)
+>>> language_prob[(\(aqthe\(aq, \(aqspanish\(aq, \(aqinquisition\(aq)] = log(0.2)
+>>> language_prob[(\(aq!\(aq,)] = log(0.1)
+>>> language_model = type(\(aq\(aq,(object,),{\(aqprobability_change\(aq: lambda self, context, phrase: language_prob[phrase], \(aqprobability\(aq: lambda self, phrase: language_prob[phrase]})()
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> stack_decoder = StackDecoder(phrase_table, language_model)
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> stack_decoder.translate([\(aqniemand\(aq, \(aqerwartet\(aq, \(aqdie\(aq, \(aqspanische\(aq, \(aqinquisition\(aq, \(aq!\(aq])
+[\(aqnobody\(aq, \(aqexpects\(aq, \(aqthe\(aq, \(aqspanish\(aq, \(aqinquisition\(aq, \(aq!\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B beam_threshold
+.INDENT 7.0
+.TP
+.B float: Hypotheses that score below this factor of the best
+hypothesis in a stack are dropped from consideration.
+Value between 0.0 and 1.0.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B compute_future_scores(src_sentence)
+Determines the approximate scores for translating every
+subsequence in \fBsrc_sentence\fP
+.sp
+Future scores can be used as a look\-ahead to determine the
+difficulty of translating the remaining parts of a src_sentence.
+.INDENT 7.0
+.TP
+.B Returns
+Scores of subsequences referenced by their start and
+end positions. For example, result[2][5] is the score of the
+subsequence covering positions 2, 3, and 4.
+.TP
+.B Return type
+dict(int: (dict(int): float))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property distortion_factor
+.INDENT 7.0
+.TP
+.B float: Amount of reordering of source phrases.
+Lower values favour monotone translation, suitable when
+word order is similar for both source and target languages.
+Value between 0.0 and 1.0. Default 0.5.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B distortion_score(hypothesis, next_src_phrase_span)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B expansion_score(hypothesis, translation_option, src_phrase_span)
+Calculate the score of expanding \fBhypothesis\fP with
+\fBtranslation_option\fP
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBhypothesis\fP (\fI_Hypothesis\fP) \-\- Hypothesis being expanded
+.IP \(bu 2
+\fBtranslation_option\fP (\fIPhraseTableEntry\fP) \-\- Information about the proposed expansion
+.IP \(bu 2
+\fBsrc_phrase_span\fP (\fItuple\fP\fI(\fP\fIint\fP\fI, \fP\fIint\fP\fI)\fP) \-\- Word position span of the source phrase
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B find_all_src_phrases(src_sentence)
+Finds all subsequences in src_sentence that have a phrase
+translation in the translation table
+.INDENT 7.0
+.TP
+.B Returns
+Subsequences that have a phrase translation,
+represented as a table of lists of end positions.
+For example, if result[2] is [5, 6, 9], then there are
+three phrases starting from position 2 in \fBsrc_sentence\fP,
+ending at positions 5, 6, and 9 exclusive. The list of
+ending positions is in ascending order.
+.TP
+.B Return type
+list(list(int))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B future_score(hypothesis, future_score_table, sentence_length)
+Determines the approximate score for translating the
+untranslated words in \fBhypothesis\fP
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stack_size
+.INDENT 7.0
+.TP
+.B int: Maximum number of hypotheses to consider in a stack.
+Higher values increase the likelihood of a good translation,
+but increase processing time.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B translate(src_sentence)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsrc_sentence\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Sentence to be translated
+.TP
+.B Returns
+Translated sentence
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static valid_phrases(all_phrases_from, hypothesis)
+Extract phrases from \fBall_phrases_from\fP that contain words
+that have not been translated by \fBhypothesis\fP
+.INDENT 7.0
+.TP
+.B Parameters
+\fBall_phrases_from\fP (\fIlist\fP\fI(\fP\fIlist\fP\fI(\fP\fIint\fP\fI)\fP\fI)\fP) \-\- Phrases represented by their spans, in
+the same format as the return value of
+\fBfind_all_src_phrases\fP
+.TP
+.B Returns
+A list of phrases, represented by their spans, that
+cover untranslated positions.
+.TP
+.B Return type
+list(tuple(int, int))
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B word_penalty
+.INDENT 7.0
+.TP
+.B float: Influences the translation length exponentially.
+If positive, shorter translations are preferred.
+If negative, longer translations are preferred.
+If zero, no penalty is applied.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS Module contents
+.sp
+Experimental features for machine translation.
+These interfaces are prone to change.
+.SS nltk.twitter package
+.SS Submodules
+.SS nltk.twitter.api module
+.sp
+This module provides an interface for TweetHandlers, and support for timezone
+handling.
+.INDENT 0.0
+.TP
+.B class nltk.twitter.api.BasicTweetHandler(limit=20)
+Bases: \fBobject\fP
+.sp
+Minimal implementation of \fITweetHandler\fP\&.
+.sp
+Counts the number of Tweets and decides when the client should stop
+fetching them.
+.INDENT 7.0
+.TP
+.B counter
+Counts the number of Tweets fetched so far.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B do_continue()
+Returns \fIFalse\fP if the client should stop fetching Tweets.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B do_stop
+A flag to indicate to the client whether to stop fetching data given
+some condition (e.g., reaching a date limit).
+.UNINDENT
+.INDENT 7.0
+.TP
+.B max_id
+Stores the id of the last fetched Tweet to handle pagination.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.twitter.api.LocalTimezoneOffsetWithUTC
+Bases: \fBdatetime.tzinfo\fP
+.sp
+This is not intended to be a general purpose class for dealing with the
+local timezone. In particular:
+.INDENT 7.0
+.IP \(bu 2
+it assumes that the date passed has been created using
+\fIdatetime(..., tzinfo=Local)\fP, where \fILocal\fP is an instance of
+the object \fILocalTimezoneOffsetWithUTC\fP;
+.IP \(bu 2
+for such an object, it returns the offset with UTC, used for date comparisons.
+.UNINDENT
+.sp
+Reference: \fI\%https://docs.python.org/3/library/datetime.html\fP
+.INDENT 7.0
+.TP
+.B DSTOFFSET = datetime.timedelta(seconds=34200)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B STDOFFSET = datetime.timedelta(seconds=34200)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B utcoffset(dt)
+Access the relevant time offset.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.twitter.api.TweetHandlerI(limit=20, upper_date_limit=None, lower_date_limit=None)
+Bases: \fI\%nltk.twitter.api.BasicTweetHandler\fP
+.sp
+Interface class whose subclasses should implement a handle method that
+Twitter clients can delegate to.
+.INDENT 7.0
+.TP
+.B check_date_limit(data, verbose=False)
+Validate date limits.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract handle(data)
+Deal appropriately with data returned by the Twitter API
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract on_finish()
+Actions when the tweet limit has been reached
+.UNINDENT
+.UNINDENT
+.SS nltk.twitter.common module
+.sp
+Utility functions for the \fItwitterclient\fP module which do not require
+the \fItwython\fP library to have been installed.
+.INDENT 0.0
+.TP
+.B nltk.twitter.common.extract_fields(tweet, fields)
+Extract field values from a full tweet and return them as a list
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtweet\fP (\fIjson\fP) \-\- The tweet in JSON format
+.IP \(bu 2
+\fBfields\fP (\fIlist\fP) \-\- The fields to be extracted from the tweet
+.UNINDENT
+.TP
+.B Return type
+list(str)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.twitter.common.get_header_field_list(main_fields, entity_type, entity_fields)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.twitter.common.json2csv(fp, outfile, fields, encoding=\(aqutf8\(aq, errors=\(aqreplace\(aq, gzip_compress=False)
+Extract selected fields from a file of line\-separated JSON tweets and
+write to a file in CSV format.
+.sp
+This utility function allows a file of full tweets to be easily converted
+to a CSV file for easier processing. For example, just TweetIDs or
+just the text content of the Tweets can be extracted.
+.sp
+Additionally, the function allows combinations of fields of other Twitter
+objects (mainly the users, see below).
+.sp
+For Twitter entities (e.g. hashtags of a Tweet), and for geolocation, see
+\fIjson2csv_entities\fP
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfp\fP \-\- A file\-like object containing full tweets, one JSON\-encoded tweet per line
+.IP \(bu 2
+\fBoutfile\fP (\fIstr\fP) \-\- The name of the text file where results should be written
+.IP \(bu 2
+\fBfields\fP (\fIlist\fP) \-\- The list of fields to be extracted. Useful examples are \(aqid_str\(aq for the tweetID and \(aqtext\(aq for the text of the tweet. See <\fI\%https://dev.twitter.com/overview/api/tweets\fP> for a full list of fields. e.g.: [\(aqid_str\(aq], [\(aqid\(aq, \(aqtext\(aq, \(aqfavorite_count\(aq, \(aqretweet_count\(aq] Additionally, it allows IDs from other Twitter objects, e.g., [\(aqid\(aq, \(aqtext\(aq, \(aquser.id\(aq, \(aquser.followers_count\(aq, \(aquser.friends_count\(aq]
+.IP \(bu 2
+\fBerrors\fP \-\- Behaviour for encoding errors, see \fI\%https://docs.python.org/3/library/codecs.html#codec\-base\-classes\fP
+.IP \(bu 2
+\fBgzip_compress\fP \-\- if \fITrue\fP, output files are compressed with gzip
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.twitter.common.json2csv_entities(tweets_file, outfile, main_fields, entity_type, entity_fields, encoding=\(aqutf8\(aq, errors=\(aqreplace\(aq, gzip_compress=False)
+Extract selected fields from a file of line\-separated JSON tweets and
+write to a file in CSV format.
+.sp
+This utility function allows a file of full Tweets to be easily converted
+to a CSV file for easier processing of Twitter entities. For example, the
+hashtags or media elements of a tweet can be extracted.
+.sp
+It returns one line per entity of a Tweet, e.g. if a tweet has two hashtags
+there will be two lines in the output file, one per hashtag.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtweets_file\fP \-\- the file\-like object containing full Tweets
+.IP \(bu 2
+\fBoutfile\fP (\fIstr\fP) \-\- The path of the text file where results should be written
+.IP \(bu 2
+\fBmain_fields\fP (\fIlist\fP) \-\- The list of fields to be extracted from the main object, usually the tweet. Useful examples: \(aqid_str\(aq for the tweetID. See <\fI\%https://dev.twitter.com/overview/api/tweets\fP> for a full list of fields. e.g.: [\(aqid_str\(aq], [\(aqid\(aq, \(aqtext\(aq, \(aqfavorite_count\(aq, \(aqretweet_count\(aq]. If \fIentity_type\fP is expressed with hierarchy, then it is the list of fields of the object that corresponds to the key of the entity_type (e.g., for entity_type=\(aquser.urls\(aq, the fields in the main_fields list belong to the user object; for entity_type=\(aqplace.bounding_box\(aq, the fields in the main_fields list belong to the place object of the tweet).
+.IP \(bu 2
+\fBentity_type\fP (\fIlist\fP) \-\- The name of the entity: \(aqhashtags\(aq, \(aqmedia\(aq, \(aqurls\(aq and \(aquser_mentions\(aq for the tweet object. For a user object, this needs to be expressed with a hierarchy: \fI\(aquser.urls\(aq\fP\&. For the bounding box of the Tweet location, use \fI\(aqplace.bounding_box\(aq\fP\&.
+.IP \(bu 2
+\fBentity_fields\fP (\fIlist\fP) \-\- The list of fields to be extracted from the entity. E.g. \fI[\(aqtext\(aq]\fP (of the Tweet)
+.IP \(bu 2
+\fBerrors\fP \-\- Behaviour for encoding errors, see \fI\%https://docs.python.org/3/library/codecs.html#codec\-base\-classes\fP
+.IP \(bu 2
+\fBgzip_compress\fP \-\- if \fITrue\fP, output files are compressed with gzip
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.twitter.twitter_demo module
+.SS nltk.twitter.twitterclient module
+.SS nltk.twitter.util module
+.SS Module contents
+.sp
+NLTK Twitter Package
+.sp
+This package contains classes for retrieving Tweet documents using the
+Twitter API.
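+.sp
+For illustration, a hedged sketch of the conversion utilities above (the
+input and output filenames are hypothetical; the field and entity names
+follow the parameter documentation):
+.sp
+.nf
+.ft C
+>>> from nltk.twitter.common import json2csv, json2csv_entities
+>>> with open(\(aqtweets.json\(aq) as fp:  # one JSON tweet per line
+\&...     json2csv(fp, \(aqtweets_text.csv\(aq, [\(aqid_str\(aq, \(aqtext\(aq])
+>>> with open(\(aqtweets.json\(aq) as fp:  # one output row per hashtag
+\&...     json2csv_entities(fp, \(aqtweets_hashtags.csv\(aq,
+\&...         [\(aqid_str\(aq], \(aqhashtags\(aq, [\(aqtext\(aq])
+.ft P
+.fi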
+.SS Submodules +.SS nltk.book module +.INDENT 0.0 +.TP +.B nltk.book.sents() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.book.texts() +.UNINDENT +.SS nltk.cli module +.SS nltk.collections module +.INDENT 0.0 +.TP +.B class nltk.collections.AbstractLazySequence +Bases: \fBobject\fP +.sp +An abstract base class for read\-only sequences whose values are +computed as needed. Lazy sequences act like tuples \-\- they can be +indexed, sliced, and iterated over; but they may not be modified. +.sp +The most common application of lazy sequences in NLTK is for +corpus view objects, which provide access to the contents of a +corpus without loading the entire corpus into memory, by loading +pieces of the corpus from disk as needed. +.sp +The result of modifying a mutable element of a lazy sequence is +undefined. In particular, the modifications made to the element +may or may not persist, depending on whether and when the lazy +sequence caches that element\(aqs value or reconstructs it from +scratch. +.sp +Subclasses are required to define two methods: \fB__len__()\fP +and \fBiterate_from()\fP\&. +.INDENT 7.0 +.TP +.B count(value) +Return the number of times this list contains \fBvalue\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B index(value, start=None, stop=None) +Return the index of the first occurrence of \fBvalue\fP in this +list that is greater than or equal to \fBstart\fP and less than +\fBstop\fP\&. Negative start and stop values are treated like negative +slice bounds \-\- i.e., they count from the end of the list. +.UNINDENT +.INDENT 7.0 +.TP +.B iterate_from(start) +Return an iterator that generates the tokens in the corpus +file underlying this corpus view, starting at the token number +\fBstart\fP\&. If \fBstart>=len(self)\fP, then this iterator will +generate no tokens. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.collections.LazyConcatenation(list_of_lists) +Bases: \fI\%nltk.collections.AbstractLazySequence\fP +.sp +A lazy sequence formed by concatenating a list of lists. This +underlying list of lists may itself be lazy. \fBLazyConcatenation\fP +maintains an index that it uses to keep track of the relationship +between offsets in the concatenated lists and offsets in the +sublists. +.INDENT 7.0 +.TP +.B iterate_from(start_index) +Return an iterator that generates the tokens in the corpus +file underlying this corpus view, starting at the token number +\fBstart\fP\&. If \fBstart>=len(self)\fP, then this iterator will +generate no tokens. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.collections.LazyEnumerate(lst) +Bases: \fI\%nltk.collections.LazyZip\fP +.sp +A lazy sequence whose elements are tuples, each containing a count (from +zero) and a value yielded by underlying sequence. \fBLazyEnumerate\fP is +useful for obtaining an indexed list. The tuples are constructed lazily +\-\- i.e., when you read a value from the list, \fBLazyEnumerate\fP will +calculate that value by forming a tuple from the count of the i\-th +element and the i\-th element of the underlying sequence. +.sp +\fBLazyEnumerate\fP is essentially a lazy version of the Python primitive +function \fBenumerate\fP\&. 
In particular, the following two expressions are
+equivalent:
+.sp
+.nf
+.ft C
+>>> from nltk.collections import LazyEnumerate
+>>> sequence = [\(aqfirst\(aq, \(aqsecond\(aq, \(aqthird\(aq]
+>>> list(enumerate(sequence))
+[(0, \(aqfirst\(aq), (1, \(aqsecond\(aq), (2, \(aqthird\(aq)]
+>>> list(LazyEnumerate(sequence))
+[(0, \(aqfirst\(aq), (1, \(aqsecond\(aq), (2, \(aqthird\(aq)]
+.ft P
+.fi
+.sp
+Lazy enumerations can be useful for conserving memory in cases where the
+argument sequences are particularly long.
+.sp
+A typical example of a use case for this class is obtaining an indexed
+list for a long sequence of values. By constructing tuples lazily and
+avoiding the creation of an additional long sequence, memory usage can be
+significantly reduced.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.collections.LazyIteratorList(it, known_len=None)
+Bases: \fI\%nltk.collections.AbstractLazySequence\fP
+.sp
+Wraps an iterator, loading its elements on demand
+and making them subscriptable.
+__repr__ displays only the first few elements.
+.INDENT 7.0
+.TP
+.B iterate_from(start)
+Create a new iterator over this list starting at the given offset.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.collections.LazyMap(function, *lists, **config)
+Bases: \fI\%nltk.collections.AbstractLazySequence\fP
+.sp
+A lazy sequence whose elements are formed by applying a given
+function to each element in one or more underlying lists. The
+function is applied lazily \-\- i.e., when you read a value from the
+list, \fBLazyMap\fP will calculate that value by applying its
+function to the underlying lists\(aq value(s). \fBLazyMap\fP is
+essentially a lazy version of the Python primitive function
+\fBmap\fP\&. In particular, the following two expressions are
+equivalent:
+.sp
+.nf
+.ft C
+>>> from nltk.collections import LazyMap
+>>> function = str
+>>> sequence = [1,2,3]
+>>> list(map(function, sequence))
+[\(aq1\(aq, \(aq2\(aq, \(aq3\(aq]
+>>> list(LazyMap(function, sequence))
+[\(aq1\(aq, \(aq2\(aq, \(aq3\(aq]
+.ft P
+.fi
+.sp
+Unlike Python 3\(aqs \fBmap\fP built\-in, which stops at the shortest list,
+\fBLazyMap\fP supplies the value None for the \(aqmissing\(aq elements if the
+source lists do not have equal size.
+.sp
+Lazy maps can be useful for conserving memory, in cases where
+individual values take up a lot of space. This is especially true
+if the underlying list\(aqs values are constructed lazily, as is the
+case with many corpus readers.
+.sp
+A typical example of a use case for this class is performing
+feature detection on the tokens in a corpus. Since featuresets
+are encoded as dictionaries, which can take up a lot of memory,
+using a \fBLazyMap\fP can significantly reduce memory usage when
+training and running classifiers. A small sketch of this use case
+appears after this entry.
+.INDENT 7.0
+.TP
+.B iterate_from(index)
+Return an iterator that generates the tokens in the corpus
+file underlying this corpus view, starting at the token number
+\fBstart\fP\&. If \fBstart>=len(self)\fP, then this iterator will
+generate no tokens.
+.UNINDENT
+.UNINDENT
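+.sp
+A minimal sketch of the feature\-detection use case mentioned above (the
+feature function and tokens are illustrative, not taken from a real corpus):
+.sp
+.nf
+.ft C
+>>> from nltk.collections import LazyMap
+>>> tokens = [\(aqthe\(aq, \(aqcat\(aq, \(aqsat\(aq]
+>>> def features(word):
+\&...     return {\(aqlength\(aq: len(word), \(aqlast\(aq: word[\-1]}
+>>> featuresets = LazyMap(features, tokens)
+>>> featuresets[1]  # computed on demand, not stored up front
+{\(aqlength\(aq: 3, \(aqlast\(aq: \(aqt\(aq}
+.ft P
+.fi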
+.UNINDENT
+.INDENT 7.0
+.TP
+.B iterate_from(start)
+Return an iterator that generates the tokens in the corpus
+file underlying this corpus view, starting at the token number
+\fBstart\fP\&. If \fBstart>=len(self)\fP, then this iterator will
+generate no tokens.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.collections.LazyZip(*lists)
+Bases: \fI\%nltk.collections.LazyMap\fP
+.sp
+A lazy sequence whose elements are tuples, each containing the i\-th
+element from each of the argument sequences. The returned list is
+truncated in length to the length of the shortest argument sequence. The
+tuples are constructed lazily \-\- i.e., when you read a value from the
+list, \fBLazyZip\fP will calculate that value by forming a tuple from
+the i\-th element of each of the argument sequences.
+.sp
+\fBLazyZip\fP is essentially a lazy version of the Python primitive function
+\fBzip\fP\&. In particular, an evaluated LazyZip is equivalent to a zip:
+.sp
+.nf
+.ft C
+>>> from nltk.collections import LazyZip
+>>> sequence1, sequence2 = [1, 2, 3], [\(aqa\(aq, \(aqb\(aq, \(aqc\(aq]
+>>> list(zip(sequence1, sequence2))
+[(1, \(aqa\(aq), (2, \(aqb\(aq), (3, \(aqc\(aq)]
+>>> list(LazyZip(sequence1, sequence2))
+[(1, \(aqa\(aq), (2, \(aqb\(aq), (3, \(aqc\(aq)]
+>>> sequences = [sequence1, sequence2, [6,7,8,9]]
+>>> list(zip(*sequences)) == list(LazyZip(*sequences))
+True
+.ft P
+.fi
+.sp
+Lazy zips can be useful for conserving memory in cases where the argument
+sequences are particularly long.
+.sp
+A typical example of a use case for this class is combining long sequences
+of gold standard and predicted values in a classification or tagging task
+in order to calculate accuracy. By constructing tuples lazily and
+avoiding the creation of an additional long sequence, memory usage can be
+significantly reduced.
+.INDENT 7.0
+.TP
+.B iterate_from(index)
+Return an iterator that generates the tokens in the corpus
+file underlying this corpus view, starting at the token number
+\fBstart\fP\&. If \fBstart>=len(self)\fP, then this iterator will
+generate no tokens.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.collections.OrderedDict(data=None, **kwargs)
+Bases: \fBdict\fP
+.INDENT 7.0
+.TP
+.B clear() -> None. Remove all items from D.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B copy() -> a shallow copy of D
+.UNINDENT
+.INDENT 7.0
+.TP
+.B items() -> a set\-like object providing a view on D\(aqs items
+.UNINDENT
+.INDENT 7.0
+.TP
+.B keys() -> a set\-like object providing a view on D\(aqs keys
+.UNINDENT
+.INDENT 7.0
+.TP
+.B popitem()
+Remove and return a (key, value) pair as a 2\-tuple.
+.sp
+Pairs are returned in LIFO (last\-in, first\-out) order.
+Raises KeyError if the dict is empty.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B setdefault(key, failobj=None)
+Insert key with a value of default if key is not in the dictionary.
+.sp
+Return the value for key if key is in the dictionary, else default.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update([E], **F) -> None. Update D from dict/iterable E and F.
+If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]
+If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v
+In either case, this is followed by: for k in F: D[k] = F[k]
+.UNINDENT
+.INDENT 7.0
+.TP
+.B values() -> an object providing a view on D\(aqs values
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.collections.Trie(strings=None)
+Bases: \fBdict\fP
+.sp
+A Trie implementation for strings.
+.INDENT 7.0
+.TP
+.B LEAF = True
+.UNINDENT
+.INDENT 7.0
+.TP
+.B insert(string)
+Inserts \fBstring\fP into the Trie.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBstring\fP (\fIstr\fP) \-\- String to insert into the trie
+.TP
+.B Example
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.collections import Trie
+>>> trie = Trie(["abc", "def"])
+>>> expected = {\(aqa\(aq: {\(aqb\(aq: {\(aqc\(aq: {True: None}}}, \(aqd\(aq: {\(aqe\(aq: {\(aqf\(aq: {True: None}}}}
+>>> trie == expected
+True
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SS nltk.collocations module
+.sp
+Tools to identify collocations \-\-\- words that often appear consecutively
+\-\-\- within corpora. They may also be used to find other associations between
+word occurrences.
+See Manning and Schutze ch. 5 at \fI\%http://nlp.stanford.edu/fsnlp/promo/colloc.pdf\fP
+and the Text::NSP Perl package at http://ngram.sourceforge.net
+.sp
+Finding collocations requires first calculating the frequencies of words and
+their appearance in the context of other words. Often the collection of words
+will then require filtering to only retain useful content terms. Each ngram
+of words may then be scored according to some association measure, in order
+to determine the relative likelihood of each ngram being a collocation.
+.sp
+The \fBBigramCollocationFinder\fP and \fBTrigramCollocationFinder\fP classes provide
+these functionalities, dependent on being provided a function which scores an
+ngram given appropriate frequency counts. A number of standard association
+measures are provided in bigram_measures and trigram_measures.
+.INDENT 0.0
+.TP
+.B class nltk.collocations.BigramCollocationFinder(word_fd, bigram_fd, window_size=2)
+Bases: \fBnltk.collocations.AbstractCollocationFinder\fP
+.sp
+A tool for the finding and ranking of bigram collocations or other
+association measures. It is often useful to use from_words() rather than
+constructing an instance directly.
+.INDENT 7.0
+.TP
+.B default_ws = 2
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod from_words(words, window_size=2)
+Construct a BigramCollocationFinder for all bigrams in the given
+sequence. When window_size > 2, count non\-contiguous bigrams, in the
+style of Church and Hanks\(aqs (1990) association ratio.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B score_ngram(score_fn, w1, w2)
+Returns the score for a given bigram using the given scoring
+function. Following Church and Hanks (1990), counts are scaled by
+a factor of 1/(window_size \- 1).
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.collocations.QuadgramCollocationFinder(word_fd, quadgram_fd, ii, iii, ixi, ixxi, iixi, ixii)
+Bases: \fBnltk.collocations.AbstractCollocationFinder\fP
+.sp
+A tool for the finding and ranking of quadgram collocations or other association measures.
+It is often useful to use from_words() rather than constructing an instance directly.
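+.sp
+All of the finder classes share the same basic workflow; a brief
+sketch using the bigram variant (the toy word list is illustrative):
+.sp
+.nf
+.ft C
+from nltk.collocations import BigramCollocationFinder
+from nltk.metrics import BigramAssocMeasures
+
+words = "the quick brown fox saw the quick brown dog".split()
+finder = BigramCollocationFinder.from_words(words)
+# Rank candidate bigrams with a standard association measure.
+top = finder.nbest(BigramAssocMeasures.pmi, 2)
+.ft P
+.fi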
+.INDENT 7.0
+.TP
+.B default_ws = 4
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod from_words(words, window_size=4)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B score_ngram(score_fn, w1, w2, w3, w4)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.collocations.TrigramCollocationFinder(word_fd, bigram_fd, wildcard_fd, trigram_fd)
+Bases: \fBnltk.collocations.AbstractCollocationFinder\fP
+.sp
+A tool for the finding and ranking of trigram collocations or other
+association measures. It is often useful to use from_words() rather than
+constructing an instance directly.
+.INDENT 7.0
+.TP
+.B bigram_finder()
+Constructs a bigram collocation finder with the bigram and unigram
+data from this finder. Note that this does not include any filtering
+applied to this finder.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B default_ws = 3
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod from_words(words, window_size=3)
+Construct a TrigramCollocationFinder for all trigrams in the given
+sequence.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B score_ngram(score_fn, w1, w2, w3)
+Returns the score for a given trigram using the given scoring
+function.
+.UNINDENT
+.UNINDENT
+.SS nltk.compat module
+.INDENT 0.0
+.TP
+.B nltk.compat.add_py3_data(path)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.compat.py3_data(init_func)
+.UNINDENT
+.SS nltk.data module
+.sp
+Functions to find and load NLTK resource files, such as corpora,
+grammars, and saved processing objects. Resource files are identified
+using URLs, such as \fBnltk:corpora/abc/rural.txt\fP or
+\fBhttp://nltk.org/sample/toy.cfg\fP\&. The following URL protocols are
+supported:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+\fBfile:path\fP: Specifies the file whose path is \fIpath\fP\&.
+Both relative and absolute paths may be used.
+.IP \(bu 2
+\fBhttp://host/path\fP: Specifies the file stored on the web
+server \fIhost\fP at path \fIpath\fP\&.
+.IP \(bu 2
+\fBnltk:path\fP: Specifies the file stored in the NLTK data
+package at \fIpath\fP\&. NLTK will search for these files in the
+directories specified by \fBnltk.data.path\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+If no protocol is specified, then the default protocol \fBnltk:\fP will
+be used.
+.sp
+This module provides two functions that can be used to access a
+resource file, given its URL: \fBload()\fP loads a given resource, and
+adds it to a resource cache; and \fBretrieve()\fP copies a given resource
+to a local file.
+.INDENT 0.0
+.TP
+.B nltk.data.AUTO_FORMATS = {\(aqcfg\(aq: \(aqcfg\(aq, \(aqfcfg\(aq: \(aqfcfg\(aq, \(aqfol\(aq: \(aqfol\(aq, \(aqjson\(aq: \(aqjson\(aq, \(aqlogic\(aq: \(aqlogic\(aq, \(aqpcfg\(aq: \(aqpcfg\(aq, \(aqpickle\(aq: \(aqpickle\(aq, \(aqtext\(aq: \(aqtext\(aq, \(aqtxt\(aq: \(aqtext\(aq, \(aqval\(aq: \(aqval\(aq, \(aqyaml\(aq: \(aqyaml\(aq}
+A dictionary mapping from file extensions to format names, used
+by load() when format="auto" to decide the format for a
+given resource url.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.data.BufferedGzipFile(*args, **kwargs)
+A \fBGzipFile\fP subclass for compatibility with older nltk releases.
+.sp
+Use \fBGzipFile\fP directly as it also buffers in all supported
+Python versions.
+.sp
+@deprecated: Use gzip.GzipFile instead as it also uses a buffer.
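+.sp
+A sketch of the recommended replacement (the filename is
+illustrative):
+.sp
+.nf
+.ft C
+from gzip import GzipFile
+
+with GzipFile("corpus.txt.gz", "rb") as fp:
+    data = fp.read()  # GzipFile buffers internally
+.ft P
+.fi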
+.UNINDENT +.INDENT 0.0 +.TP +.B nltk.data.FORMATS = {\(aqcfg\(aq: \(aqA context free grammar.\(aq, \(aqfcfg\(aq: \(aqA feature CFG.\(aq, \(aqfol\(aq: \(aqA list of first order logic expressions, parsed with nltk.sem.logic.Expression.fromstring.\(aq, \(aqjson\(aq: \(aqA serialized python object, stored using the json module.\(aq, \(aqlogic\(aq: \(aqA list of first order logic expressions, parsed with nltk.sem.logic.LogicParser. Requires an additional logic_parser parameter\(aq, \(aqpcfg\(aq: \(aqA probabilistic CFG.\(aq, \(aqpickle\(aq: \(aqA serialized python object, stored using the pickle module.\(aq, \(aqraw\(aq: \(aqThe raw (byte string) contents of a file.\(aq, \(aqtext\(aq: \(aqThe raw (unicode string) contents of a file. \(aq, \(aqval\(aq: \(aqA semantic valuation, parsed by nltk.sem.Valuation.fromstring.\(aq, \(aqyaml\(aq: \(aqA serialized python object, stored using the yaml module.\(aq} +A dictionary describing the formats that are supported by NLTK\(aqs +load() method. Keys are format names, and values are format +descriptions. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.data.FileSystemPathPointer(_path) +Bases: \fI\%nltk.data.PathPointer\fP, \fBstr\fP +.sp +A path pointer that identifies a file which can be accessed +directly via a given absolute path. +.INDENT 7.0 +.TP +.B file_size() +Return the size of the file pointed to by this path pointer, +in bytes. +.INDENT 7.0 +.TP +.B Raises +\fBIOError\fP \-\- If the path specified by this pointer does +not contain a readable file. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B join(fileid) +Return a new path pointer formed by starting at the path +identified by this pointer, and then following the relative +path given by \fBfileid\fP\&. The path components of \fBfileid\fP +should be separated by forward slashes, regardless of +the underlying file system\(aqs path separator character. +.UNINDENT +.INDENT 7.0 +.TP +.B open(encoding=None) +Return a seekable read\-only stream that can be used to read +the contents of the file identified by this path pointer. +.INDENT 7.0 +.TP +.B Raises +\fBIOError\fP \-\- If the path specified by this pointer does +not contain a readable file. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B property path +The absolute path identified by this path pointer. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.data.GzipFileSystemPathPointer(_path) +Bases: \fI\%nltk.data.FileSystemPathPointer\fP +.sp +A subclass of \fBFileSystemPathPointer\fP that identifies a gzip\-compressed +file located at a given absolute path. \fBGzipFileSystemPathPointer\fP is +appropriate for loading large gzip\-compressed pickle objects efficiently. +.INDENT 7.0 +.TP +.B open(encoding=None) +Return a seekable read\-only stream that can be used to read +the contents of the file identified by this path pointer. +.INDENT 7.0 +.TP +.B Raises +\fBIOError\fP \-\- If the path specified by this pointer does +not contain a readable file. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.data.LazyLoader(_path) +Bases: \fBobject\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.data.OpenOnDemandZipFile(filename) +Bases: \fBzipfile.ZipFile\fP +.sp +A subclass of \fBzipfile.ZipFile\fP that closes its file pointer +whenever it is not using it; and re\-opens it when it needs to read +data from the zipfile. This is useful for reducing the number of +open file handles when many zip files are being accessed at once. +\fBOpenOnDemandZipFile\fP must be constructed from a filename, not a +file\-like object (to allow re\-opening). 
\fBOpenOnDemandZipFile\fP is
+read\-only (i.e. \fBwrite()\fP and \fBwritestr()\fP are disabled).
+.INDENT 7.0
+.TP
+.B read(name)
+Return file bytes for name.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B write(*args, **kwargs)
+.INDENT 7.0
+.TP
+.B Raises
+\fBNotImplementedError\fP \-\- OpenOnDemandZipfile is read\-only
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B writestr(*args, **kwargs)
+.INDENT 7.0
+.TP
+.B Raises
+\fBNotImplementedError\fP \-\- OpenOnDemandZipfile is read\-only
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.data.PathPointer
+Bases: \fBobject\fP
+.sp
+An abstract base class for \(aqpath pointers,\(aq used by NLTK\(aqs data
+package to identify specific paths. Two subclasses exist:
+\fBFileSystemPathPointer\fP identifies a file that can be accessed
+directly via a given absolute path. \fBZipFilePathPointer\fP
+identifies a file contained within a zipfile, that can be accessed
+by reading that zipfile.
+.INDENT 7.0
+.TP
+.B abstract file_size()
+Return the size of the file pointed to by this path pointer,
+in bytes.
+.INDENT 7.0
+.TP
+.B Raises
+\fBIOError\fP \-\- If the path specified by this pointer does
+not contain a readable file.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract join(fileid)
+Return a new path pointer formed by starting at the path
+identified by this pointer, and then following the relative
+path given by \fBfileid\fP\&. The path components of \fBfileid\fP
+should be separated by forward slashes, regardless of
+the underlying file system\(aqs path separator character.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B abstract open(encoding=None)
+Return a seekable read\-only stream that can be used to read
+the contents of the file identified by this path pointer.
+.INDENT 7.0
+.TP
+.B Raises
+\fBIOError\fP \-\- If the path specified by this pointer does
+not contain a readable file.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.data.SeekableUnicodeStreamReader(stream, encoding, errors=\(aqstrict\(aq)
+Bases: \fBobject\fP
+.sp
+A stream reader that automatically decodes the source byte stream
+into unicode (like \fBcodecs.StreamReader\fP); but still supports the
+\fBseek()\fP and \fBtell()\fP operations correctly. This is in contrast
+to \fBcodecs.StreamReader\fP, which provides \fIbroken\fP \fBseek()\fP and
+\fBtell()\fP methods.
+.sp
+This class was motivated by \fBStreamBackedCorpusView\fP, which
+makes extensive use of \fBseek()\fP and \fBtell()\fP, and needs to be
+able to handle unicode\-encoded files.
+.sp
+Note: this class requires stateless decoders. To my knowledge,
+this shouldn\(aqt cause a problem with any of python\(aqs builtin
+unicode encodings.
+.INDENT 7.0
+.TP
+.B DEBUG = True
+.UNINDENT
+.INDENT 7.0
+.TP
+.B bytebuffer
+A buffer for bytes that have been read but have not yet
+been decoded. This is only used when the final bytes from
+a read do not form a complete encoding for a character.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B char_seek_forward(offset)
+Move the read pointer forward by \fBoffset\fP characters.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B close()
+Close the underlying stream.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property closed
+True if the underlying stream is closed.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B decode
+The function that is used to decode byte strings into
+unicode strings.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B discard_line()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B encoding
+The name of the encoding that should be used to encode the
+underlying stream.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B errors
+The error mode that should be used when decoding data from
+the underlying stream. Can be \(aqstrict\(aq, \(aqignore\(aq, or
+\(aqreplace\(aq.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B linebuffer
+A buffer used by \fBreadline()\fP to hold characters that have
+been read, but have not yet been returned by \fBread()\fP or
+\fBreadline()\fP\&. This buffer consists of a list of unicode
+strings, where each string corresponds to a single line.
+The final element of the list may or may not be a complete
+line. Note that the existence of a linebuffer makes the
+\fBtell()\fP operation more complex, because it must backtrack
+to the beginning of the buffer to determine the correct
+file position in the underlying byte stream.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property mode
+The mode of the underlying stream.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property name
+The name of the underlying stream.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B next()
+Return the next decoded line from the underlying stream.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read(size=None)
+Read up to \fBsize\fP bytes, decode them using this reader\(aqs
+encoding, and return the resulting unicode string.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsize\fP (\fIint\fP) \-\- The maximum number of bytes to read. If not
+specified, then read as many bytes as possible.
+.TP
+.B Return type
+unicode
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B readline(size=None)
+Read a line of text, decode it using this reader\(aqs encoding,
+and return the resulting unicode string.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsize\fP (\fIint\fP) \-\- The maximum number of bytes to read. If no
+newline is encountered before \fBsize\fP bytes have been read,
+then the returned value may not be a complete line of text.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B readlines(sizehint=None, keepends=True)
+Read this file\(aqs contents, decode them using this reader\(aqs
+encoding, and return them as a list of unicode lines.
+.INDENT 7.0
+.TP
+.B Return type
+list(unicode)
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsizehint\fP \-\- Ignored.
+.IP \(bu 2
+\fBkeepends\fP \-\- If false, then strip newlines.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B seek(offset, whence=0)
+Move the stream to a new file position. If the reader is
+maintaining any buffers, then they will be cleared.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBoffset\fP \-\- A byte count offset.
+.IP \(bu 2
+\fBwhence\fP \-\- If 0, then the offset is from the start of the file
+(offset should be positive); if 1, then the offset is from the
+current position (offset may be positive or negative); and if 2,
+then the offset is from the end of the file (offset should
+typically be negative).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B stream
+The underlying stream.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tell()
+Return the current file position on the underlying byte
+stream. If this reader is maintaining any buffers, then the
+returned file position will be the position of the beginning
+of those buffers.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B xreadlines()
+Return self
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.data.clear_cache()
+Remove all objects from the resource cache.
+See also \fBload()\fP\&.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.data.find(resource_name, paths=None)
+Find the given resource by searching through the directories and
+zip files in paths, where a None or empty string specifies an absolute path.
+Returns a corresponding path name.
If the given resource is not
+found, raise a \fBLookupError\fP, whose message gives a pointer to
+the installation instructions for the NLTK downloader.
+.sp
+Zip File Handling:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+If \fBresource_name\fP contains a component with a \fB\&.zip\fP
+extension, then it is assumed to be a zipfile; and the
+remaining path components are used to look inside the zipfile.
+.IP \(bu 2
+If any element of \fBnltk.data.path\fP has a \fB\&.zip\fP extension,
+then it is assumed to be a zipfile.
+.IP \(bu 2
+If a given resource name does not contain any zipfile
+component and is not found initially, then \fBfind()\fP will make a
+second attempt to find that resource, by replacing each
+component \fIp\fP in the path with \fIp.zip/p\fP\&. For example, this
+allows \fBfind()\fP to map the resource name
+\fBcorpora/chat80/cities.pl\fP to a zip file path pointer to
+\fBcorpora/chat80.zip/chat80/cities.pl\fP\&.
+.IP \(bu 2
+When using \fBfind()\fP to locate a directory contained in a
+zipfile, the resource name must end with the forward slash
+character. Otherwise, \fBfind()\fP will not locate the
+directory.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+\fBresource_name\fP (\fIstr\fP\fI or \fP\fIunicode\fP) \-\- The name of the resource to search for.
+Resource names are posix\-style relative path names, such as
+\fBcorpora/brown\fP\&. Directory names will be
+automatically converted to a platform\-appropriate path separator.
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.data.load(resource_url, format=\(aqauto\(aq, cache=True, verbose=False, logic_parser=None, fstruct_reader=None, encoding=None)
+Load a given resource from the NLTK data package. The following
+resource formats are currently supported:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+\fBpickle\fP
+.IP \(bu 2
+\fBjson\fP
+.IP \(bu 2
+\fByaml\fP
+.IP \(bu 2
+\fBcfg\fP (context free grammars)
+.IP \(bu 2
+\fBpcfg\fP (probabilistic CFGs)
+.IP \(bu 2
+\fBfcfg\fP (feature\-based CFGs)
+.IP \(bu 2
+\fBfol\fP (formulas of First Order Logic)
+.IP \(bu 2
+\fBlogic\fP (Logical formulas to be parsed by the given logic_parser)
+.IP \(bu 2
+\fBval\fP (valuation of First Order Logic model)
+.IP \(bu 2
+\fBtext\fP (the file contents as a unicode string)
+.IP \(bu 2
+\fBraw\fP (the raw file contents as a byte string)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+If no format is specified, \fBload()\fP will attempt to determine a
+format based on the resource name\(aqs file extension. If that
+fails, \fBload()\fP will raise a \fBValueError\fP exception.
+.sp
+For all text formats (everything except \fBpickle\fP, \fBjson\fP, \fByaml\fP and \fBraw\fP),
+it tries to decode the raw contents using UTF\-8, and if that doesn\(aqt
+work, it tries with ISO\-8859\-1 (Latin\-1), unless the \fBencoding\fP
+is specified.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBresource_url\fP (\fIstr\fP) \-\- A URL specifying where the resource should be
+loaded from. The default protocol is "nltk:", which searches
+for the file in the NLTK data package.
+.IP \(bu 2
+\fBcache\fP (\fIbool\fP) \-\- If true, add this resource to a cache. If load()
+finds a resource in its cache, then it will return it from the
+cache rather than loading it.
+.IP \(bu 2
+\fBverbose\fP (\fIbool\fP) \-\- If true, print a message when loading a resource.
+Messages are not displayed when a resource is retrieved from
+the cache.
+.IP \(bu 2
+\fBlogic_parser\fP (\fILogicParser\fP) \-\- The parser that will be used to parse logical
+expressions.
+.IP \(bu 2
+\fBfstruct_reader\fP (\fIFeatStructReader\fP) \-\- The parser that will be used to parse the
+feature structure of an fcfg.
+.IP \(bu 2
+\fBencoding\fP (\fIstr\fP) \-\- the encoding of the input; only used for text formats.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.data.path = [\(aq/Users/sbird1/nltk_data\(aq, \(aq/opt/local/Library/Frameworks/Python.framework/Versions/3.9/nltk_data\(aq, \(aq/opt/local/Library/Frameworks/Python.framework/Versions/3.9/share/nltk_data\(aq, \(aq/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/nltk_data\(aq, \(aq/usr/share/nltk_data\(aq, \(aq/usr/local/share/nltk_data\(aq, \(aq/usr/lib/nltk_data\(aq, \(aq/usr/local/lib/nltk_data\(aq]
+A list of directories where the NLTK data package might reside.
+These directories will be checked in order when looking for a
+resource in the data package. Note that this allows users to
+substitute in their own versions of resources, if they have them
+(e.g., in their home directory under ~/nltk_data).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.data.retrieve(resource_url, filename=None, verbose=True)
+Copy the given resource to a local file. If no filename is
+specified, then use the URL\(aqs filename. If there is already a
+file named \fBfilename\fP, then raise a \fBValueError\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBresource_url\fP (\fIstr\fP) \-\- A URL specifying where the resource should be
+loaded from. The default protocol is "nltk:", which searches
+for the file in the NLTK data package.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.data.show_cfg(resource_url, escape=\(aq##\(aq)
+Write out a grammar file, ignoring escaped and empty lines.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBresource_url\fP (\fIstr\fP) \-\- A URL specifying where the resource should be
+loaded from. The default protocol is "nltk:", which searches
+for the file in the NLTK data package.
+.IP \(bu 2
+\fBescape\fP (\fIstr\fP) \-\- Prepended string that signals lines to be ignored
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.decorators module
+.sp
+Decorator module by Michele Simionato <\fI\%michelesimionato@libero.it\fP>
+Copyright Michele Simionato, distributed under the terms of the BSD License (see below).
+\fI\%http://www.phyast.pitt.edu/~micheles/python/documentation.html\fP
+.sp
+Included in NLTK for its support of a nice memoization decorator.
+.INDENT 0.0
+.TP
+.B nltk.decorators.decorator(caller)
+General purpose decorator factory: takes a caller function as
+input and returns a decorator with the same attributes.
+A caller function is any function like this:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+def caller(func, *args, **kw):
+    # do something
+    return func(*args, **kw)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Here is an example of usage:
+.sp
+.nf
+.ft C
+>>> @decorator
+\&... def chatty(f, *args, **kw):
+\&...     print("Calling %r" % f.__name__)
+\&...     return f(*args, **kw)
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> chatty.__name__
+\(aqchatty\(aq
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> @chatty
+\&... def f(): pass
+\&...
+>>> f()
+Calling \(aqf\(aq
+.ft P
+.fi
+.sp
+decorator can also take as input a class with a .caller method; in this
+case it converts the class into a factory of callable decorator objects.
+See the documentation for an example.
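+.sp
+The module\(aqs own \fBmemoize\fP decorator (mentioned above) can be used
+like this; a short sketch:
+.sp
+.nf
+.ft C
+>>> from nltk.decorators import memoize
+>>> @memoize
+\&... def fib(n):
+\&...     return n if n < 2 else fib(n \- 1) + fib(n \- 2)
+>>> fib(30)  # cached intermediate results keep this fast
+832040
+.ft P
+.fi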
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.decorators.getinfo(func)
+Returns an info dictionary containing:
+\- name (the name of the function : str)
+\- argnames (the names of the arguments : list)
+\- defaults (the values of the default arguments : tuple)
+\- signature (the signature : str)
+\- fullsignature (the full signature : Signature)
+\- doc (the docstring : str)
+\- module (the module name : str)
+\- dict (the function __dict__ : dict)
+.sp
+.nf
+.ft C
+>>> def f(self, x=1, y=2, *args, **kw): pass
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> info = getinfo(f)
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> info["name"]
+\(aqf\(aq
+>>> info["argnames"]
+[\(aqself\(aq, \(aqx\(aq, \(aqy\(aq, \(aqargs\(aq, \(aqkw\(aq]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> info["defaults"]
+(1, 2)
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> info["signature"]
+\(aqself, x, y, *args, **kw\(aq
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> info["fullsignature"]
+<Signature (self, x=1, y=2, *args, **kw)>
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.decorators.new_wrapper(wrapper, model)
+An improvement over functools.update_wrapper. The wrapper is a generic
+callable object. It works by generating a copy of the wrapper with the
+right signature and by updating the copy, not the original.
+Moreover, \(aqmodel\(aq can be a dictionary with keys \(aqname\(aq, \(aqdoc\(aq, \(aqmodule\(aq,
+\(aqdict\(aq, \(aqdefaults\(aq.
+.UNINDENT
+.SS nltk.downloader module
+.sp
+The NLTK corpus and module downloader. This module defines several
+interfaces which can be used to download corpora, models, and other
+data packages that can be used with NLTK.
+.SS Downloading Packages
+.sp
+If called with no arguments, \fBdownload()\fP will display an interactive
+interface which can be used to download and install new packages.
+If Tkinter is available, then a graphical interface will be shown,
+otherwise a simple text interface will be provided.
+.sp
+Individual packages can be downloaded by calling the \fBdownload()\fP
+function with a single argument, giving the package identifier for the
+package that should be downloaded:
+.sp
+.nf
+.ft C
+>>> download(\(aqtreebank\(aq)
+[nltk_data] Downloading package \(aqtreebank\(aq...
+[nltk_data] Unzipping corpora/treebank.zip.
+.ft P
+.fi
+.sp
+NLTK also provides a number of "package collections", consisting of
+a group of related packages. To download all packages in a
+collection, simply call \fBdownload()\fP with the collection\(aqs
+identifier:
+.sp
+.nf
+.ft C
+>>> download(\(aqall\-corpora\(aq)
+[nltk_data] Downloading package \(aqabc\(aq...
+[nltk_data] Unzipping corpora/abc.zip.
+[nltk_data] Downloading package \(aqalpino\(aq...
+[nltk_data] Unzipping corpora/alpino.zip.
+ ...
+[nltk_data] Downloading package \(aqwords\(aq...
+[nltk_data] Unzipping corpora/words.zip.
+.ft P
+.fi
+.SS Download Directory
+.sp
+By default, packages are installed in either a system\-wide directory
+(if Python has sufficient access to write to it); or in the current
+user\(aqs home directory. However, the \fBdownload_dir\fP argument may be
+used to specify a different installation target, if desired.
+.sp
+See \fBDownloader.default_download_dir()\fP for a more detailed
+description of how the default download directory is chosen.
+.SS NLTK Download Server
+.sp
+Before downloading any packages, the corpus and module downloader
+contacts the NLTK download server, to retrieve an index file
+describing the available packages. By default, this index file is
+loaded from \fBhttps://raw.githubusercontent.com/nltk/nltk_data/gh\-pages/index.xml\fP\&.
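+.sp
+A short sketch of talking to the data server programmatically (the
+package identifier is illustrative):
+.sp
+.nf
+.ft C
+from nltk.downloader import Downloader
+
+d = Downloader()             # index is fetched from DEFAULT_URL on demand
+print(d.status("treebank"))  # e.g. \(aqinstalled\(aq or \(aqnot installed\(aq
+.ft P
+.fi
+.sp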
+If necessary, it is possible to create a new \fBDownloader\fP object,
+specifying a different URL for the package index file.
+.sp
+Usage:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+python nltk/downloader.py [\-d DATADIR] [\-q] [\-f] [\-k] PACKAGE_IDS
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+or:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+python \-m nltk.downloader [\-d DATADIR] [\-q] [\-f] [\-k] PACKAGE_IDS
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.downloader.Collection(id, children, name=None, **kw)
+Bases: \fBobject\fP
+.sp
+A directory entry for a collection of downloadable packages.
+These entries are extracted from the XML index file that is
+downloaded by \fBDownloader\fP\&.
+.INDENT 7.0
+.TP
+.B children
+A list of the \fBCollections\fP or \fBPackages\fP directly
+contained by this collection.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B static fromxml(xml)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B id
+A unique identifier for this collection.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B name
+A string name for this collection.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B packages
+A list of \fBPackages\fP contained by this collection or any
+collections it recursively contains.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.downloader.Downloader(server_index_url=None, download_dir=None)
+Bases: \fBobject\fP
+.sp
+A class used to access the NLTK data server, which can be used to
+download corpora and other data packages.
+.INDENT 7.0
+.TP
+.B DEFAULT_URL = \(aqhttps://raw.githubusercontent.com/nltk/nltk_data/gh\-pages/index.xml\(aq
+The default URL for the NLTK data server\(aqs index. An
+alternative URL can be specified when creating a new
+\fBDownloader\fP object.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B INDEX_TIMEOUT = 3600
+The amount of time (in seconds) after which the cached copy of the data
+server index will be considered \(aqstale,\(aq and will be
+re\-downloaded.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B INSTALLED = \(aqinstalled\(aq
+A status string indicating that a package or collection is
+installed and up\-to\-date.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B NOT_INSTALLED = \(aqnot installed\(aq
+A status string indicating that a package or collection is
+not installed.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B PARTIAL = \(aqpartial\(aq
+A status string indicating that a collection is partially
+installed (i.e., only some of its packages are installed).
+.UNINDENT
+.INDENT 7.0
+.TP
+.B STALE = \(aqout of date\(aq
+A status string indicating that a package or collection is
+corrupt or out\-of\-date.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B clear_status_cache(id=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B collections()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B corpora()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B default_download_dir()
+Return the directory to which packages will be downloaded by
+default. This value can be overridden using the constructor,
+or on a case\-by\-case basis using the \fBdownload_dir\fP argument when
+calling \fBdownload()\fP\&.
+.sp
+On Windows, the default download directory is
+\fBPYTHONHOME/lib/nltk\fP, where \fIPYTHONHOME\fP is the
+directory containing Python, e.g. \fBC:\ePython25\fP\&.
+.sp
+On all other platforms, the default directory is the first of
+the following which exists or which can be created with write
+permission: \fB/usr/share/nltk_data\fP, \fB/usr/local/share/nltk_data\fP,
+\fB/usr/lib/nltk_data\fP, \fB/usr/local/lib/nltk_data\fP, \fB~/nltk_data\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B download(info_or_id=None, download_dir=None, quiet=False, force=False, prefix=\(aq[nltk_data] \(aq, halt_on_error=True, raise_on_error=False, print_error_to=<_io.TextIOWrapper name=\(aq<stderr>\(aq mode=\(aqw\(aq encoding=\(aqutf\-8\(aq>)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property download_dir
+The default directory to which packages will be downloaded.
+This defaults to the value returned by \fBdefault_download_dir()\fP\&.
+To override this default on a case\-by\-case basis, use the
+\fBdownload_dir\fP argument when calling \fBdownload()\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B incr_download(info_or_id, download_dir=None, force=False)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B index()
+Return the XML index describing the packages available from
+the data server. If necessary, this index will be downloaded
+from the data server.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B info(id)
+Return the \fBPackage\fP or \fBCollection\fP record for the
+given item.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_installed(info_or_id, download_dir=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_stale(info_or_id, download_dir=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B list(download_dir=None, show_packages=True, show_collections=True, header=True, more_prompt=False, skip_installed=False)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B models()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B packages()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B status(info_or_id, download_dir=None)
+Return a constant describing the status of the given package
+or collection. Status can be one of \fBINSTALLED\fP,
+\fBNOT_INSTALLED\fP, \fBSTALE\fP, or \fBPARTIAL\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update(quiet=False, prefix=\(aq[nltk_data] \(aq)
+Re\-download any packages whose status is STALE.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property url
+The URL for the data server\(aqs index file.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B xmlinfo(id)
+Return the XML info record for the given item
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.downloader.DownloaderGUI(dataserver, use_threads=True)
+Bases: \fBobject\fP
+.sp
+Graphical interface for downloading packages from the NLTK data
+server.
+.INDENT 7.0
+.TP
+.B COLUMNS = [\(aq\(aq, \(aqIdentifier\(aq, \(aqName\(aq, \(aqSize\(aq, \(aqStatus\(aq, \(aqUnzipped Size\(aq, \(aqCopyright\(aq, \(aqContact\(aq, \(aqLicense\(aq, \(aqAuthor\(aq, \(aqSubdir\(aq, \(aqChecksum\(aq]
+A list of the names of columns. This controls the order in
+which the columns will appear. If this is edited, then
+\fB_package_to_columns()\fP may need to be edited to match.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B COLUMN_WEIGHTS = {\(aq\(aq: 0, \(aqName\(aq: 5, \(aqSize\(aq: 0, \(aqStatus\(aq: 0}
+A dictionary specifying how columns should be resized when the
+table is resized. Columns with weight 0 will not be resized at
+all; and columns with high weight will be resized more.
+Default weight (for columns not explicitly listed) is 1.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B COLUMN_WIDTHS = {\(aq\(aq: 1, \(aqIdentifier\(aq: 20, \(aqName\(aq: 45, \(aqSize\(aq: 10, \(aqStatus\(aq: 12, \(aqUnzipped Size\(aq: 10}
+A dictionary specifying how wide each column should be, in
+characters. The default width (for columns not explicitly
+listed) is specified by \fBDEFAULT_COLUMN_WIDTH\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B DEFAULT_COLUMN_WIDTH = 30
+The default width for columns that are not explicitly listed
+in \fBCOLUMN_WIDTHS\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B HELP = \(aqThis tool can be used to download a variety of corpora and models\enthat can be used with NLTK.
Each corpus or model is distributed\enin a single zip file, known as a "package file." You can\endownload packages individually, or you can download pre\-defined\encollections of packages.\en\enWhen you download a package, it will be saved to the "download\endirectory." A default download directory is chosen when you run\en\enthe downloader; but you may also select a different download\endirectory. On Windows, the default download directory is\en\en\en"package."\en\enThe NLTK downloader can be used to download a variety of corpora,\enmodels, and other data packages.\en\enKeyboard shortcuts::\en [return]\et Download\en [up]\et Select previous package\en [down]\et Select next package\en [left]\et Select previous tab\en [right]\et Select next tab\en\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B INITIAL_COLUMNS = [\(aq\(aq, \(aqIdentifier\(aq, \(aqName\(aq, \(aqSize\(aq, \(aqStatus\(aq] +The set of columns that should be displayed by default. +.UNINDENT +.INDENT 7.0 +.TP +.B about(*e) +.UNINDENT +.INDENT 7.0 +.TP +.B c = \(aqStatus\(aq +.UNINDENT +.INDENT 7.0 +.TP +.B destroy(*e) +.UNINDENT +.INDENT 7.0 +.TP +.B help(*e) +.UNINDENT +.INDENT 7.0 +.TP +.B mainloop(*args, **kwargs) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.DownloaderMessage +Bases: \fBobject\fP +.sp +A status message object, used by \fBincr_download\fP to +communicate its progress. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.DownloaderShell(dataserver) +Bases: \fBobject\fP +.INDENT 7.0 +.TP +.B run() +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.ErrorMessage(package, message) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server encountered an error +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.FinishCollectionMessage(collection) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server has finished working on a collection of packages. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.FinishDownloadMessage(package) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server has finished downloading a package. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.FinishPackageMessage(package) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server has finished working on a package. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.FinishUnzipMessage(package) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server has finished unzipping a package. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.Package(id, url, name=None, subdir=\(aq\(aq, size=None, unzipped_size=None, checksum=None, svn_revision=None, copyright=\(aqUnknown\(aq, contact=\(aqUnknown\(aq, license=\(aqUnknown\(aq, author=\(aqUnknown\(aq, unzip=True, **kw) +Bases: \fBobject\fP +.sp +A directory entry for a downloadable package. These entries are +extracted from the XML index file that is downloaded by +\fBDownloader\fP\&. Each package consists of a single file; but if +that file is a zip file, then it can be automatically decompressed +when the package is installed. +.INDENT 7.0 +.TP +.B author +Author of this package. +.UNINDENT +.INDENT 7.0 +.TP +.B checksum +The MD\-5 checksum of the package file. +.UNINDENT +.INDENT 7.0 +.TP +.B contact +Name & email of the person who should be contacted with +questions about this package. +.UNINDENT +.INDENT 7.0 +.TP +.B copyright +Copyright holder for this package. +.UNINDENT +.INDENT 7.0 +.TP +.B filename +The filename that should be used for this package\(aqs file. 
It +is formed by joining \fBself.subdir\fP with \fBself.id\fP, and +using the same extension as \fBurl\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B static fromxml(xml) +.UNINDENT +.INDENT 7.0 +.TP +.B id +A unique identifier for this package. +.UNINDENT +.INDENT 7.0 +.TP +.B license +License information for this package. +.UNINDENT +.INDENT 7.0 +.TP +.B name +A string name for this package. +.UNINDENT +.INDENT 7.0 +.TP +.B size +The filesize (in bytes) of the package file. +.UNINDENT +.INDENT 7.0 +.TP +.B subdir +The subdirectory where this package should be installed. +E.g., \fB\(aqcorpora\(aq\fP or \fB\(aqtaggers\(aq\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B svn_revision +A subversion revision number for this package. +.UNINDENT +.INDENT 7.0 +.TP +.B unzip +A flag indicating whether this corpus should be unzipped by +default. +.UNINDENT +.INDENT 7.0 +.TP +.B unzipped_size +The total filesize of the files contained in the package\(aqs +zipfile. +.UNINDENT +.INDENT 7.0 +.TP +.B url +A URL that can be used to download this package\(aqs file. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.ProgressMessage(progress) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Indicates how much progress the data server has made +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.SelectDownloadDirMessage(download_dir) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Indicates what download directory the data server is using +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.StaleMessage(package) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +The package download file is out\-of\-date or corrupt +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.StartCollectionMessage(collection) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server has started working on a collection of packages. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.StartDownloadMessage(package) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server has started downloading a package. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.StartPackageMessage(package) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server has started working on a package. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.StartUnzipMessage(package) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +Data server has started unzipping a package. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.downloader.UpToDateMessage(package) +Bases: \fI\%nltk.downloader.DownloaderMessage\fP +.sp +The package download file is already up\-to\-date +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.downloader.build_index(root, base_url) +Create a new data.xml index file, by combining the xml description +files for various packages and collections. \fBroot\fP should be the +path to a directory containing the package xml and zip files; and +the collection xml files. The \fBroot\fP directory is expected to +have the following subdirectories: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +root/ + packages/ .................. subdirectory for packages + corpora/ ................. zip & xml files for corpora + grammars/ ................ zip & xml files for grammars + taggers/ ................. zip & xml files for taggers + tokenizers/ .............. zip & xml files for tokenizers + etc. + collections/ ............... 
xml files for collections
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+For each package, there should be two files: \fBpackage.zip\fP
+(where \fIpackage\fP is the package name)
+which contains the package itself as a compressed zip file; and
+\fBpackage.xml\fP, which is an xml description of the package. The
+zipfile \fBpackage.zip\fP should expand to a single subdirectory
+named \fBpackage/\fP\&. The base filename \fBpackage\fP must match
+the identifier given in the package\(aqs xml file.
+.sp
+For each collection, there should be a single file \fBcollection.xml\fP
+describing the collection, where \fIcollection\fP is the name of the collection.
+.sp
+All identifiers (for both packages and collections) must be unique.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.downloader.download(info_or_id=None, download_dir=None, quiet=False, force=False, prefix=\(aq[nltk_data] \(aq, halt_on_error=True, raise_on_error=False, print_error_to=<_io.TextIOWrapper name=\(aq<stderr>\(aq mode=\(aqw\(aq encoding=\(aqutf\-8\(aq>)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.downloader.download_gui()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.downloader.download_shell()
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.downloader.md5_hexdigest(file)
+Calculate and return the MD5 checksum for a given file.
+\fBfile\fP may either be a filename or an open stream.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.downloader.unzip(filename, root, verbose=True)
+Extract the contents of the zip file \fBfilename\fP into the
+directory \fBroot\fP\&.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.downloader.update()
+.UNINDENT
+.SS nltk.featstruct module
+.sp
+Basic data classes for representing feature structures, and for
+performing basic operations on those feature structures. A feature
+structure is a mapping from feature identifiers to feature values,
+where each feature value is either a basic value (such as a string or
+an integer), or a nested feature structure. There are two types of
+feature structure, implemented by two subclasses of \fBFeatStruct\fP:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+feature dictionaries, implemented by \fBFeatDict\fP, act like
+Python dictionaries. Feature identifiers may be strings or
+instances of the \fBFeature\fP class.
+.IP \(bu 2
+feature lists, implemented by \fBFeatList\fP, act like Python
+lists. Feature identifiers are integers.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+Feature structures are typically used to represent partial information
+about objects. A feature identifier that is not mapped to a value
+stands for a feature whose value is unknown (\fInot\fP a feature without
+a value). Two feature structures that represent (potentially
+overlapping) information about the same object can be combined by
+unification. When two inconsistent feature structures are unified,
+the unification fails and returns None.
+.sp
+Features can be specified using "feature paths", or tuples of feature
+identifiers that specify a path through the nested feature structures to
+a value. Feature structures may contain reentrant feature values. A
+"reentrant feature value" is a single feature value that can be
+accessed via multiple feature paths. Unification preserves the
+reentrance relations imposed by both of the unified feature
+structures. In the feature structure resulting from unification, any
+modifications to a reentrant feature value will be visible using any
+of its feature paths.
+.sp
+Feature structure variables are encoded using the \fBnltk.sem.Variable\fP
+class.
The variables\(aq values are tracked using a bindings
+dictionary, which maps variables to their values. When two feature
+structures are unified, a fresh bindings dictionary is created to
+track their values; and before unification completes, all bound
+variables are replaced by their values. Thus, the bindings
+dictionaries are usually strictly internal to the unification process.
+However, it is possible to track the bindings of variables if you
+choose to, by supplying your own initial bindings dictionary to the
+\fBunify()\fP function.
+.sp
+When unbound variables are unified with one another, they become
+aliased. This is encoded by binding one variable to the other.
+.SS Lightweight Feature Structures
+.sp
+Many of the functions defined by \fBnltk.featstruct\fP can be applied
+directly to simple Python dictionaries and lists, rather than to
+full\-fledged \fBFeatDict\fP and \fBFeatList\fP objects. In other words,
+Python \fBdicts\fP and \fBlists\fP can be used as "light\-weight" feature
+structures.
+.sp
+.nf
+.ft C
+>>> from nltk.featstruct import unify
+>>> unify(dict(x=1, y=dict()), dict(a=\(aqa\(aq, y=dict(b=\(aqb\(aq)))
+{\(aqy\(aq: {\(aqb\(aq: \(aqb\(aq}, \(aqx\(aq: 1, \(aqa\(aq: \(aqa\(aq}
+.ft P
+.fi
+.sp
+However, you should keep in mind the following caveats:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+Python dictionaries & lists ignore reentrance when checking for
+equality between values. But two FeatStructs with different
+reentrances are considered nonequal, even if all their base
+values are equal.
+.IP \(bu 2
+FeatStructs can be easily frozen, allowing them to be used as
+keys in hash tables. Python dictionaries and lists cannot.
+.IP \(bu 2
+FeatStructs display reentrance in their string representations;
+Python dictionaries and lists do not.
+.IP \(bu 2
+FeatStructs may \fInot\fP be mixed with Python dictionaries and lists
+(e.g., when performing unification).
+.IP \(bu 2
+FeatStructs provide a number of useful methods, such as \fBwalk()\fP
+and \fBcyclic()\fP, which are not available for Python dicts and lists.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+In general, if your feature structures will contain any reentrances,
+or if you plan to use them as dictionary keys, it is strongly
+recommended that you use full\-fledged \fBFeatStruct\fP objects.
+.INDENT 0.0
+.TP
+.B class nltk.featstruct.FeatDict(features=None, **morefeatures)
+Bases: \fI\%nltk.featstruct.FeatStruct\fP, \fBdict\fP
+.sp
+A feature structure that acts like a Python dictionary. I.e., a
+mapping from feature identifiers to feature values, where a feature
+identifier can be a string or a \fBFeature\fP; and where a feature value
+can be either a basic value (such as a string or an integer), or a nested
+feature structure. A feature identifier for a \fBFeatDict\fP is
+sometimes called a "feature name".
+.sp
+Two feature dicts are considered equal if they assign the same
+values to all features, and have the same reentrances.
+.INDENT 7.0
+.TP
+.B See
+\fBFeatStruct\fP for information about feature paths, reentrance,
+cyclic feature structures, mutability, freezing, and hashing.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B clear() -> None. Remove all items from D.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B get(name_or_path, default=None)
+If the feature with the given name or path exists, return its
+value; otherwise, return \fBdefault\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B has_key(name_or_path)
+Return true if a feature with the given name or path exists.
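+.sp
+A small sketch of name vs. path lookup (the structure is
+illustrative):
+.sp
+.nf
+.ft C
+>>> from nltk.featstruct import FeatStruct
+>>> fs = FeatStruct(\(aq[a=[b=1]]\(aq)
+>>> fs.has_key(\(aqa\(aq) and fs.has_key((\(aqa\(aq, \(aqb\(aq))
+True
+>>> fs.get((\(aqa\(aq, \(aqb\(aq))
+1
+.ft P
+.fi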
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pop(k[, d]) -> v, remove specified key and return the corresponding value.
+If key is not found, default is returned if given, otherwise KeyError is raised.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B popitem(*args, **kwargs)
+Remove and return a (key, value) pair as a 2\-tuple.
+.sp
+Pairs are returned in LIFO (last\-in, first\-out) order.
+Raises KeyError if the dict is empty.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B setdefault(*args, **kwargs)
+Insert key with a value of default if key is not in the dictionary.
+.sp
+Return the value for key if key is in the dictionary, else default.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B update([E], **F) -> None. Update D from dict/iterable E and F.
+If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]
+If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v
+In either case, this is followed by: for k in F: D[k] = F[k]
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.featstruct.FeatList(features=None, **morefeatures)
+Bases: \fI\%nltk.featstruct.FeatStruct\fP, \fBlist\fP
+.sp
+A list of feature values, where each feature value is either a
+basic value (such as a string or an integer), or a nested feature
+structure.
+.sp
+Feature lists may contain reentrant feature values. A "reentrant
+feature value" is a single feature value that can be accessed via
+multiple feature paths. Feature lists may also be cyclic.
+.sp
+Two feature lists are considered equal if they assign the same
+values to all features, and have the same reentrances.
+.INDENT 7.0
+.TP
+.B See
+\fBFeatStruct\fP for information about feature paths, reentrance,
+cyclic feature structures, mutability, freezing, and hashing.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B append(*args, **kwargs)
+Append object to the end of the list.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B extend(*args, **kwargs)
+Extend list by appending elements from the iterable.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B insert(*args, **kwargs)
+Insert object before index.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pop(*args, **kwargs)
+Remove and return item at index (default last).
+.sp
+Raises IndexError if list is empty or index is out of range.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B remove(*args, **kwargs)
+Remove first occurrence of value.
+.sp
+Raises ValueError if the value is not present.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B reverse(*args, **kwargs)
+Reverse \fIIN PLACE\fP\&.
+If self is frozen, raise ValueError.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B sort(*args, **kwargs)
+Sort the list in ascending order and return None.
+.sp
+The sort is in\-place (i.e. the list itself is modified) and stable (i.e. the
+order of two equal elements is maintained).
+.sp
+If a key function is given, apply it once to each list item and sort them,
+ascending or descending, according to their function values.
+.sp
+The reverse flag can be set to sort in descending order.
+If self is frozen, raise ValueError.
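+.sp
+A brief sketch of the frozen behavior (the exact error message may
+vary between versions):
+.sp
+.nf
+.ft C
+>>> from nltk.featstruct import FeatList
+>>> fl = FeatList([3, 1, 2])
+>>> fl.sort()
+>>> fl.freeze()
+>>> fl.sort()
+Traceback (most recent call last):
+    ...
+ValueError: Frozen FeatStructs may not be modified.
+.ft P
+.fi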
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.featstruct.FeatStruct(features=None, **morefeatures)
+Bases: \fBnltk.sem.logic.SubstituteBindingsI\fP
+.sp
+A mapping from feature identifiers to feature values, where each
+feature value is either a basic value (such as a string or an
+integer), or a nested feature structure. There are two types of
+feature structure:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+feature dictionaries, implemented by \fBFeatDict\fP, act like
+Python dictionaries. Feature identifiers may be strings or
+instances of the \fBFeature\fP class.
+.IP \(bu 2
+feature lists, implemented by \fBFeatList\fP, act like Python
+lists. Feature identifiers are integers.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+Feature structures may be indexed using either simple feature
+identifiers or \(aqfeature paths.\(aq A feature path is a sequence
+of feature identifiers that stand for a corresponding sequence of
+indexing operations. In particular, \fBfstruct[(f1,f2,...,fn)]\fP is
+equivalent to \fBfstruct[f1][f2]...[fn]\fP\&.
+.sp
+Feature structures may contain reentrant feature structures. A
+"reentrant feature structure" is a single feature structure
+object that can be accessed via multiple feature paths. Feature
+structures may also be cyclic. A feature structure is "cyclic"
+if there is any feature path from the feature structure to itself.
+.sp
+Two feature structures are considered equal if they assign the
+same values to all features, and have the same reentrancies.
+.sp
+By default, feature structures are mutable. They may be made
+immutable with the \fBfreeze()\fP method. Once they have been
+frozen, they may be hashed, and thus used as dictionary keys.
+.INDENT 7.0
+.TP
+.B copy(deep=True)
+Return a new copy of \fBself\fP\&. The new copy will not be frozen.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBdeep\fP \-\- If true, create a deep copy; if false, create
+a shallow copy.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B cyclic()
+Return True if this feature structure contains itself.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B equal_values(other, check_reentrance=False)
+Return True if \fBself\fP and \fBother\fP assign the same value
+to every feature. In particular, return True if
+\fBself[p]==other[p]\fP for every feature path \fIp\fP such
+that \fBself[p]\fP or \fBother[p]\fP is a base value (i.e.,
+not a nested feature structure).
+.INDENT 7.0
+.TP
+.B Parameters
+\fBcheck_reentrance\fP \-\- If True, then also return False if
+there is any difference between the reentrances of \fBself\fP
+and \fBother\fP\&.
+.TP
+.B Note
+the \fB==\fP is equivalent to \fBequal_values()\fP with
+\fBcheck_reentrance=True\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B freeze()
+Make this feature structure, and any feature structures it
+contains, immutable. Note: this method does not attempt to
+\(aqfreeze\(aq any feature value that is not a \fBFeatStruct\fP; it
+is recommended that you use only immutable feature values.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B frozen()
+Return True if this feature structure is immutable. Feature
+structures can be made immutable with the \fBfreeze()\fP method.
+Immutable feature structures may not be made mutable again,
+but new mutable copies can be produced with the \fBcopy()\fP method.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B remove_variables()
+Return the feature structure that is obtained by deleting
+any feature whose value is a \fBVariable\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+FeatStruct
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B rename_variables(vars=None, used_vars=(), new_vars=None)
+.INDENT 7.0
+.TP
+.B See
+\fBnltk.featstruct.rename_variables()\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B retract_bindings(bindings)
+.INDENT 7.0
+.TP
+.B See
+\fBnltk.featstruct.retract_bindings()\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B substitute_bindings(bindings)
+.INDENT 7.0
+.TP
+.B See
+\fBnltk.featstruct.substitute_bindings()\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B subsumes(other)
+Return True if \fBself\fP subsumes \fBother\fP\&. I.e., return true
+if unifying \fBself\fP with \fBother\fP would result in a feature
+structure equal to \fBother\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B unify(other, bindings=None, trace=False, fail=None, rename_vars=True)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B variables()
+.INDENT 7.0
+.TP
+.B See
+\fBnltk.featstruct.find_variables()\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B walk()
+Return an iterator that generates this feature structure, and
+each feature structure it contains. Each feature structure will
+be generated exactly once.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.featstruct.FeatStructReader(features=(*slash*, *type*), fdict_class=<class \(aqnltk.featstruct.FeatDict\(aq>, flist_class=<class \(aqnltk.featstruct.FeatList\(aq>, logic_parser=None)
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B VALUE_HANDLERS = [(\(aqread_fstruct_value\(aq, re.compile(\(aq\e\es*(?:\e\e((\e\ed+)\e\e)\e\es*)?(\e\e??[\e\ew\-]+)?(\e\e[)\(aq)), (\(aqread_var_value\(aq, re.compile(\(aq\e\e?[a\-zA\-Z_][a\-zA\-Z0\-9_]*\(aq)), (\(aqread_str_value\(aq, re.compile(\(aq[uU]?[rR]?([\e\(aq"])\(aq)), (\(aqread_int_value\(aq, re.compile(\(aq\-?\e\ed+\(aq)), (\(aqread_sym_value\(aq, re.compile(\(aq[a\-zA\-Z_][a\-zA\-Z0\-9_]*\(aq)), (\(aqread_app_value\(aq, re.compile(\(aq<(app)\e\e((\e\e?[a\-z][a\-z]*)\e\es*,\e\es*(\e\e?[a\-z][a\-z]*)\e\e)>\(aq)), (\(aqread_logic_value\(aq, re.compile(\(aq<(.*?)(?<!\-)>\(aq)), (\(aqread_set_value\(aq, re.compile(\(aq{\(aq)), (\(aqread_tuple_value\(aq, re.compile(\(aq\e\e(\(aq))]
+A table indicating how feature values should be processed. Each
+entry in the table is a pair (handler, regexp). The first entry
+with a matching regexp will have its handler called. Handlers
+should have the following signature:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+def handler(s, position, reentrances, match): ...
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+and should return a tuple (value, position), where position is
+the string position where the value ended. (n.b.: order is
+important here!)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B fromstring(s, fstruct=None)
+Convert a string representation of a feature structure (as
+displayed by repr) into a \fBFeatStruct\fP\&. This process
+imposes the following restrictions on the string
+representation:
+.INDENT 7.0
+.IP \(bu 2
+Feature names cannot contain any of the following:
+whitespace, parentheses, quote marks, equals signs,
+dashes, commas, and square brackets. Feature names may
+not begin with plus signs or minus signs.
+.IP \(bu 2
+Only the following basic feature values are supported:
+strings, integers, variables, None, and unquoted
+alphanumeric strings.
+.IP \(bu 2
+For reentrant values, the first mention must specify
+a reentrance identifier and a value; and any subsequent
+mentions must use arrows (\fB\(aq\->\(aq\fP) to reference the
+reentrance identifier.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_app_value(s, position, reentrances, match)
+Mainly included for backwards compatibility.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_fstruct_value(s, position, reentrances, match)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_int_value(s, position, reentrances, match)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_logic_value(s, position, reentrances, match)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_partial(s, position=0, reentrances=None, fstruct=None)
+Helper function that reads in a feature structure.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP \-\- The string to read.
+.IP \(bu 2
+\fBposition\fP \-\- The position in the string to start parsing.
+.IP \(bu 2
+\fBreentrances\fP \-\- A dictionary from reentrance ids to values.
+Defaults to an empty dictionary.
+.UNINDENT
+.TP
+.B Returns
+A tuple (val, pos) of the feature structure created by
+parsing and the position where the parsed feature structure ends.
+.TP
+.B Return type
+tuple(FeatStruct, int)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_set_value(s, position, reentrances, match)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_str_value(s, position, reentrances, match)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_sym_value(s, position, reentrances, match)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_tuple_value(s, position, reentrances, match)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_value(s, position, reentrances)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_var_value(s, position, reentrances, match)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.featstruct.Feature(name, default=None, display=None)
+Bases: \fBobject\fP
+.sp
+A feature identifier that\(aqs specialized to carry additional
+constraints, default values, etc.
+.INDENT 7.0
+.TP
+.B property default
+Default value for this feature.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property display
+Custom display location: can be prefix, or slash.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property name
+The name of this feature.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_value(s, position, reentrances, parser)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B unify_base_values(fval1, fval2, bindings)
+If possible, return a single value. If not, return
+the value \fBUnificationFailure\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.featstruct.RangeFeature(name, default=None, display=None)
+Bases: \fI\%nltk.featstruct.Feature\fP
+.INDENT 7.0
+.TP
+.B RANGE_RE = re.compile(\(aq(\-?\e\ed+):(\-?\e\ed+)\(aq)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B read_value(s, position, reentrances, parser)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B unify_base_values(fval1, fval2, bindings)
+If possible, return a single value. If not, return
+the value \fBUnificationFailure\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.featstruct.SlashFeature(name, default=None, display=None)
+Bases: \fI\%nltk.featstruct.Feature\fP
+.INDENT 7.0
+.TP
+.B read_value(s, position, reentrances, parser)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.featstruct.conflicts(fstruct1, fstruct2, trace=0)
+Return a list of the feature paths of all features which are
+assigned incompatible values by \fBfstruct1\fP and \fBfstruct2\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+list(tuple)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.featstruct.subsumes(fstruct1, fstruct2)
+Return True if \fBfstruct1\fP subsumes \fBfstruct2\fP\&.
I.e., return +true if unifying \fBfstruct1\fP with \fBfstruct2\fP would result in a +feature structure equal to \fBfstruct2.\fP +.INDENT 7.0 +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.featstruct.unify(fstruct1, fstruct2, bindings=None, trace=False, fail=None, rename_vars=True, fs_class=\(aqdefault\(aq) +Unify \fBfstruct1\fP with \fBfstruct2\fP, and return the resulting feature +structure. This unified feature structure is the minimal +feature structure that contains all feature value assignments from both +\fBfstruct1\fP and \fBfstruct2\fP, and that preserves all reentrancies. +.sp +If no such feature structure exists (because \fBfstruct1\fP and +\fBfstruct2\fP specify incompatible values for some feature), then +unification fails, and \fBunify\fP returns None. +.sp +Bound variables are replaced by their values. Aliased +variables are replaced by their representative variable +(if unbound) or the value of their representative variable +(if bound). I.e., if variable \fIv\fP is in \fBbindings\fP, +then \fIv\fP is replaced by \fBbindings[v]\fP\&. This will +be repeated until the variable is replaced by an unbound +variable or a non\-variable value. +.sp +Unbound variables are bound when they are unified with +values; and aliased when they are unified with variables. +I.e., if variable \fIv\fP is not in \fBbindings\fP, and is +unified with a variable or value \fIx\fP, then +\fBbindings[v]\fP is set to \fIx\fP\&. +.sp +If \fBbindings\fP is unspecified, then all variables are +assumed to be unbound. I.e., \fBbindings\fP defaults to an +empty dict. +.sp +.nf +.ft C +>>> from nltk.featstruct import FeatStruct +>>> FeatStruct(\(aq[a=?x]\(aq).unify(FeatStruct(\(aq[b=?x]\(aq)) +[a=?x, b=?x2] +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBbindings\fP (\fIdict\fP\fI(\fP\fIVariable \-> any\fP\fI)\fP) \-\- A set of variable bindings to be used and +updated during unification. +.IP \(bu 2 +\fBtrace\fP (\fIbool\fP) \-\- If true, generate trace output. +.IP \(bu 2 +\fBrename_vars\fP (\fIbool\fP) \-\- If True, then rename any variables in +\fBfstruct2\fP that are also used in \fBfstruct1\fP, in order to +avoid collisions on variable names. +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.grammar module +.sp +Basic data classes for representing context free grammars. A +"grammar" specifies which trees can represent the structure of a +given text. Each of these trees is called a "parse tree" for the +text (or simply a "parse"). In a "context free" grammar, the set of +parse trees for any piece of a text can depend only on that piece, and +not on the rest of the text (i.e., the piece\(aqs context). Context free +grammars are often used to find possible syntactic structures for +sentences. In this context, the leaves of a parse tree are word +tokens; and the node values are phrasal categories, such as \fBNP\fP +and \fBVP\fP\&. +.sp +The \fBCFG\fP class is used to encode context free grammars. Each +\fBCFG\fP consists of a start symbol and a set of productions. +The "start symbol" specifies the root node value for parse trees. For example, +the start symbol for syntactic parsing is usually \fBS\fP\&. Start +symbols are encoded using the \fBNonterminal\fP class, which is discussed +below. +.sp +A Grammar\(aqs "productions" specify what parent\-child relationships a parse +tree can contain. Each production specifies that a particular +node can be the parent of a particular set of children. 
For example,
+the production \fB<S> \-> <NP> <VP>\fP specifies that an \fBS\fP node can
+be the parent of an \fBNP\fP node and a \fBVP\fP node.
+.sp
+Grammar productions are implemented by the \fBProduction\fP class.
+Each \fBProduction\fP consists of a left hand side and a right hand
+side. The "left hand side" is a \fBNonterminal\fP that specifies the
+node type for a potential parent; and the "right hand side" is a list
+that specifies allowable children for that parent. This list
+consists of \fBNonterminals\fP and text types: each \fBNonterminal\fP
+indicates that the corresponding child may be a \fBTreeToken\fP with the
+specified node type; and each text type indicates that the
+corresponding child may be a \fBToken\fP with that type.
+.sp
+The \fBNonterminal\fP class is used to distinguish node values from leaf
+values. This prevents the grammar from accidentally using a leaf
+value (such as the English word "A") as the node of a subtree. Within
+a \fBCFG\fP, all node values are wrapped in the \fBNonterminal\fP
+class. Note, however, that the trees that are specified by the grammar do
+\fInot\fP include these \fBNonterminal\fP wrappers.
+.sp
+Grammars can also be given a more procedural interpretation. According to
+this interpretation, a Grammar specifies any tree structure \fItree\fP that
+can be produced by the following procedure:
+.nf
+Set tree to the start symbol
+Repeat until tree contains no more nonterminal leaves:
+.in +2
+Choose a production prod whose left hand side
+.in +2
+lhs is a nonterminal leaf of tree.
+.in -2
+Replace the nonterminal leaf with a subtree, whose node
+.in +2
+value is the value wrapped by the nonterminal lhs, and
+whose children are the right hand side of prod.
+.in -2
+.in -2
+.fi
+.sp
+The operation of replacing the left hand side (\fIlhs\fP) of a production
+with the right hand side (\fIrhs\fP) in a tree (\fItree\fP) is known as
+"expanding" \fIlhs\fP to \fIrhs\fP in \fItree\fP\&.
+.INDENT 0.0
+.TP
+.B class nltk.grammar.CFG(start, productions, calculate_leftcorners=True)
+Bases: \fBobject\fP
+.sp
+A context\-free grammar. A grammar consists of a start symbol and
+a set of productions. The set of terminals and nonterminals is
+implicitly specified by the productions.
+.sp
+If you need efficient key\-based access to productions, you
+can use a subclass to implement it.
+.INDENT 7.0
+.TP
+.B classmethod binarize(grammar, padding=\(aq@$@\(aq)
+Convert all non\-binary rules into binary by introducing
+new tokens. Example:
+.sp
+.nf
+.ft C
+Original:          A => B C D
+After conversion:  A => B A@$@B
+                   A@$@B => C D
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B check_coverage(tokens)
+Check whether the grammar rules cover the given list of tokens.
+If not, then raise an exception.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chomsky_normal_form(new_token_padding=\(aq@$@\(aq, flexible=False)
+Returns a new grammar that is in Chomsky Normal Form.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBnew_token_padding\fP \-\- Customise new rule formation during binarisation.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod eliminate_start(grammar)
+Eliminate the start rule in case it appears on the RHS.
+Example: if S \-> S0 S1 and S0 \-> S1 S,
+then a new rule S0_Sigma \-> S is added.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod fromstring(input, encoding=None)
+Return the grammar instance corresponding to the input string(s).
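+.sp
+For example (an illustrative doctest), a small grammar can be
+constructed and inspected as follows:
+.sp
+.nf
+.ft C
+>>> from nltk import CFG
+>>> grammar = CFG.fromstring("""
+\&... S \-> NP VP
+\&... NP \-> Det N
+\&... VP \-> V NP
+\&... Det \-> \(aqthe\(aq
+\&... N \-> \(aqdog\(aq | \(aqcat\(aq
+\&... V \-> \(aqchased\(aq
+\&... """)
+>>> grammar.start()
+S
+>>> len(grammar.productions())
+7
+.ft P
+.fi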
+.INDENT 7.0 +.TP +.B Parameters +\fBinput\fP \-\- a grammar, either in the form of a string or as a list of strings. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B is_binarised() +Return True if all productions are at most binary. +Note that there can still be empty and unary productions. +.UNINDENT +.INDENT 7.0 +.TP +.B is_chomsky_normal_form() +Return True if the grammar is of Chomsky Normal Form, i.e. all productions +are of the form A \-> B C, or A \-> "s". +.UNINDENT +.INDENT 7.0 +.TP +.B is_flexible_chomsky_normal_form() +Return True if all productions are of the forms +A \-> B C, A \-> B, or A \-> "s". +.UNINDENT +.INDENT 7.0 +.TP +.B is_leftcorner(cat, left) +True if left is a leftcorner of cat, where left can be a +terminal or a nonterminal. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBcat\fP (\fINonterminal\fP) \-\- the parent of the leftcorner +.IP \(bu 2 +\fBleft\fP (\fITerminal\fP\fI or \fP\fINonterminal\fP) \-\- the suggested leftcorner +.UNINDENT +.TP +.B Return type +bool +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B is_lexical() +Return True if all productions are lexicalised. +.UNINDENT +.INDENT 7.0 +.TP +.B is_nonempty() +Return True if there are no empty productions. +.UNINDENT +.INDENT 7.0 +.TP +.B is_nonlexical() +Return True if all lexical rules are "preterminals", that is, +unary rules which can be separated in a preprocessing step. +.sp +This means that all productions are of the forms +A \-> B1 ... Bn (n>=0), or A \-> "s". +.sp +Note: is_lexical() and is_nonlexical() are not opposites. +There are grammars which are neither, and grammars which are both. +.UNINDENT +.INDENT 7.0 +.TP +.B leftcorner_parents(cat) +Return the set of all nonterminals for which the given category +is a left corner. This is the inverse of the leftcorner relation. +.INDENT 7.0 +.TP +.B Parameters +\fBcat\fP (\fINonterminal\fP) \-\- the suggested leftcorner +.TP +.B Returns +the set of all parents to the leftcorner +.TP +.B Return type +set(Nonterminal) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B leftcorners(cat) +Return the set of all nonterminals that the given nonterminal +can start with, including itself. +.sp +This is the reflexive, transitive closure of the immediate +leftcorner relation: (A > B) iff (A \-> B beta) +.INDENT 7.0 +.TP +.B Parameters +\fBcat\fP (\fINonterminal\fP) \-\- the parent of the leftcorners +.TP +.B Returns +the set of all leftcorners +.TP +.B Return type +set(Nonterminal) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B max_len() +Return the right\-hand side length of the longest grammar production. +.UNINDENT +.INDENT 7.0 +.TP +.B min_len() +Return the right\-hand side length of the shortest grammar production. +.UNINDENT +.INDENT 7.0 +.TP +.B productions(lhs=None, rhs=None, empty=False) +Return the grammar productions, filtered by the left\-hand side +or the first item in the right\-hand side. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBlhs\fP \-\- Only return productions with the given left\-hand side. +.IP \(bu 2 +\fBrhs\fP \-\- Only return productions with the given first item +in the right\-hand side. +.IP \(bu 2 +\fBempty\fP \-\- Only return productions with an empty right\-hand side. +.UNINDENT +.TP +.B Returns +A list of productions matching the given constraints. 
+.TP
+.B Return type
+list(Production)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod remove_unitary_rules(grammar)
+Remove nonlexical unitary rules and convert them to
+lexical ones.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B start()
+Return the start symbol of the grammar.
+.INDENT 7.0
+.TP
+.B Return type
+Nonterminal
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.grammar.DependencyGrammar(productions)
+Bases: \fBobject\fP
+.sp
+A dependency grammar. A DependencyGrammar consists of a set of
+productions. Each production specifies a head/modifier relationship
+between a pair of words.
+.INDENT 7.0
+.TP
+.B contains(head, mod)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBhead\fP (\fIstr\fP) \-\- A head word.
+.IP \(bu 2
+\fBmod\fP (\fIstr\fP) \-\- A mod word, to test as a modifier of \(aqhead\(aq.
+.UNINDENT
+.TP
+.B Returns
+true if this \fBDependencyGrammar\fP contains a
+\fBDependencyProduction\fP mapping \(aqhead\(aq to \(aqmod\(aq.
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod fromstring(input)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.grammar.DependencyProduction(lhs, rhs)
+Bases: \fI\%nltk.grammar.Production\fP
+.sp
+A dependency grammar production. Each production maps a single
+head word to an unordered list of one or more modifier words.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.grammar.Nonterminal(symbol)
+Bases: \fBobject\fP
+.sp
+A non\-terminal symbol for a context free grammar. \fBNonterminal\fP
+is a wrapper class for node values; it is used by \fBProduction\fP
+objects to distinguish node values from leaf values.
+The node value that is wrapped by a \fBNonterminal\fP is known as its
+"symbol". Symbols are typically strings representing phrasal
+categories (such as \fB"NP"\fP or \fB"VP"\fP). However, more complex
+symbol types are sometimes used (e.g., for lexicalized grammars).
+Since symbols are node values, they must be immutable and
+hashable. Two \fBNonterminals\fP are considered equal if their
+symbols are equal.
+.INDENT 7.0
+.TP
+.B See
+\fBCFG\fP, \fBProduction\fP
+.TP
+.B Variables
+\fB_symbol\fP \-\- The node value corresponding to this
+\fBNonterminal\fP\&. This value must be immutable and hashable.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B symbol()
+Return the node value corresponding to this \fBNonterminal\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+(any)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.grammar.PCFG(start, productions, calculate_leftcorners=True)
+Bases: \fI\%nltk.grammar.CFG\fP
+.sp
+A probabilistic context\-free grammar. A PCFG consists of a
+start symbol and a set of productions with probabilities. The set of
+terminals and nonterminals is implicitly specified by the productions.
+.sp
+PCFG productions use the \fBProbabilisticProduction\fP class.
+\fBPCFGs\fP impose the constraint that the set of productions with
+any given left\-hand\-side must have probabilities that sum to 1
+(allowing for a small margin of error).
+.sp
+If you need efficient key\-based access to productions, you can use
+a subclass to implement it.
+.INDENT 7.0
+.TP
+.B Variables
+\fBEPSILON\fP \-\- The acceptable margin of error for checking that
+productions with a given left\-hand side have probabilities
+that sum to 1.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B EPSILON = 0.01
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod fromstring(input, encoding=None)
+Return a probabilistic context\-free grammar corresponding to the
+input string(s).
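+.sp
+For example (an illustrative doctest), a small probabilistic grammar
+can be built as follows; note that the probabilities for each
+left\-hand side sum to 1:
+.sp
+.nf
+.ft C
+>>> from nltk import PCFG
+>>> toy_pcfg = PCFG.fromstring("""
+\&... S \-> NP VP [1.0]
+\&... NP \-> \(aqthe\(aq N [1.0]
+\&... VP \-> V NP [1.0]
+\&... N \-> \(aqdog\(aq [0.5] | \(aqcat\(aq [0.5]
+\&... V \-> \(aqchased\(aq [1.0]
+\&... """)
+>>> toy_pcfg.start()
+S
+.ft P
+.fi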
+.INDENT 7.0
+.TP
+.B Parameters
+\fBinput\fP \-\- a grammar, either in the form of a string or else
+as a list of strings.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.grammar.ProbabilisticDependencyGrammar(productions, events, tags)
+Bases: \fBobject\fP
+.INDENT 7.0
+.TP
+.B contains(head, mod)
+Return True if this \fBDependencyGrammar\fP contains a
+\fBDependencyProduction\fP mapping \(aqhead\(aq to \(aqmod\(aq.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBhead\fP (\fIstr\fP) \-\- A head word.
+.IP \(bu 2
+\fBmod\fP (\fIstr\fP) \-\- A mod word, to test as a modifier of \(aqhead\(aq.
+.UNINDENT
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.grammar.ProbabilisticProduction(lhs, rhs, **prob)
+Bases: \fI\%nltk.grammar.Production\fP, \fI\%nltk.probability.ImmutableProbabilisticMixIn\fP
+.sp
+A probabilistic context free grammar production.
+A PCFG \fBProbabilisticProduction\fP is essentially just a \fBProduction\fP that
+has an associated probability, which represents how likely it is that
+this production will be used. In particular, the probability of a
+\fBProbabilisticProduction\fP records the likelihood that its right\-hand side is
+the correct instantiation for any given occurrence of its left\-hand side.
+.INDENT 7.0
+.TP
+.B See
+\fBProduction\fP
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.grammar.Production(lhs, rhs)
+Bases: \fBobject\fP
+.sp
+A grammar production. Each production maps a single symbol
+on the "left\-hand side" to a sequence of symbols on the
+"right\-hand side". (In the case of context\-free productions,
+the left\-hand side must be a \fBNonterminal\fP, and the right\-hand
+side is a sequence of terminals and \fBNonterminals\fP\&.)
+A "terminal" can be any immutable hashable object that is
+not a \fBNonterminal\fP\&. Typically, terminals are strings
+representing words, such as \fB"dog"\fP or \fB"under"\fP\&.
+.INDENT 7.0
+.TP
+.B See
+\fBCFG\fP
+.TP
+.B See
+\fBDependencyGrammar\fP
+.TP
+.B See
+\fBNonterminal\fP
+.TP
+.B Variables
+.INDENT 7.0
+.IP \(bu 2
+\fB_lhs\fP \-\- The left\-hand side of the production.
+.IP \(bu 2
+\fB_rhs\fP \-\- The right\-hand side of the production.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_lexical()
+Return True if the right\-hand side contains at least one terminal token.
+.INDENT 7.0
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B is_nonlexical()
+Return True if the right\-hand side only contains \fBNonterminals\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B lhs()
+Return the left\-hand side of this \fBProduction\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+Nonterminal
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B rhs()
+Return the right\-hand side of this \fBProduction\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+sequence(Nonterminal and terminal)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.grammar.induce_pcfg(start, productions)
+Induce a PCFG grammar from a list of productions.
+.sp
+The probability of a production A \-> B C in a PCFG is:
+.sp
+.nf
+.ft C
+                count(A \-> B C)
+P(B, C | A) =  \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-   where * is any right hand side
+                count(A \-> *)
+.ft P
+.fi
+.sp
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBstart\fP (\fINonterminal\fP) \-\- The start symbol
+.IP \(bu 2
+\fBproductions\fP (\fIlist\fP\fI(\fP\fIProduction\fP\fI)\fP) \-\- The list of productions that defines the grammar
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.grammar.nonterminals(symbols)
+Given a string containing a list of symbol names, return a list of
+\fBNonterminals\fP constructed from those symbols.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsymbols\fP (\fIstr\fP) \-\- The symbol name string. This string can be
+delimited by either spaces or commas.
+.TP
+.B Returns
+A list of \fBNonterminals\fP constructed from the symbol
+names given in \fBsymbols\fP\&. The \fBNonterminals\fP are sorted
+in the same order as the symbol names.
+.TP
+.B Return type
+list(Nonterminal)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.grammar.read_grammar(input, nonterm_parser, probabilistic=False, encoding=None)
+Return a pair consisting of a starting category and a list of
+\fBProductions\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBinput\fP \-\- a grammar, either in the form of a string or else
+as a list of strings.
+.IP \(bu 2
+\fBnonterm_parser\fP \-\- a function for parsing nonterminals.
+It should take a \fB(string, position)\fP as argument and
+return a \fB(nonterminal, position)\fP as result.
+.IP \(bu 2
+\fBprobabilistic\fP (\fIbool\fP) \-\- are the grammar rules probabilistic?
+.IP \(bu 2
+\fBencoding\fP (\fIstr\fP) \-\- the encoding of the grammar, if it is a binary string
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.help module
+.sp
+Provide structured access to documentation.
+.INDENT 0.0
+.TP
+.B nltk.help.brown_tagset(tagpattern=None)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.help.claws5_tagset(tagpattern=None)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.help.upenn_tagset(tagpattern=None)
+.UNINDENT
+.SS nltk.internals module
+.INDENT 0.0
+.TP
+.B class nltk.internals.Counter(initial_value=0)
+Bases: \fBobject\fP
+.sp
+A counter that auto\-increments each time its value is read.
+.INDENT 7.0
+.TP
+.B get()
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.internals.Deprecated(*args, **kwargs)
+Bases: \fBobject\fP
+.sp
+A base class used to mark deprecated classes. A typical usage is to
+alert users that the name of a class has changed:
+.sp
+.nf
+.ft C
+>>> from nltk.internals import Deprecated
+>>> class NewClassName:
+\&...     pass # All logic goes here.
+\&...
+>>> class OldClassName(Deprecated, NewClassName):
+\&...     "Use NewClassName instead."
+.ft P
+.fi
+.sp
+The docstring of the deprecated class will be used in the
+deprecation warning message.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.internals.ElementWrapper(etree)
+Bases: \fBobject\fP
+.sp
+A wrapper around ElementTree Element objects whose main purpose is
+to provide nicer __repr__ and __str__ methods. In addition, any
+of the wrapped Element\(aqs methods that return other Element objects
+are overridden to wrap those values before returning them.
+.sp
+This makes Elements more convenient to work with in
+interactive sessions and doctests, at the expense of some
+efficiency.
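+.sp
+A minimal illustrative session (assuming a small XML fragment):
+.sp
+.nf
+.ft C
+>>> from xml.etree.ElementTree import fromstring
+>>> from nltk.internals import ElementWrapper
+>>> elt = ElementWrapper(fromstring(\(aq<doc><w>hello</w></doc>\(aq))
+>>> elt.find(\(aqw\(aq).unwrap().text
+\(aqhello\(aq
+.ft P
+.fi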
+.INDENT 7.0
+.TP
+.B find(path)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B findall(path)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B getchildren()
+.UNINDENT
+.INDENT 7.0
+.TP
+.B getiterator(tag=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B makeelement(tag, attrib)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B unwrap()
+Return the Element object wrapped by this wrapper.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B exception nltk.internals.ReadError(expected, position)
+Bases: \fBValueError\fP
+.sp
+Exception raised by read_* functions when they fail.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBposition\fP \-\- The index in the input string where an error occurred.
+.IP \(bu 2
+\fBexpected\fP \-\- What was expected when an error occurred.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.config_java(bin=None, options=None, verbose=False)
+Configure nltk\(aqs java interface, by letting nltk know where it can
+find the Java binary, and what extra options (if any) should be
+passed to Java when it is run.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBbin\fP (\fIstr\fP) \-\- The full path to the Java binary. If not specified,
+then nltk will search the system for a Java binary; and if
+one is not found, it will raise a \fBLookupError\fP exception.
+.IP \(bu 2
+\fBoptions\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- A list of options that should be passed to the
+Java binary when it is called. A common value is
+\fB\(aq\-Xmx512m\(aq\fP, which tells the Java binary to increase
+the maximum heap size to 512 megabytes. If no options are
+specified, then do not modify the options list.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.deprecated(message)
+A decorator used to mark functions as deprecated. This will cause
+a warning to be printed when the function is used. Usage:
+.sp
+.nf
+.ft C
+>>> from nltk.internals import deprecated
+>>> @deprecated(\(aqUse foo() instead\(aq)
+\&... def bar(x):
+\&...     print(x/10)
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.find_binary(name, path_to_bin=None, env_vars=(), searchpath=(), binary_names=None, url=None, verbose=False)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.find_binary_iter(name, path_to_bin=None, env_vars=(), searchpath=(), binary_names=None, url=None, verbose=False)
+Search for a file to be used by nltk.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBname\fP \-\- The name or path of the file.
+.IP \(bu 2
+\fBpath_to_bin\fP \-\- The user\-supplied binary location (deprecated)
+.IP \(bu 2
+\fBenv_vars\fP \-\- A list of environment variable names to check.
+.IP \(bu 2
+\fBfile_names\fP \-\- A list of alternative file names to check.
+.IP \(bu 2
+\fBsearchpath\fP \-\- List of directories to search.
+.IP \(bu 2
+\fBurl\fP \-\- URL presented to user for download help.
+.IP \(bu 2
+\fBverbose\fP \-\- Whether or not to print path when a file is found.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.find_dir(filename, env_vars=(), searchpath=(), file_names=None, url=None, verbose=False)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.find_file(filename, env_vars=(), searchpath=(), file_names=None, url=None, verbose=False)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.find_file_iter(filename, env_vars=(), searchpath=(), file_names=None, url=None, verbose=False, finding_dir=False)
+Search for a file to be used by nltk.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfilename\fP \-\- The name or path of the file.
+.IP \(bu 2
+\fBenv_vars\fP \-\- A list of environment variable names to check.
+.IP \(bu 2
+\fBfile_names\fP \-\- A list of alternative file names to check.
+.IP \(bu 2
+\fBsearchpath\fP \-\- List of directories to search.
+.IP \(bu 2
+\fBurl\fP \-\- URL presented to user for download help.
+.IP \(bu 2
+\fBverbose\fP \-\- Whether or not to print path when a file is found.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.find_jar(name_pattern, path_to_jar=None, env_vars=(), searchpath=(), url=None, verbose=False, is_regex=False)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.find_jar_iter(name_pattern, path_to_jar=None, env_vars=(), searchpath=(), url=None, verbose=False, is_regex=False)
+Search for a jar that is used by nltk.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBname_pattern\fP \-\- The name of the jar file
+.IP \(bu 2
+\fBpath_to_jar\fP \-\- The user\-supplied jar location, or None.
+.IP \(bu 2
+\fBenv_vars\fP \-\- A list of environment variable names to check
+in addition to the CLASSPATH variable which is
+checked by default.
+.IP \(bu 2
+\fBsearchpath\fP \-\- List of directories to search.
+.IP \(bu 2
+\fBis_regex\fP \-\- Whether name is a regular expression.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.find_jars_within_path(path_to_jars)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.import_from_stdlib(module)
+When python is run from within the nltk/ directory tree, the
+current directory is included at the beginning of the search path.
+Unfortunately, that means that modules within nltk can sometimes
+shadow standard library modules. As an example, the stdlib
+\(aqinspect\(aq module will attempt to import the stdlib \(aqtokenize\(aq
+module, but will end up importing NLTK\(aqs \(aqtokenize\(aq module
+instead (causing the import to fail).
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.is_writable(path)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=True)
+Execute the given java command, by opening a subprocess that calls
+Java. If java has not yet been configured, it will be configured
+by calling \fBconfig_java()\fP with no arguments.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBcmd\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The java command that should be called, formatted as
+a list of strings. Typically, the first string will be the name
+of the java class; and the remaining strings will be arguments
+for that java class.
+.IP \(bu 2
+\fBclasspath\fP (\fIstr\fP) \-\- A \fB\(aq:\(aq\fP separated list of directories, JAR
+archives, and ZIP archives to search for class files.
+.IP \(bu 2
+\fBstdin\fP, \fBstdout\fP, \fBstderr\fP \-\- Specify the executed programs\(aq
+standard input, standard output and standard error file
+handles, respectively. Valid values are \fBsubprocess.PIPE\fP,
+an existing file descriptor (a positive integer), an existing
+file object, \(aqpipe\(aq, \(aqstdout\(aq, \(aqdevnull\(aq and None. \fBsubprocess.PIPE\fP indicates that a
+new pipe to the child should be created. With None, no
+redirection will occur; the child\(aqs file handles will be
+inherited from the parent. Additionally, stderr can be
+\fBsubprocess.STDOUT\fP, which indicates that the stderr data
+from the applications should be captured into the same file
+handle as for stdout.
+.IP \(bu 2
+\fBblocking\fP \-\- If \fBfalse\fP, then return immediately after
+spawning the subprocess. In this case, the return value is
+the \fBPopen\fP object, and not a \fB(stdout, stderr)\fP tuple.
+.UNINDENT
+.TP
+.B Returns
+If \fBblocking=True\fP, then return a tuple \fB(stdout,
+stderr)\fP, containing the stdout and stderr outputs generated
+by the java command if the \fBstdout\fP and \fBstderr\fP parameters
+were set to \fBsubprocess.PIPE\fP; or None otherwise. If
+\fBblocking=False\fP, then return a \fBsubprocess.Popen\fP object.
+.TP
+.B Raises
+\fBOSError\fP \-\- If the java command returns a nonzero return code.
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.overridden(method)
+.INDENT 7.0
+.TP
+.B Returns
+True if \fBmethod\fP overrides some method with the same
+name in a base class.
+.UNINDENT
+.sp
+This is typically used when defining
+abstract base classes or interfaces, to allow subclasses to define
+either of two related methods:
+.sp
+.nf
+.ft C
+>>> class EaterI:
+\&...     \(aq\(aq\(aqSubclass must define eat() or batch_eat().\(aq\(aq\(aq
+\&...     def eat(self, food):
+\&...         if overridden(self.batch_eat):
+\&...             return self.batch_eat([food])[0]
+\&...         else:
+\&...             raise NotImplementedError()
+\&...     def batch_eat(self, foods):
+\&...         return [self.eat(food) for food in foods]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.raise_unorderable_types(ordering, a, b)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.read_int(s, start_position)
+If an integer begins at the specified position in the given
+string, then return a tuple \fB(val, end_position)\fP containing the
+value of the integer and the position where it ends. Otherwise,
+raise a \fBReadError\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP (\fIstr\fP) \-\- The string to check for a Python integer.
+.IP \(bu 2
+\fBstart_position\fP (\fIint\fP) \-\- The position in the string \fBs\fP at which
+to begin regex matching.
+.UNINDENT
+.TP
+.B Returns
+A tuple containing the matched integer cast to an int,
+and the end position of the int in \fBs\fP\&.
+.TP
+.B Return type
+tuple(int, int)
+.TP
+.B Raises
+\fBReadError\fP \-\- If the \fB_READ_INT_RE\fP regex doesn\(aqt return a
+match in \fBs\fP at \fBstart_position\fP\&.
+.TP
+.B Example
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.internals import read_int
+>>> read_int(\(aq42 is the answer\(aq, 0)
+(42, 2)
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.read_number(s, start_position)
+If an integer or float begins at the specified position in the
+given string, then return a tuple \fB(val, end_position)\fP
+containing the value of the number and the position where it ends.
+Otherwise, raise a \fBReadError\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP (\fIstr\fP) \-\- The string to check for a Python number.
+.IP \(bu 2
+\fBstart_position\fP (\fIint\fP) \-\- The position in the string \fBs\fP at which
+to begin regex matching.
+.UNINDENT
+.TP
+.B Returns
+A tuple containing the matched number cast to a \fBfloat\fP,
+and the end position of the number in \fBs\fP\&.
+.TP
+.B Return type
+tuple(float, int)
+.TP
+.B Raises
+\fBReadError\fP \-\- If the \fB_READ_NUMBER_VALUE\fP regex doesn\(aqt return a
+match in \fBs\fP at \fBstart_position\fP\&.
+.TP
+.B Example
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.internals import read_number
+>>> read_number(\(aqPi is 3.14159\(aq, 6)
+(3.14159, 13)
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.read_str(s, start_position)
+If a Python string literal begins at the specified position in the
+given string, then return a tuple \fB(val, end_position)\fP
+containing the value of the string literal and the position where
+it ends. Otherwise, raise a \fBReadError\fP\&.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP (\fIstr\fP) \-\- The string to check for a Python string literal.
+.IP \(bu 2
+\fBstart_position\fP (\fIint\fP) \-\- The position in the string \fBs\fP at which
+to begin regex matching.
+.UNINDENT
+.TP
+.B Returns
+A tuple containing the matched string literal evaluated as a
+string and the end position of the string literal.
+.TP
+.B Return type
+tuple(str, int)
+.TP
+.B Raises
+.INDENT 7.0
+.IP \(bu 2
+\fBReadError\fP \-\- If the \fB_STRING_START_RE\fP regex doesn\(aqt return a
+match in \fBs\fP at \fBstart_position\fP, i.e., no open quote; or if the
+\fB_STRING_END_RE\fP regex doesn\(aqt return a match in \fBs\fP at the
+end of the first match, i.e., no close quote.
+.IP \(bu 2
+\fBValueError\fP \-\- If an invalid string (i.e., one containing an invalid
+escape sequence) is passed into the \fBeval\fP\&.
+.UNINDENT
+.TP
+.B Example
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.internals import read_str
+>>> read_str(\(aq"Hello", World!\(aq, 0)
+(\(aqHello\(aq, 7)
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.internals.slice_bounds(sequence, slice_obj, allow_step=False)
+Given a slice, return the corresponding (start, stop) bounds,
+taking into account None indices and negative indices. The
+following guarantees are made for the returned start and stop values:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+0 <= start <= len(sequence)
+.IP \(bu 2
+0 <= stop <= len(sequence)
+.IP \(bu 2
+start <= stop
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Raises
+\fBValueError\fP \-\- If \fBslice_obj.step\fP is not None.
+.TP
+.B Parameters
+\fBallow_step\fP \-\- If true, then the slice object may have a
+non\-None step. If it does, then return a tuple
+(start, stop, step).
+.UNINDENT
+.UNINDENT
+.SS nltk.jsontags module
+.sp
+Register JSON tags, so the nltk data loader knows what module and class to look for.
+.sp
+NLTK uses simple \(aq!\(aq tags to mark the types of objects, but the fully\-qualified
+"\fI\%tag:nltk.org,2011\fP:" prefix is also accepted in case anyone ends up
+using it.
+.INDENT 0.0
+.TP
+.B class nltk.jsontags.JSONTaggedDecoder(*, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None)
+Bases: \fBjson.decoder.JSONDecoder\fP
+.INDENT 7.0
+.TP
+.B decode(s)
+Return the Python representation of \fBs\fP (a \fBstr\fP instance
+containing a JSON document).
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod decode_obj(obj)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.jsontags.JSONTaggedEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)
+Bases: \fBjson.encoder.JSONEncoder\fP
+.INDENT 7.0
+.TP
+.B default(obj)
+Implement this method in a subclass such that it returns
+a serializable object for \fBobj\fP, or calls the base implementation
+(to raise a \fBTypeError\fP).
+.sp
+For example, to support arbitrary iterators, you could
+implement default like this:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+def default(self, o):
+    try:
+        iterable = iter(o)
+    except TypeError:
+        pass
+    else:
+        return list(iterable)
+    # Let the base class default method raise the TypeError
+    return JSONEncoder.default(self, o)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.jsontags.register_tag(cls)
+Decorates a class to register its JSON tag.
+.UNINDENT
+.SS nltk.lazyimport module
+.sp
+Helper to enable simple lazy module import.
+.sp
+\(aqLazy\(aq means the actual import is deferred until an attribute is
+requested from the module\(aqs namespace. This has the advantage of
+allowing all imports to be done at the top of a script (in a
+prominent and visible place) without having a great impact
+on startup time.
+.sp
+Copyright (c) 1999\-2005, Marc\-Andre Lemburg; \fI\%mailto:mal@lemburg.com\fP
+See the documentation for further information on copyrights,
+or contact the author. All Rights Reserved.
+.INDENT 0.0
+.TP
+.B class nltk.lazyimport.LazyModule(name, locals, globals=None)
+Bases: \fBobject\fP
+.sp
+Lazy module class.
+.sp
+Lazy modules are imported into the given namespaces whenever a
+non\-special attribute (there are some attributes like __doc__
+that class instances handle without calling __getattr__) is
+requested. The module is then registered under the given name
+in locals usually replacing the import wrapper instance. The
+import itself is done using globals as global namespace.
+.sp
+Example of creating a lazy load module:
+.sp
+ISO = LazyModule(\(aqISO\(aq,locals(),globals())
+.sp
+Later, requesting an attribute from ISO will load the module
+automatically into the locals() namespace, overriding the
+LazyModule instance:
+.sp
+t = ISO.Week(1998,1,1)
+.UNINDENT
+.SS nltk.probability module
+.sp
+Classes for representing and processing probabilistic information.
+.sp
+The \fBFreqDist\fP class is used to encode "frequency distributions",
+which count the number of times that each outcome of an experiment
+occurs.
+.sp
+The \fBProbDistI\fP class defines a standard interface for "probability
+distributions", which encode the probability of each outcome for an
+experiment. There are two types of probability distribution:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+"derived probability distributions" are created from frequency
+distributions. They attempt to model the probability distribution
+that generated the frequency distribution.
+.IP \(bu 2
+"analytic probability distributions" are created directly from
+parameters (such as variance).
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The \fBConditionalFreqDist\fP class and \fBConditionalProbDistI\fP interface
+are used to encode conditional distributions. Conditional probability
+distributions can be derived or analytic; but currently the only
+implementation of the \fBConditionalProbDistI\fP interface is
+\fBConditionalProbDist\fP, a derived distribution.
+.INDENT 0.0
+.TP
+.B class nltk.probability.ConditionalFreqDist(cond_samples=None)
+Bases: \fBcollections.defaultdict\fP
+.sp
+A collection of frequency distributions for a single experiment
+run under different conditions. Conditional frequency
+distributions are used to record the number of times each sample
+occurred, given the condition under which the experiment was run.
+For example, a conditional frequency distribution could be used to
+record the frequency of each word (type) in a document, given its
+length.
Formally, a conditional frequency distribution can be
+defined as a function that maps from each condition to the
+FreqDist for the experiment under that condition.
+.sp
+Conditional frequency distributions are typically constructed by
+repeatedly running an experiment under a variety of conditions,
+and incrementing the sample outcome counts for the appropriate
+conditions. For example, the following code will produce a
+conditional frequency distribution that encodes how often each
+word type occurs, given the length of that word type:
+.sp
+.nf
+.ft C
+>>> from nltk.probability import ConditionalFreqDist
+>>> from nltk.tokenize import word_tokenize
+>>> sent = "the the the dog dog some other words that we do not care about"
+>>> cfdist = ConditionalFreqDist()
+>>> for word in word_tokenize(sent):
+\&...     condition = len(word)
+\&...     cfdist[condition][word] += 1
+.ft P
+.fi
+.sp
+An equivalent way to do this is with the initializer:
+.sp
+.nf
+.ft C
+>>> cfdist = ConditionalFreqDist((len(word), word) for word in word_tokenize(sent))
+.ft P
+.fi
+.sp
+The frequency distribution for each condition is accessed using
+the indexing operator:
+.sp
+.nf
+.ft C
+>>> cfdist[3]
+FreqDist({\(aqthe\(aq: 3, \(aqdog\(aq: 2, \(aqnot\(aq: 1})
+>>> cfdist[3].freq(\(aqthe\(aq)
+0.5
+>>> cfdist[3][\(aqdog\(aq]
+2
+.ft P
+.fi
+.sp
+When the indexing operator is used to access the frequency
+distribution for a condition that has not been accessed before,
+\fBConditionalFreqDist\fP creates a new empty FreqDist for that
+condition.
+.INDENT 7.0
+.TP
+.B N()
+Return the total number of sample outcomes that have been
+recorded by this \fBConditionalFreqDist\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B conditions()
+Return a list of the conditions that have been accessed for
+this \fBConditionalFreqDist\fP\&. Use the indexing operator to
+access the frequency distribution for a given condition.
+Note that the frequency distributions for some conditions
+may contain zero sample outcomes.
+.INDENT 7.0
+.TP
+.B Return type
+list
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B plot(*args, samples=None, title=\(aq\(aq, cumulative=False, percents=False, conditions=None, show=True, **kwargs)
+Plot the given samples from the conditional frequency distribution.
+For a cumulative plot, specify cumulative=True. Additional
+\fI*args\fP and \fI**kwargs\fP are passed to matplotlib\(aqs plot function.
+(Requires Matplotlib to be installed.)
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsamples\fP (\fIlist\fP) \-\- The samples to plot
+.IP \(bu 2
+\fBtitle\fP (\fIstr\fP) \-\- The title for the graph
+.IP \(bu 2
+\fBcumulative\fP (\fIbool\fP) \-\- Whether the plot is cumulative. (default = False)
+.IP \(bu 2
+\fBpercents\fP (\fIbool\fP) \-\- Whether the plot uses percents instead of counts. (default = False)
+.IP \(bu 2
+\fBconditions\fP (\fIlist\fP) \-\- The conditions to plot (default is all)
+.IP \(bu 2
+\fBshow\fP (\fIbool\fP) \-\- Whether to show the plot, or only return the ax.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B tabulate(*args, **kwargs)
+Tabulate the given samples from the conditional frequency distribution.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsamples\fP (\fIlist\fP) \-\- The samples to plot
+.IP \(bu 2
+\fBconditions\fP (\fIlist\fP) \-\- The conditions to plot (default is all)
+.IP \(bu 2
+\fBcumulative\fP \-\- A flag to specify whether the freqs are cumulative (default = False)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.probability.ConditionalProbDist(cfdist, probdist_factory, *factory_args, **factory_kw_args)
+Bases: \fI\%nltk.probability.ConditionalProbDistI\fP
+.sp
+A conditional probability distribution modeling the experiments
+that were used to generate a conditional frequency distribution.
+A ConditionalProbDist is constructed from a
+\fBConditionalFreqDist\fP and a \fBProbDist\fP factory:
+.INDENT 7.0
+.IP \(bu 2
+The \fBConditionalFreqDist\fP specifies the frequency
+distribution for each condition.
+.IP \(bu 2
+The \fBProbDist\fP factory is a function that takes a
+condition\(aqs frequency distribution, and returns its
+probability distribution. A \fBProbDist\fP class\(aqs name (such as
+\fBMLEProbDist\fP or \fBHeldoutProbDist\fP) can be used to specify
+that class\(aqs constructor.
+.UNINDENT
+.sp
+The first argument to the \fBProbDist\fP factory is the frequency
+distribution that it should model; and the remaining arguments are
+specified by the \fBfactory_args\fP parameter to the
+\fBConditionalProbDist\fP constructor. For example, the following
+code constructs a \fBConditionalProbDist\fP, where the probability
+distribution for each condition is an \fBELEProbDist\fP with 10 bins:
+.sp
+.nf
+.ft C
+>>> from nltk.corpus import brown
+>>> from nltk.probability import ConditionalFreqDist
+>>> from nltk.probability import ConditionalProbDist, ELEProbDist
+>>> cfdist = ConditionalFreqDist(brown.tagged_words()[:5000])
+>>> cpdist = ConditionalProbDist(cfdist, ELEProbDist, 10)
+>>> cpdist[\(aqpassed\(aq].max()
+\(aqVBD\(aq
+>>> cpdist[\(aqpassed\(aq].prob(\(aqVBD\(aq)
+0.423...
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.probability.ConditionalProbDistI
+Bases: \fBdict\fP
+.sp
+A collection of probability distributions for a single experiment
+run under different conditions. Conditional probability
+distributions are used to estimate the likelihood of each sample,
+given the condition under which the experiment was run. For
+example, a conditional probability distribution could be used to
+estimate the probability of each word type in a document, given
+the length of the word type. Formally, a conditional probability
+distribution can be defined as a function that maps from each
+condition to the \fBProbDist\fP for the experiment under that
+condition.
+.INDENT 7.0
+.TP
+.B conditions()
+Return a list of the conditions that are represented by
+this \fBConditionalProbDist\fP\&. Use the indexing operator to
+access the probability distribution for a given condition.
+.INDENT 7.0
+.TP
+.B Return type
+list
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.probability.CrossValidationProbDist(freqdists, bins)
+Bases: \fI\%nltk.probability.ProbDistI\fP
+.sp
+The cross\-validation estimate for the probability distribution of
+the experiment used to generate a set of frequency distributions.
+The "cross\-validation estimate" for the probability of a sample
+is found by averaging the held\-out estimates for the sample in
+each pair of frequency distributions.
+.INDENT 7.0
+.TP
+.B SUM_TO_ONE = False
+True if the probabilities of the samples in this probability
+distribution will always sum to one.
+.UNINDENT +.INDENT 7.0 +.TP +.B discount() +Return the ratio by which counts are discounted on average: c*/c +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B freqdists() +Return the list of frequency distributions that this \fBProbDist\fP is based on. +.INDENT 7.0 +.TP +.B Return type +list(FreqDist) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.DictionaryConditionalProbDist(probdist_dict) +Bases: \fI\%nltk.probability.ConditionalProbDistI\fP +.sp +An alternative ConditionalProbDist that simply wraps a dictionary of +ProbDists rather than creating these from FreqDists. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.DictionaryProbDist(prob_dict=None, log=False, normalize=False) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +A probability distribution whose probabilities are directly +specified by a given dictionary. The given dictionary maps +samples to probabilities. +.INDENT 7.0 +.TP +.B logprob(sample) +Return the base 2 logarithm of the probability for a given sample. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.ELEProbDist(freqdist, bins=None) +Bases: \fI\%nltk.probability.LidstoneProbDist\fP +.sp +The expected likelihood estimate for the probability distribution +of the experiment used to generate a frequency distribution. The +"expected likelihood estimate" approximates the probability of a +sample with count \fIc\fP from an experiment with \fIN\fP outcomes and +\fIB\fP bins as \fI(c+0.5)/(N+B/2)\fP\&. This is equivalent to adding 0.5 +to the count for each bin, and taking the maximum likelihood +estimate of the resulting frequency distribution. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.FreqDist(samples=None) +Bases: \fBcollections.Counter\fP +.sp +A frequency distribution for the outcomes of an experiment. A +frequency distribution records the number of times each outcome of +an experiment has occurred. For example, a frequency distribution +could be used to record the frequency of each word type in a +document. 
Formally, a frequency distribution can be defined as a
+function mapping from each sample to the number of times that
+sample occurred as an outcome.
+.sp
+Frequency distributions are generally constructed by running a
+number of experiments, and incrementing the count for a sample
+every time it is an outcome of an experiment. For example, the
+following code will produce a frequency distribution that encodes
+how often each word occurs in a text:
+.sp
+.nf
+.ft C
+>>> from nltk.tokenize import word_tokenize
+>>> from nltk.probability import FreqDist
+>>> sent = \(aqThis is an example sentence\(aq
+>>> fdist = FreqDist()
+>>> for word in word_tokenize(sent):
+\&...     fdist[word.lower()] += 1
+.ft P
+.fi
+.sp
+An equivalent way to do this is with the initializer:
+.sp
+.nf
+.ft C
+>>> fdist = FreqDist(word.lower() for word in word_tokenize(sent))
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B B()
+Return the total number of sample values (or "bins") that
+have counts greater than zero. For the total
+number of sample outcomes recorded, use \fBFreqDist.N()\fP\&.
+(FreqDist.B() is the same as len(FreqDist).)
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B N()
+Return the total number of sample outcomes that have been
+recorded by this FreqDist. For the number of unique
+sample values (or bins) with counts greater than zero, use
+\fBFreqDist.B()\fP\&.
+.INDENT 7.0
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Nr(r, bins=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B copy()
+Create a copy of this frequency distribution.
+.INDENT 7.0
+.TP
+.B Return type
+FreqDist
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B freq(sample)
+Return the frequency of a given sample. The frequency of a
+sample is defined as the count of that sample divided by the
+total number of sample outcomes that have been recorded by
+this FreqDist. The count of a sample is defined as the
+number of times that sample outcome was recorded by this
+FreqDist. Frequencies are always real numbers in the range
+[0, 1].
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsample\fP (\fIany\fP) \-\- the sample whose frequency
+should be returned.
+.TP
+.B Return type
+float
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B hapaxes()
+Return a list of all samples that occur once (hapax legomena).
+.INDENT 7.0
+.TP
+.B Return type
+list
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B max()
+Return the sample with the greatest number of outcomes in this
+frequency distribution. If two or more samples have the same
+number of outcomes, return one of them; which sample is
+returned is undefined. If no outcomes have occurred in this
+frequency distribution, return None.
+.INDENT 7.0
+.TP
+.B Returns
+The sample with the maximum number of outcomes in this
+frequency distribution.
+.TP
+.B Return type
+any or None
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pformat(maxlen=10)
+Return a string representation of this FreqDist.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBmaxlen\fP (\fIint\fP) \-\- The maximum number of items to display
+.TP
+.B Return type
+string
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B plot(*args, title=\(aq\(aq, cumulative=False, percents=False, show=True, **kwargs)
+Plot samples from the frequency distribution
+displaying the most frequent sample first. If an integer
+parameter is supplied, stop after this many samples have been
+plotted. For a cumulative plot, specify cumulative=True. Additional
+\fI**kwargs\fP are passed to matplotlib\(aqs plot function.
+(Requires Matplotlib to be installed.)
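+.sp
+For instance, building on the \fBfdist\fP built above (illustrative;
+assumes Matplotlib is installed):
+.sp
+.nf
+.ft C
+>>> ax = fdist.plot(3, cumulative=True, show=False)
+.ft P
+.fi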
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtitle\fP (\fIstr\fP) \-\- The title for the graph. +.IP \(bu 2 +\fBcumulative\fP (\fIbool\fP) \-\- Whether the plot is cumulative. (default = False) +.IP \(bu 2 +\fBpercents\fP (\fIbool\fP) \-\- Whether the plot uses percents instead of counts. (default = False) +.IP \(bu 2 +\fBshow\fP (\fIbool\fP) \-\- Whether to show the plot, or only return the ax. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B pprint(maxlen=10, stream=None) +Print a string representation of this FreqDist to \(aqstream\(aq\&. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBmaxlen\fP (\fIint\fP) \-\- The maximum number of items to print +.IP \(bu 2 +\fBstream\fP \-\- The stream to print to. stdout by default +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B r_Nr(bins=None) +Return the dictionary mapping r to Nr, the number of samples with frequency r, where Nr > 0. +.INDENT 7.0 +.TP +.B Parameters +\fBbins\fP (\fIint\fP) \-\- The number of possible sample outcomes. \fBbins\fP +is used to calculate Nr(0). In particular, Nr(0) is +\fBbins\-self.B()\fP\&. If \fBbins\fP is not specified, it +defaults to \fBself.B()\fP (so Nr(0) will be 0). +.TP +.B Return type +dict +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B setdefault(key, val) +Override \fBCounter.setdefault()\fP to invalidate the cached N +.UNINDENT +.INDENT 7.0 +.TP +.B tabulate(*args, **kwargs) +Tabulate the given samples from the frequency distribution (cumulative), +displaying the most frequent sample first. If an integer +parameter is supplied, stop after this many samples have been +tabulated. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsamples\fP (\fIlist\fP) \-\- The samples to tabulate (default is all samples) +.IP \(bu 2 +\fBcumulative\fP \-\- A flag to specify whether the freqs are cumulative (default = False) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B update(*args, **kwargs) +Override \fBCounter.update()\fP to invalidate the cached N +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.HeldoutProbDist(base_fdist, heldout_fdist, bins=None) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +The heldout estimate for the probability distribution of the +experiment used to generate two frequency distributions. These +two frequency distributions are called the "heldout frequency +distribution" and the "base frequency distribution." The +"heldout estimate" uses the "heldout frequency +distribution" to predict the probability of each sample, given its +frequency in the "base frequency distribution". +.sp +In particular, the heldout estimate approximates the probability +for a sample that occurs \fIr\fP times in the base distribution as +the average frequency in the heldout distribution of all samples +that occur \fIr\fP times in the base distribution. +.sp +This average frequency is \fITr[r]/(Nr[r].N)\fP, where: +.INDENT 7.0 +.IP \(bu 2 +\fITr[r]\fP is the total count in the heldout distribution for +all samples that occur \fIr\fP times in the base distribution. +.IP \(bu 2 +\fINr[r]\fP is the number of samples that occur \fIr\fP times in +the base distribution. +.IP \(bu 2 +\fIN\fP is the number of outcomes recorded by the heldout +frequency distribution. +.UNINDENT +.sp +In order to increase the efficiency of the \fBprob\fP member +function, \fITr[r]/(Nr[r].N)\fP is precomputed for each value of \fIr\fP +when the \fBHeldoutProbDist\fP is created.
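+.sp +A minimal construction sketch (the two toy token lists here are hypothetical; the expected value follows from the formula above): +.sp +.nf +.ft C +>>> from nltk.probability import FreqDist, HeldoutProbDist +>>> base = FreqDist(\(aqthe cat sat on the mat\(aq.split()) +>>> heldout = FreqDist(\(aqthe dog sat on the log\(aq.split()) +>>> hpd = HeldoutProbDist(base, heldout) +>>> hpd.prob(\(aqthe\(aq)  # Tr[2]/(Nr[2]*N) = 2/(1*6) +0.3333333333333333 +.ft P +.fi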
+.INDENT 7.0 +.TP +.B Variables +.INDENT 7.0 +.IP \(bu 2 +\fB_estimate\fP \-\- A list mapping from \fIr\fP, the number of +times that a sample occurs in the base distribution, to the +probability estimate for that sample. \fB_estimate[r]\fP is +calculated by finding the average frequency in the heldout +distribution of all samples that occur \fIr\fP times in the base +distribution. In particular, \fB_estimate[r]\fP = +\fITr[r]/(Nr[r].N)\fP\&. +.IP \(bu 2 +\fB_max_r\fP \-\- The maximum number of times that any sample occurs +in the base distribution. \fB_max_r\fP is used to decide how +large \fB_estimate\fP must be. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B SUM_TO_ONE = False +True if the probabilities of the samples in this probability +distribution will always sum to one. +.UNINDENT +.INDENT 7.0 +.TP +.B base_fdist() +Return the base frequency distribution that this probability +distribution is based on. +.INDENT 7.0 +.TP +.B Return type +FreqDist +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B discount() +Return the ratio by which counts are discounted on average: c*/c +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B heldout_fdist() +Return the heldout frequency distribution that this +probability distribution is based on. +.INDENT 7.0 +.TP +.B Return type +FreqDist +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.ImmutableProbabilisticMixIn(**kwargs) +Bases: \fI\%nltk.probability.ProbabilisticMixIn\fP +.INDENT 7.0 +.TP +.B set_logprob(logprob) +Set the log probability associated with this object to +\fBlogprob\fP\&. I.e., set the probability associated with this +object to \fB2**(logprob)\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBlogprob\fP (\fIfloat\fP) \-\- The new log probability +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_prob(prob) +Set the probability associated with this object to \fBprob\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBprob\fP (\fIfloat\fP) \-\- The new probability +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.KneserNeyProbDist(freqdist, bins=None, discount=0.75) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +Kneser\-Ney estimate of a probability distribution. This is a version of +back\-off that estimates how likely an n\-gram is, provided the (n\-1)\-gram has +been seen in training. Extends the ProbDistI interface, requires a trigram +FreqDist instance to train on. Optionally, a discount value other than the +default can be specified; the default discount is set to 0.75. +.INDENT 7.0 +.TP +.B discount() +Return the value by which counts are discounted. By default set to 0.75. +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability.
If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(trigram) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBtrigram\fP (\fItuple\fP) \-\- The trigram whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_discount(discount) +Set the value by which counts are discounted to the value of discount. +.INDENT 7.0 +.TP +.B Parameters +\fBdiscount\fP (\fIfloat\fP\fI (\fP\fIpreferred\fP\fI, \fP\fIbut int possible\fP\fI)\fP) \-\- the new value to discount counts by +.TP +.B Return type +None +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.LaplaceProbDist(freqdist, bins=None) +Bases: \fI\%nltk.probability.LidstoneProbDist\fP +.sp +The Laplace estimate for the probability distribution of the +experiment used to generate a frequency distribution. The +"Laplace estimate" approximates the probability of a sample with +count \fIc\fP from an experiment with \fIN\fP outcomes and \fIB\fP bins as +\fI(c+1)/(N+B)\fP\&. This is equivalent to adding one to the count for +each bin, and taking the maximum likelihood estimate of the +resulting frequency distribution. +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.LidstoneProbDist(freqdist, gamma, bins=None) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +The Lidstone estimate for the probability distribution of the +experiment used to generate a frequency distribution. The +"Lidstone estimate" is parameterized by a real number \fIgamma\fP, +which typically ranges from 0 to 1. The Lidstone estimate +approximates the probability of a sample with count \fIc\fP from an +experiment with \fIN\fP outcomes and \fIB\fP bins as +\fB(c+gamma)/(N+B*gamma)\fP\&. This is equivalent to adding +\fIgamma\fP to the count for each bin, and taking the maximum +likelihood estimate of the resulting frequency distribution. +.INDENT 7.0 +.TP +.B SUM_TO_ONE = False +True if the probabilities of the samples in this probability +distribution will always sum to one. +.UNINDENT +.INDENT 7.0 +.TP +.B discount() +Return the ratio by which counts are discounted on average: c*/c +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B freqdist() +Return the frequency distribution that this probability +distribution is based on. +.INDENT 7.0 +.TP +.B Return type +FreqDist +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample.
+.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.MLEProbDist(freqdist, bins=None) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +The maximum likelihood estimate for the probability distribution +of the experiment used to generate a frequency distribution. The +"maximum likelihood estimate" approximates the probability of +each sample as the frequency of that sample in the frequency +distribution. +.INDENT 7.0 +.TP +.B freqdist() +Return the frequency distribution that this probability +distribution is based on. +.INDENT 7.0 +.TP +.B Return type +FreqDist +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.MutableProbDist(prob_dist, samples, store_logs=True) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +A mutable probdist whose probabilities may be easily modified. This +simply copies an existing probdist, storing the probability values in a +mutable dictionary and providing an update method. +.INDENT 7.0 +.TP +.B logprob(sample) +Return the base 2 logarithm of the probability for a given sample. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B update(sample, prob, log=True) +Update the probability for the given sample. This may cause the object +to stop being a valid probability distribution; the user must +ensure that the updated sample probabilities all lie between 0 and 1 +and sum to one.
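+.sp +A minimal sketch of a valid sequence of updates (renormalising by hand so the probabilities still sum to one): +.sp +.nf +.ft C +>>> from nltk.probability import UniformProbDist, MutableProbDist +>>> mpd = MutableProbDist(UniformProbDist([\(aqa\(aq, \(aqb\(aq]), [\(aqa\(aq, \(aqb\(aq], store_logs=False) +>>> mpd.update(\(aqa\(aq, 0.75, log=False) +>>> mpd.update(\(aqb\(aq, 0.25, log=False) +>>> mpd.prob(\(aqa\(aq) +0.75 +.ft P +.fi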
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsample\fP (\fIany\fP) \-\- the sample for which to update the probability +.IP \(bu 2 +\fBprob\fP (\fIfloat\fP) \-\- the new probability +.IP \(bu 2 +\fBlog\fP (\fIbool\fP) \-\- is the probability already logged +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.ProbDistI +Bases: \fBobject\fP +.sp +A probability distribution for the outcomes of an experiment. A +probability distribution specifies how likely it is that an +experiment will have any given outcome. For example, a +probability distribution could be used to predict the probability +that a token in a document will have a given type. Formally, a +probability distribution can be defined as a function mapping from +samples to nonnegative real numbers, such that the sum of every +number in the function\(aqs range is 1.0. A \fBProbDist\fP is often +used to model the probability distribution of the experiment used +to generate a frequency distribution. +.INDENT 7.0 +.TP +.B SUM_TO_ONE = True +True if the probabilities of the samples in this probability +distribution will always sum to one. +.UNINDENT +.INDENT 7.0 +.TP +.B discount() +Return the ratio by which counts are discounted on average: c*/c +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B generate() +Return a randomly selected sample from this probability distribution. +The probability of returning each sample \fBsamp\fP is equal to +\fBself.prob(samp)\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B logprob(sample) +Return the base 2 logarithm of the probability for a given sample. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B abstract samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.ProbabilisticMixIn(**kwargs) +Bases: \fBobject\fP +.sp +A mix\-in class to associate probabilities with other classes +(trees, rules, etc.). To use the \fBProbabilisticMixIn\fP class, +define a new class that derives from an existing class and from +ProbabilisticMixIn. You will need to define a new constructor for +the new class, which explicitly calls the constructors of both its +parent classes. For example: +.sp +.nf +.ft C +>>> from nltk.probability import ProbabilisticMixIn +>>> class A: +\&... def __init__(self, x, y): self.data = (x,y) +\&... +>>> class ProbabilisticA(A, ProbabilisticMixIn): +\&... def __init__(self, x, y, **prob_kwarg): +\&... A.__init__(self, x, y) +\&... ProbabilisticMixIn.__init__(self, **prob_kwarg) +.ft P +.fi +.sp +See the documentation for the ProbabilisticMixIn +\fBconstructor<__init__>\fP for information about the arguments it +expects. 
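+.sp +The new class can then be given a probability at construction time, via either the \fBprob\fP or the \fBlogprob\fP keyword (a minimal sketch continuing the example above): +.sp +.nf +.ft C +>>> pa = ProbabilisticA(1, 2, prob=0.5) +>>> pa.prob() +0.5 +.ft P +.fi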
+.sp +You should generally also redefine the string representation +methods, the comparison methods, and the hashing method. +.INDENT 7.0 +.TP +.B logprob() +Return \fBlog(p)\fP, where \fBp\fP is the probability associated +with this object. +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob() +Return the probability associated with this object. +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_logprob(logprob) +Set the log probability associated with this object to +\fBlogprob\fP\&. I.e., set the probability associated with this +object to \fB2**(logprob)\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBlogprob\fP (\fIfloat\fP) \-\- The new log probability +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_prob(prob) +Set the probability associated with this object to \fBprob\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBprob\fP (\fIfloat\fP) \-\- The new probability +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.SimpleGoodTuringProbDist(freqdist, bins=None) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +The SimpleGoodTuring ProbDist approximates the mapping from a +frequency to the frequency of that frequency by a straight line in +log space, fitted by linear regression. +Details of the Simple Good\-Turing algorithm can be found in: +.INDENT 7.0 +.IP \(bu 2 +"Good\-Turing frequency estimation without tears" (Gale & Sampson 1995), +Journal of Quantitative Linguistics, vol. 2, pp. 217\-237. +.IP \(bu 2 +"Speech and Language Processing" (Jurafsky & Martin), +2nd Edition, Chapter 4.5, p. 103 (log(Nc) = a + b*log(c)) +.IP \(bu 2 +\fI\%http://www.grsampson.net/RGoodTur.html\fP +.UNINDENT +.sp +Given a set of pairs (xi, yi), where xi denotes the frequency and +yi denotes the frequency of frequency, we want to minimize the +squared deviation. E(x) and E(y) represent the mean of xi and yi. +.INDENT 7.0 +.IP \(bu 2 +slope: b = sigma((xi\-E(x))(yi\-E(y))) / sigma((xi\-E(x))(xi\-E(x))) +.IP \(bu 2 +intercept: a = E(y) \- b*E(x) +.UNINDENT +.INDENT 7.0 +.TP +.B SUM_TO_ONE = False +True if the probabilities of the samples in this probability +distribution will always sum to one. +.UNINDENT +.INDENT 7.0 +.TP +.B check() +.UNINDENT +.INDENT 7.0 +.TP +.B discount() +This function returns the total mass of probability transfers from the +seen samples to the unseen samples. +.UNINDENT +.INDENT 7.0 +.TP +.B find_best_fit(r, nr) +Use simple linear regression to tune parameters self._slope and +self._intercept in the log\-log space based on count and Nr(count). +(Work in log space to avoid floating point underflow.) +.UNINDENT +.INDENT 7.0 +.TP +.B freqdist() +.UNINDENT +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the sample\(aqs probability. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIstr\fP) \-\- sample of the event +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B smoothedNr(r) +Return the number of samples with count r. +.INDENT 7.0 +.TP +.B Parameters +\fBr\fP (\fIint\fP) \-\- The count r for which the smoothed Nr value is returned.
+.TP +.B Return type +float +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.UniformProbDist(samples) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +A probability distribution that assigns equal probability to each +sample in a given set, and zero probability to all other +samples. +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.probability.WittenBellProbDist(freqdist, bins=None) +Bases: \fI\%nltk.probability.ProbDistI\fP +.sp +The Witten\-Bell estimate of a probability distribution. This distribution +allocates uniform probability mass to as yet unseen events by using the +number of events that have only been seen once. The probability mass +reserved for unseen events is equal to \fIT / (N + T)\fP +where \fIT\fP is the number of observed event types and \fIN\fP is the total +number of observed events. This equates to the maximum likelihood estimate +of a new type event occurring. The remaining probability mass is discounted +such that all probability estimates sum to one, yielding: +.INDENT 7.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fIp = T / (Z * (N + T))\fP, if count = 0 +.IP \(bu 2 +\fIp = c / (N + T)\fP, otherwise +.UNINDENT +.UNINDENT +.UNINDENT +.sp +Here \fIZ\fP is the number of event types with zero count (the number of +bins minus \fIT\fP). +.INDENT 7.0 +.TP +.B discount() +Return the ratio by which counts are discounted on average: c*/c +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B freqdist() +.UNINDENT +.INDENT 7.0 +.TP +.B max() +Return the sample with the greatest probability. If two or +more samples have the same probability, return one of them; +which sample is returned is undefined. +.INDENT 7.0 +.TP +.B Return type +any +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob(sample) +Return the probability for a given sample. Probabilities +are always real numbers in the range [0, 1]. +.INDENT 7.0 +.TP +.B Parameters +\fBsample\fP (\fIany\fP) \-\- The sample whose probability +should be returned. +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B samples() +Return a list of all samples that have nonzero probabilities. +Use \fBprob\fP to find the probability of each sample. +.INDENT 7.0 +.TP +.B Return type +list +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.probability.add_logs(logx, logy) +Given two numbers \fBlogx\fP = \fIlog(x)\fP and \fBlogy\fP = \fIlog(y)\fP, return +\fIlog(x+y)\fP\&. Conceptually, this is the same as returning +\fBlog(2**(logx)+2**(logy))\fP, but the actual implementation +avoids overflow errors that could result from direct computation.
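+.sp +A sketch of the usual trick (not necessarily the exact implementation): factor out the larger exponent, so that only a quantity no greater than one is ever exponentiated: +.sp +.nf +.ft C +>>> import math +>>> def add_logs_sketch(logx, logy): +\&...     base = max(logx, logy) +\&...     return base + math.log2(1 + 2 ** (min(logx, logy) \- base)) +>>> abs(add_logs_sketch(math.log2(3), math.log2(5)) \- math.log2(8)) < 1e\-12 +True +.ft P +.fi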
+.UNINDENT +.INDENT 0.0 +.TP +.B nltk.probability.entropy(pdist) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.probability.log_likelihood(test_pdist, actual_pdist) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.probability.sum_logs(logs) +.UNINDENT +.SS nltk.text module +.sp +This module brings together a variety of NLTK functionality for +text analysis, and provides simple, interactive interfaces. +Functionality includes: concordancing, collocation discovery, +regular expression search over tokenized strings, and +distributional similarity. +.INDENT 0.0 +.TP +.B class nltk.text.ConcordanceIndex(tokens, key=lambda x: x) +Bases: \fBobject\fP +.sp +An index that can be used to look up the offset locations at which +a given word occurs in a document. +.INDENT 7.0 +.TP +.B find_concordance(word, width=80) +Find all concordance lines given the query word. +.sp +Provided with a list of words, these will be found as a phrase. +.UNINDENT +.INDENT 7.0 +.TP +.B offsets(word) +.INDENT 7.0 +.TP +.B Return type +list(int) +.TP +.B Returns +A list of the offset positions at which the given +word occurs. If a key function was specified for the +index, then given word\(aqs key will be looked up. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B print_concordance(word, width=80, lines=25) +Print concordance lines given the query word. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBword\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- The target word or phrase (a list of strings) +.IP \(bu 2 +\fBwidth\fP (\fIint\fP) \-\- The width of each line, in characters (default=80) +.IP \(bu 2 +\fBlines\fP (\fIint\fP) \-\- The number of lines to display (default=25) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B tokens() +.INDENT 7.0 +.TP +.B Return type +list(str) +.TP +.B Returns +The document that this concordance index was +created from. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.text.ContextIndex(tokens, context_func=None, filter=None, key=lambda x: x) +Bases: \fBobject\fP +.sp +A bidirectional index between words and their \(aqcontexts\(aq in a text. +The context of a word is usually defined to be the words that occur +in a fixed window around the word; but other definitions may also +be used by providing a custom context function. +.INDENT 7.0 +.TP +.B common_contexts(words, fail_on_unknown=False) +Find contexts where the specified words can all appear, and +return a frequency distribution mapping each context to the +number of times that context was used. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBwords\fP (\fIstr\fP) \-\- The words used to seed the similarity search +.IP \(bu 2 +\fBfail_on_unknown\fP \-\- If true, then raise a ValueError if +any of the given words do not occur at all in the index. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B similar_words(word, n=20) +.UNINDENT +.INDENT 7.0 +.TP +.B tokens() +.INDENT 7.0 +.TP +.B Return type +list(str) +.TP +.B Returns +The document that this context index was +created from. +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B word_similarity_dict(word) +Return a dictionary mapping from words to \(aqsimilarity scores,\(aq +indicating how often these two words occur in the same +context. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.text.Text(tokens, name=None) +Bases: \fBobject\fP +.sp +A wrapper around a sequence of simple (string) tokens, which is +intended to support initial exploration of texts (via the +interactive console).
Its methods perform a variety of analyses +on the text\(aqs contexts (e.g., counting, concordancing, collocation +discovery), and display the results. If you wish to write a +program which makes use of these analyses, then you should bypass +the \fBText\fP class, and use the appropriate analysis function or +class directly instead. +.sp +A \fBText\fP is typically initialized from a given document or +corpus. E.g.: +.sp +.nf +.ft C +>>> import nltk.corpus +>>> from nltk.text import Text +>>> moby = Text(nltk.corpus.gutenberg.words(\(aqmelville\-moby_dick.txt\(aq)) +.ft P +.fi +.INDENT 7.0 +.TP +.B collocation_list(num=20, window_size=2) +Return collocations derived from the text, ignoring stopwords. +.sp +.nf +.ft C +>>> from nltk.book import text4 +>>> text4.collocation_list()[:2] +[(\(aqUnited\(aq, \(aqStates\(aq), (\(aqfellow\(aq, \(aqcitizens\(aq)] +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBnum\fP (\fIint\fP) \-\- The maximum number of collocations to return. +.IP \(bu 2 +\fBwindow_size\fP (\fIint\fP) \-\- The number of tokens spanned by a collocation (default=2) +.UNINDENT +.TP +.B Return type +list(tuple(str, str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B collocations(num=20, window_size=2) +Print collocations derived from the text, ignoring stopwords. +.sp +.nf +.ft C +>>> from nltk.book import text4 +>>> text4.collocations() +United States; fellow citizens; four years; ... +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBnum\fP (\fIint\fP) \-\- The maximum number of collocations to print. +.IP \(bu 2 +\fBwindow_size\fP (\fIint\fP) \-\- The number of tokens spanned by a collocation (default=2) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B common_contexts(words, num=20) +Find contexts where the specified words appear; list +most frequent common contexts first. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBwords\fP (\fIstr\fP) \-\- The words used to seed the similarity search +.IP \(bu 2 +\fBnum\fP (\fIint\fP) \-\- The number of common contexts to display (default=20) +.UNINDENT +.TP +.B Seealso +ContextIndex.common_contexts() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B concordance(word, width=79, lines=25) +Prints a concordance for \fBword\fP with the specified context window. +Word matching is not case\-sensitive. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBword\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- The target word or phrase (a list of strings) +.IP \(bu 2 +\fBwidth\fP (\fIint\fP) \-\- The width of each line, in characters (default=79) +.IP \(bu 2 +\fBlines\fP (\fIint\fP) \-\- The number of lines to display (default=25) +.UNINDENT +.TP +.B Seealso +\fBConcordanceIndex\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B concordance_list(word, width=79, lines=25) +Generate a concordance for \fBword\fP with the specified context window. +Word matching is not case\-sensitive. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBword\fP (\fIstr\fP\fI or \fP\fIlist\fP) \-\- The target word or phrase (a list of strings) +.IP \(bu 2 +\fBwidth\fP (\fIint\fP) \-\- The width of each line, in characters (default=79) +.IP \(bu 2 +\fBlines\fP (\fIint\fP) \-\- The number of lines to display (default=25) +.UNINDENT +.TP +.B Seealso +\fBConcordanceIndex\fP +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B count(word) +Count the number of times this word appears in the text. +.UNINDENT +.INDENT 7.0 +.TP +.B dispersion_plot(words) +Produce a plot showing the distribution of the words through the text.
+Requires pylab to be installed. +.INDENT 7.0 +.TP +.B Parameters +\fBwords\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- The words to be plotted +.TP +.B Seealso +nltk.draw.dispersion_plot() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B findall(regexp) +Find instances of the regular expression in the text. +The text is a list of tokens, and a regexp pattern to match +a single token must be surrounded by angle brackets. E.g. +.sp +.nf +.ft C +>>> print(\(aqhack\(aq); from nltk.book import text1, text5, text9 +hack... +>>> text5.findall("<.*><.*><bro>") +you rule bro; telling you bro; u twizted bro +>>> text1.findall("<a>(<.*>)<man>") +monied; nervous; dangerous; white; white; white; pious; queer; good; +mature; white; Cape; great; wise; wise; butterless; white; fiendish; +pale; furious; better; certain; complete; dismasted; younger; brave; +brave; brave; brave +>>> text9.findall("<th.*>{3,}") +thread through those; the thought that; that the thing; the thing +that; that that thing; through these than through; them that the; +through the thick; them that they; thought that the +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBregexp\fP (\fIstr\fP) \-\- A regular expression +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B generate(length=100, text_seed=None, random_seed=42) +Print random text, generated using a trigram language model. +See also \fIhelp(nltk.lm)\fP\&. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBlength\fP (\fIint\fP) \-\- The length of text to generate (default=100) +.IP \(bu 2 +\fBtext_seed\fP (\fIlist\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- Generation can be conditioned on preceding context. +.IP \(bu 2 +\fBrandom_seed\fP (\fIint\fP) \-\- A random seed or an instance of \fIrandom.Random\fP\&. If provided, +makes the random sampling part of generation reproducible. (default=42) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B index(word) +Find the index of the first occurrence of the word in the text. +.UNINDENT +.INDENT 7.0 +.TP +.B plot(*args) +See documentation for FreqDist.plot(). +.INDENT 7.0 +.TP +.B Seealso +nltk.prob.FreqDist.plot() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B readability(method) +.UNINDENT +.INDENT 7.0 +.TP +.B similar(word, num=20) +Distributional similarity: find other words which appear in the +same contexts as the specified word; list most similar words first. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBword\fP (\fIstr\fP) \-\- The word used to seed the similarity search +.IP \(bu 2 +\fBnum\fP (\fIint\fP) \-\- The number of similar words to display (default=20) +.UNINDENT +.TP +.B Seealso +ContextIndex.similar_words() +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B vocab() +.INDENT 7.0 +.TP +.B Seealso +nltk.prob.FreqDist +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.text.TextCollection(source) +Bases: \fI\%nltk.text.Text\fP +.sp +A collection of texts, which can be loaded with a list of texts, or +with a corpus consisting of one or more texts, and which supports +counting, concordancing, collocation discovery, etc. Initialize a +TextCollection as follows: +.sp +.nf +.ft C +>>> import nltk.corpus +>>> from nltk.text import TextCollection +>>> print(\(aqhack\(aq); from nltk.book import text1, text2, text3 +hack... +>>> gutenberg = TextCollection(nltk.corpus.gutenberg) +>>> mytexts = TextCollection([text1, text2, text3]) +.ft P +.fi +.sp +Iterating over a TextCollection produces all the tokens of all the +texts in order. +.INDENT 7.0 +.TP +.B idf(term) +The number of texts in the corpus divided by the +number of texts that the term appears in.
+If a term does not appear in the corpus, 0.0 is returned. +.UNINDENT +.INDENT 7.0 +.TP +.B tf(term, text) +The frequency of the term in text. +.UNINDENT +.INDENT 7.0 +.TP +.B tf_idf(term, text) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.text.TokenSearcher(tokens) +Bases: \fBobject\fP +.sp +A class that makes it easier to use regular expressions to search +over tokenized strings. The tokenized string is converted to a +string where tokens are marked with angle brackets \-\- e.g., +\fB\(aq<the><window><is><still><open>\(aq\fP\&. The regular expression +passed to the \fBfindall()\fP method is modified to treat angle +brackets as non\-capturing parentheses, in addition to matching the +token boundaries; and to have \fB\(aq.\(aq\fP not match the angle brackets. +.INDENT 7.0 +.TP +.B findall(regexp) +Find instances of the regular expression in the text. +The text is a list of tokens, and a regexp pattern to match +a single token must be surrounded by angle brackets. E.g. +.sp +.nf +.ft C +>>> from nltk.text import TokenSearcher +>>> print(\(aqhack\(aq); from nltk.book import text1, text5, text9 +hack... +>>> text5.findall("<.*><.*><bro>") +you rule bro; telling you bro; u twizted bro +>>> text1.findall("<a>(<.*>)<man>") +monied; nervous; dangerous; white; white; white; pious; queer; good; +mature; white; Cape; great; wise; wise; butterless; white; fiendish; +pale; furious; better; certain; complete; dismasted; younger; brave; +brave; brave; brave +>>> text9.findall("<th.*>{3,}") +thread through those; the thought that; that the thing; the thing +that; that that thing; through these than through; them that the; +through the thick; them that they; thought that the +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +\fBregexp\fP (\fIstr\fP) \-\- A regular expression +.UNINDENT +.UNINDENT +.UNINDENT +.SS nltk.tgrep module +.SS TGrep search implementation for NLTK trees +.sp +This module supports TGrep2 syntax for matching parts of NLTK Trees. +Note that many tgrep operators require the tree passed to be a +\fBParentedTree\fP\&. +.sp +External links: +.INDENT 0.0 +.IP \(bu 2 +\fI\%Tgrep tutorial\fP +.IP \(bu 2 +\fI\%Tgrep2 manual\fP +.IP \(bu 2 +\fI\%Tgrep2 source\fP +.UNINDENT +.SS Usage +.sp +.nf +.ft C +>>> from nltk.tree import ParentedTree +>>> from nltk.tgrep import tgrep_nodes, tgrep_positions +>>> tree = ParentedTree.fromstring(\(aq(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))\(aq) +>>> list(tgrep_nodes(\(aqNN\(aq, [tree])) +[[ParentedTree(\(aqNN\(aq, [\(aqdog\(aq]), ParentedTree(\(aqNN\(aq, [\(aqcat\(aq])]] +>>> list(tgrep_positions(\(aqNN\(aq, [tree])) +[[(0, 2), (2, 1)]] +>>> list(tgrep_nodes(\(aqDT\(aq, [tree])) +[[ParentedTree(\(aqDT\(aq, [\(aqthe\(aq]), ParentedTree(\(aqDT\(aq, [\(aqa\(aq])]] +>>> list(tgrep_nodes(\(aqDT $ JJ\(aq, [tree])) +[[ParentedTree(\(aqDT\(aq, [\(aqthe\(aq])]] +.ft P +.fi +.sp +This implementation adds syntax to select nodes based on their NLTK +tree position. This syntax is \fBN\fP plus a Python tuple representing +the tree position. For instance, \fBN()\fP, \fBN(0,)\fP, \fBN(0,0)\fP are +valid node selectors. Example: +.sp +.nf +.ft C +>>> tree = ParentedTree.fromstring(\(aq(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))\(aq) +>>> tree[0,0] +ParentedTree(\(aqDT\(aq, [\(aqthe\(aq]) +>>> tree[0,0].treeposition() +(0, 0) +>>> list(tgrep_nodes(\(aqN(0,0)\(aq, [tree])) +[[ParentedTree(\(aqDT\(aq, [\(aqthe\(aq])]] +.ft P +.fi +.SS Caveats: +.INDENT 0.0 +.IP \(bu 2 +Link modifiers: "?" and "=" are not implemented.
+.IP \(bu 2 +Tgrep compatibility: Using "@" for "!", "{" for "<", "}" for ">" are +not implemented. +.IP \(bu 2 +The "=" and "~" links are not implemented. +.UNINDENT +.SS Known Issues: +.INDENT 0.0 +.IP \(bu 2 +There are some issues with link relations involving leaf nodes +(which are represented as bare strings in NLTK trees). For +instance, consider the tree: +.INDENT 2.0 +.INDENT 3.5 +.sp +.nf +.ft C +(S (A x)) +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The search string \fB* !>> S\fP should select all nodes which are not +dominated in some way by an \fBS\fP node (i.e., all nodes which are +not descendants of an \fBS\fP). Clearly, in this tree, the only node +which fulfills this criterion is the top node (since it is not +dominated by anything). However, the code here will find both the +top node and the leaf node \fBx\fP\&. This is because we cannot recover +the parent of the leaf, since it is stored as a bare string. +.sp +A possible workaround, when performing this kind of search, would be +to filter out all leaf nodes. +.UNINDENT +.SS Implementation notes +.sp +This implementation is (somewhat awkwardly) based on lambda functions +which are predicates on a node. A predicate is a function which returns +either True or False; using a predicate function, we can identify sets +of nodes with particular properties. A predicate function could, for +instance, return True only if a particular node has a label matching a +particular regular expression, and has a daughter node which has no +sisters. Because tgrep2 search strings can do things statefully (such +as substituting in macros, and binding nodes with node labels), the +actual predicate function is declared with three arguments: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +pred = lambda n, m, l: True  # some logic here +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \fBn\fP +is a node in a tree; this argument must always be given +.TP +.B \fBm\fP +contains a dictionary, mapping macro names onto predicate functions +.TP +.B \fBl\fP +is a dictionary to map node labels onto nodes in the tree +.UNINDENT +.sp +\fBm\fP and \fBl\fP are declared to default to \fBNone\fP, and so need not be +specified in a call to a predicate. Predicates which call other +predicates must always pass the value of these arguments on. The +top\-level predicate (constructed by \fB_tgrep_exprs_action\fP) binds the +macro definitions to \fBm\fP and initialises \fBl\fP to an empty dictionary. +.INDENT 0.0 +.TP +.B exception nltk.tgrep.TgrepException +Bases: \fBException\fP +.sp +Tgrep exception type. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tgrep.ancestors(node) +Returns the list of all nodes dominating the given tree node. +This method will not work with leaf nodes, since there is no way +to recover the parent. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tgrep.tgrep_compile(tgrep_string) +Parses (and tokenizes, if necessary) a TGrep search string into a +lambda function. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tgrep.tgrep_nodes(pattern, trees, search_leaves=True) +Return the tree nodes in the trees which match the given pattern.
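+.sp +Patterns may also be compiled once with \fBtgrep_compile\fP and reused across calls (a minimal sketch, reusing the tree from the Usage section above): +.sp +.nf +.ft C +>>> from nltk.tgrep import tgrep_compile +>>> pred = tgrep_compile(\(aqNN\(aq) +>>> list(tgrep_nodes(pred, [tree])) +[[ParentedTree(\(aqNN\(aq, [\(aqdog\(aq]), ParentedTree(\(aqNN\(aq, [\(aqcat\(aq])]] +.ft P +.fi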
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBpattern\fP (\fIstr\fP\fI or \fP\fIoutput of tgrep_compile\fP\fI(\fP\fI)\fP) \-\- a tgrep search pattern +.IP \(bu 2 +\fBtrees\fP (\fIiter\fP\fI(\fP\fIParentedTree\fP\fI) or \fP\fIiter\fP\fI(\fP\fITree\fP\fI)\fP) \-\- a sequence of NLTK trees (usually ParentedTrees) +.IP \(bu 2 +\fBsearch_leaves\fP (\fIbool\fP) \-\- whether to return matching leaf nodes +.UNINDENT +.TP +.B Return type +iter(tree nodes) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tgrep.tgrep_positions(pattern, trees, search_leaves=True) +Return the tree positions in the trees which match the given pattern. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBpattern\fP (\fIstr\fP\fI or \fP\fIoutput of tgrep_compile\fP\fI(\fP\fI)\fP) \-\- a tgrep search pattern +.IP \(bu 2 +\fBtrees\fP (\fIiter\fP\fI(\fP\fIParentedTree\fP\fI) or \fP\fIiter\fP\fI(\fP\fITree\fP\fI)\fP) \-\- a sequence of NLTK trees (usually ParentedTrees) +.IP \(bu 2 +\fBsearch_leaves\fP (\fIbool\fP) \-\- whether to return matching leaf nodes +.UNINDENT +.TP +.B Return type +iter(tree positions) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tgrep.tgrep_tokenize(tgrep_string) +Tokenizes a TGrep search string into separate tokens. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tgrep.treepositions_no_leaves(tree) +Returns all the tree positions in the given tree which are not +leaf nodes. +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.tgrep.unique_ancestors(node) +Returns the list of all nodes dominating the given node, where +there is only a single path of descent. +.UNINDENT +.SS nltk.toolbox module +.sp +Module for reading, writing and manipulating +Toolbox databases and settings files. +.INDENT 0.0 +.TP +.B class nltk.toolbox.StandardFormat(filename=None, encoding=None) +Bases: \fBobject\fP +.sp +Class for reading and processing standard format marker files and strings. +.INDENT 7.0 +.TP +.B close() +Close a previously opened standard format marker file or string. +.UNINDENT +.INDENT 7.0 +.TP +.B fields(strip=True, unwrap=True, encoding=None, errors=\(aqstrict\(aq, unicode_fields=None) +Return an iterator that returns the next field in a \fB(marker, value)\fP +tuple, where \fBmarker\fP and \fBvalue\fP are unicode strings if an \fBencoding\fP +was specified in the \fBfields()\fP method. Otherwise they are non\-unicode strings. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBstrip\fP (\fIbool\fP) \-\- strip trailing whitespace from the last line of each field +.IP \(bu 2 +\fBunwrap\fP (\fIbool\fP) \-\- Convert newlines in a field to spaces. +.IP \(bu 2 +\fBencoding\fP (\fIstr\fP\fI or \fP\fINone\fP) \-\- Name of an encoding to use. If it is specified then +the \fBfields()\fP method returns unicode strings rather than non +unicode strings. +.IP \(bu 2 +\fBerrors\fP (\fIstr\fP) \-\- Error handling scheme for codec. Same as the \fBdecode()\fP +builtin string method. +.IP \(bu 2 +\fBunicode_fields\fP (\fIsequence\fP) \-\- Set of marker names whose values are UTF\-8 encoded. +Ignored if encoding is None. If the whole file is UTF\-8 encoded set +\fBencoding=\(aqutf8\(aq\fP and leave \fBunicode_fields\fP with its default +value of None. +.UNINDENT +.TP +.B Return type +iter(tuple(str, str)) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B open(sfm_file) +Open a standard format marker file for sequential reading. 
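+.sp +A typical reading loop (a sketch; \(aqlexicon.db\(aq is a hypothetical file name): +.sp +.nf +.ft C +>>> from nltk.toolbox import StandardFormat +>>> sf = StandardFormat() +>>> sf.open(\(aqlexicon.db\(aq) +>>> for marker, value in sf.fields(): +\&...     print(marker, value) +>>> sf.close() +.ft P +.fi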
+.INDENT 7.0 +.TP +.B Parameters +\fBsfm_file\fP (\fIstr\fP) \-\- name of the standard format marker input file +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B open_string(s) +Open a standard format marker string for sequential reading. +.INDENT 7.0 +.TP +.B Parameters +\fBs\fP (\fIstr\fP) \-\- string to parse as a standard format marker input file +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B raw_fields() +Return an iterator that returns the next field in a (marker, value) +tuple. Linebreaks and trailing white space are preserved except +for the final newline in each field. +.INDENT 7.0 +.TP +.B Return type +iter(tuple(str, str)) +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.toolbox.ToolboxData(filename=None, encoding=None) +Bases: \fI\%nltk.toolbox.StandardFormat\fP +.INDENT 7.0 +.TP +.B parse(grammar=None, **kwargs) +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.toolbox.ToolboxSettings +Bases: \fI\%nltk.toolbox.StandardFormat\fP +.sp +This class is the base class for settings files. +.INDENT 7.0 +.TP +.B parse(encoding=None, errors=\(aqstrict\(aq, **kwargs) +Return the contents of toolbox settings file with a nested structure. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBencoding\fP (\fIstr\fP) \-\- encoding used by settings file +.IP \(bu 2 +\fBerrors\fP (\fIstr\fP) \-\- Error handling scheme for codec. Same as \fBdecode()\fP builtin method. +.IP \(bu 2 +\fBkwargs\fP (\fIdict\fP) \-\- Keyword arguments passed to \fBStandardFormat.fields()\fP +.UNINDENT +.TP +.B Return type +ElementTree._ElementInterface +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.toolbox.add_blank_lines(tree, blanks_before, blanks_between) +Add blank lines before all elements and subelements specified in blanks_before. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtree\fP (\fIElementTree._ElementInterface\fP) \-\- toolbox data in an elementtree structure +.IP \(bu 2 +\fBblanks_before\fP (\fIdict\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- elements and subelements to add blank lines before +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.toolbox.add_default_fields(elem, default_fields) +Add blank elements and subelements specified in default_fields. +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBelem\fP (\fIElementTree._ElementInterface\fP) \-\- toolbox data in an elementtree structure +.IP \(bu 2 +\fBdefault_fields\fP (\fIdict\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- fields to add to each type of element and subelement +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.toolbox.demo() +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.toolbox.remove_blanks(elem) +Remove all elements and subelements with no text and no child elements. +.INDENT 7.0 +.TP +.B Parameters +\fBelem\fP (\fIElementTree._ElementInterface\fP) \-\- toolbox data in an elementtree structure +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.toolbox.sort_fields(elem, field_orders) +Sort the elements and subelements in order specified in field_orders.
+.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBelem\fP (\fIElementTree._ElementInterface\fP) \-\- toolbox data in an elementtree structure +.IP \(bu 2 +\fBfield_orders\fP (\fIdict\fP\fI(\fP\fItuple\fP\fI)\fP) \-\- order of fields for each type of element and subelement +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.toolbox.to_settings_string(tree, encoding=None, errors=\(aqstrict\(aq, unicode_fields=None) +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.toolbox.to_sfm_string(tree, encoding=None, errors=\(aqstrict\(aq, unicode_fields=None) +Return a string with a standard format representation of the toolbox +data in tree (tree can be a toolbox database or a single record). +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBtree\fP (\fIElementTree._ElementInterface\fP) \-\- flat representation of toolbox data (whole database or single record) +.IP \(bu 2 +\fBencoding\fP (\fIstr\fP) \-\- Name of an encoding to use. +.IP \(bu 2 +\fBerrors\fP (\fIstr\fP) \-\- Error handling scheme for codec. Same as the \fBencode()\fP +builtin string method. +.IP \(bu 2 +\fBunicode_fields\fP (\fIdict\fP\fI(\fP\fIstr\fP\fI) or \fP\fIset\fP\fI(\fP\fIstr\fP\fI)\fP) \-\- +.UNINDENT +.TP +.B Return type +str +.UNINDENT +.UNINDENT +.SS nltk.tree module +.sp +Class for representing hierarchical language structures, such as +syntax trees and morphological trees. +.INDENT 0.0 +.TP +.B class nltk.tree.ImmutableMultiParentedTree(node, children=None) +Bases: \fI\%nltk.tree.ImmutableTree\fP, \fI\%nltk.tree.MultiParentedTree\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tree.ImmutableParentedTree(node, children=None) +Bases: \fI\%nltk.tree.ImmutableTree\fP, \fI\%nltk.tree.ParentedTree\fP +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tree.ImmutableProbabilisticTree(node, children=None, **prob_kwargs) +Bases: \fI\%nltk.tree.ImmutableTree\fP, \fI\%nltk.probability.ProbabilisticMixIn\fP +.INDENT 7.0 +.TP +.B classmethod convert(val) +.UNINDENT +.INDENT 7.0 +.TP +.B copy(deep=False) +Return a shallow copy of the list. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tree.ImmutableTree(node, children=None) +Bases: \fI\%nltk.tree.Tree\fP +.INDENT 7.0 +.TP +.B append(v) +Append object to the end of the list. +.UNINDENT +.INDENT 7.0 +.TP +.B extend(v) +Extend list by appending elements from the iterable. +.UNINDENT +.INDENT 7.0 +.TP +.B pop(v=None) +Remove and return item at index (default last). +.sp +Raises IndexError if list is empty or index is out of range. +.UNINDENT +.INDENT 7.0 +.TP +.B remove(v) +Remove first occurrence of value. +.sp +Raises ValueError if the value is not present. +.UNINDENT +.INDENT 7.0 +.TP +.B reverse() +Reverse \fIIN PLACE\fP\&. +.UNINDENT +.INDENT 7.0 +.TP +.B set_label(value) +Set the node label. This will only succeed the first time the +node label is set, which should occur in ImmutableTree.__init__(). +.UNINDENT +.INDENT 7.0 +.TP +.B sort() +Sort the list in ascending order and return None. +.sp +The sort is in\-place (i.e. the list itself is modified) and stable (i.e. the +order of two equal elements is maintained). +.sp +If a key function is given, apply it once to each list item and sort them, +ascending or descending, according to their function values. +.sp +The reverse flag can be set to sort in descending order. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tree.MultiParentedTree(node, children=None) +Bases: \fBnltk.tree.AbstractParentedTree\fP +.sp +A \fBTree\fP that automatically maintains parent pointers for +multi\-parented trees. 
The following are methods for querying the +structure of a multi\-parented tree: \fBparents()\fP, \fBparent_indices()\fP, +\fBleft_siblings()\fP, \fBright_siblings()\fP, \fBroots\fP, \fBtreepositions\fP\&. +.sp +Each \fBMultiParentedTree\fP may have zero or more parents. In +particular, subtrees may be shared. If a single +\fBMultiParentedTree\fP is used as multiple children of the same +parent, then that parent will appear multiple times in its +\fBparents()\fP method. +.sp +\fBMultiParentedTrees\fP should never be used in the same tree as +\fBTrees\fP or \fBParentedTrees\fP\&. Mixing tree implementations may +result in incorrect parent pointers and in \fBTypeError\fP exceptions. +.INDENT 7.0 +.TP +.B left_siblings() +A list of all left siblings of this tree, in any of its parent +trees. A tree may be its own left sibling if it is used as +multiple contiguous children of the same parent. A tree may +appear multiple times in this list if it is the left sibling +of this tree with respect to multiple parents. +.INDENT 7.0 +.TP +.B Type +list(MultiParentedTree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parent_indices(parent) +Return a list of the indices where this tree occurs as a child +of \fBparent\fP\&. If this child does not occur as a child of +\fBparent\fP, then the empty list is returned. The following is +always true: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +for parent_index in ptree.parent_indices(parent): + parent[parent_index] is ptree +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B parents() +The set of parents of this tree. If this tree has no parents, +then \fBparents\fP is the empty set. To check if a tree is used +as multiple children of the same parent, use the +\fBparent_indices()\fP method. +.INDENT 7.0 +.TP +.B Type +list(MultiParentedTree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B right_siblings() +A list of all right siblings of this tree, in any of its parent +trees. A tree may be its own right sibling if it is used as +multiple contiguous children of the same parent. A tree may +appear multiple times in this list if it is the right sibling +of this tree with respect to multiple parents. +.INDENT 7.0 +.TP +.B Type +list(MultiParentedTree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B roots() +The set of all roots of this tree. This set is formed by +tracing all possible parent paths until trees with no parents +are found. +.INDENT 7.0 +.TP +.B Type +list(MultiParentedTree) +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B treepositions(root) +Return a list of all tree positions that can be used to reach +this multi\-parented tree starting from \fBroot\fP\&. I.e., the +following is always true: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +for treepos in ptree.treepositions(root): + root[treepos] is ptree +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tree.ParentedTree(node, children=None) +Bases: \fBnltk.tree.AbstractParentedTree\fP +.sp +A \fBTree\fP that automatically maintains parent pointers for +single\-parented trees. The following are methods for querying +the structure of a parented tree: \fBparent\fP, \fBparent_index\fP, +\fBleft_sibling\fP, \fBright_sibling\fP, \fBroot\fP, \fBtreeposition\fP\&. +.sp +Each \fBParentedTree\fP may have at most one parent. In +particular, subtrees may not be shared. Any attempt to reuse a +single \fBParentedTree\fP as a child of more than one parent (or +as multiple children of the same parent) will cause a +\fBValueError\fP exception to be raised. 
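+.sp +For example, a minimal sketch of the parent bookkeeping: +.sp +.nf +.ft C +>>> from nltk.tree import ParentedTree +>>> np = ParentedTree.fromstring(\(aq(NP (DT the) (NN dog))\(aq) +>>> np[0].parent() is np +True +>>> np[0].treeposition() +(0,) +>>> np[0].root() is np +True +.ft P +.fi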
+.sp +\fBParentedTrees\fP should never be used in the same tree as \fBTrees\fP +or \fBMultiParentedTrees\fP\&. Mixing tree implementations may result +in incorrect parent pointers and in \fBTypeError\fP exceptions. +.INDENT 7.0 +.TP +.B left_sibling() +The left sibling of this tree, or None if it has none. +.UNINDENT +.INDENT 7.0 +.TP +.B parent() +The parent of this tree, or None if it has no parent. +.UNINDENT +.INDENT 7.0 +.TP +.B parent_index() +The index of this tree in its parent. I.e., +\fBptree.parent()[ptree.parent_index()] is ptree\fP\&. Note that +\fBptree.parent_index()\fP is not necessarily equal to +\fBptree.parent.index(ptree)\fP, since the \fBindex()\fP method +returns the first child that is equal to its argument. +.UNINDENT +.INDENT 7.0 +.TP +.B right_sibling() +The right sibling of this tree, or None if it has none. +.UNINDENT +.INDENT 7.0 +.TP +.B root() +The root of this tree. I.e., the unique ancestor of this tree +whose parent is None. If \fBptree.parent()\fP is None, then +\fBptree\fP is its own root. +.UNINDENT +.INDENT 7.0 +.TP +.B treeposition() +The tree position of this tree, relative to the root of the +tree. I.e., \fBptree.root[ptree.treeposition] is ptree\fP\&. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tree.ProbabilisticMixIn(**kwargs) +Bases: \fBobject\fP +.sp +A mix\-in class to associate probabilities with other classes +(trees, rules, etc.). To use the \fBProbabilisticMixIn\fP class, +define a new class that derives from an existing class and from +ProbabilisticMixIn. You will need to define a new constructor for +the new class, which explicitly calls the constructors of both its +parent classes. For example: +.sp +.nf +.ft C +>>> from nltk.probability import ProbabilisticMixIn +>>> class A: +\&... def __init__(self, x, y): self.data = (x,y) +\&... +>>> class ProbabilisticA(A, ProbabilisticMixIn): +\&... def __init__(self, x, y, **prob_kwarg): +\&... A.__init__(self, x, y) +\&... ProbabilisticMixIn.__init__(self, **prob_kwarg) +.ft P +.fi +.sp +See the documentation for the ProbabilisticMixIn +\fBconstructor<__init__>\fP for information about the arguments it +expects. +.sp +You should generally also redefine the string representation +methods, the comparison methods, and the hashing method. +.INDENT 7.0 +.TP +.B logprob() +Return \fBlog(p)\fP, where \fBp\fP is the probability associated +with this object. +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B prob() +Return the probability associated with this object. +.INDENT 7.0 +.TP +.B Return type +float +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_logprob(logprob) +Set the log probability associated with this object to +\fBlogprob\fP\&. I.e., set the probability associated with this +object to \fB2**(logprob)\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBlogprob\fP (\fIfloat\fP) \-\- The new log probability +.UNINDENT +.UNINDENT +.INDENT 7.0 +.TP +.B set_prob(prob) +Set the probability associated with this object to \fBprob\fP\&. +.INDENT 7.0 +.TP +.B Parameters +\fBprob\fP (\fIfloat\fP) \-\- The new probability +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B class nltk.tree.ProbabilisticTree(node, children=None, **prob_kwargs) +Bases: \fI\%nltk.tree.Tree\fP, \fI\%nltk.probability.ProbabilisticMixIn\fP +.INDENT 7.0 +.TP +.B classmethod convert(val) +.UNINDENT +.INDENT 7.0 +.TP +.B copy(deep=False) +Return a shallow copy of the list. 
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B class nltk.tree.Tree(node, children=None)
+Bases: \fBlist\fP
+.sp
+A Tree represents a hierarchical grouping of leaves and subtrees.
+For example, each constituent in a syntax tree is represented by a single Tree.
+.sp
+A tree\(aqs children are encoded as a list of leaves and subtrees,
+where a leaf is a basic (non\-tree) value; and a subtree is a
+nested Tree.
+.sp
+.nf
+.ft C
+>>> from nltk.tree import Tree
+>>> print(Tree(1, [2, Tree(3, [4]), 5]))
+(1 2 (3 4) 5)
+>>> vp = Tree(\(aqVP\(aq, [Tree(\(aqV\(aq, [\(aqsaw\(aq]),
+\&...               Tree(\(aqNP\(aq, [\(aqhim\(aq])])
+>>> s = Tree(\(aqS\(aq, [Tree(\(aqNP\(aq, [\(aqI\(aq]), vp])
+>>> print(s)
+(S (NP I) (VP (V saw) (NP him)))
+>>> print(s[1])
+(VP (V saw) (NP him))
+>>> print(s[1,1])
+(NP him)
+>>> t = Tree.fromstring("(S (NP I) (VP (V saw) (NP him)))")
+>>> s == t
+True
+>>> t[1][1].set_label(\(aqX\(aq)
+>>> t[1][1].label()
+\(aqX\(aq
+>>> print(t)
+(S (NP I) (VP (V saw) (X him)))
+>>> t[0], t[1,1] = t[1,1], t[0]
+>>> print(t)
+(S (X him) (VP (V saw) (NP I)))
+.ft P
+.fi
+.sp
+The length of a tree is the number of children it has.
+.sp
+.nf
+.ft C
+>>> len(t)
+2
+.ft P
+.fi
+.sp
+The set_label() and label() methods allow individual constituents
+to be labeled. For example, syntax trees use this label to specify
+phrase tags, such as "NP" and "VP".
+.sp
+Several Tree methods use "tree positions" to specify
+children or descendants of a tree. Tree positions are defined as
+follows:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+The tree position \fIi\fP specifies a Tree\(aqs \fIi\fPth child.
+.IP \(bu 2
+The tree position \fB()\fP specifies the Tree itself.
+.IP \(bu 2
+If \fIp\fP is the tree position of descendant \fId\fP, then
+\fIp+(i,)\fP specifies the \fIi\fPth child of \fId\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+I.e., every tree position is either a single index \fIi\fP,
+specifying \fBtree[i]\fP; or a sequence \fIi1, i2, ..., iN\fP,
+specifying \fBtree[i1][i2]...[iN]\fP\&.
+.sp
+Construct a new tree. This constructor can be called in one
+of two ways:
+.INDENT 7.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B \fBTree(label, children)\fP constructs a new tree with the
+specified label and list of children.
+.UNINDENT
+.IP \(bu 2
+\fBTree.fromstring(s)\fP constructs a new tree by parsing the string \fBs\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B chomsky_normal_form(factor=\(aqright\(aq, horzMarkov=None, vertMarkov=0, childChar=\(aq|\(aq, parentChar=\(aq^\(aq)
+This method can modify a tree in three ways:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP 1. 3
+Convert a tree into its Chomsky Normal Form (CNF)
+equivalent \-\- Every subtree has either two non\-terminals
+or one terminal as its children. This process requires
+the creation of more "artificial" non\-terminal nodes.
+.IP 2. 3
+Markov (horizontal) smoothing of children in new artificial
+nodes
+.IP 3. 3
+Vertical (parent) annotation of nodes
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfactor\fP (\fIstr =\fP\fI [\fP\fIleft\fP\fI|\fP\fIright\fP\fI]\fP) \-\- Right or left factoring method (default = "right")
+.IP \(bu 2
+\fBhorzMarkov\fP (\fIint\fP\fI | \fP\fINone\fP) \-\- Markov order for sibling smoothing in artificial nodes (None (default) = include all siblings)
+.IP \(bu 2
+\fBvertMarkov\fP (\fIint\fP\fI | \fP\fINone\fP) \-\- Markov order for parent smoothing (0 (default) = no vertical annotation)
+.IP \(bu 2
+\fBchildChar\fP (\fIstr\fP) \-\- A string used in construction of the artificial nodes, separating the head of the
+original subtree from the child nodes that have yet to be expanded (default = "|")
+.IP \(bu 2
+\fBparentChar\fP (\fIstr\fP) \-\- A string used to separate the node representation from its vertical annotation
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B collapse_unary(collapsePOS=False, collapseRoot=False, joinChar=\(aq+\(aq)
+Collapse subtrees with a single child (i.e., unary productions)
+into a new non\-terminal (Tree node) joined by \(aqjoinChar\(aq.
+This is useful when working with algorithms that do not allow
+unary productions, and completely removing the unary productions
+would require loss of useful information. The Tree is modified
+directly (since it is passed by reference) and no value is returned.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBcollapsePOS\fP (\fIbool\fP) \-\- \(aqFalse\(aq (default) will not collapse the parent of leaf nodes (i.e.,
+Part\-of\-Speech tags) since they are always unary productions
+.IP \(bu 2
+\fBcollapseRoot\fP (\fIbool\fP) \-\- \(aqFalse\(aq (default) will not modify the root production
+if it is unary. For the Penn WSJ treebank corpus, this corresponds
+to the TOP \-> productions.
+.IP \(bu 2
+\fBjoinChar\fP (\fIstr\fP) \-\- A string used to connect collapsed node values (default = "+")
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod convert(tree)
+Convert a tree between different subtypes of Tree. \fBcls\fP determines
+which class will be used to encode the new tree.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBtree\fP (\fITree\fP) \-\- The tree that should be converted.
+.TP
+.B Returns
+The new Tree.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B copy(deep=False)
+Return a shallow copy of the tree; if \fBdeep\fP is True, return a
+deep copy instead.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B draw()
+Open a new window containing a graphical diagram of this tree.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B flatten()
+Return a flat version of the tree, with all non\-root non\-terminals removed.
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
+>>> print(t.flatten())
+(S the dog chased the cat)
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+a tree consisting of this tree\(aqs root connected directly to
+its leaves, omitting all intervening non\-terminal nodes.
+.TP
+.B Return type
+Tree
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B freeze(leaf_freezer=None)
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod fromlist(l)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBl\fP (\fIlist\fP) \-\- a tree represented as nested lists
+.TP
+.B Returns
+A tree corresponding to the list representation \fBl\fP\&.
+.TP
+.B Return type
+Tree
+.UNINDENT
+.sp
+Convert nested lists to an NLTK Tree
+.UNINDENT
+.INDENT 7.0
+.TP
+.B classmethod fromstring(s, brackets=\(aq()\(aq, read_node=None, read_leaf=None, node_pattern=None, leaf_pattern=None, remove_empty_top_bracketing=False)
+Read a bracketed tree string and return the resulting tree.
+Trees are represented as nested bracketings, such as:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+(S (NP (NNP John)) (VP (V runs)))
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP (\fIstr\fP) \-\- The string to read
+.IP \(bu 2
+\fBbrackets\fP (\fIstr\fP\fI (\fP\fIlength=2\fP\fI)\fP) \-\- The bracket characters used to mark the
+beginning and end of trees and subtrees.
+.IP \(bu 2
+\fBread_node\fP, \fBread_leaf\fP (\fIfunction\fP) \-\-
+.sp
+If specified, these functions
+are applied to the substrings of \fBs\fP corresponding to
+nodes and leaves (respectively) to obtain the values for
+those nodes and leaves. They should have the following
+signature:
+.INDENT 2.0
+.INDENT 3.5
+read_node(str) \-> value
+.UNINDENT
+.UNINDENT
+.sp
+For example, these functions could be used to process nodes
+and leaves whose values should be some type other than
+string (such as \fBFeatStruct\fP).
+Note that by default, node strings and leaf strings are
+delimited by whitespace and brackets; to override this
+default, use the \fBnode_pattern\fP and \fBleaf_pattern\fP
+arguments.
+
+.IP \(bu 2
+\fBnode_pattern\fP, \fBleaf_pattern\fP (\fIstr\fP) \-\- Regular expression patterns
+used to find node and leaf substrings in \fBs\fP\&. By
+default, both patterns are defined to match any
+sequence of non\-whitespace non\-bracket characters.
+.IP \(bu 2
+\fBremove_empty_top_bracketing\fP (\fIbool\fP) \-\- If the resulting tree has
+an empty node label, and is length one, then return its
+single child instead. This is useful for treebank trees,
+which sometimes contain an extra level of bracketing.
+.UNINDENT
+.TP
+.B Returns
+A tree corresponding to the string representation \fBs\fP\&.
+If this class method is called using a subclass of Tree,
+then it will return a tree of that type.
+.TP
+.B Return type
+Tree
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B height()
+Return the height of the tree.
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
+>>> t.height()
+5
+>>> print(t[0,0])
+(D the)
+>>> t[0,0].height()
+2
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+The height of this tree. The height of a tree
+containing no children is 1; the height of a tree
+containing only leaves is 2; and the height of any other
+tree is one plus the maximum of its children\(aqs
+heights.
+.TP
+.B Return type
+int
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B label()
+Return the node label of the tree.
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring(\(aq(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))\(aq)
+>>> t.label()
+\(aqS\(aq
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+the node label (typically a string)
+.TP
+.B Return type
+any
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B leaf_treeposition(index)
+.INDENT 7.0
+.TP
+.B Returns
+The tree position of the \fBindex\fP\-th leaf in this
+tree. I.e., if \fBtp=self.leaf_treeposition(i)\fP, then
+\fBself[tp]==self.leaves()[i]\fP\&.
+.TP
+.B Raises
+\fBIndexError\fP \-\- If this tree contains fewer than \fBindex+1\fP
+leaves, or if \fBindex<0\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B leaves()
+Return the leaves of the tree.
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
+>>> t.leaves()
+[\(aqthe\(aq, \(aqdog\(aq, \(aqchased\(aq, \(aqthe\(aq, \(aqcat\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+a list containing this tree\(aqs leaves.
+The order reflects the order of the
+leaves in the tree\(aqs hierarchical structure.
+.TP
+.B Return type
+list
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B property node
+Outdated method to access the node value; use the label() method instead.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pformat(margin=70, indent=0, nodesep=\(aq\(aq, parens=\(aq()\(aq, quotes=False)
+.INDENT 7.0
+.TP
+.B Returns
+A pretty\-printed string representation of this tree.
+.TP
+.B Return type
+str
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBmargin\fP (\fIint\fP) \-\- The right margin at which to do line\-wrapping.
+.IP \(bu 2
+\fBindent\fP (\fIint\fP) \-\- The indentation level at which printing
+begins. This number is used to decide how far to indent
+subsequent lines.
+.IP \(bu 2
+\fBnodesep\fP \-\- A string that is used to separate the node
+from the children (the default is the empty string). E.g., the
+value \fB\(aq:\(aq\fP gives trees like \fB(S: (NP: I) (VP: (V: saw) (NP: it)))\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pformat_latex_qtree()
+Returns a representation of the tree compatible with the
+LaTeX qtree package. This consists of the string \fB\eTree\fP
+followed by the tree represented in bracketed notation.
+.sp
+For example, the following result was generated from a parse tree of
+the sentence \fBThe announcement astounded us\fP:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+\eTree [.I\(aq\(aq [.N\(aq\(aq [.D The ] [.N\(aq [.N announcement ] ] ]
+    [.I\(aq [.V\(aq\(aq [.V\(aq [.V astounded ] [.N\(aq\(aq [.N\(aq [.N us ] ] ] ] ] ] ]
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+See \fI\%http://www.ling.upenn.edu/advice/latex.html\fP for the LaTeX
+style file for the qtree package.
+.INDENT 7.0
+.TP
+.B Returns
+A latex qtree representation of this tree.
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pos()
+Return a sequence of pos\-tagged words extracted from the tree.
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
+>>> t.pos()
+[(\(aqthe\(aq, \(aqD\(aq), (\(aqdog\(aq, \(aqN\(aq), (\(aqchased\(aq, \(aqV\(aq), (\(aqthe\(aq, \(aqD\(aq), (\(aqcat\(aq, \(aqN\(aq)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Returns
+a list of tuples containing leaves and pre\-terminals (part\-of\-speech tags).
+The order reflects the order of the leaves in the tree\(aqs hierarchical structure.
+.TP
+.B Return type
+list(tuple)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pprint(**kwargs)
+Print a string representation of this Tree to \(aqstream\(aq
+.UNINDENT
+.INDENT 7.0
+.TP
+.B pretty_print(sentence=None, highlight=(), stream=None, **kwargs)
+Pretty\-print this tree as ASCII or Unicode art.
+For explanation of the arguments, see the documentation for
+\fInltk.treeprettyprinter.TreePrettyPrinter\fP\&.
+.UNINDENT
+.INDENT 7.0
+.TP
+.B productions()
+Generate the productions that correspond to the non\-terminal nodes of the tree.
+For each subtree of the form (P: C1 C2 ... Cn) this produces a production of the
+form P \-> C1 C2 ... Cn.
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
+>>> t.productions()
+[S \-> NP VP, NP \-> D N, D \-> \(aqthe\(aq, N \-> \(aqdog\(aq, VP \-> V NP, V \-> \(aqchased\(aq,
+NP \-> D N, D \-> \(aqthe\(aq, N \-> \(aqcat\(aq]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Return type
+list(Production)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B set_label(label)
+Set the node label of the tree.
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
+>>> t.set_label("T")
+>>> print(t)
+(T (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBlabel\fP (\fIany\fP) \-\- the node label (typically a string)
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B subtrees(filter=None)
+Generate all the subtrees of this tree, optionally restricted
+to trees matching the filter function.
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
+>>> for s in t.subtrees(lambda t: t.height() == 2):
+\&...     print(s)
+(D the)
+(N dog)
+(V chased)
+(D the)
+(N cat)
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBfilter\fP (\fIfunction\fP) \-\- the function to filter all local trees
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B treeposition_spanning_leaves(start, end)
+.INDENT 7.0
+.TP
+.B Returns
+The tree position of the lowest descendant of this
+tree that dominates \fBself.leaves()[start:end]\fP\&.
+.TP
+.B Raises
+\fBValueError\fP \-\- if \fBend <= start\fP
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B treepositions(order=\(aqpreorder\(aq)
+.sp
+.nf
+.ft C
+>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
+>>> t.treepositions()
+[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), ...]
+>>> for pos in t.treepositions(\(aqleaves\(aq):
+\&...     t[pos] = t[pos][::\-1].upper()
+>>> print(t)
+(S (NP (D EHT) (N GOD)) (VP (V DESAHC) (NP (D EHT) (N TAC))))
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBorder\fP \-\- One of: \fBpreorder\fP, \fBpostorder\fP, \fBbothorder\fP,
+\fBleaves\fP\&.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B un_chomsky_normal_form(expandUnary=True, childChar=\(aq|\(aq, parentChar=\(aq^\(aq, unaryChar=\(aq+\(aq)
+This method modifies the tree in three ways:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP 1. 3
+Transforms a tree in Chomsky Normal Form back to its
+original structure (branching greater than two)
+.IP 2. 3
+Removes any parent annotation (if it exists)
+.IP 3. 3
+(optional) expands unary subtrees (if previously
+collapsed with collapse_unary(...) )
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBexpandUnary\fP (\fIbool\fP) \-\- Flag to expand unary or not (default = True)
+.IP \(bu 2
+\fBchildChar\fP (\fIstr\fP) \-\- A string separating the head node from its children in an artificial node (default = "|")
+.IP \(bu 2
+\fBparentChar\fP (\fIstr\fP) \-\- A string separating the node label from its parent annotation (default = "^")
+.IP \(bu 2
+\fBunaryChar\fP (\fIstr\fP) \-\- A string joining two non\-terminals in a unary production (default = "+")
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tree.bracket_parse(s)
+Use Tree.fromstring(s, remove_empty_top_bracketing=True) instead.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.tree.sinica_parse(s)
+Parse a Sinica Treebank string and return a tree. Trees are represented as nested bracketings,
+as shown in the following example (X represents a Chinese character):
+S(goal:NP(Head:Nep:XX)|theme:NP(Head:Nhaa:X)|quantity:Dab:X|Head:VL2:X)#0(PERIODCATEGORY)
+.INDENT 7.0
+.TP
+.B Returns
+A tree corresponding to the string representation.
+.TP
+.B Return type
+Tree
+.TP
+.B Parameters
+\fBs\fP (\fIstr\fP) \-\- The string to be converted
+.UNINDENT
+.UNINDENT
+.SS nltk.treeprettyprinter module
+.sp
+Pretty\-printing of discontinuous trees.
+Adapted from the disco\-dop project, by Andreas van Cranenburgh.
+\fI\%https://github.com/andreasvc/disco\-dop\fP
+.sp
+Interesting reference (not used for this code):
+T. Eschbach et al., Orth. Hypergraph Drawing, Journal of
+Graph Algorithms and Applications, 10(2) 141\-\-157 (2006).
+\fI\%http://jgaa.info/accepted/2006/EschbachGuentherBecker2006.10.2.pdf\fP
+.INDENT 0.0
+.TP
+.B class nltk.treeprettyprinter.TreePrettyPrinter(tree, sentence=None, highlight=())
+Bases: \fBobject\fP
+.sp
+Pretty\-print a tree in text format, either as ASCII or Unicode.
+The tree can be a normal tree, or discontinuous.
+.sp
+\fBTreePrettyPrinter(tree, sentence=None, highlight=())\fP
+creates an object from which different visualizations can be created.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtree\fP \-\- a Tree object.
+.IP \(bu 2
+\fBsentence\fP \-\- a list of words (strings). If \fIsentence\fP is given,
+\fItree\fP must contain integers as leaves, which are taken as indices
+in \fIsentence\fP\&. Using this you can display a discontinuous tree.
+.IP \(bu 2
+\fBhighlight\fP \-\- Optionally, a sequence of Tree objects in \fItree\fP which
+should be highlighted. Has the effect of only applying colors to nodes
+in this sequence (nodes should be given as Tree objects, terminals as
+indices).
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> from nltk.tree import Tree
+>>> tree = Tree.fromstring(\(aq(S (NP Mary) (VP walks))\(aq)
+>>> print(TreePrettyPrinter(tree).text())
+      S
+  ____|____
+ NP        VP
+ |         |
+Mary     walks
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B static nodecoords(tree, sentence, highlight)
+Produce coordinates of nodes on a grid.
+.sp
+Objective:
+.INDENT 7.0
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Produce coordinates for a non\-overlapping placement of nodes and
+horizontal lines.
+.UNINDENT
+.IP \(bu 2
+.INDENT 2.0
+.TP
+.B Order edges so that crossing edges cross a minimal number of previous
+horizontal lines (never vertical lines).
+.UNINDENT
+.UNINDENT
+.sp
+Approach:
+.INDENT 7.0
+.IP \(bu 2
+bottom up level order traversal (start at terminals)
+.IP \(bu 2
+at each level, identify nodes which cannot be on the same row
+.IP \(bu 2
+identify nodes which cannot be in the same column
+.IP \(bu 2
+place nodes into a grid at (row, column)
+.IP \(bu 2
+order child\-parent edges with crossing edges last
+.UNINDENT
+.sp
+Coordinates are (row, column); the origin (0, 0) is at the top left;
+the root node is on row 0. Coordinates do not consider the size of a
+node (which depends on font, &c), so the width of a column of the grid
+should be automatically determined by the element with the greatest
+width in that column. Alternatively, the integer coordinates could be
+converted to coordinates in which the distances between adjacent nodes
+are non\-uniform.
+.sp
+Produces tuple (nodes, coords, edges, highlighted) where:
+.INDENT 7.0
+.IP \(bu 2
+nodes[id]: Tree object for the node with this integer id
+.IP \(bu 2
+coords[id]: (n, m) coordinate where to draw node with id in the grid
+.IP \(bu 2
+edges[id]: parent id of node with this id (ordered dictionary)
+.IP \(bu 2
+highlighted: set of ids that should be highlighted
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B svg(nodecolor=\(aqblue\(aq, leafcolor=\(aqred\(aq, funccolor=\(aqgreen\(aq)
+.INDENT 7.0
+.TP
+.B Returns
+SVG representation of a tree.
+.UNINDENT
+.UNINDENT
+.INDENT 7.0
+.TP
+.B text(nodedist=1, unicodelines=False, html=False, ansi=False, nodecolor=\(aqblue\(aq, leafcolor=\(aqred\(aq, funccolor=\(aqgreen\(aq, abbreviate=None, maxwidth=16)
+.INDENT 7.0
+.TP
+.B Returns
+ASCII art for a discontinuous tree.
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBunicodelines\fP \-\- whether to use Unicode line drawing characters
+instead of plain (7\-bit) ASCII.
+.IP \(bu 2
+\fBhtml\fP \-\- whether to wrap output in html code (default plain text).
+.IP \(bu 2
+\fBansi\fP \-\- whether to produce colors with ANSI escape sequences
+(only effective when html==False).
+.IP \(bu 2
+\fBnodecolor\fP, \fBleafcolor\fP \-\- specify colors of leaves and phrasal
+nodes; effective when either html or ansi is True.
+.IP \(bu 2
+\fBabbreviate\fP \-\- if True, abbreviate labels longer than 5 characters.
+If integer, abbreviate labels longer than \fIabbr\fP characters.
+.IP \(bu 2
+\fBmaxwidth\fP \-\- maximum number of characters before a label starts to
+wrap; pass None to disable.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.SS nltk.treetransforms module
+.sp
+A collection of methods for tree (grammar) transformations used
+in parsing natural language.
+.sp
+Although many of these methods are technically grammar transformations
+(i.e., Chomsky Normal Form), when working with treebanks it is much more
+natural to visualize these modifications in a tree structure. Hence,
+we will do all transformations directly on the tree itself.
+Transforming the tree directly also allows us to do parent annotation.
+A grammar can then be simply induced from the modified tree.
+.sp
+The following is a short tutorial on the available transformations.
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.IP 1. 3
+Chomsky Normal Form (binarization)
+.sp
+It is well known that any grammar has a Chomsky Normal Form (CNF)
+equivalent grammar where CNF is defined by every production having
+either two non\-terminals or one terminal on its right hand side.
+When we have hierarchically structured data (i.e., a treebank), it is
+natural to view this in terms of productions where the root of every
+subtree is the head (left hand side) of the production and all of
+its children are the right hand side constituents. In order to
+convert a tree into CNF, we simply need to ensure that every subtree
+has either two subtrees as children (binarization), or one leaf node
+(non\-terminal). In order to binarize a subtree with more than two
+children, we must introduce artificial nodes. 
+.sp
+There are two popular methods to convert a tree into CNF: left
+factoring and right factoring. The following example demonstrates
+the difference between them. Example:
+.INDENT 3.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+Original       Right\-Factored            Left\-Factored
+
+     A              A                          A
+   / | \e          /   \e                     /   \e
+  B  C  D  ==>   B    A|<C\-D>    OR    A|<B\-C>    D
+                      /   \e            /   \e
+                     C     D          B     C
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.IP 2. 3
+Parent Annotation
+.sp
+In addition to binarizing the tree, there are two standard
+modifications to node labels we can do in the same traversal: parent
+annotation and Markov order\-N smoothing (or sibling smoothing).
+.sp
+The purpose of parent annotation is to refine the probabilities of
+productions by adding a small amount of context. With this simple
+addition, a CYK (inside\-outside, dynamic programming chart parse)
+can improve from 74% to 79% accuracy. A natural generalization from
+parent annotation is to grandparent annotation and beyond. The
+tradeoff becomes accuracy gain vs. computational complexity. We
+must also keep in mind data sparsity issues. Example:
+.INDENT 3.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+Original       Parent Annotation
+
+     A                 A^<?>
+   / | \e              /    \e
+  B  C  D  ==>   B^<A>      A|<C\-D>^<?>     where ? is the
+                            /      \e        parent of A
+                        C^<A>       D^<A>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.IP 3. 3
+Markov order\-N smoothing
+.sp
+Markov smoothing combats data sparsity issues as well as decreasing
+computational requirements by limiting the number of children
+included in artificial nodes. In practice, most people use an order
+2 grammar. Example:
+.INDENT 3.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ Original      No Smoothing          Markov order 1    Markov order 2    etc.
+
+  __A__            A                      A                  A
+ / /|\e \e          / \e                    / \e                / \e
+B C D E F ==>  B   A|<C\-D\-E\-F>  ==>  B   A|<C>   ==>   B   A|<C\-D>
+                    /   \e                 /  \e               /  \e
+                   C    ...              C    ...           C    ...
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Annotation decisions can be thought about in the vertical direction
+(parent, grandparent, etc.) and the horizontal direction (number of
+siblings to keep). Parameters to the following functions specify
+these values. For more information see:
+.sp
+Dan Klein and Chris Manning (2003) "Accurate Unlexicalized
+Parsing", ACL\-03. \fI\%http://www.aclweb.org/anthology/P03\-1054\fP
+.IP 4. 3
+Unary Collapsing
+.sp
+Collapse unary productions (i.e., subtrees with a single child) into a
+new non\-terminal (Tree node). This is useful when working with
+algorithms that do not allow unary productions, yet you do not wish
+to lose the parent information. Example:
+.INDENT 3.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+  A
+  |
+  B   ==>   A+B
+ / \e       / \e
+C   D     C   D
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.treetransforms.chomsky_normal_form(tree, factor=\(aqright\(aq, horzMarkov=None, vertMarkov=0, childChar=\(aq|\(aq, parentChar=\(aq^\(aq)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.treetransforms.collapse_unary(tree, collapsePOS=False, collapseRoot=False, joinChar=\(aq+\(aq)
+Collapse subtrees with a single child (i.e., unary productions)
+into a new non\-terminal (Tree node) joined by \(aqjoinChar\(aq.
+This is useful when working with algorithms that do not allow
+unary productions, and completely removing the unary productions
+would require loss of useful information. The Tree is modified
+directly (since it is passed by reference) and no value is returned.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtree\fP (\fITree\fP) \-\- The Tree to be collapsed
+.IP \(bu 2
+\fBcollapsePOS\fP (\fIbool\fP) \-\- \(aqFalse\(aq (default) will not collapse the parent of leaf nodes (i.e.,
+Part\-of\-Speech tags) since they are always unary productions
+.IP \(bu 2
+\fBcollapseRoot\fP (\fIbool\fP) \-\- \(aqFalse\(aq (default) will not modify the root production
+if it is unary. For the Penn WSJ treebank corpus, this corresponds
+to the TOP \-> productions.
+.IP \(bu 2
+\fBjoinChar\fP (\fIstr\fP) \-\- A string used to connect collapsed node values (default = "+")
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.treetransforms.un_chomsky_normal_form(tree, expandUnary=True, childChar=\(aq|\(aq, parentChar=\(aq^\(aq, unaryChar=\(aq+\(aq)
+.UNINDENT
+.SS nltk.util module
+.INDENT 0.0
+.TP
+.B class nltk.util.Index(pairs)
+Bases: \fBcollections.defaultdict\fP
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.acyclic_branches_depth_first(tree, children=iter, depth=\-1, cut_mark=None, traversed=None)
+Traverse the nodes of a tree in depth\-first order,
+discarding any cycles within the same branch,
+but keeping duplicate paths in different branches.
+Adds cut_mark (when defined) if cycles were truncated.
+.sp
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.sp
+Catches only cycles within the same branch, while keeping
+duplicate paths that arrive through different branches:
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> from nltk.util import acyclic_branches_depth_first as tree
+>>> wn=nltk.corpus.wordnet
+>>> from pprint import pprint
+>>> pprint(tree(wn.synset(\(aqcertified.a.01\(aq), lambda s:s.also_sees(), cut_mark=\(aq...\(aq, depth=4))
+[Synset(\(aqcertified.a.01\(aq),
+ [Synset(\(aqauthorized.a.01\(aq),
+  [Synset(\(aqlawful.a.01\(aq),
+   [Synset(\(aqlegal.a.01\(aq),
+    "Cycle(Synset(\(aqlawful.a.01\(aq),0,...)",
+    [Synset(\(aqlegitimate.a.01\(aq), \(aq...\(aq]],
+   [Synset(\(aqstraight.a.06\(aq),
+    [Synset(\(aqhonest.a.01\(aq), \(aq...\(aq],
+    "Cycle(Synset(\(aqlawful.a.01\(aq),0,...)"]],
+  [Synset(\(aqlegitimate.a.01\(aq),
+   "Cycle(Synset(\(aqauthorized.a.01\(aq),1,...)",
+   [Synset(\(aqlegal.a.01\(aq),
+    [Synset(\(aqlawful.a.01\(aq), \(aq...\(aq],
+    "Cycle(Synset(\(aqlegitimate.a.01\(aq),0,...)"],
+   [Synset(\(aqvalid.a.01\(aq),
+    "Cycle(Synset(\(aqlegitimate.a.01\(aq),0,...)",
+    [Synset(\(aqreasonable.a.01\(aq), \(aq...\(aq]]],
+  [Synset(\(aqofficial.a.01\(aq), "Cycle(Synset(\(aqauthorized.a.01\(aq),1,...)"]],
+ [Synset(\(aqdocumented.a.01\(aq)]]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.acyclic_breadth_first(tree, children=iter, maxdepth=\-1)
+Traverse the nodes of a tree in breadth\-first order,
+discarding any cycles.
+.sp
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.acyclic_depth_first(tree, children=iter, depth=\-1, cut_mark=None, traversed=None)
+Traverse the nodes of a tree in depth\-first order,
+discarding any cycles within any branch,
+adding cut_mark (when specified) if cycles were truncated.
+.sp
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.sp
+Catches all cycles:
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> from nltk.util import acyclic_depth_first as acyclic_tree
+>>> wn=nltk.corpus.wordnet
+>>> from pprint import pprint
+>>> pprint(acyclic_tree(wn.synset(\(aqdog.n.01\(aq), lambda s:s.hypernyms(),cut_mark=\(aq...\(aq))
+[Synset(\(aqdog.n.01\(aq),
+ [Synset(\(aqcanine.n.02\(aq),
+  [Synset(\(aqcarnivore.n.01\(aq),
+   [Synset(\(aqplacental.n.01\(aq),
+    [Synset(\(aqmammal.n.01\(aq),
+     [Synset(\(aqvertebrate.n.01\(aq),
+      [Synset(\(aqchordate.n.01\(aq),
+       [Synset(\(aqanimal.n.01\(aq),
+        [Synset(\(aqorganism.n.01\(aq),
+         [Synset(\(aqliving_thing.n.01\(aq),
+          [Synset(\(aqwhole.n.02\(aq),
+           [Synset(\(aqobject.n.01\(aq),
+            [Synset(\(aqphysical_entity.n.01\(aq),
+             [Synset(\(aqentity.n.01\(aq)]]]]]]]]]]]]],
+ [Synset(\(aqdomestic_animal.n.01\(aq), "Cycle(Synset(\(aqanimal.n.01\(aq),\-3,...)"]]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.acyclic_dic2tree(node, dic)
+Convert the acyclic dictionary \(aqdic\(aq, where the keys are nodes, and the
+values are lists of children, to an output tree suitable for pprint(),
+starting at root \(aqnode\(aq, with subtrees as nested lists.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.bigrams(sequence, **kwargs)
+Return the bigrams generated from a sequence of items, as an iterator.
+For example:
+.sp
+.nf
+.ft C
+>>> from nltk.util import bigrams
+>>> list(bigrams([1,2,3,4,5]))
+[(1, 2), (2, 3), (3, 4), (4, 5)]
+.ft P
+.fi
+.sp
+Wrap with list for a list version of this function.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsequence\fP (\fIsequence\fP\fI or \fP\fIiter\fP) \-\- the source data to be converted into bigrams
+.TP
+.B Return type
+iter(tuple)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.binary_search_file(file, key, cache={}, cacheDepth=\-1)
+Return the line from the file with first word key.
+Searches through a sorted file using the binary search algorithm.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBfile\fP (\fIfile\fP) \-\- the file to be searched through.
+.IP \(bu 2
+\fBkey\fP (\fIstr\fP) \-\- the identifier we are searching for.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.breadth_first(tree, children=iter, maxdepth=\-1)
+Traverse the nodes of a tree in breadth\-first order.
+(No check for cycles.)
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.choose(n, k)
+This function is a fast way to calculate binomial coefficients, commonly
+known as nCk, i.e. the number of combinations of n things taken k at a time.
+(\fI\%https://en.wikipedia.org/wiki/Binomial_coefficient\fP).
+.sp
+This is equivalent to \fIscipy.special.comb()\fP with exact long\-integer
+computation, but this implementation is faster; see
+\fI\%https://github.com/nltk/nltk/issues/1181\fP
+.sp
+.nf
+.ft C
+>>> choose(4, 2)
+6
+>>> choose(6, 2)
+15
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBn\fP (\fIint\fP) \-\- The number of things.
+.IP \(bu 2
+\fBk\fP (\fIint\fP) \-\- The number of things taken at a time.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.clean_html(html)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.clean_url(url)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.edge_closure(tree, children=iter, maxdepth=\-1, verbose=False)
+Yield the edges of a graph in breadth\-first order,
+discarding any cycles. The first argument should be the start node;
+children should be a function taking as argument a graph node
+and returning an iterator of the node\(aqs children.
+.sp
+.nf
+.ft C
+>>> from nltk.util import edge_closure
+>>> print(list(edge_closure(\(aqA\(aq, lambda node:{\(aqA\(aq:[\(aqB\(aq,\(aqC\(aq], \(aqB\(aq:\(aqC\(aq, \(aqC\(aq:\(aqB\(aq}[node])))
+[(\(aqA\(aq, \(aqB\(aq), (\(aqA\(aq, \(aqC\(aq), (\(aqB\(aq, \(aqC\(aq), (\(aqC\(aq, \(aqB\(aq)]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.edges2dot(edges, shapes=None, attr=None)
+.INDENT 7.0
+.TP
+.B Parameters
+\fBedges\fP \-\- the set (or list) of edges of a directed graph.
+.TP
+.B Return dot_string
+a representation of \(aqedges\(aq as a string in the DOT
+graph language, which can be converted to an image by the \(aqdot\(aq program
+from the Graphviz package, or nltk.parse.dependencygraph.dot2img(dot_string).
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBshapes\fP \-\- dictionary of strings that trigger a specified shape.
+.IP \(bu 2
+\fBattr\fP \-\- dictionary with global graph attributes
+.UNINDENT
+.UNINDENT
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> from nltk.util import edges2dot
+>>> print(edges2dot([(\(aqA\(aq, \(aqB\(aq), (\(aqA\(aq, \(aqC\(aq), (\(aqB\(aq, \(aqC\(aq), (\(aqC\(aq, \(aqB\(aq)]))
+digraph G {
+"A" \-> "B";
+"A" \-> "C";
+"B" \-> "C";
+"C" \-> "B";
+}
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.elementtree_indent(elem, level=0)
+Recursive function to indent an ElementTree._ElementInterface
+used for pretty printing. Run indent on elem and then output
+in the normal way.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBelem\fP (\fIElementTree._ElementInterface\fP) \-\- element to be indented. Will be modified.
+.IP \(bu 2
+\fBlevel\fP (\fInonnegative integer\fP) \-\- level of indentation for this element
+.UNINDENT
+.TP
+.B Return type
+ElementTree._ElementInterface
+.TP
+.B Returns
+Contents of elem indented to reflect its structure
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.everygrams(sequence, min_len=1, max_len=\-1, pad_left=False, pad_right=False, **kwargs)
+Returns all possible ngrams generated from a sequence of items, as an iterator.
+.sp
+.nf
+.ft C
+>>> sent = \(aqa b c\(aq.split()
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B New version outputs for everygrams.
+.sp
+.nf
+.ft C
+>>> list(everygrams(sent))
+[(\(aqa\(aq,), (\(aqa\(aq, \(aqb\(aq), (\(aqa\(aq, \(aqb\(aq, \(aqc\(aq), (\(aqb\(aq,), (\(aqb\(aq, \(aqc\(aq), (\(aqc\(aq,)]
+.ft P
+.fi
+.TP
+.B Old version outputs for everygrams.
+.sp
+.nf
+.ft C
+>>> sorted(everygrams(sent), key=len)
+[(\(aqa\(aq,), (\(aqb\(aq,), (\(aqc\(aq,), (\(aqa\(aq, \(aqb\(aq), (\(aqb\(aq, \(aqc\(aq), (\(aqa\(aq, \(aqb\(aq, \(aqc\(aq)]
+.ft P
+.fi
+.sp
+.nf
+.ft C
+>>> list(everygrams(sent, max_len=2))
+[(\(aqa\(aq,), (\(aqa\(aq, \(aqb\(aq), (\(aqb\(aq,), (\(aqb\(aq, \(aqc\(aq), (\(aqc\(aq,)]
+.ft P
+.fi
+.UNINDENT
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsequence\fP (\fIsequence\fP\fI or \fP\fIiter\fP) \-\- the source data to be converted into ngrams. If max_len is
+not provided, this sequence will be loaded into memory
+.IP \(bu 2
+\fBmin_len\fP (\fIint\fP) \-\- minimum length of the ngrams, i.e., the minimum n\-gram order/degree
+.IP \(bu 2
+\fBmax_len\fP (\fIint\fP) \-\- maximum length of the ngrams (set to length of sequence by default)
+.IP \(bu 2
+\fBpad_left\fP (\fIbool\fP) \-\- whether the ngrams should be left\-padded
+.IP \(bu 2
+\fBpad_right\fP (\fIbool\fP) \-\- whether the ngrams should be right\-padded
+.UNINDENT
+.TP
+.B Return type
+iter(tuple)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.filestring(f)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.flatten(*args)
+Flatten a list.
+.sp
+.nf
+.ft C
+>>> from nltk.util import flatten
+>>> flatten(1, 2, [\(aqb\(aq, \(aqa\(aq , [\(aqc\(aq, \(aqd\(aq]], 3)
+[1, 2, \(aqb\(aq, \(aqa\(aq, \(aqc\(aq, \(aqd\(aq, 3]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+\fBargs\fP \-\- items and lists to be combined into a single list
+.TP
+.B Return type
+list
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.guess_encoding(data)
+Given a byte string, attempt to decode it.
+Tries the standard \(aqUTF8\(aq and \(aqlatin\-1\(aq encodings,
+plus several gathered from locale information.
+.sp
+The calling program \fImust\fP first call:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+locale.setlocale(locale.LC_ALL, \(aq\(aq)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+If successful it returns \fB(decoded_unicode, successful_encoding)\fP\&.
+If unsuccessful it raises a \fBUnicodeError\fP\&.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.in_idle()
+Return True if this function is run within idle. Tkinter
+programs that are run in idle should never call \fBTk.mainloop\fP; so
+this function should be used to gate all calls to \fBTk.mainloop\fP\&.
+.INDENT 7.0
+.TP
+.B Warning
+This function works by checking \fBsys.stdin\fP\&. If the
+user has modified \fBsys.stdin\fP, then it may return incorrect
+results.
+.TP
+.B Return type
+bool
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.invert_dict(d)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.invert_graph(graph)
+Inverts a directed graph.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBgraph\fP (\fIdict\fP\fI(\fP\fIset\fP\fI)\fP) \-\- the graph, represented as a dictionary of sets
+.TP
+.B Returns
+the inverted graph
+.TP
+.B Return type
+dict(set)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.ngrams(sequence, n, **kwargs)
+Return the ngrams generated from a sequence of items, as an iterator.
+For example:
+.sp
+.nf
+.ft C
+>>> from nltk.util import ngrams
+>>> list(ngrams([1,2,3,4,5], 3))
+[(1, 2, 3), (2, 3, 4), (3, 4, 5)]
+.ft P
+.fi
+.sp
+Wrap with list for a list version of this function. 
Set pad_left +or pad_right to true in order to get additional ngrams: +.sp +.nf +.ft C +>>> list(ngrams([1,2,3,4,5], 2, pad_right=True)) +[(1, 2), (2, 3), (3, 4), (4, 5), (5, None)] +>>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol=\(aq\(aq)) +[(1, 2), (2, 3), (3, 4), (4, 5), (5, \(aq\(aq)] +>>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol=\(aq\(aq)) +[(\(aq\(aq, 1), (1, 2), (2, 3), (3, 4), (4, 5)] +>>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol=\(aq\(aq, right_pad_symbol=\(aq\(aq)) +[(\(aq\(aq, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, \(aq\(aq)] +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsequence\fP (\fIsequence\fP\fI or \fP\fIiter\fP) \-\- the source data to be converted into ngrams +.IP \(bu 2 +\fBn\fP (\fIint\fP) \-\- the degree of the ngrams +.IP \(bu 2 +\fBpad_left\fP (\fIbool\fP) \-\- whether the ngrams should be left\-padded +.IP \(bu 2 +\fBpad_right\fP (\fIbool\fP) \-\- whether the ngrams should be right\-padded +.IP \(bu 2 +\fBleft_pad_symbol\fP (\fIany\fP) \-\- the symbol to use for left padding (default is None) +.IP \(bu 2 +\fBright_pad_symbol\fP (\fIany\fP) \-\- the symbol to use for right padding (default is None) +.UNINDENT +.TP +.B Return type +sequence or iter +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.util.pad_sequence(sequence, n, pad_left=False, pad_right=False, left_pad_symbol=None, right_pad_symbol=None) +Returns a padded sequence of items before ngram extraction. +.sp +.nf +.ft C +>>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol=\(aq\(aq, right_pad_symbol=\(aq\(aq)) +[\(aq\(aq, 1, 2, 3, 4, 5, \(aq\(aq] +>>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol=\(aq\(aq)) +[\(aq\(aq, 1, 2, 3, 4, 5] +>>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol=\(aq\(aq)) +[1, 2, 3, 4, 5, \(aq\(aq] +.ft P +.fi +.INDENT 7.0 +.TP +.B Parameters +.INDENT 7.0 +.IP \(bu 2 +\fBsequence\fP (\fIsequence\fP\fI or \fP\fIiter\fP) \-\- the source data to be padded +.IP \(bu 2 +\fBn\fP (\fIint\fP) \-\- the degree of the ngrams +.IP \(bu 2 +\fBpad_left\fP (\fIbool\fP) \-\- whether the ngrams should be left\-padded +.IP \(bu 2 +\fBpad_right\fP (\fIbool\fP) \-\- whether the ngrams should be right\-padded +.IP \(bu 2 +\fBleft_pad_symbol\fP (\fIany\fP) \-\- the symbol to use for left padding (default is None) +.IP \(bu 2 +\fBright_pad_symbol\fP (\fIany\fP) \-\- the symbol to use for right padding (default is None) +.UNINDENT +.TP +.B Return type +sequence or iter +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B nltk.util.pairwise(iterable) +s \-> (s0,s1), (s1,s2), (s2, s3), ... 
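+.sp
+For example (an illustrative sketch; the output follows directly from
+the mapping above):
+.sp
+.nf
+.ft C
+>>> from nltk.util import pairwise
+>>> list(pairwise([1, 2, 3, 4]))
+[(1, 2), (2, 3), (3, 4)]
+.ft P
+.fi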
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.parallelize_preprocess(func, iterator, processes, progress_bar=False)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.pr(data, start=0, end=None)
+Pretty print a sequence of data items
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBdata\fP (\fIsequence\fP\fI or \fP\fIiter\fP) \-\- the data stream to print
+.IP \(bu 2
+\fBstart\fP (\fIint\fP) \-\- the start position
+.IP \(bu 2
+\fBend\fP (\fIint\fP) \-\- the end position
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.print_string(s, width=70)
+Pretty print a string, breaking lines on whitespace
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBs\fP (\fIstr\fP) \-\- the string to print, consisting of words and spaces
+.IP \(bu 2
+\fBwidth\fP (\fIint\fP) \-\- the display width
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.re_show(regexp, string, left=\(aq{\(aq, right=\(aq}\(aq)
+Return a string with markers surrounding the matched substrings.
+Search \fBstring\fP for substrings matching \fBregexp\fP and wrap the matches
+with the given delimiters (braces by default). This is convenient for
+learning about regular expressions.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBregexp\fP (\fIstr\fP) \-\- The regular expression.
+.IP \(bu 2
+\fBstring\fP (\fIstr\fP) \-\- The string being matched.
+.IP \(bu 2
+\fBleft\fP (\fIstr\fP) \-\- The left delimiter (printed before the matched substring)
+.IP \(bu 2
+\fBright\fP (\fIstr\fP) \-\- The right delimiter (printed after the matched substring)
+.UNINDENT
+.TP
+.B Return type
+str
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.set_proxy(proxy, user=None, password=\(aq\(aq)
+Set the HTTP proxy for Python to download through.
+.sp
+If \fBproxy\fP is None then tries to set proxy from environment or system
+settings.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBproxy\fP \-\- The HTTP proxy server to use. For example:
+\(aq\fI\%http://proxy.example.com:3128/\fP\(aq
+.IP \(bu 2
+\fBuser\fP \-\- The username to authenticate with. Use None to disable
+authentication.
+.IP \(bu 2
+\fBpassword\fP \-\- The password to authenticate with.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.skipgrams(sequence, n, k, **kwargs)
+Returns all possible skipgrams generated from a sequence of items, as an iterator.
+Skipgrams are ngrams that allow tokens to be skipped.
+Refer to \fI\%http://homepages.inf.ed.ac.uk/ballison/pdf/lrec_skipgrams.pdf\fP
+.sp
+.nf
+.ft C
+>>> sent = "Insurgents killed in ongoing fighting".split()
+>>> list(skipgrams(sent, 2, 2))
+[(\(aqInsurgents\(aq, \(aqkilled\(aq), (\(aqInsurgents\(aq, \(aqin\(aq), (\(aqInsurgents\(aq, \(aqongoing\(aq), (\(aqkilled\(aq, \(aqin\(aq), (\(aqkilled\(aq, \(aqongoing\(aq), (\(aqkilled\(aq, \(aqfighting\(aq), (\(aqin\(aq, \(aqongoing\(aq), (\(aqin\(aq, \(aqfighting\(aq), (\(aqongoing\(aq, \(aqfighting\(aq)]
+>>> list(skipgrams(sent, 3, 2))
+[(\(aqInsurgents\(aq, \(aqkilled\(aq, \(aqin\(aq), (\(aqInsurgents\(aq, \(aqkilled\(aq, \(aqongoing\(aq), (\(aqInsurgents\(aq, \(aqkilled\(aq, \(aqfighting\(aq), (\(aqInsurgents\(aq, \(aqin\(aq, \(aqongoing\(aq), (\(aqInsurgents\(aq, \(aqin\(aq, \(aqfighting\(aq), (\(aqInsurgents\(aq, \(aqongoing\(aq, \(aqfighting\(aq), (\(aqkilled\(aq, \(aqin\(aq, \(aqongoing\(aq), (\(aqkilled\(aq, \(aqin\(aq, \(aqfighting\(aq), (\(aqkilled\(aq, \(aqongoing\(aq, \(aqfighting\(aq), (\(aqin\(aq, \(aqongoing\(aq, \(aqfighting\(aq)]
+.ft P
+.fi
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBsequence\fP (\fIsequence\fP\fI or \fP\fIiter\fP) \-\- the source data to be converted into skipgrams
+.IP \(bu 2
+\fBn\fP (\fIint\fP) \-\- the degree of the ngrams
+.IP \(bu 2
+\fBk\fP (\fIint\fP) \-\- the skip distance
+.UNINDENT
+.TP
+.B Return type
+iter(tuple)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.tokenwrap(tokens, separator=\(aq \(aq, width=70)
+Pretty print a list of text tokens, breaking lines on whitespace
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBtokens\fP (\fIlist\fP) \-\- the tokens to print
+.IP \(bu 2
+\fBseparator\fP (\fIstr\fP) \-\- the string to use to separate tokens
+.IP \(bu 2
+\fBwidth\fP (\fIint\fP) \-\- the display width (default=70)
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.transitive_closure(graph, reflexive=False)
+Calculate the transitive closure of a directed graph,
+optionally the reflexive transitive closure.
+.sp
+The algorithm is a slight modification of the "Marking Algorithm" of
+Ioannidis & Ramakrishnan (1988) "Efficient Transitive Closure Algorithms".
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBgraph\fP (\fIdict\fP\fI(\fP\fIset\fP\fI)\fP) \-\- the initial graph, represented as a dictionary of sets
+.IP \(bu 2
+\fBreflexive\fP (\fIbool\fP) \-\- if set, also make the closure reflexive
+.UNINDENT
+.TP
+.B Return type
+dict(set)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.trigrams(sequence, **kwargs)
+Return the trigrams generated from a sequence of items, as an iterator.
+For example:
+.sp
+.nf
+.ft C
+>>> from nltk.util import trigrams
+>>> list(trigrams([1,2,3,4,5]))
+[(1, 2, 3), (2, 3, 4), (3, 4, 5)]
+.ft P
+.fi
+.sp
+Wrap with list for a list version of this function.
+.INDENT 7.0
+.TP
+.B Parameters
+\fBsequence\fP (\fIsequence\fP\fI or \fP\fIiter\fP) \-\- the source data to be converted into trigrams
+.TP
+.B Return type
+iter(tuple)
+.UNINDENT
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.unique_list(xs)
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.unweighted_minimum_spanning_dict(tree, children=iter)
+Output a dictionary representing a Minimum Spanning Tree (MST)
+of an unweighted graph, by traversing the nodes of a tree in
+breadth\-first order, discarding any cycles.
+.sp
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> from nltk.corpus import wordnet as wn
+>>> from nltk.util import unweighted_minimum_spanning_dict as umsd
+>>> from pprint import pprint
+>>> pprint(umsd(wn.synset(\(aqbound.a.01\(aq), lambda s:s.also_sees()))
+{Synset(\(aqbound.a.01\(aq): [Synset(\(aqunfree.a.02\(aq)],
+ Synset(\(aqclassified.a.02\(aq): [],
+ Synset(\(aqconfined.a.02\(aq): [],
+ Synset(\(aqdependent.a.01\(aq): [],
+ Synset(\(aqrestricted.a.01\(aq): [Synset(\(aqclassified.a.02\(aq)],
+ Synset(\(aqunfree.a.02\(aq): [Synset(\(aqconfined.a.02\(aq),
+                         Synset(\(aqdependent.a.01\(aq),
+                         Synset(\(aqrestricted.a.01\(aq)]}
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.unweighted_minimum_spanning_digraph(tree, children=iter, shapes=None, attr=None)
+Build a Minimum Spanning Tree (MST) of an unweighted graph,
+by traversing the nodes of a tree in breadth\-first order,
+discarding any cycles.
+.sp
+Return a representation of this MST as a string in the DOT graph language,
+which can be converted to an image by the \(aqdot\(aq program from the Graphviz
+package, or nltk.parse.dependencygraph.dot2img(dot_string).
+.sp
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> wn=nltk.corpus.wordnet
+>>> from nltk.util import unweighted_minimum_spanning_digraph as umsd
+>>> print(umsd(wn.synset(\(aqbound.a.01\(aq), lambda s:s.also_sees()))
+digraph G {
+"Synset(\(aqbound.a.01\(aq)" \-> "Synset(\(aqunfree.a.02\(aq)";
+"Synset(\(aqunfree.a.02\(aq)" \-> "Synset(\(aqconfined.a.02\(aq)";
+"Synset(\(aqunfree.a.02\(aq)" \-> "Synset(\(aqdependent.a.01\(aq)";
+"Synset(\(aqunfree.a.02\(aq)" \-> "Synset(\(aqrestricted.a.01\(aq)";
+"Synset(\(aqrestricted.a.01\(aq)" \-> "Synset(\(aqclassified.a.02\(aq)";
+}
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.unweighted_minimum_spanning_tree(tree, children=iter)
+Output a Minimum Spanning Tree (MST) of an unweighted graph,
+by traversing the nodes of a tree in breadth\-first order,
+discarding any cycles.
+.sp
+The first argument should be the tree root;
+children should be a function taking as argument a tree node
+and returning an iterator of the node\(aqs children.
+.sp
+.nf
+.ft C
+>>> import nltk
+>>> from nltk.util import unweighted_minimum_spanning_tree as mst
+>>> wn=nltk.corpus.wordnet
+>>> from pprint import pprint
+>>> pprint(mst(wn.synset(\(aqbound.a.01\(aq), lambda s:s.also_sees()))
+[Synset(\(aqbound.a.01\(aq),
+ [Synset(\(aqunfree.a.02\(aq),
+  [Synset(\(aqconfined.a.02\(aq)],
+  [Synset(\(aqdependent.a.01\(aq)],
+  [Synset(\(aqrestricted.a.01\(aq), [Synset(\(aqclassified.a.02\(aq)]]]]
+.ft P
+.fi
+.UNINDENT
+.INDENT 0.0
+.TP
+.B nltk.util.usage(obj)
+.UNINDENT
+.SS nltk.wsd module
+.INDENT 0.0
+.TP
+.B nltk.wsd.lesk(context_sentence, ambiguous_word, pos=None, synsets=None)
+Return a synset for an ambiguous word in a context.
+.INDENT 7.0
+.TP
+.B Parameters
+.INDENT 7.0
+.IP \(bu 2
+\fBcontext_sentence\fP (\fIiter\fP) \-\- The context sentence where the ambiguous word
+occurs, passed as an iterable of words.
+.IP \(bu 2
+\fBambiguous_word\fP (\fIstr\fP) \-\- The ambiguous word that requires WSD.
+.IP \(bu 2
+\fBpos\fP (\fIstr\fP) \-\- A specified Part\-of\-Speech (POS).
+.IP \(bu 2
+\fBsynsets\fP (\fIiter\fP) \-\- Possible synsets of the ambiguous word.
+.UNINDENT
+.TP
+.B Returns
+\fBlesk_sense\fP The Synset() object with the highest signature overlaps. 
+.UNINDENT
+.sp
+This function is an implementation of the original Lesk algorithm (1986) [1].
+.sp
+Usage example:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+>>> lesk([\(aqI\(aq, \(aqwent\(aq, \(aqto\(aq, \(aqthe\(aq, \(aqbank\(aq, \(aqto\(aq, \(aqdeposit\(aq, \(aqmoney\(aq, \(aq.\(aq], \(aqbank\(aq, \(aqn\(aq)
+Synset(\(aqsavings_bank.n.02\(aq)
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+[1] Lesk, Michael. "Automatic sense disambiguation using machine
+readable dictionaries: how to tell a pine cone from an ice cream
+cone." Proceedings of the 5th Annual International Conference on
+Systems Documentation. ACM, 1986.
+\fI\%http://dl.acm.org/citation.cfm?id=318728\fP
+.UNINDENT
+.SS Module contents
+.sp
+The Natural Language Toolkit (NLTK) is an open source Python library
+for Natural Language Processing. A free online book is available.
+(If you use the library for academic research, please cite the book.)
+.sp
+Steven Bird, Ewan Klein, and Edward Loper (2009).
+Natural Language Processing with Python. O\(aqReilly Media Inc.
+\fI\%http://nltk.org/book\fP
+.sp
+@version: 3.6.2
+.INDENT 0.0
+.TP
+.B nltk.demo()
+.UNINDENT
+.INDENT 0.0
+.IP \(bu 2
+genindex
+.IP \(bu 2
+modindex
+.IP \(bu 2
+search
+.UNINDENT
+.SH AUTHOR
+Steven Bird
+.SH COPYRIGHT
+2021, NLTK Project
+.\" Generated by docutils manpage writer.
+.
diff --git a/team.html b/team.html
index 869914a41..09ca3bf22 100644
--- a/team.html
+++ b/team.html
@@ -69,7 +69,7 @@

Documentation

NLTK Documentation