From facc59bc607025b4669aea4c59957c5eb8d27a04 Mon Sep 17 00:00:00 2001 From: Tom Aarsen Date: Mon, 11 Oct 2021 11:42:53 +0200 Subject: [PATCH 1/5] Replaced http with https in specific places --- CONTRIBUTING.md | 18 ++-- ChangeLog | 10 +- INSTALL.txt | 2 +- Makefile | 2 +- README.md | 4 +- nltk/__init__.py | 6 +- nltk/app/__init__.py | 2 +- nltk/app/chartparser_app.py | 2 +- nltk/app/chunkparser_app.py | 2 +- nltk/app/collocations_app.py | 2 +- nltk/app/concordance_app.py | 2 +- nltk/app/nemo_app.py | 2 +- nltk/app/rdparser_app.py | 2 +- nltk/app/srparser_app.py | 2 +- nltk/app/wordfreq_app.py | 2 +- nltk/app/wordnet_app.py | 24 ++--- nltk/book.py | 2 +- nltk/ccg/__init__.py | 2 +- nltk/ccg/api.py | 2 +- nltk/ccg/chart.py | 2 +- nltk/ccg/combinator.py | 2 +- nltk/ccg/lexicon.py | 2 +- nltk/ccg/logic.py | 2 +- nltk/chat/__init__.py | 2 +- nltk/chat/eliza.py | 2 +- nltk/chat/iesha.py | 2 +- nltk/chat/rude.py | 2 +- nltk/chat/suntsu.py | 4 +- nltk/chat/util.py | 2 +- nltk/chat/zen.py | 2 +- nltk/chunk/__init__.py | 2 +- nltk/chunk/api.py | 2 +- nltk/chunk/named_entity.py | 2 +- nltk/chunk/regexp.py | 2 +- nltk/chunk/util.py | 2 +- nltk/classify/__init__.py | 2 +- nltk/classify/api.py | 2 +- nltk/classify/decisiontree.py | 2 +- nltk/classify/maxent.py | 4 +- nltk/classify/megam.py | 6 +- nltk/classify/naivebayes.py | 2 +- nltk/classify/positivenaivebayes.py | 2 +- nltk/classify/rte_classify.py | 2 +- nltk/classify/scikitlearn.py | 4 +- nltk/classify/senna.py | 4 +- nltk/classify/svm.py | 4 +- nltk/classify/tadm.py | 6 +- nltk/classify/textcat.py | 6 +- nltk/classify/util.py | 2 +- nltk/classify/weka.py | 4 +- nltk/cli.py | 2 +- nltk/cluster/__init__.py | 2 +- nltk/cluster/api.py | 2 +- nltk/cluster/em.py | 2 +- nltk/cluster/gaac.py | 2 +- nltk/cluster/kmeans.py | 2 +- nltk/cluster/util.py | 2 +- nltk/collections.py | 2 +- nltk/collocations.py | 6 +- nltk/compat.py | 2 +- nltk/corpus/__init__.py | 4 +- nltk/corpus/europarl_raw.py | 2 +- nltk/corpus/reader/__init__.py | 
2 +- nltk/corpus/reader/aligned.py | 2 +- nltk/corpus/reader/api.py | 2 +- nltk/corpus/reader/bnc.py | 4 +- nltk/corpus/reader/bracket_parse.py | 2 +- nltk/corpus/reader/categorized_sents.py | 6 +- nltk/corpus/reader/chasen.py | 4 +- nltk/corpus/reader/childes.py | 4 +- nltk/corpus/reader/chunked.py | 2 +- nltk/corpus/reader/cmudict.py | 2 +- nltk/corpus/reader/comparative_sents.py | 4 +- nltk/corpus/reader/conll.py | 2 +- nltk/corpus/reader/crubadan.py | 4 +- nltk/corpus/reader/dependency.py | 2 +- nltk/corpus/reader/framenet.py | 4 +- nltk/corpus/reader/ieer.py | 4 +- nltk/corpus/reader/indian.py | 2 +- nltk/corpus/reader/ipipan.py | 4 +- nltk/corpus/reader/knbc.py | 2 +- nltk/corpus/reader/lin.py | 2 +- nltk/corpus/reader/mte.py | 8 +- nltk/corpus/reader/nkjp.py | 2 +- nltk/corpus/reader/nombank.py | 2 +- nltk/corpus/reader/nps_chat.py | 2 +- nltk/corpus/reader/opinion_lexicon.py | 4 +- nltk/corpus/reader/panlex_lite.py | 2 +- nltk/corpus/reader/panlex_swadesh.py | 2 +- nltk/corpus/reader/pl196x.py | 2 +- nltk/corpus/reader/plaintext.py | 2 +- nltk/corpus/reader/ppattach.py | 4 +- nltk/corpus/reader/propbank.py | 2 +- nltk/corpus/reader/pros_cons.py | 4 +- nltk/corpus/reader/reviews.py | 4 +- nltk/corpus/reader/rte.py | 2 +- nltk/corpus/reader/semcor.py | 4 +- nltk/corpus/reader/senseval.py | 8 +- nltk/corpus/reader/sentiwordnet.py | 4 +- nltk/corpus/reader/sinica_treebank.py | 6 +- nltk/corpus/reader/string_category.py | 2 +- nltk/corpus/reader/switchboard.py | 2 +- nltk/corpus/reader/tagged.py | 2 +- nltk/corpus/reader/timit.py | 2 +- nltk/corpus/reader/toolbox.py | 2 +- nltk/corpus/reader/twitter.py | 2 +- nltk/corpus/reader/util.py | 2 +- nltk/corpus/reader/verbnet.py | 2 +- nltk/corpus/reader/wordlist.py | 14 +-- nltk/corpus/reader/wordnet.py | 8 +- nltk/corpus/reader/xmldocs.py | 2 +- nltk/corpus/reader/ycoe.py | 6 +- nltk/corpus/util.py | 4 +- nltk/data.py | 10 +- nltk/downloader.py | 2 +- nltk/draw/__init__.py | 2 +- nltk/draw/cfg.py | 2 +- 
nltk/draw/dispersion.py | 4 +- nltk/draw/table.py | 4 +- nltk/draw/tree.py | 2 +- nltk/draw/util.py | 4 +- nltk/featstruct.py | 2 +- nltk/grammar.py | 2 +- nltk/help.py | 2 +- nltk/inference/__init__.py | 2 +- nltk/inference/api.py | 2 +- nltk/inference/discourse.py | 4 +- nltk/inference/mace.py | 4 +- nltk/inference/nonmonotonic.py | 2 +- nltk/inference/prover9.py | 6 +- nltk/inference/resolution.py | 2 +- nltk/inference/tableau.py | 2 +- nltk/internals.py | 2 +- nltk/jsontags.py | 2 +- nltk/lazyimport.py | 2 +- nltk/lm/__init__.py | 2 +- nltk/lm/api.py | 2 +- nltk/lm/counter.py | 2 +- nltk/lm/models.py | 2 +- nltk/lm/preprocessing.py | 2 +- nltk/lm/smoothing.py | 2 +- nltk/lm/util.py | 2 +- nltk/lm/vocabulary.py | 2 +- nltk/metrics/__init__.py | 2 +- nltk/metrics/agreement.py | 2 +- nltk/metrics/aline.py | 4 +- nltk/metrics/association.py | 2 +- nltk/metrics/confusionmatrix.py | 2 +- nltk/metrics/distance.py | 2 +- nltk/metrics/paice.py | 4 +- nltk/metrics/scores.py | 2 +- nltk/metrics/segmentation.py | 4 +- nltk/metrics/spearman.py | 2 +- nltk/misc/__init__.py | 2 +- nltk/misc/chomsky.py | 2 +- nltk/misc/minimalset.py | 2 +- nltk/misc/sort.py | 2 +- nltk/misc/wordfinder.py | 2 +- nltk/parse/__init__.py | 2 +- nltk/parse/api.py | 2 +- nltk/parse/bllip.py | 4 +- nltk/parse/chart.py | 2 +- nltk/parse/corenlp.py | 18 ++-- nltk/parse/dependencygraph.py | 4 +- nltk/parse/earleychart.py | 2 +- nltk/parse/evaluate.py | 2 +- nltk/parse/featurechart.py | 2 +- nltk/parse/generate.py | 2 +- nltk/parse/malt.py | 6 +- nltk/parse/nonprojectivedependencyparser.py | 2 +- nltk/parse/pchart.py | 2 +- nltk/parse/projectivedependencyparser.py | 2 +- nltk/parse/recursivedescent.py | 2 +- nltk/parse/shiftreduce.py | 2 +- nltk/parse/stanford.py | 2 +- nltk/parse/transitionparser.py | 2 +- nltk/parse/util.py | 2 +- nltk/parse/viterbi.py | 2 +- nltk/probability.py | 8 +- nltk/sem/__init__.py | 2 +- nltk/sem/boxer.py | 6 +- nltk/sem/chat80.py | 10 +- nltk/sem/cooper_storage.py | 2 +- 
nltk/sem/drt.py | 2 +- nltk/sem/drt_glue_demo.py | 2 +- nltk/sem/evaluate.py | 2 +- nltk/sem/glue.py | 2 +- nltk/sem/hole.py | 2 +- nltk/sem/lfg.py | 2 +- nltk/sem/linearlogic.py | 2 +- nltk/sem/logic.py | 2 +- nltk/sem/relextract.py | 2 +- nltk/sem/skolemize.py | 2 +- nltk/sem/util.py | 2 +- nltk/sentiment/__init__.py | 2 +- nltk/sentiment/sentiment_analyzer.py | 2 +- nltk/sentiment/util.py | 4 +- nltk/sentiment/vader.py | 4 +- nltk/stem/__init__.py | 2 +- nltk/stem/api.py | 2 +- nltk/stem/arlstem.py | 2 +- nltk/stem/arlstem2.py | 2 +- nltk/stem/cistem.py | 4 +- nltk/stem/isri.py | 2 +- nltk/stem/lancaster.py | 2 +- nltk/stem/porter.py | 6 +- nltk/stem/regexp.py | 2 +- nltk/stem/rslp.py | 10 +- nltk/stem/snowball.py | 2 +- nltk/stem/util.py | 2 +- nltk/stem/wordnet.py | 2 +- nltk/tag/__init__.py | 2 +- nltk/tag/api.py | 2 +- nltk/tag/brill.py | 4 +- nltk/tag/brill_trainer.py | 2 +- nltk/tag/crf.py | 2 +- nltk/tag/hmm.py | 2 +- nltk/tag/hunpos.py | 4 +- nltk/tag/mapping.py | 4 +- nltk/tag/perceptron.py | 2 +- nltk/tag/senna.py | 2 +- nltk/tag/sequential.py | 2 +- nltk/tag/stanford.py | 2 +- nltk/tag/tnt.py | 4 +- nltk/tag/util.py | 2 +- nltk/tbl/__init__.py | 2 +- nltk/tbl/demo.py | 2 +- nltk/tbl/erroranalysis.py | 2 +- nltk/tbl/feature.py | 2 +- nltk/tbl/rule.py | 2 +- nltk/tbl/template.py | 2 +- nltk/test/__init__.py | 2 +- nltk/test/chat80.doctest | 4 +- nltk/test/childes.doctest | 2 +- nltk/test/corpus.doctest | 2 +- nltk/test/data.doctest | 2 +- nltk/test/featgram.doctest | 4 +- nltk/test/gensim.doctest | 2 +- nltk/test/inference.doctest | 6 +- nltk/test/misc.doctest | 2 +- nltk/test/portuguese.doctest_latin1 | 96 +++++++++---------- nltk/test/portuguese_en.doctest | 4 +- nltk/test/translate.doctest | 4 +- nltk/test/twitter.ipynb | 2 +- nltk/test/unit/lm/test_counter.py | 2 +- nltk/test/unit/lm/test_models.py | 2 +- nltk/test/unit/lm/test_preprocessing.py | 2 +- nltk/test/unit/lm/test_vocabulary.py | 2 +- nltk/test/unit/test_chunk.py | 2 +- 
nltk/test/unit/test_concordance.py | 2 +- nltk/test/unit/test_disagreement.py | 2 +- nltk/test/unit/test_hmm.py | 2 +- nltk/test/unit/test_json2csv_corpus.py | 2 +- nltk/test/unit/test_stem.py | 8 +- nltk/test/unit/test_tgrep.py | 2 +- nltk/test/unit/translate/test_bleu.py | 2 +- .../test/unit/translate/test_stack_decoder.py | 2 +- nltk/text.py | 2 +- nltk/tgrep.py | 8 +- nltk/tokenize/__init__.py | 2 +- nltk/tokenize/api.py | 2 +- nltk/tokenize/casual.py | 4 +- nltk/tokenize/destructive.py | 2 +- nltk/tokenize/legality_principle.py | 2 +- nltk/tokenize/mwe.py | 2 +- nltk/tokenize/nist.py | 2 +- nltk/tokenize/punkt.py | 2 +- nltk/tokenize/regexp.py | 2 +- nltk/tokenize/repp.py | 4 +- nltk/tokenize/sexpr.py | 2 +- nltk/tokenize/simple.py | 2 +- nltk/tokenize/sonority_sequencing.py | 2 +- nltk/tokenize/stanford.py | 2 +- nltk/tokenize/stanford_segmenter.py | 2 +- nltk/tokenize/texttiling.py | 4 +- nltk/tokenize/toktok.py | 4 +- nltk/tokenize/treebank.py | 6 +- nltk/tokenize/util.py | 4 +- nltk/toolbox.py | 2 +- nltk/translate/__init__.py | 2 +- nltk/translate/api.py | 2 +- nltk/translate/bleu_score.py | 4 +- nltk/translate/chrf_score.py | 8 +- nltk/translate/gale_church.py | 4 +- nltk/translate/gdfa.py | 2 +- nltk/translate/gleu_score.py | 2 +- nltk/translate/ibm1.py | 2 +- nltk/translate/ibm2.py | 2 +- nltk/translate/ibm3.py | 2 +- nltk/translate/ibm4.py | 2 +- nltk/translate/ibm5.py | 2 +- nltk/translate/ibm_model.py | 2 +- nltk/translate/meteor_score.py | 6 +- nltk/translate/metrics.py | 2 +- nltk/translate/nist_score.py | 4 +- nltk/translate/phrase_based.py | 2 +- nltk/translate/ribes_score.py | 6 +- nltk/translate/stack_decoder.py | 2 +- nltk/tree.py | 6 +- nltk/treeprettyprinter.py | 6 +- nltk/treetransforms.py | 4 +- nltk/twitter/__init__.py | 2 +- nltk/twitter/api.py | 2 +- nltk/twitter/common.py | 2 +- nltk/twitter/twitter_demo.py | 4 +- nltk/twitter/twitterclient.py | 4 +- nltk/twitter/util.py | 2 +- nltk/util.py | 12 +-- nltk/wsd.py | 4 +- setup.py | 6 +- 
tools/find_deprecated.py | 2 +- tools/github_actions/third-party.sh | 2 +- tools/global_replace.py | 4 +- tools/svnmime.py | 2 +- tools/travis/third-party.sh | 2 +- tox.ini | 2 +- web/contribute.rst | 2 +- web/data.rst | 8 +- web/dev/jenkins.rst | 14 +-- web/dev/local_testing.rst | 2 +- web/index.rst | 14 +-- web/install.rst | 8 +- web/news.rst | 30 +++--- web/team.rst | 4 +- 324 files changed, 573 insertions(+), 575 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 36091b3359..e0d667ea80 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing to NLTK -Hi! Thanks for your interest in contributing to [NLTK](http://www.nltk.org/). +Hi! Thanks for your interest in contributing to [NLTK](https://www.nltk.org/). :-) You'll be joining a [long list of contributors](https://github.com/nltk/nltk/blob/develop/AUTHORS.md). In this document we'll try to summarize everything that you need to know to do a good job. @@ -33,11 +33,11 @@ Some priority areas for development are listed in the [NLTK Wiki](https://github ### Git -We use [Git](http://git-scm.com/) as our [version control -system](http://en.wikipedia.org/wiki/Revision_control), so the best way to +We use [Git](https://git-scm.com/) as our [version control +system](https://en.wikipedia.org/wiki/Revision_control), so the best way to contribute is to learn how to use it and put your changes on a Git repository. There's a plenty of documentation about Git -- you can start with the [Pro Git -book](http://git-scm.com/book/). +book](https://git-scm.com/book/). ### Setting up a Development Environment @@ -63,7 +63,7 @@ repository [nltk/nltk](https://github.com/nltk/nltk/): ### GitHub Pull requests We use the famous -[gitflow](http://nvie.com/posts/a-successful-git-branching-model/) to manage our +[gitflow](https://nvie.com/posts/a-successful-git-branching-model/) to manage our branches. 
Summary of our git branching model: @@ -89,7 +89,7 @@ Summary of our git branching model: ### Tips - Write [helpful commit - messages](http://robots.thoughtbot.com/5-useful-tips-for-a-better-commit-message). + messages](https://robots.thoughtbot.com/5-useful-tips-for-a-better-commit-message). - Anything in the `develop` branch should be deployable (no failing tests). - Never use `git add .`: it can add unwanted files; - Avoid using `git commit -a` unless you know what you're doing; @@ -106,16 +106,16 @@ Summary of our git branching model: ## Code Guidelines -- Use [PEP8](http://www.python.org/dev/peps/pep-0008/); +- Use [PEP8](https://www.python.org/dev/peps/pep-0008/); - Write tests for your new features (please see "Tests" topic below); - Always remember that [commented code is dead - code](http://www.codinghorror.com/blog/2008/07/coding-without-comments.html); + code](https://www.codinghorror.com/blog/2008/07/coding-without-comments.html); - Name identifiers (variables, classes, functions, module names) with readable names (`x` is always wrong); - When manipulating strings, we prefer either [f-string formatting](https://docs.python.org/3/tutorial/inputoutput.html#formatted-string-literals) (f`'{a} = {b}'`) or [new-style - formatting](http://docs.python.org/library/string.html#format-string-syntax) + formatting](https://docs.python.org/library/string.html#format-string-syntax) (`'{} = {}'.format(a, b)`), instead of the old-style formatting (`'%s = %s' % (a, b)`); - All `#TODO` comments should be turned into issues (use our [GitHub issue system](https://github.com/nltk/nltk/issues)); diff --git a/ChangeLog b/ChangeLog index 9dff019415..8e4a11569f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -239,7 +239,7 @@ Version 3.2.2 2016-12-31 * rewrite Porter Stemmer * rewrite FrameNet corpus reader (adds frame parameter to fes(), lus(), exemplars() - see http://nltk.org/howto/framenet.html) + see https://www.nltk.org/howto/framenet.html) * updated FrameNet Corpus to version 
1.7 * fixes to stanford_segmenter.py, SentiText, CoNLL Corpus Reader * fixes to BLEU, naivebayes, Krippendorff's alpha, Punkt @@ -509,9 +509,9 @@ Morten Neergaard, Nathan Schneider, Rico Sennrich. Version 2.0.1 2012-05-15 -* moved NLTK to GitHub: http://github.com/nltk +* moved NLTK to GitHub: https://github.com/nltk * set up integration testing: https://jenkins.shiningpanda.com/nltk/ (Morten Neergaard) -* converted documentation to Sphinx format: http://nltk.github.com/api/nltk.html +* converted documentation to Sphinx format: https://www.nltk.org/api/nltk.html * dozens of minor enhancements and bugfixes: https://github.com/nltk/nltk/commits/ * dozens of fixes for conformance with PEP-8 * dozens of fixes to ensure operation with Python 2.5 @@ -642,7 +642,7 @@ NLTK-Contrib: Book: * updates for second printing, correcting errata - http://nltk.googlecode.com/svn/trunk/nltk/doc/book/errata.txt + https://nltk.googlecode.com/svn/trunk/nltk/doc/book/errata.txt Data: * added Europarl sample, with 10 docs for each of 11 langs (Nitin Madnani) @@ -855,7 +855,7 @@ Data: * Added movie script "Monty Python and the Holy Grail" to webtext corpus * Replaced words corpus data with a much larger list of English words * New URL for list of available NLTK corpora - http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml + https://nltk.googlecode.com/svn/trunk/nltk_data/index.xml Book: * complete rewrite of first three chapters to make the book accessible diff --git a/INSTALL.txt b/INSTALL.txt index c04d44bd5d..cba3d199a8 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -2,4 +2,4 @@ To install NLTK, run setup.py from an administrator account, e.g.: sudo python setup.py install -For full installation instructions, please see http://nltk.github.com/install.html +For full installation instructions, please see https://www.nltk.org/install.html diff --git a/Makefile b/Makefile index adc0bbe42d..7446b7f620 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK 
Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT PYTHON = python diff --git a/README.md b/README.md index 914ebfdfcb..156d60eaf2 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ NLTK -- the Natural Language Toolkit -- is a suite of open source Python modules, data sets, and tutorials supporting research and development in Natural Language Processing. NLTK requires Python version 3.6, 3.7, 3.8, or 3.9. -For documentation, please visit [nltk.org](http://www.nltk.org/). +For documentation, please visit [nltk.org](https://www.nltk.org/). ## Contributing @@ -14,7 +14,7 @@ For documentation, please visit [nltk.org](http://www.nltk.org/). Do you want to contribute to NLTK development? Great! Please read [CONTRIBUTING.md](CONTRIBUTING.md) for more details. -See also [how to contribute to NLTK](http://www.nltk.org/contribute.html). +See also [how to contribute to NLTK](https://www.nltk.org/contribute.html). ## Donate diff --git a/nltk/__init__.py b/nltk/__init__.py index b06598edca..1cd72fb459 100644 --- a/nltk/__init__.py +++ b/nltk/__init__.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Authors: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -13,7 +13,7 @@ Steven Bird, Ewan Klein, and Edward Loper (2009). Natural Language Processing with Python. O'Reilly Media Inc. -http://nltk.org/book +https://www.nltk.org/book isort:skip_file """ @@ -67,7 +67,7 @@ "natural language", "text analytics", ] -__url__ = "http://nltk.org/" +__url__ = "https://www.nltk.org/" # Maintainer, contributors, etc. 
__maintainer__ = "NLTK Team" diff --git a/nltk/app/__init__.py b/nltk/app/__init__.py index 7e8afe0099..bc5d2e05c1 100644 --- a/nltk/app/__init__.py +++ b/nltk/app/__init__.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/app/chartparser_app.py b/nltk/app/chartparser_app.py index 8abf889248..91423cabe4 100644 --- a/nltk/app/chartparser_app.py +++ b/nltk/app/chartparser_app.py @@ -4,7 +4,7 @@ # Author: Edward Loper # Jean Mark Gawron # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/app/chunkparser_app.py b/nltk/app/chunkparser_app.py index b4694e3ecf..0caf663c82 100644 --- a/nltk/app/chunkparser_app.py +++ b/nltk/app/chunkparser_app.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/app/collocations_app.py b/nltk/app/collocations_app.py index 0d0e76504c..c64f58d382 100644 --- a/nltk/app/collocations_app.py +++ b/nltk/app/collocations_app.py @@ -2,7 +2,7 @@ # Much of the GUI code is imported from concordance.py; We intend to merge these tools together # Copyright (C) 2001-2021 NLTK Project # Author: Sumukh Ghodke -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/app/concordance_app.py b/nltk/app/concordance_app.py index 32310ce669..df50e93245 100755 --- a/nltk/app/concordance_app.py +++ b/nltk/app/concordance_app.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Sumukh Ghodke -# URL: +# URL: # For license information, see LICENSE.TXT import queue as q diff --git a/nltk/app/nemo_app.py b/nltk/app/nemo_app.py index 160afbabbb..b830cc8faf 100755 --- a/nltk/app/nemo_app.py +++ b/nltk/app/nemo_app.py @@ -1,5 +1,5 @@ # Finding (and Replacing) Nemo, Version 1.1, Aristide Grange 2006/06/06 -# 
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496783 +# https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496783 """ Finding (and Replacing) Nemo diff --git a/nltk/app/rdparser_app.py b/nltk/app/rdparser_app.py index c98461d1b6..f444a1a0c5 100644 --- a/nltk/app/rdparser_app.py +++ b/nltk/app/rdparser_app.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/app/srparser_app.py b/nltk/app/srparser_app.py index 8a3daa9bf0..c88f3b0221 100644 --- a/nltk/app/srparser_app.py +++ b/nltk/app/srparser_app.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/app/wordfreq_app.py b/nltk/app/wordfreq_app.py index 89325c6d45..602ca15bfb 100644 --- a/nltk/app/wordfreq_app.py +++ b/nltk/app/wordfreq_app.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Sumukh Ghodke -# URL: +# URL: # For license information, see LICENSE.TXT from matplotlib import pylab diff --git a/nltk/app/wordnet_app.py b/nltk/app/wordnet_app.py index 00e4b7eabf..a572fa6a4a 100644 --- a/nltk/app/wordnet_app.py +++ b/nltk/app/wordnet_app.py @@ -3,13 +3,13 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Jussi Salmela # Paul Bone -# URL: +# URL: # For license information, see LICENSE.TXT """ A WordNet Browser application which launches the default browser (if it is not already running) and opens a new tab with a connection -to http://localhost:port/ . It also starts an HTTP server on the +to https://localhost:port/ . It also starts an HTTP server on the specified port and begins serving browser requests. The default port is 8000. 
(For command-line help, run "python wordnet -h") This application requires that the user's web browser supports @@ -226,7 +226,7 @@ def wnb(port=8000, runBrowser=True, logfilename=None): logfile = None # Compute URL and start web browser - url = "http://localhost:" + str(port) + url = "https://localhost:" + str(port) server_ready = None browser_thread = None @@ -440,7 +440,7 @@ def get_relations_data(word, synset): html_header = """ +'https://www.w3.org/TR/html4/strict.dtd'> + @@ -840,8 +840,8 @@ def get_static_web_help_page():

NLTK Wordnet Browser Help

The NLTK Wordnet Browser is a tool to use in browsing the Wordnet database. It tries to behave like the Wordnet project's web browser but the difference is that the NLTK Wordnet Browser uses a local Wordnet database.

You are using the Javascript client part of the NLTK Wordnet BrowseServer. We assume your browser is in tab sheets enabled mode.

-

For background information on Wordnet, see the Wordnet project home page: http://wordnet.princeton.edu/. For more information on the NLTK project, see the project home: -http://nltk.sourceforge.net/. To get an idea of what the Wordnet version used by this browser includes choose Show Database Info from the View submenu.

+

For background information on Wordnet, see the Wordnet project home page: https://wordnet.princeton.edu/. For more information on the NLTK project, see the project home: +https://www.nltk.org/. To get an idea of what the Wordnet version used by this browser includes choose Show Database Info from the View submenu.

Word search

The word to be searched is typed into the New Word field and the search started with Enter or by clicking the Search button. There is no uppercase/lowercase distinction: the search word is transformed to lowercase before the search.

In addition, the word does not have to be in base form. The browser tries to find the possible base form(s) by making certain morphological substitutions. Typing fLIeS as an obscure example gives one this. Click the previous link to see what this kind of search looks like and then come back to this page by using the Alt+LeftArrow key combination.

@@ -895,12 +895,12 @@ def get_static_index_page(with_shutdown): Get the static index page. """ template = """ - + NLTK Wordnet Browser @@ -928,12 +928,12 @@ def get_static_upper_page(with_shutdown): to shutdown the server. """ template = """ - + diff --git a/nltk/book.py b/nltk/book.py index 815157174c..b27cdc8d95 100644 --- a/nltk/book.py +++ b/nltk/book.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.corpus import ( diff --git a/nltk/ccg/__init__.py b/nltk/ccg/__init__.py index cbedf961ba..90be9be6e7 100644 --- a/nltk/ccg/__init__.py +++ b/nltk/ccg/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Graeme Gange -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/ccg/api.py b/nltk/ccg/api.py index e54124f1c6..afc3a43768 100644 --- a/nltk/ccg/api.py +++ b/nltk/ccg/api.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Graeme Gange -# URL: +# URL: # For license information, see LICENSE.TXT from abc import ABCMeta, abstractmethod diff --git a/nltk/ccg/chart.py b/nltk/ccg/chart.py index 3f8153a2f3..0e1edfab88 100644 --- a/nltk/ccg/chart.py +++ b/nltk/ccg/chart.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Graeme Gange -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/ccg/combinator.py b/nltk/ccg/combinator.py index 6b7c9527ba..9348633068 100644 --- a/nltk/ccg/combinator.py +++ b/nltk/ccg/combinator.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Graeme Gange -# URL: +# URL: # For license information, see LICENSE.TXT """ CCG Combinators diff --git a/nltk/ccg/lexicon.py b/nltk/ccg/lexicon.py index 9adf4c7a6f..bab144e102 100644 --- a/nltk/ccg/lexicon.py +++ b/nltk/ccg/lexicon.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Graeme Gange -# URL: +# URL: # For license information, see LICENSE.TXT """ 
CCG Lexicons diff --git a/nltk/ccg/logic.py b/nltk/ccg/logic.py index 24d62d9b7d..1bddce4572 100644 --- a/nltk/ccg/logic.py +++ b/nltk/ccg/logic.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Tanin Na Nakorn (@tanin) -# URL: +# URL: # For license information, see LICENSE.TXT """ Helper functions for CCG semantics computation diff --git a/nltk/chat/__init__.py b/nltk/chat/__init__.py index 39b8c7d681..dc7ed16d90 100644 --- a/nltk/chat/__init__.py +++ b/nltk/chat/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Authors: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT # Based on an Eliza implementation by Joe Strout , diff --git a/nltk/chat/eliza.py b/nltk/chat/eliza.py index 1804f7d6f5..19f03d30ef 100644 --- a/nltk/chat/eliza.py +++ b/nltk/chat/eliza.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Authors: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT # Based on an Eliza implementation by Joe Strout , diff --git a/nltk/chat/iesha.py b/nltk/chat/iesha.py index f32e5c7d44..f4df6678e4 100644 --- a/nltk/chat/iesha.py +++ b/nltk/chat/iesha.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Selina Dennis -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/chat/rude.py b/nltk/chat/rude.py index ed283fe695..c07a4ffbc8 100644 --- a/nltk/chat/rude.py +++ b/nltk/chat/rude.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Peter Spiller -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.chat.util import Chat, reflections diff --git a/nltk/chat/suntsu.py b/nltk/chat/suntsu.py index 0ffc019d72..6ddd216409 100644 --- a/nltk/chat/suntsu.py +++ b/nltk/chat/suntsu.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Sam Huston 2007 -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -11,7 +11,7 @@ Quoted from Sun Tsu's The Art of War Translated by 
LIONEL GILES, M.A. 1910 Hosted by the Gutenberg Project -http://www.gutenberg.org/ +https://www.gutenberg.org/ """ from nltk.chat.util import Chat, reflections diff --git a/nltk/chat/util.py b/nltk/chat/util.py index 43d26c6f25..e8017b1c28 100644 --- a/nltk/chat/util.py +++ b/nltk/chat/util.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Authors: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT # Based on an Eliza implementation by Joe Strout , diff --git a/nltk/chat/zen.py b/nltk/chat/zen.py index f7151043e5..d31594084a 100644 --- a/nltk/chat/zen.py +++ b/nltk/chat/zen.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Amy Holland -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/chunk/__init__.py b/nltk/chunk/__init__.py index 7114de9c97..1119f6d2b6 100644 --- a/nltk/chunk/__init__.py +++ b/nltk/chunk/__init__.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/chunk/api.py b/nltk/chunk/api.py index ad45c0353b..de4bb93958 100644 --- a/nltk/chunk/api.py +++ b/nltk/chunk/api.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) -# URL: +# URL: # For license information, see LICENSE.TXT ##////////////////////////////////////////////////////// diff --git a/nltk/chunk/named_entity.py b/nltk/chunk/named_entity.py index 1bd49f7c07..c698a4e7c2 100644 --- a/nltk/chunk/named_entity.py +++ b/nltk/chunk/named_entity.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/chunk/regexp.py b/nltk/chunk/regexp.py index d2749062c7..e193cc754f 100644 --- a/nltk/chunk/regexp.py +++ b/nltk/chunk/regexp.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) 
-# URL: +# URL: # For license information, see LICENSE.TXT import re diff --git a/nltk/chunk/util.py b/nltk/chunk/util.py index dcc2f55df8..aeb781a2e7 100644 --- a/nltk/chunk/util.py +++ b/nltk/chunk/util.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) -# URL: +# URL: # For license information, see LICENSE.TXT import re diff --git a/nltk/classify/__init__.py b/nltk/classify/__init__.py index 8dcc322d78..847001c5f0 100644 --- a/nltk/classify/__init__.py +++ b/nltk/classify/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/classify/api.py b/nltk/classify/api.py index 62287f2c82..61e3a7e023 100644 --- a/nltk/classify/api.py +++ b/nltk/classify/api.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/classify/decisiontree.py b/nltk/classify/decisiontree.py index 3a9acebf8b..e8aac35de8 100644 --- a/nltk/classify/decisiontree.py +++ b/nltk/classify/decisiontree.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/classify/maxent.py b/nltk/classify/maxent.py index 774822ce7d..700b060432 100644 --- a/nltk/classify/maxent.py +++ b/nltk/classify/maxent.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Dmitry Chichkov (TypedMaxentFeatureEncoding) -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -1459,7 +1459,7 @@ def train_maxent_classifier_with_megam( if gaussian_prior_sigma: # Lambda is just the precision of the Gaussian prior, i.e. it's the # inverse variance, so the parameter conversion is 1.0/sigma**2. - # See http://www.umiacs.umd.edu/~hal/docs/daume04cg-bfgs.pdf. 
+ # See https://www.umiacs.umd.edu/~hal/docs/daume04cg-bfgs.pdf. inv_variance = 1.0 / gaussian_prior_sigma ** 2 else: inv_variance = 0 diff --git a/nltk/classify/megam.py b/nltk/classify/megam.py index 1ef6e1d0d6..5919752c66 100644 --- a/nltk/classify/megam.py +++ b/nltk/classify/megam.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -20,7 +20,7 @@ nltk.classify.MaxentClassifier.train(corpus, 'megam') -.. _megam: http://www.umiacs.umd.edu/~hal/megam/index.html +.. _megam: https://www.umiacs.umd.edu/~hal/megam/index.html """ import subprocess @@ -54,7 +54,7 @@ def config_megam(bin=None): bin, env_vars=["MEGAM"], binary_names=["megam.opt", "megam", "megam_686", "megam_i686.opt"], - url="http://www.umiacs.umd.edu/~hal/megam/index.html", + url="https://www.umiacs.umd.edu/~hal/megam/index.html", ) diff --git a/nltk/classify/naivebayes.py b/nltk/classify/naivebayes.py index 3d7b48d69e..5b5d33d528 100644 --- a/nltk/classify/naivebayes.py +++ b/nltk/classify/naivebayes.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/classify/positivenaivebayes.py b/nltk/classify/positivenaivebayes.py index ed2f00a3e3..cc303cc40c 100644 --- a/nltk/classify/positivenaivebayes.py +++ b/nltk/classify/positivenaivebayes.py @@ -2,7 +2,7 @@ # # Copyright (C) 2012 NLTK Project # Author: Alessandro Presta -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/classify/rte_classify.py b/nltk/classify/rte_classify.py index f5409afad5..c951112d82 100644 --- a/nltk/classify/rte_classify.py +++ b/nltk/classify/rte_classify.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/classify/scikitlearn.py b/nltk/classify/scikitlearn.py index d7e9bf0685..95afc70c1e 
100644 --- a/nltk/classify/scikitlearn.py +++ b/nltk/classify/scikitlearn.py @@ -1,10 +1,10 @@ # Natural Language Toolkit: Interface to scikit-learn classifiers # # Author: Lars Buitinck -# URL: +# URL: # For license information, see LICENSE.TXT """ -scikit-learn (http://scikit-learn.org) is a machine learning library for +scikit-learn (https://scikit-learn.org) is a machine learning library for Python. It supports many classification algorithms, including SVMs, Naive Bayes, logistic regression (MaxEnt) and decision trees. diff --git a/nltk/classify/senna.py b/nltk/classify/senna.py index eee92bb350..4058a61a92 100644 --- a/nltk/classify/senna.py +++ b/nltk/classify/senna.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Rami Al-Rfou' -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -42,7 +42,7 @@ from nltk.tag.api import TaggerI -_senna_url = "http://ml.nec-labs.com/senna/" +_senna_url = "https://ml.nec-labs.com/senna/" class Senna(TaggerI): diff --git a/nltk/classify/svm.py b/nltk/classify/svm.py index c313662c89..3534b54af1 100644 --- a/nltk/classify/svm.py +++ b/nltk/classify/svm.py @@ -3,12 +3,12 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Leon Derczynski # -# URL: +# URL: # For license information, see LICENSE.TXT """ nltk.classify.svm was deprecated. For classification based on support vector machines SVMs use nltk.classify.scikitlearn -(or `scikit-learn `_ directly). +(or `scikit-learn `_ directly). 
""" diff --git a/nltk/classify/tadm.py b/nltk/classify/tadm.py index 9299399562..a02a80fcda 100644 --- a/nltk/classify/tadm.py +++ b/nltk/classify/tadm.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Joseph Frazee -# URL: +# URL: # For license information, see LICENSE.TXT import subprocess @@ -43,8 +43,8 @@ def write_tadm_file(train_toks, encoding, stream): """ # See the following for a file format description: # - # http://sf.net/forum/forum.php?thread_id=1391502&forum_id=473054 - # http://sf.net/forum/forum.php?thread_id=1675097&forum_id=473054 + # https://sf.net/forum/forum.php?thread_id=1391502&forum_id=473054 + # https://sf.net/forum/forum.php?thread_id=1675097&forum_id=473054 labels = encoding.labels() for featureset, label in train_toks: length_line = "%d\n" % len(labels) diff --git a/nltk/classify/textcat.py b/nltk/classify/textcat.py index 1b79d6675e..27d8422a0a 100644 --- a/nltk/classify/textcat.py +++ b/nltk/classify/textcat.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Avital Pekker # -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -21,10 +21,10 @@ those files. 
For details regarding the algorithm, see: -http://www.let.rug.nl/~vannoord/TextCat/textcat.pdf +https://www.let.rug.nl/~vannoord/TextCat/textcat.pdf For details about An Crubadan, see: -http://borel.slu.edu/crubadan/index.html +https://borel.slu.edu/crubadan/index.html """ from sys import maxsize diff --git a/nltk/classify/util.py b/nltk/classify/util.py index c392859cf2..7ed59ee2a3 100644 --- a/nltk/classify/util.py +++ b/nltk/classify/util.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/classify/weka.py b/nltk/classify/weka.py index ba71a645a0..6c4a419915 100644 --- a/nltk/classify/weka.py +++ b/nltk/classify/weka.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -60,7 +60,7 @@ def config_weka(classpath=None): "Unable to find weka.jar! Use config_weka() " "or set the WEKAHOME environment variable. 
" "For more information about Weka, please see " - "http://www.cs.waikato.ac.nz/ml/weka/" + "https://www.cs.waikato.ac.nz/ml/weka/" ) diff --git a/nltk/cli.py b/nltk/cli.py index e6284b28dc..aa01dc4985 100644 --- a/nltk/cli.py +++ b/nltk/cli.py @@ -1,7 +1,7 @@ # Natural Language Toolkit: NLTK Command-Line Interface # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/cluster/__init__.py b/nltk/cluster/__init__.py index adf5f518d9..6e7ccb1548 100644 --- a/nltk/cluster/__init__.py +++ b/nltk/cluster/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Trevor Cohn -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/cluster/api.py b/nltk/cluster/api.py index 5340b93fa0..8ca4822008 100644 --- a/nltk/cluster/api.py +++ b/nltk/cluster/api.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Trevor Cohn # Porting: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from abc import ABCMeta, abstractmethod diff --git a/nltk/cluster/em.py b/nltk/cluster/em.py index dae7d2ef06..1f8270ea61 100644 --- a/nltk/cluster/em.py +++ b/nltk/cluster/em.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Trevor Cohn -# URL: +# URL: # For license information, see LICENSE.TXT try: diff --git a/nltk/cluster/gaac.py b/nltk/cluster/gaac.py index ead48f0741..ea6a4b850a 100644 --- a/nltk/cluster/gaac.py +++ b/nltk/cluster/gaac.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Trevor Cohn -# URL: +# URL: # For license information, see LICENSE.TXT try: diff --git a/nltk/cluster/kmeans.py b/nltk/cluster/kmeans.py index e061d9b17f..c38cc44fc9 100644 --- a/nltk/cluster/kmeans.py +++ b/nltk/cluster/kmeans.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Trevor Cohn -# URL: +# URL: # For license information, see LICENSE.TXT import copy diff --git a/nltk/cluster/util.py b/nltk/cluster/util.py 
index 47b7e120c7..e36bd30548 100644 --- a/nltk/cluster/util.py +++ b/nltk/cluster/util.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Trevor Cohn # Contributor: J Richard Snape -# URL: +# URL: # For license information, see LICENSE.TXT import copy from abc import abstractmethod diff --git a/nltk/collections.py b/nltk/collections.py index 6c5a91d5fa..f5cd90f4a2 100644 --- a/nltk/collections.py +++ b/nltk/collections.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT import bisect diff --git a/nltk/collocations.py b/nltk/collocations.py index 523e7b54d9..49bb864580 100644 --- a/nltk/collocations.py +++ b/nltk/collocations.py @@ -2,15 +2,15 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Joel Nothman -# URL: +# URL: # For license information, see LICENSE.TXT # """ Tools to identify collocations --- words that often appear consecutively --- within corpora. They may also be used to find other associations between word occurrences. -See Manning and Schutze ch. 5 at http://nlp.stanford.edu/fsnlp/promo/colloc.pdf -and the Text::NSP Perl package at http://ngram.sourceforge.net +See Manning and Schutze ch. 5 at https://nlp.stanford.edu/fsnlp/promo/colloc.pdf +and the Text::NSP Perl package at https://ngram.sourceforge.net Finding collocations requires first calculating the frequencies of words and their appearance in the context of other words. 
Often the collection of words diff --git a/nltk/compat.py b/nltk/compat.py index 3cb3e65720..8a8d4f92af 100755 --- a/nltk/compat.py +++ b/nltk/compat.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # -# URL: +# URL: # For license information, see LICENSE.TXT import os diff --git a/nltk/corpus/__init__.py b/nltk/corpus/__init__.py index ff16a9ab97..e951c6028c 100644 --- a/nltk/corpus/__init__.py +++ b/nltk/corpus/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT # TODO this docstring isn't up-to-date! @@ -16,7 +16,7 @@ Available Corpora ================= -Please see http://www.nltk.org/nltk_data/ for a complete list. +Please see https://www.nltk.org/nltk_data/ for a complete list. Install corpora using nltk.download(). Corpus Reader Functions diff --git a/nltk/corpus/europarl_raw.py b/nltk/corpus/europarl_raw.py index c70a89058a..771e1d6a20 100644 --- a/nltk/corpus/europarl_raw.py +++ b/nltk/corpus/europarl_raw.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Nitin Madnani -# URL: +# URL: # For license information, see LICENSE.TXT import re diff --git a/nltk/corpus/reader/__init__.py b/nltk/corpus/reader/__init__.py index 9a4619a44c..a72db46a13 100644 --- a/nltk/corpus/reader/__init__.py +++ b/nltk/corpus/reader/__init__.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/aligned.py b/nltk/corpus/reader/aligned.py index 4f98e7b9c7..5db8bba418 100644 --- a/nltk/corpus/reader/aligned.py +++ b/nltk/corpus/reader/aligned.py @@ -1,7 +1,7 @@ # Natural Language Toolkit: Aligned Corpus Reader # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # Author: Steven Bird # For license information, see LICENSE.TXT diff --git a/nltk/corpus/reader/api.py b/nltk/corpus/reader/api.py index 4e235224d4..7c908d85e1 
100644 --- a/nltk/corpus/reader/api.py +++ b/nltk/corpus/reader/api.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/bnc.py b/nltk/corpus/reader/bnc.py index 80af3eb48c..6345d34ce0 100644 --- a/nltk/corpus/reader/bnc.py +++ b/nltk/corpus/reader/bnc.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """Corpus reader for the XML version of the British National Corpus.""" @@ -19,7 +19,7 @@ class BNCCorpusReader(XMLCorpusReader): ``words()``, ``sents()``, ``tagged_words()``, and ``tagged_sents()``. You can obtain the full version of the BNC corpus at - http://www.ota.ox.ac.uk/desc/2554 + https://www.ota.ox.ac.uk/desc/2554 If you extracted the archive to a directory called `BNC`, then you can instantiate the reader as:: diff --git a/nltk/corpus/reader/bracket_parse.py b/nltk/corpus/reader/bracket_parse.py index 6fd276bca6..40a1d6e8a5 100644 --- a/nltk/corpus/reader/bracket_parse.py +++ b/nltk/corpus/reader/bracket_parse.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ Corpus reader for corpora that consist of parenthesis-delineated parse trees. diff --git a/nltk/corpus/reader/categorized_sents.py b/nltk/corpus/reader/categorized_sents.py index a20f1e4d10..94728f6319 100644 --- a/nltk/corpus/reader/categorized_sents.py +++ b/nltk/corpus/reader/categorized_sents.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -13,7 +13,7 @@ - Subjectivity Dataset information - Authors: Bo Pang and Lillian Lee. 
-Url: http://www.cs.cornell.edu/people/pabo/movie-review-data +Url: https://www.cs.cornell.edu/people/pabo/movie-review-data Distributed with permission. @@ -26,7 +26,7 @@ - Sentence Polarity Dataset information - Authors: Bo Pang and Lillian Lee. -Url: http://www.cs.cornell.edu/people/pabo/movie-review-data +Url: https://www.cs.cornell.edu/people/pabo/movie-review-data Related papers: diff --git a/nltk/corpus/reader/chasen.py b/nltk/corpus/reader/chasen.py index d0a24deafc..13c9817356 100644 --- a/nltk/corpus/reader/chasen.py +++ b/nltk/corpus/reader/chasen.py @@ -1,11 +1,9 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Masato Hagiwara -# URL: +# URL: # For license information, see LICENSE.TXT -# For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html - import sys from nltk.corpus.reader import util diff --git a/nltk/corpus/reader/childes.py b/nltk/corpus/reader/childes.py index e8a64d64f2..7b3287d88b 100644 --- a/nltk/corpus/reader/childes.py +++ b/nltk/corpus/reader/childes.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Tomonori Nagano # Alexis Dimitriadis -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -20,7 +20,7 @@ from nltk.util import LazyConcatenation, LazyMap, flatten # to resolve the namespace issue -NS = "http://www.talkbank.org/ns/talkbank" +NS = "https://www.talkbank.org/ns/talkbank" class CHILDESCorpusReader(XMLCorpusReader): diff --git a/nltk/corpus/reader/chunked.py b/nltk/corpus/reader/chunked.py index 5a9e7dd5fe..e1495d35a8 100644 --- a/nltk/corpus/reader/chunked.py +++ b/nltk/corpus/reader/chunked.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/cmudict.py b/nltk/corpus/reader/cmudict.py index 3faea5d188..50417c4ee9 100644 --- a/nltk/corpus/reader/cmudict.py +++ b/nltk/corpus/reader/cmudict.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK 
Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/comparative_sents.py b/nltk/corpus/reader/comparative_sents.py index ed295e4e02..1c057d114c 100644 --- a/nltk/corpus/reader/comparative_sents.py +++ b/nltk/corpus/reader/comparative_sents.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -15,7 +15,7 @@ University of Illinois at Chicago Contact: Nitin Jindal, njindal@cs.uic.edu - Bing Liu, liub@cs.uic.edu (http://www.cs.uic.edu/~liub) + Bing Liu, liub@cs.uic.edu (https://www.cs.uic.edu/~liub) Distributed with permission. diff --git a/nltk/corpus/reader/conll.py b/nltk/corpus/reader/conll.py index 66b1fe3fe7..038f5ba633 100644 --- a/nltk/corpus/reader/conll.py +++ b/nltk/corpus/reader/conll.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/crubadan.py b/nltk/corpus/reader/crubadan.py index 2946a6db4e..b59bd19938 100644 --- a/nltk/corpus/reader/crubadan.py +++ b/nltk/corpus/reader/crubadan.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Avital Pekker # -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -15,7 +15,7 @@ context of language identification. 
For details about An Crubadan, this data, and its potential uses, see: -http://borel.slu.edu/crubadan/index.html +https://borel.slu.edu/crubadan/index.html """ import re diff --git a/nltk/corpus/reader/dependency.py b/nltk/corpus/reader/dependency.py index a0622022ea..26ceb70671 100644 --- a/nltk/corpus/reader/dependency.py +++ b/nltk/corpus/reader/dependency.py @@ -4,7 +4,7 @@ # Author: Kepa Sarasola # Iker Manterola # -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.corpus.reader.api import * diff --git a/nltk/corpus/reader/framenet.py b/nltk/corpus/reader/framenet.py index 58fcc50599..e722588c30 100644 --- a/nltk/corpus/reader/framenet.py +++ b/nltk/corpus/reader/framenet.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Authors: Chuck Wooters , # Nathan Schneider -# URL: +# URL: # For license information, see LICENSE.TXT @@ -772,7 +772,7 @@ class AttrDict(dict): """A class that wraps a dict and allows accessing the keys of the dict as if they were attributes. Taken from here: - http://stackoverflow.com/a/14620633/8879 + https://stackoverflow.com/a/14620633/8879 >>> foo = {'a':1, 'b':2, 'c':3} >>> bar = AttrDict(foo) diff --git a/nltk/corpus/reader/ieer.py b/nltk/corpus/reader/ieer.py index 4c2d6c2c51..f916e4fb33 100644 --- a/nltk/corpus/reader/ieer.py +++ b/nltk/corpus/reader/ieer.py @@ -3,14 +3,14 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ Corpus reader for the Information Extraction and Entity Recognition Corpus. NIST 1999 Information Extraction: Entity Recognition Evaluation -http://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm +https://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm This corpus contains the NEWSWIRE development test data for the NIST 1999 IE-ER Evaluation. 
The files were taken from the diff --git a/nltk/corpus/reader/indian.py b/nltk/corpus/reader/indian.py index f071642e1a..38d41f1c7f 100644 --- a/nltk/corpus/reader/indian.py +++ b/nltk/corpus/reader/indian.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/ipipan.py b/nltk/corpus/reader/ipipan.py index a96e91c282..5a95291631 100644 --- a/nltk/corpus/reader/ipipan.py +++ b/nltk/corpus/reader/ipipan.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Konrad Goluchowski -# URL: +# URL: # For license information, see LICENSE.TXT import functools @@ -25,7 +25,7 @@ def decorator(self, fileids=None, **kwargs): class IPIPANCorpusReader(CorpusReader): """ Corpus reader designed to work with corpus created by IPI PAN. - See http://korpus.pl/en/ for more details about IPI PAN corpus. + See https://korpus.pl/en/ for more details about IPI PAN corpus. The corpus includes information about text domain, channel and categories. 
You can access possible values using ``domains()``, ``channels()`` and diff --git a/nltk/corpus/reader/knbc.py b/nltk/corpus/reader/knbc.py index 3eedf07908..6e5edde180 100644 --- a/nltk/corpus/reader/knbc.py +++ b/nltk/corpus/reader/knbc.py @@ -2,7 +2,7 @@ # KNB Corpus reader # Copyright (C) 2001-2021 NLTK Project # Author: Masato Hagiwara -# URL: +# URL: # For license information, see LICENSE.TXT # For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html diff --git a/nltk/corpus/reader/lin.py b/nltk/corpus/reader/lin.py index 318fff39ad..f033ed1b15 100644 --- a/nltk/corpus/reader/lin.py +++ b/nltk/corpus/reader/lin.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Dan Blanchard -# URL: +# URL: # For license information, see LICENSE.txt import re diff --git a/nltk/corpus/reader/mte.py b/nltk/corpus/reader/mte.py index 7a81d77dec..b0ac56eda6 100644 --- a/nltk/corpus/reader/mte.py +++ b/nltk/corpus/reader/mte.py @@ -38,11 +38,11 @@ class MTEFileReader: """ ns = { - "tei": "http://www.tei-c.org/ns/1.0", - "xml": "http://www.w3.org/XML/1998/namespace", + "tei": "https://www.tei-c.org/ns/1.0", + "xml": "https://www.w3.org/XML/1998/namespace", } - tag_ns = "{http://www.tei-c.org/ns/1.0}" - xml_ns = "{http://www.w3.org/XML/1998/namespace}" + tag_ns = "{https://www.tei-c.org/ns/1.0}" + xml_ns = "{https://www.w3.org/XML/1998/namespace}" word_path = "TEI/text/body/div/div/p/s/(w|c)" sent_path = "TEI/text/body/div/div/p/s" para_path = "TEI/text/body/div/div/p" diff --git a/nltk/corpus/reader/nkjp.py b/nltk/corpus/reader/nkjp.py index a99f036345..c65a143b98 100644 --- a/nltk/corpus/reader/nkjp.py +++ b/nltk/corpus/reader/nkjp.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Gabriela Kaczka -# URL: +# URL: # For license information, see LICENSE.TXT import functools diff --git a/nltk/corpus/reader/nombank.py b/nltk/corpus/reader/nombank.py index d1cc286941..511e770bbd 100644 --- a/nltk/corpus/reader/nombank.py +++ 
b/nltk/corpus/reader/nombank.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Authors: Paul Bedaride # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT from functools import total_ordering diff --git a/nltk/corpus/reader/nps_chat.py b/nltk/corpus/reader/nps_chat.py index 6ef5268766..fef9c49766 100644 --- a/nltk/corpus/reader/nps_chat.py +++ b/nltk/corpus/reader/nps_chat.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT import re diff --git a/nltk/corpus/reader/opinion_lexicon.py b/nltk/corpus/reader/opinion_lexicon.py index b95cac539a..1d3075f48a 100644 --- a/nltk/corpus/reader/opinion_lexicon.py +++ b/nltk/corpus/reader/opinion_lexicon.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -14,7 +14,7 @@ University of Illinois at Chicago Contact: Bing Liu, liub@cs.uic.edu - http://www.cs.uic.edu/~liub + https://www.cs.uic.edu/~liub Distributed with permission. 
diff --git a/nltk/corpus/reader/panlex_lite.py b/nltk/corpus/reader/panlex_lite.py index 66d1fb2ca2..fb47e6c8ce 100644 --- a/nltk/corpus/reader/panlex_lite.py +++ b/nltk/corpus/reader/panlex_lite.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: David Kamholz -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/panlex_swadesh.py b/nltk/corpus/reader/panlex_swadesh.py index f4c684de19..cf7f5f3ff7 100644 --- a/nltk/corpus/reader/panlex_swadesh.py +++ b/nltk/corpus/reader/panlex_swadesh.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/corpus/reader/pl196x.py b/nltk/corpus/reader/pl196x.py index d057d1b3e8..c4e2ee6a2e 100644 --- a/nltk/corpus/reader/pl196x.py +++ b/nltk/corpus/reader/pl196x.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Piotr Kasprzyk -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.corpus.reader.api import * diff --git a/nltk/corpus/reader/plaintext.py b/nltk/corpus/reader/plaintext.py index 63bdd679ae..ddf01182c1 100644 --- a/nltk/corpus/reader/plaintext.py +++ b/nltk/corpus/reader/plaintext.py @@ -4,7 +4,7 @@ # Author: Steven Bird # Edward Loper # Nitin Madnani -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/ppattach.py b/nltk/corpus/reader/ppattach.py index dcf1bcb300..5a0b08e307 100644 --- a/nltk/corpus/reader/ppattach.py +++ b/nltk/corpus/reader/ppattach.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -32,7 +32,7 @@ Ratnaparkhi, Adwait (1994). A Maximum Entropy Model for Prepositional Phrase Attachment. Proceedings of the ARPA Human Language Technology -Conference. [http://www.cis.upenn.edu/~adwait/papers/hlt94.ps] +Conference. 
[https://www.cis.upenn.edu/~adwait/papers/hlt94.ps] The PP Attachment Corpus is distributed with NLTK with the permission of the author. diff --git a/nltk/corpus/reader/propbank.py b/nltk/corpus/reader/propbank.py index 72cc8ff6db..57dd83de38 100644 --- a/nltk/corpus/reader/propbank.py +++ b/nltk/corpus/reader/propbank.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT import re diff --git a/nltk/corpus/reader/pros_cons.py b/nltk/corpus/reader/pros_cons.py index bd37144bc0..177f8c0b66 100644 --- a/nltk/corpus/reader/pros_cons.py +++ b/nltk/corpus/reader/pros_cons.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -11,7 +11,7 @@ - Pros and Cons dataset information - Contact: Bing Liu, liub@cs.uic.edu - http://www.cs.uic.edu/~liub + https://www.cs.uic.edu/~liub Distributed with permission. diff --git a/nltk/corpus/reader/reviews.py b/nltk/corpus/reader/reviews.py index fdefbc78a0..5c0427668b 100644 --- a/nltk/corpus/reader/reviews.py +++ b/nltk/corpus/reader/reviews.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -14,7 +14,7 @@ University of Illinois at Chicago Contact: Bing Liu, liub@cs.uic.edu - http://www.cs.uic.edu/~liub + https://www.cs.uic.edu/~liub Distributed with permission. 
diff --git a/nltk/corpus/reader/rte.py b/nltk/corpus/reader/rte.py index e57c3971ca..3b7f8c5522 100644 --- a/nltk/corpus/reader/rte.py +++ b/nltk/corpus/reader/rte.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/semcor.py b/nltk/corpus/reader/semcor.py index 58c4dd9039..d45158ae35 100644 --- a/nltk/corpus/reader/semcor.py +++ b/nltk/corpus/reader/semcor.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Nathan Schneider -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -156,7 +156,7 @@ def _word(xmlword, unit, pos_tag, sem_tag, wordnet): sense_key = lemma + "%" + lexsn wnpos = ("n", "v", "a", "r", "s")[ int(lexsn.split(":")[0]) - 1 - ] # see http://wordnet.princeton.edu/man/senseidx.5WN.html + ] # see https://wordnet.princeton.edu/man/senseidx.5WN.html else: sense_key = wnpos = None redef = xmlword.get( diff --git a/nltk/corpus/reader/senseval.py b/nltk/corpus/reader/senseval.py index e9a52b6f98..3093737bfc 100644 --- a/nltk/corpus/reader/senseval.py +++ b/nltk/corpus/reader/senseval.py @@ -3,18 +3,18 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Trevor Cohn # Steven Bird (modifications) -# URL: +# URL: # For license information, see LICENSE.TXT """ Read from the Senseval 2 Corpus. -SENSEVAL [http://www.senseval.org/] +SENSEVAL [https://www.senseval.org/] Evaluation exercises for Word Sense Disambiguation. -Organized by ACL-SIGLEX [http://www.siglex.org/] +Organized by ACL-SIGLEX [https://www.siglex.org/] Prepared by Ted Pedersen , University of Minnesota, -http://www.d.umn.edu/~tpederse/data.html +https://www.d.umn.edu/~tpederse/data.html Distributed with permission. The NLTK version of the Senseval 2 files uses well-formed XML. 
diff --git a/nltk/corpus/reader/sentiwordnet.py b/nltk/corpus/reader/sentiwordnet.py index 99a401d938..43c130d322 100644 --- a/nltk/corpus/reader/sentiwordnet.py +++ b/nltk/corpus/reader/sentiwordnet.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Christopher Potts -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -13,7 +13,7 @@ sentiment scores: positivity, negativity, and objectivity. For details about SentiWordNet see: -http://sentiwordnet.isti.cnr.it/ +https://sentiwordnet.isti.cnr.it/ >>> from nltk.corpus import sentiwordnet as swn >>> print(swn.senti_synset('breakdown.n.03')) diff --git a/nltk/corpus/reader/sinica_treebank.py b/nltk/corpus/reader/sinica_treebank.py index 410a9100a3..da40f4e700 100644 --- a/nltk/corpus/reader/sinica_treebank.py +++ b/nltk/corpus/reader/sinica_treebank.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -13,14 +13,14 @@ 10,000 parsed sentences, drawn from the Academia Sinica Balanced Corpus of Modern Chinese. Parse tree notation is based on Information-based Case Grammar. Tagset documentation is available -at http://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html +at https://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html Language and Knowledge Processing Group, Institute of Information Science, Academia Sinica The data is distributed with the Natural Language Toolkit under the terms of the Creative Commons Attribution-NonCommercial-ShareAlike License -[http://creativecommons.org/licenses/by-nc-sa/2.5/]. +[https://creativecommons.org/licenses/by-nc-sa/2.5/]. 
References: diff --git a/nltk/corpus/reader/string_category.py b/nltk/corpus/reader/string_category.py index fd4adaf352..af78a750ff 100644 --- a/nltk/corpus/reader/string_category.py +++ b/nltk/corpus/reader/string_category.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/switchboard.py b/nltk/corpus/reader/switchboard.py index a266687350..76172b1603 100644 --- a/nltk/corpus/reader/switchboard.py +++ b/nltk/corpus/reader/switchboard.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT import re diff --git a/nltk/corpus/reader/tagged.py b/nltk/corpus/reader/tagged.py index ef5d77dbbe..7524ad5be3 100644 --- a/nltk/corpus/reader/tagged.py +++ b/nltk/corpus/reader/tagged.py @@ -4,7 +4,7 @@ # Author: Edward Loper # Steven Bird # Jacob Perkins -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/timit.py b/nltk/corpus/reader/timit.py index 0dfab9f4b5..e134cef4e4 100644 --- a/nltk/corpus/reader/timit.py +++ b/nltk/corpus/reader/timit.py @@ -4,7 +4,7 @@ # Author: Haejoong Lee # Steven Bird # Jacob Perkins -# URL: +# URL: # For license information, see LICENSE.TXT # [xx] this docstring is out-of-date: diff --git a/nltk/corpus/reader/toolbox.py b/nltk/corpus/reader/toolbox.py index 0273a1f10d..b56f39d56d 100644 --- a/nltk/corpus/reader/toolbox.py +++ b/nltk/corpus/reader/toolbox.py @@ -4,7 +4,7 @@ # Author: Greg Aumann # Stuart Robinson # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/twitter.py b/nltk/corpus/reader/twitter.py index cb9e438464..9cf3043a91 100644 --- a/nltk/corpus/reader/twitter.py +++ b/nltk/corpus/reader/twitter.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein -# URL: +# URL: # For license 
information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/util.py b/nltk/corpus/reader/util.py index 1daa9602a4..ba14d5b3fa 100644 --- a/nltk/corpus/reader/util.py +++ b/nltk/corpus/reader/util.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT import bisect diff --git a/nltk/corpus/reader/verbnet.py b/nltk/corpus/reader/verbnet.py index a8976ab4e9..33d1839844 100644 --- a/nltk/corpus/reader/verbnet.py +++ b/nltk/corpus/reader/verbnet.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/wordlist.py b/nltk/corpus/reader/wordlist.py index 81f81b4b06..97a233e987 100644 --- a/nltk/corpus/reader/wordlist.py +++ b/nltk/corpus/reader/wordlist.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.corpus.reader.api import * from nltk.corpus.reader.util import * @@ -97,9 +97,9 @@ def words(self, lang=None, fileids=None, ignore_lines_startswith="#"): class UnicharsCorpusReader(WordListCorpusReader): """ This class is used to read lists of characters from the Perl Unicode - Properties (see http://perldoc.perl.org/perluniprops.html). + Properties (see https://perldoc.perl.org/perluniprops.html). 
The files in the perluniprop.zip are extracted using the Unicode::Tussle - module from http://search.cpan.org/~bdfoy/Unicode-Tussle-1.11/lib/Unicode/Tussle.pm + module from https://search.cpan.org/~bdfoy/Unicode-Tussle-1.11/lib/Unicode/Tussle.pm """ # These are categories similar to the Perl Unicode Properties @@ -146,12 +146,12 @@ class MWAPPDBCorpusReader(WordListCorpusReader): This class is used to read the list of word pairs from the subset of lexical pairs of The Paraphrase Database (PPDB) XXXL used in the Monolingual Word Alignment (MWA) algorithm described in Sultan et al. (2014a, 2014b, 2015): - - http://acl2014.org/acl2014/Q14/pdf/Q14-1017 - - http://www.aclweb.org/anthology/S14-2039 - - http://www.aclweb.org/anthology/S15-2027 + - https://acl2014.org/acl2014/Q14/pdf/Q14-1017 + - https://www.aclweb.org/anthology/S14-2039 + - https://www.aclweb.org/anthology/S15-2027 The original source of the full PPDB corpus can be found on - http://www.cis.upenn.edu/~ccb/ppdb/ + https://www.cis.upenn.edu/~ccb/ppdb/ :return: a list of tuples of similar lexical terms. """ diff --git a/nltk/corpus/reader/wordnet.py b/nltk/corpus/reader/wordnet.py index 493892f301..7c38e78937 100644 --- a/nltk/corpus/reader/wordnet.py +++ b/nltk/corpus/reader/wordnet.py @@ -11,7 +11,7 @@ # Francis Bond # Eric Kafe -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -22,7 +22,7 @@ such as hypernyms, hyponyms, synonyms, antonyms etc. 
For details about WordNet see: -http://wordnet.princeton.edu/ +https://wordnet.princeton.edu/ This module also allows you to find lemmas in languages other than English from the Open Multilingual Wordnet @@ -1421,7 +1421,7 @@ def synset_from_pos_and_offset(self, pos, offset): def _synset_from_pos_and_offset(self, *args, **kwargs): """ Hack to help people like the readers of - http://stackoverflow.com/a/27145655/1709587 + https://stackoverflow.com/a/27145655/1709587 who were using this function before it was officially a public method """ return self.synset_from_pos_and_offset(*args, **kwargs) @@ -2182,7 +2182,7 @@ def ic(self, icfile): # useful for verb similarity as there exist multiple verb taxonomies. # More information about the metrics is available at -# http://marimba.d.umn.edu/similarity/measures.html +# https://marimba.d.umn.edu/similarity/measures.html def path_similarity(synset1, synset2, verbose=False, simulate_root=True): diff --git a/nltk/corpus/reader/xmldocs.py b/nltk/corpus/reader/xmldocs.py index 572e24454d..55f11bf2b1 100644 --- a/nltk/corpus/reader/xmldocs.py +++ b/nltk/corpus/reader/xmldocs.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/corpus/reader/ycoe.py b/nltk/corpus/reader/ycoe.py index 9d2caf53e4..edd5f46a2a 100644 --- a/nltk/corpus/reader/ycoe.py +++ b/nltk/corpus/reader/ycoe.py @@ -2,19 +2,19 @@ # # Copyright (C) 2001-2015 NLTK Project # Author: Selina Dennis -# URL: +# URL: # For license information, see LICENSE.TXT """ Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old English Prose (YCOE), a 1.5 million word syntactically-annotated corpus of Old English prose texts. The corpus is distributed by the -Oxford Text Archive: http://www.ota.ahds.ac.uk/ It is not included +Oxford Text Archive: https://www.ota.ahds.ac.uk/ It is not included with NLTK. 
The YCOE corpus is divided into 100 files, each representing an Old English prose text. Tags used within each text complies -to the YCOE standard: http://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm +to the YCOE standard: https://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm """ import os diff --git a/nltk/corpus/util.py b/nltk/corpus/util.py index 58039b03fb..890ac8e905 100644 --- a/nltk/corpus/util.py +++ b/nltk/corpus/util.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT ###################################################################### @@ -112,7 +112,7 @@ def _unload(self): def __getattr__(self, attr): # Fix for inspect.isclass under Python 2.6 - # (see http://bugs.python.org/issue1225107). + # (see https://bugs.python.org/issue1225107). # Without this fix tests may take extra 1.5GB RAM # because all corpora gets loaded during test collection. if attr == "__bases__": diff --git a/nltk/data.py b/nltk/data.py index 6deccfbc73..81105eae08 100644 --- a/nltk/data.py +++ b/nltk/data.py @@ -2,20 +2,20 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ Functions to find and load NLTK resource files, such as corpora, grammars, and saved processing objects. Resource files are identified using URLs, such as ``nltk:corpora/abc/rural.txt`` or -``http://nltk.org/sample/toy.cfg``. The following URL protocols are +``https://www.nltk.org/sample/toy.cfg``. The following URL protocols are supported: - ``file:path``: Specifies the file whose path is *path*. Both relative and absolute paths may be used. - - ``http://host/path``: Specifies the file stored on the web + - ``https://host/path``: Specifies the file stored on the web server *host* at path *path*. 
- ``nltk:path``: Specifies the file stored in the NLTK data @@ -171,8 +171,8 @@ def normalize_resource_url(resource_url): 'nltk:home/nltk' >>> windows or normalize_resource_url('nltk:/home/nltk') == 'file:///home/nltk' True - >>> normalize_resource_url('http://example.com/dir/file') - 'http://example.com/dir/file' + >>> normalize_resource_url('https://example.com/dir/file') + 'https://example.com/dir/file' >>> normalize_resource_url('dir/file') 'nltk:dir/file' """ diff --git a/nltk/downloader.py b/nltk/downloader.py index 8ec57fdddc..e513435a5a 100644 --- a/nltk/downloader.py +++ b/nltk/downloader.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/draw/__init__.py b/nltk/draw/__init__.py index 87900f29f9..c8ab5a5b39 100644 --- a/nltk/draw/__init__.py +++ b/nltk/draw/__init__.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT # Import Tkinter-based modules if Tkinter is installed diff --git a/nltk/draw/cfg.py b/nltk/draw/cfg.py index e827fa874d..792a4926a5 100644 --- a/nltk/draw/cfg.py +++ b/nltk/draw/cfg.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/draw/dispersion.py b/nltk/draw/dispersion.py index 3c7dc24c39..9e7991406d 100644 --- a/nltk/draw/dispersion.py +++ b/nltk/draw/dispersion.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -27,7 +27,7 @@ def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Pl except ImportError as e: raise ValueError( "The plot function requires matplotlib to be installed." 
- "See http://matplotlib.org/" + "See https://matplotlib.org/" ) from e text = list(text) diff --git a/nltk/draw/table.py b/nltk/draw/table.py index fe123805f7..255c799987 100644 --- a/nltk/draw/table.py +++ b/nltk/draw/table.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -21,7 +21,7 @@ class MultiListbox(Frame): """ A multi-column listbox, where the current selection applies to an entire row. Based on the MultiListbox Tkinter widget - recipe from the Python Cookbook (http://code.activestate.com/recipes/52266/) + recipe from the Python Cookbook (https://code.activestate.com/recipes/52266/) For the most part, ``MultiListbox`` methods delegate to its contained listboxes. For any methods that do not have docstrings, diff --git a/nltk/draw/tree.py b/nltk/draw/tree.py index 26628add9a..00ca9becf5 100644 --- a/nltk/draw/tree.py +++ b/nltk/draw/tree.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/draw/util.py b/nltk/draw/util.py index c564a3a2bd..b5b99c0e33 100644 --- a/nltk/draw/util.py +++ b/nltk/draw/util.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -30,7 +30,7 @@ Acknowledgements: Many of the ideas behind the canvas widget system are derived from ``CLIG``, a Tk-based grapher for linguistic data structures. For more information, see the CLIG -homepage (http://www.ags.uni-sb.de/~konrad/clig.html). +homepage (https://www.ags.uni-sb.de/~konrad/clig.html). 
""" from abc import ABCMeta, abstractmethod diff --git a/nltk/featstruct.py b/nltk/featstruct.py index a7001eb9aa..633b91e558 100644 --- a/nltk/featstruct.py +++ b/nltk/featstruct.py @@ -4,7 +4,7 @@ # Author: Edward Loper , # Rob Speer, # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/grammar.py b/nltk/grammar.py index 48725a5f1c..64a2c8dbe3 100644 --- a/nltk/grammar.py +++ b/nltk/grammar.py @@ -5,7 +5,7 @@ # Edward Loper # Jason Narad # Peter Ljunglöf -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/help.py b/nltk/help.py index 8f6a5d64fb..d6e4557abc 100644 --- a/nltk/help.py +++ b/nltk/help.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Authors: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/inference/__init__.py b/nltk/inference/__init__.py index 5c8fc23a12..a1e309db4c 100644 --- a/nltk/inference/__init__.py +++ b/nltk/inference/__init__.py @@ -4,7 +4,7 @@ # Author: Dan Garrette # Ewan Klein # -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/inference/api.py b/nltk/inference/api.py index acb2b77134..e80d629b24 100644 --- a/nltk/inference/api.py +++ b/nltk/inference/api.py @@ -3,7 +3,7 @@ # Author: Ewan Klein # Dan Garrette # -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/inference/discourse.py b/nltk/inference/discourse.py index c3bc8d8be4..aca34c9bdc 100644 --- a/nltk/inference/discourse.py +++ b/nltk/inference/discourse.py @@ -3,7 +3,7 @@ # Author: Ewan Klein # Dan Garrette # -# URL: +# URL: # For license information, see LICENSE.TXT r""" @@ -11,7 +11,7 @@ consistency and informativeness. Many of the ideas are based on the CURT family of programs of Blackburn and Bos -(see http://homepages.inf.ed.ac.uk/jbos/comsem/book1.html). +(see https://homepages.inf.ed.ac.uk/jbos/comsem/book1.html). 
Consistency checking is carried out by using the ``mace`` module to call the Mace4 model builder. Informativeness checking is carried out with a call to ``Prover.prove()`` from diff --git a/nltk/inference/mace.py b/nltk/inference/mace.py index b57957c88a..ba00f78607 100644 --- a/nltk/inference/mace.py +++ b/nltk/inference/mace.py @@ -3,7 +3,7 @@ # Author: Dan Garrette # Ewan Klein -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -172,7 +172,7 @@ def _make_model_var(value): def _decorate_model(self, valuation_str, format): """ Print out a Mace4 model using any Mace4 ``interpformat`` format. - See http://www.cs.unm.edu/~mccune/mace4/manual/ for details. + See https://www.cs.unm.edu/~mccune/mace4/manual/ for details. :param valuation_str: str with the model builder's output :param format: str indicating the format for displaying diff --git a/nltk/inference/nonmonotonic.py b/nltk/inference/nonmonotonic.py index 09948395d5..eeb6ce5323 100644 --- a/nltk/inference/nonmonotonic.py +++ b/nltk/inference/nonmonotonic.py @@ -3,7 +3,7 @@ # Author: Daniel H. Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/inference/prover9.py b/nltk/inference/prover9.py index d79e67c622..8c428e5b2f 100644 --- a/nltk/inference/prover9.py +++ b/nltk/inference/prover9.py @@ -4,7 +4,7 @@ # Author: Dan Garrette # Ewan Klein # -# URL: +# URL: # For license information, see LICENSE.TXT """ A theorem prover that makes use of the external 'Prover9' package. 
@@ -128,7 +128,7 @@ def config_prover9(self, binary_location, verbose=False): name, path_to_bin=binary_location, env_vars=["PROVER9"], - url="http://www.cs.unm.edu/~mccune/prover9/", + url="https://www.cs.unm.edu/~mccune/prover9/", binary_names=[name, name + ".exe"], verbose=verbose, ) @@ -178,7 +178,7 @@ def _find_binary(self, name, verbose=False): name, searchpath=binary_locations, env_vars=["PROVER9"], - url="http://www.cs.unm.edu/~mccune/prover9/", + url="https://www.cs.unm.edu/~mccune/prover9/", binary_names=[name, name + ".exe"], verbose=verbose, ) diff --git a/nltk/inference/resolution.py b/nltk/inference/resolution.py index 9adadf9f3e..deb7cc7e57 100755 --- a/nltk/inference/resolution.py +++ b/nltk/inference/resolution.py @@ -3,7 +3,7 @@ # Author: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/inference/tableau.py b/nltk/inference/tableau.py index 72ffbb4ef2..ff06ab0253 100644 --- a/nltk/inference/tableau.py +++ b/nltk/inference/tableau.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Dan Garrette # -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/internals.py b/nltk/internals.py index ba92adb168..97fd029029 100644 --- a/nltk/internals.py +++ b/nltk/internals.py @@ -4,7 +4,7 @@ # Author: Steven Bird # Edward Loper # Nitin Madnani -# URL: +# URL: # For license information, see LICENSE.TXT import fnmatch diff --git a/nltk/jsontags.py b/nltk/jsontags.py index 1ad723de3d..f324db6e2c 100644 --- a/nltk/jsontags.py +++ b/nltk/jsontags.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Xu # -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/lazyimport.py b/nltk/lazyimport.py index 3999dcc2b7..b6b56acb42 100644 --- a/nltk/lazyimport.py +++ b/nltk/lazyimport.py @@ -1,6 +1,6 @@ # This module is from mx/DateTime/LazyModule.py and is # distributed under the terms of the eGenix.com 
Public License Agreement -# http://www.egenix.com/products/eGenix.com-Public-License-1.1.0.pdf +# https://www.egenix.com/products/eGenix.com-Public-License-1.1.0.pdf """ Helper to enable simple lazy module import. diff --git a/nltk/lm/__init__.py b/nltk/lm/__init__.py index bb0041b530..b066036a63 100644 --- a/nltk/lm/__init__.py +++ b/nltk/lm/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Authors: Ilia Kurenkov -# URL: -# URL: +# URL: # For license information, see LICENSE.TXT """Language Model Interface.""" diff --git a/nltk/lm/counter.py b/nltk/lm/counter.py index e63a04dccc..eba30fc30f 100644 --- a/nltk/lm/counter.py +++ b/nltk/lm/counter.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov -# URL: +# URL: # For license information, see LICENSE.TXT """ Language Model Counter diff --git a/nltk/lm/models.py b/nltk/lm/models.py index ebcf3a877b..f139480b65 100644 --- a/nltk/lm/models.py +++ b/nltk/lm/models.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov # Manu Joseph -# URL: +# URL: # For license information, see LICENSE.TXT """Language Models""" diff --git a/nltk/lm/preprocessing.py b/nltk/lm/preprocessing.py index 196254ebe4..c65fb76fa9 100644 --- a/nltk/lm/preprocessing.py +++ b/nltk/lm/preprocessing.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov -# URL: +# URL: # For license information, see LICENSE.TXT from functools import partial from itertools import chain diff --git a/nltk/lm/smoothing.py b/nltk/lm/smoothing.py index b0cbc641e5..a440afb16b 100644 --- a/nltk/lm/smoothing.py +++ b/nltk/lm/smoothing.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov # Manu Joseph -# URL: +# URL: # For license information, see LICENSE.TXT """Smoothing algorithms for language modeling. 
diff --git a/nltk/lm/util.py b/nltk/lm/util.py index 8d3ed17aa7..fba683d7f2 100644 --- a/nltk/lm/util.py +++ b/nltk/lm/util.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov -# URL: +# URL: # For license information, see LICENSE.TXT """Language Model Utilities""" diff --git a/nltk/lm/vocabulary.py b/nltk/lm/vocabulary.py index 90449c8010..b50c5dbcc5 100644 --- a/nltk/lm/vocabulary.py +++ b/nltk/lm/vocabulary.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov -# URL: +# URL: # For license information, see LICENSE.TXT """Language Model Vocabulary""" diff --git a/nltk/metrics/__init__.py b/nltk/metrics/__init__.py index f2ba8025a8..cea619e26a 100644 --- a/nltk/metrics/__init__.py +++ b/nltk/metrics/__init__.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/metrics/agreement.py b/nltk/metrics/agreement.py index c3f7079bef..875e5b509e 100644 --- a/nltk/metrics/agreement.py +++ b/nltk/metrics/agreement.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Tom Lippincott -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/metrics/aline.py b/nltk/metrics/aline.py index 4be2b92de4..7bd5713296 100644 --- a/nltk/metrics/aline.py +++ b/nltk/metrics/aline.py @@ -3,12 +3,12 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Greg Kondrak # Geoff Bacon (Python port) -# URL: +# URL: # For license information, see LICENSE.TXT """ ALINE -http://webdocs.cs.ualberta.ca/~kondrak/ +https://webdocs.cs.ualberta.ca/~kondrak/ Copyright 2002 by Grzegorz Kondrak. ALINE is an algorithm for aligning phonetic sequences, described in [1]. 
diff --git a/nltk/metrics/association.py b/nltk/metrics/association.py index fb059588ed..6ad60fdd1e 100644 --- a/nltk/metrics/association.py +++ b/nltk/metrics/association.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Joel Nothman -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/metrics/confusionmatrix.py b/nltk/metrics/confusionmatrix.py index d1cd8a2459..1dc7121082 100644 --- a/nltk/metrics/confusionmatrix.py +++ b/nltk/metrics/confusionmatrix.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.probability import FreqDist diff --git a/nltk/metrics/distance.py b/nltk/metrics/distance.py index c0da4a1753..f89b4809d3 100644 --- a/nltk/metrics/distance.py +++ b/nltk/metrics/distance.py @@ -4,7 +4,7 @@ # Author: Edward Loper # Steven Bird # Tom Lippincott -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/metrics/paice.py b/nltk/metrics/paice.py index 4212bf6844..6068c05137 100644 --- a/nltk/metrics/paice.py +++ b/nltk/metrics/paice.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Lauri Hallila -# URL: +# URL: # For license information, see LICENSE.TXT # @@ -60,7 +60,7 @@ def _truncate(words, cutlength): return stems -# Reference: http://en.wikipedia.org/wiki/Line-line_intersection +# Reference: https://en.wikipedia.org/wiki/Line-line_intersection def _count_intersection(l1, l2): """Count intersection between two line segments defined by coordinate pairs. 
diff --git a/nltk/metrics/scores.py b/nltk/metrics/scores.py index 01f60d74b4..8cdff3f89a 100644 --- a/nltk/metrics/scores.py +++ b/nltk/metrics/scores.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT import operator diff --git a/nltk/metrics/segmentation.py b/nltk/metrics/segmentation.py index 97011c89bb..b537e74359 100644 --- a/nltk/metrics/segmentation.py +++ b/nltk/metrics/segmentation.py @@ -4,7 +4,7 @@ # Author: Edward Loper # Steven Bird # David Doukhan -# URL: +# URL: # For license information, see LICENSE.TXT @@ -25,7 +25,7 @@ Information Retrieval 5, 2002, pp 353-375 Baseline implementation in C++ -http://digital.cs.usu.edu/~vkulyukin/vkweb/software/ghd/ghd.html +https://digital.cs.usu.edu/~vkulyukin/vkweb/software/ghd/ghd.html Study describing benefits of Generalized Hamming Distance Versus WindowDiff for evaluating text segmentation tasks diff --git a/nltk/metrics/spearman.py b/nltk/metrics/spearman.py index afee73e57f..bbf28ef7eb 100644 --- a/nltk/metrics/spearman.py +++ b/nltk/metrics/spearman.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Joel Nothman -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/misc/__init__.py b/nltk/misc/__init__.py index 39443323b8..564dfc09c2 100644 --- a/nltk/misc/__init__.py +++ b/nltk/misc/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.misc.babelfish import babelize_shell diff --git a/nltk/misc/chomsky.py b/nltk/misc/chomsky.py index 1f5786f82b..297d20c695 100644 --- a/nltk/misc/chomsky.py +++ b/nltk/misc/chomsky.py @@ -1,5 +1,5 @@ # Chomsky random text generator, version 1.1, Raymond Hettinger, 2005/09/13 -# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/440546 +# https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/440546 """ CHOMSKY is an 
aid to writing linguistic papers in the style diff --git a/nltk/misc/minimalset.py b/nltk/misc/minimalset.py index 0edae0367a..2d5c18182f 100644 --- a/nltk/misc/minimalset.py +++ b/nltk/misc/minimalset.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from collections import defaultdict diff --git a/nltk/misc/sort.py b/nltk/misc/sort.py index c969caba99..e9a07711f6 100644 --- a/nltk/misc/sort.py +++ b/nltk/misc/sort.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/misc/wordfinder.py b/nltk/misc/wordfinder.py index 131310b43f..3ffed4d5a8 100644 --- a/nltk/misc/wordfinder.py +++ b/nltk/misc/wordfinder.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT # Simplified from PHP version by Robert Klein diff --git a/nltk/parse/__init__.py b/nltk/parse/__init__.py index f2464768e7..cc6eff5145 100644 --- a/nltk/parse/__init__.py +++ b/nltk/parse/__init__.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/parse/api.py b/nltk/parse/api.py index c82ace3af7..659dc7952d 100644 --- a/nltk/parse/api.py +++ b/nltk/parse/api.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/parse/bllip.py b/nltk/parse/bllip.py index 1d1c733d7c..ffc497fa32 100644 --- a/nltk/parse/bllip.py +++ b/nltk/parse/bllip.py @@ -3,7 +3,7 @@ # Author: David McClosky # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.parse.api import ParserI @@ -75,7 +75,7 @@ ``BllipParser`` objects in the same process. 
BLLIP Parser currently has issues with non-ASCII text and will raise an error if given any. -See http://pypi.python.org/pypi/bllipparser/ for more information +See https://pypi.python.org/pypi/bllipparser/ for more information on BLLIP Parser's Python interface. """ diff --git a/nltk/parse/chart.py b/nltk/parse/chart.py index 49e0c78686..873d74f9e7 100644 --- a/nltk/parse/chart.py +++ b/nltk/parse/chart.py @@ -5,7 +5,7 @@ # Steven Bird # Jean Mark Gawron # Peter Ljunglöf -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/parse/corenlp.py b/nltk/parse/corenlp.py index fa791398a1..953400d706 100644 --- a/nltk/parse/corenlp.py +++ b/nltk/parse/corenlp.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Dmitrijs Milajevs # -# URL: +# URL: # For license information, see LICENSE.TXT import json @@ -18,7 +18,7 @@ from nltk.tokenize.api import TokenizerI from nltk.tree import Tree -_stanford_url = "http://stanfordnlp.github.io/CoreNLP/" +_stanford_url = "https://stanfordnlp.github.io/CoreNLP/" class CoreNLPServerError(EnvironmentError): @@ -77,7 +77,7 @@ def __init__( else: try_port(port) - self.url = f"http://localhost:{port}" + self.url = f"http://localhost:{port}" model_jar = max( find_jar_iter( @@ -176,7 +176,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): """Interface to the CoreNLP Parser.""" - def __init__(self, url="http://localhost:9000", encoding="utf8", tagtype=None): + def __init__(self, url="http://localhost:9000", encoding="utf8", tagtype=None): import requests self.url = url @@ -300,7 +300,7 @@ def parse_text(self, text, *args, **kwargs): def tokenize(self, text, properties=None): """Tokenize a string of text. - >>> parser = CoreNLPParser(url='http://localhost:9000') + >>> parser = CoreNLPParser(url='http://localhost:9000') >>> text = 'Good muffins cost $3.88\\nin New York. Please buy me\\ntwo of them.\\nThanks.' 
>>> list(parser.tokenize(text)) @@ -347,13 +347,13 @@ def tag(self, sentence): :rtype: list(tuple(str, str)) - >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='ner') + >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='ner') >>> tokens = 'Rami Eid is studying at Stony Brook University in NY'.split() >>> parser.tag(tokens) [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), ('at', 'O'), ('Stony', 'ORGANIZATION'), ('Brook', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'O')] - >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='pos') + >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='pos') >>> tokens = "What is the airspeed of an unladen swallow ?".split() >>> parser.tag(tokens) [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), @@ -393,7 +393,7 @@ def raw_tag_sents(self, sentences): class CoreNLPParser(GenericCoreNLPParser): """ - >>> parser = CoreNLPParser(url='http://localhost:9000') + >>> parser = CoreNLPParser(url='http://localhost:9000') >>> next( ... parser.raw_parse('The quick brown fox jumps over the lazy dog.') @@ -546,7 +546,7 @@ def make_tree(self, result): class CoreNLPDependencyParser(GenericCoreNLPParser): """Dependency parser. - >>> dep_parser = CoreNLPDependencyParser(url='http://localhost:9000') + >>> dep_parser = CoreNLPDependencyParser(url='http://localhost:9000') >>> parse, = dep_parser.raw_parse( ... 'The quick brown fox jumps over the lazy dog.' diff --git a/nltk/parse/dependencygraph.py b/nltk/parse/dependencygraph.py index ac3d31723c..dd61a346f7 100755 --- a/nltk/parse/dependencygraph.py +++ b/nltk/parse/dependencygraph.py @@ -4,14 +4,14 @@ # Author: Jason Narad # Steven Bird (modifications) # -# URL: +# URL: # For license information, see LICENSE.TXT # """ Tools for reading and writing dependency trees. The input is assumed to be in Malt-TAB format -(http://stp.lingfil.uu.se/~nivre/research/MaltXML.html). 
+(https://stp.lingfil.uu.se/~nivre/research/MaltXML.html). """ import subprocess diff --git a/nltk/parse/earleychart.py b/nltk/parse/earleychart.py index f667b70878..b35dfee44c 100644 --- a/nltk/parse/earleychart.py +++ b/nltk/parse/earleychart.py @@ -6,7 +6,7 @@ # Edward Loper # Steven Bird # Jean Mark Gawron -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/parse/evaluate.py b/nltk/parse/evaluate.py index d8d6f9b97f..e42a699831 100644 --- a/nltk/parse/evaluate.py +++ b/nltk/parse/evaluate.py @@ -3,7 +3,7 @@ # Author: Long Duong # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT import unicodedata diff --git a/nltk/parse/featurechart.py b/nltk/parse/featurechart.py index de27156156..9ef4f3558b 100644 --- a/nltk/parse/featurechart.py +++ b/nltk/parse/featurechart.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Rob Speer # Peter Ljunglöf -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/parse/generate.py b/nltk/parse/generate.py index c1da1a2aa1..951020afa9 100644 --- a/nltk/parse/generate.py +++ b/nltk/parse/generate.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Peter Ljunglöf -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/parse/malt.py b/nltk/parse/malt.py index 6a8c97691b..0d77d8932f 100644 --- a/nltk/parse/malt.py +++ b/nltk/parse/malt.py @@ -4,7 +4,7 @@ # Contributor: Liling Tan, Mustufain, osamamukhtar11 # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT import inspect @@ -127,7 +127,7 @@ def __init__( :param model_filename: The name of the pre-trained model with .mco file extension. If provided, training will not be required. 
(see http://www.maltparser.org/mco/mco.html and - see http://www.patful.com/chalk/node/185) + see https://www.patful.com/chalk/node/185) :type model_filename: str :param tagger: The tagger used to POS tag the raw string before formatting to CONLL format. It should behave like `nltk.pos_tag` @@ -135,7 +135,7 @@ def __init__( :param additional_java_args: This is the additional Java arguments that one can use when calling Maltparser, usually this is the heapsize limits, e.g. `additional_java_args=['-Xmx1024m']` - (see http://goo.gl/mpDBvQ) + (see https://goo.gl/mpDBvQ) :type additional_java_args: list """ diff --git a/nltk/parse/nonprojectivedependencyparser.py b/nltk/parse/nonprojectivedependencyparser.py index 0248de565f..4a29f038a0 100644 --- a/nltk/parse/nonprojectivedependencyparser.py +++ b/nltk/parse/nonprojectivedependencyparser.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Jason Narad # -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/parse/pchart.py b/nltk/parse/pchart.py index 4ff4eaf25e..a94ff09f77 100644 --- a/nltk/parse/pchart.py +++ b/nltk/parse/pchart.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/parse/projectivedependencyparser.py b/nltk/parse/projectivedependencyparser.py index 0026fb3225..c14ee62ff2 100644 --- a/nltk/parse/projectivedependencyparser.py +++ b/nltk/parse/projectivedependencyparser.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Jason Narad # -# URL: +# URL: # For license information, see LICENSE.TXT # diff --git a/nltk/parse/recursivedescent.py b/nltk/parse/recursivedescent.py index 8989c498e5..daa33e2436 100644 --- a/nltk/parse/recursivedescent.py +++ b/nltk/parse/recursivedescent.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT 
from nltk.grammar import Nonterminal diff --git a/nltk/parse/shiftreduce.py b/nltk/parse/shiftreduce.py index 51d578ea7e..74016b84b4 100644 --- a/nltk/parse/shiftreduce.py +++ b/nltk/parse/shiftreduce.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.grammar import Nonterminal diff --git a/nltk/parse/stanford.py b/nltk/parse/stanford.py index 42289c2bc0..71296d4008 100644 --- a/nltk/parse/stanford.py +++ b/nltk/parse/stanford.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Xu # -# URL: +# URL: # For license information, see LICENSE.TXT import os diff --git a/nltk/parse/transitionparser.py b/nltk/parse/transitionparser.py index e6bee563e6..ea041ec755 100644 --- a/nltk/parse/transitionparser.py +++ b/nltk/parse/transitionparser.py @@ -3,7 +3,7 @@ # Author: Long Duong # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT import pickle diff --git a/nltk/parse/util.py b/nltk/parse/util.py index b730556e84..6159ad68d1 100644 --- a/nltk/parse/util.py +++ b/nltk/parse/util.py @@ -3,7 +3,7 @@ # Author: Ewan Klein # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/parse/viterbi.py b/nltk/parse/viterbi.py index 82b15c75df..3629292efc 100644 --- a/nltk/parse/viterbi.py +++ b/nltk/parse/viterbi.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from functools import reduce diff --git a/nltk/probability.py b/nltk/probability.py index 6b6ddae594..cd235fdb28 100755 --- a/nltk/probability.py +++ b/nltk/probability.py @@ -9,7 +9,7 @@ # Geoffrey Sampson (additions) # Ilia Kurenkov (additions) # -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -269,7 +269,7 @@ def plot( except ImportError as e: raise ValueError( "The 
plot function requires matplotlib to be installed." - "See http://matplotlib.org/" + "See https://matplotlib.org/" ) from e if len(args) == 0: @@ -1367,7 +1367,7 @@ class SimpleGoodTuringProbDist(ProbDistI): Journal of Quantitative Linguistics, vol. 2 pp. 217-237. - "Speech and Language Processing (Jurafsky & Martin), 2nd Edition, Chapter 4.5 p103 (log(Nc) = a + b*log(c)) - - http://www.grsampson.net/RGoodTur.html + - https://www.grsampson.net/RGoodTur.html Given a set of pair (xi, yi), where the xi denotes the frequency and yi denotes the frequency of frequency, we want to minimize their @@ -1954,7 +1954,7 @@ def plot( except ImportError as e: raise ValueError( "The plot function requires matplotlib to be installed." - "See http://matplotlib.org/" + "See https://matplotlib.org/" ) from e if not conditions: diff --git a/nltk/sem/__init__.py b/nltk/sem/__init__.py index 8ee8bd3404..8586e7928e 100644 --- a/nltk/sem/__init__.py +++ b/nltk/sem/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/sem/boxer.py b/nltk/sem/boxer.py index 6ebc4cb12c..a3c1f44138 100644 --- a/nltk/sem/boxer.py +++ b/nltk/sem/boxer.py @@ -1,10 +1,10 @@ # Natural Language Toolkit: Interface to Boxer -# +# # # Author: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -238,7 +238,7 @@ def _find_binary(self, name, bin_dir, verbose=False): name, path_to_bin=bin_dir, env_vars=["CANDC"], - url="http://svn.ask.it.usyd.edu.au/trac/candc/", + url="https://svn.ask.it.usyd.edu.au/trac/candc/", binary_names=[name, name + ".exe"], verbose=verbose, ) diff --git a/nltk/sem/chat80.py b/nltk/sem/chat80.py index 680a0ae188..6ddbe562bb 100644 --- a/nltk/sem/chat80.py +++ b/nltk/sem/chat80.py @@ -1,9 +1,9 @@ # Natural Language Toolkit: Chat-80 KB Reader -# See http://www.w3.org/TR/swbp-skos-core-guide/ +# See 
https://www.w3.org/TR/swbp-skos-core-guide/ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein , -# URL: +# URL: # For license information, see LICENSE.TXT r""" @@ -13,8 +13,8 @@ Chat-80 was a natural language system which allowed the user to interrogate a Prolog knowledge base in the domain of world geography. It was developed in the early '80s by Warren and Pereira; see -``http://www.aclweb.org/anthology/J82-3002.pdf`` for a description and -``http://www.cis.upenn.edu/~pereira/oldies.html`` for the source +``https://www.aclweb.org/anthology/J82-3002.pdf`` for a description and +``https://www.cis.upenn.edu/~pereira/oldies.html`` for the source files. This module contains functions to extract data from the Chat-80 @@ -246,7 +246,7 @@ class Concept: """ A Concept class, loosely based on SKOS - (http://www.w3.org/TR/swbp-skos-core-guide/). + (https://www.w3.org/TR/swbp-skos-core-guide/). """ def __init__(self, prefLabel, arity, altLabels=[], closures=[], extension=set()): diff --git a/nltk/sem/cooper_storage.py b/nltk/sem/cooper_storage.py index 7a7139a6f3..6677e6ca6f 100644 --- a/nltk/sem/cooper_storage.py +++ b/nltk/sem/cooper_storage.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.parse import load_parser diff --git a/nltk/sem/drt.py b/nltk/sem/drt.py index 50ee8a4ade..fbd7e1e1f5 100644 --- a/nltk/sem/drt.py +++ b/nltk/sem/drt.py @@ -3,7 +3,7 @@ # Author: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT import operator diff --git a/nltk/sem/drt_glue_demo.py b/nltk/sem/drt_glue_demo.py index e22e07c9a3..08551641b1 100644 --- a/nltk/sem/drt_glue_demo.py +++ b/nltk/sem/drt_glue_demo.py @@ -4,7 +4,7 @@ # Author: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT try: diff --git a/nltk/sem/evaluate.py b/nltk/sem/evaluate.py index 
bef452c80e..cbf1237483 100644 --- a/nltk/sem/evaluate.py +++ b/nltk/sem/evaluate.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein , -# URL: +# URL: # For license information, see LICENSE.TXT # TODO: diff --git a/nltk/sem/glue.py b/nltk/sem/glue.py index b5aec715ff..fa2990ef56 100644 --- a/nltk/sem/glue.py +++ b/nltk/sem/glue.py @@ -3,7 +3,7 @@ # Author: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT import os diff --git a/nltk/sem/hole.py b/nltk/sem/hole.py index 004f63ccfc..6119b282be 100644 --- a/nltk/sem/hole.py +++ b/nltk/sem/hole.py @@ -4,7 +4,7 @@ # Updated by: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/sem/lfg.py b/nltk/sem/lfg.py index 9889026dff..d6a3ff78be 100644 --- a/nltk/sem/lfg.py +++ b/nltk/sem/lfg.py @@ -3,7 +3,7 @@ # Author: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT from itertools import chain diff --git a/nltk/sem/linearlogic.py b/nltk/sem/linearlogic.py index 4ad55346d6..17ffe5daa0 100644 --- a/nltk/sem/linearlogic.py +++ b/nltk/sem/linearlogic.py @@ -3,7 +3,7 @@ # Author: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.internals import Counter diff --git a/nltk/sem/logic.py b/nltk/sem/logic.py index 617017b0fd..b5cf21530f 100644 --- a/nltk/sem/logic.py +++ b/nltk/sem/logic.py @@ -3,7 +3,7 @@ # Author: Dan Garrette # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/sem/relextract.py b/nltk/sem/relextract.py index a3ebb6f8c6..9977749055 100644 --- a/nltk/sem/relextract.py +++ b/nltk/sem/relextract.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein -# URL: +# URL: # For license information, see LICENSE.TXT """ diff 
--git a/nltk/sem/skolemize.py b/nltk/sem/skolemize.py index b14e5af3b2..0f07bcf58a 100644 --- a/nltk/sem/skolemize.py +++ b/nltk/sem/skolemize.py @@ -3,7 +3,7 @@ # Author: Ewan Klein # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.sem.logic import ( diff --git a/nltk/sem/util.py b/nltk/sem/util.py index 210ac8befc..1429e657d6 100644 --- a/nltk/sem/util.py +++ b/nltk/sem/util.py @@ -3,7 +3,7 @@ # Author: Ewan Klein # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/sentiment/__init__.py b/nltk/sentiment/__init__.py index de7a1ff4e0..60b5785f72 100644 --- a/nltk/sentiment/__init__.py +++ b/nltk/sentiment/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/sentiment/sentiment_analyzer.py b/nltk/sentiment/sentiment_analyzer.py index 1660e2f841..e46a2e9f40 100644 --- a/nltk/sentiment/sentiment_analyzer.py +++ b/nltk/sentiment/sentiment_analyzer.py @@ -3,7 +3,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/sentiment/util.py b/nltk/sentiment/util.py index f4d0cc634b..e949ff0a6b 100644 --- a/nltk/sentiment/util.py +++ b/nltk/sentiment/util.py @@ -3,7 +3,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -301,7 +301,7 @@ def _show_plot(x_values, y_values, x_labels=None, y_labels=None): except ImportError as e: raise ImportError( "The plot function requires matplotlib to be installed." 
- "See http://matplotlib.org/" + "See https://matplotlib.org/" ) from e plt.locator_params(axis="y", nbins=3) diff --git a/nltk/sentiment/vader.py b/nltk/sentiment/vader.py index 05bb5122a4..93ff5fdbfe 100644 --- a/nltk/sentiment/vader.py +++ b/nltk/sentiment/vader.py @@ -6,7 +6,7 @@ # Pierpaolo Pantone <24alsecondo@gmail.com> (modifications) # George Berry (modifications) # Malavika Suresh (modifications) -# URL: +# URL: # For license information, see LICENSE.TXT # # Modifications to the original VADER code have been made in order to @@ -109,7 +109,7 @@ class VaderConstants: } # booster/dampener 'intensifiers' or 'degree adverbs' - # http://en.wiktionary.org/wiki/Category:English_degree_adverbs + # https://en.wiktionary.org/wiki/Category:English_degree_adverbs BOOSTER_DICT = { "absolutely": B_INCR, diff --git a/nltk/stem/__init__.py b/nltk/stem/__init__.py index 071b64070c..54378c8f42 100644 --- a/nltk/stem/__init__.py +++ b/nltk/stem/__init__.py @@ -4,7 +4,7 @@ # Author: Trevor Cohn # Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/stem/api.py b/nltk/stem/api.py index 8ade354c3a..edcb1a831a 100644 --- a/nltk/stem/api.py +++ b/nltk/stem/api.py @@ -4,7 +4,7 @@ # Author: Trevor Cohn # Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from abc import ABCMeta, abstractmethod diff --git a/nltk/stem/arlstem.py b/nltk/stem/arlstem.py index 058a6b3f96..d9ba1d53e2 100644 --- a/nltk/stem/arlstem.py +++ b/nltk/stem/arlstem.py @@ -7,7 +7,7 @@ # Algorithms: Kheireddine Abainia # Siham Ouamour # Halim Sayoud -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/stem/arlstem2.py b/nltk/stem/arlstem2.py index 8dd5d02ce3..b4fc8db8b7 100644 --- a/nltk/stem/arlstem2.py +++ b/nltk/stem/arlstem2.py @@ -6,7 +6,7 @@ # Author: Kheireddine Abainia (x-programer) # Algorithms: Kheireddine Abainia # Hamza Rebbani -# URL: +# URL: # For license information, see LICENSE.TXT 
diff --git a/nltk/stem/cistem.py b/nltk/stem/cistem.py index 2966e8e589..82c751e30b 100644 --- a/nltk/stem/cistem.py +++ b/nltk/stem/cistem.py @@ -4,7 +4,7 @@ # Tom Aarsen <> (modifications) # Algorithm: Leonie Weissweiler # Alexander Fraser -# URL: +# URL: # For license information, see LICENSE.TXT import re @@ -24,7 +24,7 @@ class Cistem(StemmerI): In Proceedings of the German Society for Computational Linguistics and Language Technology (GSCL) which can be read here: - http://www.cis.lmu.de/~weissweiler/cistem/ + https://www.cis.lmu.de/~weissweiler/cistem/ In the paper, we conducted an analysis of publicly available stemmers, developed two gold standards for German stemming and evaluated the stemmers diff --git a/nltk/stem/isri.py b/nltk/stem/isri.py index 77373aa6a5..77aa4de37a 100644 --- a/nltk/stem/isri.py +++ b/nltk/stem/isri.py @@ -4,7 +4,7 @@ # Copyright (C) 2001-2021 NLTK Project # Algorithm: Kazem Taghva, Rania Elkhoury, and Jeffrey Coombs (2005) # Author: Hosam Algasaier -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/stem/lancaster.py b/nltk/stem/lancaster.py index bc0be59d5f..9dbab5b1f7 100644 --- a/nltk/stem/lancaster.py +++ b/nltk/stem/lancaster.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Tomcavage -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/stem/porter.py b/nltk/stem/porter.py index dfd957b4c1..807cdac933 100644 --- a/nltk/stem/porter.py +++ b/nltk/stem/porter.py @@ -12,7 +12,7 @@ Martin Porter, the algorithm's inventor, maintains a web page about the algorithm at - http://www.tartarus.org/~martin/PorterStemmer/ + https://www.tartarus.org/~martin/PorterStemmer/ which includes another Python implementation and other implementations in many languages. @@ -32,7 +32,7 @@ class PorterStemmer(StemmerI): Porter, M. "An algorithm for suffix stripping." Program 14.3 (1980): 130-137. 
- See http://www.tartarus.org/~martin/PorterStemmer/ for the homepage + See https://www.tartarus.org/~martin/PorterStemmer/ for the homepage of the algorithm. Martin Porter has endorsed several modifications to the Porter @@ -50,7 +50,7 @@ class PorterStemmer(StemmerI): algorithm. Martin distributes implementations of the Porter Stemmer in many languages, hosted at: - http://www.tartarus.org/~martin/PorterStemmer/ + https://www.tartarus.org/~martin/PorterStemmer/ and all of these implementations include his extensions. He strongly recommends against using the original, published diff --git a/nltk/stem/regexp.py b/nltk/stem/regexp.py index d97a130dc3..d3300c429d 100644 --- a/nltk/stem/regexp.py +++ b/nltk/stem/regexp.py @@ -4,7 +4,7 @@ # Author: Trevor Cohn # Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT import re diff --git a/nltk/stem/rslp.py b/nltk/stem/rslp.py index 090a044f33..83f00e9616 100644 --- a/nltk/stem/rslp.py +++ b/nltk/stem/rslp.py @@ -2,15 +2,15 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Tiago Tresoldi -# URL: +# URL: # For license information, see LICENSE.TXT # This code is based on the algorithm presented in the paper "A Stemming # Algorithm for the Portuguese Language" by Viviane Moreira Orengo and # Christian Huyck, which unfortunately I had no access to. The code is a # Python version, with some minor modifications of mine, to the description -# presented at http://www.webcitation.org/5NnvdIzOb and to the C source code -# available at http://www.inf.ufrgs.br/~arcoelho/rslp/integrando_rslp.html. +# presented at https://www.webcitation.org/5NnvdIzOb and to the C source code +# available at https://www.inf.ufrgs.br/~arcoelho/rslp/integrando_rslp.html. # Please note that this stemmer is intended for demonstration and educational # purposes only. Feel free to write me for any comments, including the # development of a different and/or better stemmer for Portuguese. 
I also @@ -20,9 +20,9 @@ # Algorithm for the Portuguese Language" de Viviane Moreira Orengo e # Christian Huyck, o qual infelizmente não tive a oportunidade de ler. O # código é uma conversão para Python, com algumas pequenas modificações -# minhas, daquele apresentado em http://www.webcitation.org/5NnvdIzOb e do +# minhas, daquele apresentado em https://www.webcitation.org/5NnvdIzOb e do # código para linguagem C disponível em -# http://www.inf.ufrgs.br/~arcoelho/rslp/integrando_rslp.html. Por favor, +# https://www.inf.ufrgs.br/~arcoelho/rslp/integrando_rslp.html. Por favor, # lembre-se de que este stemmer foi desenvolvido com finalidades unicamente # de demonstração e didáticas. Sinta-se livre para me escrever para qualquer # comentário, inclusive sobre o desenvolvimento de um stemmer diferente diff --git a/nltk/stem/snowball.py b/nltk/stem/snowball.py index 95946ea320..cb17df9875 100644 --- a/nltk/stem/snowball.py +++ b/nltk/stem/snowball.py @@ -10,7 +10,7 @@ # Algorithms: Dr Martin Porter # Assem Chelli arabic stemming algorithm # Benzahia Lakhdar -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/stem/util.py b/nltk/stem/util.py index 468a0607a7..250e5ff088 100644 --- a/nltk/stem/util.py +++ b/nltk/stem/util.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Helder -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/stem/wordnet.py b/nltk/stem/wordnet.py index 1ec43bcd1c..7d9e3b68b4 100644 --- a/nltk/stem/wordnet.py +++ b/nltk/stem/wordnet.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.corpus import wordnet as wn diff --git a/nltk/tag/__init__.py b/nltk/tag/__init__.py index 9b6db75e57..ce7610e171 100644 --- a/nltk/tag/__init__.py +++ b/nltk/tag/__init__.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) -# URL: 
+# URL: # For license information, see LICENSE.TXT """ NLTK Taggers diff --git a/nltk/tag/api.py b/nltk/tag/api.py index 57abc39b1e..9ef6513549 100644 --- a/nltk/tag/api.py +++ b/nltk/tag/api.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tag/brill.py b/nltk/tag/brill.py index df5c8c269b..b0b7607eac 100644 --- a/nltk/tag/brill.py +++ b/nltk/tag/brill.py @@ -4,7 +4,7 @@ # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from collections import Counter, defaultdict @@ -89,7 +89,7 @@ def nltkdemo18plus(): def fntbl37(): """ Return 37 templates taken from the postagging task of the - fntbl distribution http://www.cs.jhu.edu/~rflorian/fntbl/ + fntbl distribution https://www.cs.jhu.edu/~rflorian/fntbl/ (37 is after excluding a handful which do not condition on Pos[0]; fntbl can do that but the current nltk implementation cannot.) 
""" diff --git a/nltk/tag/brill_trainer.py b/nltk/tag/brill_trainer.py index f9db7c2325..e82be79f93 100644 --- a/nltk/tag/brill_trainer.py +++ b/nltk/tag/brill_trainer.py @@ -4,7 +4,7 @@ # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT import bisect diff --git a/nltk/tag/crf.py b/nltk/tag/crf.py index 110562ef24..02eadb0b1d 100644 --- a/nltk/tag/crf.py +++ b/nltk/tag/crf.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Long Duong -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tag/hmm.py b/nltk/tag/hmm.py index 437d74e5c5..6be420b86a 100644 --- a/nltk/tag/hmm.py +++ b/nltk/tag/hmm.py @@ -7,7 +7,7 @@ # Steven Bird (fixes) # Joseph Frazee (fixes) # Steven Xu (fixes) -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tag/hunpos.py b/nltk/tag/hunpos.py index 16997e21bb..9e2ce29847 100644 --- a/nltk/tag/hunpos.py +++ b/nltk/tag/hunpos.py @@ -4,7 +4,7 @@ # Author: Peter Ljunglöf # Dávid Márk Nemeskey (modifications) # Attila Zséder (modifications) -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -17,7 +17,7 @@ from nltk.internals import find_binary, find_file from nltk.tag.api import TaggerI -_hunpos_url = "http://code.google.com/p/hunpos/" +_hunpos_url = "https://code.google.com/p/hunpos/" _hunpos_charset = "ISO-8859-1" """The default encoding used by hunpos: ISO-8859-1.""" diff --git a/nltk/tag/mapping.py b/nltk/tag/mapping.py index 861e0211a5..e5b973c18c 100644 --- a/nltk/tag/mapping.py +++ b/nltk/tag/mapping.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Nathan Schneider # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -25,7 +25,7 @@ X - other: foreign words, typos, abbreviations . 
- punctuation -@see: http://arxiv.org/abs/1104.2086 and http://code.google.com/p/universal-pos-tags/ +@see: https://arxiv.org/abs/1104.2086 and https://code.google.com/p/universal-pos-tags/ """ diff --git a/nltk/tag/perceptron.py b/nltk/tag/perceptron.py index 6091e9886a..6e12f3eb6e 100644 --- a/nltk/tag/perceptron.py +++ b/nltk/tag/perceptron.py @@ -2,7 +2,7 @@ # Author: Matthew Honnibal , # Long Duong (NLTK port) # URL: -# +# # Copyright 2013 Matthew Honnibal # NLTK modifications Copyright 2015 The NLTK Project # diff --git a/nltk/tag/senna.py b/nltk/tag/senna.py index 12c4379814..edba2c3530 100644 --- a/nltk/tag/senna.py +++ b/nltk/tag/senna.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Rami Al-Rfou' -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tag/sequential.py b/nltk/tag/sequential.py index 9a58c2f9d4..3576d2d8a9 100644 --- a/nltk/tag/sequential.py +++ b/nltk/tag/sequential.py @@ -4,7 +4,7 @@ # Author: Edward Loper # Steven Bird (minor additions) # Tiago Tresoldi (original affix tagger) -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tag/stanford.py b/nltk/tag/stanford.py index 81931b4572..dea7970e7f 100644 --- a/nltk/tag/stanford.py +++ b/nltk/tag/stanford.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Nitin Madnani # Rami Al-Rfou' -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tag/tnt.py b/nltk/tag/tnt.py index 32804c9d48..887693afd9 100755 --- a/nltk/tag/tnt.py +++ b/nltk/tag/tnt.py @@ -3,14 +3,14 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Sam Huston # -# URL: +# URL: # For license information, see LICENSE.TXT """ Implementation of 'TnT - A Statisical Part of Speech Tagger' by Thorsten Brants -http://acl.ldc.upenn.edu/A/A00/A00-1031.pdf +https://acl.ldc.upenn.edu/A/A00/A00-1031.pdf """ from math import log diff --git a/nltk/tag/util.py b/nltk/tag/util.py index df23ca8355..c60e326d14 100644 --- 
a/nltk/tag/util.py +++ b/nltk/tag/util.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/tbl/__init__.py b/nltk/tbl/__init__.py index e7cb5be338..cb1cc48c02 100644 --- a/nltk/tbl/__init__.py +++ b/nltk/tbl/__init__.py @@ -4,7 +4,7 @@ # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tbl/demo.py b/nltk/tbl/demo.py index 49d5d115ae..613351b03a 100644 --- a/nltk/tbl/demo.py +++ b/nltk/tbl/demo.py @@ -4,7 +4,7 @@ # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT import os diff --git a/nltk/tbl/erroranalysis.py b/nltk/tbl/erroranalysis.py index 603b93cbd1..0a89e69317 100644 --- a/nltk/tbl/erroranalysis.py +++ b/nltk/tbl/erroranalysis.py @@ -4,7 +4,7 @@ # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT # returns a list of errors in string format diff --git a/nltk/tbl/feature.py b/nltk/tbl/feature.py index 31a95b1d8e..1d4f619cf7 100644 --- a/nltk/tbl/feature.py +++ b/nltk/tbl/feature.py @@ -4,7 +4,7 @@ # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from abc import ABCMeta, abstractmethod diff --git a/nltk/tbl/rule.py b/nltk/tbl/rule.py index 8614344d4a..c39df01073 100644 --- a/nltk/tbl/rule.py +++ b/nltk/tbl/rule.py @@ -4,7 +4,7 @@ # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from abc import ABCMeta, abstractmethod diff --git 
a/nltk/tbl/template.py b/nltk/tbl/template.py index a25b3a0e25..cfa9d9ac94 100644 --- a/nltk/tbl/template.py +++ b/nltk/tbl/template.py @@ -4,7 +4,7 @@ # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT import itertools as it diff --git a/nltk/test/__init__.py b/nltk/test/__init__.py index 74d154d8a7..54655b6806 100644 --- a/nltk/test/__init__.py +++ b/nltk/test/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/test/chat80.doctest b/nltk/test/chat80.doctest index 46dff13235..80d9d04aff 100644 --- a/nltk/test/chat80.doctest +++ b/nltk/test/chat80.doctest @@ -8,8 +8,8 @@ Chat-80 Chat-80 was a natural language system which allowed the user to interrogate a Prolog knowledge base in the domain of world geography. It was developed in the early '80s by Warren and Pereira; see -``_ for a description and -``_ for the source +``_ for a description and +``_ for the source files. The ``chat80`` module contains functions to extract data from the Chat-80 diff --git a/nltk/test/childes.doctest b/nltk/test/childes.doctest index c0703fc25f..e99c2bb19f 100644 --- a/nltk/test/childes.doctest +++ b/nltk/test/childes.doctest @@ -43,7 +43,7 @@ Printing properties of the corpus files. Id : 01a Lang : eng Version : 2.0.1 - {http://www.w3.org/2001/XMLSchema-instance}schemaLocation : http://www.talkbank.org/ns/talkbank http://talkbank.org/software/talkbank.xsd + {https://www.w3.org/2001/XMLSchema-instance}schemaLocation : https://www.talkbank.org/ns/talkbank https://talkbank.org/software/talkbank.xsd Printing information of participants of the corpus. The most common codes for the participants are 'CHI' (target child), 'MOT' (mother), and 'INV' (investigator). 
diff --git a/nltk/test/corpus.doctest b/nltk/test/corpus.doctest index ef74077433..6544aa2331 100644 --- a/nltk/test/corpus.doctest +++ b/nltk/test/corpus.doctest @@ -9,7 +9,7 @@ The `nltk.corpus` package defines a collection of *corpus reader* classes, which can be used to access the contents of a diverse set of corpora. The list of available corpora is given at: -http://www.nltk.org/nltk_data/ +https://www.nltk.org/nltk_data/ Each corpus reader class is specialized to handle a specific corpus format. In addition, the `nltk.corpus` package automatically diff --git a/nltk/test/data.doctest b/nltk/test/data.doctest index ca826150e7..fef95f1773 100644 --- a/nltk/test/data.doctest +++ b/nltk/test/data.doctest @@ -34,7 +34,7 @@ The ``nltk:`` protocol is used by default if no protocol is specified: But it is also possible to load resources from ``http:``, ``ftp:``, -and ``file:`` URLs, e.g. ``cfg = nltk.data.load('http://example.com/path/to/toy.cfg')`` +and ``file:`` URLs, e.g. ``cfg = nltk.data.load('https://example.com/path/to/toy.cfg')`` >>> # Load a grammar using an absolute path. >>> url = 'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg') diff --git a/nltk/test/featgram.doctest b/nltk/test/featgram.doctest index cedfca452f..1898c0ee88 100644 --- a/nltk/test/featgram.doctest +++ b/nltk/test/featgram.doctest @@ -40,7 +40,7 @@ tested and revised. Let's assume that we have saved feat0cfg_ as a file named ``'feat0.fcfg'`` and placed it in the NLTK ``data`` directory. We can inspect it as follows: -.. _feat0cfg: http://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/feat0.fcfg +.. _feat0cfg: https://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/feat0.fcfg >>> nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg') % start S @@ -501,7 +501,7 @@ calculus. Each element has a core semantics, which is a single lambda calculus expression; and a set of binding operators, which bind variables. -.. 
_bindop.fcfg: http://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/bindop.fcfg +.. _bindop.fcfg: https://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/bindop.fcfg In order to make the binding operators work right, they need to instantiate their bound variable every time they are added to the diff --git a/nltk/test/gensim.doctest b/nltk/test/gensim.doctest index 774a07df98..f9215b0bb2 100644 --- a/nltk/test/gensim.doctest +++ b/nltk/test/gensim.doctest @@ -80,7 +80,7 @@ the vector 'King - Man + Woman' is close to 'Queen' and 'Germany - Berlin + Pari >>> model.most_similar(positive=['Paris','Germany'], negative=['Berlin'], topn = 1) [('France', 0.78840...)] -We can visualize the word embeddings using t-SNE (http://lvdmaaten.github.io/tsne/). For this demonstration, we visualize the first 1000 words. +We can visualize the word embeddings using t-SNE (https://lvdmaaten.github.io/tsne/). For this demonstration, we visualize the first 1000 words. | import numpy as np | labels = [] diff --git a/nltk/test/inference.doctest b/nltk/test/inference.doctest index 30d1ce1863..e89f1c2eac 100644 --- a/nltk/test/inference.doctest +++ b/nltk/test/inference.doctest @@ -147,7 +147,7 @@ Prover9 Prover9 Installation ~~~~~~~~~~~~~~~~~~~~ -You can download Prover9 from http://www.cs.unm.edu/~mccune/prover9/. +You can download Prover9 from https://www.cs.unm.edu/~mccune/prover9/. Extract the source code into a suitable directory and follow the instructions in the Prover9 ``README.make`` file to compile the executables. @@ -186,7 +186,7 @@ If the executables cannot be found, ``Prover9`` will issue a warning message: >> config_prover9('/path/to/prover9') For more information, on prover9, see: - + =========================================================================== @@ -434,7 +434,7 @@ Mace4 Installation ~~~~~~~~~~~~~~~~~~ Mace4 is packaged with Prover9, and can be downloaded from the same -source, namely http://www.cs.unm.edu/~mccune/prover9/. 
It is installed +source, namely https://www.cs.unm.edu/~mccune/prover9/. It is installed in the same manner as Prover9. Using Mace4 diff --git a/nltk/test/misc.doctest b/nltk/test/misc.doctest index b7f7e368d1..72b148e1e5 100644 --- a/nltk/test/misc.doctest +++ b/nltk/test/misc.doctest @@ -51,7 +51,7 @@ Unit tests for Wordfinder class >>> import random >>> # The following is not enough for reproducibility under Python 2/3 - >>> # (see http://bugs.python.org/issue9025) so this test is skipped. + >>> # (see https://bugs.python.org/issue9025) so this test is skipped. >>> random.seed(12345) >>> from nltk.misc import wordfinder diff --git a/nltk/test/portuguese.doctest_latin1 b/nltk/test/portuguese.doctest_latin1 index c99ef6bfd8..c43279c838 100644 --- a/nltk/test/portuguese.doctest_latin1 +++ b/nltk/test/portuguese.doctest_latin1 @@ -1,21 +1,21 @@ ========================================== -Examplos para o processamento do português +Examplos para o processamento do portugu�s ========================================== >>> import nltk (NB. Este material parte do pressuposto de que o leitor esteja -familiarizado com o livro do NLTK, disponível em -``http://nltk.org/index.php/Book``). +familiarizado com o livro do NLTK, dispon�vel em +``https://www.nltk.org/index.php/Book``). Utilizando o Corpus MacMorpho Tagged ------------------------------------ -O NLTK inclui o corpus de notícias para o português brasileiro com tags de partes do discurso -MAC-MORPHO, que conta com mais de um milhão de palavras de textos jornalísticos extraídos -de dez seções do jornal diário *Folha de São Paulo*, do ano de 1994. +O NLTK inclui o corpus de not�cias para o portugu�s brasileiro com tags de partes do discurso +MAC-MORPHO, que conta com mais de um milh�o de palavras de textos jornal�sticos extra�dos +de dez se��es do jornal di�rio *Folha de S�o Paulo*, do ano de 1994. 
-Podemos utilizar este corpus como uma seqüência de palavras ou de palavras com tags da +Podemos utilizar este corpus como uma seq��ncia de palavras ou de palavras com tags da seguinte maneira: >>> nltk.corpus.mac_morpho.words() @@ -28,7 +28,7 @@ seguinte maneira: >>> nltk.corpus.mac_morpho.tagged_words() [('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ...] -Também é possível utilizá-lo em chunks de frases. +Tamb�m � poss�vel utiliz�-lo em chunks de frases. >>> nltk.corpus.mac_morpho.tagged_sents() [[('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ('de', 'PREP'), @@ -47,13 +47,13 @@ exemplos abaixo para o Floresta treebank). Utilizando o Floresta Portuguese Treebank ----------------------------------------- -A distribuição de dados do NLTK inclui o -"Floresta Sinta(c)tica Corpus" na versão 7.4, disponível em -``http://www.linguateca.pt/Floresta/``. +A distribui��o de dados do NLTK inclui o +"Floresta Sinta(c)tica Corpus" na vers�o 7.4, dispon�vel em +``https://www.linguateca.pt/Floresta/``. -Como para a amostra do Penn Treebank, é possível -utilizar o conteúdo deste corpus como uma seqüência de palavras com -informações de tags, da seguinte maneira: +Como para a amostra do Penn Treebank, � poss�vel +utilizar o conte�do deste corpus como uma seq��ncia de palavras com +informa��es de tags, da seguinte maneira: >>> from nltk.corpus import floresta >>> floresta.words() @@ -61,11 +61,11 @@ informa >>> floresta.tagged_words() [('Um', '>N+art'), ('revivalismo', 'H+n'), ...] -As tags são constituídas por certas informações sintáticas, seguidas por +As tags s�o constitu�das por certas informa��es sint�ticas, seguidas por um sinal de mais, seguido por tag costumeira de parte do discurso (part-of-speech). Vamos -remover o conteúdo que antecede o sinal de mais: +remover o conte�do que antecede o sinal de mais: >>> def simplify_tag(t): ... 
if "+" in t: @@ -78,12 +78,12 @@ remover o conte [('um', 'art'), ('revivalismo', 'n'), ('refrescante', 'adj'), ('o', 'art'), ('7_e_meio', 'prop'), ('\xe9', 'v-fin'), ('um', 'art'), ('ex-libris', 'n'), ('de', 'prp'), ('a', 'art')] -E exibir de maneira mais apropriada as palavras com informações de tags: +E exibir de maneira mais apropriada as palavras com informa��es de tags: >>> print ' '.join(word + '/' + tag for (word, tag) in twords[:10]) um/art revivalismo/n refrescante/adj o/art 7_e_meio/prop ?/v-fin um/art ex-libris/n de/prp a/art -Em seguida, vamos contar o número de tokens de palavras e tipos, além de +Em seguida, vamos contar o n�mero de tokens de palavras e tipos, al�m de determinar qual a palavra mais comum: >>> words = floresta.words() @@ -95,8 +95,8 @@ determinar qual a palavra mais comum: >>> fd.max() 'de' -Podemos também listar as 20 tags mais freqüentes, em ordem decrescente de -freqüência: +Podemos tamb�m listar as 20 tags mais freq�entes, em ordem decrescente de +freq��ncia: >>> tags = [simplify_tag(tag) for (word,tag) in floresta.tagged_words()] >>> fd = nltk.FreqDist(tags) @@ -104,7 +104,7 @@ freq ['n', 'prp', 'art', 'v-fin', ',', 'prop', 'adj', 'adv', '.', 'conj-c', 'v-inf', 'pron-det', 'v-pcp', 'num', 'pron-indp', 'pron-pers', '\xab', '\xbb', 'conj-s', '}'] -Também podemos ler o corpus agrupado por enunciados: +Tamb�m podemos ler o corpus agrupado por enunciados: >>> floresta.sents() [['Um', 'revivalismo', 'refrescante'], ['O', '7_e_Meio', '\xe9', 'um', 'ex-libris', @@ -122,19 +122,19 @@ Tamb Tree('N<+pp', [Tree('H+prp', ['de']), Tree('P<+np', [Tree('>N+art', ['a']), Tree('H+n', ['noite']), Tree('N<+adj', ['algarvia'])])])]), Tree('.', ['.'])]), ...] 
-Para ver uma árvore de análise sintática, podemos utilizar o método +Para ver uma �rvore de an�lise sint�tica, podemos utilizar o m�todo ``draw()``, como no exemplo: >>> psents = floresta.parsed_sents() >>> psents[5].draw() # doctest: +SKIP -Concordância simples +Concord�ncia simples -------------------- -A seguir, apresentamos uma função que recebe uma palavra e uma +A seguir, apresentamos uma fun��o que recebe uma palavra e uma quantidade determinada -de contexto (medido em caracteres) e gera uma concordância para a mesma. +de contexto (medido em caracteres) e gera uma concord�ncia para a mesma. >>> def concordance(word, context=30): ... for sent in floresta.sents(): @@ -166,9 +166,9 @@ de contexto (medido em caracteres) e gera uma concord Tagging de partes do discurso ----------------------------- -Vamos começar obtendo os dados dos enunciados marcados com tags e +Vamos come�ar obtendo os dados dos enunciados marcados com tags e simplificando -estas últimas como descrito anteriormente. +estas �ltimas como descrito anteriormente. >>> from nltk.corpus import floresta >>> tsents = floresta.tagged_sents() @@ -176,7 +176,7 @@ estas >>> train = tsents[100:] >>> test = tsents[:100] -Já sabemos que ``n`` é a tag mais comum; desta forma, podemos criar um +J� sabemos que ``n`` � a tag mais comum; desta forma, podemos criar um tagger por default que marque toda palavra como substantivo e, em seguida, avaliar seu desempenho: @@ -185,9 +185,9 @@ desempenho: >>> nltk.tag.accuracy(tagger0, test) 0.17690941385435169 -Como pode-se deduzir facilmente, uma em cada seis palavras é um +Como pode-se deduzir facilmente, uma em cada seis palavras � um substantivo. 
Vamos -aperfeiçoar estes resultados treinando um tagger unigrama: +aperfei�oar estes resultados treinando um tagger unigrama: >>> tagger1 = nltk.UnigramTagger(train, backoff=tagger0) >>> nltk.tag.accuracy(tagger1, test) @@ -199,23 +199,23 @@ E, em seguida, um tagger bigrama: >>> nltk.tag.accuracy(tagger2, test) 0.86856127886323264 -Segmentação de frases +Segmenta��o de frases --------------------- -O Punkt é uma ferramenta para segmentação de frases lingüisticamente independente, o qual +O Punkt � uma ferramenta para segmenta��o de frases ling�isticamente independente, o qual requer um treinamento em texto puro. -O texto de origem (obtido do Floresta Portuguese Treebank) contém uma frase por linha. Podemos -ler o texto, dividi-lo em função de suas linhas e então agrupar estas linhas utilizando -espaços. Desta forma as informações sobre quebras de frases terão sido descartadas; podemos -então dividir este material em dados para treinamento e para verificação: +O texto de origem (obtido do Floresta Portuguese Treebank) cont�m uma frase por linha. Podemos +ler o texto, dividi-lo em fun��o de suas linhas e ent�o agrupar estas linhas utilizando +espa�os. Desta forma as informa��es sobre quebras de frases ter�o sido descartadas; podemos +ent�o dividir este material em dados para treinamento e para verifica��o: >>> text = open('floresta.txt').read() >>> lines = text.split('\n') >>> train = ' '.join(lines[10:]) >>> test = ' '.join(lines[:10]) -É agora possível treinar o segmentador de frases (ou tokenizador de frases) e utilizá-lo em -nossas frases de verificação. (Para exibir o texto em uma forma legível, pode ser necessário +� agora poss�vel treinar o segmentador de frases (ou tokenizador de frases) e utiliz�-lo em +nossas frases de verifica��o. (Para exibir o texto em uma forma leg�vel, pode ser necess�rio converter o texto para o UTF-8, utilizando ``print sent.decode('latin-1').encode('utf-8')``.) 
>>> stok = nltk.PunktSentenceTokenizer(train) @@ -223,17 +223,17 @@ converter o texto para o UTF-8, utilizando ``print sent.decode('latin-1').encode ... print sent -As versões do NLTK a partir da 0.9b1 incluem um modelo treinado para a segmentação de frases -em português, o qual pode ser carregado pela maneira a seguir. É mais rápido carregar um modelo -já treinado do que repetir o treinamento do mesmo. +As vers�es do NLTK a partir da 0.9b1 incluem um modelo treinado para a segmenta��o de frases +em portugu�s, o qual pode ser carregado pela maneira a seguir. � mais r�pido carregar um modelo +j� treinado do que repetir o treinamento do mesmo. >>> stok = nltk.data.load('tokenizers/punkt/portuguese.pickle') Stemming -------- -O NLTK inclui o stemmer para o português RSLP. Vamos demonstrar sua utilização para algumas -palavras em português: +O NLTK inclui o stemmer para o portugu�s RSLP. Vamos demonstrar sua utiliza��o para algumas +palavras em portugu�s: >>> stemmer = nltk.stem.RSLPStemmer() >>> stemmer.stem("copiar") @@ -244,14 +244,14 @@ palavras em portugu Stopwords --------- -O NLTK inclui stopword ("palavras limite") para o português: +O NLTK inclui stopword ("palavras limite") para o portugu�s: >>> stopwords = nltk.corpus.stopwords.words('portuguese') >>> stopwords[:10] ['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', 'aquilo', 'as', 'at\xe9'] -A esta altura, é possível utilizá-las para filtrar textos. Vamos encontrar as palavras mais -comuns (à exceção das stopwords) e listá-las em ordem decrescente de freqüência: +A esta altura, � poss�vel utiliz�-las para filtrar textos. 
Vamos encontrar as palavras mais +comuns (� exce��o das stopwords) e list�-las em ordem decrescente de freq��ncia: >>> fd = nltk.FreqDist(w.lower() for w in floresta.words() if w not in stopwords) >>> for word in fd.sorted()[:20]: @@ -278,10 +278,10 @@ comuns ( dois 231 -Codificações de caracteres +Codifica��es de caracteres -------------------------- -O Python é capaz de lidar com todas a codificações de caracteres mais utilizada para o português, a +O Python � capaz de lidar com todas a codifica��es de caracteres mais utilizada para o portugu�s, a ISO 8859-1 (ISO Latin 1). >>> text = open('floresta.txt').read() diff --git a/nltk/test/portuguese_en.doctest b/nltk/test/portuguese_en.doctest index 0b2f6325b2..ee1641e645 100644 --- a/nltk/test/portuguese_en.doctest +++ b/nltk/test/portuguese_en.doctest @@ -7,7 +7,7 @@ Examples for Portuguese Processing This HOWTO contains a variety of examples relating to the Portuguese language. It is intended to be read in conjunction with the NLTK book -(``http://nltk.org/book``). For instructions on running the Python +(``https://www.nltk.org/book``). For instructions on running the Python interpreter, please see the section *Getting Started with Python*, in Chapter 1. -------------------------------------------- @@ -271,7 +271,7 @@ Accessing the Floresta Portuguese Treebank The NLTK data distribution includes the "Floresta Sinta(c)tica Corpus" version 7.4, available from -``http://www.linguateca.pt/Floresta/``. +``https://www.linguateca.pt/Floresta/``. We can access this corpus as a sequence of words or tagged words as follows: diff --git a/nltk/test/translate.doctest b/nltk/test/translate.doctest index d167281c3e..2bfedc636f 100644 --- a/nltk/test/translate.doctest +++ b/nltk/test/translate.doctest @@ -165,7 +165,7 @@ Here are some examples: .. 
_nltk.metrics.scores.precision: - http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.precision + https://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.precision Recall @@ -199,7 +199,7 @@ Here are some examples: .. _nltk.metrics.scores.recall: - http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.recall + https://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.recall Alignment Error Rate (AER) diff --git a/nltk/test/twitter.ipynb b/nltk/test/twitter.ipynb index 05b7b432da..14f42f4886 100644 --- a/nltk/test/twitter.ipynb +++ b/nltk/test/twitter.ipynb @@ -150,7 +150,7 @@ "source": [ "The next example filters the live public stream by looking for specific user accounts. In this case, we 'follow' two news organisations, namely `@CNN` and `@BBCNews`. [As advised by Twitter](https://dev.twitter.com/streaming/reference/post/statuses/filter), we use *numeric userIDs* for these accounts. If you run this code yourself, you'll see that Tweets are arriving much more slowly than in the previous example. This is because even big new organisations don't publish Tweets that often.\n", "\n", - "A bit later we will show you how to use Python to convert usernames such as `@CNN` to userIDs such as `759251`, but for now you might find it simpler to use a web service like [TweeterID](http://tweeterid.com) if you want to experiment with following different accounts than the ones shown below." + "A bit later we will show you how to use Python to convert usernames such as `@CNN` to userIDs such as `759251`, but for now you might find it simpler to use a web service like [TweeterID](https://tweeterid.com) if you want to experiment with following different accounts than the ones shown below." 
] }, { diff --git a/nltk/test/unit/lm/test_counter.py b/nltk/test/unit/lm/test_counter.py index 94c2f8f2f5..b585c06d59 100644 --- a/nltk/test/unit/lm/test_counter.py +++ b/nltk/test/unit/lm/test_counter.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov -# URL: +# URL: # For license information, see LICENSE.TXT import unittest diff --git a/nltk/test/unit/lm/test_models.py b/nltk/test/unit/lm/test_models.py index 97ef016439..f7df66baf2 100644 --- a/nltk/test/unit/lm/test_models.py +++ b/nltk/test/unit/lm/test_models.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov -# URL: +# URL: # For license information, see LICENSE.TXT import math from operator import itemgetter diff --git a/nltk/test/unit/lm/test_preprocessing.py b/nltk/test/unit/lm/test_preprocessing.py index 326af420a3..11d8eddffe 100644 --- a/nltk/test/unit/lm/test_preprocessing.py +++ b/nltk/test/unit/lm/test_preprocessing.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov -# URL: +# URL: # For license information, see LICENSE.TXT import unittest diff --git a/nltk/test/unit/lm/test_vocabulary.py b/nltk/test/unit/lm/test_vocabulary.py index a64cbf3152..658fc0e6f6 100644 --- a/nltk/test/unit/lm/test_vocabulary.py +++ b/nltk/test/unit/lm/test_vocabulary.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ilia Kurenkov -# URL: +# URL: # For license information, see LICENSE.TXT import unittest diff --git a/nltk/test/unit/test_chunk.py b/nltk/test/unit/test_chunk.py index 3991841355..75d5692a59 100644 --- a/nltk/test/unit/test_chunk.py +++ b/nltk/test/unit/test_chunk.py @@ -9,7 +9,7 @@ def test_tag_pattern2re_pattern_quantifier(self): Ensures that curly bracket quantifiers can be used inside a chunk rule. This type of quantifier has been used for the supplementary example - in http://www.nltk.org/book/ch07.html#exploring-text-corpora. 
+ in https://www.nltk.org/book/ch07.html#exploring-text-corpora. """ sent = [ ("The", "AT"), diff --git a/nltk/test/unit/test_concordance.py b/nltk/test/unit/test_concordance.py index e20cd091e0..23115bfac4 100644 --- a/nltk/test/unit/test_concordance.py +++ b/nltk/test/unit/test_concordance.py @@ -17,7 +17,7 @@ def stdout_redirect(where): class TestConcordance(unittest.TestCase): - """Text constructed using: http://www.nltk.org/book/ch01.html""" + """Text constructed using: https://www.nltk.org/book/ch01.html""" @classmethod def setUpClass(cls): diff --git a/nltk/test/unit/test_disagreement.py b/nltk/test/unit/test_disagreement.py index 9b3f6d70c9..635b09c7fc 100644 --- a/nltk/test/unit/test_disagreement.py +++ b/nltk/test/unit/test_disagreement.py @@ -44,7 +44,7 @@ def test_easy2(self): def test_advanced(self): """ More advanced test, based on - http://www.agreestat.com/research_papers/onkrippendorffalpha.pdf + https://www.agreestat.com/research_papers/onkrippendorffalpha.pdf """ data = [ ("A", "1", "1"), diff --git a/nltk/test/unit/test_hmm.py b/nltk/test/unit/test_hmm.py index 173f72a95d..246078e65a 100644 --- a/nltk/test/unit/test_hmm.py +++ b/nltk/test/unit/test_hmm.py @@ -5,7 +5,7 @@ def _wikipedia_example_hmm(): # Example from wikipedia - # (http://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm) + # (https://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm) states = ["rain", "no rain"] symbols = ["umbrella", "no umbrella"] diff --git a/nltk/test/unit/test_json2csv_corpus.py b/nltk/test/unit/test_json2csv_corpus.py index 051190956e..8da43a5b42 100644 --- a/nltk/test/unit/test_json2csv_corpus.py +++ b/nltk/test/unit/test_json2csv_corpus.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Lorenzo Rubio -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/test/unit/test_stem.py b/nltk/test/unit/test_stem.py index a0d0bea7ba..95bed812d1 100644 --- a/nltk/test/unit/test_stem.py +++ 
b/nltk/test/unit/test_stem.py @@ -89,11 +89,11 @@ def test_vocabulary_martin_mode(self): """Tests all words from the test vocabulary provided by M Porter The sample vocabulary and output were sourced from: - http://tartarus.org/martin/PorterStemmer/voc.txt - http://tartarus.org/martin/PorterStemmer/output.txt + https://tartarus.org/martin/PorterStemmer/voc.txt + https://tartarus.org/martin/PorterStemmer/output.txt and are linked to from the Porter Stemmer algorithm's homepage at - http://tartarus.org/martin/PorterStemmer/ + https://tartarus.org/martin/PorterStemmer/ """ with closing( data.find("stemmers/porter_test/porter_martin_output.txt").open( @@ -117,7 +117,7 @@ def test_vocabulary_nltk_mode(self): def test_vocabulary_original_mode(self): # The list of stems for this test was generated by taking the # Martin-blessed stemmer from - # http://tartarus.org/martin/PorterStemmer/c.txt + # https://tartarus.org/martin/PorterStemmer/c.txt # and removing all the --DEPARTURE-- sections from it and # running it against Martin's test vocabulary. diff --git a/nltk/test/unit/test_tgrep.py b/nltk/test/unit/test_tgrep.py index 811d7c37aa..eaf38ddac4 100644 --- a/nltk/test/unit/test_tgrep.py +++ b/nltk/test/unit/test_tgrep.py @@ -4,7 +4,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Will Roberts -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/test/unit/translate/test_bleu.py b/nltk/test/unit/translate/test_bleu.py index 0a1811f031..fa26df5823 100644 --- a/nltk/test/unit/translate/test_bleu.py +++ b/nltk/test/unit/translate/test_bleu.py @@ -20,7 +20,7 @@ class TestBLEU(unittest.TestCase): def test_modified_precision(self): """ Examples from the original BLEU paper - http://www.aclweb.org/anthology/P02-1040.pdf + https://www.aclweb.org/anthology/P02-1040.pdf """ # Example 1: the "the*" example. # Reference sentences. 
diff --git a/nltk/test/unit/translate/test_stack_decoder.py b/nltk/test/unit/translate/test_stack_decoder.py index f281162bfd..06746b1e93 100644 --- a/nltk/test/unit/translate/test_stack_decoder.py +++ b/nltk/test/unit/translate/test_stack_decoder.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Tah Wei Hoon -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/text.py b/nltk/text.py index 04543c2a92..dd734aa1be 100644 --- a/nltk/text.py +++ b/nltk/text.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tgrep.py b/nltk/tgrep.py index 36475a043c..1084130aae 100644 --- a/nltk/tgrep.py +++ b/nltk/tgrep.py @@ -4,7 +4,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Will Roberts -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -18,9 +18,9 @@ External links: -- `Tgrep tutorial `_ -- `Tgrep2 manual `_ -- `Tgrep2 source `_ +- `Tgrep tutorial `_ +- `Tgrep2 manual `_ +- `Tgrep2 source `_ Usage ===== diff --git a/nltk/tokenize/__init__.py b/nltk/tokenize/__init__.py index c8dd45b319..9a82830e12 100644 --- a/nltk/tokenize/__init__.py +++ b/nltk/tokenize/__init__.py @@ -4,7 +4,7 @@ # Author: Edward Loper # Steven Bird (minor additions) # Contributors: matthewmc, clouds56 -# URL: +# URL: # For license information, see LICENSE.TXT r""" diff --git a/nltk/tokenize/api.py b/nltk/tokenize/api.py index 69be15c295..6786dad2bf 100644 --- a/nltk/tokenize/api.py +++ b/nltk/tokenize/api.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tokenize/casual.py b/nltk/tokenize/casual.py index 195d683522..89cc63a10c 100644 --- a/nltk/tokenize/casual.py +++ b/nltk/tokenize/casual.py @@ -6,7 +6,7 @@ # Ewan Klein (modifications) # Pierpaolo Pantone <> (modifications) # Tom Aarsen <> 
(modifications) -# URL: +# URL: # For license information, see LICENSE.TXT # @@ -60,7 +60,7 @@ # Most importantly, the final element should always be last, since it # does a last ditch whitespace-based tokenization of whatever is left. -# ToDo: Update with http://en.wikipedia.org/wiki/List_of_emoticons ? +# ToDo: Update with https://en.wikipedia.org/wiki/List_of_emoticons ? # This particular element is used in a couple ways, so we define it # with a name: diff --git a/nltk/tokenize/destructive.py b/nltk/tokenize/destructive.py index 4080c14f15..a4daf0941f 100644 --- a/nltk/tokenize/destructive.py +++ b/nltk/tokenize/destructive.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/tokenize/legality_principle.py b/nltk/tokenize/legality_principle.py index 1a7d95d1c3..0f2e0704eb 100644 --- a/nltk/tokenize/legality_principle.py +++ b/nltk/tokenize/legality_principle.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Christopher Hench # Alex Estes -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tokenize/mwe.py b/nltk/tokenize/mwe.py index e6ecf01327..49bc8efbf9 100644 --- a/nltk/tokenize/mwe.py +++ b/nltk/tokenize/mwe.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Rob Malouf -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tokenize/nist.py b/nltk/tokenize/nist.py index 8d82a52b49..8f36d410e9 100644 --- a/nltk/tokenize/nist.py +++ b/nltk/tokenize/nist.py @@ -4,7 +4,7 @@ # Author: Liling Tan (ported from ftp://jaguar.ncsl.nist.gov/mt/resources/mteval-v14.pl) # Contributors: Ozan Caglayan, Wiktor Stribizew # -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tokenize/punkt.py b/nltk/tokenize/punkt.py index eb74c38898..a08ff4c903 100644 --- a/nltk/tokenize/punkt.py +++ b/nltk/tokenize/punkt.py @@ -7,7 +7,7 @@ # Edward Loper (rewrite) # Joel Nothman 
(almost rewrite) # Arthur Darcet (fixes) -# URL: +# URL: # For license information, see LICENSE.TXT r""" diff --git a/nltk/tokenize/regexp.py b/nltk/tokenize/regexp.py index 302d90f3e8..247b0d1e1a 100644 --- a/nltk/tokenize/regexp.py +++ b/nltk/tokenize/regexp.py @@ -4,7 +4,7 @@ # Author: Edward Loper # Steven Bird # Trevor Cohn -# URL: +# URL: # For license information, see LICENSE.TXT r""" diff --git a/nltk/tokenize/repp.py b/nltk/tokenize/repp.py index c8d3ae59f2..ac7927ddfa 100644 --- a/nltk/tokenize/repp.py +++ b/nltk/tokenize/repp.py @@ -4,7 +4,7 @@ # Authors: Rebecca Dridan and Stephan Oepen # Contributors: Liling Tan # -# URL: +# URL: # For license information, see LICENSE.TXT import os @@ -23,7 +23,7 @@ class ReppTokenizer(TokenizerI): A class for word tokenization using the REPP parser described in Rebecca Dridan and Stephan Oepen (2012) Tokenization: Returning to a Long Solved Problem - A Survey, Contrastive Experiment, Recommendations, - and Toolkit. In ACL. http://anthology.aclweb.org/P/P12/P12-2.pdf#page=406 + and Toolkit. In ACL. https://anthology.aclweb.org/P/P12/P12-2.pdf#page=406 >>> sents = ['Tokenization is widely regarded as a solved problem due to the high accuracy that rulebased tokenizers achieve.' , ... 'But rule-based tokenizers are hard to maintain and their rules language specific.' 
, diff --git a/nltk/tokenize/sexpr.py b/nltk/tokenize/sexpr.py index 1fa7d366d7..4a5a5c0152 100644 --- a/nltk/tokenize/sexpr.py +++ b/nltk/tokenize/sexpr.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Yoav Goldberg # Steven Bird (minor edits) -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tokenize/simple.py b/nltk/tokenize/simple.py index 614bdae1c4..91b363aa0c 100644 --- a/nltk/tokenize/simple.py +++ b/nltk/tokenize/simple.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT r""" diff --git a/nltk/tokenize/sonority_sequencing.py b/nltk/tokenize/sonority_sequencing.py index 27077ca34f..7d9925a6d2 100644 --- a/nltk/tokenize/sonority_sequencing.py +++ b/nltk/tokenize/sonority_sequencing.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Christopher Hench # Alex Estes -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tokenize/stanford.py b/nltk/tokenize/stanford.py index 2e9f072153..97bd9a9fe2 100644 --- a/nltk/tokenize/stanford.py +++ b/nltk/tokenize/stanford.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Xu # -# URL: +# URL: # For license information, see LICENSE.TXT import json diff --git a/nltk/tokenize/stanford_segmenter.py b/nltk/tokenize/stanford_segmenter.py index f489077027..213ac3c204 100644 --- a/nltk/tokenize/stanford_segmenter.py +++ b/nltk/tokenize/stanford_segmenter.py @@ -7,7 +7,7 @@ # Casper Lehmann-Strøm # Alex Constantin # -# URL: +# URL: # For license information, see LICENSE.TXT import json diff --git a/nltk/tokenize/texttiling.py b/nltk/tokenize/texttiling.py index 8917ee2b63..38720144de 100644 --- a/nltk/tokenize/texttiling.py +++ b/nltk/tokenize/texttiling.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: George Boutsioukis # -# URL: +# URL: # For license information, see LICENSE.TXT import math @@ 
-402,7 +402,7 @@ def __init__(self, index, wrdindex_list, original_length=None): del self.__dict__["self"] -# Pasted from the SciPy cookbook: http://www.scipy.org/Cookbook/SignalSmooth +# Pasted from the SciPy cookbook: https://www.scipy.org/Cookbook/SignalSmooth def smooth(x, window_len=11, window="flat"): """smooth the data using a window with requested size. diff --git a/nltk/tokenize/toktok.py b/nltk/tokenize/toktok.py index bf348a58f8..9233ac51e2 100644 --- a/nltk/tokenize/toktok.py +++ b/nltk/tokenize/toktok.py @@ -4,7 +4,7 @@ # Author: Jon Dehdari # Contributors: Liling Tan, Selcuk Ayguney, ikegami, Martijn Pieters # -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -85,7 +85,7 @@ class ToktokTokenizer(TokenizerI): MULTI_DOTS = re.compile(r"(\.{2,})"), r" \1 " # This is the \p{Open_Punctuation} from Perl's perluniprops - # see http://perldoc.perl.org/perluniprops.html + # see https://perldoc.perl.org/perluniprops.html OPEN_PUNCT = str( "([{\u0f3a\u0f3c\u169b\u201a\u201e\u2045\u207d" "\u208d\u2329\u2768\u276a\u276c\u276e\u2770\u2772" diff --git a/nltk/tokenize/treebank.py b/nltk/tokenize/treebank.py index 78e0522387..308987d9d9 100644 --- a/nltk/tokenize/treebank.py +++ b/nltk/tokenize/treebank.py @@ -2,9 +2,9 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# Michael Heilman (re-port from http://www.cis.upenn.edu/~treebank/tokenizer.sed) +# Michael Heilman (re-port from https://www.cis.upenn.edu/~treebank/tokenizer.sed) # -# URL: +# URL: # For license information, see LICENSE.TXT r""" @@ -13,7 +13,7 @@ The Treebank tokenizer uses regular expressions to tokenize text as in Penn Treebank. This implementation is a port of the tokenizer sed script written by Robert McIntyre -and available at http://www.cis.upenn.edu/~treebank/tokenizer.sed. +and available at https://www.cis.upenn.edu/~treebank/tokenizer.sed. 
""" import re diff --git a/nltk/tokenize/util.py b/nltk/tokenize/util.py index 399ead70d9..e6194195ba 100644 --- a/nltk/tokenize/util.py +++ b/nltk/tokenize/util.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT from re import finditer @@ -95,7 +95,7 @@ def spans_to_relative(spans): class CJKChars: """ An object that enumerates the code points of the CJK characters as listed on - http://en.wikipedia.org/wiki/Basic_Multilingual_Plane#Basic_Multilingual_Plane + https://en.wikipedia.org/wiki/Basic_Multilingual_Plane#Basic_Multilingual_Plane This is a Python port of the CJK code point enumerations of Moses tokenizer: https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/detokenizer.perl#L309 diff --git a/nltk/toolbox.py b/nltk/toolbox.py index a605dd8187..2d460a070c 100644 --- a/nltk/toolbox.py +++ b/nltk/toolbox.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Greg Aumann -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/translate/__init__.py b/nltk/translate/__init__.py index ed33e20056..dd1899d5b8 100644 --- a/nltk/translate/__init__.py +++ b/nltk/translate/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird , Tah Wei Hoon -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/translate/api.py b/nltk/translate/api.py index 74141321ec..967f28c521 100644 --- a/nltk/translate/api.py +++ b/nltk/translate/api.py @@ -5,7 +5,7 @@ # Guan Gui # Steven Bird # Tah Wei Hoon -# URL: +# URL: # For license information, see LICENSE.TXT import subprocess diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py index 654609847d..29e93a731e 100644 --- a/nltk/translate/bleu_score.py +++ b/nltk/translate/bleu_score.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim # 
Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan -# URL: +# URL: # For license information, see LICENSE.TXT """BLEU score implementation.""" @@ -28,7 +28,7 @@ def sentence_bleu( Calculate BLEU score (Bilingual Evaluation Understudy) from Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. "BLEU: a method for automatic evaluation of machine translation." - In Proceedings of ACL. http://www.aclweb.org/anthology/P02-1040.pdf + In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', ... 'ensures', 'that', 'the', 'military', 'always', diff --git a/nltk/translate/chrf_score.py b/nltk/translate/chrf_score.py index 6f8d2b4fad..928a9b2a07 100644 --- a/nltk/translate/chrf_score.py +++ b/nltk/translate/chrf_score.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Authors: Maja Popovic # Contributors: Liling Tan, AleÅ¡ Tamchyna (Memsource) -# URL: +# URL: # For license information, see LICENSE.TXT """ ChrF score implementation """ @@ -20,10 +20,10 @@ def sentence_chrf( Calculates the sentence level CHRF (Character n-gram F-score) described in - Maja Popovic. 2015. CHRF: Character n-gram F-score for Automatic MT Evaluation. In Proceedings of the 10th Workshop on Machine Translation. - http://www.statmt.org/wmt15/pdf/WMT49.pdf + https://www.statmt.org/wmt15/pdf/WMT49.pdf - Maja Popovic. 2016. CHRF Deconstructed: β Parameters and n-gram Weights. In Proceedings of the 1st Conference on Machine Translation. - http://www.statmt.org/wmt16/pdf/W16-2341.pdf + https://www.statmt.org/wmt16/pdf/W16-2341.pdf This implementation of CHRF only supports a single reference at the moment. @@ -34,7 +34,7 @@ def sentence_chrf( following options: -nw 0 -b 3 An example from the original BLEU paper - http://www.aclweb.org/anthology/P02-1040.pdf + https://www.aclweb.org/anthology/P02-1040.pdf >>> ref1 = str('It is a guide to action that ensures that the military ' ... 
'will forever heed Party commands').split() diff --git a/nltk/translate/gale_church.py b/nltk/translate/gale_church.py index 81609d6b92..80298c2eb0 100644 --- a/nltk/translate/gale_church.py +++ b/nltk/translate/gale_church.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Torsten Marek # Contributor: Cassidy Laidlaw, Liling Tan -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -11,7 +11,7 @@ A port of the Gale-Church Aligner. Gale & Church (1993), A Program for Aligning Sentences in Bilingual Corpora. -http://aclweb.org/anthology/J93-1004.pdf +https://aclweb.org/anthology/J93-1004.pdf """ diff --git a/nltk/translate/gdfa.py b/nltk/translate/gdfa.py index 5cfa4aa621..2e13a7daf6 100644 --- a/nltk/translate/gdfa.py +++ b/nltk/translate/gdfa.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Authors: Liling Tan -# URL: +# URL: # For license information, see LICENSE.TXT from collections import defaultdict diff --git a/nltk/translate/gleu_score.py b/nltk/translate/gleu_score.py index 4e1b775308..34012ee244 100644 --- a/nltk/translate/gleu_score.py +++ b/nltk/translate/gleu_score.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Authors: # Contributors: Mike Schuster, Michael Wayne Goodman, Liling Tan -# URL: +# URL: # For license information, see LICENSE.TXT """ GLEU score implementation. 
""" diff --git a/nltk/translate/ibm1.py b/nltk/translate/ibm1.py index 5e3e316885..c11a5ae6cc 100644 --- a/nltk/translate/ibm1.py +++ b/nltk/translate/ibm1.py @@ -8,7 +8,7 @@ # Based on earlier version by: # Will Zhang # Guan Gui -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/translate/ibm2.py b/nltk/translate/ibm2.py index e8ffdaf2ed..ad54c41ace 100644 --- a/nltk/translate/ibm2.py +++ b/nltk/translate/ibm2.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2013 NLTK Project # Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/translate/ibm3.py b/nltk/translate/ibm3.py index f39cac42cf..2b04597818 100644 --- a/nltk/translate/ibm3.py +++ b/nltk/translate/ibm3.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2013 NLTK Project # Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/translate/ibm4.py b/nltk/translate/ibm4.py index a2b335dbc1..f65aa27313 100644 --- a/nltk/translate/ibm4.py +++ b/nltk/translate/ibm4.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Tah Wei Hoon -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/translate/ibm5.py b/nltk/translate/ibm5.py index 78c8338890..bdf80360b2 100644 --- a/nltk/translate/ibm5.py +++ b/nltk/translate/ibm5.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Tah Wei Hoon -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/translate/ibm_model.py b/nltk/translate/ibm_model.py index 48162a5b78..94432cb70c 100644 --- a/nltk/translate/ibm_model.py +++ b/nltk/translate/ibm_model.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Tah Wei Hoon -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/translate/meteor_score.py b/nltk/translate/meteor_score.py index 
5c2c580203..9eb77c2daf 100644 --- a/nltk/translate/meteor_score.py +++ b/nltk/translate/meteor_score.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Uday Krishna # Contributor: Tom Aarsen -# URL: +# URL: # For license information, see LICENSE.TXT @@ -294,7 +294,7 @@ def single_meteor_score( "Meteor: An Automatic Metric for MT Evaluation with HighLevels of Correlation with Human Judgments" by Alon Lavie and Abhaya Agarwal, in Proceedings of ACL. - http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf + https://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', 'that', 'the', 'military', 'always', 'obeys', 'the', 'commands', 'of', 'the', 'party'] @@ -359,7 +359,7 @@ def meteor_score( described in "Meteor: An Automatic Metric for MT Evaluation with HighLevels of Correlation with Human Judgments" by Alon Lavie and Abhaya Agarwal, in Proceedings of ACL. - http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf + https://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf In case of multiple references the best score is chosen. This method diff --git a/nltk/translate/metrics.py b/nltk/translate/metrics.py index 3235565ae8..3e2f5a1ea7 100644 --- a/nltk/translate/metrics.py +++ b/nltk/translate/metrics.py @@ -4,7 +4,7 @@ # Author: Will Zhang # Guan Gui # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/translate/nist_score.py b/nltk/translate/nist_score.py index 40bf579667..731f6f0ada 100644 --- a/nltk/translate/nist_score.py +++ b/nltk/translate/nist_score.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Authors: # Contributors: -# URL: +# URL: # For license information, see LICENSE.TXT """NIST score implementation.""" @@ -20,7 +20,7 @@ def sentence_nist(references, hypothesis, n=5): Calculate NIST score from George Doddington. 2002. 
"Automatic evaluation of machine translation quality using n-gram co-occurrence statistics." Proceedings of HLT. - Morgan Kaufmann Publishers Inc. http://dl.acm.org/citation.cfm?id=1289189.1289273 + Morgan Kaufmann Publishers Inc. https://dl.acm.org/citation.cfm?id=1289189.1289273 DARPA commissioned NIST to develop an MT evaluation facility based on the BLEU score. The official script used by NIST to compute BLEU and NIST score is diff --git a/nltk/translate/phrase_based.py b/nltk/translate/phrase_based.py index 60442c0e4f..2bf417e70b 100644 --- a/nltk/translate/phrase_based.py +++ b/nltk/translate/phrase_based.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Authors: Liling Tan, Fredrik Hedman, Petra Barancikova -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/nltk/translate/ribes_score.py b/nltk/translate/ribes_score.py index 63d87ac8e7..c4538d7bf0 100644 --- a/nltk/translate/ribes_score.py +++ b/nltk/translate/ribes_score.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Contributors: Katsuhito Sudoh, Liling Tan, Kasramvd, J.F.Sebastian # Mark Byers, ekhumoro, P. Ortiz -# URL: +# URL: # For license information, see LICENSE.TXT """ RIBES score implementation """ @@ -19,7 +19,7 @@ def sentence_ribes(references, hypothesis, alpha=0.25, beta=0.10): Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh and Hajime Tsukada. 2010. "Automatic Evaluation of Translation Quality for Distant Language Pairs". In Proceedings of EMNLP. - http://www.aclweb.org/anthology/D/D10/D10-1092.pdf + https://www.aclweb.org/anthology/D/D10/D10-1092.pdf The generic RIBES scores used in shared task, e.g. Workshop for Asian Translation (WAT) uses the following RIBES calculations: @@ -33,7 +33,7 @@ def sentence_ribes(references, hypothesis, alpha=0.25, beta=0.10): Users are encouraged to use the official RIBES script instead of this implementation when evaluating your machine translation system. 
Refer - to http://www.kecl.ntt.co.jp/icl/lirg/ribes/ for the official script. + to https://www.kecl.ntt.co.jp/icl/lirg/ribes/ for the official script. :param references: a list of reference sentences :type references: list(list(str)) diff --git a/nltk/translate/stack_decoder.py b/nltk/translate/stack_decoder.py index 50c8097c65..786c1d7b08 100644 --- a/nltk/translate/stack_decoder.py +++ b/nltk/translate/stack_decoder.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Tah Wei Hoon -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/tree.py b/nltk/tree.py index a7c66de2d1..c9acd7d8f2 100644 --- a/nltk/tree.py +++ b/nltk/tree.py @@ -6,7 +6,7 @@ # Peter Ljunglöf # Nathan Bodenstab (tree transforms) # Eric Kafe (Tree.fromlist()) -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -825,7 +825,7 @@ def _repr_png_(self): except LookupError as e: pre_error_message = str( "The Ghostscript executable isn't found.\n" - "See http://web.mit.edu/ghostscript/www/Install.htm\n" + "See https://web.mit.edu/ghostscript/www/Install.htm\n" "If you're using a Mac, you can try installing\n" "https://docs.brew.sh/Installation then `brew install ghostscript`" ) @@ -905,7 +905,7 @@ def pformat_latex_qtree(self): \Tree [.I'' [.N'' [.D The ] [.N' [.N announcement ] ] ] [.I' [.V'' [.V' [.V astounded ] [.N'' [.N' [.N us ] ] ] ] ] ] ] - See http://www.ling.upenn.edu/advice/latex.html for the LaTeX + See https://www.ling.upenn.edu/advice/latex.html for the LaTeX style file for the qtree package. :return: A latex qtree representation of this tree. 
diff --git a/nltk/treeprettyprinter.py b/nltk/treeprettyprinter.py index 79a6954a95..e24882fe47 100644 --- a/nltk/treeprettyprinter.py +++ b/nltk/treeprettyprinter.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Andreas van Cranenburgh # Peter Ljunglöf -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -14,7 +14,7 @@ Interesting reference (not used for this code): T. Eschbach et al., Orth. Hypergraph Drawing, Journal of Graph Algorithms and Applications, 10(2) 141--157 (2006)149. -http://jgaa.info/accepted/2006/EschbachGuentherBecker2006.10.2.pdf +https://jgaa.info/accepted/2006/EschbachGuentherBecker2006.10.2.pdf """ import re @@ -502,7 +502,7 @@ def svg(self, nodecolor="blue", leafcolor="red", funccolor="green"): width = max(col for _, col in self.coords.values()) height = max(row for row, _ in self.coords.values()) result = [ - '' % ( width * 3, diff --git a/nltk/treetransforms.py b/nltk/treetransforms.py index 2802497ea6..5b89d1a017 100644 --- a/nltk/treetransforms.py +++ b/nltk/treetransforms.py @@ -2,7 +2,7 @@ # # Copyright (C) 2005-2007 Oregon Graduate Institute # Author: Nathan Bodenstab -# URL: +# URL: # For license information, see LICENSE.TXT r""" @@ -90,7 +90,7 @@ these values. For more information see: Dan Klein and Chris Manning (2003) "Accurate Unlexicalized - Parsing", ACL-03. http://www.aclweb.org/anthology/P03-1054 + Parsing", ACL-03. https://www.aclweb.org/anthology/P03-1054 4. 
Unary Collapsing diff --git a/nltk/twitter/__init__.py b/nltk/twitter/__init__.py index 9775f764d9..6f975dd2bf 100644 --- a/nltk/twitter/__init__.py +++ b/nltk/twitter/__init__.py @@ -2,7 +2,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/twitter/api.py b/nltk/twitter/api.py index bb13f34795..dbb1adbfa4 100644 --- a/nltk/twitter/api.py +++ b/nltk/twitter/api.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein # Lorenzo Rubio -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/twitter/common.py b/nltk/twitter/common.py index aa1616029d..58b339e31e 100644 --- a/nltk/twitter/common.py +++ b/nltk/twitter/common.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein # Lorenzo Rubio -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/twitter/twitter_demo.py b/nltk/twitter/twitter_demo.py index a5126202a9..5d24c7b4e1 100644 --- a/nltk/twitter/twitter_demo.py +++ b/nltk/twitter/twitter_demo.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein # Lorenzo Rubio -# URL: +# URL: # For license information, see LICENSE.TXT """ @@ -12,7 +12,7 @@ These demo functions should all run, with the following caveats: * You must have obtained API keys from Twitter, and installed them according to - the instructions in the `twitter HOWTO `_. + the instructions in the `twitter HOWTO `_. * If you are on a slow network, some of the calls to the Twitter API may timeout. 
diff --git a/nltk/twitter/twitterclient.py b/nltk/twitter/twitterclient.py index 3070379265..621e568ad5 100644 --- a/nltk/twitter/twitterclient.py +++ b/nltk/twitter/twitterclient.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein # Lorenzo Rubio -# URL: +# URL: # For license information, see LICENSE.TXT @@ -42,7 +42,7 @@ class Streamer(TwythonStreamer): Retrieve data from the Twitter Streaming API. The streaming API requires - `OAuth 1.0 `_ authentication. + `OAuth 1.0 `_ authentication. """ def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret): diff --git a/nltk/twitter/util.py b/nltk/twitter/util.py index b76f372226..26bef4d4c4 100644 --- a/nltk/twitter/util.py +++ b/nltk/twitter/util.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Ewan Klein # Lorenzo Rubio -# URL: +# URL: # For license information, see LICENSE.TXT """ diff --git a/nltk/util.py b/nltk/util.py index 2b41871fa3..721b1f5d0d 100644 --- a/nltk/util.py +++ b/nltk/util.py @@ -3,7 +3,7 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Steven Bird # Eric Kafe (acyclic closures) -# URL: +# URL: # For license information, see LICENSE.TXT import bisect @@ -586,8 +586,8 @@ def unweighted_minimum_spanning_tree(tree, children=iter): # Guess Character Encoding ########################################################################## -# adapted from io.py in the docutils extension module (http://docutils.sourceforge.net) -# http://www.pyzine.com/Issue008/Section_Articles/article_Encodings.html +# adapted from io.py in the docutils extension module (https://docutils.sourceforge.io/) +# https://www.pyzine.com/Issue008/Section_Articles/article_Encodings.html def guess_encoding(data): @@ -974,7 +974,7 @@ def skipgrams(sequence, n, k, **kwargs): """ Returns all possible skipgrams generated from a sequence of items, as an iterator. Skipgrams are ngrams that allows tokens to be skipped. 
- Refer to http://homepages.inf.ed.ac.uk/ballison/pdf/lrec_skipgrams.pdf + Refer to https://homepages.inf.ed.ac.uk/ballison/pdf/lrec_skipgrams.pdf >>> sent = "Insurgents killed in ongoing fighting".split() >>> list(skipgrams(sent, 2, 2)) @@ -1098,7 +1098,7 @@ def set_proxy(proxy, user=None, password=""): settings. :param proxy: The HTTP proxy server to use. For example: - 'http://proxy.example.com:3128/' + 'https://proxy.example.com:3128/' :param user: The username to authenticate with. Use None to disable authentication. :param password: The password to authenticate with. @@ -1126,7 +1126,7 @@ def set_proxy(proxy, user=None, password=""): ###################################################################### -# ElementTree pretty printing from http://www.effbot.org/zone/element-lib.htm +# ElementTree pretty printing from https://www.effbot.org/zone/element-lib.htm ###################################################################### diff --git a/nltk/wsd.py b/nltk/wsd.py index fc04f3f03a..d5b15fb20c 100644 --- a/nltk/wsd.py +++ b/nltk/wsd.py @@ -4,7 +4,7 @@ # Dmitrijs Milajevs # # Copyright (C) 2001-2021 NLTK Project -# URL: +# URL: # For license information, see LICENSE.TXT from nltk.corpus import wordnet @@ -31,7 +31,7 @@ def lesk(context_sentence, ambiguous_word, pos=None, synsets=None): readable dictionaries: how to tell a pine cone from an ice cream cone." Proceedings of the 5th Annual International Conference on Systems Documentation. ACM, 1986. - http://dl.acm.org/citation.cfm?id=318728 + https://dl.acm.org/citation.cfm?id=318728 """ context = set(context_sentence) diff --git a/setup.py b/setup.py index eb32f49f9d..c59e7839b8 100644 --- a/setup.py +++ b/setup.py @@ -4,11 +4,11 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: NLTK Team -# URL: +# URL: # For license information, see LICENSE.TXT # Work around mbcs bug in distutils. 
-# http://bugs.python.org/issue10945 +# https://bugs.python.org/issue10945 import codecs try: @@ -54,7 +54,7 @@ nltk=nltk.cli:cli """ -_project_homepage = "http://nltk.org/" +_project_homepage = "https://www.nltk.org/" setup( name="nltk", diff --git a/tools/find_deprecated.py b/tools/find_deprecated.py index 94f1332eab..6439ad4cc8 100755 --- a/tools/find_deprecated.py +++ b/tools/find_deprecated.py @@ -4,7 +4,7 @@ # # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper -# URL: +# URL: # For license information, see LICENSE.TXT diff --git a/tools/github_actions/third-party.sh b/tools/github_actions/third-party.sh index b8c65b6e5d..24a0cf41ee 100644 --- a/tools/github_actions/third-party.sh +++ b/tools/github_actions/third-party.sh @@ -15,7 +15,7 @@ stanford_corenlp_package_zip_name="stanford-corenlp-full-2017-06-09.zip" stanford_corenlp_package_name=${BASH_REMATCH[1]} if [[ ! -d ${stanford_corenlp_package_name} ]]; then curl -L "https://nlp.stanford.edu/software/$stanford_corenlp_package_zip_name" -o ${stanford_corenlp_package_zip_name} - # wget -nv "http://nlp.stanford.edu/software/$stanford_corenlp_package_zip_name" + # wget -nv "https://nlp.stanford.edu/software/$stanford_corenlp_package_zip_name" unzip -q ${stanford_corenlp_package_zip_name} rm ${stanford_corenlp_package_zip_name} mv ${stanford_corenlp_package_name} 'stanford-corenlp' diff --git a/tools/global_replace.py b/tools/global_replace.py index c118ba9e22..8805535cd0 100755 --- a/tools/global_replace.py +++ b/tools/global_replace.py @@ -5,11 +5,11 @@ # Copyright (C) 2001-2021 NLTK Project # Author: Edward Loper # Steven Bird -# URL: +# URL: # For license information, see LICENSE.TXT # NB Should work on all platforms, -# http://www.python.org/doc/2.5.2/lib/os-file-dir.html +# https://www.python.org/doc/2.5.2/lib/os-file-dir.html import os import stat diff --git a/tools/svnmime.py b/tools/svnmime.py index 07f24ef486..6d61d85aa5 100755 --- a/tools/svnmime.py +++ b/tools/svnmime.py @@ -2,7 +2,7 @@ # 
NB, this wouldn't be needed if everyone had .subversion/config # configured to automatically set mime types -# http://code.google.com/p/support/wiki/FAQ +# https://code.google.com/p/support/wiki/FAQ import os diff --git a/tools/travis/third-party.sh b/tools/travis/third-party.sh index 57971b3724..952f7f45b0 100644 --- a/tools/travis/third-party.sh +++ b/tools/travis/third-party.sh @@ -13,7 +13,7 @@ stanford_corenlp_package_zip_name="stanford-corenlp-full-2017-06-09.zip" [[ ${stanford_corenlp_package_zip_name} =~ (.+)\.zip ]] stanford_corenlp_package_name=${BASH_REMATCH[1]} if [[ ! -d ${stanford_corenlp_package_name} ]]; then - wget -nv "http://nlp.stanford.edu/software/$stanford_corenlp_package_zip_name" + wget -nv "https://nlp.stanford.edu/software/$stanford_corenlp_package_zip_name" unzip ${stanford_corenlp_package_zip_name} rm ${stanford_corenlp_package_zip_name} ln -sf ${stanford_corenlp_package_name} 'stanford-corenlp' diff --git a/tox.ini b/tox.ini index b97d416505..d20238e005 100644 --- a/tox.ini +++ b/tox.ini @@ -81,7 +81,7 @@ commands = pytest # Use minor version agnostic basepython, but specify testenv # control Python2/3 versions using jenkins' user-defined matrix instead. 
-# Available Python versions: http://repository-cloudbees.forge.cloudbees.com/distributions/ci-addons/python/fc25/ +# Available Python versions: https://repository-cloudbees.forge.cloudbees.com/distributions/ci-addons/python/fc25/ [testenv:py3.6.4-jenkins] basepython = python3 diff --git a/web/contribute.rst b/web/contribute.rst index adccedd72a..2a03a995c0 100644 --- a/web/contribute.rst +++ b/web/contribute.rst @@ -9,5 +9,5 @@ Information for contributors: * `contributing to NLTK `_ * `desired enhancements `_ * `contribute a corpus `_ -* `nltk-dev mailing list `_ +* `nltk-dev mailing list `_ * `GitHub Project `_ diff --git a/web/data.rst b/web/data.rst index 9fdb9edc20..dfcc1e0f3c 100644 --- a/web/data.rst +++ b/web/data.rst @@ -1,9 +1,9 @@ Installing NLTK Data ==================== -NLTK comes with many corpora, toy grammars, trained models, etc. A complete list is posted at: http://nltk.org/nltk_data/ +NLTK comes with many corpora, toy grammars, trained models, etc. A complete list is posted at: https://www.nltk.org/nltk_data/ -To install the data, first install NLTK (see http://nltk.org/install.html), then use NLTK's data downloader as described below. +To install the data, first install NLTK (see https://www.nltk.org/install.html), then use NLTK's data downloader as described below. Apart from individual data packages, you can download the entire collection (using "all"), or just the data required for the examples and exercises in the book (using "book"), or just the corpora and no grammars or trained models (using "all-corpora"). @@ -32,7 +32,7 @@ Installing via a proxy web server If your web connection uses a proxy server, you should specify the proxy address as follows. In the case of an authenticating proxy, specify a username and password. If the proxy is set to None then this function will attempt to detect the system proxy. 
- >>> nltk.set_proxy('http://proxy.example.com:3128', ('USERNAME', 'PASSWORD')) + >>> nltk.set_proxy('https://proxy.example.com:3128', ('USERNAME', 'PASSWORD')) >>> nltk.download() Command line installation @@ -54,7 +54,7 @@ Create a folder ``nltk_data``, e.g. ``C:\nltk_data``, or ``/usr/local/share/nltk and subfolders ``chunkers``, ``grammars``, ``misc``, ``sentiment``, ``taggers``, ``corpora``, ``help``, ``models``, ``stemmers``, ``tokenizers``. -Download individual packages from ``http://nltk.org/nltk_data/`` (see the "download" links). +Download individual packages from ``https://www.nltk.org/nltk_data/`` (see the "download" links). Unzip them to the appropriate subfolder. For example, the Brown Corpus, found at: ``https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip`` is to be unzipped to ``nltk_data/corpora/brown``. diff --git a/web/dev/jenkins.rst b/web/dev/jenkins.rst index 0a210a046f..e87eea1e2e 100644 --- a/web/dev/jenkins.rst +++ b/web/dev/jenkins.rst @@ -10,9 +10,9 @@ to their FLOSS program. The setup is not specific to their solutions, it could be moved to any `Jenkins`_ instance. The URL of our current instance is https://jenkins.shiningpanda.com/nltk/ -.. _`continuous integration`: http://en.wikipedia.org/wiki/Continuous_integration +.. _`continuous integration`: https://en.wikipedia.org/wiki/Continuous_integration .. _`Shining Panda`: http://shiningpanda.com -.. _`Jenkins`: http://jenkins-ci.org +.. _`Jenkins`: https://jenkins-ci.org Base tasks @@ -80,9 +80,9 @@ The results of these programs are parsed and published by the jenkins instance, giving us pretty graphs :) .. _pytest: https://docs.pytest.org/ -.. _`pytest-cov`: http://pytest-cov.readthedocs.io/ -.. _`PEP-8`: http://www.python.org/dev/peps/pep-0008/ -.. _`pylint`: http://www.logilab.org/project/pylint +.. _`pytest-cov`: https://pytest-cov.readthedocs.io/ +.. _`PEP-8`: https://www.python.org/dev/peps/pep-0008/ +.. 
_`pylint`: https://www.logilab.org/project/pylint The builds @@ -93,7 +93,7 @@ The packages are built using ``make dist``. The outputted builds are all placed specifically for mac are not available. File names are made based on the ``__version__`` string, so they change every build. -.. _`in our jenkins workspace`: http://example.com/ +.. _`in our jenkins workspace`: https://example.com/ Web page builder @@ -105,5 +105,5 @@ pushes it to the `nltk.github.com repo on github`_. To push it, it needs access to the repo – because this cannot be done using a deploy key, it has the ssh key of the ``nltk-webdeploy`` user. -.. _Sphinx: http://sphinx.pocoo.org +.. _Sphinx: https://www.sphinx-doc.org .. _`nltk.github.com repo on github`: https://github.com/nltk/nltk.github.com diff --git a/web/dev/local_testing.rst b/web/dev/local_testing.rst index d72d67ed32..8e39fc4b00 100644 --- a/web/dev/local_testing.rst +++ b/web/dev/local_testing.rst @@ -20,7 +20,7 @@ NLTK testing It may take a long time at first run, but the subsequent runs will be much faster. -Please consult http://tox.testrun.org/ for more info about the tox tool. +Please consult https://tox.wiki for more info about the tox tool. Examples -------- diff --git a/web/index.rst b/web/index.rst index c83f029e4d..51557025c8 100644 --- a/web/index.rst +++ b/web/index.rst @@ -3,10 +3,10 @@ Natural Language Toolkit NLTK is a leading platform for building Python programs to work with human language data. It provides easy-to-use interfaces to `over 50 corpora and lexical -resources `_ such as WordNet, +resources `_ such as WordNet, along with a suite of text processing libraries for classification, tokenization, stemming, tagging, parsing, and semantic reasoning, wrappers for industrial-strength NLP libraries, -and an active `discussion forum `_. +and an active `discussion forum `_. 
Thanks to a hands-on guide introducing programming fundamentals alongside topics in computational linguistics, plus comprehensive API documentation, NLTK is suitable for linguists, engineers, students, educators, researchers, and industry users alike. @@ -15,13 +15,13 @@ NLTK is available for Windows, Mac OS X, and Linux. Best of all, NLTK is a free, NLTK has been called "a wonderful tool for teaching, and working in, computational linguistics using Python," and "an amazing library to play with natural language." -`Natural Language Processing with Python `_ provides a practical +`Natural Language Processing with Python `_ provides a practical introduction to programming for language processing. Written by the creators of NLTK, it guides the reader through the fundamentals of writing Python programs, working with corpora, categorizing text, analyzing linguistic structure, and more. The online version of the book has been been updated for Python 3 and NLTK 3. -(The original Python 2 version is still available at `http://nltk.org/book_1ed `_.) +(The original Python 2 version is still available at `https://www.nltk.org/book_1ed `_.) Some simple things you can do with NLTK --------------------------------------- @@ -66,8 +66,8 @@ follows: Next Steps ---------- -* `sign up for release announcements `_ -* `join in the discussion `_ +* `sign up for release announcements `_ +* `join in the discussion `_ .. toctree:: @@ -76,7 +76,7 @@ Next Steps :caption: NLTK Documentation API Reference - Example Usage + Example Usage Module Index Wiki FAQ diff --git a/web/install.rst b/web/install.rst index 5dfc91f0b0..248651b9de 100644 --- a/web/install.rst +++ b/web/install.rst @@ -19,7 +19,7 @@ Mac/Unix #. Install Numpy (optional): run ``pip install --user -U numpy`` #. 
Test installation: run ``python`` then type ``import nltk`` -For older versions of Python it might be necessary to install setuptools (see http://pypi.python.org/pypi/setuptools) and to install pip (``sudo easy_install pip``). +For older versions of Python it might be necessary to install setuptools (see https://pypi.python.org/pypi/setuptools) and to install pip (``sudo easy_install pip``). Windows ------- @@ -29,9 +29,9 @@ These instructions assume that you do not already have Python installed on your 32-bit binary installation ~~~~~~~~~~~~~~~~~~~~~~~~~~ -#. Install Python 3.8: http://www.python.org/downloads/ (avoid the 64-bit versions) +#. Install Python 3.8: https://www.python.org/downloads/ (avoid the 64-bit versions) #. Install Numpy (optional): https://www.scipy.org/scipylib/download.html -#. Install NLTK: http://pypi.python.org/pypi/nltk +#. Install NLTK: https://pypi.python.org/pypi/nltk #. Test installation: ``Start>Python38``, then type ``import nltk`` Installing Third-Party Software @@ -47,4 +47,4 @@ After installing the NLTK package, please do install the necessary datasets/mode If you're unsure of which datasets/models you'll need, you can install the "popular" subset of NLTK data, on the command line type ``python -m nltk.downloader popular``, or in the Python interpreter ``import nltk; nltk.download('popular')`` -For details, see http://www.nltk.org/data.html +For details, see https://www.nltk.org/data.html diff --git a/web/news.rst b/web/news.rst index d35ebaff8e..63914dc9f8 100644 --- a/web/news.rst +++ b/web/news.rst @@ -172,8 +172,8 @@ NLTK 3.0.0b2 released : August 2014 Minor bugfixes and clean-ups. NLTK Book Updates : July 2014 - The NLTK book is being updated for Python 3 and NLTK 3 `here `_. - The original Python 2 edition is still available `here `_. + The NLTK book is being updated for Python 3 and NLTK 3 `here `_. + The original Python 2 edition is still available `here `_. 
NLTK 3.0.0b1 released : July 2014 FrameNet, SentiWordNet, universal tagset, misc efficiency improvements and bugfixes @@ -184,34 +184,34 @@ NLTK 3.0a4 released : June 2014 Several API changes, see https://github.com/nltk/nltk/wiki/Porting-your-code-to-NLTK-3.0 For full details see: https://github.com/nltk/nltk/blob/develop/ChangeLog - http://nltk.org/nltk3-alpha/ + https://www.nltk.org/nltk3-alpha/ 2013 ---- NLTK Book Updates : October 2013 We are updating the NLTK book for Python 3 and NLTK 3; please see - http://nltk.org/book3/ + https://www.nltk.org/book3/ NLTK 3.0a2 released : July 2013 Misc efficiency improvements and bugfixes; for details see https://github.com/nltk/nltk/blob/develop/ChangeLog - http://nltk.org/nltk3-alpha/ + https://www.nltk.org/nltk3-alpha/ NLTK 3.0a1 released : February 2013 This version adds support for NLTK's graphical user interfaces. - http://nltk.org/nltk3-alpha/ + https://www.nltk.org/nltk3-alpha/ NLTK 3.0a0 released : January 2013 The first alpha release of NLTK 3.0 is now available for testing. This version of NLTK works with Python 2.6, 2.7, and Python 3. - http://nltk.org/nltk3-alpha/ + https://www.nltk.org/nltk3-alpha/ 2012 ---- Python Grant : November 2012 The Python Software Foundation is sponsoring Mikhail Korobov's work on porting NLTK to Python 3. - http://pyfound.blogspot.hu/2012/11/grants-to-assist-kivy-nltk-in-porting.html + https://pyfound.blogspot.hu/2012/11/grants-to-assist-kivy-nltk-in-porting.html NLTK 2.0.4 released : November 2012 Minor fix to remove numpy dependency. @@ -238,7 +238,7 @@ NLTK 2.0.1rc2 released : December 2011 The second release candidate for NLTK 2. For full details see the ChangeLog. NLTK development moved to GitHub : October 2011 - The development site for NLTK has moved from GoogleCode to GitHub: http://github.com/nltk + The development site for NLTK has moved from GoogleCode to GitHub: https://github.com/nltk NLTK 2.0.1rc1 released : April 2011 The first release candidate for NLTK 2. 
For full details see the ChangeLog. @@ -250,13 +250,13 @@ Python Text Processing with NLTK 2.0 Cookbook : December 2010 Jacob Perkins has written a 250-page cookbook full of great recipes for text processing using Python and NLTK, published by Packt Publishing. Some of the royalties are being donated to the NLTK project. Japanese translation of NLTK book : November 2010 - Masato Hagiwara has translated the NLTK book into Japanese, along with an extra chapter on particular issues with Japanese language process. See http://www.oreilly.co.jp/books/9784873114705/. + Masato Hagiwara has translated the NLTK book into Japanese, along with an extra chapter on particular issues with Japanese language process. See https://www.oreilly.co.jp/books/9784873114705/. NLTK 2.0b9 released : July 2010 The last beta release before 2.0 final. For full details see the ChangeLog. NLTK in Ubuntu 10.4 (Lucid Lynx) : February 2010 - NLTK is now in the latest LTS version of Ubuntu, thanks to the efforts of Robin Munn. See http://packages.ubuntu.com/lucid/python/python-nltk + NLTK is now in the latest LTS version of Ubuntu, thanks to the efforts of Robin Munn. See https://packages.ubuntu.com/lucid/python/python-nltk NLTK 2.0b? released : June 2009 - February 2010 Bugfix releases in preparation for 2.0 final. For full details see the ChangeLog. @@ -268,7 +268,7 @@ NLTK Book in second printing : December 2009 The second print run of Natural Language Processing with Python will go on sale in January. We've taken the opportunity to make about 40 minor corrections. The online version has been updated. NLTK Book published : June 2009 - Natural Language Processing with Python, by Steven Bird, Ewan Klein and Edward Loper, has been published by O'Reilly Media Inc. It can be purchased in hardcopy, ebook, PDF or for online access, at http://oreilly.com/catalog/9780596516499/. For information about sellers and prices, see https://isbndb.com/d/book/natural_language_processing_with_python/prices.html. 
+ Natural Language Processing with Python, by Steven Bird, Ewan Klein and Edward Loper, has been published by O'Reilly Media Inc. It can be purchased in hardcopy, ebook, PDF or for online access, at https://oreilly.com/catalog/9780596516499/. For information about sellers and prices, see https://isbndb.com/d/book/natural_language_processing_with_python/prices.html. Version 0.9.9 released : May 2009 This version finalizes NLTK's API ahead of the 2.0 release and the publication of the NLTK book. There have been dozens of minor enhancements and bugfixes. Many names of the form nltk.foo.Bar are now available as nltk.Bar. There is expanded functionality in the decision tree, collocations, and Toolbox modules. A new translation toy nltk.misc.babelfish has been added. A new module nltk.help gives access to tagset documentation. Fixed imports so NLTK will build and install without Tkinter (for running on servers). New data includes a maximum entropy chunker model and updated grammars. NLTK Contrib includes updates to the coreference package (Joseph Frazee) and the ISRI Arabic stemmer (Hosam Algasaier). The book has undergone substantial editorial corrections ahead of final publication. For full details see the ChangeLog. @@ -300,19 +300,19 @@ Version 0.9.3 released : June 2008 This version contains an improved WordNet? similarity module using pre-built information content files (included in the corpus distribution), new/improved interfaces to Weka, MEGAM and Prover9/Mace4 toolkits, improved Unicode support for corpus readers, a BNC corpus reader, and a rewrite of the Punkt sentence segmenter contributed by Joel Nothman. NLTK-Contrib includes an implementation of incremental algorithm for generating referring expression contributed by Margaret Mitchell. For full details see the ChangeLog. NLTK presented at LinuxFest Northwest : April 2008 - Sean Boisen presented NLTK at LinuxFest Northwest, which took place in Bellingham, Washington. 
His presentation slides are available at: http://semanticbible.com/other/talks/2008/nltk/main.html + Sean Boisen presented NLTK at LinuxFest Northwest, which took place in Bellingham, Washington. His presentation slides are available at: https://semanticbible.com/other/talks/2008/nltk/main.html NLTK in Google Summer of Code : April 2008 Google Summer of Code will sponsor two NLTK projects. Jason Narad won funding for a project on dependency parsers in NLTK (mentored by Sebastian Riedel and Jason Baldridge). Petro Verkhogliad won funding for a project on natural language generation in NLTK (mentored by Robert Dale and Edward Loper). Python Software Foundation adopts NLTK for Google Summer of Code application : March 2008 - The Python Software Foundation has listed NLTK projects for sponsorship from the 2008 Google Summer of Code program. For details please see http://wiki.python.org/moin/SummerOfCode. + The Python Software Foundation has listed NLTK projects for sponsorship from the 2008 Google Summer of Code program. For details please see https://wiki.python.org/moin/SummerOfCode. Version 0.9.2 released : March 2008 This version contains a new inference module linked to the Prover9/Mace4 theorem-prover and model checker (Dan Garrette, Ewan Klein). It also includes the VerbNet? and PropBank? corpora along with corpus readers. A bug in the Reuters corpus reader has been fixed. NLTK-Contrib includes new work on the WordNet? browser (Jussi Salmela). 
For full details see the ChangeLog Youtube video about NLTK : January 2008 - The video from of the NLTK talk at the Bay Area Python Interest Group last July has been posted at http://www.youtube.com/watch?v=keXW_5-llD0 (1h15m) + The video from of the NLTK talk at the Bay Area Python Interest Group last July has been posted at https://www.youtube.com/watch?v=keXW_5-llD0 (1h15m) Version 0.9.1 released : January 2008 This version contains new support for accessing text categorization corpora, along with several corpora categorized for topic, genre, question type, or sentiment. It includes several new corpora: Question classification data (Li & Roth), Reuters 21578 Corpus, Movie Reviews corpus (Pang & Lee), Recognising Textual Entailment (RTE) Challenges. NLTK-Contrib includes expanded support for semantics (Dan Garrette), readability scoring (Thomas Jakobsen, Thomas Skardal), and SIL Toolbox (Greg Aumann). The book contains many improvements in early chapters in response to reader feedback. For full details see the ChangeLog. 
diff --git a/web/team.rst b/web/team.rst index 3388004b30..5399487ded 100644 --- a/web/team.rst +++ b/web/team.rst @@ -5,8 +5,8 @@ The NLTK project is led by `Steven Bird and Liling Tan `_, Austin, USA (``nltk.sem, nltk.inference``) -:Parsing: `Peter Ljunglöf `_, Gothenburg, Sweden (``nltk.parse, nltk.featstruct``) +:Parsing: `Peter Ljunglöf `_, Gothenburg, Sweden (``nltk.parse, nltk.featstruct``) :Metrics: `Joel Nothman `_, Sydney, Australia (``nltk.metrics, nltk.tokenize.punkt``) :Python 3: `Mikhail Korobov `_, Ekaterinburg, Russia -:Releases: `Steven Bird `_, Melbourne, Australia +:Releases: `Steven Bird `_, Melbourne, Australia :NLTK-Users: `Alexis Dimitriadis `_, Utrecht, Netherlands From ffcf778806151201a402776dea90519e583c1276 Mon Sep 17 00:00:00 2001 From: Tom Aarsen Date: Mon, 11 Oct 2021 23:00:13 +0200 Subject: [PATCH 2/5] Increase verbosity of pytest to identify error --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5fafe82176..53103466a6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -124,4 +124,4 @@ jobs: - name: Run pytest shell: bash run: | - pytest --numprocesses auto -rsx --doctest-modules nltk/test + pytest -v --numprocesses auto -rsx --doctest-modules nltk/test From e809bc8f704cc962286753a77345bc1280084270 Mon Sep 17 00:00:00 2001 From: Tom Aarsen Date: Mon, 11 Oct 2021 23:15:34 +0200 Subject: [PATCH 3/5] Revert to http for localhost web browsers --- nltk/app/wordnet_app.py | 4 ++-- nltk/parse/corenlp.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/nltk/app/wordnet_app.py b/nltk/app/wordnet_app.py index a572fa6a4a..e9c89e3706 100644 --- a/nltk/app/wordnet_app.py +++ b/nltk/app/wordnet_app.py @@ -9,7 +9,7 @@ """ A WordNet Browser application which launches the default browser (if it is not already running) and opens a new tab with a connection -to https://localhost:port/ . 
It also starts an HTTP server on the +to http://localhost:port/ . It also starts an HTTP server on the specified port and begins serving browser requests. The default port is 8000. (For command-line help, run "python wordnet -h") This application requires that the user's web browser supports @@ -226,7 +226,7 @@ def wnb(port=8000, runBrowser=True, logfilename=None): logfile = None # Compute URL and start web browser - url = "https://localhost:" + str(port) + url = "http://localhost:" + str(port) server_ready = None browser_thread = None diff --git a/nltk/parse/corenlp.py b/nltk/parse/corenlp.py index 953400d706..77c553c32d 100644 --- a/nltk/parse/corenlp.py +++ b/nltk/parse/corenlp.py @@ -77,7 +77,7 @@ def __init__( else: try_port(port) - self.url = f"https://localhost:{port}" + self.url = f"http://localhost:{port}" model_jar = max( find_jar_iter( @@ -176,7 +176,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): """Interface to the CoreNLP Parser.""" - def __init__(self, url="https://localhost:9000", encoding="utf8", tagtype=None): + def __init__(self, url="http://localhost:9000", encoding="utf8", tagtype=None): import requests self.url = url @@ -300,7 +300,7 @@ def parse_text(self, text, *args, **kwargs): def tokenize(self, text, properties=None): """Tokenize a string of text. - >>> parser = CoreNLPParser(url='https://localhost:9000') + >>> parser = CoreNLPParser(url='http://localhost:9000') >>> text = 'Good muffins cost $3.88\\nin New York. Please buy me\\ntwo of them.\\nThanks.' 
>>> list(parser.tokenize(text)) @@ -347,13 +347,13 @@ def tag(self, sentence): :rtype: list(tuple(str, str)) - >>> parser = CoreNLPParser(url='https://localhost:9000', tagtype='ner') + >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='ner') >>> tokens = 'Rami Eid is studying at Stony Brook University in NY'.split() >>> parser.tag(tokens) [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), ('at', 'O'), ('Stony', 'ORGANIZATION'), ('Brook', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'O')] - >>> parser = CoreNLPParser(url='https://localhost:9000', tagtype='pos') + >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='pos') >>> tokens = "What is the airspeed of an unladen swallow ?".split() >>> parser.tag(tokens) [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), @@ -393,7 +393,7 @@ def raw_tag_sents(self, sentences): class CoreNLPParser(GenericCoreNLPParser): """ - >>> parser = CoreNLPParser(url='https://localhost:9000') + >>> parser = CoreNLPParser(url='http://localhost:9000') >>> next( ... parser.raw_parse('The quick brown fox jumps over the lazy dog.') @@ -546,7 +546,7 @@ def make_tree(self, result): class CoreNLPDependencyParser(GenericCoreNLPParser): """Dependency parser. - >>> dep_parser = CoreNLPDependencyParser(url='https://localhost:9000') + >>> dep_parser = CoreNLPDependencyParser(url='http://localhost:9000') >>> parse, = dep_parser.raw_parse( ... 'The quick brown fox jumps over the lazy dog.' 
From 9fa98e72c0b8921c8f9a35bf6fcedeb809943697 Mon Sep 17 00:00:00 2001 From: Tom Aarsen Date: Mon, 11 Oct 2021 23:22:58 +0200 Subject: [PATCH 4/5] Revert to non-verbose pytest --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 53103466a6..5fafe82176 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -124,4 +124,4 @@ jobs: - name: Run pytest shell: bash run: | - pytest -v --numprocesses auto -rsx --doctest-modules nltk/test + pytest --numprocesses auto -rsx --doctest-modules nltk/test From faf658e1f20a1e91f50220122554ed1f3ef34f8b Mon Sep 17 00:00:00 2001 From: Tom Aarsen Date: Tue, 12 Oct 2021 12:26:14 +0200 Subject: [PATCH 5/5] Preserve http on websites that have some sort of error This way, the website can still be recovered using an internet archive --- nltk/app/wordnet_app.py | 8 +-- nltk/classify/maxent.py | 2 +- nltk/classify/senna.py | 2 - nltk/collocations.py | 2 +- nltk/corpus/reader/crubadan.py | 2 +- nltk/corpus/reader/ipipan.py | 2 +- nltk/corpus/reader/ppattach.py | 2 +- nltk/corpus/reader/semcor.py | 2 +- nltk/corpus/reader/senseval.py | 2 +- nltk/corpus/reader/sentiwordnet.py | 2 +- nltk/corpus/reader/wordlist.py | 2 +- nltk/corpus/reader/wordnet.py | 2 +- nltk/corpus/reader/ycoe.py | 2 +- nltk/data.py | 2 +- nltk/draw/util.py | 2 +- nltk/inference/discourse.py | 2 +- nltk/metrics/segmentation.py | 2 +- nltk/parse/malt.py | 2 +- nltk/sem/boxer.py | 4 +- nltk/sem/chat80.py | 2 +- nltk/stem/rslp.py | 4 +- nltk/tag/tnt.py | 2 +- nltk/test/chat80.doctest | 4 +- nltk/test/childes.doctest | 2 +- nltk/test/featgram.doctest | 10 +-- nltk/test/portuguese.doctest_latin1 | 96 ++++++++++++++--------------- nltk/test/unit/test_disagreement.py | 2 +- nltk/tgrep.py | 4 +- nltk/tokenize/repp.py | 2 +- nltk/tokenize/treebank.py | 4 +- nltk/treeprettyprinter.py | 2 +- nltk/util.py | 6 +- tools/global_replace.py | 2 +- tools/svnmime.py | 2 +- 
tox.ini | 2 +- web/data.rst | 2 +- web/dev/jenkins.rst | 2 +- web/news.rst | 10 +-- web/team.rst | 2 +- 39 files changed, 101 insertions(+), 107 deletions(-) diff --git a/nltk/app/wordnet_app.py b/nltk/app/wordnet_app.py index e9c89e3706..14fdac8e66 100644 --- a/nltk/app/wordnet_app.py +++ b/nltk/app/wordnet_app.py @@ -440,7 +440,7 @@ def get_relations_data(word, synset): html_header = """ +'http://www.w3.org/TR/html4/strict.dtd'> +