From f63d72ac9df78b3de8611738ae4cfd0c99cf8029 Mon Sep 17 00:00:00 2001 From: HosseiN Khademi Khaledi Date: Thu, 15 Dec 2022 21:01:50 +0330 Subject: [PATCH] Update nltk version https://github.com/nltk/nltk/pull/2877 --- requirements.txt | 2 +- setup.py | 2 +- tests/test_normalizer.py | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0a95151..21bbf3e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ tqdm>=4.62.3 spacy>=3.1.3 hazm>=0.7.0 -nltk>=3.2.0 +nltk>=3.8.0 setuptools>=58.1.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 71fcc1c..2faf615 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ "Normalization", ], url="https://github.com/arushadev/piraye", - version="0.1.4", + version="0.1.5", package_dir={"piraye": "src"}, packages=["piraye"], package_data={"piraye": ["data/*/*.json"]}, diff --git a/tests/test_normalizer.py b/tests/test_normalizer.py index 5880206..f4441ab 100644 --- a/tests/test_normalizer.py +++ b/tests/test_normalizer.py @@ -1,6 +1,7 @@ # testing Fibonacci number function # pylint: skip-file +from ..src import NltkTokenizer from ..src import NormalizerBuilder @@ -46,3 +47,8 @@ def test_quotes(): norm = NormalizerBuilder().digit_en().punctuation_en().alphabet_fa() \ .tokenizing().remove_extra_spaces().build() norm.normalize(text) + + +def test_normalizer(): + tokens = NltkTokenizer().word_tokenize('\'\'Y\'"') + print(tokens)