diff --git a/README.md b/README.md index f89401f..1bf1778 100644 --- a/README.md +++ b/README.md @@ -342,7 +342,7 @@ print(f'Run time: {time:.2f}s') rules.to_csv('output.csv') ``` -**Note:** You may need to download stopwords and the punkt tokenizer from nltk by running `import nltk; nltk.download('stopwords'); nltk.download('punkt')`. +**Note:** You may need to download stopwords and the punkt_tab tokenizer from nltk by running `import nltk; nltk.download('stopwords'); nltk.download('punkt_tab')`. For a full list of examples see the [examples folder](https://github.com/firefly-cpp/NiaARM/tree/main/examples) in the GitHub repository. diff --git a/docs/getting_started.rst b/docs/getting_started.rst index c48d740..2974db6 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -285,7 +285,7 @@ added to the :mod:`niaarm.mine` module. print('No rules generated') print(f'Run time: {time:.2f}s') -**Note:** You may need to download stopwords and the punkt tokenizer from nltk by running `import nltk; nltk.download('stopwords'); nltk.download('punkt')`. +**Note:** You may need to download stopwords and the punkt_tab tokenizer from nltk by running ``import nltk; nltk.download('stopwords'); nltk.download('punkt_tab')``. 
**Output:** diff --git a/docs/requirements.txt b/docs/requirements.txt index 4ce5c58..fb2931c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,6 @@ niapy>=2.5.2,<3.0.0 -numpy>=1.26.1,<2.0.0 -pandas>=2.1.1,<3.0.0 +numpy>=1.22.4 +pandas>=2.2.2,<3.0.0 nltk>=3.8.1,<4.0.0 plotly>=5.22.0,<6.0.0 scikit-learn>=1.5.1,<2.0.0 diff --git a/examples/text_mining.py b/examples/text_mining.py index 62ddbcc..c4d6d10 100644 --- a/examples/text_mining.py +++ b/examples/text_mining.py @@ -7,13 +7,13 @@ df = pd.read_json("datasets/text/artm_test_dataset.json", orient="records") documents = df["text"].tolist() -# create a Corpus object from the documents (requires nltk's punkt tokenizer and the stopwords list) +# create a Corpus object from the documents (requires nltk's punkt_tab tokenizer and the stopwords list) try: corpus = Corpus.from_list(documents) except LookupError: import nltk - nltk.download("punkt") + nltk.download("punkt_tab") nltk.download("stopwords") corpus = Corpus.from_list(documents) diff --git a/poetry.lock b/poetry.lock index 1ea45ca..0b55322 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2046,7 +2046,7 @@ version = "2.5.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" -groups = ["dev", "docs"] +groups = ["docs"] files = [ {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, @@ -2103,4 +2103,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.14" -content-hash = "c818a4593c4764d9d7dff76f946284a4092d34703abc836228b3a91d2b34f87b" +content-hash = "bfc1128d5f51a60b4f59e51d1621bbc4e9e699af5750c4d509e2ac0df0f968d5" diff --git a/pyproject.toml b/pyproject.toml index 471ea76..6945102 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,8 +20,8 @@ include = [ [tool.poetry.dependencies] python = ">=3.9,<3.14" niapy = "^2.5.2" -numpy = "^1.26.1" -pandas = "^2.1.1" +numpy = ">=1.22.4" +pandas = "^2.2.2" nltk = "^3.8.1" tomli = { version = "^2.0.1", python = "<3.11" } plotly = "^5.22.0" diff --git a/tests/test_text_mining.py b/tests/test_text_mining.py index 6da1109..15e9087 100644 --- a/tests/test_text_mining.py +++ b/tests/test_text_mining.py @@ -10,7 +10,7 @@ class TestTextMining(TestCase): def setUp(self): - nltk.download("punkt") + nltk.download("punkt_tab") nltk.download("stopwords") ds_path = os.path.join( os.path.dirname(__file__), "test_data", "artm_test_dataset.json"