
Commit 18b0e80

chore: clean dependencies

1 parent: 0e4d845

File tree

14 files changed (+92, -289 lines)

edsnlp/pipes/core/endlines/model.py

Lines changed: 14 additions & 8 deletions
@@ -6,9 +6,6 @@
 from numpy.lib.function_base import iterable
 from pandas.api.types import CategoricalDtype
 from pandas.core.groupby import DataFrameGroupBy
-from scipy.sparse import hstack
-from sklearn.naive_bayes import MultinomialNB
-from sklearn.preprocessing import OneHotEncoder
 from spacy.strings import StringStore
 from spacy.tokens import Doc
 
@@ -83,7 +80,7 @@ def _preprocess_data(self, corpus: Iterable[Doc]) -> pd.DataFrame:
         )
 
         # Assign a sentence id to each token
-        df = df.groupby("DOC_ID").apply(self._retrieve_lines)
+        df = df.groupby("DOC_ID", as_index=False).apply(self._retrieve_lines)
         df["SENTENCE_ID"] = df["SENTENCE_ID"].astype("int")
 
         # Compute B1 and B2
@@ -404,6 +401,8 @@ def _fit_M1(
         [description]
 
         """
+        from sklearn.naive_bayes import MultinomialNB
+
         # Encode classes to OneHotEncoder representation
         encoder_A1_A2 = self._fit_encoder_2S(A1, A2)
         self.encoder_A1_A2 = encoder_A1_A2
@@ -427,6 +426,7 @@ def _fit_M2(self, B1: pd.Series, B2: pd.Series, label: pd.Series):
         B2 : pd.Series
         label : pd.Series
         """
+        from sklearn.naive_bayes import MultinomialNB
 
         # Encode classes to OneHotEncoder representation
         encoder_B1 = self._fit_encoder_1S(B1)
@@ -456,6 +456,8 @@ def _get_X_for_M1(
         -------
         np.ndarray
         """
+        from scipy.sparse import hstack
+
         A1_enc = self._encode_series(self.encoder_A1_A2, A1)
         A2_enc = self._encode_series(self.encoder_A1_A2, A2)
         A3_enc = self._encode_series(self.encoder_A3_A4, A3)
@@ -475,6 +477,8 @@ def _get_X_for_M2(self, B1: pd.Series, B2: pd.Series) -> np.ndarray:
         -------
         np.ndarray
         """
+        from scipy.sparse import hstack
+
         B1_enc = self._encode_series(self.encoder_B1, B1)
         B2_enc = self._encode_series(self.encoder_B2, B2)
         X = hstack([B1_enc, B2_enc])
@@ -520,7 +524,7 @@ def _predict_M2(self, B1: pd.Series, B2: pd.Series) -> Dict[str, Any]:
         outputs = {"predictions": predictions, "predictions_proba": predictions_proba}
         return outputs
 
-    def _fit_encoder_2S(self, S1: pd.Series, S2: pd.Series) -> OneHotEncoder:
+    def _fit_encoder_2S(self, S1: pd.Series, S2: pd.Series):
         """Fit a one hot encoder with 2 Series. It concatenates the series and after it
         fits.
 
@@ -539,7 +543,7 @@ def _fit_encoder_2S(self, S1: pd.Series, S2: pd.Series) -> OneHotEncoder:
         encoder = self._fit_one_hot_encoder(S)
         return encoder
 
-    def _fit_encoder_1S(self, S1: pd.Series) -> OneHotEncoder:
+    def _fit_encoder_1S(self, S1: pd.Series):
         """Fit a one hot encoder with 1 Series.
 
         Parameters
@@ -554,7 +558,7 @@ def _fit_encoder_1S(self, S1: pd.Series) -> OneHotEncoder:
         encoder = self._fit_one_hot_encoder(_S1)
         return encoder
 
-    def _encode_series(self, encoder: OneHotEncoder, S: pd.Series) -> np.ndarray:
+    def _encode_series(self, encoder, S: pd.Series) -> np.ndarray:
        """Use the one hot encoder to transform a series.
 
         Parameters
@@ -751,7 +755,7 @@ def _get_string(cls, _id: int, string_store: StringStore) -> str:
         return string_store[_id]
 
     @classmethod
-    def _fit_one_hot_encoder(cls, X: np.ndarray) -> OneHotEncoder:
+    def _fit_one_hot_encoder(cls, X: np.ndarray):
         """Fit a one hot encoder.
 
         Parameters
@@ -763,6 +767,8 @@ def _fit_one_hot_encoder(cls, X: np.ndarray) -> OneHotEncoder:
         -------
         OneHotEncoder
         """
+        from sklearn.preprocessing import OneHotEncoder
+
         encoder = OneHotEncoder(handle_unknown="ignore")
         encoder.fit(X)
         return encoder
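
The pattern in this file is consistent: the scipy and scikit-learn imports move from module level into the methods that actually use them, so importing the endlines pipe no longer requires these heavy dependencies to be installed. A minimal sketch of the deferred-import idea, with a try/except guard and error message that are illustrative only (the commit itself uses bare imports):

```python
def _fit_one_hot_encoder(X):
    # Deferred import: scikit-learn is only needed when the model is
    # actually fitted, not when this module is imported.
    try:
        from sklearn.preprocessing import OneHotEncoder
    except ImportError as e:  # hypothetical guard, not in the diff
        raise ImportError(
            "scikit-learn is required to fit this model: "
            "pip install scikit-learn"
        ) from e

    encoder = OneHotEncoder(handle_unknown="ignore")
    encoder.fit(X)
    return encoder
```

The type annotations referencing `OneHotEncoder` are dropped for the same reason: evaluating them at import time would defeat the deferred import.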

edsnlp/pipes/qualifiers/base.py

Lines changed: 2 additions & 13 deletions
@@ -1,7 +1,7 @@
+import warnings
 from itertools import chain
 from typing import Dict, List, Optional, Set, Union
 
-from loguru import logger
 from spacy.tokens import Doc, Span
 
 from edsnlp.core import PipelineProtocol
@@ -19,25 +19,14 @@ def check_normalizer(nlp: PipelineProtocol) -> None:
     normalizer = components.get("normalizer")
 
     if normalizer and not normalizer.lowercase:
-        logger.warning(
+        warnings.warn(
             "You have chosen the NORM attribute, but disabled lowercasing "
             "in your normalisation pipeline. "
             "This WILL hurt performance : you might want to use the "
             "LOWER attribute instead."
         )
 
 
-def get_qualifier_extensions(nlp: PipelineProtocol):
-    """
-    Check for all qualifiers present in the pipe and return its corresponding extension
-    """
-    return {
-        name: nlp.get_pipe_meta(name).assigns[0].split("span.")[-1]
-        for name, pipe in nlp.pipeline
-        if isinstance(pipe, RuleBasedQualifier)
-    }
-
-
 class RuleBasedQualifier(BaseSpanAttributeClassifierComponent):
     """
     Implements the ConText algorithm (eq. NegEx for negations) for detecting contextual
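
Replacing `logger.warning` with the standard library's `warnings.warn` removes the loguru dependency from this module, and as a side effect lets downstream code control the message through the usual filter machinery. A usage sketch (not code from this commit; it assumes an `nlp` pipeline has already been built):

```python
import warnings

# Suppress the NORM/lowercasing warning for a pipeline we know is fine:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    check_normalizer(nlp)
```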

edsnlp/viz/quick_examples.py

Lines changed: 0 additions & 182 deletions
This file was deleted.

pyproject.toml

Lines changed: 5 additions & 9 deletions
@@ -9,23 +9,18 @@ readme = "README.md"
 requires-python = ">=3.7.1"
 dynamic = ['version']
 dependencies = [
-    "decorator",
     "loguru",
     "pytz",
-    "python-dateutil",
-    "pydantic>=1.10.2",
     "pysimstring>=1.2.1",
     "regex",
-    "rich>=12.0.0",
-    "scikit-learn>=1.0.0",
     "spacy>=3.1,<4.0.0",
     "confit>=0.5.5",
     "tqdm",
     "umls-downloader>=0.1.1",
     "numpy>=1.15.0,<1.23.2; python_version<'3.8'",
     "numpy>=1.15.0; python_version>='3.8'",
-    "pandas>=1.1.0,<2.0.0; python_version<'3.8'",
-    "pandas>=1.4.0,<2.0.0; python_version>='3.8'",
+    "pandas>=1.1.0; python_version<'3.8'",
+    "pandas>=1.4.0; python_version>='3.8'",
     "typing-extensions>=4.0.0",
     "dill",
     # Packaging
@@ -36,17 +31,16 @@ dependencies = [
     "fsspec; python_version>='3.8'",
     "fsspec<2023.1.0; python_version<'3.8'",
     # this is only to avoid backtracking issues with spacy's capping
+    "pydantic>=1.10.2",
     "pydantic<2.0.0; python_version<'3.8'",
     "pydantic-core<2.0.0; python_version<'3.8'",
 ]
 [project.optional-dependencies]
 dev = [
-    "black>=22.3.0",
     "pre-commit>=2.0.0; python_version<'3.8'",
     "pre-commit>=2.21.0; python_version>='3.8'",
     "pytest>=7.1.0",
     "pytest-cov>=3.0.0",
-    "pytest-html>=3.1.1",
     "polars",
 
     # Distributed inference
@@ -77,6 +71,7 @@ dev = [
     "safetensors>=0.3.0",
     "transformers>=4.0.0,<5.0.0",
     "accelerate>=0.20.3,<1.0.0",
+    "scikit-learn>=1.0.0",
 ]
 setup = [
     "typer"
@@ -88,6 +83,7 @@ ml = [
     "safetensors>=0.3.0",
     "transformers>=4.0.0,<5.0.0",
     "accelerate>=0.20.3,<1.0.0",
+    "scikit-learn>=1.0.0",
 ]
 
 [project.urls]
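
Net effect on the metadata: unused packages (`decorator`, `python-dateutil`, `rich`, `black`, `pytest-html`) are dropped, the pandas `<2.0.0` cap is lifted, and `scikit-learn` moves from the core dependencies into the `dev` and `ml` extras. Code that needs the now-optional dependency can probe for it and point users at the extra; a sketch, assuming the published extras keep these names:

```python
import importlib.util

# scikit-learn is no longer a core dependency; check before using
# components that need it, and suggest the extra if it is missing.
if importlib.util.find_spec("sklearn") is None:
    raise ImportError('scikit-learn is missing: pip install "edsnlp[ml]"')
```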

tests/data/test_lazy_collection.py

Lines changed: 2 additions & 1 deletion
@@ -1,5 +1,4 @@
 import pytest
-import torch
 
 import edsnlp
 from edsnlp.utils.collections import ld_to_dl
@@ -31,6 +30,8 @@ def test_flat_iterable(num_cpu_workers):
 
 @pytest.mark.parametrize("num_gpu_workers", [0, 1])
 def test_map_gpu(num_gpu_workers):
+    import torch
+
     def prepare_batch(batch, device):
         return {"tensor": torch.tensor(batch).to(device)}
 
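
Moving `import torch` inside `test_map_gpu` means collecting this test module no longer fails when torch is absent; only the GPU test itself would raise. A common complementary pattern, not used in this commit, is to skip rather than error:

```python
import pytest

@pytest.mark.parametrize("num_gpu_workers", [0, 1])
def test_map_gpu(num_gpu_workers):
    # Skip the test entirely when torch is not installed,
    # instead of raising ImportError at call time.
    torch = pytest.importorskip("torch")
    assert torch.tensor([1.0]).item() == 1.0
```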
