ci: drop conjugator for now

percevalw · percevalw · commit 6fb35b9dcc95 · 2025-12-12T17:23:12.000+01:00
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -65,7 +65,7 @@ jobs:
         if: matrix.python-version != '3.10' && matrix.python-version != '3.12' && matrix.python-version != '3.13' && matrix.python-version != '3.14'
 
       - name: Install dependencies
-        run: uv pip install -e . --group dev --group setup
+        run: uv pip install -e . --group dev
         if: matrix.python-version == '3.10'
 
       - name: Install dependencies
diff --git a/contributing.md b/contributing.md
@@ -9,52 +9,35 @@ We welcome contributions ! There are many ways to help. For example, you can:
 
 ## Development installation
 
-To be able to run the test suite, run the example notebooks and develop your own pipeline component, you should clone the repo and install it locally.
+To be able to run the test suite, run the example notebooks and develop your own pipeline component, you should clone the repo and install it locally. We use `uv` to manage virtual environments, and think you should too.
 
-<div class="termy">
-
-```console
+```bash { data-md-color-scheme="slate" }
 # Clone the repository and change directory
-$ git clone https://github.com/aphp/edsnlp.git
----> 100%
-$ cd edsnlp
+git clone https://github.com/aphp/edsnlp.git
+cd edsnlp
 
 # Optional: create a virtual environment
-$ python -m venv venv
-$ source venv/bin/activate
+uv venv
+source .venv/bin/activate
 
-# Install the package with common, dev, setup dependencies in editable mode
-$ pip install -e . --group dev --group setup
-# And build resources
-$ python scripts/conjugate_verbs.py
+# Install the package with its common and dev dependencies in editable mode
+uv pip install -e . --group dev
 ```
 
-</div>
-
 To make sure the pipeline will not fail because of formatting errors, we added pre-commit hooks using the `pre-commit` Python library. To use it, simply install it:
 
-<div class="termy">
-
-```console
-$ pre-commit install
+```bash { data-md-color-scheme="slate" }
+pre-commit install
 ```
 
-</div>
-
 The pre-commit hooks defined in the [configuration](https://github.com/aphp/edsnlp/blob/master/.pre-commit-config.yaml) will automatically run when you commit your changes, letting you know if something went wrong.
 
 The hooks only run on staged changes. To force-run it on all files, run:
 
-<div class="termy">
-
-```console
-$ pre-commit run --all-files
----> 100%
-color:green All good !
+```bash { data-md-color-scheme="slate" }
+pre-commit run --all-files
 ```
 
-</div>
-
 ## Proposing a merge request
 
 At the very least, your changes should :
@@ -70,7 +53,7 @@ We use the Pytest test suite.
 The following command will run the test suite. Writing your own tests is encouraged !
 
 ```shell
-python -m pytest
+pytest
 ```
 
 !!! warning "Testing Cython code"
@@ -93,11 +76,11 @@ edsnlp/pipes/<pipe>
 
 ### Style Guide
 
-We use [Black](https://github.com/psf/black) to reformat the code. While other formatter only enforce PEP8 compliance, Black also makes the code uniform. In short :
+We use [Ruff](https://github.com/astral-sh/ruff) to reformat the code. While other formatters only enforce PEP8 compliance, Ruff also makes the code uniform. In short:
 
-> Black reformats entire files in place. It is not configurable.
+> Ruff reformats entire files in place, producing a uniform, Black-compatible style.
 
-Moreover, the CI/CD pipeline enforces a number of checks on the "quality" of the code. To wit, non black-formatted code will make the test pipeline fail. We use `pre-commit` to keep our codebase clean.
+Moreover, the CI/CD pipeline enforces a number of checks on the "quality" of the code. To wit, non ruff-formatted code will make the test pipeline fail. We use `pre-commit` to keep our codebase clean.
 
 Refer to the [development install tutorial](#development-installation) for tips on how to format your files automatically.
 Most modern editors propose extensions that will format files on save.
@@ -109,19 +92,13 @@ as well as in the documentation itself if need be.
 
 We use `MkDocs` for EDS-NLP's documentation. You can check out the changes you make with:
 
-<div class="termy">
-
-```console
+```bash { data-md-color-scheme="slate" }
 # Install the requirements
-$ pip install -e . --group docs
----> 100%
-color:green Installation successful
+uv pip install -e . --group dev --group docs
 
 # Run the documentation
-$ mkdocs serve
+mkdocs serve
 ```
 
-</div>
-
 Go to [`localhost:8000`](http://localhost:8000) to see your changes. MkDocs watches for changes in the documentation folder
 and automatically reloads the page.
diff --git a/edsnlp/conjugator.py b/edsnlp/conjugator.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -77,10 +77,10 @@ dev = [
     "configobj>=5.0.9",
     "tensorboardx>=2.6.4",
 ]
-setup = [
-    "mlconjug3<3.9.0",  # bug https://github.com/Ars-Linguistica/mlconjug3/pull/506
-    "numpy<2",  # mlconjug has scikit-learn dep which doesn't support for numpy 2 yet
-]
+# setup = [
+#     "mlconjug3<3.9.0",  # bug https://github.com/Ars-Linguistica/mlconjug3/pull/506
+#     "numpy<2",  # mlconjug3 depends on scikit-learn, which doesn't support numpy 2 yet
+# ]
 ml = [
     "edsnlp[ml]"
 ]
diff --git a/scripts/conjugate_verbs.py b/scripts/conjugate_verbs.py
@@ -1,21 +1,138 @@
 import warnings
 from pathlib import Path
+from typing import Dict, List, Union
 
 import context  # noqa
+import mlconjug3
+import pandas as pd
 import typer
 
-from edsnlp.conjugator import conjugate
-from edsnlp.pipelines.qualifiers.hypothesis.patterns import verbs_eds, verbs_hyp
-from edsnlp.pipelines.qualifiers.negation.patterns import verbs as neg_verbs
-from edsnlp.pipelines.qualifiers.reported_speech.patterns import verbs as rspeech_verbs
+from edsnlp.pipes.qualifiers.hypothesis.patterns import verbs_eds, verbs_hyp
+from edsnlp.pipes.qualifiers.negation.patterns import verbs as neg_verbs
+from edsnlp.pipes.qualifiers.reported_speech.patterns import verbs as rspeech_verbs
 
 warnings.filterwarnings("ignore")
 
 
+def conjugate_verb(
+    verb: str,
+    conjugator: mlconjug3.Conjugator,
+) -> pd.DataFrame:
+    """
+    Build the conjugation table of a single verb with mlconjug3.
+
+    Parameters
+    ----------
+    verb : str
+        Verb to conjugate.
+    conjugator : mlconjug3.Conjugator
+        mlconjug3 instance used to conjugate the verb.
+
+    Returns
+    -------
+    pd.DataFrame
+        One row per form yielded by mlconjug3, with columns
+        `verb`, `mode`, `tense`, `person`, `term`.
+    """
+    forms = conjugator.conjugate(verb).iterate()
+    table = pd.DataFrame(forms, columns=["mode", "tense", "person", "term"])
+
+    # Rows without a `term` hold their value in `person`: copy it over...
+    table["term"] = table["term"].fillna(table["person"])
+    # ... then blank `person` wherever it merely duplicates the term.
+    is_duplicate = table["person"] == table["term"]
+    table.loc[is_duplicate, "person"] = None
+
+    # Identify the source verb in the leading column.
+    table.insert(0, "verb", verb)
+
+    return table
+
+
+def conjugate(
+    verbs: Union[str, List[str]],
+    language: str = "fr",
+) -> pd.DataFrame:
+    """
+    Conjugate one verb or a list of verbs.
+
+    Parameters
+    ----------
+    verbs : Union[str, List[str]]
+        Verb, or list of verbs, to conjugate.
+    language : str
+        Language to conjugate. Defaults to French (`fr`).
+
+    Returns
+    -------
+    pd.DataFrame
+        Dataframe containing the conjugations for the provided verbs.
+        Columns: `verb`, `mode`, `tense`, `person`, `term`.
+        Empty (same columns) when no verb is provided.
+    """
+    if isinstance(verbs, str):
+        verbs = [verbs]
+    if not verbs:
+        # `pd.concat` raises a ValueError on an empty list of frames.
+        return pd.DataFrame(columns=["verb", "mode", "tense", "person", "term"])
+
+    conjugator = mlconjug3.Conjugator(language=language)
+    frames = [conjugate_verb(verb, conjugator=conjugator) for verb in verbs]
+    return pd.concat(frames, ignore_index=True)
+
+
+def get_conjugated_verbs(
+    verbs: Union[str, List[str]],
+    matches: Union[List[Dict[str, str]], Dict[str, str]],
+    language: str = "fr",
+) -> List[str]:
+    """
+    Get the conjugated forms of verbs matching mode/tense/person filters.
+
+    Parameters
+    ----------
+    verbs : Union[str, List[str]]
+        Verb, or list of verbs, to conjugate.
+    matches : Union[List[Dict[str, str]], Dict[str, str]]
+        Dictionary (or list of dictionaries) mapping a column of the
+        conjugation table to the value to keep. A form is kept as soon
+        as it satisfies every condition of at least one dictionary.
+    language : str, optional
+        Language to conjugate, by default "fr" (French).
+
+    Returns
+    -------
+    List[str]
+        Deduplicated terms, in order of first appearance.
+
+    Examples
+    --------
+    >>> get_conjugated_verbs(
+    ...     "aimer",
+    ...     dict(mode="Indicatif", tense="Présent", person="1p"),
+    ... )
+    ['aimons']
+    """
+    if isinstance(matches, dict):
+        matches = [matches]
+
+    df = conjugate(verbs=verbs, language=language)
+
+    terms = []
+    for match in matches:
+        # Boolean masks rather than a `DataFrame.query` string: values that
+        # contain quotes (or an empty `match`) cannot break the expression.
+        keep = pd.Series(True, index=df.index)
+        for column, value in match.items():
+            keep &= df[column] == value
+        terms.extend(df.loc[keep, "term"])
+    return list(dict.fromkeys(terms))
+
+
 def conjugate_verbs(
     output_path: Path = typer.Argument(
         "edsnlp/resources/verbs.csv.gz", help="Path to the output CSV table."
-    )
+    ),
 ) -> None:
     """
     Convenience script to automatically conjugate a set of verbs,
diff --git a/tests/test_conjugator.py b/tests/test_conjugator.py