diff --git a/.gitignore b/.gitignore index bf0d160a0..02b093061 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,9 @@ _build/ *.tar.gz *.tsv *.ann +*.spacy +*.RRF +*.pkl # Editors .idea @@ -71,3 +74,6 @@ _build/ docs/reference docs/changelog.md docs/contributing.md + +# Logs +*.log diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 08f632d5b..000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,57 +0,0 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 - hooks: - - id: trailing-whitespace - exclude: | - (?x)^( - tests/resources/.*| - edsnlp/resources/.* - )$ - - id: no-commit-to-branch - - id: end-of-file-fixer - - id: check-yaml - args: ["--unsafe"] - - id: check-toml - - id: check-json - - id: check-symlinks - - id: check-docstring-first - - id: check-added-large-files - - id: detect-private-key - - repo: https://github.com/pycqa/isort - rev: 5.11.5 - hooks: - - id: isort - name: isort (python) - args: ["--profile", "black"] - - id: isort - name: isort (cython) - types: [cython] - args: ["--profile", "black"] - - id: isort - name: isort (pyi) - types: [pyi] - args: ["--profile", "black"] - - - repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - - repo: https://github.com/asottile/blacken-docs - rev: v1.10.0 - hooks: - - id: blacken-docs - additional_dependencies: [black==20.8b1] - exclude: notebooks/ - - repo: https://github.com/pycqa/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - - repo: https://github.com/econchick/interrogate - rev: 1.5.0 - hooks: - - id: interrogate - args: ["--config=pyproject.toml"] - pass_filenames: false diff --git a/NER_model/configs/config.cfg b/NER_model/configs/config.cfg new file mode 100644 index 000000000..008785353 --- /dev/null +++ b/NER_model/configs/config.cfg @@ -0,0 +1,185 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../word-embedding/finetuning-camembert-2021-07-29" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels = ["DISO","Constantes","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","SECTION_motif","SECTION_histoire","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_conclusion"] + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Allergie", "_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Allergie":["Chemical_and_drugs"],"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + 
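+# Note: the embed layer below is a TransformerListener, so this qualifier
+# head reuses the output of the shared "tok2vec" transformer component
+# instead of running its own CamemBERT forward pass.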
+[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = -1 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 3072 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.8 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.2 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/config_v1.cfg b/NER_model/configs/config_v1.cfg new file mode 100644 index 000000000..f13894a1f --- /dev/null +++ b/NER_model/configs/config_v1.cfg @@ -0,0 +1,185 @@ +[paths] +train = "corpus/train_test.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../word-embedding/finetuning-camembert-2021-07-29" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] 
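+# batch_size here only affects nlp.pipe at inference time; training
+# batches are built by [training.batcher] further down.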
+before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels = ["DISO","Constantes","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","SECTION_motif","SECTION_histoire","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_conclusion"] + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Allergie", "_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Allergie":["Chemical_and_drugs"],"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = -1 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 3072 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = 
"spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.8 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.2 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/expe_lang_model/config_DrBert.cfg b/NER_model/configs/expe_lang_model/config_DrBert.cfg new file mode 100644 index 000000000..4c8083882 --- /dev/null +++ b/NER_model/configs/expe_lang_model/config_DrBert.cfg @@ -0,0 +1,185 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../word-embedding/DrBERT-7GB" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels = ["DISO","Constantes","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","SECTION_motif","SECTION_histoire","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_conclusion"] + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Allergie", "_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Allergie":["Chemical_and_drugs"],"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures 
= "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = -1 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 3072 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.8 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.2 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/expe_lang_model/config_camembert_base.cfg b/NER_model/configs/expe_lang_model/config_camembert_base.cfg new file mode 100644 index 000000000..15a92b06c --- /dev/null +++ b/NER_model/configs/expe_lang_model/config_camembert_base.cfg @@ -0,0 +1,185 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../word-embedding/camembert-base" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels = ["DISO","Constantes","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","SECTION_motif","SECTION_histoire","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_conclusion"] + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Allergie", "_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = 
{"_.Allergie":["Chemical_and_drugs"],"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = -1 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 3072 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.8 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.2 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/expe_lang_model/config_camembert_bio.cfg 
b/NER_model/configs/expe_lang_model/config_camembert_bio.cfg new file mode 100644 index 000000000..09eb8484f --- /dev/null +++ b/NER_model/configs/expe_lang_model/config_camembert_bio.cfg @@ -0,0 +1,185 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../word-embedding/camembert-bio-base" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels = ["DISO","Constantes","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","SECTION_motif","SECTION_histoire","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_conclusion"] + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Allergie", "_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Allergie":["Chemical_and_drugs"],"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 
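+# max_length = 0 and limit = 0 read the dev set in full (assuming the
+# custom reader follows spaCy's Corpus semantics), and augmentation is
+# disabled below, so evaluation is deterministic.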
+augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = -1 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 3072 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.8 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.2 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/expe_lang_model/config_eds_finetune.cfg b/NER_model/configs/expe_lang_model/config_eds_finetune.cfg new file mode 100644 index 000000000..008785353 --- /dev/null +++ b/NER_model/configs/expe_lang_model/config_eds_finetune.cfg @@ -0,0 +1,185 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../word-embedding/finetuning-camembert-2021-07-29" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels = ["DISO","Constantes","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","SECTION_motif","SECTION_histoire","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_conclusion"] + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Allergie", "_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Allergie":["Chemical_and_drugs"],"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + 
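+# The stacked-CRF model above predicts one tag sequence per label, which
+# is what lets the nested/overlapping labels in ent_labels coexist;
+# "joint" presumably selects joint training/decoding of the CRF stack.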
+[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = -1 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 3072 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.8 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.2 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/expe_lang_model/config_eds_scratch.cfg b/NER_model/configs/expe_lang_model/config_eds_scratch.cfg new file mode 100644 index 000000000..6a4d93f9b --- /dev/null +++ b/NER_model/configs/expe_lang_model/config_eds_scratch.cfg @@ -0,0 +1,185 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../word-embedding/training-from-scratch-2021-08-13" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels = 
["DISO","Constantes","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","SECTION_motif","SECTION_histoire","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_conclusion"] + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Allergie", "_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Allergie":["Chemical_and_drugs"],"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = -1 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 3072 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 
+use_averages = false
+eps = 0.000001
+
+[training.optimizer.learn_rate]
+@schedules = "warmup_linear.v1"
+warmup_steps = 250
+total_steps = ${training.max_steps}
+initial_rate = 0.0001
+
+[training.score_weights]
+ents_f = 0.8
+ents_p = 0.0
+ents_r = 0.0
+qual_f = 0.2
+speed = 0.0
+ents_per_type = null
+qual_per_type = null
+
+[pretraining]
+
+[initialize]
+vectors = ${paths.vectors}
+init_tok2vec = ${paths.init_tok2vec}
+vocab_data = null
+lookups = null
+before_init = null
+after_init = null
+
+[initialize.components]
+
+[initialize.tokenizer]
diff --git a/NER_model/configs/rebus/__.cfg b/NER_model/configs/rebus/__.cfg
new file mode 100644
index 000000000..7922b33a5
--- /dev/null
+++ b/NER_model/configs/rebus/__.cfg
@@ -0,0 +1,188 @@
+[paths]
+train = "corpus/train.spacy"
+dev = "corpus/dev.spacy"
+vectors = null
+init_tok2vec = null
+bert = "../../../word-embedding/finetuning-camembert-2021-07-29"
+
+[system]
+gpu_allocator = "pytorch"
+seed = 0
+
+[nlp]
+lang = "eds"
+pipeline = ["tok2vec","ner","qualifier"]
+batch_size = 1
+disabled = []
+before_creation = null
+after_creation = null
+after_pipeline_creation = null
+
+[components]
+
+[components.ner]
+factory = "nested_ner"
+scorer = {"@scorers": "eds.nested_ner_scorer.v1"}
+ent_labels = ["Disorders","SOSY","Chemical_and_drugs","Medical_Procedure", "Concept"]
+
+
+[components.qualifier]
+factory = "eds.span_qualifier"
+
+
+[components.qualifier.candidate_getter]
+@misc = "eds.candidate_span_qualifier_getter"
+on_ents = true
+on_span_groups = true
+qualifiers = ["_.SOSY_type", "_.negation", "_.hypothetique","_.family"]
+label_constraints = {"_.negation": [ "Disorders", "SOSY","Chemical_and_drugs","Medical_Procedure"], "_.hypothetique":[ "Disorders", "SOSY","Chemical_and_drugs","Medical_Procedure"],"_.family":[ "Disorders", "SOSY","Chemical_and_drugs","Medical_Procedure"],"_.SOSY_type":["SOSY"]}
+
+
+[components.qualifier.model]
+@architectures = "eds.span_multi_classifier.v1"
+projection_mode = "dot"
+pooler_mode = "max"
+
+[components.qualifier.model.tok2vec]
+@architectures = "spacy.Tok2Vec.v2"
+
+[components.qualifier.model.tok2vec.embed]
+@architectures = "spacy-transformers.TransformerListener.v1"
+pooling = {"@layers":"reduce_mean.v1"}
+
+[components.qualifier.model.tok2vec.encode]
+@architectures = "spacy.MishWindowEncoder.v2"
+width = 768
+window_size = 1
+depth = 2
+
+[components.clean-entities]
+factory = "clean-entities"
+
+[components.ner.model]
+@architectures = "eds.stack_crf_ner_model.v1"
+mode = "joint"
+
+[components.ner.model.tok2vec]
+@architectures = "spacy.Tok2Vec.v2"
+
+[components.ner.model.tok2vec.embed]
+@architectures = "spacy-transformers.TransformerListener.v1"
+pooling = {"@layers":"reduce_mean.v1"}
+
+[components.ner.model.tok2vec.encode]
+@architectures = "spacy.MishWindowEncoder.v2"
+width = 768
+window_size = 1
+depth = 2
+
+[components.tok2vec]
+factory = "transformer"
+max_batch_items = 1
+set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
+
+[components.tok2vec.model]
+@architectures = "spacy-transformers.TransformerModel.v3"
+name = ${paths.bert}
+# name = "camembert-base"
+mixed_precision = true
+
+[components.tok2vec.model.get_spans]
+@span_getters = "spacy-transformers.strided_spans.v1"
+window = 128
+stride = 64
+
+[components.tok2vec.model.grad_scaler_config]
+
+[components.tok2vec.model.tokenizer_config]
+use_fast = true
+
+[components.tok2vec.model.transformer_config]
+
+[corpora]
+
+[corpora.train]
+@readers = "eds-medic.Corpus.v1"
+path = 
${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds-medic.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 2 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = 100000000000 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 1000 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.5 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.5 +speed = 0.0 +ents_per_type = null +qual_per_type = null + + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/rebus/config.cfg b/NER_model/configs/rebus/config.cfg new file mode 100644 index 000000000..9e30cc62a --- /dev/null +++ b/NER_model/configs/rebus/config.cfg @@ -0,0 +1,186 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../../word-embedding/finetuning-camembert-2021-07-29" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels =["DISO","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","route","SECTION_motif","SECTION_histoire","BIO_milieu","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_traitement","SECTION_evolution","SECTION_autre","SECTION_conclusion"] + + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + 
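+# reduce_mean averages the transformer's wordpiece vectors into a single
+# 768-d vector per token; the windowed Mish encoder below then adds local
+# context on top of these embeddings.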
+[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds-medic.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds-medic.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = 100000000000 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 500 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.5 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.5 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/rebus/config_0407_Pierre.cfg b/NER_model/configs/rebus/config_0407_Pierre.cfg new file mode 100644 index 000000000..9e30cc62a --- /dev/null +++ b/NER_model/configs/rebus/config_0407_Pierre.cfg @@ -0,0 +1,186 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../../word-embedding/finetuning-camembert-2021-07-29" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + 
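+# Pipeline order matters: "tok2vec" (the shared transformer) must precede
+# the "ner" and "qualifier" components that listen to its output.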
+[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels =["DISO","BIO_comp","Chemical_and_drugs","dosage","BIO","strength","form","SECTION_antecedent","route","SECTION_motif","SECTION_histoire","BIO_milieu","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_traitement","SECTION_evolution","SECTION_autre","SECTION_conclusion"] + + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds-medic.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds-medic.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = 100000000000 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 500 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers 
= "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.5 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.5 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/rebus/config_1907.cfg b/NER_model/configs/rebus/config_1907.cfg new file mode 100644 index 000000000..cf9399a90 --- /dev/null +++ b/NER_model/configs/rebus/config_1907.cfg @@ -0,0 +1,188 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = "../../../word-embedding/finetuning-camembert-2021-07-29" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner","qualifier"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels = ["sosydiso"] + + +[components.qualifier] +factory = "eds.span_qualifier" + + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = true +qualifiers = ["_.negation", "_.hypothetique","_.family"] +label_constraints = {"_.negation": ["sosydiso"], "_.hypothetique":["sosydiso"],"_.family":["sosydiso"]} + + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 1 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds-medic.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null 
+seed = ${system.seed}
+shuffle = true
+filter_expr = null
+#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)"
+
+[corpora.dev]
+@readers = "eds-medic.Corpus.v1"
+path = ${paths.dev}
+max_length = 0
+gold_preproc = false
+limit = 0
+augmenter = null
+
+[training]
+accumulate_gradient = 2
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
+seed = ${system.seed}
+gpu_allocator = ${system.gpu_allocator}
+dropout = 0.1
+patience = 1500
+max_epochs = 100000000000
+max_steps = 20000
+eval_frequency = 100
+frozen_components = []
+annotating_components = []
+before_to_disk = null
+
+[training.batcher]
+@batchers = "spacy.batch_by_padded.v1"
+size = 1000
+discard_oversize = true
+buffer = 256
+
+[training.logger]
+@loggers = "spacy.ConsoleLogger.v1"
+progress_bar = true
+#@loggers = "DVCLive.v1"
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 0.0
+use_averages = false
+eps = 0.000001
+
+[training.optimizer.learn_rate]
+@schedules = "warmup_linear.v1"
+warmup_steps = 250
+total_steps = ${training.max_steps}
+initial_rate = 0.0001
+
+[training.score_weights]
+ents_f = 0.5
+ents_p = 0.0
+ents_r = 0.0
+qual_f = 0.5
+speed = 0.0
+ents_per_type = null
+qual_per_type = null
+
+
+[pretraining]
+
+[initialize]
+vectors = ${paths.vectors}
+init_tok2vec = ${paths.init_tok2vec}
+vocab_data = null
+lookups = null
+before_init = null
+after_init = null
+
+[initialize.components]
+
+[initialize.tokenizer]
diff --git a/NER_model/configs/rebus/config_backup2.cfg b/NER_model/configs/rebus/config_backup2.cfg
new file mode 100644
index 000000000..3fe9a03d5
--- /dev/null
+++ b/NER_model/configs/rebus/config_backup2.cfg
@@ -0,0 +1,188 @@
+[paths]
+train = "corpus/train.spacy"
+dev = "corpus/dev.spacy"
+vectors = null
+init_tok2vec = null
+bert = "../../../word-embedding/finetuning-camembert-2021-07-29"
+
+[system]
+gpu_allocator = "pytorch"
+seed = 0
+
+[nlp]
+lang = "eds"
+pipeline = ["tok2vec","ner","qualifier"]
+batch_size = 1
+disabled = []
+before_creation = null
+after_creation = null
+after_pipeline_creation = null
+
+[components]
+
+[components.ner]
+factory = "nested_ner"
+scorer = {"@scorers": "eds.nested_ner_scorer.v1"}
+ent_labels = ["Disorders","SOSY","Chemical_and_drugs","Concept","gender","Medical_Procedure","SECTION_examen_complementaire","SECTION_conclusion","SECTION_motif","SECTION_antecedent","SECTION_head","SECTION_traitement_entree","SECTION_traitement_sortie","SECTION_evolution","SECTION_antecedent_familiaux","SECTION_histoire","SECTION_traitement","SECTION_examen_clinique","SECTION_autre","SECTION_mode_de_vie","metadata"]
+
+
+[components.qualifier]
+factory = "eds.span_qualifier"
+
+[components.qualifier.candidate_getter]
+@misc = "eds.candidate_span_qualifier_getter"
+on_ents = true
+on_span_groups = false
+
+qualifiers = ["_.Disorders_type","_.SOSY_type","_.Chemical_and_drugs_type","_.Concept_type","_.negation","_.hypothetique","_.family","_.Medical_Procedure_type","_.gender_type"]
+
+label_constraints = {"_.Disorders_type":["Disorders"],"_.SOSY_type": ["SOSY"],"_.Chemical_and_drugs_type":["Chemical_and_drugs"],"_.Concept_type":["Concept"],"_.negation":["Disorders","SOSY","Chemical_and_drugs", "Medical_Procedure"],"_.hypothetique":["Disorders","SOSY","Chemical_and_drugs", "Medical_Procedure"],"_.family":["Disorders","SOSY","Chemical_and_drugs", "Medical_Procedure"],"_.Medical_Procedure_type":["Medical_Procedure"], "_.gender_type":["gender"]}


+[components.qualifier.model]
+@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 1 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds-medic.Corpus.v1" +path = ${paths.train} +max_length = 384 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds-medic.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 5 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = 100000000000 +max_steps = 20000 +eval_frequency = 100 +frozen_components = [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 1000 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.5 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.5 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/configs/rebus/config_j.cfg b/NER_model/configs/rebus/config_j.cfg new file mode 100644 index 000000000..5c0779f58 --- /dev/null +++ b/NER_model/configs/rebus/config_j.cfg @@ -0,0 +1,186 @@ +[paths] +train = "corpus/train.spacy" +dev = "corpus/dev.spacy" +vectors = null +init_tok2vec = null +bert = 
"../../../word-embedding/finetuning-camembert-2021-07-29" + +[system] +gpu_allocator = "pytorch" +seed = 0 + +[nlp] +lang = "eds" +pipeline = ["tok2vec","ner"] +batch_size = 1 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null + +[components] + +[components.ner] +factory = "nested_ner" +scorer = {"@scorers": "eds.nested_ner_scorer.v1"} +ent_labels =["DISO","BIO_comp","CHEM","dosage","BIO","strength","form","SECTION_antecedent","route","SECTION_motif","SECTION_histoire","BIO_milieu","SECTION_examen_clinique","SECTION_examen_complementaire","SECTION_mode_de_vie","SECTION_traitement_entree","SECTION_antecedent_familiaux","SECTION_traitement_sortie","SECTION_traitement","SECTION_evolution","SECTION_autre","SECTION_conclusion"] + + +[components.qualifier] +factory = "eds.span_qualifier" + +[components.qualifier.candidate_getter] +@misc = "eds.candidate_span_qualifier_getter" +on_ents = true +on_span_groups = false + +qualifiers = ["_.Action","_.Certainty","_.Temporality","_.Negation","_.Family"] +label_constraints = {"_.Action":["Chemical_and_drugs"],"_.Certainty":["Chemical_and_drugs"],"_.Temporality":["Chemical_and_drugs"],"_.Negation":["Chemical_and_drugs"],"_.Family":["Chemical_and_drugs"]} + +[components.qualifier.model] +@architectures = "eds.span_multi_classifier.v1" +projection_mode = "dot" +pooler_mode = "max" + +[components.qualifier.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.qualifier.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.qualifier.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.clean-entities] +factory = "clean-entities" + +[components.ner.model] +@architectures = "eds.stack_crf_ner_model.v1" +mode = "joint" + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[components.ner.model.tok2vec.embed] +@architectures = "spacy-transformers.TransformerListener.v1" +pooling = {"@layers":"reduce_mean.v1"} + +[components.ner.model.tok2vec.encode] +@architectures = "spacy.MishWindowEncoder.v2" +width = 768 +window_size = 1 +depth = 2 + +[components.tok2vec] +factory = "transformer" +max_batch_items = 4096 +set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} + +[components.tok2vec.model] +@architectures = "spacy-transformers.TransformerModel.v3" +name = ${paths.bert} +# name = "camembert-base" +mixed_precision = true + +[components.tok2vec.model.get_spans] +@span_getters = "spacy-transformers.strided_spans.v1" +window = 128 +stride = 64 + +[components.tok2vec.model.grad_scaler_config] + +[components.tok2vec.model.tokenizer_config] +use_fast = true + +[components.tok2vec.model.transformer_config] + +[corpora] + +[corpora.train] +@readers = "eds-medic.Corpus.v1" +path = ${paths.train} +max_length = 200 +gold_preproc = false +limit = 0 +augmenter = null +seed = ${system.seed} +shuffle = true +filter_expr = null +#filter_expr = "any(ent._.event_type is not None for ent in doc.ents)" + +[corpora.dev] +@readers = "eds-medic.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] +accumulate_gradient = 1 +dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +patience = 1500 +max_epochs = 100000000000 +max_steps = 20000 +eval_frequency = 100 +frozen_components 
= [] +annotating_components = [] +before_to_disk = null + +[training.batcher] +@batchers = "spacy.batch_by_padded.v1" +size = 500 +discard_oversize = true +buffer = 256 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = true +#@loggers = "DVCLive.v1" + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 0.0 +use_averages = false +eps = 0.000001 + +[training.optimizer.learn_rate] +@schedules = "warmup_linear.v1" +warmup_steps = 250 +total_steps = ${training.max_steps} +initial_rate = 0.0001 + +[training.score_weights] +ents_f = 0.5 +ents_p = 0.0 +ents_r = 0.0 +qual_f = 0.5 +speed = 0.0 +ents_per_type = null +qual_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/NER_model/data/NLP_diabeto/clean/annotation.conf b/NER_model/data/NLP_diabeto/clean/annotation.conf new file mode 100644 index 000000000..010a43788 --- /dev/null +++ b/NER_model/data/NLP_diabeto/clean/annotation.conf @@ -0,0 +1,39 @@ +[entities] +Chemical_and_drugs +BIO +BIO_comp +DISO +BIO_milieu +Constantes +!SECTION_ + SECTION_examen_complementaire + SECTION_conclusion + SECTION_motif + SECTION_antecedent + SECTION_head + SECTION_traitement_entree + SECTION_traitement_sortie + SECTION_evolution + SECTION_antecedent_familiaux + SECTION_histoire + SECTION_traitement + SECTION_examen_clinique + SECTION_autre + SECTION_mode_de_vie + + +[attributes] +Action Arg:Chemical_and_drugs, Value:Start|Stop|Increase|Decrease|OtherChange|UniqueDose|Unknown +Certainty Arg:Chemical_and_drugs, Value:Certain|Hypothetical|Conditional|Unknown +Temporality Arg:Chemical_and_drugs, Value:Past|Present|Future|Unknown +Negation Arg:Chemical_and_drugs +Allergie Arg:Chemical_and_drugs +Family Arg:Chemical_and_drugs +Tech Arg:Chemical_and_drugs, Value:dosage|form|route|strength + + +[relations] + Arg1:, Arg2:, : + + +[events] \ No newline at end of file diff --git a/NER_model/data/NLP_diabeto/clean/kb_shortcuts.conf b/NER_model/data/NLP_diabeto/clean/kb_shortcuts.conf new file mode 100644 index 000000000..4b83757fb --- /dev/null +++ b/NER_model/data/NLP_diabeto/clean/kb_shortcuts.conf @@ -0,0 +1,6 @@ +B BIO +C Chemical_and_drugs +X BIO_comp +M BIO_milieu +D DISO +T Constantes \ No newline at end of file diff --git a/NER_model/data/NLP_diabeto/clean/visual.conf b/NER_model/data/NLP_diabeto/clean/visual.conf new file mode 100644 index 000000000..a0e119d6c --- /dev/null +++ b/NER_model/data/NLP_diabeto/clean/visual.conf @@ -0,0 +1,42 @@ +[labels] +BIO | BIO +BIO_comp | BIO_comp +Chemical_and_drugs | Chemical_and_drugs +SECTION_examen_complementaire | SECTION_examen_complementaire +SECTION_conclusion | SECTION_conclusion +SECTION_motif | SECTION_motif +SECTION_antecedent | SECTION_antecedent +SECTION_head | SECTION_head +SECTION_traitement_entree | SECTION_traitement_entree +SECTION_traitement_sortie | SECTION_traitement_sortie +SECTION_evolution | SECTION_evolution +SECTION_antecedent_familiaux | SECTION_antecedent_familiaux +SECTION_histoire | SECTION_histoire +SECTION_traitement | SECTION_traitement +SECTION_examen_clinique | SECTION_examen_clinique +SECTION_autre | SECTION_autre +SECTION_mode_de_vie | SECTION_mode_de_vie +DISO | DISO +CONST | Constantes + +[drawing] +Chemical_and_drugs bgColor:#d62728 +DISO bgColor:#ff7f0e +SECTION_examen_complementaire 
bgColor:#17becf +SECTION_conclusion bgColor:#17becf +SECTION_motif bgColor:#17becf +SECTION_antecedent bgColor:#17becf +SECTION_head bgColor:#17becf +SECTION_traitement_entree bgColor:#17becf +SECTION_traitement_sortie bgColor:#17becf +SECTION_evolution bgColor:#17becf +SECTION_antecedent_familiaux bgColor:#17becf +SECTION_histoire bgColor:#17becf +SECTION_traitement bgColor:#17becf +SECTION_examen_clinique bgColor:#17becf +SECTION_autre bgColor:#17becf +SECTION_mode_de_vie bgColor:#17becf +BIO bgColor:#8c564b +BIO_comp bgColor:#779999 +BIO_milieu bgColor:#2ca02c +Constantes bgColor:#9467bd diff --git a/NER_model/data/NLP_diabeto/raw/annotation.conf b/NER_model/data/NLP_diabeto/raw/annotation.conf new file mode 100644 index 000000000..010a43788 --- /dev/null +++ b/NER_model/data/NLP_diabeto/raw/annotation.conf @@ -0,0 +1,39 @@ +[entities] +Chemical_and_drugs +BIO +BIO_comp +DISO +BIO_milieu +Constantes +!SECTION_ + SECTION_examen_complementaire + SECTION_conclusion + SECTION_motif + SECTION_antecedent + SECTION_head + SECTION_traitement_entree + SECTION_traitement_sortie + SECTION_evolution + SECTION_antecedent_familiaux + SECTION_histoire + SECTION_traitement + SECTION_examen_clinique + SECTION_autre + SECTION_mode_de_vie + + +[attributes] +Action Arg:Chemical_and_drugs, Value:Start|Stop|Increase|Decrease|OtherChange|UniqueDose|Unknown +Certainty Arg:Chemical_and_drugs, Value:Certain|Hypothetical|Conditional|Unknown +Temporality Arg:Chemical_and_drugs, Value:Past|Present|Future|Unknown +Negation Arg:Chemical_and_drugs +Allergie Arg:Chemical_and_drugs +Family Arg:Chemical_and_drugs +Tech Arg:Chemical_and_drugs, Value:dosage|form|route|strength + + +[relations] + Arg1:, Arg2:, : + + +[events] \ No newline at end of file diff --git a/NER_model/data/NLP_diabeto/raw/kb_shortcuts.conf b/NER_model/data/NLP_diabeto/raw/kb_shortcuts.conf new file mode 100644 index 000000000..4b83757fb --- /dev/null +++ b/NER_model/data/NLP_diabeto/raw/kb_shortcuts.conf @@ -0,0 +1,6 @@ +B BIO +C Chemical_and_drugs +X BIO_comp +M BIO_milieu +D DISO +T Constantes \ No newline at end of file diff --git a/NER_model/data/NLP_diabeto/raw/visual.conf b/NER_model/data/NLP_diabeto/raw/visual.conf new file mode 100644 index 000000000..a0e119d6c --- /dev/null +++ b/NER_model/data/NLP_diabeto/raw/visual.conf @@ -0,0 +1,42 @@ +[labels] +BIO | BIO +BIO_comp | BIO_comp +Chemical_and_drugs | Chemical_and_drugs +SECTION_examen_complementaire | SECTION_examen_complementaire +SECTION_conclusion | SECTION_conclusion +SECTION_motif | SECTION_motif +SECTION_antecedent | SECTION_antecedent +SECTION_head | SECTION_head +SECTION_traitement_entree | SECTION_traitement_entree +SECTION_traitement_sortie | SECTION_traitement_sortie +SECTION_evolution | SECTION_evolution +SECTION_antecedent_familiaux | SECTION_antecedent_familiaux +SECTION_histoire | SECTION_histoire +SECTION_traitement | SECTION_traitement +SECTION_examen_clinique | SECTION_examen_clinique +SECTION_autre | SECTION_autre +SECTION_mode_de_vie | SECTION_mode_de_vie +DISO | DISO +CONST | Constantes + +[drawing] +Chemical_and_drugs bgColor:#d62728 +DISO bgColor:#ff7f0e +SECTION_examen_complementaire bgColor:#17becf +SECTION_conclusion bgColor:#17becf +SECTION_motif bgColor:#17becf +SECTION_antecedent bgColor:#17becf +SECTION_head bgColor:#17becf +SECTION_traitement_entree bgColor:#17becf +SECTION_traitement_sortie bgColor:#17becf +SECTION_evolution bgColor:#17becf +SECTION_antecedent_familiaux bgColor:#17becf +SECTION_histoire bgColor:#17becf +SECTION_traitement bgColor:#17becf 
+SECTION_examen_clinique bgColor:#17becf +SECTION_autre bgColor:#17becf +SECTION_mode_de_vie bgColor:#17becf +BIO bgColor:#8c564b +BIO_comp bgColor:#779999 +BIO_milieu bgColor:#2ca02c +Constantes bgColor:#9467bd diff --git a/NER_model/dvc.lock b/NER_model/dvc.lock new file mode 100644 index 000000000..727e13edd --- /dev/null +++ b/NER_model/dvc.lock @@ -0,0 +1,75 @@ +schema: '2.0' +stages: + convert: + cmd: python -m spacy project run convert + deps: + - path: data/NLP_diabeto/test + md5: 3c83eeee1f827072761e56c3b135d56b.dir + size: 214537 + nfiles: 20 + - path: data/NLP_diabeto/train + md5: 02f70168761f49b871c0fae54f4cbd2a.dir + size: 1903044 + nfiles: 166 + - path: data/NLP_diabeto/val + md5: cba6c8166b223f5f007c9f51caefa124.dir + size: 213280 + nfiles: 20 + - path: scripts/convert.py + md5: 4cc6b87c58ddf5320123fde3fc04ac03 + size: 3268 + outs: + - path: corpus/dev.spacy + md5: 8ed7e6e2ebbbdde02f52e115c81a3c3b + size: 200671 + - path: corpus/test.spacy + md5: 64a9905aae87723375427b0fe0885472 + size: 212753 + - path: corpus/train.spacy + md5: 46d25cdcdcb64a19d02fb130a67f1090 + size: 1751413 + train: + cmd: python -m spacy project run train + deps: + - path: configs/config.cfg + md5: 0714cebf0e60346234c2a244d1924198 + size: 4450 + - path: corpus/dev.spacy + md5: 97ad6c031ae1b3522131d2675a1464a0 + size: 215568 + - path: corpus/train.spacy + md5: bd6c7c646cf288ee49f855177841c399 + size: 1741469 + outs: + - path: training/model-best + md5: 7342ebecce870db3e5435d9c5ad27967.dir + size: 474900558 + nfiles: 14 + package: + cmd: python -m spacy project run package + deps: + - path: training/model-best + md5: 7342ebecce870db3e5435d9c5ad27967.dir + size: 474900558 + nfiles: 14 + outs: + - path: packages/eds_medic-0.1.0/dist/eds_medic-0.1.0-py3-none-any.whl + md5: 1b491e3d4ce4f7822f381adc6525f0ba + size: 438563366 + evaluate: + cmd: python -m spacy project run evaluate + deps: + - path: corpus/test.spacy + md5: 0812cf1d19ab475be07eb65162e0ce2c + size: 180628 + - path: training/model-best + md5: 7342ebecce870db3e5435d9c5ad27967.dir + size: 474900558 + nfiles: 14 + outs: + - path: corpus/output.spacy + md5: 43bb5d78ab495c8be01f30b106cd2c3c + size: 176719 + - path: training/test_metrics.json + md5: 81d641b7c91e4b88587693f30364a092 + size: 1357 diff --git a/NER_model/dvc.yaml b/NER_model/dvc.yaml new file mode 100644 index 000000000..8102b2cde --- /dev/null +++ b/NER_model/dvc.yaml @@ -0,0 +1,28 @@ +# 60288f251a06a6832464289af6745578 +# This file is auto-generated by spaCy based on your project.yml. 
If you've +# edited your project.yml, you can regenerate this file by running: +# python -m spacy project dvc +stages: + train: + cmd: python -m spacy project run train + deps: + - configs/config.cfg + - corpus/dev.spacy + - corpus/train.spacy + outs: + - training/model-best + evaluate: + cmd: python -m spacy project run evaluate + deps: + - corpus/test.spacy + - training/model-best + outs: + - corpus/output.spacy + - training/test_metrics.json + package: + cmd: python -m spacy project run package + deps: + - training/model-best + outs: + - packages/eds_medic-0.1.0/dist/eds_medic-0.1.0-py3-none-any.whl: + cache: false diff --git a/NER_model/evaluate.md b/NER_model/evaluate.md new file mode 100644 index 000000000..87ad8a543 --- /dev/null +++ b/NER_model/evaluate.md @@ -0,0 +1,572 @@ +--- +jupyter: + jupytext: + formats: ipynb,md + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.15.0 + kernelspec: + display_name: BioMedics_client + language: python + name: biomedics_client +--- + +```python +%reload_ext autoreload +%autoreload 2 +%reload_ext jupyter_black +``` + +```python +import spacy +import pandas as pd +from edsnlp.connectors.brat import BratConnector +from edsnlp.evaluate import evaluate_test, evaluate +``` + +# Expe Data Size + +```python +GOLD_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/test" + +loader = spacy.blank("eds") +brat = BratConnector(GOLD_PATH) +gold_docs = brat.brat2docs(loader) + +scores = [] +for i in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 62]: + PRED_PATH = f"/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/expe_data_size/pred_{i}" + loader = spacy.blank("eds") + brat = BratConnector(PRED_PATH) + pred_docs = brat.brat2docs(loader) + score = pd.DataFrame( + evaluate_test( + gold_docs, + pred_docs, + boostrap_level="doc", + exact=True, + n_draw=5000, + alpha=0.05, + digits=5, + ) + ).T.sort_index() + score[["n_docs"]] = i + scores.append(score) +``` + +```python +import altair as alt +from functools import reduce + +alt.data_transformers.disable_max_rows() +result = ( + pd.concat(scores)[["n_docs", "Precision", "Recall", "F1"]] + .dropna() + .reset_index() + .rename(columns={"index": "label"}) + .melt( + id_vars=["n_docs", "label"], + value_vars=["Precision", "Recall", "F1"], + var_name="metric", + value_name="summary", + ) +) +result["mean"] = result["summary"].str.split().str.get(0) +result["lower"] = ( + result["summary"].str.split().str.get(1).str.split("-").str.get(0).str.slice(1) +) +result["upper"] = ( + result["summary"].str.split().str.get(1).str.split("-").str.get(1).str.slice(0, -1) +) +result = result[ + result.label.isin( + [ + "Overall", + "DISO", + "Constantes", + "BIO_comp", + "Chemical_and_drugs", + "dosage", + "BIO", + "strength", + "form", + "SECTION_antecedent", + "SECTION_motif", + "SECTION_histoire", + "SECTION_examen_clinique", + "SECTION_examen_complementaire", + "SECTION_mode_de_vie", + "SECTION_traitement_entree", + "SECTION_antecedent_familiaux", + "SECTION_traitement_sortie", + "SECTION_conclusion", + ] + ) +] +label_dropdown = alt.binding_select(options=list(result.label.unique()), name="Label ") +label_selection = alt.selection_point( + fields=["label"], bind=label_dropdown, value="Overall" +) + +metric_dropdown = alt.binding_select( + options=list(result.metric.unique()), name="Metric " +) +metric_selection = alt.selection_point( + fields=["metric"], bind=metric_dropdown, value="F1" +) + +line = ( + alt.Chart(result) + 
.mark_line(point=True) + .encode( + x="n_docs:O", + y=alt.Y(f"mean:Q").scale(zero=False, domain=[60, 100]), + ) +) + +band = ( + alt.Chart(result) + .mark_area(opacity=0.5) + .encode( + x="n_docs:O", + y=alt.Y(f"upper:Q").title(""), + y2=alt.Y2(f"lower:Q").title(""), + ) +) + +chart = line + band +chart = ( + chart.add_params(metric_selection) + .transform_filter(metric_selection) + .add_params(label_selection) + .transform_filter(label_selection) + .properties(width=600) +) + +display(chart) +display(result) +chart.save("metrics_by_n_docs.html") +``` + +# Expe Section + +```python +from os.path import isfile, isdir, join, basename +import edsnlp +import spacy +from edsnlp.evaluate import compute_scores + +nlp = spacy.blank("eds") +nlp.add_pipe("eds.normalizer") +nlp.add_pipe("eds.sections") + +GOLD_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/test" + +loader = spacy.blank("eds") +brat = BratConnector(GOLD_PATH) +gold_docs = brat.brat2docs(loader) + +ML_PRED_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/expe_lang_model/pred_model_eds_finetune" + +brat = BratConnector(ML_PRED_PATH) +ents_ml_pred = brat.brat2docs(loader) + +mapping = { + "antécédents": "SECTION_antecedent", + "motif": "SECTION_motif", + "histoire de la maladie": "SECTION_histoire", + "examens": "SECTION_examen_clinique", + "examens complémentaires": "SECTION_examen_complementaire", + "habitus": "SECTION_mode_de_vie", + "traitements entrée": "SECTION_traitement_entree", + "antécédents familiaux": "SECTION_antecedent_familiaux", + "traitements sortie": "SECTION_traitement_sortie", + "conclusion": "SECTION_conclusion", +} +rule_pred_docs = [] +for doc in gold_docs: + rule_pred_doc = nlp(doc.text) + rule_pred_doc._.note_id = doc._.note_id + del rule_pred_doc.spans["sections"] + rule_pred_docs.append(rule_pred_doc) +ents_rule_pred = [] +for doc in rule_pred_docs: + annotation = [doc._.note_id] + for label, ents in doc.spans.items(): + for ent in ents: + if ent.label_ in mapping.keys(): + annotation.append( + [ent.text, mapping[ent.label_], ent.start_char, ent.end_char] + ) + ents_rule_pred.append(annotation) + + +def get_annotation(docs): + full_annot = [] + for doc in docs: + annotation = [doc._.note_id] + for label, ents in doc.spans.items(): + for ent in ents: + if label in mapping.values(): + annotation.append([ent.text, label, ent.start_char, ent.end_char]) + full_annot.append(annotation) + return full_annot + + +ents_gold, ents_ml_pred = ( + get_annotation(gold_docs), + get_annotation(ents_ml_pred), +) +ents_gold.sort(key=lambda l: l[0]) +ents_ml_pred.sort(key=lambda l: l[0]) +ents_rule_pred.sort(key=lambda l: l[0]) + +scores_rule = ( + pd.DataFrame( + compute_scores( + ents_gold=ents_gold, + ents_pred=ents_rule_pred, + boostrap_level="doc", + exact=True, + n_draw=5000, + alpha=0.05, + digits=2, + ) + ) + .T.sort_index()[["N_entity", "Precision", "Recall", "F1"]] + .drop( + index=[ + "ents_per_type", + ] + ) +) +scores_rule.columns = pd.MultiIndex.from_product( + [["Rule-Based"], ["N_entity", "Precision", "Recall", "F1"]] +) + +scores_ml = ( + pd.DataFrame( + compute_scores( + ents_gold=ents_gold, + ents_pred=ents_ml_pred, + boostrap_level="doc", + exact=True, + n_draw=5000, + alpha=0.05, + digits=2, + ) + ) + .T.sort_index()[["Precision", "Recall", "F1"]] + .drop( + index=[ + "ents_per_type", + ] + ) +) +scores_ml.columns = pd.MultiIndex.from_product( + [["ML (NER)"], ["Precision", "Recall", "F1"]] +) +result = scores_rule.merge(scores_ml, left_index=True, 
right_index=True) +result +``` + +```python +import numpy as np + + +def highlight_max(row): + Precision_max = ( + row[:, "Precision"].str.split(" ").str.get(0).astype(float) + == row[:, "Precision"].str.split(" ").str.get(0).astype(float).max() + ) + Recall_max = ( + row[:, "Recall"].str.split(" ").str.get(0).astype(float) + == row[:, "Recall"].str.split(" ").str.get(0).astype(float).max() + ) + F1_max = ( + row[:, "F1"].str.split(" ").str.get(0).astype(float) + == row[:, "F1"].str.split(" ").str.get(0).astype(float).max() + ) + s_max = [False] + for i in range(len(F1_max)): + s_max.append(Precision_max[i]) + s_max.append(Recall_max[i]) + s_max.append(F1_max[i]) + return ["font-weight: bold" if cell else "" for cell in s_max] + + +def remove_confidence(row): + return row[:, :].str.split(" ").str.get(0) + + +result.apply(remove_confidence, axis=1).style.apply(highlight_max, axis=1) +``` + +# Expe lang models + +```python +import spacy + +GOLD_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/test" + +loader = spacy.blank("eds") +brat = BratConnector(GOLD_PATH) +gold_docs = brat.brat2docs(loader) + +CAM_BASE_PRED_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/expe_lang_model/pred_model_camembert_base" + +brat = BratConnector(CAM_BASE_PRED_PATH) +cam_base_pred_docs = brat.brat2docs(loader) + +CAM_BIO_PRED_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/expe_lang_model/pred_model_camembert_bio" + +brat = BratConnector(CAM_BIO_PRED_PATH) +cam_bio_pred_docs = brat.brat2docs(loader) + +DR_BERT_PRED_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/expe_lang_model/pred_model_DrBert" + +brat = BratConnector(DR_BERT_PRED_PATH) +DrBert_pred_docs = brat.brat2docs(loader) + +EDS_FINE_PRED_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/expe_lang_model/pred_model_eds_finetune" + +brat = BratConnector(EDS_FINE_PRED_PATH) +eds_finetune_pred_docs = brat.brat2docs(loader) + +EDS_SCRATCH_PRED_PATH = "/export/home/cse200093/scratch/BioMedics/NER_model/data/NLP_diabeto/expe_lang_model/pred_model_eds_scratch" + +brat = BratConnector(EDS_SCRATCH_PRED_PATH) +eds_scratch_pred_docs = brat.brat2docs(loader) +``` + +```python +scores_cam_base = ( + pd.DataFrame( + evaluate_test( + gold_docs, + cam_base_pred_docs, + boostrap_level="doc", + exact=True, + n_draw=5000, + alpha=0.05, + digits=2, + ) + ) + .T.rename( + index={ + "DISO": "Diso", + "Chemical_and_drugs": "Drugs", + "dosage": "Drugs_Dosage", + "form": "Drugs_Form", + "strength": "Drugs_Strength", + "Overall": "overall", + } + ) + .sort_index()[["N_entity", "Precision", "Recall", "F1"]] + .drop( + index=[ + "ents_per_type", + "route", + "SECTION_traitement", + "SECTION_evolution", + "BIO_milieu", + ] + ) +) +scores_cam_base.columns = pd.MultiIndex.from_product( + [["CamemBert-Base"], ["N_entity", "Precision", "Recall", "F1"]] +) + +scores_cam_bio = ( + pd.DataFrame( + evaluate_test( + gold_docs, + cam_bio_pred_docs, + boostrap_level="doc", + exact=True, + n_draw=5000, + alpha=0.05, + digits=2, + ) + ) + .T.rename( + index={ + "DISO": "Diso", + "Chemical_and_drugs": "Drugs", + "dosage": "Drugs_Dosage", + "form": "Drugs_Form", + "strength": "Drugs_Strength", + "Overall": "overall", + } + ) + .sort_index()[["Precision", "Recall", "F1"]] + .drop( + index=[ + "ents_per_type", + "route", + "SECTION_traitement", + "SECTION_evolution", + "BIO_milieu", + ] + ) +) +scores_cam_bio.columns = pd.MultiIndex.from_product( + 
[["CamemBert-Bio"], ["Precision", "Recall", "F1"]] +) + +scores_DrBert = ( + pd.DataFrame( + evaluate_test( + gold_docs, + DrBert_pred_docs, + boostrap_level="doc", + exact=True, + n_draw=5000, + alpha=0.05, + digits=2, + ) + ) + .T.rename( + index={ + "DISO": "Diso", + "Chemical_and_drugs": "Drugs", + "dosage": "Drugs_Dosage", + "form": "Drugs_Form", + "strength": "Drugs_Strength", + "Overall": "overall", + } + ) + .sort_index()[["Precision", "Recall", "F1"]] + .drop( + index=[ + "ents_per_type", + "route", + "SECTION_traitement", + "SECTION_evolution", + "BIO_milieu", + ] + ) +) +scores_DrBert.columns = pd.MultiIndex.from_product( + [["DrBert"], ["Precision", "Recall", "F1"]] +) + +scores_eds_finetune = ( + pd.DataFrame( + evaluate_test( + gold_docs, + eds_finetune_pred_docs, + boostrap_level="doc", + exact=True, + n_draw=5000, + alpha=0.05, + digits=2, + ) + ) + .T.rename( + index={ + "DISO": "Diso", + "Chemical_and_drugs": "Drugs", + "dosage": "Drugs_Dosage", + "form": "Drugs_Form", + "strength": "Drugs_Strength", + "Overall": "overall", + } + ) + .sort_index()[["Precision", "Recall", "F1"]] + .drop( + index=[ + "ents_per_type", + "route", + "SECTION_traitement", + "SECTION_evolution", + "BIO_milieu", + ] + ) +) +scores_eds_finetune.columns = pd.MultiIndex.from_product( + [["CamemBert-EDS Finetuned"], ["Precision", "Recall", "F1"]] +) + +scores_eds_scratch = ( + pd.DataFrame( + evaluate_test( + gold_docs, + eds_scratch_pred_docs, + boostrap_level="doc", + exact=True, + n_draw=5000, + alpha=0.05, + digits=2, + ) + ) + .T.rename( + index={ + "DISO": "Diso", + "Chemical_and_drugs": "Drugs", + "dosage": "Drugs_Dosage", + "form": "Drugs_Form", + "strength": "Drugs_Strength", + "Overall": "overall", + } + ) + .sort_index()[["Precision", "Recall", "F1"]] + .drop( + index=[ + "ents_per_type", + "route", + "SECTION_traitement", + "SECTION_evolution", + "BIO_milieu", + ] + ) +) +scores_eds_scratch.columns = pd.MultiIndex.from_product( + [["CamemBert-EDS Scratch"], ["Precision", "Recall", "F1"]] +) + +result = ( + scores_cam_base.merge(scores_cam_bio, left_index=True, right_index=True) + .merge(scores_DrBert, left_index=True, right_index=True) + .merge(scores_eds_finetune, left_index=True, right_index=True) + .merge(scores_eds_scratch, left_index=True, right_index=True) +) +``` + +```python +import numpy as np + + +def highlight_max(row): + Precision_max = ( + row[:, "Precision"].str.split(" ").str.get(0).astype(float) + == row[:, "Precision"].str.split(" ").str.get(0).astype(float).max() + ) + Recall_max = ( + row[:, "Recall"].str.split(" ").str.get(0).astype(float) + == row[:, "Recall"].str.split(" ").str.get(0).astype(float).max() + ) + F1_max = ( + row[:, "F1"].str.split(" ").str.get(0).astype(float) + == row[:, "F1"].str.split(" ").str.get(0).astype(float).max() + ) + s_max = [False] + for i in range(len(F1_max)): + s_max.append(Precision_max[i]) + s_max.append(Recall_max[i]) + s_max.append(F1_max[i]) + return ["font-weight: bold" if cell else "" for cell in s_max] + + +def remove_confidence(row): + return row[:, :].str.split(" ").str.get(0) + + +result.apply(remove_confidence, axis=1).style.apply(highlight_max, axis=1) +``` + +```python + +``` diff --git a/NER_model/poetry.lock b/NER_model/poetry.lock new file mode 100644 index 000000000..171173247 --- /dev/null +++ b/NER_model/poetry.lock @@ -0,0 +1,5043 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. 
+ +[[package]] +name = "aiohttp" +version = "3.8.6" +description = "Async http client/server framework (asyncio)" +optional = false +python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:41d55fc043954cddbbd82503d9cc3f4814a40bcef30b3569bc7b5e34130718c1"}, + {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1d84166673694841d8953f0a8d0c90e1087739d24632fe86b1a08819168b4566"}, + {file = "aiohttp-3.8.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:253bf92b744b3170eb4c4ca2fa58f9c4b87aeb1df42f71d4e78815e6e8b73c9e"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fd194939b1f764d6bb05490987bfe104287bbf51b8d862261ccf66f48fb4096"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c5f938d199a6fdbdc10bbb9447496561c3a9a565b43be564648d81e1102ac22"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2817b2f66ca82ee699acd90e05c95e79bbf1dc986abb62b61ec8aaf851e81c93"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa375b3d34e71ccccf172cab401cd94a72de7a8cc01847a7b3386204093bb47"}, + {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9de50a199b7710fa2904be5a4a9b51af587ab24c8e540a7243ab737b45844543"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e1d8cb0b56b3587c5c01de3bf2f600f186da7e7b5f7353d1bf26a8ddca57f965"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8e31e9db1bee8b4f407b77fd2507337a0a80665ad7b6c749d08df595d88f1cf5"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7bc88fc494b1f0311d67f29fee6fd636606f4697e8cc793a2d912ac5b19aa38d"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ec00c3305788e04bf6d29d42e504560e159ccaf0be30c09203b468a6c1ccd3b2"}, + {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad1407db8f2f49329729564f71685557157bfa42b48f4b93e53721a16eb813ed"}, + {file = "aiohttp-3.8.6-cp310-cp310-win32.whl", hash = "sha256:ccc360e87341ad47c777f5723f68adbb52b37ab450c8bc3ca9ca1f3e849e5fe2"}, + {file = "aiohttp-3.8.6-cp310-cp310-win_amd64.whl", hash = "sha256:93c15c8e48e5e7b89d5cb4613479d144fda8344e2d886cf694fd36db4cc86865"}, + {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e2f9cc8e5328f829f6e1fb74a0a3a939b14e67e80832975e01929e320386b34"}, + {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e6a00ffcc173e765e200ceefb06399ba09c06db97f401f920513a10c803604ca"}, + {file = "aiohttp-3.8.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:41bdc2ba359032e36c0e9de5a3bd00d6fb7ea558a6ce6b70acedf0da86458321"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14cd52ccf40006c7a6cd34a0f8663734e5363fd981807173faf3a017e202fec9"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d5b785c792802e7b275c420d84f3397668e9d49ab1cb52bd916b3b3ffcf09ad"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1bed815f3dc3d915c5c1e556c397c8667826fbc1b935d95b0ad680787896a358"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:96603a562b546632441926cd1293cfcb5b69f0b4159e6077f7c7dbdfb686af4d"}, + {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d76e8b13161a202d14c9584590c4df4d068c9567c99506497bdd67eaedf36403"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e3f1e3f1a1751bb62b4a1b7f4e435afcdade6c17a4fd9b9d43607cebd242924a"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:76b36b3124f0223903609944a3c8bf28a599b2cc0ce0be60b45211c8e9be97f8"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:a2ece4af1f3c967a4390c284797ab595a9f1bc1130ef8b01828915a05a6ae684"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:16d330b3b9db87c3883e565340d292638a878236418b23cc8b9b11a054aaa887"}, + {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42c89579f82e49db436b69c938ab3e1559e5a4409eb8639eb4143989bc390f2f"}, + {file = "aiohttp-3.8.6-cp311-cp311-win32.whl", hash = "sha256:efd2fcf7e7b9d7ab16e6b7d54205beded0a9c8566cb30f09c1abe42b4e22bdcb"}, + {file = "aiohttp-3.8.6-cp311-cp311-win_amd64.whl", hash = "sha256:3b2ab182fc28e7a81f6c70bfbd829045d9480063f5ab06f6e601a3eddbbd49a0"}, + {file = "aiohttp-3.8.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fdee8405931b0615220e5ddf8cd7edd8592c606a8e4ca2a00704883c396e4479"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d25036d161c4fe2225d1abff2bd52c34ed0b1099f02c208cd34d8c05729882f0"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d791245a894be071d5ab04bbb4850534261a7d4fd363b094a7b9963e8cdbd31"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0cccd1de239afa866e4ce5c789b3032442f19c261c7d8a01183fd956b1935349"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f13f60d78224f0dace220d8ab4ef1dbc37115eeeab8c06804fec11bec2bbd07"}, + {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a9b5a0606faca4f6cc0d338359d6fa137104c337f489cd135bb7fbdbccb1e39"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:13da35c9ceb847732bf5c6c5781dcf4780e14392e5d3b3c689f6d22f8e15ae31"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:4d4cbe4ffa9d05f46a28252efc5941e0462792930caa370a6efaf491f412bc66"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:229852e147f44da0241954fc6cb910ba074e597f06789c867cb7fb0621e0ba7a"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:713103a8bdde61d13490adf47171a1039fd880113981e55401a0f7b42c37d071"}, + {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:45ad816b2c8e3b60b510f30dbd37fe74fd4a772248a52bb021f6fd65dff809b6"}, + {file = "aiohttp-3.8.6-cp36-cp36m-win32.whl", hash = "sha256:2b8d4e166e600dcfbff51919c7a3789ff6ca8b3ecce16e1d9c96d95dd569eb4c"}, + {file = "aiohttp-3.8.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0912ed87fee967940aacc5306d3aa8ba3a459fcd12add0b407081fbefc931e53"}, + {file = "aiohttp-3.8.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e2a988a0c673c2e12084f5e6ba3392d76c75ddb8ebc6c7e9ead68248101cd446"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ebf3fd9f141700b510d4b190094db0ce37ac6361a6806c153c161dc6c041ccda"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3161ce82ab85acd267c8f4b14aa226047a6bee1e4e6adb74b798bd42c6ae1f80"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d95fc1bf33a9a81469aa760617b5971331cdd74370d1214f0b3109272c0e1e3c"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c43ecfef7deaf0617cee936836518e7424ee12cb709883f2c9a1adda63cc460"}, + {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca80e1b90a05a4f476547f904992ae81eda5c2c85c66ee4195bb8f9c5fb47f28"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:90c72ebb7cb3a08a7f40061079817133f502a160561d0675b0a6adf231382c92"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb54c54510e47a8c7c8e63454a6acc817519337b2b78606c4e840871a3e15349"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:de6a1c9f6803b90e20869e6b99c2c18cef5cc691363954c93cb9adeb26d9f3ae"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:a3628b6c7b880b181a3ae0a0683698513874df63783fd89de99b7b7539e3e8a8"}, + {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fc37e9aef10a696a5a4474802930079ccfc14d9f9c10b4662169671ff034b7df"}, + {file = "aiohttp-3.8.6-cp37-cp37m-win32.whl", hash = "sha256:f8ef51e459eb2ad8e7a66c1d6440c808485840ad55ecc3cafefadea47d1b1ba2"}, + {file = "aiohttp-3.8.6-cp37-cp37m-win_amd64.whl", hash = "sha256:b2fe42e523be344124c6c8ef32a011444e869dc5f883c591ed87f84339de5976"}, + {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9e2ee0ac5a1f5c7dd3197de309adfb99ac4617ff02b0603fd1e65b07dc772e4b"}, + {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01770d8c04bd8db568abb636c1fdd4f7140b284b8b3e0b4584f070180c1e5c62"}, + {file = "aiohttp-3.8.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c68330a59506254b556b99a91857428cab98b2f84061260a67865f7f52899f5"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89341b2c19fb5eac30c341133ae2cc3544d40d9b1892749cdd25892bbc6ac951"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71783b0b6455ac8f34b5ec99d83e686892c50498d5d00b8e56d47f41b38fbe04"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f628dbf3c91e12f4d6c8b3f092069567d8eb17814aebba3d7d60c149391aee3a"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b04691bc6601ef47c88f0255043df6f570ada1a9ebef99c34bd0b72866c217ae"}, + {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ee912f7e78287516df155f69da575a0ba33b02dd7c1d6614dbc9463f43066e3"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9c19b26acdd08dd239e0d3669a3dddafd600902e37881f13fbd8a53943079dbc"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:99c5ac4ad492b4a19fc132306cd57075c28446ec2ed970973bbf036bcda1bcc6"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f0f03211fd14a6a0aed2997d4b1c013d49fb7b50eeb9ffdf5e51f23cfe2c77fa"}, + {file = 
"aiohttp-3.8.6-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:8d399dade330c53b4106160f75f55407e9ae7505263ea86f2ccca6bfcbdb4921"}, + {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ec4fd86658c6a8964d75426517dc01cbf840bbf32d055ce64a9e63a40fd7b771"}, + {file = "aiohttp-3.8.6-cp38-cp38-win32.whl", hash = "sha256:33164093be11fcef3ce2571a0dccd9041c9a93fa3bde86569d7b03120d276c6f"}, + {file = "aiohttp-3.8.6-cp38-cp38-win_amd64.whl", hash = "sha256:bdf70bfe5a1414ba9afb9d49f0c912dc524cf60141102f3a11143ba3d291870f"}, + {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d52d5dc7c6682b720280f9d9db41d36ebe4791622c842e258c9206232251ab2b"}, + {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4ac39027011414dbd3d87f7edb31680e1f430834c8cef029f11c66dad0670aa5"}, + {file = "aiohttp-3.8.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f5c7ce535a1d2429a634310e308fb7d718905487257060e5d4598e29dc17f0b"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b30e963f9e0d52c28f284d554a9469af073030030cef8693106d918b2ca92f54"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:918810ef188f84152af6b938254911055a72e0f935b5fbc4c1a4ed0b0584aed1"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:002f23e6ea8d3dd8d149e569fd580c999232b5fbc601c48d55398fbc2e582e8c"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fcf3eabd3fd1a5e6092d1242295fa37d0354b2eb2077e6eb670accad78e40e1"}, + {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:255ba9d6d5ff1a382bb9a578cd563605aa69bec845680e21c44afc2670607a95"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d67f8baed00870aa390ea2590798766256f31dc5ed3ecc737debb6e97e2ede78"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:86f20cee0f0a317c76573b627b954c412ea766d6ada1a9fcf1b805763ae7feeb"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:39a312d0e991690ccc1a61f1e9e42daa519dcc34ad03eb6f826d94c1190190dd"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e827d48cf802de06d9c935088c2924e3c7e7533377d66b6f31ed175c1620e05e"}, + {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bd111d7fc5591ddf377a408ed9067045259ff2770f37e2d94e6478d0f3fc0c17"}, + {file = "aiohttp-3.8.6-cp39-cp39-win32.whl", hash = "sha256:caf486ac1e689dda3502567eb89ffe02876546599bbf915ec94b1fa424eeffd4"}, + {file = "aiohttp-3.8.6-cp39-cp39-win_amd64.whl", hash = "sha256:3f0e27e5b733803333bb2371249f41cf42bae8884863e8e8965ec69bebe53132"}, + {file = "aiohttp-3.8.6.tar.gz", hash = "sha256:b0cf2a4501bff9330a8a5248b4ce951851e415bdcce9dc158e76cfd55e15085c"}, +] + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = ">=4.0.0a3,<5.0" +attrs = ">=17.3.0" +charset-normalizer = ">=2.0,<4.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns", "cchardet"] + +[[package]] +name = "aiohttp-retry" +version = "2.8.3" +description = "Simple retry client for aiohttp" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiohttp_retry-2.8.3-py3-none-any.whl", hash = "sha256:3aeeead8f6afe48272db93ced9440cf4eda8b6fd7ee2abb25357b7eb28525b45"}, + {file = 
"aiohttp_retry-2.8.3.tar.gz", hash = "sha256:9a8e637e31682ad36e1ff9f8bcba912fcfc7d7041722bc901a4b948da4d71ea9"}, +] + +[package.dependencies] +aiohttp = "*" + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" + +[[package]] +name = "amqp" +version = "5.2.0" +description = "Low-level AMQP client for Python (fork of amqplib)." +optional = false +python-versions = ">=3.6" +files = [ + {file = "amqp-5.2.0-py3-none-any.whl", hash = "sha256:827cb12fb0baa892aad844fd95258143bce4027fdac4fccddbc43330fd281637"}, + {file = "amqp-5.2.0.tar.gz", hash = "sha256:a1ecff425ad063ad42a486c902807d1482311481c8ad95a72694b2975e75f7fd"}, +] + +[package.dependencies] +vine = ">=5.0.0,<6.0.0" + +[[package]] +name = "antlr4-python3-runtime" +version = "4.9.3" +description = "ANTLR 4.9.3 runtime for Python 3.7" +optional = false +python-versions = "*" +files = [ + {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, +] + +[[package]] +name = "appdirs" +version = "1.4.4" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +optional = false +python-versions = "*" +files = [ + {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, + {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, +] + +[[package]] +name = "astunparse" +version = "1.6.3" +description = "An AST unparser for Python" +optional = false +python-versions = "*" +files = [ + {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, + {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, +] + +[package.dependencies] +six = ">=1.6.1,<2.0" +wheel = ">=0.23.0,<1.0" + +[[package]] +name = "async-timeout" +version = "4.0.3" +description = "Timeout context manager for asyncio programs" +optional = false +python-versions = ">=3.7" +files = [ + {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, + {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, +] + +[[package]] +name = "asyncssh" +version = "2.14.1" +description = "AsyncSSH: Asynchronous SSHv2 client and server library" +optional = false +python-versions = ">= 3.6" +files = [ + {file = "asyncssh-2.14.1-py3-none-any.whl", hash = "sha256:9611368f5db62d9adb0deaa3ff37080277f142acd101693a3a9d2b47a84d0e8b"}, + {file = "asyncssh-2.14.1.tar.gz", hash = "sha256:1ac31c333a0d83c88831523245500caa814503423741b0e465339ef6da5b5e29"}, +] + +[package.dependencies] +cryptography = ">=39.0" +typing-extensions = ">=3.6" + +[package.extras] +bcrypt = ["bcrypt (>=3.1.3)"] +fido2 = ["fido2 (>=0.9.2)"] +gssapi = ["gssapi (>=1.2.0)"] +libnacl = ["libnacl (>=1.4.2)"] +pkcs11 = ["python-pkcs11 (>=0.7.0)"] +pyopenssl = ["pyOpenSSL (>=23.0.0)"] +pywin32 = ["pywin32 
(>=227)"] + +[[package]] +name = "atpublic" +version = "4.0" +description = "Keep all y'all's __all__'s in sync" +optional = false +python-versions = ">=3.8" +files = [ + {file = "atpublic-4.0-py3-none-any.whl", hash = "sha256:80057c55641253b86dcb68b524f82328172371b6547d4c7462a9127fbfbbabfc"}, + {file = "atpublic-4.0.tar.gz", hash = "sha256:0f40433219e124edf115c6c363808ca6f0e1cfa7d160d86b2fb94793086d1294"}, +] + +[[package]] +name = "attrs" +version = "23.1.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] + +[[package]] +name = "backports-zoneinfo" +version = "0.2.1" +description = "Backport of the standard library zoneinfo module" +optional = false +python-versions = ">=3.6" +files = [ + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win32.whl", hash = "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win32.whl", hash = "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win32.whl", hash = "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328"}, + {file = 
"backports.zoneinfo-0.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6"}, + {file = "backports.zoneinfo-0.2.1.tar.gz", hash = "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"}, +] + +[package.dependencies] +tzdata = {version = "*", optional = true, markers = "extra == \"tzdata\""} + +[package.extras] +tzdata = ["tzdata"] + +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "billiard" +version = "4.2.0" +description = "Python multiprocessing fork with improvements and bugfixes" +optional = false +python-versions = ">=3.7" +files = [ + {file = "billiard-4.2.0-py3-none-any.whl", hash = "sha256:07aa978b308f334ff8282bd4a746e681b3513db5c9a514cbdd810cbbdc19714d"}, + {file = "billiard-4.2.0.tar.gz", hash = "sha256:9a3c3184cb275aa17a732f93f65b20c525d3d9f253722d26a82194803ade5a2c"}, +] + +[[package]] +name = "blis" +version = "0.7.11" +description = "The Blis BLAS-like linear algebra library, as a self-contained C-extension." +optional = false +python-versions = "*" +files = [ + {file = "blis-0.7.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd5fba34c5775e4c440d80e4dea8acb40e2d3855b546e07c4e21fad8f972404c"}, + {file = "blis-0.7.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:31273d9086cab9c56986d478e3ed6da6752fa4cdd0f7b5e8e5db30827912d90d"}, + {file = "blis-0.7.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d06883f83d4c8de8264154f7c4a420b4af323050ed07398c1ff201c34c25c0d2"}, + {file = "blis-0.7.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee493683e3043650d4413d531e79e580d28a3c7bdd184f1b9cfa565497bda1e7"}, + {file = "blis-0.7.11-cp310-cp310-win_amd64.whl", hash = "sha256:a73945a9d635eea528bccfdfcaa59dd35bd5f82a4a40d5ca31f08f507f3a6f81"}, + {file = "blis-0.7.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1b68df4d01d62f9adaef3dad6f96418787265a6878891fc4e0fabafd6d02afba"}, + {file = "blis-0.7.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:162e60d941a8151418d558a94ee5547cb1bbeed9f26b3b6f89ec9243f111a201"}, + {file = "blis-0.7.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:686a7d0111d5ba727cd62f374748952fd6eb74701b18177f525b16209a253c01"}, + {file = "blis-0.7.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0421d6e44cda202b113a34761f9a062b53f8c2ae8e4ec8325a76e709fca93b6e"}, + {file = "blis-0.7.11-cp311-cp311-win_amd64.whl", hash = "sha256:0dc9dcb3843045b6b8b00432409fd5ee96b8344a324e031bfec7303838c41a1a"}, + {file = "blis-0.7.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dadf8713ea51d91444d14ad4104a5493fa7ecc401bbb5f4a203ff6448fadb113"}, + {file = "blis-0.7.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5bcdaf370f03adaf4171d6405a89fa66cb3c09399d75fc02e1230a78cd2759e4"}, + {file = "blis-0.7.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7de19264b1d49a178bf8035406d0ae77831f3bfaa3ce02942964a81a202abb03"}, + {file = 
"blis-0.7.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ea55c6a4a60fcbf6a0fdce40df6e254451ce636988323a34b9c94b583fc11e5"}, + {file = "blis-0.7.11-cp312-cp312-win_amd64.whl", hash = "sha256:5a305dbfc96d202a20d0edd6edf74a406b7e1404f4fa4397d24c68454e60b1b4"}, + {file = "blis-0.7.11-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:68544a1cbc3564db7ba54d2bf8988356b8c7acd025966e8e9313561b19f0fe2e"}, + {file = "blis-0.7.11-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:075431b13b9dd7b411894d4afbd4212acf4d0f56c5a20628f4b34902e90225f1"}, + {file = "blis-0.7.11-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:324fdf62af9075831aa62b51481960e8465674b7723f977684e32af708bb7448"}, + {file = "blis-0.7.11-cp36-cp36m-win_amd64.whl", hash = "sha256:afebdb02d2dcf9059f23ce1244585d3ce7e95c02a77fd45a500e4a55b7b23583"}, + {file = "blis-0.7.11-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2e62cd14b20e960f21547fee01f3a0b2ac201034d819842865a667c969c355d1"}, + {file = "blis-0.7.11-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89b01c05a5754edc0b9a3b69be52cbee03f645b2ec69651d12216ea83b8122f0"}, + {file = "blis-0.7.11-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfee5ec52ba1e9002311d9191f7129d7b0ecdff211e88536fb24c865d102b50d"}, + {file = "blis-0.7.11-cp37-cp37m-win_amd64.whl", hash = "sha256:844b6377e3e7f3a2e92e7333cc644095386548ad5a027fdc150122703c009956"}, + {file = "blis-0.7.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6df00c24128e323174cde5d80ebe3657df39615322098ce06613845433057614"}, + {file = "blis-0.7.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:809d1da1331108935bf06e22f3cf07ef73a41a572ecd81575bdedb67defe3465"}, + {file = "blis-0.7.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bfabd5272bbbe504702b8dfe30093653d278057656126716ff500d9c184b35a6"}, + {file = "blis-0.7.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca684f5c2f05269f17aefe7812360286e9a1cee3afb96d416485efd825dbcf19"}, + {file = "blis-0.7.11-cp38-cp38-win_amd64.whl", hash = "sha256:688a8b21d2521c2124ee8dfcbaf2c385981ccc27e313e052113d5db113e27d3b"}, + {file = "blis-0.7.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2ff7abd784033836b284ff9f4d0d7cb0737b7684daebb01a4c9fe145ffa5a31e"}, + {file = "blis-0.7.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f9caffcd14795bfe52add95a0dd8426d44e737b55fcb69e2b797816f4da0b1d2"}, + {file = "blis-0.7.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fb36989ed61233cfd48915896802ee6d3d87882190000f8cfe0cf4a3819f9a8"}, + {file = "blis-0.7.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ea09f961871f880d5dc622dce6c370e4859559f0ead897ae9b20ddafd6b07a2"}, + {file = "blis-0.7.11-cp39-cp39-win_amd64.whl", hash = "sha256:5bb38adabbb22f69f22c74bad025a010ae3b14de711bf5c715353980869d491d"}, + {file = "blis-0.7.11.tar.gz", hash = "sha256:cec6d48f75f7ac328ae1b6fbb372dde8c8a57c89559172277f66e01ff08d4d42"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.15.0", markers = "python_version < \"3.9\""}, + {version = ">=1.19.0", markers = "python_version >= \"3.9\""}, +] + +[[package]] +name = "cached-property" +version = "1.5.2" +description = "A decorator for caching properties in classes." 
+optional = false +python-versions = "*" +files = [ + {file = "cached-property-1.5.2.tar.gz", hash = "sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130"}, + {file = "cached_property-1.5.2-py2.py3-none-any.whl", hash = "sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"}, +] + +[[package]] +name = "catalogue" +version = "2.0.10" +description = "Super lightweight function registries for your library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f"}, + {file = "catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15"}, +] + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = {version = ">=0.5", markers = "python_version < \"3.8\""} + +[[package]] +name = "celery" +version = "5.3.5" +description = "Distributed Task Queue." +optional = false +python-versions = ">=3.8" +files = [ + {file = "celery-5.3.5-py3-none-any.whl", hash = "sha256:30b75ac60fb081c2d9f8881382c148ed7c9052031a75a1e8743ff4b4b071f184"}, + {file = "celery-5.3.5.tar.gz", hash = "sha256:6b65d8dd5db499dd6190c45aa6398e171b99592f2af62c312f7391587feb5458"}, +] + +[package.dependencies] +"backports.zoneinfo" = {version = ">=0.2.1", markers = "python_version < \"3.9\""} +billiard = ">=4.2.0,<5.0" +click = ">=8.1.2,<9.0" +click-didyoumean = ">=0.3.0" +click-plugins = ">=1.1.1" +click-repl = ">=0.2.0" +kombu = ">=5.3.3,<6.0" +python-dateutil = ">=2.8.2" +tzdata = ">=2022.7" +vine = ">=5.1.0,<6.0" + +[package.extras] +arangodb = ["pyArango (>=2.0.2)"] +auth = ["cryptography (==41.0.5)"] +azureblockblob = ["azure-storage-blob (>=12.15.0)"] +brotli = ["brotli (>=1.0.0)", "brotlipy (>=0.7.0)"] +cassandra = ["cassandra-driver (>=3.25.0,<4)"] +consul = ["python-consul2 (==0.1.5)"] +cosmosdbsql = ["pydocumentdb (==2.3.5)"] +couchbase = ["couchbase (>=3.0.0)"] +couchdb = ["pycouchdb (==1.14.2)"] +django = ["Django (>=2.2.28)"] +dynamodb = ["boto3 (>=1.26.143)"] +elasticsearch = ["elastic-transport (<=8.10.0)", "elasticsearch (<=8.10.1)"] +eventlet = ["eventlet (>=0.32.0)"] +gevent = ["gevent (>=1.5.0)"] +librabbitmq = ["librabbitmq (>=2.0.0)"] +memcache = ["pylibmc (==1.6.3)"] +mongodb = ["pymongo[srv] (>=4.0.2)"] +msgpack = ["msgpack (==1.0.7)"] +pymemcache = ["python-memcached (==1.59)"] +pyro = ["pyro4 (==4.82)"] +pytest = ["pytest-celery (==0.0.0)"] +redis = ["redis (>=4.5.2,!=4.5.5,<6.0.0)"] +s3 = ["boto3 (>=1.26.143)"] +slmq = ["softlayer-messaging (>=1.0.3)"] +solar = ["ephem (==4.1.5)"] +sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] +sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.0)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] +tblib = ["tblib (>=1.3.0)", "tblib (>=1.5.0)"] +yaml = ["PyYAML (>=3.10)"] +zookeeper = ["kazoo (>=1.3.1)"] +zstd = ["zstandard (==0.22.0)"] + +[[package]] +name = "certifi" +version = "2023.7.22" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] + +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "cfgv" +version = "3.3.1" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"}, + {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash 
= "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, 
+ {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = 
"charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} + +[[package]] +name = "click-didyoumean" +version = "0.3.0" +description = "Enables git-like *did-you-mean* feature in click" +optional = false +python-versions = ">=3.6.2,<4.0.0" +files = [ + {file = "click-didyoumean-0.3.0.tar.gz", hash = "sha256:f184f0d851d96b6d29297354ed981b7dd71df7ff500d82fa6d11f0856bee8035"}, + {file = "click_didyoumean-0.3.0-py3-none-any.whl", hash = "sha256:a0713dc7a1de3f06bc0df5a9567ad19ead2d3d5689b434768a6145bff77c0667"}, +] + +[package.dependencies] +click = ">=7" + +[[package]] +name = "click-plugins" +version 
= "1.1.1" +description = "An extension module for click to enable registering CLI commands via setuptools entry-points." +optional = false +python-versions = "*" +files = [ + {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, + {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"}, +] + +[package.dependencies] +click = ">=4.0" + +[package.extras] +dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] + +[[package]] +name = "click-repl" +version = "0.3.0" +description = "REPL plugin for Click" +optional = false +python-versions = ">=3.6" +files = [ + {file = "click-repl-0.3.0.tar.gz", hash = "sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9"}, + {file = "click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812"}, +] + +[package.dependencies] +click = ">=7.0" +prompt-toolkit = ">=3.0.36" + +[package.extras] +testing = ["pytest (>=7.2.1)", "pytest-cov (>=4.0.0)", "tox (>=4.4.3)"] + +[[package]] +name = "cloudpathlib" +version = "0.16.0" +description = "pathlib-style classes for cloud storage services." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cloudpathlib-0.16.0-py3-none-any.whl", hash = "sha256:f46267556bf91f03db52b5df7a152548596a15aabca1c8731ef32b0b25a1a6a3"}, + {file = "cloudpathlib-0.16.0.tar.gz", hash = "sha256:cdfcd35d46d529587d744154a0bdf962aca953b725c8784cd2ec478354ea63a3"}, +] + +[package.dependencies] +importlib_metadata = {version = "*", markers = "python_version < \"3.8\""} +typing_extensions = {version = ">4", markers = "python_version < \"3.11\""} + +[package.extras] +all = ["cloudpathlib[azure]", "cloudpathlib[gs]", "cloudpathlib[s3]"] +azure = ["azure-storage-blob (>=12)"] +gs = ["google-cloud-storage"] +s3 = ["boto3"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "confection" +version = "0.1.3" +description = "The sweetest config system for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "confection-0.1.3-py3-none-any.whl", hash = "sha256:58b125c9bc6786f32e37fe4d98bc3a03e5f509a4b9de02541b99c559f2026092"}, + {file = "confection-0.1.3.tar.gz", hash = "sha256:5a876d368a7698eec58791126757a75a3df16e26cc49653b52426e9ffd39f12f"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<3.0.0" +srsly = ">=2.4.0,<3.0.0" +typing-extensions = {version = ">=3.7.4.1,<4.5.0", markers = "python_version < \"3.8\""} + +[[package]] +name = "configobj" +version = "5.0.8" +description = "Config file reading, writing and validation." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "configobj-5.0.8-py2.py3-none-any.whl", hash = "sha256:a7a8c6ab7daade85c3f329931a807c8aee750a2494363934f8ea84d8a54c87ea"}, + {file = "configobj-5.0.8.tar.gz", hash = "sha256:6f704434a07dc4f4dc7c9a745172c1cad449feb548febd9f7fe362629c627a97"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "coverage" +version = "6.5.0" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "coverage-6.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef8674b0ee8cc11e2d574e3e2998aea5df5ab242e012286824ea3c6970580e53"}, + {file = "coverage-6.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:784f53ebc9f3fd0e2a3f6a78b2be1bd1f5575d7863e10c6e12504f240fd06660"}, + {file = "coverage-6.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4a5be1748d538a710f87542f22c2cad22f80545a847ad91ce45e77417293eb4"}, + {file = "coverage-6.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83516205e254a0cb77d2d7bb3632ee019d93d9f4005de31dca0a8c3667d5bc04"}, + {file = "coverage-6.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af4fffaffc4067232253715065e30c5a7ec6faac36f8fc8d6f64263b15f74db0"}, + {file = "coverage-6.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:97117225cdd992a9c2a5515db1f66b59db634f59d0679ca1fa3fe8da32749cae"}, + {file = "coverage-6.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a1170fa54185845505fbfa672f1c1ab175446c887cce8212c44149581cf2d466"}, + {file = "coverage-6.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:11b990d520ea75e7ee8dcab5bc908072aaada194a794db9f6d7d5cfd19661e5a"}, + {file = "coverage-6.5.0-cp310-cp310-win32.whl", hash = "sha256:5dbec3b9095749390c09ab7c89d314727f18800060d8d24e87f01fb9cfb40b32"}, + {file = "coverage-6.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:59f53f1dc5b656cafb1badd0feb428c1e7bc19b867479ff72f7a9dd9b479f10e"}, + {file = "coverage-6.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4a5375e28c5191ac38cca59b38edd33ef4cc914732c916f2929029b4bfb50795"}, + {file = "coverage-6.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4ed2820d919351f4167e52425e096af41bfabacb1857186c1ea32ff9983ed75"}, + {file = "coverage-6.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33a7da4376d5977fbf0a8ed91c4dffaaa8dbf0ddbf4c8eea500a2486d8bc4d7b"}, + {file = "coverage-6.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8fb6cf131ac4070c9c5a3e21de0f7dc5a0fbe8bc77c9456ced896c12fcdad91"}, + {file = "coverage-6.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a6b7d95969b8845250586f269e81e5dfdd8ff828ddeb8567a4a2eaa7313460c4"}, + {file = "coverage-6.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1ef221513e6f68b69ee9e159506d583d31aa3567e0ae84eaad9d6ec1107dddaa"}, + {file = "coverage-6.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cca4435eebea7962a52bdb216dec27215d0df64cf27fc1dd538415f5d2b9da6b"}, + {file = "coverage-6.5.0-cp311-cp311-win32.whl", hash = "sha256:98e8a10b7a314f454d9eff4216a9a94d143a7ee65018dd12442e898ee2310578"}, + {file = "coverage-6.5.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:bc8ef5e043a2af066fa8cbfc6e708d58017024dc4345a1f9757b329a249f041b"}, + {file = "coverage-6.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4433b90fae13f86fafff0b326453dd42fc9a639a0d9e4eec4d366436d1a41b6d"}, + {file = "coverage-6.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4f05d88d9a80ad3cac6244d36dd89a3c00abc16371769f1340101d3cb899fc3"}, + {file = "coverage-6.5.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:94e2565443291bd778421856bc975d351738963071e9b8839ca1fc08b42d4bef"}, + {file = "coverage-6.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:027018943386e7b942fa832372ebc120155fd970837489896099f5cfa2890f79"}, + {file = "coverage-6.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:255758a1e3b61db372ec2736c8e2a1fdfaf563977eedbdf131de003ca5779b7d"}, + {file = "coverage-6.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:851cf4ff24062c6aec510a454b2584f6e998cada52d4cb58c5e233d07172e50c"}, + {file = "coverage-6.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:12adf310e4aafddc58afdb04d686795f33f4d7a6fa67a7a9d4ce7d6ae24d949f"}, + {file = "coverage-6.5.0-cp37-cp37m-win32.whl", hash = "sha256:b5604380f3415ba69de87a289a2b56687faa4fe04dbee0754bfcae433489316b"}, + {file = "coverage-6.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4a8dbc1f0fbb2ae3de73eb0bdbb914180c7abfbf258e90b311dcd4f585d44bd2"}, + {file = "coverage-6.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d900bb429fdfd7f511f868cedd03a6bbb142f3f9118c09b99ef8dc9bf9643c3c"}, + {file = "coverage-6.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2198ea6fc548de52adc826f62cb18554caedfb1d26548c1b7c88d8f7faa8f6ba"}, + {file = "coverage-6.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c4459b3de97b75e3bd6b7d4b7f0db13f17f504f3d13e2a7c623786289dd670e"}, + {file = "coverage-6.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20c8ac5386253717e5ccc827caad43ed66fea0efe255727b1053a8154d952398"}, + {file = "coverage-6.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b07130585d54fe8dff3d97b93b0e20290de974dc8177c320aeaf23459219c0b"}, + {file = "coverage-6.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dbdb91cd8c048c2b09eb17713b0c12a54fbd587d79adcebad543bc0cd9a3410b"}, + {file = "coverage-6.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:de3001a203182842a4630e7b8d1a2c7c07ec1b45d3084a83d5d227a3806f530f"}, + {file = "coverage-6.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e07f4a4a9b41583d6eabec04f8b68076ab3cd44c20bd29332c6572dda36f372e"}, + {file = "coverage-6.5.0-cp38-cp38-win32.whl", hash = "sha256:6d4817234349a80dbf03640cec6109cd90cba068330703fa65ddf56b60223a6d"}, + {file = "coverage-6.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:7ccf362abd726b0410bf8911c31fbf97f09f8f1061f8c1cf03dfc4b6372848f6"}, + {file = "coverage-6.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:633713d70ad6bfc49b34ead4060531658dc6dfc9b3eb7d8a716d5873377ab745"}, + {file = "coverage-6.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:95203854f974e07af96358c0b261f1048d8e1083f2de9b1c565e1be4a3a48cfc"}, + {file = "coverage-6.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9023e237f4c02ff739581ef35969c3739445fb059b060ca51771e69101efffe"}, + {file = 
"coverage-6.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:265de0fa6778d07de30bcf4d9dc471c3dc4314a23a3c6603d356a3c9abc2dfcf"}, + {file = "coverage-6.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f830ed581b45b82451a40faabb89c84e1a998124ee4212d440e9c6cf70083e5"}, + {file = "coverage-6.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7b6be138d61e458e18d8e6ddcddd36dd96215edfe5f1168de0b1b32635839b62"}, + {file = "coverage-6.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:42eafe6778551cf006a7c43153af1211c3aaab658d4d66fa5fcc021613d02518"}, + {file = "coverage-6.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:723e8130d4ecc8f56e9a611e73b31219595baa3bb252d539206f7bbbab6ffc1f"}, + {file = "coverage-6.5.0-cp39-cp39-win32.whl", hash = "sha256:d9ecf0829c6a62b9b573c7bb6d4dcd6ba8b6f80be9ba4fc7ed50bf4ac9aecd72"}, + {file = "coverage-6.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc2af30ed0d5ae0b1abdb4ebdce598eafd5b35397d4d75deb341a614d333d987"}, + {file = "coverage-6.5.0-pp36.pp37.pp38-none-any.whl", hash = "sha256:1431986dac3923c5945271f169f59c45b8802a114c8f548d611f2015133df77a"}, + {file = "coverage-6.5.0.tar.gz", hash = "sha256:f642e90754ee3e06b0e7e51bce3379590e76b7f76b708e1a71ff043f87025c84"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "cryptography" +version = "41.0.5" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-41.0.5-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:da6a0ff8f1016ccc7477e6339e1d50ce5f59b88905585f77193ebd5068f1e797"}, + {file = "cryptography-41.0.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b948e09fe5fb18517d99994184854ebd50b57248736fd4c720ad540560174ec5"}, + {file = "cryptography-41.0.5-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d38e6031e113b7421db1de0c1b1f7739564a88f1684c6b89234fbf6c11b75147"}, + {file = "cryptography-41.0.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e270c04f4d9b5671ebcc792b3ba5d4488bf7c42c3c241a3748e2599776f29696"}, + {file = "cryptography-41.0.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ec3b055ff8f1dce8e6ef28f626e0972981475173d7973d63f271b29c8a2897da"}, + {file = "cryptography-41.0.5-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:7d208c21e47940369accfc9e85f0de7693d9a5d843c2509b3846b2db170dfd20"}, + {file = "cryptography-41.0.5-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:8254962e6ba1f4d2090c44daf50a547cd5f0bf446dc658a8e5f8156cae0d8548"}, + {file = "cryptography-41.0.5-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a48e74dad1fb349f3dc1d449ed88e0017d792997a7ad2ec9587ed17405667e6d"}, + {file = "cryptography-41.0.5-cp37-abi3-win32.whl", hash = "sha256:d3977f0e276f6f5bf245c403156673db103283266601405376f075c849a0b936"}, + {file = "cryptography-41.0.5-cp37-abi3-win_amd64.whl", hash = "sha256:73801ac9736741f220e20435f84ecec75ed70eda90f781a148f1bad546963d81"}, + {file = "cryptography-41.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3be3ca726e1572517d2bef99a818378bbcf7d7799d5372a46c79c29eb8d166c1"}, + {file = "cryptography-41.0.5-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = 
"sha256:e886098619d3815e0ad5790c973afeee2c0e6e04b4da90b88e6bd06e2a0b1b72"}, + {file = "cryptography-41.0.5-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:573eb7128cbca75f9157dcde974781209463ce56b5804983e11a1c462f0f4e88"}, + {file = "cryptography-41.0.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0c327cac00f082013c7c9fb6c46b7cc9fa3c288ca702c74773968173bda421bf"}, + {file = "cryptography-41.0.5-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:227ec057cd32a41c6651701abc0328135e472ed450f47c2766f23267b792a88e"}, + {file = "cryptography-41.0.5-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:22892cc830d8b2c89ea60148227631bb96a7da0c1b722f2aac8824b1b7c0b6b8"}, + {file = "cryptography-41.0.5-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:5a70187954ba7292c7876734183e810b728b4f3965fbe571421cb2434d279179"}, + {file = "cryptography-41.0.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:88417bff20162f635f24f849ab182b092697922088b477a7abd6664ddd82291d"}, + {file = "cryptography-41.0.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c707f7afd813478e2019ae32a7c49cd932dd60ab2d2a93e796f68236b7e1fbf1"}, + {file = "cryptography-41.0.5-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:580afc7b7216deeb87a098ef0674d6ee34ab55993140838b14c9b83312b37b86"}, + {file = "cryptography-41.0.5-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fba1e91467c65fe64a82c689dc6cf58151158993b13eb7a7f3f4b7f395636723"}, + {file = "cryptography-41.0.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0d2a6a598847c46e3e321a7aef8af1436f11c27f1254933746304ff014664d84"}, + {file = "cryptography-41.0.5.tar.gz", hash = "sha256:392cb88b597247177172e02da6b7a63deeff1937fa6fec3bbf902ebd75d97ec7"}, +] + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +nox = ["nox"] +pep8test = ["black", "check-sdist", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + +[[package]] +name = "cupy" +version = "11.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy-11.6.0.tar.gz", hash = "sha256:53dbb840072bb32d4bfbaa6bfa072365a30c98b1fcd1f43e48969071ad98f1a7"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.20,<1.27" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.6,<1.12)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2,<6.55.0)", "pytest (>=7.2)"] + +[[package]] +name = "cupy-cuda100" +version = "9.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.6.0" +files = [ + {file = "cupy_cuda100-9.6.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:bd2252b13a845d664a13acf565c584c39c5cbf0511a55a2a7a22012ce32c5ae1"}, + {file = "cupy_cuda100-9.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:cf06eca516d171c80316a9b214d5d6a6ad080b6da63241c0847fcbfdc9f260bb"}, + {file = "cupy_cuda100-9.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b9dd5062054d171a15907c29d3c4383b38f77935a7a461571f4c35accb9f30c4"}, + {file = "cupy_cuda100-9.6.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:8079be975652b93f4307f15d6ccf56dcb128c3394db49ed55fd646561733daf1"}, + {file = "cupy_cuda100-9.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:01902a9d06b266b6a2e7cf866257f5a9fa8b7a0d5f4ea64b7b99a01dcedf3606"}, + {file = "cupy_cuda100-9.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:9b7d4fc7bfb09378e9a601cf37e5ac48cafd12434a360ce0b832f20a1d18aaad"}, + {file = "cupy_cuda100-9.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:aa1e7ea4e90f115099a15c64eaad138ecc8531ff0707ae4c78870126dd589211"}, + {file = "cupy_cuda100-9.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4578bdc28b31dc5f28e88e98cb0591570be2f7061940fa81631ffe40da1860c8"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.17,<1.24" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.10)"] +jenkins = ["codecov", "coverage (<5)", "coveralls", "pytest (>=6.2)", "pytest-cov", "pytest-timeout"] +setup = ["Cython (>=0.29.22,<3)", "fastrlock (>=0.5)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)"] +test = ["pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda101" +version = "9.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.6.0" +files = [ + {file = "cupy_cuda101-9.6.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:9b41d9b2b638a759a23555c849a4f12d4fa47a8631f0f981852941984d745ab6"}, + {file = "cupy_cuda101-9.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:37d990500b1bd89e601fa408ba19a3ecd6a29159e9633dbfdc2d5e89041827e8"}, + {file = "cupy_cuda101-9.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:764c689b35a637e1b5887aebd2281f77c46f4ba97de4a427adf88c1f33caac49"}, + {file = "cupy_cuda101-9.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8bc286b1aa73436e7904bd6a47a2a4639cbe9fc8007fc8ea0e34a8bb34d34d52"}, + {file = "cupy_cuda101-9.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0a6ad4b304c751548b63ca18868e9fdc8b408a684fb46feec3edbe1bc576c3b9"}, + {file = "cupy_cuda101-9.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:ed37bd8cfff9201d64bb0293c577a37ead63057768938655cc72ed81092a84c0"}, + {file = "cupy_cuda101-9.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:97fe4754763bf514cd59e0df0feea34d884fcd96a8abb6ef0b00c6bfd202e170"}, + {file = "cupy_cuda101-9.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:27a32db3b318c06356ff56ec695f5fa52df3e0024c26bc30e09e6964c9168422"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.17,<1.24" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.10)"] +jenkins = ["codecov", "coverage (<5)", "coveralls", "pytest (>=6.2)", "pytest-cov", "pytest-timeout"] +setup = ["Cython (>=0.29.22,<3)", "fastrlock (>=0.5)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)"] +test = ["pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda102" +version = "11.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda102-11.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bf7db6d87983f2c932082a3f8357d051d6c96b08130163cc6d1a7e1d32f3740a"}, + {file = "cupy_cuda102-11.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:24c81c3fbdcd624b23a60a55141a0d474026b62a310693a7e38052d01852cb73"}, + {file = "cupy_cuda102-11.6.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:9adb9b13d93bd437769e9aa314f62199a22110b0eddc4e6a33a7aea6ff0b943d"}, + {file = "cupy_cuda102-11.6.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:8929520a5b04849fd6fdadb2174e80ea76aee548534d791b73cc27c79277dd34"}, + {file = "cupy_cuda102-11.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:fcb8e54f76b7a7a58a0f0e2d8310166d948778254e60b5d5cbfb012cecc847e7"}, + {file = "cupy_cuda102-11.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2e8b20d1d2207eb3a373d53887d9c4142135046365185ea0a59fda0baccfd4e3"}, + {file = "cupy_cuda102-11.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:b276fffded58073cfa18396bc6e915de1abf3c54bd2e69aab96158bf36aca313"}, + {file = "cupy_cuda102-11.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:2555ba27ea6d249c76d3062e479f20ffd0cf54f6c89de8f7c06373550d60f0d4"}, + {file = "cupy_cuda102-11.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e6feae926a8ff3fda54074f595015b4544b8549b821d8270f171a0ea0d041196"}, + {file = "cupy_cuda102-11.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:2971ac5b294aefe962751756ff08fe0b5342a9c2ffe558e0528a7b7ce4ddc7d8"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.20,<1.27" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.6,<1.12)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2,<6.55.0)", "pytest (>=7.2)"] + +[[package]] +name = "cupy-cuda110" +version = "11.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda110-11.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:c7661cd401870259276ca7233d297755eea3c67a8cfcbc0f03b933bb5caad6f4"}, + {file = "cupy_cuda110-11.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:de1e5a08c6e10b63a0514a9721df500ca1553ad58e97671b2f2058a4caf73909"}, + {file = "cupy_cuda110-11.6.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:bb49c44951969d7c7a8c6d4bb22a5a264a0d6d1e08b51f2433ac7e0e843d3a79"}, + {file = "cupy_cuda110-11.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:27232a9db619aaaa63e06c8885438b56ed8419b79832906975cc23539718c897"}, + {file = "cupy_cuda110-11.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:7ff3439636fbdb7b17189703e85e8bf2f87558f5e2d83a195c1d87146c19c851"}, + {file = "cupy_cuda110-11.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:db0d5523117fe996632c61ddf9e1e5dd7c50db862feec5a11481db64679edc8c"}, + {file = "cupy_cuda110-11.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:fe49e000ed81fdafd4c6cb89d411f68fa9e18bad45907d8e0f9a82a35957a320"}, + {file = "cupy_cuda110-11.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:6f73ac134c1ab2343423aaf7ddb16c4c2540d501a5a9c446b30802e8b7a1e86e"}, + {file = "cupy_cuda110-11.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:32952b79110183b24fab1e01f937f9e899122fb12622412efde5f9650ba09799"}, + {file = "cupy_cuda110-11.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:0bb7384dbfb2352c2d26a28995f0954297729398c6eb02820d0ab20979576885"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.20,<1.27" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.6,<1.12)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2,<6.55.0)", "pytest (>=7.2)"] + +[[package]] +name = "cupy-cuda111" +version = "10.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda111-10.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = 
"sha256:f89c7c476fc11533a25873489aed9e93ff8eac87203b8da7b5f85a20de54d58a"}, + {file = "cupy_cuda111-10.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:2e2a3a8ec54a563d97658ae4d61fbcb1c92be17d9939bc51608fdd96f380f519"}, + {file = "cupy_cuda111-10.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:8998a5e29fe7169c8b25331ddab8863234091c797bde55c000e73e2eb167f3fa"}, + {file = "cupy_cuda111-10.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a9816799733f10479b34be03a38451cd3be6d56b8ba6a43e9506a42089c424a9"}, + {file = "cupy_cuda111-10.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:3a0ac219970058efe530c1e05c8ca8974524e2cc3f3de0a73fafd8883e30244b"}, + {file = "cupy_cuda111-10.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:90b7e3865145a6e7eb074f85d2b98a4b12350d758ad948cb240e0be09b24a577"}, + {file = "cupy_cuda111-10.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f2b357f9754190eea42600c3b55e34b6568acca17904ea856a24cefdbbaa1035"}, + {file = "cupy_cuda111-10.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:0d8a44629e0b885743e59f0ac184565d9e0a0944e7cf23374c4513e62159e823"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.18,<1.25" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.11)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2)", "pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda112" +version = "10.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda112-10.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:c811a26534f410e913af1f7501dc0161ab5eabc8a10bdd29fc72d967d45e6a57"}, + {file = "cupy_cuda112-10.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:5d5adc75ea17b3ae09734f4bb4668501c66df4d4379f60aa32f1eff48a2021b3"}, + {file = "cupy_cuda112-10.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bc2ce267e54ae58a8942f6d652a5d7716b24aa13c82e7a333757f534721b48c4"}, + {file = "cupy_cuda112-10.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:fda0dff30224ae42023c5f19b0c2b914f85952489a19658d08974ab18a020a48"}, + {file = "cupy_cuda112-10.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:a884af6cb594e89b0669964b47e1ae6fb13ae6d8c580db3426d08ee2ca84ecd4"}, + {file = "cupy_cuda112-10.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:3fd73efc6585c894c413bc08991ad2a4a0977cd72b149dae6d43d9ceba6226b7"}, + {file = "cupy_cuda112-10.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f63a97eabbf01a24269fa77991f64456705abc7a245fa44e4c8be1b777ee5dc1"}, + {file = "cupy_cuda112-10.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:caef546e3cf8399636657a15f9db086bbc47ea1e9bbf593f9ad356d4aadf115c"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.18,<1.25" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.11)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2)", "pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda113" +version = "10.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda113-10.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:27e5efe2c3afa80ff48654cb27f9e0eddb36f8b26ef0d32d3ba0a233e1359b51"}, + {file = "cupy_cuda113-10.6.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:8b96076d1ddd33fdb2c908ed0f8109caf69d37d36f839a8a8cdae1312508336f"}, + {file = "cupy_cuda113-10.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:22363c2863727cae5154aa4bab9e8a648d7fe66c9e2195d81dd4e8693c2e61ce"}, + {file = "cupy_cuda113-10.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8cc69b9d5735372477a7af3822c8f8e996ffe6de05cfc917500af9dc0117ca3e"}, + {file = "cupy_cuda113-10.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:10dc6899577e445426d81f0960ba9059d9aaa750426997c61fad882d6345264c"}, + {file = "cupy_cuda113-10.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:c6893ac9040a11610e63973063dfd715dbda8bd07ef99951bab7a09c7f335e1e"}, + {file = "cupy_cuda113-10.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4bf4bc06d991c06b95f6fe558d117cafd93bd4eeaf80606f18dd31d20d2eff25"}, + {file = "cupy_cuda113-10.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:3745fc42dca86ba8a1109ddc7964aed8e1efc0ce8085cb2f140dcd6429f26354"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.18,<1.25" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.11)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2)", "pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda114" +version = "10.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda114-10.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:039e53c7269a1fb188140f945af8fb82c1dfd126de41f65af1d4f4604fbbcf19"}, + {file = "cupy_cuda114-10.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:5251c2f561f785c7693aeb062b206779fc7c959930d167f0671a24bb09a9a6ff"}, + {file = "cupy_cuda114-10.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:288b04bf7b7e5aa5480c7ecb3905c36e7ed61c7dab89ee82c099c3c03429335a"}, + {file = "cupy_cuda114-10.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9f17132d9c7d8cd5470343309da23f6d51dec1484a3eef123febc5ae1602b0d4"}, + {file = "cupy_cuda114-10.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2db41e7788ecc6c4eaa78f10808a5e1cd17ab5ac82fd56b69f37150f50b2ac1a"}, + {file = "cupy_cuda114-10.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:61015d9ea498df21ca7387e9180c171cf9a6ba9071bdc31c8088d78f97aa4f47"}, + {file = "cupy_cuda114-10.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:44ade90ffc5cd46a27507da82f9044dc59580e8d4798755c73b02f5d6440873b"}, + {file = "cupy_cuda114-10.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:2fb25ae4177995bb30c7844092702d2126574353898196d579748f0b61715365"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.18,<1.25" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.11)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2)", "pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda115" +version = "10.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda115-10.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:49d6826e87fe4a349d984a5dd8319e375df9cdc40ea9c9c22fa07fca69221fea"}, + {file = "cupy_cuda115-10.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:09ca4b383260568484a9cea37c61dd15336bd551ddd3e85630920bfb92c72df1"}, + {file = "cupy_cuda115-10.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = 
"sha256:2247d5951f1d94aa007a42755add93678a4eb74db8bd1758e27b144a68983055"}, + {file = "cupy_cuda115-10.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8a580ab452495d4dc006aff7fae8e94540925b3ee57576ed4bc461fe4564fbbd"}, + {file = "cupy_cuda115-10.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cf22c55f3eba48a1dd3d5f6863bf08ade32d377bc906a22e32178c3f9ff0ae80"}, + {file = "cupy_cuda115-10.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:61cc5124dd4325959cd7ddb0664e8b572b323db07c579962227833d6e3da3608"}, + {file = "cupy_cuda115-10.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:2d984552ed184f90b3f648924871d1bd3a9f5b63a78f12d7d145a24407443e87"}, + {file = "cupy_cuda115-10.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:f24a53ac1794c1d41b8c2283011c1366288a488920d6aab6a906f44cc02e145e"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.18,<1.25" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.11)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2)", "pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda116" +version = "10.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda116-10.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:08e1ce4e96bab3d2d7028aec6a13da9e3fced8fb6523ae8f45734a4d2271fd9a"}, + {file = "cupy_cuda116-10.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:2286ed8990b4e562fc68b10c037941f14bd9b463ba4b05309f1f2b040d45c756"}, + {file = "cupy_cuda116-10.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:dc86583dc3e722996a95c00bdfd53b7b6e79a72d9ebdfecd38795a4915e3f6d1"}, + {file = "cupy_cuda116-10.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:fb1116168c31c4af0adbff88c6242d5ec379ba699824b041f6fd6196f5a1e8fb"}, + {file = "cupy_cuda116-10.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5d100854febe2d41d8b347289417cfff28b4dcd241e28883ad8371dd98ff16e3"}, + {file = "cupy_cuda116-10.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:2fb8a5e6cc0c8690e1fed2a3a5b6f94c6a9463b6c2c4039e6d73dca1927deef5"}, + {file = "cupy_cuda116-10.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1e459f2c912c71f336698f378916fb82fc35e4ae10fd2221e9c42975ef41956f"}, + {file = "cupy_cuda116-10.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:111b5f9ae199ea4deb9e1f20cb2c318149a7c0c53f1005c5d30a4a61aa204b17"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.18,<1.25" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.11)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2)", "pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda117" +version = "10.6.0" +description = "CuPy: NumPy & SciPy for GPU" +optional = true +python-versions = ">=3.7" +files = [ + {file = "cupy_cuda117-10.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:23cffa50a8d756dd41b842d450252bf18295d8fe185803295c8e34fabb317ee3"}, + {file = "cupy_cuda117-10.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:1ead21cc1d0e3938f67531d8be94c6e1142ed246f7e67e6eded2155afd6a84c4"}, + {file = "cupy_cuda117-10.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:7b1a59a41b6771e49404ff1770b20c1c0878454992c201f6e84216c0167a68fd"}, + {file = "cupy_cuda117-10.6.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:29e1545c7ebbe02f5ea1cc407444f1b0778836e2afc6392247a9c048953646fd"}, + {file = "cupy_cuda117-10.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:4736794f258fe810c51b649c8a57faac5b8fdecbd47f1f81113a4b54fbc4af84"}, + {file = "cupy_cuda117-10.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:37b5784cf5cf1008d69ea8d4d443383f786715dbf305ef359ec7689b34a6689b"}, + {file = "cupy_cuda117-10.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a3ddaab1de49ade9ccd6fd749364fd945a026b582732f5c083f7dbbabffc1578"}, + {file = "cupy_cuda117-10.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:072aab906d0772172696ac8a8338c8d02f322d61e55b77ee02d403032ad2bc5f"}, +] + +[package.dependencies] +fastrlock = ">=0.5" +numpy = ">=1.18,<1.25" + +[package.extras] +all = ["Cython (>=0.29.22,<3)", "optuna (>=2.0)", "scipy (>=1.4,<1.11)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "mypy (==0.950)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)", "types-setuptools (==57.4.14)"] +test = ["hypothesis (>=6.37.2)", "pytest (>=6.2)"] + +[[package]] +name = "cupy-cuda80" +version = "7.8.0" +description = "CuPy: NumPy-like API accelerated with CUDA" +optional = true +python-versions = ">=3.5.0" +files = [ + {file = "cupy_cuda80-7.8.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:4efdbd04db2339126ac305cae9756f91444c047a6cf5d5ccd265bd49bf5922ba"}, + {file = "cupy_cuda80-7.8.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:71536358e8218481110db47f375d87adc1636754fb12af879282d09473e76186"}, + {file = "cupy_cuda80-7.8.0-cp36-cp36m-win_amd64.whl", hash = "sha256:41fa1c27bf700f2aa3d8387716a03ce1ebbf889e3984080211c671a3acfd8e1e"}, + {file = "cupy_cuda80-7.8.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:d2602d687a9e4ad32efe0f797efd544a8a57a7f68d5330a0f0bc6130b662adea"}, + {file = "cupy_cuda80-7.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:248d7bd759e6b1daa0948ca0a615a5347f6763cfc7f5e6f353b99214fd451610"}, + {file = "cupy_cuda80-7.8.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:86f36907f259c082b4b230ab6328b9a1b2e9dd0662b957f7090f5d3e308b7617"}, + {file = "cupy_cuda80-7.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:dad9c4ea275727df60af3f269326a8d495476b3251472379961a512155266596"}, +] + +[package.dependencies] +fastrlock = ">=0.3" +numpy = ">=1.9.0" +six = ">=1.9.0" + +[package.extras] +appveyor = ["attrs (<19.2.0)", "mock", "pytest (<4.2.0)"] +docs = ["sphinx (==3.0.4)", "sphinx-rtd-theme"] +doctest = ["matplotlib"] +jenkins = ["attrs (<19.2.0)", "codecov", "coveralls", "mock", "pytest (<4.2.0)", "pytest-cov", "pytest-timeout"] +setup = ["fastrlock (>=0.3)"] +stylecheck = ["autopep8 (==1.4.4)", "flake8 (==3.7.9)", "pbr (==4.0.4)", "pycodestyle (==2.5.0)"] +test = ["attrs (<19.2.0)", "mock", "pytest (<4.2.0)"] +travis = ["autopep8 (==1.4.4)", "flake8 (==3.7.9)", "pbr (==4.0.4)", "pycodestyle (==2.5.0)", "sphinx (==3.0.4)", "sphinx-rtd-theme"] + +[[package]] +name = "cupy-cuda90" +version = "8.6.0" +description = "CuPy: A NumPy-compatible array library accelerated by CUDA" +optional = true +python-versions = ">=3.5.0" +files = [ + {file = "cupy_cuda90-8.6.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:fa2c9ee21d9a23029cfeabcdbfeae836c563d65e78cb08a76e46986300e9e907"}, + {file = "cupy_cuda90-8.6.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:ac6e2601b40b0237403c04075284e9574337113a1a74736d5ed3ca4c52bdb96a"}, + {file = "cupy_cuda90-8.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:99772ed8d9abee8a6781521e17610b67e24bf0db7e2e8cb8d1238f0d9f444412"}, + {file = 
"cupy_cuda90-8.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:00e5eb6a7de4917b5089b552e5d05113c1bcb3e8ce6bf074e711fb4c0097e843"}, + {file = "cupy_cuda90-8.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09106bc2311a502a6aad05d768b000ffac80ccec9aa13db145b77b1b70207a1b"}, + {file = "cupy_cuda90-8.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:a97bd932e89517fcfc2888f8b626eef83d6c8a9d520b589f610d146eaee236f8"}, + {file = "cupy_cuda90-8.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:f3d86cf1f485e02ace26827c6c552630854284ca7d96219c7ea2e18f6bdbd6c6"}, + {file = "cupy_cuda90-8.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:48988bb51560a2498a1256fa1744561e9cb9d1384c2b88ab56bc499dc786afc0"}, + {file = "cupy_cuda90-8.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:d207b53b6e9e4d396795abc0e5cf0ffe46610174bb9640d4a9cfb9dd130e9ee3"}, +] + +[package.dependencies] +fastrlock = ">=0.3" +numpy = ">=1.15" + +[package.extras] +jenkins = ["attrs (<19.2.0)", "codecov", "coverage (<5)", "coveralls", "pytest (<4.2.0)", "pytest-cov (<2.10)", "pytest-timeout"] +setup = ["Cython (>=0.29.22)", "fastrlock (>=0.3)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)"] +test = ["attrs (<19.2.0)", "pytest (<4.2.0)"] + +[[package]] +name = "cupy-cuda91" +version = "7.8.0" +description = "CuPy: NumPy-like API accelerated with CUDA" +optional = true +python-versions = ">=3.5.0" +files = [ + {file = "cupy_cuda91-7.8.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:dfb60b10a7d47090327e815934dc1f94b495fca1737cfc31787f69a8100de3ea"}, + {file = "cupy_cuda91-7.8.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b6c75c6009ed509b1b6d51fe80e92f95fd170587f478622a62eca4a5e14051cb"}, + {file = "cupy_cuda91-7.8.0-cp36-cp36m-win_amd64.whl", hash = "sha256:db297b15085d715759251a85e8640fa44b397118c23f63d20009d1f73a4ebd08"}, + {file = "cupy_cuda91-7.8.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:95360ece88a6f448022962b59a44d36e242c125eaf8d23534476f5a79f9f91b4"}, + {file = "cupy_cuda91-7.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c976609d78ab90f0f0f0fe2b043f93a00f690f3617bf571d0826465836f15920"}, + {file = "cupy_cuda91-7.8.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:16ebdde7d11795bcf0b862496b87a5a11ef7d86894078a8a065bb6ae72ce3a15"}, + {file = "cupy_cuda91-7.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:7f24901760b489397cec0eb9beb09edde470c2157622896ed84c1342f0eef29c"}, +] + +[package.dependencies] +fastrlock = ">=0.3" +numpy = ">=1.9.0" +six = ">=1.9.0" + +[package.extras] +appveyor = ["attrs (<19.2.0)", "mock", "pytest (<4.2.0)"] +docs = ["sphinx (==3.0.4)", "sphinx-rtd-theme"] +doctest = ["matplotlib"] +jenkins = ["attrs (<19.2.0)", "codecov", "coveralls", "mock", "pytest (<4.2.0)", "pytest-cov", "pytest-timeout"] +setup = ["fastrlock (>=0.3)"] +stylecheck = ["autopep8 (==1.4.4)", "flake8 (==3.7.9)", "pbr (==4.0.4)", "pycodestyle (==2.5.0)"] +test = ["attrs (<19.2.0)", "mock", "pytest (<4.2.0)"] +travis = ["autopep8 (==1.4.4)", "flake8 (==3.7.9)", "pbr (==4.0.4)", "pycodestyle (==2.5.0)", "sphinx (==3.0.4)", "sphinx-rtd-theme"] + +[[package]] +name = "cupy-cuda92" +version = "8.6.0" +description = "CuPy: A NumPy-compatible array library accelerated by CUDA" +optional = true +python-versions = ">=3.5.0" +files = [ + {file = "cupy_cuda92-8.6.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:93de0d7a5a7e82326e3a6d54fcc031464e5c514b3683db3e3f47cd1e0995dec9"}, + {file = "cupy_cuda92-8.6.0-cp36-cp36m-manylinux1_x86_64.whl", hash = 
"sha256:6acfc0b7f1b29b1cc5cb2d97c893d57c0f16208692174ec060d4162938fa9973"}, + {file = "cupy_cuda92-8.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:c1c6a210f8978ee54b30f30fb4eecf9dd60d84788ab706d9120aae4caa8578a4"}, + {file = "cupy_cuda92-8.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:52c214d94cfc33b9513615f62877d1e235718c52c32178c645dcfbf72be4cd7b"}, + {file = "cupy_cuda92-8.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4df33964d2064da47fa5c3cdc27607cd6559a82baab098da5e11d61d5e1085a2"}, + {file = "cupy_cuda92-8.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:bf3c5a5357ad96c2f669f27c952629550fdb162ee72f066214c9cdfd1a8e096b"}, + {file = "cupy_cuda92-8.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:60a91cac6276a8229be642b85a2f48ce7ab2412713f25417d6c2c6bb940ed028"}, + {file = "cupy_cuda92-8.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:def44e69633bf7fbd279eb914f721852d0715701e62fced993fd1e2ecc0937e0"}, + {file = "cupy_cuda92-8.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:99f0ec8807115ad4473294d5f3adcc9f9cc6c906b212212ced802a07517dff09"}, +] + +[package.dependencies] +fastrlock = ">=0.3" +numpy = ">=1.15" + +[package.extras] +jenkins = ["attrs (<19.2.0)", "codecov", "coverage (<5)", "coveralls", "pytest (<4.2.0)", "pytest-cov (<2.10)", "pytest-timeout"] +setup = ["Cython (>=0.29.22)", "fastrlock (>=0.3)"] +stylecheck = ["autopep8 (==1.5.5)", "flake8 (==3.8.4)", "pbr (==5.5.1)", "pycodestyle (==2.6.0)"] +test = ["attrs (<19.2.0)", "pytest (<4.2.0)"] + +[[package]] +name = "cymem" +version = "2.0.8" +description = "Manage calls to calloc/free through Cython" +optional = false +python-versions = "*" +files = [ + {file = "cymem-2.0.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77b5d3a73c41a394efd5913ab7e48512054cd2dabb9582d489535456641c7666"}, + {file = "cymem-2.0.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bd33da892fb560ba85ea14b1528c381ff474048e861accc3366c8b491035a378"}, + {file = "cymem-2.0.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29a551eda23eebd6d076b855f77a5ed14a1d1cae5946f7b3cb5de502e21b39b0"}, + {file = "cymem-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8260445652ae5ab19fff6851f32969a7b774f309162e83367dd0f69aac5dbf7"}, + {file = "cymem-2.0.8-cp310-cp310-win_amd64.whl", hash = "sha256:a63a2bef4c7e0aec7c9908bca0a503bf91ac7ec18d41dd50dc7dff5d994e4387"}, + {file = "cymem-2.0.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6b84b780d52cb2db53d4494fe0083c4c5ee1f7b5380ceaea5b824569009ee5bd"}, + {file = "cymem-2.0.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0d5f83dc3cb5a39f0e32653cceb7c8ce0183d82f1162ca418356f4a8ed9e203e"}, + {file = "cymem-2.0.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ac218cf8a43a761dc6b2f14ae8d183aca2bbb85b60fe316fd6613693b2a7914"}, + {file = "cymem-2.0.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42c993589d1811ec665d37437d5677b8757f53afadd927bf8516ac8ce2d3a50c"}, + {file = "cymem-2.0.8-cp311-cp311-win_amd64.whl", hash = "sha256:ab3cf20e0eabee9b6025ceb0245dadd534a96710d43fb7a91a35e0b9e672ee44"}, + {file = "cymem-2.0.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cb51fddf1b920abb1f2742d1d385469bc7b4b8083e1cfa60255e19bc0900ccb5"}, + {file = "cymem-2.0.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9235957f8c6bc2574a6a506a1687164ad629d0b4451ded89d49ebfc61b52660c"}, + {file = "cymem-2.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a2cc38930ff5409f8d61f69a01e39ecb185c175785a1c9bec13bcd3ac8a614ba"}, + {file = "cymem-2.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bf49e3ea2c441f7b7848d5c61b50803e8cbd49541a70bb41ad22fce76d87603"}, + {file = "cymem-2.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:ecd12e3bacf3eed5486e4cd8ede3c12da66ee0e0a9d0ae046962bc2bb503acef"}, + {file = "cymem-2.0.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:167d8019db3b40308aabf8183fd3fbbc256323b645e0cbf2035301058c439cd0"}, + {file = "cymem-2.0.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17cd2c2791c8f6b52f269a756ba7463f75bf7265785388a2592623b84bb02bf8"}, + {file = "cymem-2.0.8-cp36-cp36m-win_amd64.whl", hash = "sha256:6204f0a3307bf45d109bf698ba37997ce765f21e359284328e4306c7500fcde8"}, + {file = "cymem-2.0.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b9c05db55ea338648f8e5f51dd596568c7f62c5ae32bf3fa5b1460117910ebae"}, + {file = "cymem-2.0.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ce641f7ba0489bd1b42a4335a36f38c8507daffc29a512681afaba94a0257d2"}, + {file = "cymem-2.0.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6b83a5972a64f62796118da79dfeed71f4e1e770b2b7455e889c909504c2358"}, + {file = "cymem-2.0.8-cp37-cp37m-win_amd64.whl", hash = "sha256:ada6eb022e4a0f4f11e6356a5d804ceaa917174e6cf33c0b3e371dbea4dd2601"}, + {file = "cymem-2.0.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1e593cd57e2e19eb50c7ddaf7e230b73c890227834425b9dadcd4a86834ef2ab"}, + {file = "cymem-2.0.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d513f0d5c6d76facdc605e42aa42c8d50bb7dedca3144ec2b47526381764deb0"}, + {file = "cymem-2.0.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e370dd54359101b125bfb191aca0542718077b4edb90ccccba1a28116640fed"}, + {file = "cymem-2.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84f8c58cde71b8fc7024883031a4eec66c0a9a4d36b7850c3065493652695156"}, + {file = "cymem-2.0.8-cp38-cp38-win_amd64.whl", hash = "sha256:6a6edddb30dd000a27987fcbc6f3c23b7fe1d74f539656952cb086288c0e4e29"}, + {file = "cymem-2.0.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b896c83c08dadafe8102a521f83b7369a9c5cc3e7768eca35875764f56703f4c"}, + {file = "cymem-2.0.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a4f8f2bfee34f6f38b206997727d29976666c89843c071a968add7d61a1e8024"}, + {file = "cymem-2.0.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7372e2820fa66fd47d3b135f3eb574ab015f90780c3a21cfd4809b54f23a4723"}, + {file = "cymem-2.0.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4e57bee56d35b90fc2cba93e75b2ce76feaca05251936e28a96cf812a1f5dda"}, + {file = "cymem-2.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:ceeab3ce2a92c7f3b2d90854efb32cb203e78cb24c836a5a9a2cac221930303b"}, + {file = "cymem-2.0.8.tar.gz", hash = "sha256:8fb09d222e21dcf1c7e907dc85cf74501d4cea6c4ed4ac6c9e016f98fb59cbbf"}, +] + +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + +[[package]] +name = "dictdiffer" +version = "0.9.0" +description = "Dictdiffer is a 
library that helps you to diff and patch dictionaries." +optional = false +python-versions = "*" +files = [ + {file = "dictdiffer-0.9.0-py2.py3-none-any.whl", hash = "sha256:442bfc693cfcadaf46674575d2eba1c53b42f5e404218ca2c2ff549f2df56595"}, + {file = "dictdiffer-0.9.0.tar.gz", hash = "sha256:17bacf5fbfe613ccf1b6d512bd766e6b21fb798822a133aa86098b8ac9997578"}, +] + +[package.extras] +all = ["Sphinx (>=3)", "check-manifest (>=0.42)", "mock (>=1.3.0)", "numpy (>=1.13.0)", "numpy (>=1.15.0)", "numpy (>=1.18.0)", "numpy (>=1.20.0)", "pytest (==5.4.3)", "pytest (>=6)", "pytest-cov (>=2.10.1)", "pytest-isort (>=1.2.0)", "pytest-pycodestyle (>=2)", "pytest-pycodestyle (>=2.2.0)", "pytest-pydocstyle (>=2)", "pytest-pydocstyle (>=2.2.0)", "sphinx (>=3)", "sphinx-rtd-theme (>=0.2)", "tox (>=3.7.0)"] +docs = ["Sphinx (>=3)", "sphinx-rtd-theme (>=0.2)"] +numpy = ["numpy (>=1.13.0)", "numpy (>=1.15.0)", "numpy (>=1.18.0)", "numpy (>=1.20.0)"] +tests = ["check-manifest (>=0.42)", "mock (>=1.3.0)", "pytest (==5.4.3)", "pytest (>=6)", "pytest-cov (>=2.10.1)", "pytest-isort (>=1.2.0)", "pytest-pycodestyle (>=2)", "pytest-pycodestyle (>=2.2.0)", "pytest-pydocstyle (>=2)", "pytest-pydocstyle (>=2.2.0)", "sphinx (>=3)", "tox (>=3.7.0)"] + +[[package]] +name = "diskcache" +version = "5.6.3" +description = "Disk Cache -- Disk and file backed persistent cache." +optional = false +python-versions = ">=3" +files = [ + {file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"}, + {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"}, +] + +[[package]] +name = "distlib" +version = "0.3.7" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, + {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, +] + +[[package]] +name = "distro" +version = "1.8.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.8.0-py3-none-any.whl", hash = "sha256:99522ca3e365cac527b44bde033f64c6945d90eb9f769703caaec52b09bbd3ff"}, + {file = "distro-1.8.0.tar.gz", hash = "sha256:02e111d1dc6a50abb8eed6bf31c3e48ed8b0830d1ea2a1b78c61765c2513fdd8"}, +] + +[[package]] +name = "dpath" +version = "2.1.6" +description = "Filesystem-like pathing and searching for dictionaries" +optional = false +python-versions = ">=3.7" +files = [ + {file = "dpath-2.1.6-py3-none-any.whl", hash = "sha256:31407395b177ab63ef72e2f6ae268c15e938f2990a8ecf6510f5686c02b6db73"}, + {file = "dpath-2.1.6.tar.gz", hash = "sha256:f1e07c72e8605c6a9e80b64bc8f42714de08a789c7de417e49c3f87a19692e47"}, +] + +[[package]] +name = "dulwich" +version = "0.21.6" +description = "Python Git Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "dulwich-0.21.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7f89bee4c97372e8aaf8ffaf5899f1bcd5184b5306d7eaf68738c1101ceba10e"}, + {file = "dulwich-0.21.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:847bb52562a211b596453a602e75739350c86d7edb846b5b1c46896a5c86b9bb"}, + {file = "dulwich-0.21.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4e09d0b4e985b371aa6728773781b19298d361a00772e20f98522868cf7edc6f"}, + {file = 
"dulwich-0.21.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dfb50b3915e223a97f50fbac0dbc298d5fffeaac004eeeb3d552c57fe38416f"}, + {file = "dulwich-0.21.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a64eca1601e79c16df78afe08da9ac9497b934cbc5765990ca7d89a4b87453d9"}, + {file = "dulwich-0.21.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1fedd924763a5d640348db43a267a394aa80d551228ad45708e0b0cc2130bb62"}, + {file = "dulwich-0.21.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:edc21c3784dd9d9b85abd9fe53f81a884e2cdcc4e5e09ada17287420d64cfd46"}, + {file = "dulwich-0.21.6-cp310-cp310-win32.whl", hash = "sha256:daa3584beabfcf0da76df57535a23c80ff6d8ccde6ddbd23bdc79d317a0e20a7"}, + {file = "dulwich-0.21.6-cp310-cp310-win_amd64.whl", hash = "sha256:40623cc39a3f1634663d22d87f86e2e406cc8ff17ae7a3edc7fcf963c288992f"}, + {file = "dulwich-0.21.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e8ed878553f0b76facbb620b455fafa0943162fe8e386920717781e490444efa"}, + {file = "dulwich-0.21.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a89b19f4960e759915dbc23a4dd0abc067b55d8d65e9df50961b73091b87b81a"}, + {file = "dulwich-0.21.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28acbd08d6b38720d99cc01da9dd307a2e0585e00436c95bcac6357b9a9a6f76"}, + {file = "dulwich-0.21.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2f2683e0598f7c7071ef08a0822f062d8744549a0d45f2c156741033b7e3d7d"}, + {file = "dulwich-0.21.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54342cf96fe8a44648505c65f23d18889595762003a168d67d7263df66143bd2"}, + {file = "dulwich-0.21.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2a3fc071e5b14f164191286f7ffc02f60fe8b439d01fad0832697cc08c2237dd"}, + {file = "dulwich-0.21.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:32d7acfe3fe2ce4502446d8f7a5ab34cfd24c9ff8961e60337638410906a8fbb"}, + {file = "dulwich-0.21.6-cp311-cp311-win32.whl", hash = "sha256:5e58171a5d70f7910f73d25ff82a058edff09a4c1c3bd1de0dc6b1fbc9a42c3e"}, + {file = "dulwich-0.21.6-cp311-cp311-win_amd64.whl", hash = "sha256:ceabe8f96edfb9183034a860f5dc77586700b517457032867b64a03c44e5cf96"}, + {file = "dulwich-0.21.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4fdc2f081bc3e9e120079c2cea4be213e3f127335aca7c0ab0c19fe791270caa"}, + {file = "dulwich-0.21.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fe957564108f74325d0d042d85e0c67ef470921ca92b6e7d330c7c49a3b9c1d"}, + {file = "dulwich-0.21.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2912c8a845c8ccbc79d068a89db7172e355adeb84eb31f062cd3a406d528b30"}, + {file = "dulwich-0.21.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:81e237a6b1b20c79ef62ca19a8fb231f5519bab874b9a1c2acf9c05edcabd600"}, + {file = "dulwich-0.21.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:513d045e74307eeb31592255c38f37042c9aa68ce845a167943018ab5138b0e3"}, + {file = "dulwich-0.21.6-cp37-cp37m-win32.whl", hash = "sha256:e1ac882afa890ef993b8502647e6c6d2b3977ce56e3fe80058ce64607cbc7107"}, + {file = "dulwich-0.21.6-cp37-cp37m-win_amd64.whl", hash = "sha256:5d2ccf3d355850674f75655154a6519bf1f1664176c670109fa7041019b286f9"}, + {file = "dulwich-0.21.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:28c9724a167c84a83fc6238e0781f4702b5fe8c53ede31604525fb1a9d1833f4"}, + {file = "dulwich-0.21.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:c816be529680659b6a19798287b4ec6de49040f58160d40b1b2934fd6c28e93f"}, + {file = "dulwich-0.21.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b0545f0fa9444a0eb84977d08e302e3f55fd7c34a0466ec28bedc3c839b2fc1f"}, + {file = "dulwich-0.21.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b1682e8e826471ea3c22b8521435e93799e3db8ad05dd3c8f9b1aaacfa78147"}, + {file = "dulwich-0.21.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24ad45928a65f39ea0f451f9989b7aaedba9893d48c3189b544a70c6a1043f71"}, + {file = "dulwich-0.21.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b1c9e55233f19cd19c484f607cd90ab578ac50ebfef607f77e3b35c2b6049470"}, + {file = "dulwich-0.21.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:18697b58e0fc5972de68b529b08ac9ddda3f39af27bcf3f6999635ed3da7ef68"}, + {file = "dulwich-0.21.6-cp38-cp38-win32.whl", hash = "sha256:22798e9ba59e32b8faff5d9067e2b5a308f6b0fba9b1e1e928571ad278e7b36c"}, + {file = "dulwich-0.21.6-cp38-cp38-win_amd64.whl", hash = "sha256:6c91e1ed20d3d9a6aaaed9e75adae37272b3fcbcc72bab1eb09574806da88563"}, + {file = "dulwich-0.21.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8b84450766a3b151c3676fec3e3ed76304e52a84d5d69ade0f34fff2782c1b41"}, + {file = "dulwich-0.21.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a3da632648ee27b64bb5b285a3a94fddf297a596891cca12ac0df43c4f59448f"}, + {file = "dulwich-0.21.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cef50c0a19f322b7150248b8fa0862ce1652dec657e340c4020573721e85f215"}, + {file = "dulwich-0.21.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ac20dfcfd6057efb8499158d23f2c059f933aefa381e192100e6d8bc25d562"}, + {file = "dulwich-0.21.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81d10aa50c0a9a6dd495990c639358e3a3bbff39e17ff302179be6e93b573da7"}, + {file = "dulwich-0.21.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a9b52a08d49731375662936d05a12c4a64a6fe0ce257111f62638e475fb5d26d"}, + {file = "dulwich-0.21.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ed2f1f638b9adfba862719693b371ffe5d58e94d552ace9a23dea0fb0db6f468"}, + {file = "dulwich-0.21.6-cp39-cp39-win32.whl", hash = "sha256:bf90f2f9328a82778cf85ab696e4a7926918c3f315c75fc432ba31346bfa89b7"}, + {file = "dulwich-0.21.6-cp39-cp39-win_amd64.whl", hash = "sha256:e0dee3840c3c72e1d60c8f87a7a715d8eac023b9e1b80199d97790f7a1c60d9c"}, + {file = "dulwich-0.21.6-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:32d3a35caad6879d04711b358b861142440a543f5f4e02df67b13cbcd57f84a6"}, + {file = "dulwich-0.21.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c04df87098053b7767b46fc04b7943d75443f91c73560ca50157cdc22e27a5d3"}, + {file = "dulwich-0.21.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e07f145c7b0d82a9f77d157f493a61900e913d1c1f8b1f40d07d919ffb0929a4"}, + {file = "dulwich-0.21.6-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:008ff08629ab16d3638a9f36cfc6f5bd74b4d594657f2dc1583d8d3201794571"}, + {file = "dulwich-0.21.6-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bf469cd5076623c2aad69d01ce9d5392fcb38a5faef91abe1501be733453e37d"}, + {file = "dulwich-0.21.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6592ef2d16ac61a27022647cf64a048f5be6e0a6ab2ebc7322bfbe24fb2b971b"}, + {file = "dulwich-0.21.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:99577b2b37f64bc87280079245fb2963494c345d7db355173ecec7ab3d64b949"}, + {file = "dulwich-0.21.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d7cd9fb896c65e4c28cb9332f2be192817805978dd8dc299681c4fe83c631158"}, + {file = "dulwich-0.21.6-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d9002094198e57e88fe77412d3aa64dd05978046ae725a16123ba621a7704628"}, + {file = "dulwich-0.21.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9b6f8a16f32190aa88c37ef013858b3e01964774bc983900bd0d74ecb6576e6"}, + {file = "dulwich-0.21.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee8aba4dec4d0a52737a8a141f3456229c87dcfd7961f8115786a27b6ebefed"}, + {file = "dulwich-0.21.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a780e2a0ff208c4f218e72eff8d13f9aff485ff9a6f3066c22abe4ec8cec7dcd"}, + {file = "dulwich-0.21.6.tar.gz", hash = "sha256:30fbe87e8b51f3813c131e2841c86d007434d160bd16db586b40d47f31dd05b0"}, +] + +[package.dependencies] +urllib3 = ">=1.25" + +[package.extras] +fastimport = ["fastimport"] +https = ["urllib3 (>=1.24.1)"] +paramiko = ["paramiko"] +pgp = ["gpg"] + +[[package]] +name = "dvc" +version = "2.58.2" +description = "Git for data scientists - manage your code and data together" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dvc-2.58.2-py3-none-any.whl", hash = "sha256:3a935615812fd57c341e8b58a7f4de57d9d4a067500376041c3f1c805ee24345"}, + {file = "dvc-2.58.2.tar.gz", hash = "sha256:d40fff99b76719d1d524f103ad9dc64141bb363492abb9b8a61c6e70efe5a4dc"}, +] + +[package.dependencies] +colorama = ">=0.3.9" +configobj = ">=5.0.6" +distro = ">=1.3" +dpath = ">=2.1.0,<3" +dvc-data = ">=0.51.0,<0.52" +dvc-http = ">=2.29.0" +dvc-render = ">=0.3.1,<1" +dvc-studio-client = ">=0.9.2,<1" +dvc-task = ">=0.2.1,<1" +flatten-dict = ">=0.4.1,<1" +"flufl.lock" = ">=5" +funcy = ">=1.14" +grandalf = ">=0.7,<1" +hydra-core = ">=1.1" +iterative-telemetry = ">=0.0.7" +networkx = ">=2.5" +packaging = ">=19" +pathspec = ">=0.10.3" +platformdirs = ">=3.1.1,<4" +psutil = ">=5.8" +pydot = ">=1.2.4" +pygtrie = ">=2.3.2" +pyparsing = ">=2.4.7" +requests = ">=2.22" +rich = ">=12" +"ruamel.yaml" = ">=0.17.11" +scmrepo = ">=1.0.0,<2" +shortuuid = ">=0.5" +shtab = ">=1.3.4,<2" +tabulate = ">=0.8.7" +tomlkit = ">=0.11.1" +tqdm = ">=4.63.1,<5" +voluptuous = ">=0.11.7" +"zc.lockfile" = ">=1.2.1" + +[package.extras] +all = ["dvc[azure,gdrive,gs,hdfs,oss,s3,ssh,webdav,webhdfs]"] +azure = ["dvc-azure (>=2.21.2)"] +dev = ["dvc[azure,gdrive,gs,hdfs,lint,oss,s3,ssh,terraform,tests,webdav,webhdfs]"] +gdrive = ["dvc-gdrive (==2.19.2)"] +gs = ["dvc-gs (==2.22.0)"] +hdfs = ["dvc-hdfs (==2.19)"] +lint = ["mypy (==1.3.0)", "pylint (==2.17.4)", "types-colorama", "types-psutil", "types-requests", "types-tabulate", "types-toml", "types-tqdm"] +oss = ["dvc-oss (==2.19)"] +s3 = ["dvc-s3 (==2.22.0)"] +ssh = ["dvc-ssh (>=2.22.1,<3)"] +ssh-gssapi = ["dvc-ssh[gssapi] (>=2.22.1,<3)"] +terraform = ["tpi[ssh] (>=2.1)"] +testing = ["pytest-benchmark[histogram]", "pytest-test-utils", "pytest-virtualenv"] +tests = ["beautifulsoup4 (>=4.4)", "dvc-ssh", "dvc[testing]", "filelock", "flaky", "pytest (>=7,<8)", "pytest-cov", "pytest-docker (>=1,<2)", "pytest-lazy-fixture", "pytest-mock", "pytest-test-utils", "pytest-timeout (>=2)", "pytest-xdist (>=3.2)", "pywin32 (>=225)"] +webdav = ["dvc-webdav (==2.19.1)"] +webhdfs = ["dvc-webhdfs (==2.19)"] +webhdfs-kerberos = ["dvc-webhdfs[kerberos] (==2.19)"] + +[[package]] +name = "dvc-data" +version = "0.51.0" +description 
= "dvc data" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dvc-data-0.51.0.tar.gz", hash = "sha256:32544bb7d3ae509f91c2d07a9dc3739dd87980be5b41582781efb4f246b58497"}, + {file = "dvc_data-0.51.0-py3-none-any.whl", hash = "sha256:f3244f7d848f10fdb21d5ff485410f6955a1de828a149577633e3ed0f451ebd2"}, +] + +[package.dependencies] +attrs = ">=21.3.0" +dictdiffer = ">=0.8.1" +diskcache = ">=5.2.1" +dvc-objects = ">=0.22.0,<1" +funcy = ">=1.14" +nanotime = ">=0.5.2" +pygtrie = ">=2.3.2" +shortuuid = ">=0.5.0" +sqltrie = ">=0.3.1,<1" + +[package.extras] +all = ["rich (>=10.11.0,<14.0.0)", "typer[all] (>=0.6)"] +cli = ["rich (>=10.11.0,<14.0.0)", "typer[all] (>=0.6)"] +dev = ["blake3 (>=0.3.1)", "mypy (==0.971)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-benchmark (==4.0.0)", "pytest-cov (==4.0.0)", "pytest-mock (==3.10.0)", "pytest-servers[s3] (==0.1.3)", "pytest-sugar (==0.9.6)", "rich (>=10.11.0,<14.0.0)", "typer[all] (>=0.6)"] +tests = ["mypy (==0.971)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-benchmark (==4.0.0)", "pytest-cov (==4.0.0)", "pytest-mock (==3.10.0)", "pytest-servers[s3] (==0.1.3)", "pytest-sugar (==0.9.6)"] + +[[package]] +name = "dvc-http" +version = "2.30.2" +description = "http plugin for dvc" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dvc-http-2.30.2.tar.gz", hash = "sha256:d7cf66e8f8359cc9f5ca137de24d259beebdec444516fc7d085ad26fa7d3b34b"}, + {file = "dvc_http-2.30.2-py3-none-any.whl", hash = "sha256:e5e8c915af84e6e464a67053e22b75fef77c2eabb3b7f4355c2b968ca7dcf52b"}, +] + +[package.dependencies] +aiohttp-retry = ">=2.5.0" +fsspec = {version = "*", extras = ["http"]} + +[package.extras] +tests = ["dvc[testing]", "flaky (==3.7.0)", "mypy (==0.910)", "pylint (==2.15.9)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)", "pytest-mock (==3.6.1)", "pytest-xdist (==2.4.0)", "rangehttpserver (==1.2.0)", "types-requests (==2.25.11)"] + +[[package]] +name = "dvc-objects" +version = "0.25.0" +description = "dvc objects" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dvc-objects-0.25.0.tar.gz", hash = "sha256:6e13add661ab7766cc26493102c7981b5164351f0ca4ee33d080d1651d4b5899"}, + {file = "dvc_objects-0.25.0-py3-none-any.whl", hash = "sha256:09f318cbb376750f4d2ef0afcde4ae41ca3f3071d6192bfee676812acd1f6d1f"}, +] + +[package.dependencies] +fsspec = ">=2022.10.0" +funcy = ">=1.14" +packaging = ">=19" +shortuuid = ">=0.5.0" +tqdm = ">=4.63.1,<5" +typing-extensions = ">=3.7.4" + +[package.extras] +dev = ["mypy (==0.971)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-mock (==3.8.2)", "pytest-servers[s3] (==0.1.3)", "pytest-sugar (==0.9.6)"] +tests = ["mypy (==0.971)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-mock (==3.8.2)", "pytest-servers[s3] (==0.1.3)", "pytest-sugar (==0.9.6)"] + +[[package]] +name = "dvc-render" +version = "0.6.0" +description = "DVC render" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dvc-render-0.6.0.tar.gz", hash = "sha256:69b7dfdadf890beb6d7fa5b3d4bd33323d78fc4c3ce33ed1bf777026192f9b4d"}, + {file = "dvc_render-0.6.0-py3-none-any.whl", hash = "sha256:2dc6c73d02538e9396475e146048e20242233d418967f82e0627e5caa3360303"}, +] + +[package.extras] +dev = ["flatten-dict (>=0.4.1,<1)", "funcy (>=1.17)", "matplotlib", "mkdocs (==1.5.2)", "mkdocs-gen-files (==0.5.0)", "mkdocs-material (==9.3.1)", "mkdocs-section-index (==0.3.6)", "mkdocstrings-python (==1.6.3)", "mypy (==0.981)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-cov 
(==3.0.0)", "pytest-mock (==3.8.2)", "pytest-sugar (>=0.9.6,<1.0)", "pytest-test-utils (>=0.0.6)", "tabulate (>=0.8.7)"] +docs = ["mkdocs (==1.5.2)", "mkdocs-gen-files (==0.5.0)", "mkdocs-material (==9.3.1)", "mkdocs-section-index (==0.3.6)", "mkdocstrings-python (==1.6.3)"] +markdown = ["flatten-dict (>=0.4.1,<1)", "matplotlib", "tabulate (>=0.8.7)"] +table = ["flatten-dict (>=0.4.1,<1)", "tabulate (>=0.8.7)"] +tests = ["flatten-dict (>=0.4.1,<1)", "funcy (>=1.17)", "matplotlib", "mypy (==0.981)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-cov (==3.0.0)", "pytest-mock (==3.8.2)", "pytest-sugar (>=0.9.6,<1.0)", "pytest-test-utils (>=0.0.6)", "tabulate (>=0.8.7)"] + +[[package]] +name = "dvc-studio-client" +version = "0.15.0" +description = "Small library to post data from DVC/DVCLive to Iterative Studio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dvc-studio-client-0.15.0.tar.gz", hash = "sha256:46dd508a0fb2c1c9986efd4111aa16ad3e40718c5e86a2be9f6e5ee509ff44a1"}, + {file = "dvc_studio_client-0.15.0-py3-none-any.whl", hash = "sha256:f51f36f9a86ea2bfcaed95b2ad6f532ed59a4d527c1febe079a938d79ff86796"}, +] + +[package.dependencies] +dulwich = "*" +requests = "*" +voluptuous = "*" + +[package.extras] +dev = ["mkdocs (==1.5.2)", "mkdocs-gen-files (==0.5.0)", "mkdocs-material (==9.2.2)", "mkdocs-section-index (==0.3.5)", "mkdocstrings-python (==1.5.0)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-mock (==3.11.1)", "pytest-sugar (==0.9.7)"] +docs = ["mkdocs (==1.5.2)", "mkdocs-gen-files (==0.5.0)", "mkdocs-material (==9.2.2)", "mkdocs-section-index (==0.3.5)", "mkdocstrings-python (==1.5.0)"] +tests = ["pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-mock (==3.11.1)", "pytest-sugar (==0.9.7)"] + +[[package]] +name = "dvc-task" +version = "0.3.0" +description = "Extensible task queue used in DVC." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "dvc-task-0.3.0.tar.gz", hash = "sha256:6ab288bfbbc4a2df8ef145c543bb979d6cb8fb49037fec821a59ad6e1dfdddce"}, + {file = "dvc_task-0.3.0-py3-none-any.whl", hash = "sha256:637908e3a54670cb09924dd96161e025399c426fc3cb2e3b9b8a030d7cfcfbcd"}, +] + +[package.dependencies] +celery = ">=5.3.0,<6" +funcy = ">=1.17" +kombu = ">=5.3.0,<6" +pywin32 = {version = ">=225", markers = "sys_platform == \"win32\""} +shortuuid = ">=1.0.8" + +[package.extras] +dev = ["celery-types (==0.15.0)", "flaky (==3.7.0)", "mkdocs (==1.3.1)", "mkdocs-gen-files (==0.3.5)", "mkdocs-material (==8.4.1)", "mkdocs-section-index (==0.3.4)", "mkdocstrings-python (==0.7.1)", "mypy (==0.971)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-celery", "pytest-cov (==3.0.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.6)", "pytest-test-utils (>=0.0.6)"] +docs = ["mkdocs (==1.3.1)", "mkdocs-gen-files (==0.3.5)", "mkdocs-material (==8.4.1)", "mkdocs-section-index (==0.3.4)", "mkdocstrings-python (==0.7.1)"] +tests = ["celery-types (==0.15.0)", "flaky (==3.7.0)", "mypy (==0.971)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-celery", "pytest-cov (==3.0.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.6)", "pytest-test-utils (>=0.0.6)"] + +[[package]] +name = "exceptiongroup" +version = "1.1.3" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "fastrlock" +version = "0.8.2" +description = "Fast, re-entrant optimistic lock implemented in Cython" +optional = true +python-versions = "*" +files = [ + {file = "fastrlock-0.8.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:94e348c72a1fd1f8191f25ea056448e4f5a87b8fbf005b39d290dcb0581a48cd"}, + {file = "fastrlock-0.8.2-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2d5595903444c854b99c42122b87edfe8a37cd698a4eae32f4fd1d2a7b6c115d"}, + {file = "fastrlock-0.8.2-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e4bbde174a0aff5f6eeba75cf8c4c5d2a316316bc21f03a0bddca0fc3659a6f3"}, + {file = "fastrlock-0.8.2-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7a2ccaf88ac0db153e84305d1ef0aa138cea82c6a88309066f6eaa3bc98636cd"}, + {file = "fastrlock-0.8.2-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:31a27a2edf482df72b91fe6c6438314d2c65290aa7becc55589d156c9b91f0da"}, + {file = "fastrlock-0.8.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:e9904b5b37c3e5bb4a245c56bc4b7e497da57ffb8528f4fc39af9dcb168ee2e1"}, + {file = "fastrlock-0.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:43a241655e83e4603a152192cf022d5ca348c2f4e56dfb02e5c9c4c1a32f9cdb"}, + {file = "fastrlock-0.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9121a894d74e65557e47e777060a495ab85f4b903e80dd73a3c940ba042920d7"}, + {file = "fastrlock-0.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:11bbbbc526363955aeddb9eec4cee2a0012322b7b2f15b54f44454fcf4fd398a"}, + {file = 
"fastrlock-0.8.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:27786c62a400e282756ae1b090bcd7cfa35f28270cff65a9e7b27a5327a32561"}, + {file = "fastrlock-0.8.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:08315bde19d0c2e6b06593d5a418be3dc8f9b1ee721afa96867b9853fceb45cf"}, + {file = "fastrlock-0.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e8b49b5743ede51e0bcf6805741f39f5e0e0fd6a172ba460cb39e3097ba803bb"}, + {file = "fastrlock-0.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b443e73a4dfc7b6e0800ea4c13567b9694358e86f53bb2612a51c9e727cac67b"}, + {file = "fastrlock-0.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:b3853ed4ce522598dc886160a7bab432a093051af85891fa2f5577c1dcac8ed6"}, + {file = "fastrlock-0.8.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:790fc19bccbd39426060047e53629f171a44745613bf360a045e9f9c8c4a2cea"}, + {file = "fastrlock-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:dbdce852e6bb66e1b8c36679d482971d69d93acf1785657522e51b7de30c3356"}, + {file = "fastrlock-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d47713ffe6d4a627fbf078be9836a95ac106b4a0543e3841572c91e292a5d885"}, + {file = "fastrlock-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:ea96503b918fceaf40443182742b8964d47b65c5ebdea532893cb9479620000c"}, + {file = "fastrlock-0.8.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c6bffa978793bea5e1b00e677062e53a62255439339591b70e209fa1552d5ee0"}, + {file = "fastrlock-0.8.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75c07726c8b1a52147fd7987d6baaa318c5dced1416c3f25593e40f56e10755b"}, + {file = "fastrlock-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:88f079335e9da631efa64486c8207564a7bcd0c00526bb9e842e9d5b7e50a6cc"}, + {file = "fastrlock-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4fb2e77ff04bc4beb71d63c8e064f052ce5a6ea1e001d528d4d7f4b37d736f2e"}, + {file = "fastrlock-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:b4c9083ea89ab236b06e9ef2263971db3b4b507195fc7d5eecab95828dcae325"}, + {file = "fastrlock-0.8.2-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:98195866d3a9949915935d40a88e4f1c166e82e378f622c88025f2938624a90a"}, + {file = "fastrlock-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b22ea9bf5f9fad2b0077e944a7813f91593a4f61adf8faf734a70aed3f2b3a40"}, + {file = "fastrlock-0.8.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dcc1bf0ac8a194313cf6e645e300a8a379674ceed8e0b1e910a2de3e3c28989e"}, + {file = "fastrlock-0.8.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a3dcc876050b8f5cbc0ee84ef1e7f0c1dfe7c148f10098828bc4403683c33f10"}, + {file = "fastrlock-0.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:685e656048b59d8dfde8c601f188ad53a4d719eb97080cafc8696cda6d75865e"}, + {file = "fastrlock-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:fb5363cf0fddd9b50525ddbf64a1e1b28ec4c6dfb28670a940cb1cf988a6786b"}, + {file = "fastrlock-0.8.2-cp35-cp35m-macosx_10_15_x86_64.whl", hash = "sha256:a74f5a92fa6e51c4f3c69b29c4662088b97be12f40652a21109605a175c81824"}, + {file = "fastrlock-0.8.2-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:ccf39ad5702e33e4d335b48ef9d56e21619b529b7f7471b5211419f380329b62"}, + {file = "fastrlock-0.8.2-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:66f2662c640bb71a1016a031eea6eef9d25c2bcdf7ffd1d1ddc5a58f9a1ced04"}, + {file = "fastrlock-0.8.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:17734e2e5af4c07ddb0fb10bd484e062c22de3be6b67940b9cc6ec2f18fa61ba"}, + {file = "fastrlock-0.8.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:ab91b0c36e95d42e1041a4907e3eefd06c482d53af3c7a77be7e214cc7cd4a63"}, + {file = "fastrlock-0.8.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b32fdf874868326351a75b1e4c02f97e802147119ae44c52d3d9da193ec34f5b"}, + {file = "fastrlock-0.8.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:2074548a335fcf7d19ebb18d9208da9e33b06f745754466a7e001d2b1c58dd19"}, + {file = "fastrlock-0.8.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4fb04442b6d1e2b36c774919c6bcbe3339c61b337261d4bd57e27932589095af"}, + {file = "fastrlock-0.8.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1fed2f4797ad68e9982038423018cf08bec5f4ce9fed63a94a790773ed6a795c"}, + {file = "fastrlock-0.8.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e380ec4e6d8b26e389713995a43cb7fe56baea2d25fe073d4998c4821a026211"}, + {file = "fastrlock-0.8.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:25945f962c7bd808415cfde3da624d4399d4ea71ed8918538375f16bceb79e1c"}, + {file = "fastrlock-0.8.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:2c1719ddc8218b01e82fb2e82e8451bd65076cb96d7bef4477194bbb4305a968"}, + {file = "fastrlock-0.8.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:5460c5ee6ced6d61ec8cd2324ebbe793a4960c4ffa2131ffff480e3b61c99ec5"}, + {file = "fastrlock-0.8.2-cp36-cp36m-win_amd64.whl", hash = "sha256:33145acbad8317584cd64588131c7e1e286beef6280c0009b4544c91fce171d2"}, + {file = "fastrlock-0.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:59344c1d46b7dec97d3f22f1cc930fafe8980b3c5bc9c9765c56738a5f1559e4"}, + {file = "fastrlock-0.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b2a1c354f13f22b737621d914f3b4a8434ae69d3027a775e94b3e671756112f9"}, + {file = "fastrlock-0.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:cf81e0278b645004388873e0a1f9e3bc4c9ab8c18e377b14ed1a544be4b18c9a"}, + {file = "fastrlock-0.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1b15430b93d7eb3d56f6ff690d2ebecb79ed0e58248427717eba150a508d1cd7"}, + {file = "fastrlock-0.8.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:067edb0a0805bf61e17a251d5046af59f6e9d2b8ad01222e0ef7a0b7937d5548"}, + {file = "fastrlock-0.8.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eb31fe390f03f7ae886dcc374f1099ec88526631a4cb891d399b68181f154ff0"}, + {file = "fastrlock-0.8.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:643e1e65b4f5b284427e61a894d876d10459820e93aa1e724dfb415117be24e0"}, + {file = "fastrlock-0.8.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5dfb78dd600a12f23fc0c3ec58f81336229fdc74501ecf378d1ce5b3f2f313ea"}, + {file = 
"fastrlock-0.8.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b8ca0fe21458457077e4cb2d81e1ebdb146a00b3e9e2db6180a773f7ea905032"}, + {file = "fastrlock-0.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:d918dfe473291e8bfd8e13223ea5cb9b317bd9f50c280923776c377f7c64b428"}, + {file = "fastrlock-0.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:c393af77c659a38bffbca215c0bcc8629ba4299568308dd7e4ff65d62cabed39"}, + {file = "fastrlock-0.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:73426f5eb2ecc10626c67cf86bd0af9e00d53e80e5c67d5ce8e18376d6abfa09"}, + {file = "fastrlock-0.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:320fd55bafee3eb069cfb5d6491f811a912758387ef2193840e2663e80e16f48"}, + {file = "fastrlock-0.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8c1c91a68926421f5ccbc82c85f83bd3ba593b121a46a1b9a554b3f0dd67a4bf"}, + {file = "fastrlock-0.8.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ad1bc61c7f6b0e58106aaab034916b6cb041757f708b07fbcdd9d6e1ac629225"}, + {file = "fastrlock-0.8.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:87f4e01b042c84e6090dbc4fbe3415ddd69f6bc0130382323f9d3f1b8dd71b46"}, + {file = "fastrlock-0.8.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d34546ad2e4a480b94b6797bcc5a322b3c705c4c74c3e4e545c4a3841c1b2d59"}, + {file = "fastrlock-0.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ebb32d776b61acd49f859a1d16b9e3d84e7b46d0d92aebd58acd54dc38e96664"}, + {file = "fastrlock-0.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:30bdbe4662992348132d03996700e1cf910d141d629179b967b146a22942264e"}, + {file = "fastrlock-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:07ed3c7b3867c05a3d6be4ced200c7767000f3431b9be6da66972822dd86e8be"}, + {file = "fastrlock-0.8.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:ddf5d247f686aec853ddcc9a1234bfcc6f57b0a0670d2ad82fc25d8ae7e6a15f"}, + {file = "fastrlock-0.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:7269bb3fc15587b0c191eecd95831d771a7d80f0c48929e560806b038ff3066c"}, + {file = "fastrlock-0.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adcb9e77aa132cc6c9de2ffe7cf880a20aa8cdba21d367d1da1a412f57bddd5d"}, + {file = "fastrlock-0.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:a3b8b5d2935403f1b4b25ae324560e94b59593a38c0d2e7b6c9872126a9622ed"}, + {file = "fastrlock-0.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2587cedbb36c7988e707d83f0f1175c1f882f362b5ebbee25d70218ea33d220d"}, + {file = "fastrlock-0.8.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9af691a9861027181d4de07ed74f0aee12a9650ac60d0a07f4320bff84b5d95f"}, + {file = "fastrlock-0.8.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:99dd6652bd6f730beadf74ef769d38c6bbd8ee6d1c15c8d138ea680b0594387f"}, + {file = "fastrlock-0.8.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:4d63b6596368dab9e0cc66bf047e7182a56f33b34db141816a4f21f5bf958228"}, + {file = "fastrlock-0.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ff75c90663d6e8996610d435e71487daa853871ad1770dd83dc0f2fc4997241e"}, + {file = 
"fastrlock-0.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e27c3cd27fbd25e5223c5c992b300cd4ee8f0a75c6f222ce65838138d853712c"}, + {file = "fastrlock-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:dd961a32a7182c3891cdebca417fda67496d5d5de6ae636962254d22723bdf52"}, + {file = "fastrlock-0.8.2.tar.gz", hash = "sha256:644ec9215cf9c4df8028d8511379a15d9c1af3e16d80e47f1b6fdc6ba118356a"}, +] + +[[package]] +name = "filelock" +version = "3.12.2" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.7" +files = [ + {file = "filelock-3.12.2-py3-none-any.whl", hash = "sha256:cbb791cdea2a72f23da6ac5b5269ab0a0d161e9ef0100e653b69049a7706d1ec"}, + {file = "filelock-3.12.2.tar.gz", hash = "sha256:002740518d8aa59a26b0c76e10fb8c6e15eae825d34b6fdf670333fd7b938d81"}, +] + +[package.extras] +docs = ["furo (>=2023.5.20)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] + +[[package]] +name = "flatten-dict" +version = "0.4.2" +description = "A flexible utility for flattening and unflattening dict-like objects in Python." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "flatten-dict-0.4.2.tar.gz", hash = "sha256:506a96b6e6f805b81ae46a0f9f31290beb5fa79ded9d80dbe1b7fa236ab43076"}, + {file = "flatten_dict-0.4.2-py2.py3-none-any.whl", hash = "sha256:7e245b20c4c718981212210eec4284a330c9f713e632e98765560e05421e48ad"}, +] + +[package.dependencies] +six = ">=1.12,<2.0" + +[[package]] +name = "flufl-lock" +version = "8.0.2" +description = "NFS-safe file locking with timeouts for POSIX and Windows" +optional = false +python-versions = ">=3.8" +files = [ + {file = "flufl_lock-8.0.2-py3-none-any.whl", hash = "sha256:ca33fb581122d651e4f24775bebed1e58cd1ea85a95a505881902ba050ed170b"}, + {file = "flufl_lock-8.0.2.tar.gz", hash = "sha256:61c7246b34d6e5544c8a1fa4dae396d10e16ceb23371a31db22e0a2993d01432"}, +] + +[package.dependencies] +atpublic = "*" +psutil = "*" + +[[package]] +name = "frozenlist" +version = "1.4.0" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, + {file = 
"frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, + {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, + {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, + {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, + {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, + {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, + {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, + {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, + {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, + {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, +] + +[[package]] +name = "fsspec" +version = "2022.11.0" +description = "File-system specification" +optional = false +python-versions = ">=3.7" +files = [ + {file = "fsspec-2022.11.0-py3-none-any.whl", hash = "sha256:d6e462003e3dcdcb8c7aa84c73a228f8227e72453cd22570e2363e8844edfe7b"}, + {file = "fsspec-2022.11.0.tar.gz", hash = "sha256:259d5fd5c8e756ff2ea72f42e7613c32667dc2049a4ac3d84364a7ca034acb8b"}, +] + +[package.dependencies] +aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} +requests = {version = "*", optional = true, markers = "extra == \"http\""} + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +entrypoints = ["importlib-metadata"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +tqdm = ["tqdm"] + +[[package]] +name = "funcy" +version = "2.0" +description = "A fancy and practical functional tools" +optional = false +python-versions = "*" +files = [ + {file = "funcy-2.0-py2.py3-none-any.whl", hash = "sha256:53df23c8bb1651b12f095df764bfb057935d49537a56de211b098f4c79614bb0"}, + {file = "funcy-2.0.tar.gz", hash = "sha256:3963315d59d41c6f30c04bc910e10ab50a3ac4a225868bfa96feed133df075cb"}, +] + +[[package]] +name = "ghp-import" +version = "2.1.0" +description = "Copy your docs directly to the gh-pages branch." 
+optional = false +python-versions = "*" +files = [ + {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, + {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, +] + +[package.dependencies] +python-dateutil = ">=2.8.1" + +[package.extras] +dev = ["flake8", "markdown", "twine", "wheel"] + +[[package]] +name = "gitdb" +version = "4.0.11" +description = "Git Object Database" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"}, + {file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.40" +description = "GitPython is a Python library used to interact with Git repositories" +optional = false +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.40-py3-none-any.whl", hash = "sha256:cf14627d5a8049ffbf49915732e5eddbe8134c3bdb9d476e6182b676fc573f8a"}, + {file = "GitPython-3.1.40.tar.gz", hash = "sha256:22b126e9ffb671fdd0c129796343a02bf67bf2994b35449ffc9321aa755e18a4"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"] + +[[package]] +name = "grandalf" +version = "0.8" +description = "Graph and drawing algorithms framework" +optional = false +python-versions = "*" +files = [ + {file = "grandalf-0.8-py3-none-any.whl", hash = "sha256:793ca254442f4a79252ea9ff1ab998e852c1e071b863593e5383afee906b4185"}, + {file = "grandalf-0.8.tar.gz", hash = "sha256:2813f7aab87f0d20f334a3162ccfbcbf085977134a17a5b516940a93a77ea974"}, +] + +[package.dependencies] +pyparsing = "*" + +[package.extras] +full = ["numpy", "ply"] + +[[package]] +name = "griffe" +version = "0.30.1" +description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "griffe-0.30.1-py3-none-any.whl", hash = "sha256:b2f3df6952995a6bebe19f797189d67aba7c860755d3d21cc80f64d076d0154c"}, + {file = "griffe-0.30.1.tar.gz", hash = "sha256:007cc11acd20becf1bb8f826419a52b9d403bbad9d8c8535699f5440ddc0a109"}, +] + +[package.dependencies] +cached-property = {version = "*", markers = "python_version < \"3.8\""} +colorama = ">=0.4" + +[[package]] +name = "huggingface-hub" +version = "0.16.4" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:0d3df29932f334fead024afc7cb4cc5149d955238b8b5e42dcf9740d6995a349"}, + {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"}, +] + +[package.dependencies] +filelock = "*" +fsspec = "*" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +inference = ["aiohttp", "pydantic"] +quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["torch"] +typing = ["pydantic", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + +[[package]] +name = "hydra-core" +version = "1.3.2" +description = "A framework for elegantly configuring complex applications" +optional = false +python-versions = "*" +files = [ + {file = "hydra-core-1.3.2.tar.gz", hash = "sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"}, + {file = "hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b"}, +] + +[package.dependencies] +antlr4-python3-runtime = "==4.9.*" +importlib-resources = {version = "*", markers = "python_version < \"3.9\""} +omegaconf = ">=2.2,<2.4" +packaging = "*" + +[[package]] +name = "identify" +version = "2.5.24" +description = "File identification library for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "identify-2.5.24-py2.py3-none-any.whl", hash = "sha256:986dbfb38b1140e763e413e6feb44cd731faf72d1909543178aa79b0e258265d"}, + {file = "identify-2.5.24.tar.gz", hash = 
"sha256:0aac67d5b4812498056d28a9a512a483f5085cc28640b02b258a59dac34301d4"}, +] + +[package.extras] +license = ["ukkonen"] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "importlib-metadata" +version = "6.7.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, + {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, +] + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] + +[[package]] +name = "importlib-resources" +version = "6.1.1" +description = "Read resources from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_resources-6.1.1-py3-none-any.whl", hash = "sha256:e8bf90d8213b486f428c9c39714b920041cb02c184686a3dee24905aaa8105d6"}, + {file = "importlib_resources-6.1.1.tar.gz", hash = "sha256:3893a00122eafde6894c59914446a512f728a0c1a45f9bb9b63721b6bacf0b4a"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "zipp (>=3.17)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "iterative-telemetry" +version = "0.0.8" +description = "Common library for sending telemetry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "iterative-telemetry-0.0.8.tar.gz", hash = "sha256:5bed9d19109c892cff2a4712a2fb18ad727079a7ab260a28b1e2f6934eec652d"}, + {file = "iterative_telemetry-0.0.8-py3-none-any.whl", hash = "sha256:af0a37ec727c1fd728df6e8103e4c89557b99869218e668dce5ca99e6e51231f"}, +] + +[package.dependencies] +appdirs = "*" +distro = "*" +filelock = "*" +requests = "*" + +[package.extras] +dev = ["mypy (==0.971)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-cov (==3.0.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)", "types-requests"] +tests = ["mypy 
(==0.971)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-cov (==3.0.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)", "types-requests"] + +[[package]] +name = "jinja2" +version = "3.1.2" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "kombu" +version = "5.3.3" +description = "Messaging library for Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "kombu-5.3.3-py3-none-any.whl", hash = "sha256:6cd5c5d5ef77538434b8f81f3e265c414269418645dbb47dbf130a8a05c3e357"}, + {file = "kombu-5.3.3.tar.gz", hash = "sha256:1491df826cfc5178c80f3e89dd6dfba68e484ef334db81070eb5cb8094b31167"}, +] + +[package.dependencies] +amqp = ">=5.1.1,<6.0.0" +"backports.zoneinfo" = {version = ">=0.2.1", extras = ["tzdata"], markers = "python_version < \"3.9\""} +typing-extensions = {version = "*", markers = "python_version < \"3.10\""} +vine = "*" + +[package.extras] +azureservicebus = ["azure-servicebus (>=7.10.0)"] +azurestoragequeues = ["azure-identity (>=1.12.0)", "azure-storage-queue (>=12.6.0)"] +confluentkafka = ["confluent-kafka (>=2.2.0)"] +consul = ["python-consul2"] +librabbitmq = ["librabbitmq (>=2.0.0)"] +mongodb = ["pymongo (>=4.1.1)"] +msgpack = ["msgpack"] +pyro = ["pyro4"] +qpid = ["qpid-python (>=0.26)", "qpid-tools (>=0.26)"] +redis = ["redis (>=4.5.2,!=4.5.5,<6.0.0)"] +slmq = ["softlayer-messaging (>=1.0.3)"] +sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] +sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] +yaml = ["PyYAML (>=3.10)"] +zookeeper = ["kazoo (>=2.8.0)"] + +[[package]] +name = "langcodes" +version = "3.3.0" +description = "Tools for labeling human languages with IETF language tags" +optional = false +python-versions = ">=3.6" +files = [ + {file = "langcodes-3.3.0-py3-none-any.whl", hash = "sha256:4d89fc9acb6e9c8fdef70bcdf376113a3db09b67285d9e1d534de6d8818e7e69"}, + {file = "langcodes-3.3.0.tar.gz", hash = "sha256:794d07d5a28781231ac335a1561b8442f8648ca07cd518310aeb45d6f0807ef6"}, +] + +[package.extras] +data = ["language-data (>=1.1,<2.0)"] + +[[package]] +name = "latexcodec" +version = "2.0.1" +description = "A lexer and codec to work with LaTeX code in Python." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "latexcodec-2.0.1-py2.py3-none-any.whl", hash = "sha256:c277a193638dc7683c4c30f6684e3db728a06efb0dc9cf346db8bd0aa6c5d271"}, + {file = "latexcodec-2.0.1.tar.gz", hash = "sha256:2aa2551c373261cefe2ad3a8953a6d6533e68238d180eb4bb91d7964adb3fe9a"}, +] + +[package.dependencies] +six = ">=1.4.1" + +[[package]] +name = "loguru" +version = "0.6.0" +description = "Python logging made (stupidly) simple" +optional = false +python-versions = ">=3.5" +files = [ + {file = "loguru-0.6.0-py3-none-any.whl", hash = "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"}, + {file = "loguru-0.6.0.tar.gz", hash = "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils (==0.16)", "flake8 (>=3.7.7)", "isort (>=5.1.1)", "pytest (>=4.6.2)", "pytest-cov (>=2.7.1)", "sphinx-autobuild (>=0.7.1)", "sphinx-rtd-theme (>=0.4.3)", "tox (>=3.9.0)"] + +[[package]] +name = "markdown" +version = "3.4.4" +description = "Python implementation of John Gruber's Markdown." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Markdown-3.4.4-py3-none-any.whl", hash = "sha256:a4c1b65c0957b4bd9e7d86ddc7b3c9868fb9670660f6f99f6d1bca8954d5a941"}, + {file = "Markdown-3.4.4.tar.gz", hash = "sha256:225c6123522495d4119a90b3a3ba31a1e87a70369e03f14799ea9c0d7183a3d6"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.0)", "mkdocs-nature (>=0.4)"] +testing = ["coverage", "pyyaml"] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "markupsafe" +version = "2.1.3" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = 
"MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = 
"MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "mergedeep" +version = "1.3.4" +description = "A deep merge function for 🐍." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, + {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, +] + +[[package]] +name = "mike" +version = "1.1.2" +description = "Manage multiple versions of your MkDocs-powered documentation" +optional = false +python-versions = "*" +files = [ + {file = "mike-1.1.2-py3-none-any.whl", hash = "sha256:4c307c28769834d78df10f834f57f810f04ca27d248f80a75f49c6fa2d1527ca"}, + {file = "mike-1.1.2.tar.gz", hash = "sha256:56c3f1794c2d0b5fdccfa9b9487beb013ca813de2e3ad0744724e9d34d40b77b"}, +] + +[package.dependencies] +jinja2 = "*" +mkdocs = ">=1.0" +pyyaml = ">=5.1" +verspec = "*" + +[package.extras] +dev = ["coverage", "flake8 (>=3.0)", "shtab"] +test = ["coverage", "flake8 (>=3.0)", "shtab"] + +[[package]] +name = "mkdocs" +version = "1.5.3" +description = "Project documentation with Markdown." +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocs-1.5.3-py3-none-any.whl", hash = "sha256:3b3a78e736b31158d64dbb2f8ba29bd46a379d0c6e324c2246c3bc3d2189cfc1"}, + {file = "mkdocs-1.5.3.tar.gz", hash = "sha256:eb7c99214dcb945313ba30426c2451b735992c73c2e10838f76d09e39ff4d0e2"}, +] + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} +ghp-import = ">=1.0" +importlib-metadata = {version = ">=4.3", markers = "python_version < \"3.10\""} +jinja2 = ">=2.11.1" +markdown = ">=3.2.1" +markupsafe = ">=2.0.1" +mergedeep = ">=1.3.4" +packaging = ">=20.5" +pathspec = ">=0.11.1" +platformdirs = ">=2.2.0" +pyyaml = ">=5.1" +pyyaml-env-tag = ">=0.1" +typing-extensions = {version = ">=3.10", markers = "python_version < \"3.8\""} +watchdog = ">=2.0" + +[package.extras] +i18n = ["babel (>=2.9.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.3)", "jinja2 (==2.11.1)", "markdown (==3.2.1)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "packaging (==20.5)", "pathspec (==0.11.1)", "platformdirs (==2.2.0)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "typing-extensions (==3.10)", "watchdog (==2.0)"] + +[[package]] +name = "mkdocs-autorefs" +version = "0.4.1" +description = "Automatically link across pages in MkDocs." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocs-autorefs-0.4.1.tar.gz", hash = "sha256:70748a7bd025f9ecd6d6feeba8ba63f8e891a1af55f48e366d6d6e78493aba84"}, + {file = "mkdocs_autorefs-0.4.1-py3-none-any.whl", hash = "sha256:a2248a9501b29dc0cc8ba4c09f4f47ff121945f6ce33d760f145d6f89d313f5b"}, +] + +[package.dependencies] +Markdown = ">=3.3" +mkdocs = ">=1.1" + +[[package]] +name = "mkdocs-bibtex" +version = "2.11.0" +description = "An MkDocs plugin that enables managing citations with BibTex" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mkdocs-bibtex-2.11.0.tar.gz", hash = "sha256:9ed78e1e7cfc8cd6f3f5ca75641dbcea8a011c36dbefcde041e36f8e6d0ed10f"}, +] + +[package.dependencies] +mkdocs = ">=1" +pybtex = ">=0.22" +pypandoc = ">=1.5" +requests = ">=2.8.1" +validators = ">=0.19.0" + +[[package]] +name = "mkdocs-gen-files" +version = "0.3.5" +description = "MkDocs plugin to programmatically generate documentation pages during the build" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "mkdocs-gen-files-0.3.5.tar.gz", hash = "sha256:d90d9e1676531a0bb96b1287dc28aa41162986de4dc3c00400214724761ff6ef"}, + {file = "mkdocs_gen_files-0.3.5-py3-none-any.whl", hash = "sha256:69562fddc662482e8f54a00a8b4ede5166ad5384ae4dbb0469f1f338ef3285ca"}, +] + +[package.dependencies] +mkdocs = ">=1.0.3,<2.0.0" + +[[package]] +name = "mkdocs-glightbox" +version = "0.1.7" +description = "MkDocs plugin supports image lightbox with GLightbox." +optional = false +python-versions = "*" +files = [ + {file = "mkdocs-glightbox-0.1.7.tar.gz", hash = "sha256:7e86e107349eacfd17a4f25673482bd2f033b21c272beaffae430b2c9c5aead6"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.11.1" + +[[package]] +name = "mkdocs-literate-nav" +version = "0.4.1" +description = "MkDocs plugin to specify the navigation in Markdown instead of YAML" +optional = false +python-versions = ">=3.6,<4.0" +files = [ + {file = "mkdocs-literate-nav-0.4.1.tar.gz", hash = "sha256:9efe26b662f2f901cae5807bfd51446d30ea7e033c2bc43a15d6282c7dfac1ab"}, + {file = "mkdocs_literate_nav-0.4.1-py3-none-any.whl", hash = "sha256:a4b761792ba21defbe2dfd5e0de6ba451639e1ca0f0661c37eda83cc6261e4f9"}, +] + +[package.dependencies] +mkdocs = ">=1.0.3,<2.0.0" + +[[package]] +name = "mkdocs-material" +version = "8.5.11" +description = "Documentation that simply works" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocs_material-8.5.11-py3-none-any.whl", hash = "sha256:c907b4b052240a5778074a30a78f31a1f8ff82d7012356dc26898b97559f082e"}, + {file = "mkdocs_material-8.5.11.tar.gz", hash = "sha256:b0ea0513fd8cab323e8a825d6692ea07fa83e917bb5db042e523afecc7064ab7"}, +] + +[package.dependencies] +jinja2 = ">=3.0.2" +markdown = ">=3.2" +mkdocs = ">=1.4.0" +mkdocs-material-extensions = ">=1.1" +pygments = ">=2.12" +pymdown-extensions = ">=9.4" +requests = ">=2.26" + +[[package]] +name = "mkdocs-material-extensions" +version = "1.2" +description = "Extension pack for Python Markdown and MkDocs Material." +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocs_material_extensions-1.2-py3-none-any.whl", hash = "sha256:c767bd6d6305f6420a50f0b541b0c9966d52068839af97029be14443849fb8a1"}, + {file = "mkdocs_material_extensions-1.2.tar.gz", hash = "sha256:27e2d1ed2d031426a6e10d5ea06989d67e90bb02acd588bc5673106b5ee5eedf"}, +] + +[[package]] +name = "mkdocstrings" +version = "0.18.1" +description = "Automatic documentation from sources, for MkDocs." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocstrings-0.18.1-py3-none-any.whl", hash = "sha256:4053929356df8cd69ed32eef71d8f676a472ef72980c9ffd4f933ead1debcdad"}, + {file = "mkdocstrings-0.18.1.tar.gz", hash = "sha256:fb7c91ce7e3ab70488d3fa6c073a4f827cdc319042f682ef8ea95459790d64fc"}, +] + +[package.dependencies] +Jinja2 = ">=2.11.1" +Markdown = ">=3.3" +MarkupSafe = ">=1.1" +mkdocs = ">=1.2" +mkdocs-autorefs = ">=0.3.1" +mkdocstrings-python-legacy = ">=0.2" +pymdown-extensions = ">=6.3" + +[package.extras] +crystal = ["mkdocstrings-crystal (>=0.3.4)"] +python = ["mkdocstrings-python (>=0.5.2)"] +python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] + +[[package]] +name = "mkdocstrings-python" +version = "0.6.6" +description = "A Python handler for mkdocstrings." +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocstrings-python-0.6.6.tar.gz", hash = "sha256:37281696b9f199624ae420e0625b6659b7fdfbea736618bce7fd978682dea3b1"}, + {file = "mkdocstrings_python-0.6.6-py3-none-any.whl", hash = "sha256:c118438d3cb4b14c492a51d109f4e5b27ab06ba19b099d624430dfd904926152"}, +] + +[package.dependencies] +griffe = ">=0.11.1" +mkdocstrings = ">=0.18" + +[[package]] +name = "mkdocstrings-python-legacy" +version = "0.2.2" +description = "A legacy Python handler for mkdocstrings." +optional = false +python-versions = ">=3.7" +files = [ + {file = "mkdocstrings-python-legacy-0.2.2.tar.gz", hash = "sha256:f0e7ec6a19750581b752acb38f6b32fcd1efe006f14f6703125d2c2c9a5c6f02"}, + {file = "mkdocstrings_python_legacy-0.2.2-py3-none-any.whl", hash = "sha256:379107a3a5b8db9b462efc4493c122efe21e825e3702425dbd404621302a563a"}, +] + +[package.dependencies] +mkdocstrings = ">=0.18" +pytkdocs = ">=0.14" + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = 
"multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file 
= "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] + +[[package]] +name = "murmurhash" +version = "1.0.10" +description = "Cython bindings for MurmurHash" +optional = false +python-versions = ">=3.6" +files = [ + {file = "murmurhash-1.0.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3e90eef568adca5e17a91f96975e9a782ace3a617bbb3f8c8c2d917096e9bfeb"}, + {file = "murmurhash-1.0.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f8ecb00cc1ab57e4b065f9fb3ea923b55160c402d959c69a0b6dbbe8bc73efc3"}, + {file = "murmurhash-1.0.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3310101004d9e2e0530c2fed30174448d998ffd1b50dcbfb7677e95db101aa4b"}, + {file = "murmurhash-1.0.10-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c65401a6f1778676253cbf89c1f45a8a7feb7d73038e483925df7d5943c08ed9"}, + {file = "murmurhash-1.0.10-cp310-cp310-win_amd64.whl", hash = "sha256:f23f2dfc7174de2cdc5007c0771ab8376a2a3f48247f32cac4a5563e40c6adcc"}, + {file = "murmurhash-1.0.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90ed37ee2cace9381b83d56068334f77e3e30bc521169a1f886a2a2800e965d6"}, + {file = "murmurhash-1.0.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:22e9926fdbec9d24ced9b0a42f0fee68c730438be3cfb00c2499fd495caec226"}, + {file = "murmurhash-1.0.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54bfbfd68baa99717239b8844600db627f336a08b1caf4df89762999f681cdd1"}, + {file = "murmurhash-1.0.10-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18b9d200a09d48ef67f6840b77c14f151f2b6c48fd69661eb75c7276ebdb146c"}, + {file = "murmurhash-1.0.10-cp311-cp311-win_amd64.whl", hash = "sha256:e5d7cfe392c0a28129226271008e61e77bf307afc24abf34f386771daa7b28b0"}, + {file = "murmurhash-1.0.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:96f0a070344d4802ea76a160e0d4c88b7dc10454d2426f48814482ba60b38b9e"}, + {file = "murmurhash-1.0.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9f61862060d677c84556610ac0300a0776cb13cb3155f5075ed97e80f86e55d9"}, + {file = "murmurhash-1.0.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3b6d2d877d8881a08be66d906856d05944be0faf22b9a0390338bcf45299989"}, + {file = "murmurhash-1.0.10-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8f54b0031d8696fed17ed6e9628f339cdea0ba2367ca051e18ff59193f52687"}, + {file = "murmurhash-1.0.10-cp312-cp312-win_amd64.whl", hash = "sha256:97e09d675de2359e586f09de1d0de1ab39f9911edffc65c9255fb5e04f7c1f85"}, + {file = "murmurhash-1.0.10-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b64e5332932993fef598e78d633b1ba664789ab73032ed511f3dc615a631a1a"}, + {file = "murmurhash-1.0.10-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e2a38437a8497e082408aa015c6d90554b9e00c2c221fdfa79728a2d99a739e"}, + {file = "murmurhash-1.0.10-cp36-cp36m-win_amd64.whl", hash = "sha256:55f4e4f9291a53c36070330950b472d72ba7d331e4ce3ce1ab349a4f458f7bc4"}, + {file = "murmurhash-1.0.10-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:16ef9f0855952493fe08929d23865425906a8c0c40607ac8a949a378652ba6a9"}, + {file = "murmurhash-1.0.10-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cc3351ae92b89c2fcdc6e41ac6f17176dbd9b3554c96109fd0713695d8663e7"}, + {file = "murmurhash-1.0.10-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6559fef7c2e7349a42a63549067709b656d6d1580752bd76be1541d8b2d65718"}, + {file = "murmurhash-1.0.10-cp37-cp37m-win_amd64.whl", hash = "sha256:8bf49e3bb33febb7057ae3a5d284ef81243a1e55eaa62bdcd79007cddbdc0461"}, + {file = "murmurhash-1.0.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f1605fde07030516eb63d77a598dd164fb9bf217fd937dbac588fe7e47a28c40"}, + {file = "murmurhash-1.0.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4904f7e68674a64eb2b08823c72015a5e14653e0b4b109ea00c652a005a59bad"}, + {file = "murmurhash-1.0.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0438f0cb44cf1cd26251f72c1428213c4197d40a4e3f48b1efc3aea12ce18517"}, + {file = 
"murmurhash-1.0.10-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db1171a3f9a10571931764cdbfaa5371f4cf5c23c680639762125cb075b833a5"}, + {file = "murmurhash-1.0.10-cp38-cp38-win_amd64.whl", hash = "sha256:1c9fbcd7646ad8ba67b895f71d361d232c6765754370ecea473dd97d77afe99f"}, + {file = "murmurhash-1.0.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7024ab3498434f22f8e642ae31448322ad8228c65c8d9e5dc2d563d57c14c9b8"}, + {file = "murmurhash-1.0.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a99dedfb7f0cc5a4cd76eb409ee98d3d50eba024f934e705914f6f4d765aef2c"}, + {file = "murmurhash-1.0.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b580b8503647de5dd7972746b7613ea586270f17ac92a44872a9b1b52c36d68"}, + {file = "murmurhash-1.0.10-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d75840212bf75eb1352c946c3cf1622dacddd6d6bdda34368237d1eb3568f23a"}, + {file = "murmurhash-1.0.10-cp39-cp39-win_amd64.whl", hash = "sha256:a4209962b9f85de397c3203ea4b3a554da01ae9fd220fdab38757d4e9eba8d1a"}, + {file = "murmurhash-1.0.10.tar.gz", hash = "sha256:5282aab1317804c6ebd6dd7f69f15ba9075aee671c44a34be2bde0f1b11ef88a"}, +] + +[[package]] +name = "mypy" +version = "0.950" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"}, + {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"}, + {file = "mypy-0.950-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22"}, + {file = "mypy-0.950-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb"}, + {file = "mypy-0.950-cp310-cp310-win_amd64.whl", hash = "sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334"}, + {file = "mypy-0.950-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f"}, + {file = "mypy-0.950-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc"}, + {file = "mypy-0.950-cp36-cp36m-win_amd64.whl", hash = "sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2"}, + {file = "mypy-0.950-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed"}, + {file = "mypy-0.950-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075"}, + {file = "mypy-0.950-cp37-cp37m-win_amd64.whl", hash = "sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b"}, + {file = "mypy-0.950-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d"}, + {file = "mypy-0.950-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a"}, + {file = "mypy-0.950-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605"}, + {file = "mypy-0.950-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2"}, + {file = "mypy-0.950-cp38-cp38-win_amd64.whl", hash = "sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff"}, + {file = "mypy-0.950-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8"}, + {file = "mypy-0.950-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038"}, + {file = "mypy-0.950-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2"}, + {file = "mypy-0.950-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519"}, + {file = "mypy-0.950-cp39-cp39-win_amd64.whl", hash = "sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef"}, + {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"}, + {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"}, +] + +[package.dependencies] +mypy-extensions = ">=0.4.3" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "nanotime" +version = "0.5.2" +description = "nanotime python implementation" +optional = false +python-versions = "*" +files = [ + {file = "nanotime-0.5.2.tar.gz", hash = "sha256:c7cc231fc5f6db401b448d7ab51c96d0a4733f4b69fabe569a576f89ffdf966b"}, +] + +[[package]] +name = "networkx" +version = "3.1" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.8" +files = [ + {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"}, + {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"}, +] + +[package.extras] +default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "nodeenv" +version = "1.8.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +files = [ + {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, + {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, +] + +[package.dependencies] +setuptools = "*" + +[[package]] +name = "numpy" +version = "1.21.1" +description = "NumPy is the fundamental package for array computing with Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671"}, + {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e"}, + {file = "numpy-1.21.1-cp37-cp37m-win32.whl", hash = "sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172"}, + {file = "numpy-1.21.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8"}, + {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16"}, + {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267"}, + {file = "numpy-1.21.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68"}, + {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8"}, + {file = "numpy-1.21.1-cp38-cp38-win32.whl", hash = "sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd"}, + {file = "numpy-1.21.1-cp38-cp38-win_amd64.whl", hash = "sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214"}, + {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f"}, + {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b"}, + {file = "numpy-1.21.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac"}, + {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1"}, + {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1"}, + 
{file = "numpy-1.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a"}, + {file = "numpy-1.21.1-cp39-cp39-win32.whl", hash = "sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2"}, + {file = "numpy-1.21.1-cp39-cp39-win_amd64.whl", hash = "sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33"}, + {file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"}, + {file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"}, +] + +[[package]] +name = "omegaconf" +version = "2.3.0" +description = "A flexible configuration library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"}, + {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"}, +] + +[package.dependencies] +antlr4-python3-runtime = "==4.9.*" +PyYAML = ">=5.1.0" + +[[package]] +name = "orjson" +version = "3.9.10" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.8" +files = [ + {file = "orjson-3.9.10-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:c18a4da2f50050a03d1da5317388ef84a16013302a5281d6f64e4a3f406aabc4"}, + {file = "orjson-3.9.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5148bab4d71f58948c7c39d12b14a9005b6ab35a0bdf317a8ade9a9e4d9d0bd5"}, + {file = "orjson-3.9.10-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4cf7837c3b11a2dfb589f8530b3cff2bd0307ace4c301e8997e95c7468c1378e"}, + {file = "orjson-3.9.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c62b6fa2961a1dcc51ebe88771be5319a93fd89bd247c9ddf732bc250507bc2b"}, + {file = "orjson-3.9.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb3922a7a804755bbe6b5be9b312e746137a03600f488290318936c1a2d4dc"}, + {file = "orjson-3.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1234dc92d011d3554d929b6cf058ac4a24d188d97be5e04355f1b9223e98bbe9"}, + {file = "orjson-3.9.10-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:06ad5543217e0e46fd7ab7ea45d506c76f878b87b1b4e369006bdb01acc05a83"}, + {file = "orjson-3.9.10-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4fd72fab7bddce46c6826994ce1e7de145ae1e9e106ebb8eb9ce1393ca01444d"}, + {file = "orjson-3.9.10-cp310-none-win32.whl", hash = "sha256:b5b7d4a44cc0e6ff98da5d56cde794385bdd212a86563ac321ca64d7f80c80d1"}, + {file = "orjson-3.9.10-cp310-none-win_amd64.whl", hash = "sha256:61804231099214e2f84998316f3238c4c2c4aaec302df12b21a64d72e2a135c7"}, + {file = "orjson-3.9.10-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:cff7570d492bcf4b64cc862a6e2fb77edd5e5748ad715f487628f102815165e9"}, + {file = "orjson-3.9.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed8bc367f725dfc5cabeed1ae079d00369900231fbb5a5280cf0736c30e2adf7"}, + {file = "orjson-3.9.10-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c812312847867b6335cfb264772f2a7e85b3b502d3a6b0586aa35e1858528ab1"}, + {file = 
"orjson-3.9.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9edd2856611e5050004f4722922b7b1cd6268da34102667bd49d2a2b18bafb81"}, + {file = "orjson-3.9.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:674eb520f02422546c40401f4efaf8207b5e29e420c17051cddf6c02783ff5ca"}, + {file = "orjson-3.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d0dc4310da8b5f6415949bd5ef937e60aeb0eb6b16f95041b5e43e6200821fb"}, + {file = "orjson-3.9.10-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e99c625b8c95d7741fe057585176b1b8783d46ed4b8932cf98ee145c4facf499"}, + {file = "orjson-3.9.10-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ec6f18f96b47299c11203edfbdc34e1b69085070d9a3d1f302810cc23ad36bf3"}, + {file = "orjson-3.9.10-cp311-none-win32.whl", hash = "sha256:ce0a29c28dfb8eccd0f16219360530bc3cfdf6bf70ca384dacd36e6c650ef8e8"}, + {file = "orjson-3.9.10-cp311-none-win_amd64.whl", hash = "sha256:cf80b550092cc480a0cbd0750e8189247ff45457e5a023305f7ef1bcec811616"}, + {file = "orjson-3.9.10-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:602a8001bdf60e1a7d544be29c82560a7b49319a0b31d62586548835bbe2c862"}, + {file = "orjson-3.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f295efcd47b6124b01255d1491f9e46f17ef40d3d7eabf7364099e463fb45f0f"}, + {file = "orjson-3.9.10-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:92af0d00091e744587221e79f68d617b432425a7e59328ca4c496f774a356071"}, + {file = "orjson-3.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5a02360e73e7208a872bf65a7554c9f15df5fe063dc047f79738998b0506a14"}, + {file = "orjson-3.9.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:858379cbb08d84fe7583231077d9a36a1a20eb72f8c9076a45df8b083724ad1d"}, + {file = "orjson-3.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666c6fdcaac1f13eb982b649e1c311c08d7097cbda24f32612dae43648d8db8d"}, + {file = "orjson-3.9.10-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3fb205ab52a2e30354640780ce4587157a9563a68c9beaf52153e1cea9aa0921"}, + {file = "orjson-3.9.10-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7ec960b1b942ee3c69323b8721df2a3ce28ff40e7ca47873ae35bfafeb4555ca"}, + {file = "orjson-3.9.10-cp312-none-win_amd64.whl", hash = "sha256:3e892621434392199efb54e69edfff9f699f6cc36dd9553c5bf796058b14b20d"}, + {file = "orjson-3.9.10-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8b9ba0ccd5a7f4219e67fbbe25e6b4a46ceef783c42af7dbc1da548eb28b6531"}, + {file = "orjson-3.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e2ecd1d349e62e3960695214f40939bbfdcaeaaa62ccc638f8e651cf0970e5f"}, + {file = "orjson-3.9.10-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7f433be3b3f4c66016d5a20e5b4444ef833a1f802ced13a2d852c637f69729c1"}, + {file = "orjson-3.9.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4689270c35d4bb3102e103ac43c3f0b76b169760aff8bcf2d401a3e0e58cdb7f"}, + {file = "orjson-3.9.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bd176f528a8151a6efc5359b853ba3cc0e82d4cd1fab9c1300c5d957dc8f48c"}, + {file = "orjson-3.9.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a2ce5ea4f71681623f04e2b7dadede3c7435dfb5e5e2d1d0ec25b35530e277b"}, + {file 
= "orjson-3.9.10-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:49f8ad582da6e8d2cf663c4ba5bf9f83cc052570a3a767487fec6af839b0e777"}, + {file = "orjson-3.9.10-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2a11b4b1a8415f105d989876a19b173f6cdc89ca13855ccc67c18efbd7cbd1f8"}, + {file = "orjson-3.9.10-cp38-none-win32.whl", hash = "sha256:a353bf1f565ed27ba71a419b2cd3db9d6151da426b61b289b6ba1422a702e643"}, + {file = "orjson-3.9.10-cp38-none-win_amd64.whl", hash = "sha256:e28a50b5be854e18d54f75ef1bb13e1abf4bc650ab9d635e4258c58e71eb6ad5"}, + {file = "orjson-3.9.10-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ee5926746232f627a3be1cc175b2cfad24d0170d520361f4ce3fa2fd83f09e1d"}, + {file = "orjson-3.9.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a73160e823151f33cdc05fe2cea557c5ef12fdf276ce29bb4f1c571c8368a60"}, + {file = "orjson-3.9.10-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c338ed69ad0b8f8f8920c13f529889fe0771abbb46550013e3c3d01e5174deef"}, + {file = "orjson-3.9.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5869e8e130e99687d9e4be835116c4ebd83ca92e52e55810962446d841aba8de"}, + {file = "orjson-3.9.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2c1e559d96a7f94a4f581e2a32d6d610df5840881a8cba8f25e446f4d792df3"}, + {file = "orjson-3.9.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81a3a3a72c9811b56adf8bcc829b010163bb2fc308877e50e9910c9357e78521"}, + {file = "orjson-3.9.10-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7f8fb7f5ecf4f6355683ac6881fd64b5bb2b8a60e3ccde6ff799e48791d8f864"}, + {file = "orjson-3.9.10-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c943b35ecdf7123b2d81d225397efddf0bce2e81db2f3ae633ead38e85cd5ade"}, + {file = "orjson-3.9.10-cp39-none-win32.whl", hash = "sha256:fb0b361d73f6b8eeceba47cd37070b5e6c9de5beaeaa63a1cb35c7e1a73ef088"}, + {file = "orjson-3.9.10-cp39-none-win_amd64.whl", hash = "sha256:b90f340cb6397ec7a854157fac03f0c82b744abdd1c0941a024c3c29d1340aff"}, + {file = "orjson-3.9.10.tar.gz", hash = "sha256:9ebbdbd6a046c304b1845e96fbcc5559cd296b4dfd3ad2509e33c4d9ce07d6a1"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pathspec" +version = "0.11.2" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, + {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, +] + +[[package]] +name = "platformdirs" +version = "3.1.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.1.1-py3-none-any.whl", hash = "sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8"}, + {file = "platformdirs-3.1.1.tar.gz", hash = "sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.4", markers = "python_version < \"3.8\""} + +[package.extras] +docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.2.2)", "pytest (>=7.2.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] + +[[package]] +name = "pluggy" +version = "1.2.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, + {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pre-commit" +version = "2.21.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pre_commit-2.21.0-py2.py3-none-any.whl", hash = "sha256:e2f91727039fc39a92f58a588a25b87f936de6567eed4f0e673e0507edc75bad"}, + {file = "pre_commit-2.21.0.tar.gz", hash = "sha256:31ef31af7e474a8d8995027fefdfcf509b5c913ff31f2015b4ec4beb26a6f658"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + +[[package]] +name = "preshed" +version = "3.0.9" +description = "Cython hash table that trusts the keys are pre-hashed" +optional = false +python-versions = ">=3.6" +files = [ + {file = "preshed-3.0.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4f96ef4caf9847b2bb9868574dcbe2496f974e41c2b83d6621c24fb4c3fc57e3"}, + {file = "preshed-3.0.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a61302cf8bd30568631adcdaf9e6b21d40491bd89ba8ebf67324f98b6c2a2c05"}, + {file = "preshed-3.0.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99499e8a58f58949d3f591295a97bca4e197066049c96f5d34944dd21a497193"}, + {file = "preshed-3.0.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea6b6566997dc3acd8c6ee11a89539ac85c77275b4dcefb2dc746d11053a5af8"}, + {file = "preshed-3.0.9-cp310-cp310-win_amd64.whl", hash = "sha256:bfd523085a84b1338ff18f61538e1cfcdedc4b9e76002589a301c364d19a2e36"}, + {file = "preshed-3.0.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7c2364da27f2875524ce1ca754dc071515a9ad26eb5def4c7e69129a13c9a59"}, + {file = "preshed-3.0.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:182138033c0730c683a6d97e567ceb8a3e83f3bff5704f300d582238dbd384b3"}, + {file = "preshed-3.0.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:345a10be3b86bcc6c0591d343a6dc2bfd86aa6838c30ced4256dfcfa836c3a64"}, + {file = "preshed-3.0.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:51d0192274aa061699b284f9fd08416065348edbafd64840c3889617ee1609de"}, + {file = "preshed-3.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:96b857d7a62cbccc3845ac8c41fd23addf052821be4eb987f2eb0da3d8745aa1"}, + {file = "preshed-3.0.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4fe6720012c62e6d550d6a5c1c7ad88cacef8388d186dad4bafea4140d9d198"}, + {file = "preshed-3.0.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e04f05758875be9751e483bd3c519c22b00d3b07f5a64441ec328bb9e3c03700"}, + {file = "preshed-3.0.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a55091d0e395f1fdb62ab43401bb9f8b46c7d7794d5b071813c29dc1ab22fd0"}, + {file = "preshed-3.0.9-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7de8f5138bcac7870424e09684dc3dd33c8e30e81b269f6c9ede3d8c7bb8e257"}, + {file = "preshed-3.0.9-cp312-cp312-win_amd64.whl", hash = "sha256:24229c77364628743bc29c5620c5d6607ed104f0e02ae31f8a030f99a78a5ceb"}, + {file = "preshed-3.0.9-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73b0f7ecc58095ebbc6ca26ec806008ef780190fe685ce471b550e7eef58dc2"}, + {file = "preshed-3.0.9-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cb90ecd5bec71c21d95962db1a7922364d6db2abe284a8c4b196df8bbcc871e"}, + {file = "preshed-3.0.9-cp36-cp36m-win_amd64.whl", hash = "sha256:e304a0a8c9d625b70ba850c59d4e67082a6be9c16c4517b97850a17a282ebee6"}, + {file = "preshed-3.0.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1fa6d3d5529b08296ff9b7b4da1485c080311fd8744bbf3a86019ff88007b382"}, + {file = "preshed-3.0.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1e5173809d85edd420fc79563b286b88b4049746b797845ba672cf9435c0e7"}, + {file = "preshed-3.0.9-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fe81eb21c7d99e8b9a802cc313b998c5f791bda592903c732b607f78a6b7dc4"}, + {file = "preshed-3.0.9-cp37-cp37m-win_amd64.whl", hash = "sha256:78590a4a952747c3766e605ce8b747741005bdb1a5aa691a18aae67b09ece0e6"}, + {file = "preshed-3.0.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3452b64d97ce630e200c415073040aa494ceec6b7038f7a2a3400cbd7858e952"}, + {file = "preshed-3.0.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ac970d97b905e9e817ec13d31befd5b07c9cfec046de73b551d11a6375834b79"}, + {file = "preshed-3.0.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eebaa96ece6641cd981491cba995b68c249e0b6877c84af74971eacf8990aa19"}, + {file = "preshed-3.0.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d473c5f6856e07a88d41fe00bb6c206ecf7b34c381d30de0b818ba2ebaf9406"}, + {file = "preshed-3.0.9-cp38-cp38-win_amd64.whl", hash = "sha256:0de63a560f10107a3f0a9e252cc3183b8fdedcb5f81a86938fd9f1dcf8a64adf"}, + {file = "preshed-3.0.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3a9ad9f738084e048a7c94c90f40f727217387115b2c9a95c77f0ce943879fcd"}, + {file = "preshed-3.0.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a671dfa30b67baa09391faf90408b69c8a9a7f81cb9d83d16c39a182355fbfce"}, + {file = "preshed-3.0.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23906d114fc97c17c5f8433342495d7562e96ecfd871289c2bb2ed9a9df57c3f"}, + {file = "preshed-3.0.9-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:778cf71f82cedd2719b256f3980d556d6fb56ec552334ba79b49d16e26e854a0"}, + {file = "preshed-3.0.9-cp39-cp39-win_amd64.whl", hash = "sha256:a6e579439b329eb93f32219ff27cb358b55fbb52a4862c31a915a098c8a22ac2"}, + {file = "preshed-3.0.9.tar.gz", hash = "sha256:721863c5244ffcd2651ad0928951a2c7c77b102f4e11a251ad85d37ee7621660"}, +] + +[package.dependencies] +cymem = ">=2.0.2,<2.1.0" +murmurhash = ">=0.28.0,<1.1.0" + +[[package]] +name = "prompt-toolkit" +version = "3.0.41" +description = "Library for building powerful interactive command lines in Python" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "prompt_toolkit-3.0.41-py3-none-any.whl", hash = "sha256:f36fe301fafb7470e86aaf90f036eef600a3210be4decf461a5b1ca8403d3cb2"}, + {file = "prompt_toolkit-3.0.41.tar.gz", hash = "sha256:941367d97fc815548822aa26c2a269fdc4eb21e9ec05fc5d447cf09bad5d75f0"}, +] + +[package.dependencies] +wcwidth = "*" + +[[package]] +name = "psutil" +version = "5.9.6" +description = "Cross-platform lib for process and system monitoring in Python." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "psutil-5.9.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:fb8a697f11b0f5994550555fcfe3e69799e5b060c8ecf9e2f75c69302cc35c0d"}, + {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:91ecd2d9c00db9817a4b4192107cf6954addb5d9d67a969a4f436dbc9200f88c"}, + {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:10e8c17b4f898d64b121149afb136c53ea8b68c7531155147867b7b1ac9e7e28"}, + {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:18cd22c5db486f33998f37e2bb054cc62fd06646995285e02a51b1e08da97017"}, + {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:ca2780f5e038379e520281e4c032dddd086906ddff9ef0d1b9dcf00710e5071c"}, + {file = "psutil-5.9.6-cp27-none-win32.whl", hash = "sha256:70cb3beb98bc3fd5ac9ac617a327af7e7f826373ee64c80efd4eb2856e5051e9"}, + {file = "psutil-5.9.6-cp27-none-win_amd64.whl", hash = "sha256:51dc3d54607c73148f63732c727856f5febec1c7c336f8f41fcbd6315cce76ac"}, + {file = "psutil-5.9.6-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c69596f9fc2f8acd574a12d5f8b7b1ba3765a641ea5d60fb4736bf3c08a8214a"}, + {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92e0cc43c524834af53e9d3369245e6cc3b130e78e26100d1f63cdb0abeb3d3c"}, + {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:748c9dd2583ed86347ed65d0035f45fa8c851e8d90354c122ab72319b5f366f4"}, + {file = "psutil-5.9.6-cp36-cp36m-win32.whl", hash = "sha256:3ebf2158c16cc69db777e3c7decb3c0f43a7af94a60d72e87b2823aebac3d602"}, + {file = "psutil-5.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:ff18b8d1a784b810df0b0fff3bcb50ab941c3b8e2c8de5726f9c71c601c611aa"}, + {file = "psutil-5.9.6-cp37-abi3-win32.whl", hash = "sha256:a6f01f03bf1843280f4ad16f4bde26b817847b4c1a0db59bf6419807bc5ce05c"}, + {file = "psutil-5.9.6-cp37-abi3-win_amd64.whl", hash = "sha256:6e5fb8dc711a514da83098bc5234264e551ad980cec5f85dabf4d38ed6f15e9a"}, + {file = "psutil-5.9.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:daecbcbd29b289aac14ece28eca6a3e60aa361754cf6da3dfb20d4d32b6c7f57"}, + {file = "psutil-5.9.6.tar.gz", hash = "sha256:e4b92ddcd7dd4cdd3f900180ea1e104932c7bce234fb88976e2a3b296441225a"}, +] + +[package.extras] +test = ["enum34", "ipaddress", "mock", 
"pywin32", "wmi"] + +[[package]] +name = "py4j" +version = "0.10.7" +description = "Enables Python programs to dynamically access arbitrary Java objects" +optional = false +python-versions = "*" +files = [ + {file = "py4j-0.10.7-py2.py3-none-any.whl", hash = "sha256:a950fe7de1bfd247a0a4dddb9118f332d22a89e01e0699135ea8038c15ee1293"}, + {file = "py4j-0.10.7.zip", hash = "sha256:721189616b3a7d28212dfb2e7c6a1dd5147b03105f1fc37ff2432acd0e863fa5"}, +] + +[[package]] +name = "pybtex" +version = "0.24.0" +description = "A BibTeX-compatible bibliography processor in Python" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*" +files = [ + {file = "pybtex-0.24.0-py2.py3-none-any.whl", hash = "sha256:e1e0c8c69998452fea90e9179aa2a98ab103f3eed894405b7264e517cc2fcc0f"}, + {file = "pybtex-0.24.0.tar.gz", hash = "sha256:818eae35b61733e5c007c3fcd2cfb75ed1bc8b4173c1f70b56cc4c0802d34755"}, +] + +[package.dependencies] +latexcodec = ">=1.0.4" +PyYAML = ">=3.01" +six = "*" + +[package.extras] +test = ["pytest"] + +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + +[[package]] +name = "pydantic" +version = "1.10.13" +description = "Data validation and settings management using python type hints" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-1.10.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:efff03cc7a4f29d9009d1c96ceb1e7a70a65cfe86e89d34e4a5f2ab1e5693737"}, + {file = "pydantic-1.10.13-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3ecea2b9d80e5333303eeb77e180b90e95eea8f765d08c3d278cd56b00345d01"}, + {file = "pydantic-1.10.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1740068fd8e2ef6eb27a20e5651df000978edce6da6803c2bef0bc74540f9548"}, + {file = "pydantic-1.10.13-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84bafe2e60b5e78bc64a2941b4c071a4b7404c5c907f5f5a99b0139781e69ed8"}, + {file = "pydantic-1.10.13-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bc0898c12f8e9c97f6cd44c0ed70d55749eaf783716896960b4ecce2edfd2d69"}, + {file = "pydantic-1.10.13-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:654db58ae399fe6434e55325a2c3e959836bd17a6f6a0b6ca8107ea0571d2e17"}, + {file = "pydantic-1.10.13-cp310-cp310-win_amd64.whl", hash = "sha256:75ac15385a3534d887a99c713aa3da88a30fbd6204a5cd0dc4dab3d770b9bd2f"}, + {file = "pydantic-1.10.13-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c553f6a156deb868ba38a23cf0df886c63492e9257f60a79c0fd8e7173537653"}, + {file = "pydantic-1.10.13-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5e08865bc6464df8c7d61439ef4439829e3ab62ab1669cddea8dd00cd74b9ffe"}, + {file = "pydantic-1.10.13-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e31647d85a2013d926ce60b84f9dd5300d44535a9941fe825dc349ae1f760df9"}, + {file = "pydantic-1.10.13-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:210ce042e8f6f7c01168b2d84d4c9eb2b009fe7bf572c2266e235edf14bacd80"}, + {file = "pydantic-1.10.13-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:8ae5dd6b721459bfa30805f4c25880e0dd78fc5b5879f9f7a692196ddcb5a580"}, + {file = "pydantic-1.10.13-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f8e81fc5fb17dae698f52bdd1c4f18b6ca674d7068242b2aff075f588301bbb0"}, + {file = "pydantic-1.10.13-cp311-cp311-win_amd64.whl", hash = "sha256:61d9dce220447fb74f45e73d7ff3b530e25db30192ad8d425166d43c5deb6df0"}, + {file = "pydantic-1.10.13-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4b03e42ec20286f052490423682016fd80fda830d8e4119f8ab13ec7464c0132"}, + {file = "pydantic-1.10.13-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f59ef915cac80275245824e9d771ee939133be38215555e9dc90c6cb148aaeb5"}, + {file = "pydantic-1.10.13-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a1f9f747851338933942db7af7b6ee8268568ef2ed86c4185c6ef4402e80ba8"}, + {file = "pydantic-1.10.13-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:97cce3ae7341f7620a0ba5ef6cf043975cd9d2b81f3aa5f4ea37928269bc1b87"}, + {file = "pydantic-1.10.13-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:854223752ba81e3abf663d685f105c64150873cc6f5d0c01d3e3220bcff7d36f"}, + {file = "pydantic-1.10.13-cp37-cp37m-win_amd64.whl", hash = "sha256:b97c1fac8c49be29486df85968682b0afa77e1b809aff74b83081cc115e52f33"}, + {file = "pydantic-1.10.13-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c958d053453a1c4b1c2062b05cd42d9d5c8eb67537b8d5a7e3c3032943ecd261"}, + {file = "pydantic-1.10.13-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c5370a7edaac06daee3af1c8b1192e305bc102abcbf2a92374b5bc793818599"}, + {file = "pydantic-1.10.13-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d6f6e7305244bddb4414ba7094ce910560c907bdfa3501e9db1a7fd7eaea127"}, + {file = "pydantic-1.10.13-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3a3c792a58e1622667a2837512099eac62490cdfd63bd407993aaf200a4cf1f"}, + {file = "pydantic-1.10.13-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c636925f38b8db208e09d344c7aa4f29a86bb9947495dd6b6d376ad10334fb78"}, + {file = "pydantic-1.10.13-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:678bcf5591b63cc917100dc50ab6caebe597ac67e8c9ccb75e698f66038ea953"}, + {file = "pydantic-1.10.13-cp38-cp38-win_amd64.whl", hash = "sha256:6cf25c1a65c27923a17b3da28a0bdb99f62ee04230c931d83e888012851f4e7f"}, + {file = "pydantic-1.10.13-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8ef467901d7a41fa0ca6db9ae3ec0021e3f657ce2c208e98cd511f3161c762c6"}, + {file = "pydantic-1.10.13-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968ac42970f57b8344ee08837b62f6ee6f53c33f603547a55571c954a4225691"}, + {file = "pydantic-1.10.13-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9849f031cf8a2f0a928fe885e5a04b08006d6d41876b8bbd2fc68a18f9f2e3fd"}, + {file = "pydantic-1.10.13-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56e3ff861c3b9c6857579de282ce8baabf443f42ffba355bf070770ed63e11e1"}, + {file = "pydantic-1.10.13-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f00790179497767aae6bcdc36355792c79e7bbb20b145ff449700eb076c5f96"}, + {file = "pydantic-1.10.13-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:75b297827b59bc229cac1a23a2f7a4ac0031068e5be0ce385be1462e7e17a35d"}, + {file = "pydantic-1.10.13-cp39-cp39-win_amd64.whl", hash = "sha256:e70ca129d2053fb8b728ee7d1af8e553a928d7e301a311094b8a0501adc8763d"}, + {file = "pydantic-1.10.13-py3-none-any.whl", 
hash = "sha256:b87326822e71bd5f313e7d3bfdc77ac3247035ac10b0c0618bd99dcf95b1e687"}, + {file = "pydantic-1.10.13.tar.gz", hash = "sha256:32c8b48dcd3b2ac4e78b0ba4af3a2c2eb6048cb75202f0ea7b34feb740efc340"}, +] + +[package.dependencies] +typing-extensions = ">=4.2.0" + +[package.extras] +dotenv = ["python-dotenv (>=0.10.4)"] +email = ["email-validator (>=1.0.3)"] + +[[package]] +name = "pydot" +version = "1.4.2" +description = "Python interface to Graphviz's Dot" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pydot-1.4.2-py2.py3-none-any.whl", hash = "sha256:66c98190c65b8d2e2382a441b4c0edfdb4f4c025ef9cb9874de478fb0793a451"}, + {file = "pydot-1.4.2.tar.gz", hash = "sha256:248081a39bcb56784deb018977e428605c1c758f10897a339fce1dd728ff007d"}, +] + +[package.dependencies] +pyparsing = ">=2.1.4" + +[[package]] +name = "pygit2" +version = "1.13.2" +description = "Python bindings for libgit2." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pygit2-1.13.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:781aefab7efc464852e688965bf3b4acc7af951cebea174d69f86b213aa5d5fb"}, + {file = "pygit2-1.13.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3038b5ecef43e2c853e7cf405676241e0395bb37b37ae477ef3b73a91f12378"}, + {file = "pygit2-1.13.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c00927a2626325b64ebc9f860f024a3ae0b4c036663f6ada8d5de0e2393560ca"}, + {file = "pygit2-1.13.2-cp310-cp310-win32.whl", hash = "sha256:6988fc6cf99a3dbc03bd64060888c3b194ee27c810cb61624519ee3813f2da3d"}, + {file = "pygit2-1.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:aec3df351b722ec7cdf7a7e642e421e3a15f3f2e3a51e57380d62d4992acf36d"}, + {file = "pygit2-1.13.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0eb53cc5212fad90e36693c0cd2ffd0d470efaea2506ce1c0d04f8d7fcf6767c"}, + {file = "pygit2-1.13.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32803ec881cd8f7dba91e03927e1fb13857e795bbe85cd3ec156b4798b933294"}, + {file = "pygit2-1.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba7297204e72c5cfdcd7a0c0d318af9d654a1d79b1cfe6cc8330570c749bec1f"}, + {file = "pygit2-1.13.2-cp311-cp311-win32.whl", hash = "sha256:2291707e648f5bba5b5c5e7ed652bc4563bd520718eb31e19525ccaceba5503c"}, + {file = "pygit2-1.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:96e534e92e485c4c1d4c3e151ce960655fed38ab9a1d65e2b16650cf24b3e088"}, + {file = "pygit2-1.13.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:75f3b6d754d91dbe47b27b53d5a4440d861906b2f476284e6fb7c46cafe244d7"}, + {file = "pygit2-1.13.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30e145730dc65a9b902a889efdca0126d6b274c0b14427ebb085e090b50f6470"}, + {file = "pygit2-1.13.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2311ca16e1d0b77bc3694407c367391c7f2f78e2f725dc858721a5d4e3635fdd"}, + {file = "pygit2-1.13.2-cp312-cp312-win32.whl", hash = "sha256:a027e06c44f987a217c6197970bb29de9fbc78524c81b1f37888711978a64ce2"}, + {file = "pygit2-1.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:9844fb5a38119a34b31012dddc9b439f81bb0411cbf4a4f8e92a044f6f3e7462"}, + {file = "pygit2-1.13.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2f3a5362c702a42e28c3bc84ff324b57676c8bfdbfab445c96f5e776873630a6"}, + {file = "pygit2-1.13.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e7d5d1c3508b66e5e13883ff472b616d2d60feb7a4afea52d3b501e9f5ee5d08"}, + {file = "pygit2-1.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2507d99584c7e3976342566adf6bc48aca825c170b86f999fe7bd32f8aa1858e"}, + {file = "pygit2-1.13.2-cp38-cp38-win32.whl", hash = "sha256:acda61b726c33ada3639cac5ddc5898678f7bb7b8415e84e3ff07a2af94b1ac3"}, + {file = "pygit2-1.13.2-cp38-cp38-win_amd64.whl", hash = "sha256:4a86c4cae2e717acdd9d7ff00d196395fafe1abfc5efab5ada63650b49d5d47f"}, + {file = "pygit2-1.13.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ae9a77be5c5df5f4c9e586fbd53f1095bced2bba86ec669ead92c4c1e02f8373"}, + {file = "pygit2-1.13.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25ef1dcfb59e73f6a59491343393b6e843739cbc92e8088a551c73cd367a54d0"}, + {file = "pygit2-1.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b88d21ed961fe422275c9c20d2613e6ecff2fa8127ac7620a29aba1d001fc41"}, + {file = "pygit2-1.13.2-cp39-cp39-win32.whl", hash = "sha256:14b458af1e8c6b634d55110edeab055e3bd9075543792cb75d2fdb8b434c202a"}, + {file = "pygit2-1.13.2-cp39-cp39-win_amd64.whl", hash = "sha256:565b311c783a07768b91382620ad2b474fe40778411cb18c576f667be43d1299"}, + {file = "pygit2-1.13.2.tar.gz", hash = "sha256:75c7eb86b47c70f6f1434bcf3b5eb41f4e8006a15cee6bef606651b97d23788c"}, +] + +[package.dependencies] +cffi = ">=1.16.0" +setuptools = {version = "*", markers = "python_version >= \"3.12\""} + +[[package]] +name = "pygments" +version = "2.16.1" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, + {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, +] + +[package.extras] +plugins = ["importlib-metadata"] + +[[package]] +name = "pygtrie" +version = "2.5.0" +description = "A pure Python trie data structure implementation." +optional = false +python-versions = "*" +files = [ + {file = "pygtrie-2.5.0-py3-none-any.whl", hash = "sha256:8795cda8105493d5ae159a5bef313ff13156c5d4d72feddefacaad59f8c8ce16"}, + {file = "pygtrie-2.5.0.tar.gz", hash = "sha256:203514ad826eb403dab1d2e2ddd034e0d1534bbe4dbe0213bb0593f66beba4e2"}, +] + +[[package]] +name = "pymdown-extensions" +version = "10.2.1" +description = "Extension pack for Python Markdown." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pymdown_extensions-10.2.1-py3-none-any.whl", hash = "sha256:bded105eb8d93f88f2f821f00108cb70cef1269db6a40128c09c5f48bfc60ea4"}, + {file = "pymdown_extensions-10.2.1.tar.gz", hash = "sha256:d0c534b4a5725a4be7ccef25d65a4c97dba58b54ad7c813babf0eb5ba9c81591"}, +] + +[package.dependencies] +markdown = ">=3.2" +pyyaml = "*" + +[package.extras] +extra = ["pygments (>=2.12)"] + +[[package]] +name = "pypandoc" +version = "1.12" +description = "Thin wrapper for pandoc." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "pypandoc-1.12-py3-none-any.whl", hash = "sha256:efb4f7d68ead8bec32e22b62f02d5608a1700978b51bfc4af286fd6acfe9d218"}, + {file = "pypandoc-1.12.tar.gz", hash = "sha256:8f44740a9f074e121d81b489f073160421611d4ead62d1b306aeb11aab3c32df"}, +] + +[[package]] +name = "pyparsing" +version = "3.1.1" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, + {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pyspark" +version = "2.4.3" +description = "Apache Spark Python API" +optional = false +python-versions = "*" +files = [ + {file = "pyspark-2.4.3.tar.gz", hash = "sha256:6839718ce9f779e81153d8a14a843a5c4b2d5e6574f3c916aec241022d717cb2"}, +] + +[package.dependencies] +py4j = "0.10.7" + +[package.extras] +ml = ["numpy (>=1.7)"] +mllib = ["numpy (>=1.7)"] +sql = ["pandas (>=0.19.2)", "pyarrow (>=0.8.0)"] + +[[package]] +name = "pytest" +version = "7.4.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, + {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-cov" +version = "3.0.0" +description = "Pytest plugin for measuring coverage." +optional = false +python-versions = ">=3.6" +files = [ + {file = "pytest-cov-3.0.0.tar.gz", hash = "sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"}, + {file = "pytest_cov-3.0.0-py3-none-any.whl", hash = "sha256:578d5d15ac4a25e5f961c938b85a05b09fdaae9deef3bb6de9a6e766622ca7a6"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytkdocs" +version = "0.16.1" +description = "Load Python objects documentation." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "pytkdocs-0.16.1-py3-none-any.whl", hash = "sha256:a8c3f46ecef0b92864cc598e9101e9c4cf832ebbf228f50c84aa5dd850aac379"}, + {file = "pytkdocs-0.16.1.tar.gz", hash = "sha256:e2ccf6dfe9dbbceb09818673f040f1a7c32ed0bffb2d709b06be6453c4026045"}, +] + +[package.dependencies] +astunparse = {version = ">=1.6", markers = "python_version < \"3.9\""} +cached-property = {version = ">=1.5", markers = "python_version < \"3.8\""} +typing-extensions = {version = ">=3.7", markers = "python_version < \"3.8\""} + +[package.extras] +numpy-style = ["docstring_parser (>=0.7)"] + +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "pyyaml-env-tag" +version = "0.1" +description = "A custom YAML tag for referencing environment variables in YAML files. 
" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, + {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, +] + +[package.dependencies] +pyyaml = "*" + +[[package]] +name = "regex" +version = "2023.10.3" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.7" +files = [ + {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"}, + {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"}, + {file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"}, + {file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"}, + {file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"}, + {file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"}, + {file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"}, + {file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"}, + {file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"}, + {file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"}, + {file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"}, + {file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"}, + {file = "regex-2023.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a3ee019a9befe84fa3e917a2dd378807e423d013377a884c1970a3c2792d293"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76066d7ff61ba6bf3cb5efe2428fc82aac91802844c022d849a1f0f53820502d"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe50b61bab1b1ec260fa7cd91106fa9fece57e6beba05630afe27c71259c59b"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fd88f373cb71e6b59b7fa597e47e518282455c2734fd4306a05ca219a1991b0"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ab05a182c7937fb374f7e946f04fb23a0c0699c0450e9fb02ef567412d2fa3"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dac37cf08fcf2094159922edc7a2784cfcc5c70f8354469f79ed085f0328ebdf"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54ddd0bb8fb626aa1f9ba7b36629564544954fff9669b15da3610c22b9a0991"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3367007ad1951fde612bf65b0dffc8fd681a4ab98ac86957d16491400d661302"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:16f8740eb6dbacc7113e3097b0a36065a02e37b47c936b551805d40340fb9971"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4f2ca6df64cbdd27f27b34f35adb640b5d2d77264228554e68deda54456eb11"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:39807cbcbe406efca2a233884e169d056c35aa7e9f343d4e78665246a332f597"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7eece6fbd3eae4a92d7c748ae825cbc1ee41a89bb1c3db05b5578ed3cfcfd7cb"}, + {file = "regex-2023.10.3-cp37-cp37m-win32.whl", hash = "sha256:ce615c92d90df8373d9e13acddd154152645c0dc060871abf6bd43809673d20a"}, + {file = "regex-2023.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f649fa32fe734c4abdfd4edbb8381c74abf5f34bc0b3271ce687b23729299ed"}, + {file = "regex-2023.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9b98b7681a9437262947f41c7fac567c7e1f6eddd94b0483596d320092004533"}, + {file = "regex-2023.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:91dc1d531f80c862441d7b66c4505cd6ea9d312f01fb2f4654f40c6fdf5cc37a"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82fcc1f1cc3ff1ab8a57ba619b149b907072e750815c5ba63e7aa2e1163384a4"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7979b834ec7a33aafae34a90aad9f914c41fd6eaa8474e66953f3f6f7cbd4368"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef71561f82a89af6cfcbee47f0fabfdb6e63788a9258e913955d89fdd96902ab"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:dd829712de97753367153ed84f2de752b86cd1f7a88b55a3a775eb52eafe8a94"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00e871d83a45eee2f8688d7e6849609c2ca2a04a6d48fba3dff4deef35d14f07"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:706e7b739fdd17cb89e1fbf712d9dc21311fc2333f6d435eac2d4ee81985098c"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cc3f1c053b73f20c7ad88b0d1d23be7e7b3901229ce89f5000a8399746a6e039"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f85739e80d13644b981a88f529d79c5bdf646b460ba190bffcaf6d57b2a9863"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:741ba2f511cc9626b7561a440f87d658aabb3d6b744a86a3c025f866b4d19e7f"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e77c90ab5997e85901da85131fd36acd0ed2221368199b65f0d11bca44549711"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:979c24cbefaf2420c4e377ecd1f165ea08cc3d1fbb44bdc51bccbbf7c66a2cb4"}, + {file = "regex-2023.10.3-cp38-cp38-win32.whl", hash = "sha256:58837f9d221744d4c92d2cf7201c6acd19623b50c643b56992cbd2b745485d3d"}, + {file = "regex-2023.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:c55853684fe08d4897c37dfc5faeff70607a5f1806c8be148f1695be4a63414b"}, + {file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"}, + {file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"}, + {file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"}, + {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"}, + {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "rich" +version = "13.6.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.6.0-py3-none-any.whl", hash = "sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245"}, + {file = "rich-13.6.0.tar.gz", hash = "sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "ruamel-yaml" +version = "0.18.5" +description = "ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order" +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruamel.yaml-0.18.5-py3-none-any.whl", hash = "sha256:a013ac02f99a69cdd6277d9664689eb1acba07069f912823177c5eced21a6ada"}, + {file = "ruamel.yaml-0.18.5.tar.gz", hash = "sha256:61917e3a35a569c1133a8f772e1226961bf5a1198bea7e23f06a0841dea1ab0e"}, +] + +[package.dependencies] +"ruamel.yaml.clib" = {version = ">=0.2.7", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.13\""} + +[package.extras] +docs = ["mercurial (>5.7)", "ryd"] +jinja2 = ["ruamel.yaml.jinja2 (>=0.2)"] + +[[package]] +name = "ruamel-yaml-clib" +version = "0.2.8" +description = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" +optional = false +python-versions = ">=3.6" +files = [ + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fff3573c2db359f091e1589c3d7c5fc2f86f5bdb6f24252c2d8e539d4e45f412"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:aa2267c6a303eb483de8d02db2871afb5c5fc15618d894300b88958f729ad74f"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:840f0c7f194986a63d2c2465ca63af8ccbbc90ab1c6001b1978f05119b5e7334"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:024cfe1fc7c7f4e1aff4a81e718109e13409767e4f871443cbff3dba3578203d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win32.whl", hash = "sha256:c69212f63169ec1cfc9bb44723bf2917cbbd8f6191a00ef3410f5a7fe300722d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win_amd64.whl", hash = "sha256:cabddb8d8ead485e255fe80429f833172b4cadf99274db39abc080e068cbcc31"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bef08cd86169d9eafb3ccb0a39edb11d8e25f3dae2b28f5c52fd997521133069"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:b16420e621d26fdfa949a8b4b47ade8810c56002f5389970db4ddda51dbff248"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:25c515e350e5b739842fc3228d662413ef28f295791af5e5110b543cf0b57d9b"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:1707814f0d9791df063f8c19bb51b0d1278b8e9a2353abbb676c2f685dee6afe"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:46d378daaac94f454b3a0e3d8d78cafd78a026b1d71443f4966c696b48a6d899"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:09b055c05697b38ecacb7ac50bdab2240bfca1a0c4872b0fd309bb07dc9aa3a9"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win32.whl", hash = "sha256:53a300ed9cea38cf5a2a9b069058137c2ca1ce658a874b79baceb8f892f915a7"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win_amd64.whl", hash = "sha256:c2a72e9109ea74e511e29032f3b670835f8a59bbdc9ce692c5b4ed91ccf1eedb"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ebc06178e8821efc9692ea7544aa5644217358490145629914d8020042c24aa1"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:edaef1c1200c4b4cb914583150dcaa3bc30e592e907c01117c08b13a07255ec2"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d176b57452ab5b7028ac47e7b3cf644bcfdc8cacfecf7e71759f7f51a59e5c92"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:1dc67314e7e1086c9fdf2680b7b6c2be1c0d8e3a8279f2e993ca2a7545fecf62"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3213ece08ea033eb159ac52ae052a4899b56ecc124bb80020d9bbceeb50258e9"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aab7fd643f71d7946f2ee58cc88c9b7bfc97debd71dcc93e03e2d174628e7e2d"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win32.whl", hash = "sha256:5c365d91c88390c8d0a8545df0b5857172824b1c604e867161e6b3d59a827eaa"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win_amd64.whl", hash = "sha256:1758ce7d8e1a29d23de54a16ae867abd370f01b5a69e1a3ba75223eaa3ca1a1b"}, + {file = "ruamel.yaml.clib-0.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a5aa27bad2bb83670b71683aae140a1f52b0857a2deff56ad3f6c13a017a26ed"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c58ecd827313af6864893e7af0a3bb85fd529f862b6adbefe14643947cfe2942"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_12_0_arm64.whl", hash = "sha256:f481f16baec5290e45aebdc2a5168ebc6d35189ae6fea7a58787613a25f6e875"}, + {file = 
"ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:77159f5d5b5c14f7c34073862a6b7d34944075d9f93e681638f6d753606c6ce6"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7f67a1ee819dc4562d444bbafb135832b0b909f81cc90f7aa00260968c9ca1b3"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4ecbf9c3e19f9562c7fdd462e8d18dd902a47ca046a2e64dba80699f0b6c09b7"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:87ea5ff66d8064301a154b3933ae406b0863402a799b16e4a1d24d9fbbcbe0d3"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win32.whl", hash = "sha256:75e1ed13e1f9de23c5607fe6bd1aeaae21e523b32d83bb33918245361e9cc51b"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win_amd64.whl", hash = "sha256:3f215c5daf6a9d7bbed4a0a4f760f3113b10e82ff4c5c44bec20a68c8014f675"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1b617618914cb00bf5c34d4357c37aa15183fa229b24767259657746c9077615"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a6a9ffd280b71ad062eae53ac1659ad86a17f59a0fdc7699fd9be40525153337"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:305889baa4043a09e5b76f8e2a51d4ffba44259f6b4c72dec8ca56207d9c6fe1"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:700e4ebb569e59e16a976857c8798aee258dceac7c7d6b50cab63e080058df91"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e2b4c44b60eadec492926a7270abb100ef9f72798e18743939bdbf037aab8c28"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e79e5db08739731b0ce4850bed599235d601701d5694c36570a99a0c5ca41a9d"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win32.whl", hash = "sha256:955eae71ac26c1ab35924203fda6220f84dce57d6d7884f189743e2abe3a9fbe"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win_amd64.whl", hash = "sha256:56f4252222c067b4ce51ae12cbac231bce32aee1d33fbfc9d17e5b8d6966c312"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:03d1162b6d1df1caa3a4bd27aa51ce17c9afc2046c31b0ad60a0a96ec22f8001"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba64af9fa9cebe325a62fa398760f5c7206b215201b0ec825005f1b18b9bccf"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:a1a45e0bb052edf6a1d3a93baef85319733a888363938e1fc9924cb00c8df24c"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:da09ad1c359a728e112d60116f626cc9f29730ff3e0e7db72b9a2dbc2e4beed5"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:184565012b60405d93838167f425713180b949e9d8dd0bbc7b49f074407c5a8b"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a75879bacf2c987c003368cf14bed0ffe99e8e85acfa6c0bfffc21a090f16880"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-win32.whl", hash = "sha256:84b554931e932c46f94ab306913ad7e11bba988104c5cff26d90d03f68258cd5"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-win_amd64.whl", hash = "sha256:25ac8c08322002b06fa1d49d1646181f0b2c72f5cbc15a85e80b4c30a544bb15"}, + {file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"}, +] + +[[package]] +name = "safetensors" +version = "0.4.0" +description = "" 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "safetensors-0.4.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:2289ae6dbe6d027ecee016b28ced13a2e21a0b3a3a757a23033a2d1c0b1bad55"}, + {file = "safetensors-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bf6458959f310f551cbbeef2255527ade5f783f952738e73e4d0136198cc3bfe"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6b60a58a8f7cc7aed3b5b73dce1f5259a53c83d9ba43a76a874e6ad868c1b4d"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:491b3477e4d0d4599bb75d79da4b75af2e6ed9b1f6ec2b715991f0bc927bf09a"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59d2e10b7e0cd18bb73ed7c17c624a5957b003b81345e18159591771c26ee428"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f667a4c12fb593f5f66ce966cb1b14a7148898b2b1a7f79e0761040ae1e3c51"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f9909512bcb6f712bdd04c296cdfb0d8ff73d258ffc5af884bb62ea02d221e0"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d33d29e846821f0e4f92614022949b09ccf063cb36fe2f9fe099cde1efbfbb87"}, + {file = "safetensors-0.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4d512525a8e05a045ce6698066ba0c5378c174a83e0b3720a8c7799dc1bb06f3"}, + {file = "safetensors-0.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0219cea445177f6ad1f9acd3a8d025440c8ff436d70a4a7c7ba9c36066aa9474"}, + {file = "safetensors-0.4.0-cp310-none-win32.whl", hash = "sha256:67ab171eeaad6972d3971c53d29d53353c67f6743284c6d637b59fa3e54c8a94"}, + {file = "safetensors-0.4.0-cp310-none-win_amd64.whl", hash = "sha256:7ffc736039f08a9ca1f09816a7481b8e4469c06e8f8a5ffa8cb67ddd79e6d77f"}, + {file = "safetensors-0.4.0-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:4fe9e3737b30de458225a23926219ca30b902ee779b6a3df96eaab2b6d625ec2"}, + {file = "safetensors-0.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7916e814a90008de767b1c164a1d83803693c661ffe9af5a697b22e2752edb0"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbc4a4da01143472323c145f3c289e5f6fabde0ac0a3414dabf912a21692fff4"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a54c21654a47669b38e359e8f852af754b786c9da884bb61ad5e9af12bd71ccb"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25cd407955bad5340ba17f9f8ac789a0d751601a311e2f7b2733f9384478c95e"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82e8fc4e3503cd738fd40718a430fe0e5ce6e7ff91a73d6ce628bbb89c41e8ce"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48b92059b1a4ad163024d4f526e0e73ebe2bb3ae70537e15e347820b4de5dc27"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5daa05058f7dce85b5f9f60c4eab483ed7859d63978f08a76e52e78859ff20ca"}, + {file = "safetensors-0.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a86565a5c112dd855909e20144947b4f53abb78c4de207f36ca71ee63ba5b90d"}, + {file = "safetensors-0.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:38032078ed9fea52d06584e441bccc73fb475c4581600c6d6166de2fe2deb3d1"}, + {file = "safetensors-0.4.0-cp311-none-win32.whl", hash = "sha256:2f99d90c91b7c76b40a862acd9085bc77f7974a27dee7cfcebe46149af5a99a1"}, + {file = "safetensors-0.4.0-cp311-none-win_amd64.whl", hash = "sha256:74e2a448ffe19be188b457b130168190ee73b5a75e45ba96796320c1f5ae35d2"}, + {file = "safetensors-0.4.0-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:1e2f9c69b41d03b4826ffb96b29e07444bb6b34a78a7bafd0b88d59e8ec75b8a"}, + {file = "safetensors-0.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3910fb5bf747413b59f1a34e6d2a993b589fa7d919709518823c70efaaa350bd"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8fdca709b2470a35a59b1e6dffea75cbe1214b22612b5dd4c93947697aea8b"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f27b8ef814c5fb43456caeb7f3cbb889b76115180aad1f42402839c14a47c5b"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b2d6101eccc43c7be0cb052f13ceda64288b3d8b344b988ed08d7133cbce2f3"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdc34027b545a69be3d4220c140b276129523e4e46db06ad1a0b60d6a4cf9214"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db7bb48ca9e90bb9526c71b388d38d8de160c0354f4c5126df23e8701a870dcb"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a78ffc0795d3595cd9e4d453502e35f764276c49e434b25556a15a337db4dafc"}, + {file = "safetensors-0.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8e735b0f79090f6855b55e205e820b7b595502ffca0009a5c13eef3661ce465b"}, + {file = "safetensors-0.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f8d2416734e850d5392afffbcb2b8985ea29fb171f1cb197e2ae51b8e35d6438"}, + {file = "safetensors-0.4.0-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:e853e189ba7d47eaf561094586692ba2bbdd258c096f1755805cac098de0e6ab"}, + {file = "safetensors-0.4.0-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:4b2aa57b5a4d576f3d1dd6e56980026340f156f8a13c13016bfac4e25295b53f"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b6c1316ffde6cb4bf22c7445bc9fd224b4d1b9dd7320695f5611c89e802e4b6"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:003077ec85261d00061058fa12e3c1d2055366b02ce8f2938929359ffbaff2b8"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bd63d83a92f1437a8b0431779320376030ae43ace980bea5686d515de0784100"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2077801800b4b13301d8d6290c7fb5bd60737320001717153ebc4371776643b5"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7abe0e157a49a75aeeccfbc4f3dac38d8f98512d3cdb35c200f8e628dc5773cf"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3bfed574f6b1e7e7fe1f17213278875ef6c6e8b1582ab6eda93947db1178cae6"}, + {file = "safetensors-0.4.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:964ef166a286ce3b023d0d0bd0e21d440a1c8028981c8abdb136bc7872ba9b3d"}, + {file = "safetensors-0.4.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:44f84373e42183bd56a13a1f2d8acb1db7fedaeffbd83e79cec861477eee1af4"}, + {file = "safetensors-0.4.0-cp37-none-win32.whl", hash = "sha256:c68132727dd86fb641102e494d445f705efe402f4d5e24b278183a15499ab400"}, + {file = "safetensors-0.4.0-cp37-none-win_amd64.whl", hash = "sha256:1db87155454c168aef118d5657a403aee48a4cb08d8851a981157f07351ea317"}, + {file = "safetensors-0.4.0-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:9e583fa68e5a07cc859c4e13c1ebff12029904aa2e27185cf04a1f57fe9a81c4"}, + {file = "safetensors-0.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73e7696dcf3f72f99545eb1abe6106ad65ff1f62381d6ce4b34be3272552897a"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4936096a57c62e84e200f92620a536be067fc5effe46ecc7f230ebb496ecd579"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87b328ee1591adac332543e1f5fc2c2d7f149b745ebb0d58d7850818ff9cee27"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b69554c143336256260eceff1d3c0969172a641b54d4668489a711b05f92a2c0"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ebf6bcece5d5d1bd6416472f94604d2c834ca752ac60ed42dba7157e595a990"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6686ce01b8602d55a7d9903c90d4a6e6f90aeb6ddced7cf4605892d0ba94bcb8"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9b8fd6cc2f3bda444a048b541c843c7b7fefc89c4120d7898ea7d5b026e93891"}, + {file = "safetensors-0.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8a6abfe67692f81b8bdb99c837f28351c17e624ebf136970c850ee989c720446"}, + {file = "safetensors-0.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:27a24ca8822c469ee452db4c13418ba983315a0d863c018a9af15f2305eac38c"}, + {file = "safetensors-0.4.0-cp38-none-win32.whl", hash = "sha256:c4a0a47c8640167792d8261ee21b26430bbc39130a7edaad7f4c0bc05669d00e"}, + {file = "safetensors-0.4.0-cp38-none-win_amd64.whl", hash = "sha256:a738970a367f39249e2abb900d9441a8a86d7ff50083e5eaa6e7760a9f216014"}, + {file = "safetensors-0.4.0-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:806379f37e1abd5d302288c4b2f4186dd7ea7143d4c7811f90a8077f0ae8967b"}, + {file = "safetensors-0.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b9b94133ed2ae9dda0e95dcace7b7556eba023ffa4c4ae6df8f99377f571d6a"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b563a14c43614815a6b524d2e4edeaace50b717f7e7487bb227dd5b68350f5a"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00a9b157be660fb7ba88fa2eedd05ec93793a5b61e43e783e10cb0b995372802"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c8f194f45ab6aa767993c24f0aeb950af169dbc5d611b94c9021a1d13b8a1a34"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:469360b9451db10bfed3881378d5a71b347ecb1ab4f42367d77b8164a13af70b"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5f75fa97ccf32a3c7af476c6a0e851023197d3c078f6de3612008fff94735f9"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:acf0180283c2efae72f1d8c0a4a7974662091df01be3aa43b5237b1e52ed0a01"}, + {file = 
"safetensors-0.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cd02b495ba0814619f40bda46771bb06dbbf1d42524b66fa03b2a736c77e4515"}, + {file = "safetensors-0.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c42bdea183dbaa99e2f0e6120dc524df79cf4289a6f90f30a534444ef20f49fa"}, + {file = "safetensors-0.4.0-cp39-none-win32.whl", hash = "sha256:cef7bb5d9feae7146c3c3c7b3aef7d2c8b39ba7f5ff4252d368eb69462a47076"}, + {file = "safetensors-0.4.0-cp39-none-win_amd64.whl", hash = "sha256:79dd46fb1f19282fd12f544471efb97823ede927cedbf9cf35550d92b349fdd2"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:002301c1afa32909f83745b0c124d002e7ae07e15671f3b43cbebd0ffc5e6037"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:67762d36ae088c73d4a3c96bfc4ea8d31233554f35b6cace3a18533238d462ea"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f45230f20a206e5e4c7f7bbf9342178410c6f8b0af889843aa99045a76f7691"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f2ca939bbd8fb2f4dfa28e39a146dad03bc9325e9fc831b68f7b98f69a5a2f1"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:61a00f281391fae5ce91df70918bb61c12d2d514a493fd8056e12114be729911"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:435fd136a42492b280cb55126f9ce9535b35dd49df2c5d572a5945455a439448"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f0daa788273d683258fb1e4a5e16bef4486b2fca536451a2591bc0f4a6488895"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:0620ab0d41e390ccb1c4ea8f63dc00cb5f0b96a5cdd3cd0d64c21765720c074a"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1fa8d067733cb67f22926689ee808f08afacf7700d2ffb44efae90a0693eb1"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaa40bc363edda145db75cd030f3b1822e5478d550c3500a42502ecef32c959"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b561fbc044db7beff2ece0ec219a291809d45a38d30c6b38e7cc46482582f4ba"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:79a983b09782dacf9a1adb19bb98f4a8f6c3144108939f572c047b5797e43cf5"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:10b65cd3ad79f5d0daf281523b4146bc271a34bb7430d4e03212e0de8622dab8"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:114decacc475a6a9e2f9102a00c171d113ddb5d35cb0bda0db2c0c82b2eaa9ce"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:72ddb741dd5fe42521db76a70e012f76995516a12e7e0ef26be03ea9be77802a"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c5556c2ec75f5a6134866eddd7341cb36062e6edaea343478a279591b63ddba"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed50f239b0ce7ae85b078395593b4a351ede7e6f73af25f4873e3392336f64c9"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495dcaea8fbab70b927d2274e2547824462737acbf98ccd851a71124f779a5c6"}, + 
{file = "safetensors-0.4.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3f4d90c79a65ba2fe2ff0876f6140748f0a3ce6a21e27a35190f4f96321803f8"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7a524382b5c55b5fbb168e0e9d3f502450c8cf3fb81b93e880018437c206a482"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:9849ea60c7e840bfdd6030ad454d4a6ba837b3398c902f15a30460dd6961c28c"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:6c42623ae7045615d9eaa6877b9df1db4e9cc71ecc14bcc721ea1e475dddd595"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80cb8342f00f3c41b3b93b1a599b84723280d3ac90829bc62262efc03ab28793"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8c4f5ed4ede384dea8c99bae76b0718a828dbf7b2c8ced1f44e3b9b1a124475"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40d7cf03493bfe75ef62e2c716314474b28d9ba5bf4909763e4b8dd14330c01a"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:232029f0a9fa6fa1f737324eda98a700409811186888536a2333cbbf64e41741"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:9ed55f4a20c78ff3e8477efb63c8303c2152cdfb3bfea4d025a80f54d38fd628"}, + {file = "safetensors-0.4.0.tar.gz", hash = "sha256:b985953c3cf11e942eac4317ef3db3da713e274109cf7cfb6076d877054f013e"}, +] + +[package.extras] +all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] +dev = ["safetensors[all]"] +jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"] +numpy = ["numpy (>=1.21.6)"] +paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] +quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] +tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] +testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"] +torch = ["safetensors[numpy]", "torch (>=1.10)"] + +[[package]] +name = "scmrepo" +version = "1.4.1" +description = "SCM wrapper and fsspec filesystem for Git for use in DVC" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scmrepo-1.4.1-py3-none-any.whl", hash = "sha256:025844fc27d2cc4b5056d3a89bcfdce361525ccf7a88bf52c05fba8a27372465"}, + {file = "scmrepo-1.4.1.tar.gz", hash = "sha256:a5b2c0fa35e529e036ce362edc7493f0d196af23412d85485ded7518ea7afb6b"}, +] + +[package.dependencies] +asyncssh = ">=2.13.1,<3" +dulwich = ">=0.21.6" +fsspec = ">=2021.7.0" +funcy = ">=1.14" +gitpython = ">3" +pathspec = ">=0.9.0" +pygit2 = ">=1.13.0" +pygtrie = ">=2.3.2" +shortuuid = ">=0.5.0" + +[package.extras] +dev = ["mock (==5.1.0)", "mypy (==0.971)", "paramiko (==3.3.1)", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-asyncio (==0.18.3)", "pytest-cov (==3.0.0)", "pytest-docker (==0.12.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)", "pytest-test-utils (==0.0.8)", "types-certifi (==2021.10.8.3)", "types-mock (==5.1.0.2)", "types-paramiko (==3.3.0.0)"] +tests = ["mock (==5.1.0)", "mypy (==0.971)", "paramiko (==3.3.1)", "pylint (==2.15.0)", 
"pytest (==7.2.0)", "pytest-asyncio (==0.18.3)", "pytest-cov (==3.0.0)", "pytest-docker (==0.12.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)", "pytest-test-utils (==0.0.8)", "types-certifi (==2021.10.8.3)", "types-mock (==5.1.0.2)", "types-paramiko (==3.3.0.0)"] + +[[package]] +name = "sentencepiece" +version = "0.1.99" +description = "SentencePiece python wrapper" +optional = false +python-versions = "*" +files = [ + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0eb528e70571b7c02723e5804322469b82fe7ea418c96051d0286c0fa028db73"}, + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77d7fafb2c4e4659cbdf303929503f37a26eabc4ff31d3a79bf1c5a1b338caa7"}, + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be9cf5b9e404c245aeb3d3723c737ba7a8f5d4ba262ef233a431fa6c45f732a0"}, + {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baed1a26464998f9710d20e52607c29ffd4293e7c71c6a1f83f51ad0911ec12c"}, + {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9832f08bb372d4c8b567612f8eab9e36e268dff645f1c28f9f8e851be705f6d1"}, + {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:019e7535108e309dae2b253a75834fc3128240aa87c00eb80732078cdc182588"}, + {file = "sentencepiece-0.1.99-cp310-cp310-win32.whl", hash = "sha256:fa16a830416bb823fa2a52cbdd474d1f7f3bba527fd2304fb4b140dad31bb9bc"}, + {file = "sentencepiece-0.1.99-cp310-cp310-win_amd64.whl", hash = "sha256:14b0eccb7b641d4591c3e12ae44cab537d68352e4d3b6424944f0c447d2348d5"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6d3c56f24183a1e8bd61043ff2c58dfecdc68a5dd8955dc13bab83afd5f76b81"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ed6ea1819fd612c989999e44a51bf556d0ef6abfb553080b9be3d347e18bcfb7"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2a0260cd1fb7bd8b4d4f39dc2444a8d5fd4e0a0c4d5c899810ef1abf99b2d45"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a1abff4d1ff81c77cac3cc6fefa34fa4b8b371e5ee51cb7e8d1ebc996d05983"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:004e6a621d4bc88978eecb6ea7959264239a17b70f2cbc348033d8195c9808ec"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db361e03342c41680afae5807590bc88aa0e17cfd1a42696a160e4005fcda03b"}, + {file = "sentencepiece-0.1.99-cp311-cp311-win32.whl", hash = "sha256:2d95e19168875b70df62916eb55428a0cbcb834ac51d5a7e664eda74def9e1e0"}, + {file = "sentencepiece-0.1.99-cp311-cp311-win_amd64.whl", hash = "sha256:f90d73a6f81248a909f55d8e6ef56fec32d559e1e9af045f0b0322637cb8e5c7"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:62e24c81e74bd87a6e0d63c51beb6527e4c0add67e1a17bac18bcd2076afcfeb"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57efcc2d51caff20d9573567d9fd3f854d9efe613ed58a439c78c9f93101384a"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a904c46197993bd1e95b93a6e373dca2f170379d64441041e2e628ad4afb16f"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:d89adf59854741c0d465f0e1525b388c0d174f611cc04af54153c5c4f36088c4"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-win32.whl", hash = "sha256:47c378146928690d1bc106fdf0da768cebd03b65dd8405aa3dd88f9c81e35dba"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-win_amd64.whl", hash = "sha256:9ba142e7a90dd6d823c44f9870abdad45e6c63958eb60fe44cca6828d3b69da2"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b7b1a9ae4d7c6f1f867e63370cca25cc17b6f4886729595b885ee07a58d3cec3"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0f644c9d4d35c096a538507b2163e6191512460035bf51358794a78515b74f7"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8843d23a0f686d85e569bd6dcd0dd0e0cbc03731e63497ca6d5bacd18df8b85"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e6f690a1caebb4867a2e367afa1918ad35be257ecdb3455d2bbd787936f155"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-win32.whl", hash = "sha256:8a321866c2f85da7beac74a824b4ad6ddc2a4c9bccd9382529506d48f744a12c"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-win_amd64.whl", hash = "sha256:c42f753bcfb7661c122a15b20be7f684b61fc8592c89c870adf52382ea72262d"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:85b476406da69c70586f0bb682fcca4c9b40e5059814f2db92303ea4585c650c"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cfbcfe13c69d3f87b7fcd5da168df7290a6d006329be71f90ba4f56bc77f8561"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:445b0ec381af1cd4eef95243e7180c63d9c384443c16c4c47a28196bd1cda937"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6890ea0f2b4703f62d0bf27932e35808b1f679bdb05c7eeb3812b935ba02001"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb71af492b0eefbf9f2501bec97bcd043b6812ab000d119eaf4bd33f9e283d03"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b866b5bd3ddd54166bbcbf5c8d7dd2e0b397fac8537991c7f544220b1f67bc"}, + {file = "sentencepiece-0.1.99-cp38-cp38-win32.whl", hash = "sha256:b133e8a499eac49c581c3c76e9bdd08c338cc1939e441fee6f92c0ccb5f1f8be"}, + {file = "sentencepiece-0.1.99-cp38-cp38-win_amd64.whl", hash = "sha256:0eaf3591dd0690a87f44f4df129cf8d05d8a4029b5b6709b489b8e27f9a9bcff"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38efeda9bbfb55052d482a009c6a37e52f42ebffcea9d3a98a61de7aee356a28"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c030b081dc1e1bcc9fadc314b19b740715d3d566ad73a482da20d7d46fd444c"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84dbe53e02e4f8a2e45d2ac3e430d5c83182142658e25edd76539b7648928727"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b0f55d0a0ee1719b4b04221fe0c9f0c3461dc3dabd77a035fa2f4788eb3ef9a"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e800f206cd235dc27dc749299e05853a4e4332e8d3dfd81bf13d0e5b9007d9"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ae1c40cda8f9d5b0423cfa98542735c0235e7597d79caf318855cdf971b2280"}, + {file = 
"sentencepiece-0.1.99-cp39-cp39-win32.whl", hash = "sha256:c84ce33af12ca222d14a1cdd37bd76a69401e32bc68fe61c67ef6b59402f4ab8"}, + {file = "sentencepiece-0.1.99-cp39-cp39-win_amd64.whl", hash = "sha256:350e5c74d739973f1c9643edb80f7cc904dc948578bcb1d43c6f2b173e5d18dd"}, + {file = "sentencepiece-0.1.99.tar.gz", hash = "sha256:189c48f5cb2949288f97ccdb97f0473098d9c3dcf5a3d99d4eabe719ec27297f"}, +] + +[[package]] +name = "setuptools" +version = "68.0.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "setuptools-68.0.0-py3-none-any.whl", hash = "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f"}, + {file = "setuptools-68.0.0.tar.gz", hash = "sha256:baf1fdb41c6da4cd2eae722e135500da913332ab3f2f5c7d33af9b492acb5235"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "shortuuid" +version = "1.0.11" +description = "A generator library for concise, unambiguous and URL-safe UUIDs." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "shortuuid-1.0.11-py3-none-any.whl", hash = "sha256:27ea8f28b1bd0bf8f15057a3ece57275d2059d2b0bb02854f02189962c13b6aa"}, + {file = "shortuuid-1.0.11.tar.gz", hash = "sha256:fc75f2615914815a8e4cb1501b3a513745cb66ef0fd5fc6fb9f8c3fa3481f789"}, +] + +[[package]] +name = "shtab" +version = "1.6.4" +description = "Automagic shell tab completion for Python CLI applications" +optional = false +python-versions = ">=3.7" +files = [ + {file = "shtab-1.6.4-py3-none-any.whl", hash = "sha256:4be38887a912091a1640e06f5ccbcbd24e176cf2fcb9ef0c2e011ee22d63834f"}, + {file = "shtab-1.6.4.tar.gz", hash = "sha256:aba9e049bed54ffdb650cb2e02657282d8c0148024b0f500277052df124d47de"}, +] + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "smart-open" +version = "6.4.0" +description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" +optional = false +python-versions = ">=3.6,<4.0" +files = [ + {file = "smart_open-6.4.0-py3-none-any.whl", hash = "sha256:8d3ef7e6997e8e42dd55c74166ed21e6ac70664caa32dd940b26d54a8f6b4142"}, + {file = "smart_open-6.4.0.tar.gz", hash = "sha256:be3c92c246fbe80ebce8fbacb180494a481a77fcdcb7c1aadb2ea5b9c2bee8b9"}, +] + +[package.extras] +all = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "paramiko", "requests"] +azure = ["azure-common", "azure-core", "azure-storage-blob"] +gcs = ["google-cloud-storage (>=2.6.0)"] +http = ["requests"] +s3 = ["boto3"] +ssh = ["paramiko"] +test = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "paramiko", "pytest", "pytest-rerunfailures", "requests", "responses"] +webhdfs = ["requests"] + +[[package]] +name = "smmap" +version = "5.0.1" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.7" +files = [ + {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, + {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, +] + +[[package]] +name = "soupsieve" +version = "2.4.1" +description = "A modern CSS selector implementation for Beautiful Soup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "soupsieve-2.4.1-py3-none-any.whl", hash = "sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8"}, + {file = "soupsieve-2.4.1.tar.gz", hash = "sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea"}, +] + +[[package]] +name = "spacy" +version = "3.7.2" +description = "Industrial-strength Natural Language Processing (NLP) in Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "spacy-3.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4e285366d36c85f784d606a2d966912a18f4d24d47330c1c6acbdd9f19ee373"}, + {file = "spacy-3.7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f132c05368781be5d3be3d706afce7e7a9a0c9edc0dbb7c616162c37bc386561"}, + {file = "spacy-3.7.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e3767b2cabbe337d62779ae4fdc4d57a39755c17dfc499de3ad2bae622caa43"}, + {file = "spacy-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a748ade269bdbea9baaa49ec00882404e7e921163cdc14f5612320d0a957dfd"}, + {file = "spacy-3.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:66467128e494bfa4dc9c3996e4cbb26bac4741bca4cdd8dd83a6e71182148945"}, + {file = "spacy-3.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5af30aea578e7414fb0eb4dbad0ff0fa0a7d8e833c3e733eceb2617534714c7d"}, + {file = "spacy-3.7.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7293de33b1e9ede151555070ad0fee3bac98aefcaac9e615eeeb4296846bd479"}, + {file = "spacy-3.7.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26940681cf20c8831c558e2c3d345ff20b5bc3c5e6d41c66172d0c5136042f0b"}, + {file = "spacy-3.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a334667625153f7aaf188c20af7e82c886e41a88483a056accba5a7d51095c6"}, + {file = "spacy-3.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:43e6147d3583b62a2d3af0cd913ac025068196d587345751e198391ff0b8c1e9"}, + {file = "spacy-3.7.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2558df8c11905a0f77a2a3639a12ef8a522d171bcd88eaec039bedf6c60d7e01"}, + {file = "spacy-3.7.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:df1b9c4bbadc89bad10dba226d52c113e231ea6ad35c8a916ab138b31f69fa24"}, + {file = "spacy-3.7.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbbe055d2170ac7505a9f580bbdcd2146d0701bdbd6cea2333e18b0db655b97a"}, + {file = "spacy-3.7.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d35129b16ae2ca4212bf22a5c88b67b1e019e434fc48b69d3b95f80bc9e14e42"}, + {file = "spacy-3.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:a7419682aba99624cc4df7df66764b6ec62ff415f32c3682c1af2a37bd11a913"}, + {file = "spacy-3.7.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b12ab9c4923ffd38da84baf09464982da44e8275d680fb3c5da2051d7dd7bd2d"}, + {file = "spacy-3.7.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09c5c9db529dc1caa908813c58ba1643e929d2c811768596a2b64e2e01a882b1"}, + {file = "spacy-3.7.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcaad95e3e7d0ea8f381f3e2d9e80b7f346ecb6566de9bd55361736fa563fc22"}, + {file = "spacy-3.7.2-cp37-cp37m-win_amd64.whl", hash = "sha256:5d9b12284871ca5daa7774604a964486957567a86f1af898da0260e94b815e0d"}, + {file = "spacy-3.7.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2bd89770f61d5980e788ef382297322cceb7dcc4b848d68cb1da8af7d80d6eb6"}, + {file = 
"spacy-3.7.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d42f9151a2f01b34227ed31c8db8b7c67889ebcc637eae390faec8093ea1fb12"}, + {file = "spacy-3.7.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3d25d2f22ba1d2dd46d103e4a54826582de2b853b6f95dfb97b005563b38838"}, + {file = "spacy-3.7.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:730f23340dd157817d2da6df21f69966791b0bdbd6ea108845a65f3e1c0e981c"}, + {file = "spacy-3.7.2-cp38-cp38-win_amd64.whl", hash = "sha256:9c2f3f04b4b894a6c42ee93cec2f2b158f246f344927e65d9d19b72c5a6493ea"}, + {file = "spacy-3.7.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b22e0e8dac76740d55556fa13ebb9e1c829779ea0b7ec7a9e04f32efc66f74b9"}, + {file = "spacy-3.7.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ad7f378350104ca1f9e81180485d8b094aad7acb9b4bce84f1387b905cf230a2"}, + {file = "spacy-3.7.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ccbffb7825c08c0586ef7384d0aa23196f9ac106b5c7b3c551907316930f94f"}, + {file = "spacy-3.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:111955d7f4786b952672e9c5cfd9f8b74d81e64b62d479f71efe9cfc2a027a1d"}, + {file = "spacy-3.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:e8a7291e7e1cfcb6041b26f96d0a66b603725c1beff4e0391c3d9226fae16e04"}, + {file = "spacy-3.7.2.tar.gz", hash = "sha256:cedf4927bf0d3fec773a6ce48d5d2c91bdb02fed3c7d5ec07bdb873f1126f1a0"}, +] + +[package.dependencies] +catalogue = ">=2.0.6,<2.1.0" +cymem = ">=2.0.2,<2.1.0" +jinja2 = "*" +langcodes = ">=3.2.0,<4.0.0" +murmurhash = ">=0.28.0,<1.1.0" +numpy = [ + {version = ">=1.15.0", markers = "python_version < \"3.9\""}, + {version = ">=1.19.0", markers = "python_version >= \"3.9\""}, +] +packaging = ">=20.0" +preshed = ">=3.0.2,<3.1.0" +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<3.0.0" +requests = ">=2.13.0,<3.0.0" +setuptools = "*" +smart-open = ">=5.2.1,<7.0.0" +spacy-legacy = ">=3.0.11,<3.1.0" +spacy-loggers = ">=1.0.0,<2.0.0" +srsly = ">=2.4.3,<3.0.0" +thinc = ">=8.1.8,<8.3.0" +tqdm = ">=4.38.0,<5.0.0" +typer = ">=0.3.0,<0.10.0" +typing-extensions = {version = ">=3.7.4.1,<4.5.0", markers = "python_version < \"3.8\""} +wasabi = ">=0.9.1,<1.2.0" +weasel = ">=0.1.0,<0.4.0" + +[package.extras] +apple = ["thinc-apple-ops (>=0.1.0.dev0,<1.0.0)"] +cuda = ["cupy (>=5.0.0b4,<13.0.0)"] +cuda-autodetect = ["cupy-wheel (>=11.0.0,<13.0.0)"] +cuda100 = ["cupy-cuda100 (>=5.0.0b4,<13.0.0)"] +cuda101 = ["cupy-cuda101 (>=5.0.0b4,<13.0.0)"] +cuda102 = ["cupy-cuda102 (>=5.0.0b4,<13.0.0)"] +cuda110 = ["cupy-cuda110 (>=5.0.0b4,<13.0.0)"] +cuda111 = ["cupy-cuda111 (>=5.0.0b4,<13.0.0)"] +cuda112 = ["cupy-cuda112 (>=5.0.0b4,<13.0.0)"] +cuda113 = ["cupy-cuda113 (>=5.0.0b4,<13.0.0)"] +cuda114 = ["cupy-cuda114 (>=5.0.0b4,<13.0.0)"] +cuda115 = ["cupy-cuda115 (>=5.0.0b4,<13.0.0)"] +cuda116 = ["cupy-cuda116 (>=5.0.0b4,<13.0.0)"] +cuda117 = ["cupy-cuda117 (>=5.0.0b4,<13.0.0)"] +cuda11x = ["cupy-cuda11x (>=11.0.0,<13.0.0)"] +cuda12x = ["cupy-cuda12x (>=11.5.0,<13.0.0)"] +cuda80 = ["cupy-cuda80 (>=5.0.0b4,<13.0.0)"] +cuda90 = ["cupy-cuda90 (>=5.0.0b4,<13.0.0)"] +cuda91 = ["cupy-cuda91 (>=5.0.0b4,<13.0.0)"] +cuda92 = ["cupy-cuda92 (>=5.0.0b4,<13.0.0)"] +ja = ["sudachidict-core (>=20211220)", "sudachipy (>=0.5.2,!=0.6.1)"] +ko = ["natto-py (>=0.9.0)"] +lookups = ["spacy-lookups-data (>=1.0.3,<1.1.0)"] +th = ["pythainlp (>=2.0)"] +transformers = ["spacy-transformers (>=1.1.2,<1.4.0)"] + +[[package]] +name = "spacy-alignments" +version = "0.9.1" +description = "A spaCy 
package for the Rust tokenizations library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "spacy-alignments-0.9.1.tar.gz", hash = "sha256:7e020ec4797d6179060818d01cdb4e0013a52dba544b9bbfb5efcff8851926dc"}, + {file = "spacy_alignments-0.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f2d9b8da21d7924f4b5e6cfd89234b27f7939c4211c0fa866b3dde4110b96dd6"}, + {file = "spacy_alignments-0.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12402e2eea5c4b21b197c43c9bed2629ab1324ae46bd92f7b8e4630dec14ea3a"}, + {file = "spacy_alignments-0.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd0279610d5047205c8d10368a600fa6b9c6d995efdfb093708d54c9ad7efc1f"}, + {file = "spacy_alignments-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c152d78b25a88487145a6bb82aefc938e503c28c4249fd723390409deeb3f04"}, + {file = "spacy_alignments-0.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:61b42ba12222c1ea0e659ae5834e494f25492e7649425d0cef65aa8948818dd1"}, + {file = "spacy_alignments-0.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:285babdffd85840164446fbc40435c57510d4b90f12e893bbecb55c690b23c51"}, + {file = "spacy_alignments-0.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3eb9cc7efe494468e61038f91269d66ca9a4aa3395250f60eb942368c19a6e11"}, + {file = "spacy_alignments-0.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dccd315b0d083dfae0c82f845e647ead16f04d2ec1c15c9fc05281d6ae00cf7"}, + {file = "spacy_alignments-0.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1fe1ad0bcc9f365746c4031d0523b52da79dd87f9c0e6e977c6c8fd4032a82b"}, + {file = "spacy_alignments-0.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:a58ce17fd919c3719529df17c34f82bbaec600130655294aa05effd2308baaeb"}, + {file = "spacy_alignments-0.9.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bf5a5d7b65f9c7dfbf9c9ac1d1a2ab3e1cdcfc93a1f52cef0d666c29b416fe7d"}, + {file = "spacy_alignments-0.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20644e71b2d685fc31013ac8a806224a9de4a4dd2c03ded621a95a95efc6000d"}, + {file = "spacy_alignments-0.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36825157fbd7b96e6bfeb3a0076dd36d8d1f560624b824c2873d10a1a0d70fd2"}, + {file = "spacy_alignments-0.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35fa7444dd7117e45cfca51335a4eb737627c9a9dfd191c8291cf9f5fb0557ae"}, + {file = "spacy_alignments-0.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:adb04d06cf417f5df56a80f1a54f9eedaab3e4165b4fcb50bf7c3680eb549fc6"}, + {file = "spacy_alignments-0.9.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1264e21f7fbba166ed02c8b495e99f2d92e43335a476f4afa498c02e32566b4e"}, + {file = "spacy_alignments-0.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fd8a59fe7d75a61d303e8a290cba53b82d85f3bfecaf267343ef47df5555e9d"}, + {file = "spacy_alignments-0.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4b97b879d614f1c37f330c0c0c2fcffacd6bf5322473169748aa76e4acbe484"}, + {file = "spacy_alignments-0.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:c70df885671f75ed33371984ac156e5002c1245f0c64eb5a0b2aef20805b835b"}, + {file = "spacy_alignments-0.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c4e68df531d177d5b07ee9396f22c085e54685a6c4ab349f0ce5c8f55b54dde0"}, + {file = "spacy_alignments-0.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:365c44a5f76d789af82d174235333f31cf0e151c28d56b886a1223a961b47ba4"}, + {file = "spacy_alignments-0.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c913af4e0e3da4acbd9265697fb86a2c8370b2e70d984ef8f7238efa2922ec9"}, + {file = "spacy_alignments-0.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4582d242808c4c5c44380e3543e6a53225bf6db2ae9b4d9d58e2a671442e1b60"}, + {file = "spacy_alignments-0.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:69d8081654b310390aa037c6caee70fdf6825c4474f84dbe42d58cc44874c9f5"}, + {file = "spacy_alignments-0.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:992e2768b6f2432922b616ca893fe7a66d3e865cf457352dc250bc16ab016633"}, + {file = "spacy_alignments-0.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:10ecfb8e42adf0d39fec87bed9f344e0f85be893d2258d0b7d81134d5b110525"}, + {file = "spacy_alignments-0.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80f36d49431d6d6067c57caaabe1aca501bbe8df39c9ffa92daf386bdc239074"}, + {file = "spacy_alignments-0.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62c1d70bfb6fc12ce2a7a92f1c1725abaa87a0e06bc2c4bf2b3b5b43f5a3f59"}, + {file = "spacy_alignments-0.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:0b3cd95356f27fa4dc41448e131b6b44eb065d11e4c4c4fbcbfc0ef20ad4e513"}, +] + +[[package]] +name = "spacy-legacy" +version = "3.0.12" +description = "Legacy registered functions for spaCy backwards compatibility" +optional = false +python-versions = ">=3.6" +files = [ + {file = "spacy-legacy-3.0.12.tar.gz", hash = "sha256:b37d6e0c9b6e1d7ca1cf5bc7152ab64a4c4671f59c85adaf7a3fcb870357a774"}, + {file = "spacy_legacy-3.0.12-py2.py3-none-any.whl", hash = "sha256:476e3bd0d05f8c339ed60f40986c07387c0a71479245d6d0f4298dbd52cda55f"}, +] + +[[package]] +name = "spacy-loggers" +version = "1.0.5" +description = "Logging utilities for SpaCy" +optional = false +python-versions = ">=3.6" +files = [ + {file = "spacy-loggers-1.0.5.tar.gz", hash = "sha256:d60b0bdbf915a60e516cc2e653baeff946f0cfc461b452d11a4d5458c6fe5f24"}, + {file = "spacy_loggers-1.0.5-py3-none-any.whl", hash = "sha256:196284c9c446cc0cdb944005384270d775fdeaf4f494d8e269466cfa497ef645"}, +] + +[[package]] +name = "spacy-transformers" +version = "1.3.3" +description = "spaCy pipelines for pre-trained BERT and other transformers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "spacy-transformers-1.3.3.tar.gz", hash = "sha256:43ca15d26c3c3ce16c70571db1865f4e46af355432a9674b01d6f89435f86a13"}, + {file = "spacy_transformers-1.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d36d58963f1ac8366052f18e881ee7d7705acd464b6b5a73d3756a599a572c8"}, + {file = "spacy_transformers-1.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ae91eadf268da4ee2c01f203524abd83f837e56f0f8f3ec3f26ffe1200fc0794"}, + {file = "spacy_transformers-1.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:2fc524dd4706d196440572d715f884d0ad20a7e579cbc131a994981c871c1652"}, + {file = "spacy_transformers-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:20dfdea11df788ca8c2a2869ffc5af14bd58c020965e9c95fc6fdf56e934b3cd"}, + {file = "spacy_transformers-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9ba4d29ff7c8a2d1000487c966f4c6d6782616d51cad06ebb770278edcb8a541"}, + {file = "spacy_transformers-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:06903d2c50b559da2e460c513a9b0d13dbf5a036aede14dd26d18f7271bbd1a1"}, + {file = 
"spacy_transformers-1.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e3c38cdb2c22724337fa383cc44632950da9e851366172e2d8218fa3e8895370"}, + {file = "spacy_transformers-1.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:026467613cefb6f9fb23c19d8b9acba608065014fa82515f2cefe4b5976f38cf"}, + {file = "spacy_transformers-1.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:22b058fba486db5df0d9aee0cbd9e142fb716e281b94ba45502ff9e4ee1cefac"}, + {file = "spacy_transformers-1.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:33f338a64e7f77abf4dc763ebe2baf138a77a02414f7b424c61903f920bf141d"}, + {file = "spacy_transformers-1.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:4d348ab0ce1983b75f35476117b0bc530feac377739c985b9c95c36a2ac3feb1"}, + {file = "spacy_transformers-1.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0d56ffd59c03e75fab2c555982f8395539b913848c20f82f173a1ce69674bd12"}, + {file = "spacy_transformers-1.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e28b011d847f5ce7ebf9773b76d96024d9f08c3c69cd034ce2ba067f23fb82b3"}, + {file = "spacy_transformers-1.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:891d323dc7e343b543279165c985167b6213cd95d5d6f37084690bc9b8dbc341"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.15.0", markers = "python_version < \"3.9\""}, + {version = ">=1.19.0", markers = "python_version >= \"3.9\""}, +] +spacy = ">=3.5.0,<4.0.0" +spacy-alignments = ">=0.7.2,<1.0.0" +srsly = ">=2.4.0,<3.0.0" +torch = ">=1.8.0" +transformers = ">=3.4.0,<4.36.0" + +[package.extras] +cuda = ["cupy (>=5.0.0b4)"] +cuda100 = ["cupy-cuda100 (>=5.0.0b4)"] +cuda101 = ["cupy-cuda101 (>=5.0.0b4)"] +cuda102 = ["cupy-cuda102 (>=5.0.0b4)"] +cuda110 = ["cupy-cuda110 (>=5.0.0b4)"] +cuda111 = ["cupy-cuda111 (>=5.0.0b4)"] +cuda112 = ["cupy-cuda112 (>=5.0.0b4)"] +cuda80 = ["cupy-cuda80 (>=5.0.0b4)"] +cuda90 = ["cupy-cuda90 (>=5.0.0b4)"] +cuda91 = ["cupy-cuda91 (>=5.0.0b4)"] +cuda92 = ["cupy-cuda92 (>=5.0.0b4)"] + +[[package]] +name = "sqltrie" +version = "0.8.0" +description = "SQL-based prefix tree inspired by pygtrie and python-diskcache" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sqltrie-0.8.0-py3-none-any.whl", hash = "sha256:80a708960fd9468b645f527b39ea6beae30e57d5d5dd284f54a49ac267d240eb"}, + {file = "sqltrie-0.8.0.tar.gz", hash = "sha256:a773e41f00ae9215a79d3e0537526eaf5e37100037a2ef042d09edcc209abc9e"}, +] + +[package.dependencies] +attrs = "*" +orjson = {version = "*", markers = "implementation_name == \"cpython\""} +pygtrie = "*" + +[package.extras] +dev = ["mypy (==0.971)", "pyinstaller", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-benchmark", "pytest-cov (==3.0.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)"] +tests = ["mypy (==0.971)", "pyinstaller", "pylint (==2.15.0)", "pytest (==7.2.0)", "pytest-benchmark", "pytest-cov (==3.0.0)", "pytest-mock (==3.8.2)", "pytest-sugar (==0.9.5)"] + +[[package]] +name = "srsly" +version = "2.4.8" +description = "Modern high-performance serialization utilities for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "srsly-2.4.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:17f3bcb418bb4cf443ed3d4dcb210e491bd9c1b7b0185e6ab10b6af3271e63b2"}, + {file = "srsly-2.4.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b070a58e21ab0e878fd949f932385abb4c53dd0acb6d3a7ee75d95d447bc609"}, + {file = "srsly-2.4.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98286d20014ed2067ad02b0be1e17c7e522255b188346e79ff266af51a54eb33"}, + {file = 
"srsly-2.4.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18685084e2e0cc47c25158cbbf3e44690e494ef77d6418c2aae0598c893f35b0"}, + {file = "srsly-2.4.8-cp310-cp310-win_amd64.whl", hash = "sha256:980a179cbf4eb5bc56f7507e53f76720d031bcf0cef52cd53c815720eb2fc30c"}, + {file = "srsly-2.4.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5472ed9f581e10c32e79424c996cf54c46c42237759f4224806a0cd4bb770993"}, + {file = "srsly-2.4.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:50f10afe9230072c5aad9f6636115ea99b32c102f4c61e8236d8642c73ec7a13"}, + {file = "srsly-2.4.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c994a89ba247a4d4f63ef9fdefb93aa3e1f98740e4800d5351ebd56992ac75e3"}, + {file = "srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace7ed4a0c20fa54d90032be32f9c656b6d75445168da78d14fe9080a0c208ad"}, + {file = "srsly-2.4.8-cp311-cp311-win_amd64.whl", hash = "sha256:7a919236a090fb93081fbd1cec030f675910f3863825b34a9afbcae71f643127"}, + {file = "srsly-2.4.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7583c03d114b4478b7a357a1915305163e9eac2dfe080da900555c975cca2a11"}, + {file = "srsly-2.4.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:94ccdd2f6db824c31266aaf93e0f31c1c43b8bc531cd2b3a1d924e3c26a4f294"}, + {file = "srsly-2.4.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db72d2974f91aee652d606c7def98744ca6b899bd7dd3009fd75ebe0b5a51034"}, + {file = "srsly-2.4.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a60c905fd2c15e848ce1fc315fd34d8a9cc72c1dee022a0d8f4c62991131307"}, + {file = "srsly-2.4.8-cp312-cp312-win_amd64.whl", hash = "sha256:e0b8d5722057000694edf105b8f492e7eb2f3aa6247a5f0c9170d1e0d074151c"}, + {file = "srsly-2.4.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:196b4261f9d6372d1d3d16d1216b90c7e370b4141471322777b7b3c39afd1210"}, + {file = "srsly-2.4.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4750017e6d78590b02b12653e97edd25aefa4734281386cc27501d59b7481e4e"}, + {file = "srsly-2.4.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa034cd582ba9e4a120c8f19efa263fcad0f10fc481e73fb8c0d603085f941c4"}, + {file = "srsly-2.4.8-cp36-cp36m-win_amd64.whl", hash = "sha256:5a78ab9e9d177ee8731e950feb48c57380036d462b49e3fb61a67ce529ff5f60"}, + {file = "srsly-2.4.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:087e36439af517e259843df93eb34bb9e2d2881c34fa0f541589bcfbc757be97"}, + {file = "srsly-2.4.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad141d8a130cb085a0ed3a6638b643e2b591cb98a4591996780597a632acfe20"}, + {file = "srsly-2.4.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24d05367b2571c0d08d00459636b951e3ca2a1e9216318c157331f09c33489d3"}, + {file = "srsly-2.4.8-cp37-cp37m-win_amd64.whl", hash = "sha256:3fd661a1c4848deea2849b78f432a70c75d10968e902ca83c07c89c9b7050ab8"}, + {file = "srsly-2.4.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ec37233fe39af97b00bf20dc2ceda04d39b9ea19ce0ee605e16ece9785e11f65"}, + {file = "srsly-2.4.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d2fd4bc081f1d6a6063396b6d97b00d98e86d9d3a3ac2949dba574a84e148080"}, + {file = "srsly-2.4.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7347cff1eb4ef3fc335d9d4acc89588051b2df43799e5d944696ef43da79c873"}, + {file = "srsly-2.4.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash 
= "sha256:5a9dc1da5cc94d77056b91ba38365c72ae08556b6345bef06257c7e9eccabafe"}, + {file = "srsly-2.4.8-cp38-cp38-win_amd64.whl", hash = "sha256:dc0bf7b6f23c9ecb49ec0924dc645620276b41e160e9b283ed44ca004c060d79"}, + {file = "srsly-2.4.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ff8df21d00d73c371bead542cefef365ee87ca3a5660de292444021ff84e3b8c"}, + {file = "srsly-2.4.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ac3e340e65a9fe265105705586aa56054dc3902789fcb9a8f860a218d6c0a00"}, + {file = "srsly-2.4.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06d1733f4275eff4448e96521cc7dcd8fdabd68ba9b54ca012dcfa2690db2644"}, + {file = "srsly-2.4.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be5b751ad88fdb58fb73871d456248c88204f213aaa3c9aab49b6a1802b3fa8d"}, + {file = "srsly-2.4.8-cp39-cp39-win_amd64.whl", hash = "sha256:822a38b8cf112348f3accbc73274a94b7bf82515cb14a85ba586d126a5a72851"}, + {file = "srsly-2.4.8.tar.gz", hash = "sha256:b24d95a65009c2447e0b49cda043ac53fecf4f09e358d87a57446458f91b8a91"}, +] + +[package.dependencies] +catalogue = ">=2.0.3,<2.1.0" + +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "thinc" +version = "8.2.1" +description = "A refreshing functional take on deep learning, compatible with your favorite libraries" +optional = false +python-versions = ">=3.6" +files = [ + {file = "thinc-8.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:67948bbcf86c3ace8838ca4cdb72977b051d8ee024eeb631d94467be18b15271"}, + {file = "thinc-8.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e1a558b323f15f60bd79ba3cb95f78945e76748684db00052587270217b96a5"}, + {file = "thinc-8.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca97679f14f3cd73be76375d6792ac2685c7eca50260cef1810415a2c75ac6c5"}, + {file = "thinc-8.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:228dabcb8667ff19b2576718e4201b203c3f78dfbed4fa79caab8eef6d5fed48"}, + {file = "thinc-8.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:b02dadc3e41dd5cfd515f0c60aa3e5c472e02c12613a1bb9d837ce5f49cf9d34"}, + {file = "thinc-8.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0afbcd243d27c076b8c47aded8e5e0aff2ff683af6b95a39839fe3aea862cfd9"}, + {file = "thinc-8.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4704354879abb052fbd2c658cd6df20d7bba40790ded0e81e994c879849b62f4"}, + {file = "thinc-8.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d6257369950002abe09d64b4f161d10d73af5df3764aea89f70cae018cca14b"}, + {file = "thinc-8.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a2ce2f93a06f8e56796fd2b9d237b6f6ef36ccd9dec66cb38d0092a3947c875"}, + {file = "thinc-8.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:5bbefd9939302ebed6d48f57b959be899b23a0c85f1afaf50c82e7b493e5de04"}, + {file = "thinc-8.2.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:70fabf9e3d7f4da9804be9d29800dab7506cac12598735edb05ed1cec7b2ee50"}, + {file = "thinc-8.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:0fe6f36faa5a0a69d267d7196d821a9730b3bf1817941db2a83780a199599cd5"}, + {file = "thinc-8.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a1bc995cace52503c906b87ff0cf428b94435b8b70539c6e6ad29b526925c5"}, + {file = "thinc-8.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be1f169f01451010822cde5052db3fee25a0793abebe8fbd48d02955a33d0692"}, + {file = "thinc-8.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:9cf766fac7e845e96e509ac9545ea1a60034a069aee3d75068b6e46da084c206"}, + {file = "thinc-8.2.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0ad99b6d1f7c149137497c6ae9345304fd7465c0c290c00cedd504ff5ae5485d"}, + {file = "thinc-8.2.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:beda7380017df1fbdf8de1733851464886283786c3c9149e2ac7cef612eff6ed"}, + {file = "thinc-8.2.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95e6ae6309f110440bcbd6a03b5b4b940d7c607afd2027a6b638336cc42a2171"}, + {file = "thinc-8.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:aaad5532c3abd2fe69500426a102a3b53725a78eba5ba6867bed9e6b8de0bcba"}, + {file = "thinc-8.2.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3c32c1e1e60b5e676f1f618915fbb20547b573998693704d0b4987d972e35a62"}, + {file = "thinc-8.2.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eae5a3415ff9be0fa21671a58166e82fe6c9ee832252779fd92c31c03692fb7"}, + {file = "thinc-8.2.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79e66eed14c2e7b333d69b376f8a091efad366e172b11e39c04814b54969b399"}, + {file = "thinc-8.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:8a1a2ef7061e23507f8172adb7978f7b7bc0bd4ccb266149de7065ee5331e1ea"}, + {file = "thinc-8.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d0216e17be5ddcc1014af55d2e02388698fb64dbc9f32a4782df0a3860615057"}, + {file = "thinc-8.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:16e7c0988df852cbae40ac03f45e11e3c39300b05dff87267c6fc13108723985"}, + {file = "thinc-8.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:637fafb7d3b51f2aa611371761578fe9999d2675f4fc87eb09e736648d12be30"}, + {file = "thinc-8.2.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c27bab1026284fba355eda7d83ebc0612ace437fb50ddc9d390e71d732b67e20"}, + {file = "thinc-8.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:88dab842c68c8e9f0b75a7b4352b53eaa385db2a1de91e276219bfcfda27e47b"}, + {file = "thinc-8.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5978a97b35a36adb133a83b9fc6cbb9f0c364f8db8525fa0ef5c4fc03f25b889"}, + {file = "thinc-8.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e8181d86b1c8de8dae154ad02399a8d59beb62881c172926594a5f3d7dc0e625"}, + {file = "thinc-8.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab83ade836933e34a82c61ff9fe0cb3ea9103165935ce9ea12102aff270dad9"}, + {file = "thinc-8.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19387a23ef2ce2714572040c15f0896b6e0d3751e37ccc1d927c0447f8eac7a1"}, + {file = "thinc-8.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:229efc84666901730e5575d5ec3c852d02009478411b24c0640f45b42e87a21c"}, + {file = "thinc-8.2.1.tar.gz", hash = "sha256:cd7fdb3d883a15e6906254e7fb0162f69878e9ccdd1f8519db6ffbfe46bf6f49"}, +] + +[package.dependencies] +blis = ">=0.7.8,<0.8.0" +catalogue = ">=2.0.4,<2.1.0" +confection = ">=0.0.1,<1.0.0" +cymem = ">=2.0.2,<2.1.0" +murmurhash = ">=1.0.2,<1.1.0" +numpy = [ + 
{version = ">=1.15.0", markers = "python_version < \"3.9\""}, + {version = ">=1.19.0", markers = "python_version >= \"3.9\""}, +] +packaging = ">=20.0" +preshed = ">=3.0.2,<3.1.0" +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<3.0.0" +setuptools = "*" +srsly = ">=2.4.0,<3.0.0" +typing-extensions = {version = ">=3.7.4.1,<4.5.0", markers = "python_version < \"3.8\""} +wasabi = ">=0.8.1,<1.2.0" + +[package.extras] +cuda = ["cupy (>=5.0.0b4)"] +cuda-autodetect = ["cupy-wheel (>=11.0.0)"] +cuda100 = ["cupy-cuda100 (>=5.0.0b4)"] +cuda101 = ["cupy-cuda101 (>=5.0.0b4)"] +cuda102 = ["cupy-cuda102 (>=5.0.0b4)"] +cuda110 = ["cupy-cuda110 (>=5.0.0b4)"] +cuda111 = ["cupy-cuda111 (>=5.0.0b4)"] +cuda112 = ["cupy-cuda112 (>=5.0.0b4)"] +cuda113 = ["cupy-cuda113 (>=5.0.0b4)"] +cuda114 = ["cupy-cuda114 (>=5.0.0b4)"] +cuda115 = ["cupy-cuda115 (>=5.0.0b4)"] +cuda116 = ["cupy-cuda116 (>=5.0.0b4)"] +cuda117 = ["cupy-cuda117 (>=5.0.0b4)"] +cuda11x = ["cupy-cuda11x (>=11.0.0)"] +cuda80 = ["cupy-cuda80 (>=5.0.0b4)"] +cuda90 = ["cupy-cuda90 (>=5.0.0b4)"] +cuda91 = ["cupy-cuda91 (>=5.0.0b4)"] +cuda92 = ["cupy-cuda92 (>=5.0.0b4)"] +datasets = ["ml-datasets (>=0.2.0,<0.3.0)"] +mxnet = ["mxnet (>=1.5.1,<1.6.0)"] +tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"] +torch = ["torch (>=1.6.0)"] + +[[package]] +name = "thinc-apple-ops" +version = "0.1.4" +description = "Improve Thinc's performance on Apple devices with native libraries" +optional = true +python-versions = ">=3.7" +files = [ + {file = "thinc_apple_ops-0.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ad03f4247f8243164d4faba79aa157899d2a3ab22e2964f01be93a9a889895ee"}, + {file = "thinc_apple_ops-0.1.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec3bd84e0c5ef5d9ee1a10de07c5bfd477f936029ebc698b3b418f82963dbd0b"}, + {file = "thinc_apple_ops-0.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6935883f055a040a41d6af8f8597044de570c8229361886727d9b6b85bb18788"}, + {file = "thinc_apple_ops-0.1.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dd098f13733e11f74444e6bd882068c35d7e9d15f40535526e420eb7a2ef56c9"}, + {file = "thinc_apple_ops-0.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:16cf0bd28a640177c62b1652915e225aa04458682bf698d9a51351654d206907"}, + {file = "thinc_apple_ops-0.1.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:30ebe3deecf0fb156b74ba4f600d52ec3fa96fb6efb8a00ca14d01e712a85cc4"}, + {file = "thinc_apple_ops-0.1.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fa9c53717b00f49de091046100e69c03556e209902c07eb50d0d4bbd26ba0f5d"}, + {file = "thinc_apple_ops-0.1.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9d7864bbee3e13a6c91ec59634810378d2dd11b808380ee40c11b3eb11aae1c6"}, + {file = "thinc_apple_ops-0.1.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f203451c5c0dead351cd7fdc495afab6f7b6f40e27651e9f1d5f8839be127210"}, + {file = "thinc_apple_ops-0.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:31533f28e1af497edb246afa88cd7287ff49a365a1a21dfab2c6d2b19aea3990"}, + {file = "thinc_apple_ops-0.1.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:60cd002ceb2406991dda5adedc79e4494e19ba45a72d01c53124ecc86a00861f"}, + {file = "thinc_apple_ops-0.1.4.tar.gz", hash = "sha256:992dfc4805ab964131c9d9c71bcc4330becabe41916665ec424c035311347d05"}, +] + +[package.dependencies] +numpy = ">=1.21.0" +thinc = ">=8.1.0,<9.1.0" + +[[package]] +name = "tokenizers" +version = "0.13.3" +description = "Fast and Customizable Tokenizers" +optional = false +python-versions = "*" +files = [ + {file = 
"tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, + {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, + {file = "tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, + {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, + {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, + {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, + {file = "tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, + {file = 
"tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, + {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, + {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, + {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, + {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, + {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, +] + +[package.extras] +dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "tomlkit" +version = "0.12.2" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.7" +files = [ + {file = 
"tomlkit-0.12.2-py3-none-any.whl", hash = "sha256:eeea7ac7563faeab0a1ed8fe12c2e5a51c61f933f2502f7e9db0241a65163ad0"}, + {file = "tomlkit-0.12.2.tar.gz", hash = "sha256:df32fab589a81f0d7dc525a4267b6d7a64ee99619cbd1eeb0fae32c1dd426977"}, +] + +[[package]] +name = "torch" +version = "1.12.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "torch-1.12.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:9c038662db894a23e49e385df13d47b2a777ffd56d9bcd5b832593fab0a7e286"}, + {file = "torch-1.12.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:4e1b9c14cf13fd2ab8d769529050629a0e68a6fc5cb8e84b4a3cc1dd8c4fe541"}, + {file = "torch-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:e9c8f4a311ac29fc7e8e955cfb7733deb5dbe1bdaabf5d4af2765695824b7e0d"}, + {file = "torch-1.12.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:976c3f997cea38ee91a0dd3c3a42322785414748d1761ef926b789dfa97c6134"}, + {file = "torch-1.12.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:68104e4715a55c4bb29a85c6a8d57d820e0757da363be1ba680fa8cc5be17b52"}, + {file = "torch-1.12.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:743784ccea0dc8f2a3fe6a536bec8c4763bd82c1352f314937cb4008d4805de1"}, + {file = "torch-1.12.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b5dbcca369800ce99ba7ae6dee3466607a66958afca3b740690d88168752abcf"}, + {file = "torch-1.12.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f3b52a634e62821e747e872084ab32fbcb01b7fa7dbb7471b6218279f02a178a"}, + {file = "torch-1.12.1-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:8a34a2fbbaa07c921e1b203f59d3d6e00ed379f2b384445773bd14e328a5b6c8"}, + {file = "torch-1.12.1-cp37-none-macosx_11_0_arm64.whl", hash = "sha256:42f639501928caabb9d1d55ddd17f07cd694de146686c24489ab8c615c2871f2"}, + {file = "torch-1.12.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0b44601ec56f7dd44ad8afc00846051162ef9c26a8579dda0a02194327f2d55e"}, + {file = "torch-1.12.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:cd26d8c5640c3a28c526d41ccdca14cf1cbca0d0f2e14e8263a7ac17194ab1d2"}, + {file = "torch-1.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:42e115dab26f60c29e298559dbec88444175528b729ae994ec4c65d56fe267dd"}, + {file = "torch-1.12.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:a8320ba9ad87e80ca5a6a016e46ada4d1ba0c54626e135d99b2129a4541c509d"}, + {file = "torch-1.12.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:03e31c37711db2cd201e02de5826de875529e45a55631d317aadce2f1ed45aa8"}, + {file = "torch-1.12.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:9b356aea223772cd754edb4d9ecf2a025909b8615a7668ac7d5130f86e7ec421"}, + {file = "torch-1.12.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:6cf6f54b43c0c30335428195589bd00e764a6d27f3b9ba637aaa8c11aaf93073"}, + {file = "torch-1.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:f00c721f489089dc6364a01fd84906348fe02243d0af737f944fddb36003400d"}, + {file = "torch-1.12.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:bfec2843daa654f04fda23ba823af03e7b6f7650a873cdb726752d0e3718dada"}, + {file = "torch-1.12.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:69fe2cae7c39ccadd65a123793d30e0db881f1c1927945519c5c17323131437e"}, +] + +[package.dependencies] +typing-extensions = "*" + +[[package]] +name = "tqdm" +version = "4.66.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.1-py3-none-any.whl", hash = 
"sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "transformers" +version = "4.30.2" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "transformers-4.30.2-py3-none-any.whl", hash = "sha256:c332e3a3097f9ed89ce556b403251235931c00237b8bc2d7adaa19d226c13f1d"}, + {file = "transformers-4.30.2.tar.gz", hash = "sha256:f4a8aac4e1baffab4033f4a345b0d7dc7957d12a4f1ba969afea08205a513045"}, +] + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.14.1,<1.0" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +numpy = ">=1.17" +packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" +requests = "*" +safetensors = ">=0.3.1" +tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.14" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.20.2)"] +agents = ["Pillow", "accelerate (>=0.20.2)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.9,!=1.12.0)"] +all = ["Pillow", "accelerate (>=0.20.2)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.6.9)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf (<=3.20.3)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.20.2)", "deepspeed (>=0.8.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.20.2)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.8.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf (<=3.20.3)", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow", "accelerate (>=0.20.2)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.6.9)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.3)", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", 
"rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf (<=3.20.3)", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow", "accelerate (>=0.20.2)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.3)", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +docs = ["Pillow", "accelerate (>=0.20.2)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.6.9)", "hf-doc-builder", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf (<=3.20.3)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] +docs-specific = ["hf-doc-builder"] +fairscale = ["fairscale (>0.3)"] +flax = ["flax (>=0.4.1,<=0.6.9)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "optax (>=0.0.8,<=0.1.4)"] +flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune]", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] 
+modelcreation = ["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.6)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)", "urllib3 (<2.0.0)"] +ray = ["ray[tune]"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf (<=3.20.3)", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf (<=3.20.3)", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx"] +tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +timm = ["timm"] +tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"] +torch = ["accelerate (>=0.20.2)", "torch (>=1.9,!=1.12.0)"] +torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch-vision = ["Pillow", "torchvision"] +torchhub = ["filelock", "huggingface-hub (>=0.14.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf (<=3.20.3)", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "tqdm (>=4.27)"] +video = ["av (==9.2.0)", "decord (==0.6.0)"] +vision = ["Pillow"] + +[[package]] +name = "typed-ast" +version = "1.5.5" +description = "a fork of Python 2 and 3 ast modules with type comment support" +optional = false +python-versions = ">=3.6" +files = [ + {file = "typed_ast-1.5.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4bc1efe0ce3ffb74784e06460f01a223ac1f6ab31c6bc0376a21184bf5aabe3b"}, + {file = "typed_ast-1.5.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5f7a8c46a8b333f71abd61d7ab9255440d4a588f34a21f126bbfc95f6049e686"}, + {file = "typed_ast-1.5.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:597fc66b4162f959ee6a96b978c0435bd63791e31e4f410622d19f1686d5e769"}, + {file = "typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d41b7a686ce653e06c2609075d397ebd5b969d821b9797d029fccd71fdec8e04"}, + {file = "typed_ast-1.5.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5fe83a9a44c4ce67c796a1b466c270c1272e176603d5e06f6afbc101a572859d"}, + {file = "typed_ast-1.5.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d5c0c112a74c0e5db2c75882a0adf3133adedcdbfd8cf7c9d6ed77365ab90a1d"}, + {file = "typed_ast-1.5.5-cp310-cp310-win_amd64.whl", hash = "sha256:e1a976ed4cc2d71bb073e1b2a250892a6e968ff02aa14c1f40eba4f365ffec02"}, + {file = "typed_ast-1.5.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:c631da9710271cb67b08bd3f3813b7af7f4c69c319b75475436fcab8c3d21bee"}, + {file = "typed_ast-1.5.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b445c2abfecab89a932b20bd8261488d574591173d07827c1eda32c457358b18"}, + {file = "typed_ast-1.5.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc95ffaaab2be3b25eb938779e43f513e0e538a84dd14a5d844b8f2932593d88"}, + {file = "typed_ast-1.5.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61443214d9b4c660dcf4b5307f15c12cb30bdfe9588ce6158f4a005baeb167b2"}, + {file = "typed_ast-1.5.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6eb936d107e4d474940469e8ec5b380c9b329b5f08b78282d46baeebd3692dc9"}, + {file = "typed_ast-1.5.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e48bf27022897577d8479eaed64701ecaf0467182448bd95759883300ca818c8"}, + {file = "typed_ast-1.5.5-cp311-cp311-win_amd64.whl", hash = "sha256:83509f9324011c9a39faaef0922c6f720f9623afe3fe220b6d0b15638247206b"}, + {file = "typed_ast-1.5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:44f214394fc1af23ca6d4e9e744804d890045d1643dd7e8229951e0ef39429b5"}, + {file = "typed_ast-1.5.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:118c1ce46ce58fda78503eae14b7664163aa735b620b64b5b725453696f2a35c"}, + {file = "typed_ast-1.5.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be4919b808efa61101456e87f2d4c75b228f4e52618621c77f1ddcaae15904fa"}, + {file = "typed_ast-1.5.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:fc2b8c4e1bc5cd96c1a823a885e6b158f8451cf6f5530e1829390b4d27d0807f"}, + {file = "typed_ast-1.5.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:16f7313e0a08c7de57f2998c85e2a69a642e97cb32f87eb65fbfe88381a5e44d"}, + {file = "typed_ast-1.5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:2b946ef8c04f77230489f75b4b5a4a6f24c078be4aed241cfabe9cbf4156e7e5"}, + {file = "typed_ast-1.5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2188bc33d85951ea4ddad55d2b35598b2709d122c11c75cffd529fbc9965508e"}, + {file = "typed_ast-1.5.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0635900d16ae133cab3b26c607586131269f88266954eb04ec31535c9a12ef1e"}, + {file = "typed_ast-1.5.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57bfc3cf35a0f2fdf0a88a3044aafaec1d2f24d8ae8cd87c4f58d615fb5b6311"}, + {file = "typed_ast-1.5.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:fe58ef6a764de7b4b36edfc8592641f56e69b7163bba9f9c8089838ee596bfb2"}, + {file = "typed_ast-1.5.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d09d930c2d1d621f717bb217bf1fe2584616febb5138d9b3e8cdd26506c3f6d4"}, + {file = "typed_ast-1.5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:d40c10326893ecab8a80a53039164a224984339b2c32a6baf55ecbd5b1df6431"}, + {file = "typed_ast-1.5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fd946abf3c31fb50eee07451a6aedbfff912fcd13cf357363f5b4e834cc5e71a"}, + {file = "typed_ast-1.5.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ed4a1a42df8a3dfb6b40c3d2de109e935949f2f66b19703eafade03173f8f437"}, + {file = "typed_ast-1.5.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:045f9930a1550d9352464e5149710d56a2aed23a2ffe78946478f7b5416f1ede"}, + {file = "typed_ast-1.5.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381eed9c95484ceef5ced626355fdc0765ab51d8553fec08661dce654a935db4"}, + {file = "typed_ast-1.5.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:bfd39a41c0ef6f31684daff53befddae608f9daf6957140228a08e51f312d7e6"}, + {file = "typed_ast-1.5.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8c524eb3024edcc04e288db9541fe1f438f82d281e591c548903d5b77ad1ddd4"}, + {file = "typed_ast-1.5.5-cp38-cp38-win_amd64.whl", hash = "sha256:7f58fabdde8dcbe764cef5e1a7fcb440f2463c1bbbec1cf2a86ca7bc1f95184b"}, + {file = "typed_ast-1.5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:042eb665ff6bf020dd2243307d11ed626306b82812aba21836096d229fdc6a10"}, + {file = "typed_ast-1.5.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:622e4a006472b05cf6ef7f9f2636edc51bda670b7bbffa18d26b255269d3d814"}, + {file = "typed_ast-1.5.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1efebbbf4604ad1283e963e8915daa240cb4bf5067053cf2f0baadc4d4fb51b8"}, + {file = "typed_ast-1.5.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0aefdd66f1784c58f65b502b6cf8b121544680456d1cebbd300c2c813899274"}, + {file = "typed_ast-1.5.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:48074261a842acf825af1968cd912f6f21357316080ebaca5f19abbb11690c8a"}, + {file = "typed_ast-1.5.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:429ae404f69dc94b9361bb62291885894b7c6fb4640d561179548c849f8492ba"}, + {file = "typed_ast-1.5.5-cp39-cp39-win_amd64.whl", hash = "sha256:335f22ccb244da2b5c296e6f96b06ee9bed46526db0de38d2f0e5a6597b81155"}, + {file = "typed_ast-1.5.5.tar.gz", hash = "sha256:94282f7a354f36ef5dbce0ef3467ebf6a258e370ab33d5b40c249fa996e590dd"}, +] + +[[package]] +name = "typer" +version = "0.4.2" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." +optional = false +python-versions = ">=3.6" +files = [ + {file = "typer-0.4.2-py3-none-any.whl", hash = "sha256:023bae00d1baf358a6cc7cea45851639360bb716de687b42b0a4641cd99173f1"}, + {file = "typer-0.4.2.tar.gz", hash = "sha256:b8261c6c0152dd73478b5ba96ba677e5d6948c715c310f7c91079f311f62ec03"}, +] + +[package.dependencies] +click = ">=7.1.1,<9.0.0" + +[package.extras] +all = ["colorama (>=0.4.3,<0.5.0)", "shellingham (>=1.3.0,<2.0.0)"] +dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"] +doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)"] +test = ["black (>=22.3.0,<23.0.0)", "coverage (>=5.2,<6.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<2.0.0)", "shellingham (>=1.3.0,<2.0.0)"] + +[[package]] +name = "typing-extensions" +version = "4.4.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, + {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"}, +] + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + +[[package]] +name = "urllib3" +version = "2.0.7" +description = "HTTP library with thread-safe connection 
pooling, file post, and more." +optional = false +python-versions = ">=3.7" +files = [ + {file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"}, + {file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "validators" +version = "0.20.0" +description = "Python Data Validation for Humans™." +optional = false +python-versions = ">=3.4" +files = [ + {file = "validators-0.20.0.tar.gz", hash = "sha256:24148ce4e64100a2d5e267233e23e7afeb55316b47d30faae7eb6e7292bc226a"}, +] + +[package.dependencies] +decorator = ">=3.4.0" + +[package.extras] +test = ["flake8 (>=2.4.0)", "isort (>=4.2.2)", "pytest (>=2.2.3)"] + +[[package]] +name = "verspec" +version = "0.1.0" +description = "Flexible version handling" +optional = false +python-versions = "*" +files = [ + {file = "verspec-0.1.0-py3-none-any.whl", hash = "sha256:741877d5633cc9464c45a469ae2a31e801e6dbbaa85b9675d481cda100f11c31"}, + {file = "verspec-0.1.0.tar.gz", hash = "sha256:c4504ca697b2056cdb4bfa7121461f5a0e81809255b41c03dda4ba823637c01e"}, +] + +[package.extras] +test = ["coverage", "flake8 (>=3.7)", "mypy", "pretend", "pytest"] + +[[package]] +name = "vine" +version = "5.1.0" +description = "Python promises." +optional = false +python-versions = ">=3.6" +files = [ + {file = "vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc"}, + {file = "vine-5.1.0.tar.gz", hash = "sha256:8b62e981d35c41049211cf62a0a1242d8c1ee9bd15bb196ce38aefd6799e61e0"}, +] + +[[package]] +name = "virtualenv" +version = "20.21.1" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.21.1-py3-none-any.whl", hash = "sha256:09ddbe1af0c8ed2bb4d6ed226b9e6415718ad18aef9fa0ba023d96b7a8356049"}, + {file = "virtualenv-20.21.1.tar.gz", hash = "sha256:4c104ccde994f8b108163cf9ba58f3d11511d9403de87fb9b4f52bf33dbc8668"}, +] + +[package.dependencies] +distlib = ">=0.3.6,<1" +filelock = ">=3.4.1,<4" +importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.8\""} +platformdirs = ">=2.4,<4" + +[package.extras] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=22.12)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.3.1)", "pytest-env (>=0.8.1)", "pytest-freezegun (>=0.4.2)", "pytest-mock (>=3.10)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)"] + +[[package]] +name = "voluptuous" +version = "0.14.0" +description = "Python data validation library" +optional = false +python-versions = "*" +files = [ + {file = "voluptuous-0.14.0-py3-none-any.whl", hash = "sha256:f9d63353f270c87d5f3aea29f8c251beddfd164d74d934e54dad7b8f84ad7a74"}, + {file = "voluptuous-0.14.0.tar.gz", hash = "sha256:145384a9613f7520b70e214e5c06de0809e069ab56716685446450f9bee15e2e"}, +] + +[[package]] +name = "wasabi" +version = "1.1.2" +description = "A lightweight console printing and formatting toolkit" +optional = false +python-versions = ">=3.6" +files = [ + {file = 
"wasabi-1.1.2-py3-none-any.whl", hash = "sha256:0a3f933c4bf0ed3f93071132c1b87549733256d6c8de6473c5f7ed2e171b5cf9"}, + {file = "wasabi-1.1.2.tar.gz", hash = "sha256:1aaef3aceaa32edb9c91330d29d3936c0c39fdb965743549c173cb54b16c30b5"}, +] + +[package.dependencies] +colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\" and python_version >= \"3.7\""} +typing-extensions = {version = ">=3.7.4.1,<4.5.0", markers = "python_version < \"3.8\""} + +[[package]] +name = "watchdog" +version = "3.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.7" +files = [ + {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41"}, + {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397"}, + {file = "watchdog-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2b57a1e730af3156d13b7fdddfc23dea6487fceca29fc75c5a868beed29177ae"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7ade88d0d778b1b222adebcc0927428f883db07017618a5e684fd03b83342bd9"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7e447d172af52ad204d19982739aa2346245cc5ba6f579d16dac4bfec226d2e7"}, + {file = "watchdog-3.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9fac43a7466eb73e64a9940ac9ed6369baa39b3bf221ae23493a9ec4d0022674"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8ae9cda41fa114e28faf86cb137d751a17ffd0316d1c34ccf2235e8a84365c7f"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:25f70b4aa53bd743729c7475d7ec41093a580528b100e9a8c5b5efe8899592fc"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4f94069eb16657d2c6faada4624c39464f65c05606af50bb7902e036e3219be3"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7c5f84b5194c24dd573fa6472685b2a27cc5a17fe5f7b6fd40345378ca6812e3"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa7f6a12e831ddfe78cdd4f8996af9cf334fd6346531b16cec61c3b3c0d8da0"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:233b5817932685d39a7896b1090353fc8efc1ef99c9c054e46c8002561252fb8"}, + {file = "watchdog-3.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:13bbbb462ee42ec3c5723e1205be8ced776f05b100e4737518c67c8325cf6100"}, + {file = "watchdog-3.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:8f3ceecd20d71067c7fd4c9e832d4e22584318983cabc013dbf3f70ea95de346"}, + {file = "watchdog-3.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c9d8c8ec7efb887333cf71e328e39cffbf771d8f8f95d308ea4125bf5f90ba64"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0e06ab8858a76e1219e68c7573dfeba9dd1c0219476c5a44d5333b01d7e1743a"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:d00e6be486affb5781468457b21a6cbe848c33ef43f9ea4a73b4882e5f188a44"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:c07253088265c363d1ddf4b3cdb808d59a0468ecd017770ed716991620b8f77a"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:5113334cf8cf0ac8cd45e1f8309a603291b614191c9add34d33075727a967709"}, + {file = 
"watchdog-3.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:51f90f73b4697bac9c9a78394c3acbbd331ccd3655c11be1a15ae6fe289a8c83"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:ba07e92756c97e3aca0912b5cbc4e5ad802f4557212788e72a72a47ff376950d"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d429c2430c93b7903914e4db9a966c7f2b068dd2ebdd2fa9b9ce094c7d459f33"}, + {file = "watchdog-3.0.0-py3-none-win32.whl", hash = "sha256:3ed7c71a9dccfe838c2f0b6314ed0d9b22e77d268c67e015450a29036a81f60f"}, + {file = "watchdog-3.0.0-py3-none-win_amd64.whl", hash = "sha256:4c9956d27be0bb08fc5f30d9d0179a855436e655f046d288e2bcc11adfae893c"}, + {file = "watchdog-3.0.0-py3-none-win_ia64.whl", hash = "sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759"}, + {file = "watchdog-3.0.0.tar.gz", hash = "sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[[package]] +name = "wcwidth" +version = "0.2.10" +description = "Measures the displayed width of unicode strings in a terminal" +optional = false +python-versions = "*" +files = [ + {file = "wcwidth-0.2.10-py2.py3-none-any.whl", hash = "sha256:aec5179002dd0f0d40c456026e74a729661c9d468e1ed64405e3a6c2176ca36f"}, + {file = "wcwidth-0.2.10.tar.gz", hash = "sha256:390c7454101092a6a5e43baad8f83de615463af459201709556b6e4b1c861f97"}, +] + +[[package]] +name = "weasel" +version = "0.3.4" +description = "Weasel: A small and easy workflow system" +optional = false +python-versions = ">=3.6" +files = [ + {file = "weasel-0.3.4-py3-none-any.whl", hash = "sha256:ee48a944f051d007201c2ea1661d0c41035028c5d5a8bcb29a0b10f1100206ae"}, + {file = "weasel-0.3.4.tar.gz", hash = "sha256:eb16f92dc9f1a3ffa89c165e3a9acd28018ebb656e0da4da02c0d7d8ae3f6178"}, +] + +[package.dependencies] +cloudpathlib = ">=0.7.0,<0.17.0" +confection = ">=0.0.4,<0.2.0" +packaging = ">=20.0" +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<3.0.0" +requests = ">=2.13.0,<3.0.0" +smart-open = ">=5.2.1,<7.0.0" +srsly = ">=2.4.3,<3.0.0" +typer = ">=0.3.0,<0.10.0" +wasabi = ">=0.9.1,<1.2.0" + +[[package]] +name = "wheel" +version = "0.41.3" +description = "A built-package format for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "wheel-0.41.3-py3-none-any.whl", hash = "sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942"}, + {file = "wheel-0.41.3.tar.gz", hash = "sha256:4d4987ce51a49370ea65c0bfd2234e8ce80a12780820d9dc462597a6e60d0841"}, +] + +[package.extras] +test = ["pytest (>=6.0.0)", "setuptools (>=65)"] + +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +optional = false +python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] + +[package.extras] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] + +[[package]] +name = "yarl" +version = "1.9.2" +description = "Yet another URL library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee4afac41415d52d53a9833ebae7e32b344be72835bbb589018c9e938045a560"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9bf345c3a4f5ba7f766430f97f9cc1320786f19584acc7086491f45524a551ac"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a96c19c52ff442a808c105901d0bdfd2e28575b3d5f82e2f5fd67e20dc5f4ea"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:891c0e3ec5ec881541f6c5113d8df0315ce5440e244a716b95f2525b7b9f3608"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c3a53ba34a636a256d767c086ceb111358876e1fb6b50dfc4d3f4951d40133d5"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:566185e8ebc0898b11f8026447eacd02e46226716229cea8db37496c8cdd26e0"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2b0738fb871812722a0ac2154be1f049c6223b9f6f22eec352996b69775b36d4"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:32f1d071b3f362c80f1a7d322bfd7b2d11e33d2adf395cc1dd4df36c9c243095"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:e9fdc7ac0d42bc3ea78818557fab03af6181e076a2944f43c38684b4b6bed8e3"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:56ff08ab5df8429901ebdc5d15941b59f6253393cb5da07b4170beefcf1b2528"}, + {file = "yarl-1.9.2-cp310-cp310-win32.whl", hash = "sha256:8ea48e0a2f931064469bdabca50c2f578b565fc446f302a79ba6cc0ee7f384d3"}, + {file = "yarl-1.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:50f33040f3836e912ed16d212f6cc1efb3231a8a60526a407aeb66c1c1956dde"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:646d663eb2232d7909e6601f1a9107e66f9791f290a1b3dc7057818fe44fc2b6"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aff634b15beff8902d1f918012fc2a42e0dbae6f469fce134c8a0dc51ca423bb"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a83503934c6273806aed765035716216cc9ab4e0364f7f066227e1aaea90b8d0"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b25322201585c69abc7b0e89e72790469f7dad90d26754717f3310bfe30331c2"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22a94666751778629f1ec4280b08eb11815783c63f52092a5953faf73be24191"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ec53a0ea2a80c5cd1ab397925f94bff59222aa3cf9c6da938ce05c9ec20428d"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:159d81f22d7a43e6eabc36d7194cb53f2f15f498dbbfa8edc8a3239350f59fe7"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:832b7e711027c114d79dffb92576acd1bd2decc467dec60e1cac96912602d0e6"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:95d2ecefbcf4e744ea952d073c6922e72ee650ffc79028eb1e320e732898d7e8"}, + {file = 
"yarl-1.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d4e2c6d555e77b37288eaf45b8f60f0737c9efa3452c6c44626a5455aeb250b9"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:783185c75c12a017cc345015ea359cc801c3b29a2966c2655cd12b233bf5a2be"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:b8cc1863402472f16c600e3e93d542b7e7542a540f95c30afd472e8e549fc3f7"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:822b30a0f22e588b32d3120f6d41e4ed021806418b4c9f0bc3048b8c8cb3f92a"}, + {file = "yarl-1.9.2-cp311-cp311-win32.whl", hash = "sha256:a60347f234c2212a9f0361955007fcf4033a75bf600a33c88a0a8e91af77c0e8"}, + {file = "yarl-1.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:be6b3fdec5c62f2a67cb3f8c6dbf56bbf3f61c0f046f84645cd1ca73532ea051"}, + {file = "yarl-1.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38a3928ae37558bc1b559f67410df446d1fbfa87318b124bf5032c31e3447b74"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac9bb4c5ce3975aeac288cfcb5061ce60e0d14d92209e780c93954076c7c4367"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3da8a678ca8b96c8606bbb8bfacd99a12ad5dd288bc6f7979baddd62f71c63ef"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13414591ff516e04fcdee8dc051c13fd3db13b673c7a4cb1350e6b2ad9639ad3"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf74d08542c3a9ea97bb8f343d4fcbd4d8f91bba5ec9d5d7f792dbe727f88938"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e7221580dc1db478464cfeef9b03b95c5852cc22894e418562997df0d074ccc"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:494053246b119b041960ddcd20fd76224149cfea8ed8777b687358727911dd33"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:52a25809fcbecfc63ac9ba0c0fb586f90837f5425edfd1ec9f3372b119585e45"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:e65610c5792870d45d7b68c677681376fcf9cc1c289f23e8e8b39c1485384185"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:1b1bba902cba32cdec51fca038fd53f8beee88b77efc373968d1ed021024cc04"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:662e6016409828ee910f5d9602a2729a8a57d74b163c89a837de3fea050c7582"}, + {file = "yarl-1.9.2-cp37-cp37m-win32.whl", hash = "sha256:f364d3480bffd3aa566e886587eaca7c8c04d74f6e8933f3f2c996b7f09bee1b"}, + {file = "yarl-1.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6a5883464143ab3ae9ba68daae8e7c5c95b969462bbe42e2464d60e7e2698368"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5610f80cf43b6202e2c33ba3ec2ee0a2884f8f423c8f4f62906731d876ef4fac"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b9a4e67ad7b646cd6f0938c7ebfd60e481b7410f574c560e455e938d2da8e0f4"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:83fcc480d7549ccebe9415d96d9263e2d4226798c37ebd18c930fce43dfb9574"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fcd436ea16fee7d4207c045b1e340020e58a2597301cfbcfdbe5abd2356c2fb"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84e0b1599334b1e1478db01b756e55937d4614f8654311eb26012091be109d59"}, 
+ {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3458a24e4ea3fd8930e934c129b676c27452e4ebda80fbe47b56d8c6c7a63a9e"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:838162460b3a08987546e881a2bfa573960bb559dfa739e7800ceeec92e64417"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4e2d08f07a3d7d3e12549052eb5ad3eab1c349c53ac51c209a0e5991bbada78"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:de119f56f3c5f0e2fb4dee508531a32b069a5f2c6e827b272d1e0ff5ac040333"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:149ddea5abf329752ea5051b61bd6c1d979e13fbf122d3a1f9f0c8be6cb6f63c"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:674ca19cbee4a82c9f54e0d1eee28116e63bc6fd1e96c43031d11cbab8b2afd5"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:9b3152f2f5677b997ae6c804b73da05a39daa6a9e85a512e0e6823d81cdad7cc"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5415d5a4b080dc9612b1b63cba008db84e908b95848369aa1da3686ae27b6d2b"}, + {file = "yarl-1.9.2-cp38-cp38-win32.whl", hash = "sha256:f7a3d8146575e08c29ed1cd287068e6d02f1c7bdff8970db96683b9591b86ee7"}, + {file = "yarl-1.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:63c48f6cef34e6319a74c727376e95626f84ea091f92c0250a98e53e62c77c72"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:75df5ef94c3fdc393c6b19d80e6ef1ecc9ae2f4263c09cacb178d871c02a5ba9"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c027a6e96ef77d401d8d5a5c8d6bc478e8042f1e448272e8d9752cb0aff8b5c8"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3b078dbe227f79be488ffcfc7a9edb3409d018e0952cf13f15fd6512847f3f7"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59723a029760079b7d991a401386390c4be5bfec1e7dd83e25a6a0881859e716"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b03917871bf859a81ccb180c9a2e6c1e04d2f6a51d953e6a5cdd70c93d4e5a2a"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1012fa63eb6c032f3ce5d2171c267992ae0c00b9e164efe4d73db818465fac3"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a74dcbfe780e62f4b5a062714576f16c2f3493a0394e555ab141bf0d746bb955"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c56986609b057b4839968ba901944af91b8e92f1725d1a2d77cbac6972b9ed1"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2c315df3293cd521033533d242d15eab26583360b58f7ee5d9565f15fee1bef4"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b7232f8dfbd225d57340e441d8caf8652a6acd06b389ea2d3222b8bc89cbfca6"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:53338749febd28935d55b41bf0bcc79d634881195a39f6b2f767870b72514caf"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:066c163aec9d3d073dc9ffe5dd3ad05069bcb03fcaab8d221290ba99f9f69ee3"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8288d7cd28f8119b07dd49b7230d6b4562f9b61ee9a4ab02221060d21136be80"}, + {file = "yarl-1.9.2-cp39-cp39-win32.whl", hash = 
"sha256:b124e2a6d223b65ba8768d5706d103280914d61f5cae3afbc50fc3dfcc016623"}, + {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"}, + {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zc-lockfile" +version = "3.0.post1" +description = "Basic inter-process locks" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zc.lockfile-3.0.post1-py3-none-any.whl", hash = "sha256:ddb2d71088c061dc8a5edbaa346b637d742ca1e1564be75cb98e7dcae715de19"}, + {file = "zc.lockfile-3.0.post1.tar.gz", hash = "sha256:adb2ee6d9e6a2333c91178dcb2c9b96a5744c78edb7712dc784a7d75648e81ec"}, +] + +[package.dependencies] +setuptools = "*" + +[package.extras] +test = ["zope.testing"] + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[extras] +apple = ["thinc-apple-ops"] +cuda = ["cupy"] +cuda100 = ["cupy-cuda100"] +cuda101 = ["cupy-cuda101"] +cuda102 = ["cupy-cuda102"] +cuda110 = ["cupy-cuda110"] +cuda111 = ["cupy-cuda111"] +cuda112 = ["cupy-cuda112"] +cuda113 = ["cupy-cuda113"] +cuda114 = ["cupy-cuda114"] +cuda115 = ["cupy-cuda115"] +cuda116 = ["cupy-cuda116"] +cuda117 = ["cupy-cuda117"] +cuda80 = ["cupy-cuda80"] +cuda90 = ["cupy-cuda90"] +cuda91 = ["cupy-cuda91"] +cuda92 = ["cupy-cuda92"] + +[metadata] +lock-version = "2.0" +python-versions = ">3.7.6,<4.0,!=3.8.1" +content-hash = "bb2663c5bce60bd6807b15a6a890186b8b46eba3c330f52ef71e004dbe903a03" diff --git a/NER_model/project.lock b/NER_model/project.lock new file mode 100644 index 000000000..e380c9cdd --- /dev/null +++ b/NER_model/project.lock @@ -0,0 +1,108 @@ +convert: + cmd: python -m spacy run convert + script: + - python scripts/convert.py --lang eds --input-path data/NLP_diabeto/train --output-path + corpus/train.spacy + - python scripts/convert.py --lang eds --input-path data/NLP_diabeto/val --output-path + corpus/dev.spacy + - python scripts/convert.py --lang eds --input-path data/NLP_diabeto/test --output-path + corpus/test.spacy + deps: + - path: data/NLP_diabeto/train + md5: 30f19f2f900601d4ba8be1732ff81279 + - path: data/NLP_diabeto/val + md5: 98194594382fa1039782f651ad0110af + - path: data/NLP_diabeto/test + md5: ddec584923d06f2685a232976a5a0eca + - path: scripts/convert.py + md5: 951ecd34bc4ef64d218a90225a5c2aca + outs: + - path: corpus/train.spacy + md5: bd6c7c646cf288ee49f855177841c399 + - path: corpus/dev.spacy + md5: 97ad6c031ae1b3522131d2675a1464a0 + - path: corpus/test.spacy + md5: 0812cf1d19ab475be07eb65162e0ce2c + spacy_version: 3.5.1 + spacy_git_version: Unknown +train: + cmd: python -m spacy run train + script: + - python -m spacy train configs/config.cfg 
--output training --paths.train corpus/train.spacy
+    --paths.dev corpus/dev.spacy --nlp.lang eds --gpu-id 0
+  deps:
+  - path: configs/config.cfg
+    md5: 0714cebf0e60346234c2a244d1924198
+  - path: corpus/train.spacy
+    md5: bd6c7c646cf288ee49f855177841c399
+  - path: corpus/dev.spacy
+    md5: 97ad6c031ae1b3522131d2675a1464a0
+  outs:
+  - path: training/model-best
+    md5: dc2f4b221dc791390bc17c84aeae714d
+  spacy_version: 3.5.1
+  spacy_git_version: Unknown
+evaluate:
+  cmd: python -m spacy run evaluate
+  script:
+  - python scripts/evaluate.py training/model-best corpus/test.spacy --output training/test_metrics.json
+    --docbin corpus/output.spacy --gpu-id 0
+  deps:
+  - path: corpus/test.spacy
+    md5: 0812cf1d19ab475be07eb65162e0ce2c
+  - path: training/model-best
+    md5: dc2f4b221dc791390bc17c84aeae714d
+  outs:
+  - path: corpus/output.spacy
+    md5: aa7d595acc52e9ff79323f24fd0f106c
+  - path: training/test_metrics.json
+    md5: b7c84305723d1917301506545e41f5db
+  spacy_version: 3.5.1
+  spacy_git_version: Unknown
+package:
+  cmd: python -m spacy run package
+  script:
+  - python scripts/package.py training/model-best packages --name medic --version
+    0.1.0 --force --build wheel --code eds_medic
+  deps:
+  - path: training/model-best
+    md5: dc2f4b221dc791390bc17c84aeae714d
+  outs:
+  - path: packages/eds_medic-0.1.0/dist/eds_medic-0.1.0-py3-none-any.whl
+    md5: 1b491e3d4ce4f7822f381adc6525f0ba
+  spacy_version: 3.5.1
+  spacy_git_version: Unknown
+save:
+  cmd: python -m spacy run save
+  script:
+  - python scripts/save_to_brat.py training/model-best corpus/test.spacy --output
+    training/test_metrics.json --docbin corpus/output.spacy --gpu-id 0
+  deps:
+  - path: corpus/test.spacy
+    md5: 6f167a761337976d4cc34309a1dccbf2
+  - path: training/model-best
+    md5: 36b40cd2d45edf99e9bc14b4e8080b17
+  outs:
+  - path: corpus/output.spacy
+    md5: null
+  - path: training/test_metrics.json
+    md5: null
+  spacy_version: 3.5.1
+  spacy_git_version: Unknown
+save_to_brat:
+  cmd: python -m spacy run save_to_brat
+  script:
+  - python scripts/save_to_brat.py training/model-best corpus/test.spacy --output
+    training/test_metrics.json --docbin corpus/output.spacy --gpu-id 0
+  deps:
+  - path: corpus/test.spacy
+    md5: a5390dbbaa344bac5c71c7cdc3141849
+  - path: training/model-best
+    md5: c9319bca2767deb0868c641da5302bb5
+  outs:
+  - path: corpus/output.spacy
+    md5: 25794a6a530648bd92bd82882de9e730
+  - path: training/test_metrics.json
+    md5: de7b607948f789e8069088592314eb0d
+  spacy_version: 3.5.1
+  spacy_git_version: Unknown
diff --git a/NER_model/project.yml b/NER_model/project.yml
new file mode 100644
index 000000000..153ca9344
--- /dev/null
+++ b/NER_model/project.yml
@@ -0,0 +1,138 @@
+title: "EDS-Medic"
+description: |
+  EDS-Medic is a spaCy-based project used at APHP to extract drug prescriptions from clinical reports.
+
+  To run the full pipeline (download, split and format the dataset, train the pipeline and package it), simply run:
+  ```shell
+  spacy project run all
+  ```
+
+# Variables can be referenced across the project.yml using ${vars.var_name}
+vars:
+  name: "medic"
+  lang: "eds"
+  version: "0.1.0"
+  train: "data/NLP_diabeto/train"
+  test: "data/NLP_diabeto/test"
+  dev: "data/NLP_diabeto/val"
+# dataset on which to test the model:
+# brat_data/QUAERO_FrenchMed/corpus/train/EMEA/
+
+  corpus: "corpus"
+  training: "training"
+  seed: 0
+  fraction: 200
+  gpu_id: 0
+
+env:
+  registry_token: GITLAB_REGISTRY_TOKEN
+
+# These are the directories that the project needs. The project CLI will make
+# sure that they always exist.
+directories:
+  ["data", "corpus", "configs", "training", "scripts", "packages", "output"]
+
+# Workflows are sequences of commands (see below) executed in order. You can
+# run them via "spacy project run [workflow]". If a command's inputs/outputs
+# haven't changed, it won't be re-run.
+workflows:
+  all:
+    - train
+    - evaluate
+    - package
+  xp:
+    - convert
+    - train
+    - evaluate
+
+# Project commands, specified in a style similar to CI config files (e.g. Azure
+# pipelines). The name is the command name that lets you trigger the command
+# via "spacy project run [command] [path]". The help message is optional and
+# shown when executing "spacy project run [command] [path] --help".
+commands:
+  - name: "convert"
+    help: "Convert the data to spaCy's binary format"
+    script:
+      - "python scripts/convert.py --lang ${vars.lang} --input-path ${vars.train} --output-path ${vars.corpus}/train.spacy"
+      - "python scripts/convert.py --lang ${vars.lang} --input-path ${vars.dev} --output-path ${vars.corpus}/dev.spacy"
+      - "python scripts/convert.py --lang ${vars.lang} --input-path ${vars.test} --output-path ${vars.corpus}/test.spacy"
+    deps:
+      - "${vars.train}"
+      - "${vars.dev}"
+      - "${vars.test}"
+      - "scripts/convert.py"
+    outputs:
+      - "${vars.corpus}/train.spacy"
+      - "${vars.corpus}/dev.spacy"
+      - "${vars.corpus}/test.spacy"
+
+  - name: "create-config"
+    help: "Create a new config with an NER pipeline component"
+    script:
+      - "python -m spacy init config --lang ${vars.lang} --pipeline ner configs/config.cfg --force --gpu"
+    outputs:
+      - "configs/config.cfg"
+
+  - name: "train"
+    help: "Train the NER model"
+    script:
+      - "python -m spacy train configs/config.cfg --output ${vars.training} --paths.train ${vars.corpus}/train.spacy --paths.dev ${vars.corpus}/dev.spacy --nlp.lang ${vars.lang} --gpu-id ${vars.gpu_id}"
+    deps:
+      - "configs/config.cfg"
+      - "${vars.corpus}/train.spacy"
+      - "${vars.corpus}/dev.spacy"
+    outputs:
+      - "${vars.training}/model-best"
+
+  # - name: "evaluate"
+  #   help: "Evaluate the model and export metrics"
+  #   script:
+  #     - "python scripts/new_evaluate.py ${vars.training}/model-best ${vars.corpus}/test.spacy --output ${vars.training}/test_metrics.json --docbin ${vars.corpus}/output.spacy --gpu-id ${vars.gpu_id}"
+  #   deps:
+  #     - "${vars.corpus}/test.spacy"
+  #     - "${vars.training}/model-best"
+  #   outputs:
+  #     - "${vars.corpus}/output.spacy"
+  #     - "${vars.training}/test_metrics.json"
+
+  - name: "evaluate"
+    help: "Evaluate the model and export metrics"
+    script:
+      - "python scripts/evaluate.py ${vars.training}/model-best ${vars.corpus}/test.spacy --output ${vars.training}/test_metrics.json --docbin ${vars.corpus}/output.spacy --gpu-id ${vars.gpu_id}"
+    deps:
+      - "${vars.corpus}/test.spacy"
+      - "${vars.training}/model-best"
+    outputs:
+      - "${vars.corpus}/output.spacy"
+      - "${vars.training}/test_metrics.json"
+
+
+  - name: "infer"
+    help: "Run the model on the test documents"
+    script:
+      - "python scripts/infer.py --model ${vars.training}/model-best --input ${vars.corpus}/test.spacy --output ${vars.corpus}/output.spacy --format spacy"
+    deps:
+      - "${vars.corpus}/test.spacy"
+      - "${vars.training}/model-best"
+    outputs:
+      - "${vars.corpus}/output.spacy"
+
+  - name: "package"
+    help: "Package the trained model as a pip package"
+    script:
+      - "python scripts/package.py ${vars.training}/model-best packages --name ${vars.name} --version ${vars.version} --force --build wheel --code eds_medic"
+    deps:
+      - "${vars.training}/model-best"
+    outputs_no_cache:
+      - 
"packages/${vars.lang}_${vars.name}-${vars.version}/dist/${vars.lang}_${vars.name}-${vars.version}-py3-none-any.whl" + + - name: "save_to_brat" + help: "Save the test set with prediction on a BRAT format" + script: + - "python scripts/save_to_brat.py ${vars.training}/model-best ${vars.corpus}/test.spacy --output ${vars.training}/test_metrics.json --docbin ${vars.corpus}/output.spacy --gpu-id ${vars.gpu_id}" + deps: + - "${vars.corpus}/test.spacy" + - "${vars.training}/model-best" + outputs: + - "${vars.corpus}/output.spacy" + - "${vars.training}/test_metrics.json" diff --git a/NER_model/pyproject.toml b/NER_model/pyproject.toml new file mode 100644 index 000000000..8c4b8de76 --- /dev/null +++ b/NER_model/pyproject.toml @@ -0,0 +1,121 @@ +[tool.poetry] +name = "NER_model" +version = "0.1.0" +description = "" +authors = ["Perceval Wajsburt "] + +[tool.poetry.dependencies] +python = ">3.7.6,<4.0,!=3.8.1" +spacy = "^3.2.4" +spacy-transformers = "^1.1.5" +thinc = "^8.0.13" +sentencepiece = "^0.1.96" +cupy = {version = "^11.0.0", optional = true} +cupy-cuda80 = {version = "^7.8.0", optional = true} +cupy-cuda90 = {version = "^8.6.0", optional = true} +cupy-cuda91 = {version = "^7.8.0", optional = true} +cupy-cuda92 = {version = "^8.6.0", optional = true} +cupy-cuda100 = {version = "^9.6.0", optional = true} +cupy-cuda101 = {version = "^9.6.0", optional = true} +cupy-cuda102 = {version = "^11.0.0", optional = true} +cupy-cuda110 = {version = "^11.0.0", optional = true} +cupy-cuda111 = {version = "^10.6.0", optional = true} +cupy-cuda112 = {version = "^10.6.0", optional = true} +cupy-cuda113 = {version = "^10.6.0", optional = true} +cupy-cuda114 = {version = "^10.6.0", optional = true} +cupy-cuda115 = {version = "^10.6.0", optional = true} +cupy-cuda116 = {version = "^10.6.0", optional = true} +cupy-cuda117 = {version = "^10.6.0", optional = true} +thinc-apple-ops = {version = "^0.1.0", optional = true} +torch = "<1.13" + +[tool.poetry.group.dev.dependencies] +dvc = {version = "^2.37.0", markers="python_version >= '3.8'"} +loguru = "^0.6.0" +typer = "^0.4.1" +fsspec = "^2022.3.0" +pre-commit = "^2.18.1" +pytest = "^7.1.1" +pytest-cov = "^3.0.0" +mypy = "^0.950" +coverage = "^6.5.0" +jupyter_black = "0.3.4" + +[tool.poetry.group.docs] +optional = true + +[tool.poetry.group.docs.dependencies] +mike = "^1.1.2" +mkdocs-bibtex = "^2.0.3" +mkdocs-gen-files = "^0.3.4" +mkdocs-literate-nav = "^0.4.1" +mkdocs-material = "^8.2.8" +mkdocstrings = "^0.18.1" +mkdocstrings-python = "^0.6.6" +mkdocs-glightbox = "^0.1.6" +mkdocs-autorefs = "^0.4.1" + +[tool.poetry.extras] +cuda = ["cupy"] +cuda80 = ["cupy-cuda80"] +cuda90 = ["cupy-cuda90"] +cuda91 = ["cupy-cuda91"] +cuda92 = ["cupy-cuda92"] +cuda100 = ["cupy-cuda100"] +cuda101 = ["cupy-cuda101"] +cuda102 = ["cupy-cuda102"] +cuda110 = ["cupy-cuda110"] +cuda111 = ["cupy-cuda111"] +cuda112 = ["cupy-cuda112"] +cuda113 = ["cupy-cuda113"] +cuda114 = ["cupy-cuda114"] +cuda115 = ["cupy-cuda115"] +cuda116 = ["cupy-cuda116"] +cuda117 = ["cupy-cuda117"] +apple = ["thinc-apple-ops"] + +[tool.poetry.group.spark] +optional = true + +[tool.poetry.group.spark.dependencies] +pyspark = "2.4.3" + +[tool.interrogate] +ignore-init-method = true +ignore-init-module = true +ignore-magic = false +ignore-semiprivate = true +ignore-private = true +ignore-property-decorators = false +ignore-module = true +ignore-nested-functions = true +ignore-nested-classes = true +ignore-setters = false +fail-under = 10 +exclude = ["docs", "build", "tests", "packages", "setup.py"] +verbose = 0 +quiet = 
false +whitelist-regex = [] +color = true +omit-covered-files = false + +[tool.pytest.ini_options] +testpaths = [ + "tests", +] + +[tool.mypy] +plugins = "pydantic.mypy" + +[tool.ruff] +exclude = [".git", "__pycache__", "__init__.py", ".mypy_cache", ".pytest_cache", ".venv", "build", "packages"] +ignore = [] +line-length = 88 +select = ["E", "F", "W"] + +[tool.ruff.per-file-ignores] +"__init__.py" = ["F401"] + +[build-system] +requires = ["poetry-core>=1.0.0", "pypandoc<1.8.0"] +build-backend = "poetry.core.masonry.api" diff --git a/NER_model/scripts/convert.py b/NER_model/scripts/convert.py new file mode 100644 index 000000000..874418d6c --- /dev/null +++ b/NER_model/scripts/convert.py @@ -0,0 +1,153 @@ +import os +from pathlib import Path +from typing import Dict, List, Union + +import spacy +import srsly +import typer +from spacy.language import Language +from spacy.tokens import Doc, DocBin +from spacy.util import filter_spans + +from edsnlp.connectors.brat import BratConnector + +if not Doc.has_extension("context"): + Doc.set_extension("context", default=dict()) +if not Doc.has_extension("note_id"): + Doc.set_extension("note_id", default=None) +if not Doc.has_extension("note_datetime"): + Doc.set_extension("note_datetime", default=None) +if not Doc.has_extension("note_class_source_value"): + Doc.set_extension("note_class_source_value", default=None) +if not Doc.has_extension("split"): + Doc.set_extension("split", default=None) + + +def add_entities(doc: Doc, entities: List[Dict[str, Union[int, str]]]): + """ + Add annotations as Doc entities, re-tokenizing the document if need be. + + Parameters + ---------- + doc : Doc + spaCy Doc object + entities : List[Dict[str, Union[int, str]]] + List of annotations. + """ + + ents = [] + + for entity in entities: + start, end, label = entity["start"], entity["end"], entity["label"] + span = doc.char_span(start, end, label=label, alignment_mode="expand") + if span: + ents.append(span) + + doc.ents = filter_spans(ents) + + +def get_nlp(lang: str) -> Language: + nlp = spacy.blank(lang) + nlp.add_pipe("sentencizer") + + return nlp + + +def convert_jsonl( + nlp: spacy.Language, + input_path: Path, + n_limit: int, +) -> spacy.tokens.DocBin: + db = DocBin(store_user_data=True) + + if n_limit is not None: + nb_docs = 0 + for annot in srsly.read_jsonl(input_path): + if n_limit is not None: + if nb_docs >= n_limit: + break + nb_docs += 1 + ( + text, + note_id, + # note_datetime, + note_class_source_value, + entities, + context, + split, + ) = ( + annot["note_text"], + annot["note_id"], + # annot["note_datetime"], + annot["note_class_source_value"], + annot.get("entities", []), + annot.get("context", {}), + annot.get("split", None), + ) + + doc = nlp(text) + doc._.note_id = note_id + # doc._.note_datetime = note_datetime + doc._.note_class_source_value = note_class_source_value + doc._.context = context + doc._.split = split + + add_entities(doc, entities) + + db.add(doc) + + return db + + +def convert_brat( + nlp: spacy.Language, + input_path: Path, + n_limit: int, +) -> spacy.tokens.DocBin: + db = DocBin(store_user_data=True) + + connector = BratConnector(input_path) + docs = connector.brat2docs(nlp, run_pipe=True) + if n_limit is not None: + docs = docs[:n_limit] + for doc in docs: + if hasattr(doc, "text"): + db.add(doc) + + return db + + +def convert( + lang: str = typer.Option( + "fr", + help="Language to use", + ), + input_path: Path = typer.Option( + ..., + help="Path to the JSONL file", + ), + output_path: Path = typer.Option( + ..., + help="Path 
to the output spacy DocBin",
+    ),
+    n_limit: int = typer.Option(
+        None,
+        help="Limit number of documents to convert",
+    ),
+) -> None:
+    nlp = get_nlp(lang)
+
+    if os.path.isdir(input_path):
+        db = convert_brat(nlp, input_path, n_limit)
+    else:
+        db = convert_jsonl(nlp, input_path, n_limit)
+
+    typer.echo(f"The saved dataset contains {len(db)} documents.")
+    if not os.path.exists(output_path.parent):
+        os.makedirs(output_path.parent)
+        print(f"Folder: {output_path.parent} has been created")
+    db.to_disk(output_path)
+
+
+if __name__ == "__main__":
+    typer.run(convert)
diff --git a/NER_model/scripts/evaluate.py b/NER_model/scripts/evaluate.py
new file mode 100644
index 000000000..3f833d0a4
--- /dev/null
+++ b/NER_model/scripts/evaluate.py
@@ -0,0 +1,214 @@
+import re
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import srsly
+import typer
+from spacy import util
+from spacy.cli._util import Arg, Opt, import_code, setup_gpu
+from spacy.cli.evaluate import (
+    print_prf_per_type,
+    print_textcats_auc_per_cat,
+    render_parses,
+)
+from spacy.tokens import DocBin
+from thinc.api import fix_random_seed
+from wasabi import Printer
+
+from edsnlp.corpus_reader import Corpus
+
+
+# fmt: off
+def evaluate_cli(
+    model: str = Arg(..., help="Model name or path"),  # noqa: E501
+    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),  # noqa: E501
+    output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),  # noqa: E501
+    docbin: Optional[Path] = Opt(None, "--docbin", help="Output Doc Bin path", dir_okay=False),  # noqa: E501
+    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),  # noqa: E501
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),  # noqa: E501
+    gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),  # noqa: E501
+    displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),  # noqa: E501
+    displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"),  # noqa: E501
+):
+    # fmt: on
+
+    """
+    Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation
+    data in the binary .spacy format. The --gold-preproc option sets up the
+    evaluation examples with gold-standard sentences and tokens for the
+    predictions. Gold preprocessing helps the annotations align to the
+    tokenization, and may result in sequences of more consistent length. However,
+    it may reduce runtime accuracy due to train/test skew. To render a sample of
+    dependency parses in an HTML file, pass an output directory as the
+    displacy_path argument.
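+
+    Example (this is the "evaluate" command from this project's project.yml,
+    reproduced here for convenience with the repository's default paths):
+
+        python scripts/evaluate.py training/model-best corpus/test.spacy \
+            --output training/test_metrics.json --docbin corpus/output.spacy \
+            --gpu-id 0
+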
+ DOCS: https://spacy.io/api/cli#evaluate + """ + import_code(code_path) + evaluate( + model, + data_path, + output=output, + docbin=docbin, + use_gpu=use_gpu, + gold_preproc=gold_preproc, + displacy_path=displacy_path, + displacy_limit=displacy_limit, + silent=False, + ) + + +def evaluate( + model: str, + data_path: Path, + output: Optional[Path] = None, + docbin: Optional[Path] = None, + use_gpu: int = -1, + gold_preproc: bool = False, + displacy_path: Optional[Path] = None, + displacy_limit: int = 25, + silent: bool = True, + spans_key: str = "sc", +) -> Dict[str, Any]: + msg = Printer(no_print=silent, pretty=not silent) + fix_random_seed() + setup_gpu(use_gpu, silent=silent) + data_path = util.ensure_path(data_path) + output_path = util.ensure_path(output) + displacy_path = util.ensure_path(displacy_path) + if not data_path.exists(): + msg.fail("Evaluation data not found", data_path, exits=1) + if displacy_path and not displacy_path.exists(): + msg.fail("Visualization output directory not found", displacy_path, exits=1) + corpus = Corpus(data_path, gold_preproc=gold_preproc) + nlp = util.load_model(model) + nlp.batch_size = 1 + # nlp.remove_pipe("dates") + # nlp.remove_pipe("addresses") + # nlp.remove_pipe("rules") + # nlp.remove_pipe("structured") + + dev_dataset = [ + eg + for eg in corpus(nlp) # if getattr(eg.reference._, "split", "test") == "test" + ] + print(f"Evaluating {len(dev_dataset)} docs") + + if docbin is not None: + output_db = DocBin(store_user_data=True) + input_docs = DocBin().from_disk(data_path).get_docs(nlp.vocab) + for doc in input_docs: + doc.ents = [] + doc.spans.clear() + for doc in nlp.pipe(input_docs): + doc.user_data = { + k: v + for k, v in doc.user_data.items() + if "note_id" in k or "context" in k or "split" in k or "Action" in k or "Allergie" in k or "Certainty" in k or "Temporality" in k or "Family" in k or "Negation" in k + } + output_db.add(doc) + output_db.to_disk(docbin) + + scores = nlp.evaluate(dev_dataset) + metrics = { + "TOK": "token_acc", + "TAG": "tag_acc", + "POS": "pos_acc", + "MORPH": "morph_acc", + "LEMMA": "lemma_acc", + "UAS": "dep_uas", + "LAS": "dep_las", + "NER P": "ents_p", + "NER R": "ents_r", + "NER F": "ents_f", + "TEXTCAT": "cats_score", + "SENT P": "sents_p", + "SENT R": "sents_r", + "SENT F": "sents_f", + "SPAN P": f"spans_{spans_key}_p", + "SPAN R": f"spans_{spans_key}_r", + "SPAN F": f"spans_{spans_key}_f", + "SPEED": "speed", + "QUAL": "qual", + } + results = {} + data = {} + for metric, key in metrics.items(): + if key in scores: + if key == "cats_score": + metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")" + if isinstance(scores[key], (int, float)): + if key == "speed": + results[metric] = f"{scores[key]:.0f}" + else: + results[metric] = f"{scores[key]*100:.2f}" + else: + results[metric] = "-" + data[re.sub(r"[\s/]", "_", key.lower())] = scores[key] + + msg.table(results, title="Results") + data = handle_scores_per_type(scores, data, spans_key=spans_key, silent=silent) + + if displacy_path: + factory_names = [nlp.get_pipe_meta(pipe).factory for pipe in nlp.pipe_names] + docs = list(nlp.pipe(ex.reference.text for ex in dev_dataset[:displacy_limit])) + render_deps = "parser" in factory_names + render_ents = "ner" in factory_names + render_parses( + docs, + displacy_path, + model_name=model, + limit=displacy_limit, + deps=render_deps, + ents=render_ents, + ) + msg.good(f"Generated {displacy_limit} parses as HTML", displacy_path) + + if output_path is not None: + srsly.write_json(output_path, data) + 
msg.good(f"Saved results to {output_path}") + return data + + +def handle_scores_per_type( + scores: Dict[str, Any], + data: Dict[str, Any] = {}, + *, + spans_key: str = "sc", + silent: bool = False, +) -> Dict[str, Any]: + msg = Printer(no_print=silent, pretty=not silent) + if "morph_per_feat" in scores: + if scores["morph_per_feat"]: + print_prf_per_type(msg, scores["morph_per_feat"], "MORPH", "feat") + data["morph_per_feat"] = scores["morph_per_feat"] + if "dep_las_per_type" in scores: + if scores["dep_las_per_type"]: + print_prf_per_type(msg, scores["dep_las_per_type"], "LAS", "type") + data["dep_las_per_type"] = scores["dep_las_per_type"] + if "ents_per_type" in scores: + if scores["ents_per_type"]: + print_prf_per_type(msg, scores["ents_per_type"], "NER", "type") + data["ents_per_type"] = scores["ents_per_type"] + if f"spans_{spans_key}_per_type" in scores: + if scores[f"spans_{spans_key}_per_type"]: + print_prf_per_type( + msg, scores[f"spans_{spans_key}_per_type"], "SPANS", "type" + ) + data[f"spans_{spans_key}_per_type"] = scores[f"spans_{spans_key}_per_type"] + if "cats_f_per_type" in scores: + if scores["cats_f_per_type"]: + print_prf_per_type(msg, scores["cats_f_per_type"], "Textcat F", "label") + data["cats_f_per_type"] = scores["cats_f_per_type"] + if "cats_auc_per_type" in scores: + if scores["cats_auc_per_type"]: + print_textcats_auc_per_cat(msg, scores["cats_auc_per_type"]) + data["cats_auc_per_type"] = scores["cats_auc_per_type"] + if "qual_per_type" in scores: + if scores["qual_per_type"]: + print_prf_per_type(msg, scores["qual_per_type"], "Qualifiers", "qualifier") + data["qual_per_type"] = scores["qual_per_type"] + return scores + + +if __name__ == "__main__": + typer.run(evaluate_cli) diff --git a/NER_model/scripts/infer.py b/NER_model/scripts/infer.py new file mode 100644 index 000000000..a7f9de603 --- /dev/null +++ b/NER_model/scripts/infer.py @@ -0,0 +1,82 @@ +import os +from pathlib import Path +from typing import Optional + +import spacy +import typer +from spacy.tokens import DocBin +from tqdm import tqdm + +from edsnlp.connectors.brat import BratConnector + + +def main( + model: Optional[Path] = typer.Option(None, help="Path to the model"), + input: Path = typer.Option( + ..., help="Path to the evaluation dataset, in spaCy format" + ), + output: Path = typer.Option(..., help="Path to the output dataset"), + format: str = typer.Option(..., help="spacy or brat"), +): + """Partition the data into train/test/dev split.""" + + assert format in ("spacy", "brat") + + spacy.require_gpu() + + nlp = spacy.load(model) + + if os.path.isdir(input): + print("Input format is BRAT") + input_docs = list(BratConnector(input).brat2docs(nlp)) + else: + print("Input format is spaCy") + input_docs = DocBin().from_disk(input).get_docs(nlp.vocab) + + print("Number of docs:", len(input_docs)) + + for doc in input_docs: + doc.ents = [] + doc.spans.clear() + + predicted = [] + + nlp.batch_size = 1 + + for doc in tqdm(nlp.pipe(input_docs), total=len(input_docs)): + doc.user_data = { + k: v + for k, v in doc.user_data.items() + if "note_id" in k + or "context" in k + or "split" in k + or "Action" in k + or "Allergie" in k + or "Certainty" in k + or "Temporality" in k + or "Family" in k + or "Negation" in k + } + predicted.append(doc) + # predicted[0].ents[i]._.negation donne None au lieu de False/True + if format == "spacy": + print("Output format is spaCy") + out_db = DocBin(store_user_data=True, docs=predicted) + out_db.to_disk(output) + elif format == "brat": + print("Output format is 
BRAT") + BratConnector( + output, + attributes=[ + "Negation", + "Family", + "Temporality", + "Certainty", + "Action", + "Allergie", + ], + ).docs2brat(predicted) + + +if __name__ == "__main__": + typer.run(main) diff --git a/NER_model/scripts/new_evaluate.py b/NER_model/scripts/new_evaluate.py new file mode 100644 index 000000000..51520ecc5 --- /dev/null +++ b/NER_model/scripts/new_evaluate.py @@ -0,0 +1,235 @@ +import copy +import re +from pathlib import Path +from typing import Any, Dict, Optional + +import srsly +import typer +from spacy import util +from spacy.cli._util import Arg, Opt, import_code, setup_gpu +from spacy.cli.evaluate import ( + print_prf_per_type, + print_textcats_auc_per_cat, + render_parses, +) +from spacy.scorer import Scorer +from spacy.tokens import DocBin +from spacy.training import Example +from thinc.api import fix_random_seed +from wasabi import Printer + +from edsnlp.evaluate import evaluate_test + + +# fmt: off +def evaluate_cli( + model: str = Arg(..., help="Model name or path"), # noqa: E501 + data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True), # noqa: E501 + output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False), # noqa: E501 + docbin: Optional[Path] = Opt(None, "--docbin", help="Output Doc Bin path", dir_okay=False), # noqa: E501 + code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), # noqa: E501 + use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), # noqa: E501 + gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"), # noqa: E501 + displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False), # noqa: E501 + displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"), # noqa: E501 +): + # fmt: on + + """ + Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation + data in the binary .spacy format. The --gold-preproc option sets up the + evaluation examples with gold-standard sentences and tokens for the + predictions. Gold preprocessing helps the annotations align to the + tokenization, and may result in sequences of more consistent length. However, + it may reduce runtime accuracy due to train/test skew. To render a sample of + dependency parses in a HTML file, set as output directory as the + displacy_path argument. 
+    DOCS: https://spacy.io/api/cli#evaluate
+    """
+    import_code(code_path)
+    evaluate(
+        model,
+        data_path,
+        output=output,
+        docbin=docbin,
+        use_gpu=use_gpu,
+        gold_preproc=gold_preproc,
+        displacy_path=displacy_path,
+        displacy_limit=displacy_limit,
+        silent=False,
+    )
+
+
+def evaluate(
+    model: str,
+    data_path: Path,
+    output: Optional[Path] = None,
+    docbin: Optional[Path] = None,
+    use_gpu: int = -1,
+    gold_preproc: bool = False,
+    displacy_path: Optional[Path] = None,
+    displacy_limit: int = 25,
+    silent: bool = True,
+    spans_key: str = "sc",
+) -> Dict[str, Any]:
+    msg = Printer(no_print=silent, pretty=not silent)
+    fix_random_seed()
+    setup_gpu(use_gpu, silent=silent)
+    data_path = util.ensure_path(data_path)
+    output_path = util.ensure_path(output)
+    displacy_path = util.ensure_path(displacy_path)
+    if not data_path.exists():
+        msg.fail("Evaluation data not found", data_path, exits=1)
+    if displacy_path and not displacy_path.exists():
+        msg.fail("Visualization output directory not found", displacy_path, exits=1)
+    nlp = util.load_model(model)
+    nlp.batch_size = 1
+    # nlp.remove_pipe("dates")
+    # nlp.remove_pipe("addresses")
+    # nlp.remove_pipe("rules")
+    # nlp.remove_pipe("structured")
+
+    gold_docs = list(DocBin().from_disk(data_path).get_docs(nlp.vocab))
+    gold_docs.sort(key=lambda doc: doc._.note_id)
+    input_docs = copy.deepcopy(gold_docs)
+    for doc in input_docs:
+        doc.ents = []
+        doc.spans.clear()
+    print(f"Evaluating {len(gold_docs)} docs")
+    pred_docs = []
+    for doc in tqdm(nlp.pipe(input_docs), total=len(input_docs)):
+        doc.user_data = {
+            k: v
+            for k, v in doc.user_data.items()
+            if "note_id" in k or "context" in k or "split" in k or "Action" in k or "Certainty" in k or "Temporality" in k or "Family" in k or "Negation" in k
+        }
+        pred_docs.append(doc)
+    pred_docs.sort(key=lambda doc: doc._.note_id)
+
+    if docbin is not None:
+        output_db = DocBin(store_user_data=True)
+        for doc in pred_docs:
+            output_db.add(doc)
+        output_db.to_disk(docbin)
+
+    scores = evaluate_test(gold_docs, pred_docs)
+
+    metrics = {
+        "TOK": "token_acc",
+        "TAG": "tag_acc",
+        "POS": "pos_acc",
+        "MORPH": "morph_acc",
+        "LEMMA": "lemma_acc",
+        "UAS": "dep_uas",
+        "LAS": "dep_las",
+        "NER P": "ents_p",
+        "NER R": "ents_r",
+        "NER F": "ents_f",
+        "TEXTCAT": "cats_score",
+        "SENT P": "sents_p",
+        "SENT R": "sents_r",
+        "SENT F": "sents_f",
+        "SPAN P": f"spans_{spans_key}_p",
+        "SPAN R": f"spans_{spans_key}_r",
+        "SPAN F": f"spans_{spans_key}_f",
+        "SPEED": "speed",
+        "QUAL": "qual",
+    }
+    results = {}
+    data = {}
+    for metric, key in metrics.items():
+        if key in scores:
+            if key == "cats_score":
+                metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
+            if isinstance(scores[key], (int, float)):
+                if key == "speed":
+                    results[metric] = f"{scores[key]:.0f}"
+                else:
+                    results[metric] = f"{scores[key]*100:.2f}"
+            else:
+                results[metric] = "-"
+            data[re.sub(r"[\s/]", "_", key.lower())] = scores[key]
+
+    msg.table(results, title="Results")
+    data = handle_scores_per_type(scores, data, spans_key=spans_key, silent=silent)
+
+    if displacy_path:
+        factory_names = [nlp.get_pipe_meta(pipe).factory for pipe in nlp.pipe_names]
+        # this script has no Example objects: render from the gold docs instead
+        docs = list(nlp.pipe(doc.text for doc in gold_docs[:displacy_limit]))
+        render_deps = "parser" in factory_names
+        render_ents = "ner" in factory_names
+        render_parses(
+            docs,
+            displacy_path,
+            model_name=model,
+            limit=displacy_limit,
+            deps=render_deps,
+            ents=render_ents,
+        )
+        msg.good(f"Generated {displacy_limit} parses as HTML", displacy_path)
+
+    if output_path 
is not None:
+        srsly.write_json(output_path, data)
+        msg.good(f"Saved results to {output_path}")
+    return data
+
+def print_prf_per_type(
+    msg: Printer, scores: Dict[str, Dict[str, float]], name: str, type: str
+) -> None:
+    data = []
+    for key, value in scores.items():
+        row = [key]
+        for k in ("p", "r", "f", "n_entity"):
+            v = value[k]
+            if k == "n_entity":
+                # entity counts are absolute numbers, not percentages
+                row.append(f"{v:.0f}" if isinstance(v, (int, float)) else v)
+            else:
+                row.append(f"{v * 100:.2f}" if isinstance(v, (int, float)) else v)
+        data.append(row)
+    msg.table(
+        data,
+        header=("", "P", "R", "F", "N_entity"),
+        aligns=("l", "r", "r", "r", "r"),
+        title=f"{name} (per {type})",
+    )
+
+def handle_scores_per_type(
+    scores: Dict[str, Any],
+    data: Dict[str, Any] = {},
+    *,
+    spans_key: str = "sc",
+    silent: bool = False,
+) -> Dict[str, Any]:
+    msg = Printer(no_print=silent, pretty=not silent)
+    if "morph_per_feat" in scores:
+        if scores["morph_per_feat"]:
+            print_prf_per_type(msg, scores["morph_per_feat"], "MORPH", "feat")
+            data["morph_per_feat"] = scores["morph_per_feat"]
+    if "dep_las_per_type" in scores:
+        if scores["dep_las_per_type"]:
+            print_prf_per_type(msg, scores["dep_las_per_type"], "LAS", "type")
+            data["dep_las_per_type"] = scores["dep_las_per_type"]
+    if "ents_per_type" in scores:
+        if scores["ents_per_type"]:
+            print_prf_per_type(msg, scores["ents_per_type"], "NER", "type")
+            data["ents_per_type"] = scores["ents_per_type"]
+    if f"spans_{spans_key}_per_type" in scores:
+        if scores[f"spans_{spans_key}_per_type"]:
+            print_prf_per_type(
+                msg, scores[f"spans_{spans_key}_per_type"], "SPANS", "type"
+            )
+            data[f"spans_{spans_key}_per_type"] = scores[f"spans_{spans_key}_per_type"]
+    if "cats_f_per_type" in scores:
+        if scores["cats_f_per_type"]:
+            print_prf_per_type(msg, scores["cats_f_per_type"], "Textcat F", "label")
+            data["cats_f_per_type"] = scores["cats_f_per_type"]
+    if "cats_auc_per_type" in scores:
+        if scores["cats_auc_per_type"]:
+            print_textcats_auc_per_cat(msg, scores["cats_auc_per_type"])
+            data["cats_auc_per_type"] = scores["cats_auc_per_type"]
+    if "qual_per_type" in scores:
+        if scores["qual_per_type"]:
+            print_prf_per_type(msg, scores["qual_per_type"], "Qualifiers", "qualifier")
+            data["qual_per_type"] = scores["qual_per_type"]
+    # return the curated metrics dict that was built above, not the raw scores
+    return data
+
+
+if __name__ == "__main__":
+    typer.run(evaluate_cli)
diff --git a/NER_model/scripts/package.py b/NER_model/scripts/package.py
new file mode 100644
index 000000000..deb499bd1
--- /dev/null
+++ b/NER_model/scripts/package.py
@@ -0,0 +1,324 @@
+# This script exists because spaCy's package script cannot natively
+# - export a package (for medic rules) as well as the model weights
+# - keep the same requirements as those listed in the pyproject.toml file
+# - include package_data for the medic rules, listed also in the pyproject
+# Therefore, in this script we build the model package using most of spaCy's code
+# and build a new pyproject.toml derived from the main one (at the root of the repo)
+# and use poetry to build the model instead of `python setup.py`.
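+#
+# Example invocation (this is the "package" command from this project's
+# project.yml, reproduced here for convenience with the repository defaults):
+#   python scripts/package.py training/model-best packages \
+#       --name medic --version 0.1.0 --force --build wheel --code eds_medic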
+ +import os.path +import re +import shutil +import sys +from pathlib import Path +from typing import List, Optional + +import srsly +import toml +from spacy import util +from spacy.cli._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, string_to_list +from spacy.cli.package import ( + FILENAMES_DOCS, + TEMPLATE_MANIFEST, + _is_permitted_package_name, + create_file, + generate_meta, + generate_readme, + get_build_formats, + get_meta, + has_wheel, +) +from spacy.schemas import ModelMetaSchema, validate +from typer import Typer +from wasabi import Printer + +app = Typer() + + +TEMPLATE_INIT = """ +from pathlib import Path +from spacy.util import load_model_from_init_py, get_model_meta + +{imports} + +__version__ = {version} + + +def load(**overrides): + return load_model_from_init_py(__file__, **overrides) +""".lstrip() + + +def make_pyproject_toml( + pyproject_path: str, + name: str, + package_name: str, + package_data: str, +) -> str: + """ + Creates a new pyproject.toml config for the generated model + from the root poetry-based pyproject.toml file + For that we: + - adapt paths to the new structure (nested under the new model name) + - change the pyproject name + - include any original included files as well as the model weights + + Parameters + ---------- + pyproject_path: str + Path to the root level pyproject.toml path + name: str + Model name + package_name: str + Package directory name + package_data: str + Package data name in the above directory + + Returns + ------- + str + The string content of the new pyproject.toml file + """ + package_name = Path(package_name) + pyproject_text = Path(pyproject_path).read_text() + pyproject_data = toml.loads(pyproject_text) + print(pyproject_data) + pyproject_data["tool"]["poetry"]["name"] = name + new_includes = [ + str(package_name / include) + for include in pyproject_data["tool"]["poetry"]["include"] + ] + [ + str(package_name / "**/*.py"), + str(package_name / package_data / "**/*"), + str("**/meta.json"), + ] + pyproject_data["tool"]["poetry"]["include"] = new_includes + for key, plugins in pyproject_data["tool"]["poetry"]["plugins"].items(): + new_plugins = {} + for value, path in plugins.items(): + new_plugins[value] = f"{package_name}.{path}" + plugins.clear() + plugins.update(new_plugins) + return toml.dumps(pyproject_data) + + +# fmt: off +@app.command() +def package_medic_cli( + input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False), # noqa: E501 + output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False), # noqa: E501 + code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package"), # noqa: E501 + meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False), # noqa: E501 + create_meta: bool = Opt(False, "--create-meta", "-C", help="Create meta.json, even if one exists"), # noqa: E501 + name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"), # noqa: E501 + version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"), # noqa: E501 + build: str = Opt("sdist", "--build", "-b", help="Comma-separated formats to build: sdist and/or wheel, or none."), # noqa: E501 + force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"), # noqa: E501 +): + # fmt: on + """ + Adapted from spaCy package CLI command 
(documentation copied below).
+    This script exists because spaCy's standard script cannot natively
+    - export a package (for medic rules) as well as the model weights
+    - keep the same requirements as those listed in the pyproject.toml file
+    - include package_data for the medic rules, listed also in the pyproject
+    Therefore, in this script we build the model package using most of spaCy's code
+    and build a new pyproject.toml derived from the main one (at the root of the repo)
+    and use poetry to build the model instead of `python setup.py`.
+
+    SpaCy's original docstring:
+    Generate an installable Python package for a pipeline. Includes binary data,
+    meta and required installation files. A new directory will be created in the
+    specified output directory, and the data will be copied over. If
+    --create-meta is set and a meta.json already exists in the output directory,
+    the existing values will be used as the defaults in the command-line prompt.
+    After packaging, "python setup.py sdist" is run in the package directory,
+    which will create a .tar.gz archive that can be installed via "pip install".
+    If additional code files are provided (e.g. Python files containing custom
+    registered functions like pipeline components), they are copied into the
+    package and imported in the __init__.py.
+    DOCS: https://spacy.io/api/cli#package
+    """
+    create_sdist, create_wheel = get_build_formats(string_to_list(build))
+    code_paths = [Path(p.strip()) for p in string_to_list(code_paths)]
+    package(
+        input_dir,
+        output_dir,
+        meta_path=meta_path,
+        code_paths=code_paths,
+        name=name,
+        version=version,
+        create_meta=create_meta,
+        create_sdist=create_sdist,
+        create_wheel=create_wheel,
+        force=force,
+        silent=False,
+    )
+
+
+def package(
+    input_dir: Path,
+    output_dir: Path,
+    meta_path: Optional[Path] = None,
+    code_paths: List[Path] = [],
+    name: Optional[str] = None,
+    version: Optional[str] = None,
+    create_meta: bool = False,
+    create_sdist: bool = True,
+    create_wheel: bool = False,
+    force: bool = False,
+    silent: bool = True,
+) -> None:
+    msg = Printer(no_print=silent, pretty=not silent)
+    input_path = util.ensure_path(input_dir)
+    output_path = util.ensure_path(output_dir)
+    meta_path = util.ensure_path(meta_path)
+    if create_wheel and not has_wheel():
+        err = "Generating a binary .whl file requires wheel to be installed"
+        msg.fail(err, "pip install wheel", exits=1)
+    if not input_path or not input_path.exists():
+        msg.fail("Can't locate pipeline data", input_path, exits=1)
+    if not output_path or not output_path.exists():
+        msg.fail("Output directory not found", output_path, exits=1)
+    if create_sdist or create_wheel:
+        opts = ["sdist" if create_sdist else "", "wheel" if create_wheel else ""]
+        msg.info(f"Building package artifacts: {', '.join(opt for opt in opts if opt)}")
+    for code_path in code_paths:
+        if not code_path.exists():
+            msg.fail("Can't find code file", code_path, exits=1)
+        if os.path.isdir(code_path):
+            print("Will import", code_path.stem, "but did not test it before packaging")
+            # Import the code here so it's available when model is loaded (via
+            # get_meta helper). 
Also verifies that everything works + else: + util.import_file(code_path.stem, code_path) + if code_paths: + msg.good(f"Including {len(code_paths)} Python module(s) with custom code") + if meta_path and not meta_path.exists(): + msg.fail("Can't find pipeline meta.json", meta_path, exits=1) + meta_path = meta_path or input_dir / "meta.json" + if not meta_path.exists() or not meta_path.is_file(): + msg.fail("Can't load pipeline meta.json", meta_path, exits=1) + meta = srsly.read_json(meta_path) + meta = get_meta(input_dir, meta) + if meta["requirements"]: + msg.good( + f"Including {len(meta['requirements'])} package requirement(s) from " + f"meta and config", + ", ".join(meta["requirements"]), + ) + if name is not None: + if not name.isidentifier(): + msg.fail( + f"Model name ('{name}') is not a valid module name. " + "This is required so it can be imported as a module.", + "We recommend names that use ASCII A-Z, a-z, _ (underscore), " + "and 0-9. " + "For specific details see: " + "https://docs.python.org/3/reference/lexical_analysis.html#identifiers", + exits=1, + ) + if not _is_permitted_package_name(name): + msg.fail( + f"Model name ('{name}') is not a permitted package name. " + "This is required to correctly load the model with spacy.load.", + "We recommend names that use ASCII A-Z, a-z, _ (underscore), " + "and 0-9. " + "For specific details see: " + "https://www.python.org/dev/peps/pep-0426/#name", + exits=1, + ) + meta["name"] = name + if version is not None: + meta["version"] = version + if not create_meta: # only print if user doesn't want to overwrite + msg.good("Loaded meta.json from file", meta_path) + else: + meta = generate_meta(meta, msg) + errors = validate(ModelMetaSchema, meta) + if errors: + msg.fail("Invalid pipeline meta.json") + print("\n".join(errors)) + sys.exit(1) + model_name = meta["name"] + if not model_name.startswith(meta["lang"] + "_"): + model_name = f"{meta['lang']}_{model_name}" + model_name_v = model_name + "-" + meta["version"] + main_path = output_dir / model_name_v + package_path = main_path / model_name + if package_path.exists(): + if force: + shutil.rmtree(str(package_path)) + else: + msg.fail( + "Package directory already exists", + "Please delete the directory and try again, or use the " + "`--force` flag to overwrite existing directories.", + exits=1, + ) + Path.mkdir(package_path, parents=True) + shutil.copytree(str(input_dir), str(package_path / model_name_v)) + for file_name in FILENAMES_DOCS: + file_path = package_path / model_name_v / file_name + if file_path.exists(): + shutil.copy(str(file_path), str(main_path)) + readme_path = main_path / "README.md" + if not readme_path.exists(): + readme = generate_readme(meta) + create_file(readme_path, readme) + create_file(package_path / model_name_v / "README.md", readme) + msg.good("Generated README.md from meta.json") + else: + msg.info("Using existing README.md from pipeline directory") + imports = [] + for code_path in code_paths: + imports.append(code_path.stem) + if os.path.isdir(code_path): + print("Copying module", code_path, "to", str(package_path / code_path.stem)) + shutil.copytree(str(code_path), str(package_path / code_path.stem)) + else: + shutil.copy(str(code_path), str(package_path)) + + # no more top level meta.json, it was only used to load version + # number and toplevel resources are not compatible with poetry + create_file(main_path / model_name / "meta.json", srsly.json_dumps(meta, indent=2)) + + # no more setup.py, we use poetry now + # create_file(main_path / "setup.py", 
TEMPLATE_SETUP) + + create_file( + main_path / "pyproject.toml", + make_pyproject_toml( + "pyproject.toml", + model_name, + model_name, + model_name_v, + ), + ) + create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST) + init_py = TEMPLATE_INIT.format( + imports="\n".join(f"from . import {m}" for m in imports), + version=repr(version), + ) + create_file(package_path / "__init__.py", init_py) + msg.good(f"Successfully created package directory '{model_name_v}'", main_path) + if create_sdist: + with util.working_dir(main_path): + util.run_command(["poetry", "build", "-f", "sdist"]) + zip_file = main_path / "dist" / f"{model_name_v}{SDIST_SUFFIX}" + msg.good(f"Successfully created zipped Python package {zip_file}") + if create_wheel: + with util.working_dir(main_path): + util.run_command(["poetry", "build", "-f", "wheel"]) + wheel_name_squashed = re.sub("_+", "_", model_name_v) + wheel = main_path / "dist" / f"{wheel_name_squashed}{WHEEL_SUFFIX}" + msg.good(f"Successfully created binary wheel {wheel}") + if "__" in model_name: + msg.warn( + f"Model name ('{model_name}') contains a run of underscores. " + "Runs of underscores are not significant in installed package names.", + ) + + +if __name__ == "__main__": + app() diff --git a/NER_model/scripts/save_to_brat.py b/NER_model/scripts/save_to_brat.py new file mode 100644 index 000000000..d4596cc0b --- /dev/null +++ b/NER_model/scripts/save_to_brat.py @@ -0,0 +1,157 @@ +import os +import re +from pathlib import Path +from typing import Any, Dict, Optional + +import numpy as np +import pandas as pd +import spacy +import srsly +import typer +from eds_medic.corpus_reader import Corpus +from spacy import util +from spacy.cli._util import Arg, Opt, import_code, setup_gpu +from spacy.cli.evaluate import ( + print_prf_per_type, + print_textcats_auc_per_cat, + render_parses, +) +from spacy.scorer import Scorer +from spacy.tokens import Doc, DocBin +from spacy.training import Example +from thinc.api import fix_random_seed +from tqdm import tqdm +from wasabi import Printer + +from edsnlp.connectors.brat import BratConnector + + +def evaluate_cli( + model: str = Arg(..., help="Model name or path"), # noqa: E501 + data_path: Path = Arg( + ..., help="Location of binary evaluation data in .spacy format", exists=True + ), # noqa: E501 + output: Optional[Path] = Opt( + None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False + ), # noqa: E501 + docbin: Optional[Path] = Opt( + None, "--docbin", help="Output Doc Bin path", dir_okay=False + ), # noqa: E501 + code_path: Optional[Path] = Opt( + None, + "--code", + "-c", + help="Path to Python file with additional code (registered functions) to be imported", + ), # noqa: E501 + use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), # noqa: E501 + gold_preproc: bool = Opt( + False, "--gold-preproc", "-G", help="Use gold preprocessing" + ), # noqa: E501 + displacy_path: Optional[Path] = Opt( + None, + "--displacy-path", + "-dp", + help="Directory to output rendered parses as HTML", + exists=True, + file_okay=False, + ), # noqa: E501 + displacy_limit: int = Opt( + 25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML" + ), # noqa: E501 +): + + save( + model, + ### A DECOMMENTER ### + # data_path = '../data/attr2/test', + # output_brat ='../data/attr2/pred', + # data_path = "/export/home/"cse200093/Jacques_Bio/data_bio/brat_annotated_bio_val/test", + # output_brat = "/export/home/cse200093/Jacques_Bio/data_bio/brat_annotated_bio_val/test_eds-medic", + # data_path = 
'/export/home/cse200093/RV_Inter_conf/unnested_sosydiso_qualifiers_final/test_,
+        # output_brat = '/export/home/cse200093/RV_Inter_conf/unnested_sosydiso_qualifiers_final/pred',
+        # data_path = '/export/home/cse200093/RV_Inter_conf/unnested_final/test',
+        # output_brat = '/export/home/cse200093/RV_Inter_conf/unnested_final/pred',
+        data_path="/export/home/cse200093/Jacques_Bio/data_bio/super_pipe_get_stats_by_section_on_cim10/pred/syndrome_des_anti-phospholipides_init",
+        output_brat="/export/home/cse200093/Jacques_Bio/data_bio/super_pipe_get_stats_by_section_on_cim10/pred/syndrome_des_anti-phospholipides_pred2",
+        output=output,
+        docbin=docbin,
+        use_gpu=use_gpu,
+        gold_preproc=gold_preproc,
+        displacy_path=displacy_path,
+        displacy_limit=displacy_limit,
+        silent=False,
+    )
+
+
+def save(
+    model: str,
+    output_brat: str,
+    data_path: Path,
+    output: Optional[Path] = None,
+    docbin: Optional[Path] = None,
+    use_gpu: int = -1,
+    gold_preproc: bool = False,
+    displacy_path: Optional[Path] = None,
+    displacy_limit: int = 25,
+    silent: bool = True,
+    spans_key: str = "sc",
+):
+    setup_gpu(use_gpu, silent)
+
+    # brat = BratConnector(data_path, attributes = {"Disorders_type":"Disorders_type",'SOSY_type':'SOSY_type','Chemical_and_drugs_type':'Chemical_and_drugs_type',
+    # 'Concept_type':'Concept_type','negation':'negation','hypothetique':'hypothetique', 'family':'family','Medical_Procedure_type':'Medical_Procedure_type','gender_type':'gender_type'})
+    brat = BratConnector(
+        data_path,
+        attributes={
+            "Negation": "Negation",
+            "Family": "Family",
+            "Temporality": "Temporality",
+            "Certainty": "Certainty",
+            "Action": "Action",
+        },
+    )
+    empty = spacy.blank("fr")
+    df_gold = brat.brat2docs(empty)
+    df_gold.sort(key=lambda doc: doc.text)
+
+    print("-- Model running --")
+    # model_path = '/export/home/cse200093/Pierre_Medic/NEURAL_BASED_NER/inference_model/model-best'
+    df_txt = [doc.text for doc in df_gold]
+    model = spacy.load(model)
+    model.add_pipe("clean-entities")
+    # cannot find clean-entities... --> from eds_medic.... import clean_entities
+    df_txt_pred = []
+    for doc in tqdm(df_txt, desc="Processing documents"):
+        doc = model(doc)
+        doc._.trf_data = None
+        df_txt_pred.append(doc)
+
+    for i in range(len(df_txt_pred)):
+        df_txt_pred[i]._.note_id = df_gold[i]._.note_id
+
+    # for doc in df_txt:
+    #     for ent in doc.ents:
+    #         if ent._.Action:
+    #             print(ent, ent._.Action)
+
+    print("-- try saving --")
+
+    print("path: ", output_brat)
+    brat = BratConnector(
+        output_brat,
+        attributes={
+            "Negation": "Negation",
+            "Family": "Family",
+            "Temporality": "Temporality",
+            "Certainty": "Certainty",
+            "Action": "Action",
+        },
+    )
+
+    brat.docs2brat(df_txt_pred)
+
+    print("-- saved --")
+
+
+if __name__ == "__main__":
+    typer.run(evaluate_cli)
diff --git a/NER_model/scripts/visualize_model.py b/NER_model/scripts/visualize_model.py
new file mode 100644
index 000000000..f204d7903
--- /dev/null
+++ b/NER_model/scripts/visualize_model.py
@@ -0,0 +1,14 @@
+import spacy_streamlit
+import typer
+
+
+def main(models: str, default_text: str):
+    models = [name.strip() for name in models.split(",")]
+    spacy_streamlit.visualize(models, default_text, visualizers=["ner"])
+
+
+if __name__ == "__main__":
+    try:
+        typer.run(main)
+    except SystemExit:
+        pass
diff --git a/Normalisation/drugs/exception.py b/Normalisation/drugs/exception.py
new file mode 100644
index 000000000..9f653ff1e
--- /dev/null
+++ b/Normalisation/drugs/exception.py
@@ -0,0 +1,15 @@
+exception_list = {
+    "glucocorticoid": [
+        "cortico",
+        "corticoides",
+        "corticoide",
+        "corticos",
+        "corticotherapie",
+        "corticotherapies",
+        "glucocorticoides",
+        "glucocorticoide",
+        "corticostéroides",
+        "corticostéroide",
+    ],
+    "rituximab": ["ritux", "rtx"],
+    "prevenar": ["prevenar13"],
+}
diff --git a/Normalisation/drugs/normalisation.py b/Normalisation/drugs/normalisation.py
new file mode 100644
index 000000000..25e23ef4d
--- /dev/null
+++ b/Normalisation/drugs/normalisation.py
@@ -0,0 +1,257 @@
+import re
+from collections import defaultdict
+
+import duckdb
+import Levenshtein
+import numpy as np
+import pandas as pd
+import spacy
+from exception import exception_list
+from levenpandas import fuzzymerge
+from sklearn.metrics import f1_score, precision_score, recall_score
+from sklearn.preprocessing import MultiLabelBinarizer
+from tqdm import tqdm
+from unidecode import unidecode
+
+from edsnlp.connectors import BratConnector
+
+
+class DrugNormaliser:
+    def __init__(self, df_path, drug_dict, method="exact", max_pred=5, atc_len=7):
+        if df_path.endswith("json"):
+            self.df = pd.read_json(df_path)
+        else:
+            self.df = self.gold_generation(df_path)
+        self.drug_dict = drug_dict
+        # self.df['drug'] = self.df['drug'].apply(lambda x: re.sub(r'\W+', '',x.lower()))
+        self.df["norm_term"] = self.df["norm_term"].apply(lambda x: unidecode(x))
+        # self.df['score'] = None
+
+        merged_dict = {}
+        for atc_code, values in self.drug_dict.items():
+            # Shorten the ATC code
+            shortened_code = atc_code[:atc_len]
+
+            # Check if the shortened ATC code already exists in the merged dictionary
+            if shortened_code in merged_dict:
+                # Merge the arrays
+                merged_dict[shortened_code] = list(
+                    set(merged_dict[shortened_code] + values)
+                )
+
+            else:
+                # Add a new entry for the shortened ATC code
+                merged_dict[shortened_code] = values
+
+        merged_dict = (
+            pd.DataFrame.from_dict({"norm_term": merged_dict}, "index")
+            .T.explode("norm_term")
+            .reset_index()
+            .rename(columns={"index": "label"})
+        )
+        merged_dict.norm_term = merged_dict.norm_term.str.split(",")
+        merged_dict = 
merged_dict.explode("norm_term").reset_index(drop=True) + self.drug_dict = merged_dict + self.method = method + self.max_pred = max_pred + + def get_gold(self): + return self.df + + def get_dict(self): + return self.drug_dict + + def gold_generation(self, df_path): + doc_list = BratConnector(df_path).brat2docs(spacy.blank("fr")) + drug_list = [] + for doc in doc_list: + for ent in doc.ents: + if ent.label_ == "Chemical_and_drugs": + if not ent._.Tech: + drug_list.append( + [ + ent.text, + doc._.note_id, + [ent.start, ent.end], + ent.text.lower().strip(), + ] + ) + + drug_list_df = pd.DataFrame( + drug_list, columns=["term", "source", "span_converted", "norm_term"] + ) + drug_list_df.span_converted = drug_list_df.span_converted.astype(str) + return drug_list_df + + # def exact_match(self, drug_name, atc, names): + # matching_atc = [] + # matching_names = [] + # for name in names: + # if drug_name == name: + # matching_atc.append(atc) + # matching_names.append(name) + # return matching_atc, matching_names + + # def levenshtein_match(self, drug_name, name): + # return Levenshtein.ratio(drug_name, name) + + # def dice_match(self, word1, word2): + # intersection = len(set(word1) & set(word2)) + # coefficient = (2 * intersection) / (len(word1) + len(word2)) + # return coefficient + + def normalize(self, threshold=0.85): + # self.df['pred_atc'] = [None]*len(self.df) + # self.df['pred_string'] = [None]*len(self.df) + + for index, row in self.df.iterrows(): + for k, v in exception_list.items(): + if row["norm_term"] in v: + self.df.at[index, "norm_term"] = k + + if self.method == "exact": + self.df = self.df.merge(self.drug_dict, how="left", on="norm_term") + if self.method == "lev": + df_1 = self.df.copy() + df_2 = self.drug_dict.copy() + merged_df = duckdb.query( + f"""select *, jaro_winkler_similarity(df_1.norm_term, df_2.norm_term) score from df_1, df_2 where score > {threshold}""" + ).to_df() + idx = ( + merged_df.groupby(["term", "source", "span_converted", "norm_term"])[ + "score" + ].transform(max) + == merged_df["score"] + ) + self.df = merged_df[idx] + self.df = self.df.groupby( + ["term", "source", "span_converted", "norm_term"], as_index=False + ).agg({"label": list}) + return self.df + + +# if self.method =='lev': +# for index, row in self.df.iterrows(): +# drug_name = row['drug'] +# matching_atc = [] +# matching_names = [] +# matching_scores = [] +# for atc, names in self.drug_dict.items(): +# names = [name for name in names if name is not np.nan] +# Levenshtein_distance = [] +# Levenshtein_distance_name = [] +# for name in names: +# Levenshtein_distance.append(self.levenshtein_match(drug_name,name)) +# Levenshtein_distance_name.append(name) +# if len(Levenshtein_distance) > 0: +# max_value_id = Levenshtein_distance.index(max(Levenshtein_distance)) +# max_value = Levenshtein_distance[max_value_id] +# max_value_name = Levenshtein_distance_name[max_value_id] +# if max_value >= treshold: +# matching_atc.append(atc) +# matching_names.append(max_value_name) +# matching_scores.append(max_value) +# #sort matching_atc by score +# matching_atc = [x for _,x in sorted(zip(matching_scores,matching_atc), reverse=True)] +# matching_names = [x for _,x in sorted(zip(matching_scores,matching_names), reverse=True)] +# matching_scores = sorted(matching_scores, reverse=True) +# if 1 in matching_scores: +# self.df.at[index, 'pred_atc'] = [matching_atc[i] for i, score in enumerate(matching_scores) if score == 1] +# self.df.at[index, 'pred_string'] = [matching_names[i] for i, score in 
enumerate(matching_scores) if score == 1] +# self.df.at[index, 'score'] = [score for score in matching_scores if score == 1] +# else: +# self.df.at[index, 'pred_atc'] = matching_atc[:self.max_pred] +# self.df.at[index, 'pred_string'] = matching_names[:self.max_pred] +# self.df.at[index, 'score'] = matching_scores[:self.max_pred] +# return self.df + + +# def acc(self, verbose = False): +# correct_predictions = self.df.apply(lambda row: row['ATC'][:len(row['ATC'])] in [x[:len(row['ATC'])] for x in row['pred_atc']], axis=1).sum() +# total_predictions = len(self.df) +# accuracy = correct_predictions / total_predictions +# if verbose: +# return f'{accuracy}, ({correct_predictions}/{total_predictions})' +# else: +# return accuracy + +# def get_good_predictions(self): +# good_predictions = self.df.apply(lambda row: row['ATC'][:len(row['ATC'])] in [x[:len(row['ATC'])] for x in row['pred_atc']], axis=1) +# return self.df[good_predictions] + +# def get_bad_predictions(self): +# bad_predictions = self.df.apply(lambda row: row['ATC'][:len(row['ATC'])] not in [x[:len(row['ATC'])] for x in row['pred_atc']], axis=1) +# return self.df[bad_predictions] + +# def get_no_predictions(self): +# no_predictions = self.df.apply(lambda row: len(row['pred_atc'])==0, axis=1) +# return self.df[no_predictions] + + +# def metrics(self, verbose = True): +# y_true = self.df['ATC'] +# y_pred = self.df['pred_atc'] + +# unique_atc = set([atc for atc in y_true]) + +# results = {atc: {'TP': 0, 'FP': 0, 'FN': 0} for atc in unique_atc} + +# for atc in unique_atc: +# TP = 0 +# FP = 0 +# FN = 0 +# for c,atc_gold in enumerate(y_true): +# if atc_gold == atc: +# if atc_gold in y_pred[c]: +# TP += 1 +# else: +# FN += 1 +# for c, atcs_pred in enumerate(y_pred): +# for atc_pred in atcs_pred: +# if atc_pred == atc: +# if atc_pred not in y_true[c]: +# FP += 1 + +# results[atc]['TP'] = TP +# results[atc]['FP'] = FP +# results[atc]['FN'] = FN + +# #we get the micro_average +# total_TP = sum([results[atc]['TP'] for atc in unique_atc]) +# total_FP = sum([results[atc]['FP'] for atc in unique_atc]) +# total_FN = sum([results[atc]['FN'] for atc in unique_atc]) + +# precision_micro = total_TP/(total_TP+total_FP) +# recall_micro = total_TP/(total_TP+total_FN) +# f1_micro = 2*precision_micro*recall_micro/(precision_micro+recall_micro) + +# #we get the macro_average +# total_precision = 0 +# total_recall = 0 +# total_f1 = 0 + +# for atc in unique_atc: +# if results[atc]['TP']+results[atc]['FP'] != 0: +# precision = results[atc]['TP']/(results[atc]['TP']+results[atc]['FP']) +# else: +# precision = 0 +# if results[atc]['TP']+results[atc]['FN'] != 0: +# recall = results[atc]['TP']/(results[atc]['TP']+results[atc]['FN']) +# else: +# recall = 0 +# if precision+recall != 0: +# f1 = 2*precision*recall/(precision+recall) +# else: +# f1 = 0 + +# total_precision += precision +# total_recall += recall +# total_f1 += f1 + +# total_precision = total_precision/len(unique_atc) +# total_recall = total_recall/len(unique_atc) +# total_f1 = total_f1/len(unique_atc) + + +# print(f' MICRO : The precision is {precision_micro}, the recall is {recall_micro} and the f1 score is {f1_micro}') +# print(f' MACRO : The precision is {total_precision}, the recall is {total_recall} and the f1 score is {total_f1}') diff --git a/Normalisation/extract_measurement/config.py b/Normalisation/extract_measurement/config.py new file mode 100644 index 000000000..109539330 --- /dev/null +++ b/Normalisation/extract_measurement/config.py @@ -0,0 +1,17 @@ +import pandas as pd +from 
measurements_patterns import * + +################################ +# ## MEASUREMENTS PIPE CONFIG ### +# ############################### +measurements_pipe_regex_convert_spans = regex_convert_spans +measurements_pipe_label_key = label_key +measurements_pipe_labels_to_remove = labels_to_remove +measurements_pipe_labels_linkable_to_measurement = labels_linkable_to_measurement +measurements_pipe_config_normalizer_from_label_key = config_normalizer_from_label_key +measurements_pipe_config_measurements_from_label_key = ( + config_measurements_from_label_key +) +measurements_pipe_config_normalizer_from_tables = config_normalizer_from_tables +measurements_pipe_config_measurements_from_tables = config_measurements_from_tables +measurements_only_tables = False diff --git a/Normalisation/extract_measurement/extract_measurements_from_brat.py b/Normalisation/extract_measurement/extract_measurements_from_brat.py new file mode 100644 index 000000000..9f921626a --- /dev/null +++ b/Normalisation/extract_measurement/extract_measurements_from_brat.py @@ -0,0 +1,437 @@ +import math +import random +import re +import sys +import time +from itertools import combinations +from os import listdir +from os.path import basename, isdir, isfile, join +from statistics import mean +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import spacy +from extract_pandas_from_brat import extract_pandas +from measurements_patterns import ( + config_measurements_from_label_key, + config_measurements_from_tables, + config_normalizer_from_label_key, + config_normalizer_from_tables, + label_key, + labels_linkable_to_measurement, + labels_to_remove, + regex_convert_spans, +) +from scipy.stats import bootstrap +from tqdm import tqdm +from typing_extensions import TypedDict + +from edsnlp.processing import pipe + + +class UnitConfig(TypedDict): + scale: float + terms: List[str] + followed_by: Optional[str] = None + ui_decomposition: Dict[str, int] + + +class UnitlessRange(TypedDict): + min: Optional[int] + max: Optional[int] + unit: str + + +class UnitlessPatternConfig(TypedDict): + terms: List[str] + ranges: List[UnitlessRange] + + +class SimpleMeasurementConfigWithoutRegistry(TypedDict): + value_range: str + value: Union[float, int] + unit: str + + +class ValuelessPatternConfig(TypedDict): + terms: Optional[List[str]] + regex: Optional[List[str]] + measurement: SimpleMeasurementConfigWithoutRegistry + + +class MeasureConfig(TypedDict): + unit: str + unitless_patterns: Optional[List[UnitlessPatternConfig]] + valueless_patterns: Optional[List[ValuelessPatternConfig]] + + +class MeasurementsPipeConfig(TypedDict): + measurements: Union[List[str], Tuple[str], Dict[str, MeasureConfig]] + units_config: Dict[str, UnitConfig] + number_terms: Dict[str, List[str]] + value_range_terms: Dict[str, List[str]] + all_measurements: bool + parse_tables: bool + parse_doc: bool + stopwords_unitless: List[str] + stopwords_measure_unit: List[str] + measure_before_unit: bool + unit_divisors: List[str] + name: str + ignore_excluded: bool + attr: str + + +class ExtractMeasurements: + def __init__( + self, + regex_convert_spans: Optional[str] = regex_convert_spans, + label_key: Optional[str] = label_key, + labels_to_remove: Optional[List[str]] = labels_to_remove, + labels_linkable_to_measurement: Optional[ + List[str] + ] = labels_linkable_to_measurement, + config_normalizer_from_label_key: Optional[ + Dict[str, bool] + ] = config_normalizer_from_label_key, + 
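+        # The four config parameters below default to the module-level values
+        # imported from measurements_patterns.py; each (normalizer, measurements)
+        # pair is compiled into its own spaCy pipeline in load_nlp().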
config_measurements_from_label_key: Optional[
+            MeasurementsPipeConfig
+        ] = config_measurements_from_label_key,
+        config_normalizer_from_tables: Optional[
+            Dict[str, bool]
+        ] = config_normalizer_from_tables,
+        config_measurements_from_tables: Optional[
+            MeasurementsPipeConfig
+        ] = config_measurements_from_tables,
+    ):
+        print("--------------- Loading extraction pipe ---------------")
+        self.regex_convert_spans = re.compile(regex_convert_spans)
+        self.label_key = label_key
+        self.labels_to_remove = labels_to_remove
+        self.labels_linkable_to_measurement = labels_linkable_to_measurement
+        self.nlp_from_label_key = self.load_nlp(
+            config_normalizer_from_label_key, config_measurements_from_label_key
+        )
+        self.nlp_from_tables = self.load_nlp(
+            config_normalizer_from_tables, config_measurements_from_tables
+        )
+        print("--------------- Extraction pipe loaded ---------------")
+
+    def load_nlp(self, config_normalizer, config_measurements):
+        nlp = spacy.blank("eds")
+        nlp.add_pipe("eds.normalizer", config=config_normalizer)
+        nlp.add_pipe("eds.dates")
+        nlp.add_pipe("eds.tables")
+        nlp.add_pipe("eds.measurements", config=config_measurements)
+        return nlp
+
+    def extract_pandas_labels_of_interest(self, brat_dir):
+        # Convert a BRAT span string into [span_start, span_end]. Newlines are
+        # already counted as one character each in the offsets.
+        def convert_spans(span):
+            span_match = self.regex_convert_spans.match(span)
+            span_start = int(span_match.group(1))
+            span_end = int(span_match.group(2))
+            return [span_start, span_end]
+
+        df = extract_pandas(IN_BRAT_DIR=brat_dir)
+        df = df.loc[
+            df["label"].isin(
+                [self.label_key]
+                + self.labels_to_remove
+                + self.labels_linkable_to_measurement
+            )
+        ]
+        df["span_converted"] = df["span"].apply(convert_spans)
+        df = df[["term", "source", "span_converted", "label"]]
+        return df
+
+    @classmethod
+    def is_overlapping(cls, a, b):
+        # Return the overlapping part of segments a and b as (start, end),
+        # or (0, 0) if they do not overlap
+        if max(0, 1 + min(a[1], b[1]) - max(a[0], b[0])):
+            return max(a[0], b[0]), min(a[1], b[1])
+        else:
+            return 0, 0
+
+    def remove_labels_from_label_key(self, df):
+        def get_parts_to_crop(old_parts_to_crop, new_part_to_crop):
+            # From old_parts_to_crop, a list of segments, and new_part_to_crop,
+            # a single segment, return the union of all the segments.
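+            # e.g. get_parts_to_crop([[0, 5], [10, 15]], [4, 12]) -> [[0, 15]]:
+            # [4, 12] bridges both segments, so everything collapses into one.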
+ # All segments in old_parts_to_crop are disjunct + for old_part in old_parts_to_crop: + crop_start, crop_end = self.is_overlapping(old_part, new_part_to_crop) + if crop_start or crop_end: + return get_parts_to_crop( + old_parts_to_crop[1:], + [ + min(old_part[0], new_part_to_crop[0]), + max(old_part[1], new_part_to_crop[1]), + ], + ) + old_parts_to_crop.append(new_part_to_crop) + return old_parts_to_crop + + def crop_with_parts_to_crop(parts_to_crop, to_crop, to_crop_span): + # parts_to_crop contains a list of segments to crop in to_crop (str) + parts_to_crop.insert(0, [to_crop_span[0], to_crop_span[0]]) + parts_to_crop.append([to_crop_span[1], to_crop_span[1]]) + res = [ + to_crop[ + parts_to_crop[i][1] + - to_crop_span[0] : parts_to_crop[i + 1][0] + - to_crop_span[0] + ] + for i in range(len(parts_to_crop) - 1) + ] + return "".join(res) + + label_key_df = df.loc[df["label"] == self.label_key].sort_values("source") + specific_label_df = df.loc[ + df["label"].isin( + self.labels_to_remove + self.labels_linkable_to_measurement + ) + ] + res = {"term_labels_removed": [], "terms_linked_to_measurement": []} + label_keys = [] + source = None + for label_key in tqdm( + label_key_df.itertuples(index=False), total=label_key_df.shape[0] + ): + new_source = label_key.source + if new_source != source: + temp_df = specific_label_df.loc[ + (specific_label_df["source"] == new_source) + ] + source = new_source + labels_linkable_to_measurement = [] + parts_to_crop = [] + + for label in temp_df.itertuples(index=False): + + crop_start, crop_end = self.is_overlapping( + label_key.span_converted, label.span_converted + ) + + if crop_start or crop_end: + + if label.label in self.labels_to_remove: + if not parts_to_crop: + parts_to_crop.append([crop_start, crop_end]) + + else: + parts_to_crop = get_parts_to_crop( + parts_to_crop, [crop_start, crop_end] + ) + if label.label in self.labels_linkable_to_measurement: + labels_linkable_to_measurement.append(label.term) + res["term_labels_removed"].append( + crop_with_parts_to_crop( + parts_to_crop, label_key.term, label_key.span_converted + ) + ) + res["terms_linked_to_measurement"].append(labels_linkable_to_measurement) + label_keys.append(label_key) + res = pd.DataFrame(label_keys).join(pd.DataFrame(res)) + return res.reset_index(drop=True) + + def get_measurements_from_label_key(self, df): + df_for_nlp_from_label_key = pd.DataFrame( + {"note_text": df["term_labels_removed"], "note_id": df.index} + ) + df_for_nlp_from_label_key = pipe( + note=df_for_nlp_from_label_key, + nlp=self.nlp_from_label_key, + n_jobs=-1, + additional_spans=["measurements"], + extensions=["value"], + ) + df_for_nlp_from_label_key = ( + df_for_nlp_from_label_key.groupby("note_id") + .agg({"note_id": "first", "value": list, "lexical_variant": list}) + .reset_index(drop=True) + .rename(columns={"value": "found"}) + ) + df = pd.merge( + df, df_for_nlp_from_label_key, left_index=True, right_on="note_id" + ) + df["found"] = df["found"].fillna("").apply(list) + return df.reset_index(drop=True) + + def get_measurements_from_tables( + self, df, df_labels_of_interest, brat_dir, only_tables + ): + + # Treat each txt files + txt_files = [ + f + for f in listdir(brat_dir) + if isfile(join(brat_dir, f)) + if f.endswith(".txt") + ] + ann_files = [f[:-3] + "ann" for f in txt_files] + text_df = {"note_text": [], "note_id": []} + for i, txt_file in enumerate(txt_files): + with open(join(brat_dir, txt_file), "r") as file: + text = file.read() + text_df["note_text"].append(text) + 
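+                # note_id mirrors the .ann filename so that table matches can
+                # later be joined back to the entities from the same document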
text_df["note_id"].append(txt_file[:-3] + "ann") + text_df = pd.DataFrame(text_df) + df_for_nlp_from_table = pipe( + note=text_df, + nlp=self.nlp_from_tables, + n_jobs=-1, + additional_spans=["measurements"], + extensions=["value"], + ) + # Load discriminative dataframe (in other words df containing terms with a label to remove) so that we can drop matches when one overlaps one of these words + discriminative_df = df_labels_of_interest.loc[ + df_labels_of_interest["label"].isin(self.labels_to_remove) + ] + + def get_measurements_from_tables_one_file(df_for_nlp_from_table, ann_file): + # Select label_keys from the ann_file + # and check if our matcher from tables + # finds measurements with a span overlapping + # one of these label_keys. If yes, then we keep this measurement + # and throw all the ones found by our first matcher + # from the label_key at stake. + df_part = df.loc[df["source"] == ann_file].copy().reset_index(drop=True) + df_part["new_found"] = [[] for _ in range(len(df_part))] + discriminative_df_part = ( + discriminative_df.loc[discriminative_df["source"] == ann_file] + .copy() + .reset_index(drop=True) + ) + df_for_nlp_from_table_part = ( + df_for_nlp_from_table.loc[df_for_nlp_from_table["note_id"] == ann_file] + .copy() + .reset_index(drop=True) + ) + for measurement_from_table in df_for_nlp_from_table_part.itertuples( + index=False + ): + measurement_span = [ + measurement_from_table.start, + measurement_from_table.end, + ] + + # Check if a match is in a term with a label to remove + overlapping_discriminative_indexes = discriminative_df_part.loc[ + discriminative_df_part["span_converted"].apply( + lambda x: self.is_overlapping(x, measurement_span) + ) + != (0, 0) + ].index.values.tolist() + if overlapping_discriminative_indexes: + continue + # Link the match measure to a label_key - label_linkable_to_measurement + overlapping_label_key_indexes = df_part.loc[ + df_part["span_converted"].apply( + lambda x: self.is_overlapping(x, measurement_span) + ) + != (0, 0) + ].index.values.tolist() + for i in overlapping_label_key_indexes: + df_part.iloc[i]["new_found"].append(measurement_from_table.value) + return df_part + + # DataFrame with merged doc and tables matches + result_df_per_file = [] + for ann_file in tqdm(ann_files, total=len(ann_files)): + result_df_per_file.append( + get_measurements_from_tables_one_file(df_for_nlp_from_table, ann_file) + ) + + result_df = pd.concat(result_df_per_file, ignore_index=True) + if only_tables: + result_df = result_df.loc[result_df["new_found"].astype(bool)].reset_index( + drop=True + ) + result_df = result_df.drop(columns=["found"]).rename( + columns={"new_found": "found"} + ) + return result_df + else: + result_df["found"] = result_df.apply( + lambda x: x["found"] * (not x["new_found"]) + + x["new_found"] * (len(x["new_found"]) > 0), + axis=1, + ) + return result_df.drop(columns=["new_found"]) + + def prepare_df_for_normalization(self, df): + # This method converts SimpleMeasurement objects to strings + # So that It can be exported to json + # Moreover, for each term, if any terms_linked_to_measurement are found, + # We fill the cell with a list of 1 item: + # this term cropped by the found measures from It + + # Fill empty terms_linked_to_measurement + mask_empty_labels_linkable_to_measurement = ( + df["terms_linked_to_measurement"].str.len() == 0 + ) + df_empty_labels_linkable_to_measurement = df[ + mask_empty_labels_linkable_to_measurement + ][["term", "lexical_variant"]] + df.loc[ + mask_empty_labels_linkable_to_measurement, 
"terms_linked_to_measurement" + ] = df_empty_labels_linkable_to_measurement.apply( + lambda row: [ + re.compile( + r"\b(?:" + "|".join(row["lexical_variant"]) + r")\b", re.IGNORECASE + ).sub("", row["term"]) + ], + axis=1, + ) + + # Convert SimpleMeasurement to str + df["found"] = df["found"].apply( + lambda measurements: [ + measurement.value_range + + " " + + str(measurement.value) + + " " + + measurement.unit + for measurement in measurements + ] + ) + df = df.drop( + columns=["label", "term_labels_removed", "note_id", "lexical_variant"] + ) + return df + + def __call__(self, brat_dir, only_tables): + print( + "--------------- Converting BRAT files to Pandas DataFrame... ---------------" + ) + tic = time.time() + df_labels_of_interest = self.extract_pandas_labels_of_interest(brat_dir) + tac = time.time() + print(f"Converting BRAT files to Pandas DataFrame : {tac-tic:.2f} sec") + print("--------------- Removing labels from label keys... ---------------") + tic = time.time() + df = self.remove_labels_from_label_key(df_labels_of_interest) + tac = time.time() + print(f"Removing labels from label keys : {tac-tic:.2f} sec") + print( + "--------------- Extracting measurements from label keys... ---------------" + ) + tic = time.time() + df = self.get_measurements_from_label_key(df) + tac = time.time() + print(f"Extracting measurements from label keys : {tac-tic:.2f} sec") + print("--------------- Extracting measurements from tables... ---------------") + tic = time.time() + df = self.get_measurements_from_tables( + df, df_labels_of_interest, brat_dir, only_tables + ) + tac = time.time() + print(f"Extracting measurements from tables : {tac-tic:.2f} sec") + print("--------------- Formatting table for normalization... ---------------") + tic = time.time() + df = self.prepare_df_for_normalization(df) + tac = time.time() + print(f"Formatting table for normalization : {tac-tic:.2f} sec") + return df diff --git a/Normalisation/extract_measurement/extract_pandas_from_brat.py b/Normalisation/extract_measurement/extract_pandas_from_brat.py new file mode 100644 index 000000000..82e99e239 --- /dev/null +++ b/Normalisation/extract_measurement/extract_pandas_from_brat.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# coding: utf-8 +# %% + +# # Build prediction file +# +# From our files with NER prediction, extract a pandas data frame to work on entities easily +# + +# %% + + +import collections +import math +import re +from os import listdir +from os.path import basename, isdir, isfile, join + +import numpy as np +import pandas as pd + + +def extract_pandas(IN_BRAT_DIR, OUT_DF=None, labels=None): + + assert isdir(IN_BRAT_DIR) + + ENTITY_REGEX = re.compile("^(.\d+)\t([^ ]+) ([^\t]+)\t(.*)$") + + data = [] + patients = [] + + # extract all ann_files from IN_BRAT_DIR + ann_files = [ + f + for f in listdir(IN_BRAT_DIR) + if isfile(join(IN_BRAT_DIR, f)) + if f.endswith(".ann") + ] + for ann_file in ann_files: + ann_path = join(IN_BRAT_DIR, ann_file) + txt_path = ann_path[:-4] + ".txt" + + # sanity check + assert isfile(ann_path) + assert isfile(txt_path) + + # Read text file to get patient number : + with open(txt_path, "r", encoding="utf-8") as f_txt: + lines_txt = f_txt.readlines() + patient_num = lines_txt[0][:-1] + patients.append(patient_num) + + # Read ann file + with open(ann_path, "r", encoding="utf-8") as f_in: + lines = f_in.readlines() + + for line in lines: + entity_match = ENTITY_REGEX.match(line.strip()) + if entity_match is not None: + ann_id = entity_match.group(1) + label = entity_match.group(2) + 
offsets = entity_match.group(3)
+                    term = entity_match.group(4)
+                    if labels is None:
+                        data.append([ann_id, term, label, basename(ann_path), offsets])
+                    elif label in labels:
+                        data.append([ann_id, term, label, basename(ann_path), offsets])
+
+    columns = ["ann_id", "term", "label", "source", "span"]
+    dataset_df = pd.DataFrame(data=list(data), columns=columns)
+    if OUT_DF:
+        dataset_df.to_csv(OUT_DF)
+
+    return dataset_df
diff --git a/Normalisation/extract_measurement/main.py b/Normalisation/extract_measurement/main.py
new file mode 100644
index 000000000..cf9e7288b
--- /dev/null
+++ b/Normalisation/extract_measurement/main.py
@@ -0,0 +1,33 @@
+import os
+
+import typer
+
+os.environ["OMP_NUM_THREADS"] = "16"
+from pathlib import Path
+
+import pandas as pd
+from config import *
+from extract_measurements_from_brat import ExtractMeasurements
+
+
+def extract_measurements_cli(
+    input_dir: Path,
+    output_dir: Path,
+):
+    df = ExtractMeasurements(
+        regex_convert_spans=measurements_pipe_regex_convert_spans,
+        label_key=measurements_pipe_label_key,
+        labels_to_remove=measurements_pipe_labels_to_remove,
+        labels_linkable_to_measurement=measurements_pipe_labels_linkable_to_measurement,
+        config_normalizer_from_label_key=measurements_pipe_config_normalizer_from_label_key,
+        config_measurements_from_label_key=measurements_pipe_config_measurements_from_label_key,
+        config_normalizer_from_tables=measurements_pipe_config_normalizer_from_tables,
+        config_measurements_from_tables=measurements_pipe_config_measurements_from_tables,
+    )(brat_dir=input_dir, only_tables=measurements_only_tables)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    df.to_json(output_dir / "pred_with_extraction.json")
+
+
+if __name__ == "__main__":
+    typer.run(extract_measurements_cli)
diff --git a/Normalisation/extract_measurement/measurements_patterns.py b/Normalisation/extract_measurement/measurements_patterns.py
new file mode 100644
index 000000000..5d9885a60
--- /dev/null
+++ b/Normalisation/extract_measurement/measurements_patterns.py
@@ -0,0 +1,341 @@
+from edsnlp.pipelines.misc.measurements.patterns import (
+    common_measurements,
+    number_terms,
+    stopwords_measure_unit,
+    stopwords_unitless,
+    unit_divisors,
+    units_config,
+    value_range_terms,
+)
+
+#######################################
+# ## CONFIG TO PRETREAT THE BRAT DIR ###
+# ######################################
+
+# Regex which captures in group 1 the beginning of a span and in group 2
+# the end of the same span
+regex_convert_spans = r"^(\d+).*\s(\d+)$"
+
+# Label of the entities containing the measurement and possibly
+# other unrelated entities
+label_key = "BIO_comp"
+
+# Labels of the entities that may be ignored during matching
+labels_to_remove = ["BIO"]
+
+# Labels of the entities which can be linked to a measurement
+labels_linkable_to_measurement = ["BIO"]
+
+
+##########################################################
+# ## PIPE TO MATCH MEASUREMENTS IN `label_key` ENTITIES ###
+# #########################################################
+
+# Config of the normalizer pipe used in entities labeled `label_key`
+config_normalizer_from_label_key = dict(
+    lowercase=True,
+    accents=True,
+    quotes=True,
+    pollution=True,
+)
+
+# Terms which will make the measurements pipe match a positive measurement
+positive_terms_from_label_key = ("positifs", "positives", "positivites")
+# We create a list matching abbreviations of the positive words. This list is
+# the final dictionary used to match the positive measurements.
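+# e.g. "positifs" yields the prefixes "po", "pos", "posi", "posit",
+# "positi", "positif" and "positifs", so truncated mentions also match.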
+positive_terms_from_label_key = [ + word[: i + 1] + for word in positive_terms_from_label_key + for i in range(min(len(word) - 1, 1), len(word)) +] +# Symbols which will make the measurements pipe match a positive measurement +positive_symbols_from_label_key = ("\+", "p") +# To match symbols, we create regex +positive_regex_from_label_key = [ + r"^[^a-zA-Z0-9]*(?:% s)" % "|".join(positive_symbols_from_label_key) + + r"[^a-zA-Z0-9]*$" +] + +# Terms which will make the measurements pipe match a negative measurement +negative_terms_from_label_key = ( + "negatifs", + "negatives", + "negativites", + "absences", + "absents", +) +# We create a list to match abbreviations of the negative words. This list will +# be the final dictionnary used to match the negative measurements. +negative_terms_from_label_key = [ + word[: i + 1] + for word in negative_terms_from_label_key + for i in range(min(len(word) - 1, 1), len(word)) +] +# Symbols which will make the measurements pipe match a positive measurement +negative_symbols_from_label_key = ("\-", "n") +# To match symbols, we create regex +negative_regex_from_label_key = [ + r"^[^a-zA-Z0-9]*(?:% s)" % "|".join(negative_symbols_from_label_key) + + r"[^a-zA-Z0-9]*$" +] + +# Terms which will make the measurements pipe match a normal measurement +normal_terms_from_label_key = ("normales", "normaux", "normalisations", "normalites") +# We create a list to match abbreviations of the normal words. This list will +# be the final dictionnary used to match the normal measurements. +normal_terms_from_label_key = [ + word[: i + 1] + for word in normal_terms_from_label_key + for i in range(min(len(word) - 1, 1), len(word)) +] + +# Custom mesurements mainly to include custom positive, negative +# and normal measurements +measurements_from_label_key = { + "eds.weight": { + "unit": "kg", + "unitless_patterns": [ + { + "terms": ["poids", "poid", "pese", "pesant", "pesait", "pesent"], + "ranges": [ + {"min": 0, "max": 200, "unit": "kg"}, + {"min": 200, "unit": "g"}, + ], + } + ], + }, + "eds.size": { + "unit": "m", + "unitless_patterns": [ + { + "terms": [ + "mesure", + "taille", + "mesurant", + "mesurent", + "mesurait", + "mesuree", + "hauteur", + "largeur", + "longueur", + ], + "ranges": [ + {"min": 0, "max": 3, "unit": "m"}, + {"min": 3, "unit": "cm"}, + ], + } + ], + }, + "eds.bmi": { + "unit": "kg_per_m2", + "unitless_patterns": [ + {"terms": ["imc", "bmi"], "ranges": [{"unit": "kg_per_m2"}]} + ], + }, + "eds.volume": {"unit": "m3", "unitless_patterns": []}, + "eds.bool": { + "unit": "bool", + "valueless_patterns": [ + { + "terms": positive_terms_from_label_key, + "regex": positive_regex_from_label_key, + "measurement": { + "value_range": "=", + "value": 1, + "unit": "bool", + }, + }, + { + "terms": negative_terms_from_label_key, + "regex": negative_regex_from_label_key, + "measurement": { + "value_range": "=", + "value": 0, + "unit": "bool", + }, + }, + { + "terms": normal_terms_from_label_key, + "measurement": { + "value_range": "=", + "value": 0.5, + "unit": "bool", + }, + }, + ], + }, +} + +# Config of the measurement pipe used in entities labeled `label_key` +config_measurements_from_label_key = dict( + measurements=measurements_from_label_key, + units_config=units_config, + number_terms=number_terms, + value_range_terms=value_range_terms, + unit_divisors=unit_divisors, + stopwords_unitless=stopwords_unitless, + stopwords_measure_unit=stopwords_measure_unit, + measure_before_unit=False, + ignore_excluded=True, + attr="NORM", + all_measurements=True, + 
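+    # assumption: all_measurements=True also keeps measurements outside the
+    # custom types defined above; this matcher reads running text
+    # (parse_doc=True), whereas the table config further down flips both flags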
parse_tables=False, + parse_doc=True, +) + + +############################################ +# ## PIPE TO MATCH MEASUREMENTS IN TABLES ### +# ########################################### + +# Config of the normalizer pipe used in tables +config_normalizer_from_tables = dict( + lowercase=True, + accents=True, + quotes=True, + pollution=True, +) + +# Terms which will make the measurements pipe match a positive measurement +positive_terms_from_tables = ("positifs", "positives", "positivites") +# We create a list to match abbreviations of the positive words. This list will +# be the final dictionnary used to match the positive measurements. +positive_terms_from_tables = [ + word[: i + 1] + for word in positive_terms_from_tables + for i in range(min(len(word) - 1, 1), len(word)) +] +# Symbols which will make the measurements pipe match a positive measurement +positive_symbols_from_tables = ("\+", "p") +# To match symbols, we create regex +positive_regex_from_tables = [ + r"^[^a-zA-Z0-9]*(?:% s)" % "|".join(positive_symbols_from_tables) + + r"[^a-zA-Z0-9]*$" +] + +# Terms which will make the measurements pipe match a negative measurement +negative_terms_from_tables = ( + "negatifs", + "negatives", + "negativites", + "absences", + "absents", +) +# We create a list to match abbreviations of the negative words. This list will +# be the final dictionnary used to match the negative measurements. +negative_terms_from_tables = [ + word[: i + 1] + for word in negative_terms_from_tables + for i in range(min(len(word) - 1, 1), len(word)) +] +# Symbols which will make the measurements pipe match a positive measurement +negative_symbols_from_tables = ("\-", "n") +# To match symbols, we create regex +negative_regex_from_tables = [ + r"^[^a-zA-Z0-9]*(?:% s)" % "|".join(negative_symbols_from_tables) + + r"[^a-zA-Z0-9]*$" +] + +# Terms which will make the measurements pipe match a normal measurement +normal_terms_from_tables = ("normales", "normaux", "normalisations", "normalites") +# We create a list to match abbreviations of the normal words. This list will +# be the final dictionnary used to match the normal measurements. 
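+# NB: the symbol patterns built above expand to regexes such as
+# r"^[^a-zA-Z0-9]*(?:\-|n)[^a-zA-Z0-9]*$", which match a bare symbol token
+# like "-", "(-)" or "n" once the text has been normalised.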
+normal_terms_from_tables = [ + word[: i + 1] + for word in normal_terms_from_tables + for i in range(min(len(word) - 1, 1), len(word)) +] + +# Custom mesurements mainly to include custom positive, negative +# and normal measurements +measurements_from_tables = { + "eds.weight": { + "unit": "kg", + "unitless_patterns": [ + { + "terms": ["poids", "poid", "pese", "pesant", "pesait", "pesent"], + "ranges": [ + {"min": 0, "max": 200, "unit": "kg"}, + {"min": 200, "unit": "g"}, + ], + } + ], + }, + "eds.size": { + "unit": "m", + "unitless_patterns": [ + { + "terms": [ + "mesure", + "taille", + "mesurant", + "mesurent", + "mesurait", + "mesuree", + "hauteur", + "largeur", + "longueur", + ], + "ranges": [ + {"min": 0, "max": 3, "unit": "m"}, + {"min": 3, "unit": "cm"}, + ], + } + ], + }, + "eds.bmi": { + "unit": "kg_per_m2", + "unitless_patterns": [ + {"terms": ["imc", "bmi"], "ranges": [{"unit": "kg_per_m2"}]} + ], + }, + "eds.volume": {"unit": "m3", "unitless_patterns": []}, + "eds.bool": { + "unit": "bool", + "valueless_patterns": [ + { + "terms": positive_terms_from_tables, + "regex": positive_regex_from_tables, + "measurement": { + "value_range": "=", + "value": 1, + "unit": "bool", + }, + }, + { + "terms": negative_terms_from_tables, + "regex": negative_regex_from_tables, + "measurement": { + "value_range": "=", + "value": 0, + "unit": "bool", + }, + }, + { + "terms": normal_terms_from_tables, + "measurement": { + "value_range": "=", + "value": 0.5, + "unit": "bool", + }, + }, + ], + }, +} + +# Config of the measurement pipe used in tables +config_measurements_from_tables = dict( + measurements=measurements_from_tables, + units_config=units_config, + number_terms=number_terms, + value_range_terms=value_range_terms, + unit_divisors=unit_divisors, + stopwords_unitless=stopwords_unitless, + stopwords_measure_unit=stopwords_measure_unit, + measure_before_unit=False, + ignore_excluded=True, + attr="NORM", + all_measurements=True, + parse_tables=True, + parse_doc=False, +) diff --git a/Normalisation/inference/config.py b/Normalisation/inference/config.py new file mode 100644 index 000000000..081273cc8 --- /dev/null +++ b/Normalisation/inference/config.py @@ -0,0 +1,89 @@ +import pandas as pd + +###################### +# ## GENERAL CONFIG ### +# ##################### +umls_path = ( + "/export/home/cse200093/scratch/BioMedics/data/umls/bio_str_SNOMEDCT_US.json" +) +labels_column_name = "CUI" +# Name of the column which contains the CUIs +synonyms_column_name = "STR" +# Name of the column which contains the synonyms +res_path = "/export/home/cse200093/Jacques_Bio/BioMedics/data/maladie_de_takayasu_norm/res.json" + + +#################### +# ## CODER CONFIG ### +# ################### +column_name_to_normalize = "term" +# Name of the preceding column of interest. 
Default should be +# "terms_linked_to_measurement" to make the entire pipe work +coder_model_name_or_path = ( + "/export/home/cse200093/scratch/word-embedding/coder_eds/model_967662.pth" +) +coder_tokenizer_name_or_path = ( + "/export/home/cse200093/scratch/word-embedding/finetuning-camembert-2021-07-29" +) +coder_device = "cuda:0" +coder_save_umls_embeddings_dir = False +# set to False if you don't want to save +coder_save_umls_des_dir = False +# set to False if you don't want to save +coder_save_umls_labels_dir = False +# set to False if you don't want to save +coder_save_data_embeddings_dir = False +# set to False if you don't want to save +coder_normalize = True +coder_summary_method = "CLS" +coder_tqdm_bar = True +coder_cased = True +coder_batch_size = 128 +coder_stopwords = [ + "for", + "assay", + "by", + "tests", + "minute", + "exam", + "with", + "human", + "moyenne", + "in", + "to", + "from", + "analyse", + "test", + "level", + "fluid", + "laboratory", + "determination", + "examination", + "releasing", + "quantitative", + "screening", + "and", + "exploration", + "factor", + "method", + "analysis", + "laboratoire", + "specimen", + "or", + "typing", + "of", + "concentration", + "measurement", + "detection", + "procedure", + "identification", + "numeration", + "hour", + "retired", + "technique", + "count", +] +coder_remove_stopwords_terms = False +coder_remove_special_characters_terms = False +coder_remove_stopwords_umls = True +coder_remove_special_characters_umls = True diff --git a/Normalisation/inference/extract_pandas_from_brat.py b/Normalisation/inference/extract_pandas_from_brat.py new file mode 100644 index 000000000..ac04c0d09 --- /dev/null +++ b/Normalisation/inference/extract_pandas_from_brat.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# coding: utf-8 +# %% + +# # Build prediction file +# +# From our files with NER prediction, extract a pandas data frame to work on entities easily +# + +# %% + + +import collections +import math +import re +from os import listdir +from os.path import basename, isdir, isfile, join + +import numpy as np +import pandas as pd + + +def extract_pandas(IN_BRAT_DIR, OUT_DF=None, labels=None): + + assert isdir(IN_BRAT_DIR) + + ENTITY_REGEX = re.compile("^(.\d+)\t([^ ]+) ([^\t]+)\t(.*)$") + + data = [] + patients = [] + + # extract all ann_files from IN_BRAT_DIR + ann_files = [ + f + for f in listdir(IN_BRAT_DIR) + if isfile(join(IN_BRAT_DIR, f)) + if f.endswith(".ann") + ] + + for ann_file in ann_files: + ann_path = join(IN_BRAT_DIR, ann_file) + txt_path = ann_path[:-4] + ".txt" + + # sanity check + assert isfile(ann_path) + assert isfile(txt_path) + + # Read text file to get patient number : + with open(txt_path, "r", encoding="utf-8") as f_txt: + lines_txt = f_txt.readlines() + patient_num = lines_txt[0][:-1] + patients.append(patient_num) + + # Read ann file + with open(ann_path, "r", encoding="utf-8") as f_in: + lines = f_in.readlines() + + for line in lines: + entity_match = ENTITY_REGEX.match(line.strip()) + if entity_match is not None: + ann_id = entity_match.group(1) + label = entity_match.group(2) + offsets = entity_match.group(3) + term = entity_match.group(4) + if labels is None: + data.append([ann_id, term, label, basename(ann_path), offsets]) + elif label in labels: + data.append([ann_id, term, label, basename(ann_path), offsets]) + + columns = ["ann_id", "term", "label", "source", "span"] + dataset_df = pd.DataFrame(data=list(data), columns=columns) + if OUT_DF: + dataset_df.to_csv(OUT_DF) + + return dataset_df diff --git 
a/Normalisation/inference/get_normalization_with_coder.py b/Normalisation/inference/get_normalization_with_coder.py new file mode 100644 index 000000000..4526c1633 --- /dev/null +++ b/Normalisation/inference/get_normalization_with_coder.py @@ -0,0 +1,136 @@ +import argparse +import os +import pathlib +import sys +import time + +import numpy as np +import torch +from torch import nn +from tqdm import tqdm, trange +from transformers import ( + AdamW, + AutoConfig, + AutoModel, + AutoTokenizer, + get_constant_schedule_with_warmup, + get_cosine_schedule_with_warmup, + get_linear_schedule_with_warmup, +) + +sys.path.append("/export/home/cse200093/scratch/BioMedics/normalisation/training") + + +class CoderNormalizer: + def __init__( + self, + model_name_or_path: str, + tokenizer_name_or_path: str, + device: str = "cuda:0", + ): + self.device = device + try: + self.model = AutoModel.from_pretrained(model_name_or_path).to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path) + self.model_from_transformers = True + except: + self.model = torch.load(model_name_or_path).to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path) + self.model_from_transformers = False + + def get_bert_embed( + self, + phrase_list, + normalize=True, + summary_method="CLS", + tqdm_bar=False, + coder_batch_size=128, + ): + input_ids = [] + for phrase in phrase_list: + input_ids.append( + self.tokenizer.encode_plus( + phrase, + max_length=32, + add_special_tokens=True, + truncation=True, + pad_to_max_length=True, + )["input_ids"] + ) + self.model.eval() + + count = len(input_ids) + now_count = 0 + with torch.no_grad(): + if tqdm_bar: + pbar = tqdm(total=count) + while now_count < count: + input_gpu_0 = torch.LongTensor( + input_ids[now_count : min(now_count + coder_batch_size, count)] + ).to(self.device) + if summary_method == "CLS": + if self.model_from_transformers: + embed = self.model(input_gpu_0)[1] + else: + embed = self.model.bert(input_gpu_0)[1] + if summary_method == "MEAN": + if self.model_from_transformers: + embed = torch.mean(self.model(input_gpu_0)[0], dim=1) + else: + embed = torch.mean(self.model.bert(input_gpu_0)[0], dim=1) + if normalize: + embed_norm = torch.norm(embed, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + embed = embed / embed_norm + if now_count == 0: + output = embed + else: + output = torch.cat((output, embed), dim=0) + if tqdm_bar: + pbar.update(min(now_count + coder_batch_size, count) - now_count) + now_count = min(now_count + coder_batch_size, count) + if tqdm_bar: + pbar.close() + return output + + def get_sim_results( + self, res_embeddings, umls_embeddings, umls_labels, umls_des, split_size=200 + ): + label_matches = [] + des_matches = [] + print(f"Number of split: {len(torch.split(res_embeddings, split_size))}") + for split_res_embeddings in torch.split(res_embeddings, split_size): + sim = torch.matmul(split_res_embeddings, umls_embeddings.t()) + most_similar = torch.max(sim, dim=1)[1].tolist() + label_matches_split = [umls_labels[idx] for idx in most_similar] + des_matches_split = [umls_des[idx] for idx in most_similar] + label_matches.extend(label_matches_split) + des_matches.extend(des_matches_split) + return label_matches, des_matches + + def __call__( + self, + umls_labels_list, + umls_des_list, + data_list, + save_umls_embeddings_dir=False, + save_data_embeddings_dir=False, + normalize=True, + summary_method="CLS", + tqdm_bar=False, + coder_batch_size=128, + ): + umls_embeddings = self.get_bert_embed( + 
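+        # UMLS synonyms and entity mentions are embedded separately; with
+        # normalize=True both sets are L2-normalised, so the dot products
+        # taken in get_sim_results are cosine similarities.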
umls_des_list, normalize, summary_method, tqdm_bar, coder_batch_size + ) + res_embeddings = self.get_bert_embed( + data_list, normalize, summary_method, tqdm_bar, coder_batch_size + ) + if save_umls_embeddings_dir: + torch.save(umls_embeddings, save_umls_embeddings_dir) + if save_data_embeddings_dir: + torch.save(res_embeddings, save_data_embeddings_dir) + return self.get_sim_results( + res_embeddings, umls_embeddings, umls_labels_list, umls_des_list + ) diff --git a/Normalisation/inference/main.py b/Normalisation/inference/main.py new file mode 100644 index 000000000..cffeab945 --- /dev/null +++ b/Normalisation/inference/main.py @@ -0,0 +1,138 @@ +import os + +import typer + +os.environ["OMP_NUM_THREADS"] = "16" +import pickle +from pathlib import Path + +import pandas as pd +from config import * +from get_normalization_with_coder import CoderNormalizer +from text_preprocessor import TextPreprocessor + + +def coder_wrapper(df): + # This wrapper is needed to preprocess terms + # and in case the cells contains list of terms instead of one unique term + df = df.reset_index(drop=True) + text_preprocessor = TextPreprocessor(cased=coder_cased, stopwords=coder_stopwords) + coder_normalizer = CoderNormalizer( + model_name_or_path=coder_model_name_or_path, + tokenizer_name_or_path=coder_tokenizer_name_or_path, + device=coder_device, + ) + + # Preprocess UMLS + print("--- Preprocessing UMLS ---") + umls_df = pd.read_json(umls_path) + + umls_df[synonyms_column_name] = umls_df[synonyms_column_name].apply( + lambda term: text_preprocessor( + text=term, + remove_stopwords=coder_remove_stopwords_umls, + remove_special_characters=coder_remove_special_characters_umls, + ) + ) + umls_df = ( + umls_df.loc[ + (~umls_df[synonyms_column_name].str.isnumeric()) + & (umls_df[synonyms_column_name] != "") + ] + .groupby([synonyms_column_name]) + .agg({labels_column_name: set, synonyms_column_name: "first"}) + .reset_index(drop=True) + ) + coder_umls_des_list = umls_df[synonyms_column_name] + coder_umls_labels_list = umls_df[labels_column_name] + if coder_save_umls_des_dir: + with open(coder_save_umls_des_dir, "wb") as f: + pickle.dump(coder_umls_des_list, f) + if coder_save_umls_labels_dir: + with open(coder_save_umls_labels_dir, "wb") as f: + pickle.dump(coder_umls_labels_list, f) + + # Preprocessing and inference on terms + print("--- Preprocessing terms ---") + if type(df[column_name_to_normalize].iloc[0]) == str: + coder_data_list = ( + df[column_name_to_normalize] + .apply( + lambda term: text_preprocessor( + text=term, + remove_stopwords=coder_remove_stopwords_terms, + remove_special_characters=coder_remove_special_characters_terms, + ) + ) + .tolist() + ) + print("--- CODER inference ---") + coder_res = coder_normalizer( + umls_labels_list=coder_umls_labels_list, + umls_des_list=coder_umls_des_list, + data_list=coder_data_list, + save_umls_embeddings_dir=coder_save_umls_embeddings_dir, + save_data_embeddings_dir=coder_save_data_embeddings_dir, + normalize=coder_normalize, + summary_method=coder_summary_method, + tqdm_bar=coder_tqdm_bar, + coder_batch_size=coder_batch_size, + ) + df[["label", "des"]] = pd.DataFrame(zip(*coder_res)) + else: + exploded_term_df = ( + pd.DataFrame( + {"id": df.index, column_name_to_normalize: df[column_name_to_normalize]} + ) + .explode(column_name_to_normalize) + .reset_index(drop=True) + ) + coder_data_list = ( + exploded_term_df[column_name_to_normalize] + .apply( + lambda term: text_preprocessor( + text=term, + remove_stopwords=coder_remove_stopwords_terms, + 
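+                    # same preprocessing as the single-string branch above,
+                    # applied after exploding each row's list of terms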
remove_special_characters=coder_remove_special_characters_terms,
+                )
+            )
+            .tolist()
+        )
+        print("--- CODER inference ---")
+        coder_res = coder_normalizer(
+            umls_labels_list=coder_umls_labels_list,
+            umls_des_list=coder_umls_des_list,
+            data_list=coder_data_list,
+            save_umls_embeddings_dir=coder_save_umls_embeddings_dir,
+            save_data_embeddings_dir=coder_save_data_embeddings_dir,
+            normalize=coder_normalize,
+            summary_method=coder_summary_method,
+            tqdm_bar=coder_tqdm_bar,
+            coder_batch_size=coder_batch_size,
+        )
+        exploded_term_df[["label", "des"]] = pd.DataFrame(zip(*coder_res))
+        df = (
+            pd.merge(
+                df.drop(columns=[column_name_to_normalize]),
+                exploded_term_df,
+                left_index=True,
+                right_on="id",
+            )
+            .drop(columns=["id"])
+            .reset_index(drop=True)
+        )
+    return df
+
+
+def coder_inference_cli(
+    input_dir: Path,
+    output_dir: Path,
+):
+    df = pd.read_json(input_dir)
+    df = coder_wrapper(df)
+    if output_dir:  # was "if res_path:", an undefined leftover from a config-driven version
+        df.to_json(output_dir)
+
+
+if __name__ == "__main__":
+    typer.run(coder_inference_cli) diff --git a/Normalisation/inference/text_preprocessor.py b/Normalisation/inference/text_preprocessor.py new file mode 100644 index 000000000..a9a7b40c5 --- /dev/null +++ b/Normalisation/inference/text_preprocessor.py @@ -0,0 +1,40 @@
+import re
+
+from unidecode import unidecode
+
+
+class TextPreprocessor:
+    def __init__(self, cased, stopwords):
+        self.cased = cased
+        self.regex_stopwords = re.compile(
+            r"\b(?:" + "|".join(stopwords) + r")\b", re.IGNORECASE
+        )
+        self.regex_special_characters = re.compile(r"[^a-zA-Z0-9\s]", re.IGNORECASE)
+
+    def normalize(self, txt, remove_stopwords, remove_special_characters):
+        if not self.cased:
+            txt = unidecode(
+                txt.lower()
+                .replace("-", " ")
+                .replace("ag ", "antigene ")
+                .replace("ac ", "anticorps ")
+                .replace("antigenes ", "antigene ")
+            )
+        else:
+            txt = unidecode(
+                txt.replace("-", " ")
+                .replace("ag ", "antigene ")
+                .replace("ac ", "anticorps ")
+                .replace("antigenes ", "antigene ")
+                .replace("Ag ", "Antigene ")
+                .replace("Ac ", "Anticorps ")
+                .replace("Antigenes ", "Antigene ")
+            )
+        if remove_stopwords:
+            txt = self.regex_stopwords.sub("", txt)
+        if remove_special_characters:
+            txt = self.regex_special_characters.sub(" ", txt)
+        return re.sub(" +", " ", txt).strip()
+
+    def __call__(self, text, remove_stopwords=False, remove_special_characters=False):
+        return self.normalize(text, remove_stopwords, remove_special_characters) diff --git a/edsnlp/matchers/__init__.py b/Normalisation/training/__init__.py similarity index 100% rename from edsnlp/matchers/__init__.py rename to Normalisation/training/__init__.py diff --git a/Normalisation/training/data_util.py b/Normalisation/training/data_util.py new file mode 100644 index 000000000..afef4f48a --- /dev/null +++ b/Normalisation/training/data_util.py @@ -0,0 +1,207 @@
+import json
+import os
+from pathlib import Path
+from random import sample
+from time import time
+
+import ipdb
+import numpy as np
+import pandas as pd
+from load_umls import UMLS
+from sampler_util import FixedLengthBatchSampler, my_collate_fn
+from torch.utils.data import DataLoader, Dataset
+from torch.utils.data.sampler import RandomSampler
+from transformers import AutoTokenizer
+
+
+def pad(list_ids, pad_length, pad_mark=0):
+    output = []
+    for l in list_ids:
+        if len(l) > pad_length:
+            output.append(l[0:pad_length])
+        else:
+            output.append(l + [pad_mark] * (pad_length - len(l)))
+    return output
+
+
+def my_sample(lst, lst_length, start, length):
+    start = start % lst_length
+    if start + length < lst_length:
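+        # fits without wrapping: return a contiguous slice, e.g. (hypothetical call)
+        # my_sample([0, 1, 2, 3, 4], 5, start=1, length=3) -> [1, 2, 3];
+        # otherwise the tail of lst is concatenated with its head (wrap-around, below)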
return lst[start : start + length] + return lst[start:] + lst[0 : start + length - lst_length] + + +class UMLSDataset(Dataset): + def __init__( + self, + umls_folder, + model_name_or_path, + lang, + json_save_path=None, + max_lui_per_cui=8, + max_length=32, + ): + self.umls = UMLS(umls_folder, lang_range=lang) + self.len = len(self.umls.rel) + self.max_lui_per_cui = max_lui_per_cui + self.max_length = max_length + self.tokenizer = AutoTokenizer.from_pretrained( + "/export/home/cse200093/scratch/word-embedding/finetuning-camembert-2021-07-29" + ) + self.json_save_path = json_save_path + self.calculate_class_count() + + def calculate_class_count(self): + print("Calculate class count") + + self.cui2id = {cui: index for index, cui in enumerate(self.umls.cui2str.keys())} + + self.re_set = set() + self.rel_set = set() + for r in self.umls.rel: + _, _, re, rel = r.split("\t") + self.re_set.update([re]) + self.rel_set.update([rel]) + self.re_set = list(self.re_set) + self.rel_set = list(self.rel_set) + self.re_set.sort() + self.rel_set.sort() + + self.re2id = {re: index for index, re in enumerate(self.re_set)} + self.rel2id = {rel: index for index, rel in enumerate(self.rel_set)} + + sty_list = list(set(self.umls.cui2sty.values())) + sty_list.sort() + self.sty2id = {sty: index for index, sty in enumerate(sty_list)} + + if self.json_save_path: + with open(os.path.join(self.json_save_path, "re2id.json"), "w") as f: + json.dump(self.re2id, f) + with open(os.path.join(self.json_save_path, "rel2id.json"), "w") as f: + json.dump(self.rel2id, f) + with open(os.path.join(self.json_save_path, "sty2id.json"), "w") as f: + json.dump(self.sty2id, f) + + print("CUI:", len(self.cui2id)) + print("RE:", len(self.re2id)) + print("REL:", len(self.rel2id)) + print("STY:", len(self.sty2id)) + + def tokenize_one(self, string): + return self.tokenizer.encode_plus( + string, max_length=self.max_length, truncation=True + )["input_ids"] + + # @profile + def __getitem__(self, index): + cui0, cui1, re, rel = self.umls.rel[index].split("\t") + + str0_list = list(self.umls.cui2str[cui0]) + str1_list = list(self.umls.cui2str[cui1]) + if len(str0_list) > self.max_lui_per_cui: + str0_list = sample(str0_list, self.max_lui_per_cui) + if len(str1_list) > self.max_lui_per_cui: + str1_list = sample(str1_list, self.max_lui_per_cui) + use_len = min(len(str0_list), len(str1_list)) + str0_list = str0_list[0:use_len] + str1_list = str1_list[0:use_len] + + sty0_index = self.sty2id[self.umls.cui2sty[cui0]] + sty1_index = self.sty2id[self.umls.cui2sty[cui1]] + + str2_list = [] + cui2_index_list = [] + sty2_index_list = [] + + cui2 = my_sample( + self.umls.cui, + self.umls.cui_count, + index * self.max_lui_per_cui, + use_len * 2, + ) + sample_index = 0 + while len(str2_list) < use_len: + if sample_index < len(cui2): + use_cui2 = cui2[sample_index] + else: + sample_index = 0 + cui2 = my_sample( + self.umls.cui, + self.umls.cui_count, + index * self.max_lui_per_cui, + use_len * 2, + ) + use_cui2 = cui2[sample_index] + # if not "\t".join([cui0, use_cui2, re, rel]) in self.umls.rel: # TOO SLOW! 
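+            # the exact membership test above costs O(|rel|) per candidate; as a
+            # pragmatic shortcut every sampled CUI is accepted as a pseudo-negative,
+            # betting that accidental true relations are rare enough to ignore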
+ if True: + cui2_index_list.append(self.cui2id[use_cui2]) + sty2_index_list.append(self.sty2id[self.umls.cui2sty[use_cui2]]) + str2_list.append(sample(self.umls.cui2str[use_cui2], 1)[0]) + sample_index += 1 + + # print(str0_list) + # print(str1_list) + # print(str2_list) + + input_ids = [self.tokenize_one(s) for s in str0_list + str1_list + str2_list] + input_ids = pad(input_ids, self.max_length) + input_ids_0 = input_ids[0:use_len] + input_ids_1 = input_ids[use_len : 2 * use_len] + input_ids_2 = input_ids[2 * use_len :] + + cui0_index = self.cui2id[cui0] + cui1_index = self.cui2id[cui1] + + re_index = self.re2id[re] + rel_index = self.rel2id[rel] + return ( + input_ids_0, + input_ids_1, + input_ids_2, + [cui0_index] * use_len, + [cui1_index] * use_len, + cui2_index_list, + [sty0_index] * use_len, + [sty1_index] * use_len, + sty2_index_list, + [re_index] * use_len, + [rel_index] * use_len, + ) + + def __len__(self): + return self.len + + +def fixed_length_dataloader(umls_dataset, fixed_length=96, num_workers=0): + base_sampler = RandomSampler(umls_dataset) + batch_sampler = FixedLengthBatchSampler( + sampler=base_sampler, fixed_length=fixed_length, drop_last=True + ) + dataloader = DataLoader( + umls_dataset, + batch_sampler=batch_sampler, + collate_fn=my_collate_fn, + num_workers=num_workers, + pin_memory=True, + ) + return dataloader + + +if __name__ == "__main__": + umls_dataset = UMLSDataset( + umls_folder="../umls", model_name_or_path="../biobert_v1.1", lang=None + ) + ipdb.set_trace() + umls_dataloader = fixed_length_dataloader(umls_dataset, num_workers=4) + now_time = time() + for index, batch in enumerate(umls_dataloader): + print(time() - now_time) + now_time = time() + if index < 10: + for item in batch: + print(item.shape) + # print(batch) + else: + import sys + + sys.exit() diff --git a/Normalisation/training/extract_bert.py b/Normalisation/training/extract_bert.py new file mode 100644 index 000000000..7b6ab9b28 --- /dev/null +++ b/Normalisation/training/extract_bert.py @@ -0,0 +1,8 @@ +import os +import sys + +import torch + +model = torch.load(sys.argv[1], map_location=torch.device("cpu")) +bert_model = model.bert +torch.save(bert_model, sys.argv[2]) diff --git a/Normalisation/training/extract_pandas_from_brat.py b/Normalisation/training/extract_pandas_from_brat.py new file mode 100644 index 000000000..ac04c0d09 --- /dev/null +++ b/Normalisation/training/extract_pandas_from_brat.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# coding: utf-8 +# %% + +# # Build prediction file +# +# From our files with NER prediction, extract a pandas data frame to work on entities easily +# + +# %% + + +import collections +import math +import re +from os import listdir +from os.path import basename, isdir, isfile, join + +import numpy as np +import pandas as pd + + +def extract_pandas(IN_BRAT_DIR, OUT_DF=None, labels=None): + + assert isdir(IN_BRAT_DIR) + + ENTITY_REGEX = re.compile("^(.\d+)\t([^ ]+) ([^\t]+)\t(.*)$") + + data = [] + patients = [] + + # extract all ann_files from IN_BRAT_DIR + ann_files = [ + f + for f in listdir(IN_BRAT_DIR) + if isfile(join(IN_BRAT_DIR, f)) + if f.endswith(".ann") + ] + + for ann_file in ann_files: + ann_path = join(IN_BRAT_DIR, ann_file) + txt_path = ann_path[:-4] + ".txt" + + # sanity check + assert isfile(ann_path) + assert isfile(txt_path) + + # Read text file to get patient number : + with open(txt_path, "r", encoding="utf-8") as f_txt: + lines_txt = f_txt.readlines() + patient_num = lines_txt[0][:-1] + patients.append(patient_num) + + # Read ann file + 
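+        # a BRAT entity line looks like (hypothetical example):
+        #   T1\tDISO 112 118\tanemie
+        # i.e. ann_id, then "label start end", then the surface form -- the four
+        # groups that ENTITY_REGEX captures below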
with open(ann_path, "r", encoding="utf-8") as f_in: + lines = f_in.readlines() + + for line in lines: + entity_match = ENTITY_REGEX.match(line.strip()) + if entity_match is not None: + ann_id = entity_match.group(1) + label = entity_match.group(2) + offsets = entity_match.group(3) + term = entity_match.group(4) + if labels is None: + data.append([ann_id, term, label, basename(ann_path), offsets]) + elif label in labels: + data.append([ann_id, term, label, basename(ann_path), offsets]) + + columns = ["ann_id", "term", "label", "source", "span"] + dataset_df = pd.DataFrame(data=list(data), columns=columns) + if OUT_DF: + dataset_df.to_csv(OUT_DF) + + return dataset_df diff --git a/Normalisation/training/generate_term_embeddings.py b/Normalisation/training/generate_term_embeddings.py new file mode 100644 index 000000000..8d854dfa0 --- /dev/null +++ b/Normalisation/training/generate_term_embeddings.py @@ -0,0 +1,103 @@ +import os +import sys + +from gensim import models + +sys.path.append("/export/home/cse200093/Jacques_Bio/normalisation/py_files") +import pickle + +import numpy as np +import pandas as pd +import torch +import tqdm +from load_umls import UMLS +from transformers import AutoConfig, AutoModel, AutoTokenizer +from unidecode import unidecode + +batch_size = 128 +device = "cuda:0" + +# Defining the model +# coder_all +model_checkpoint = ( + "/export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29" +) +tokenizer = AutoTokenizer.from_pretrained(model_checkpoint) +model = AutoModel.from_pretrained(model_checkpoint).to(device) + +# Defining data paths +DATA_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/json_annotated_normalisation_formatted/annotated_normalisation_formatted_train_umls_snomed.json" +EMBEDDINGS_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/data_embeddings_normalized_snomed_camembert_eds.pt" + +# Method to generate embeddings +def get_bert_embed( + phrase_list, m, tok, normalize=True, summary_method="CLS", tqdm_bar=False +): + input_ids = [] + for phrase in phrase_list: + input_ids.append( + tok.encode_plus( + phrase, + max_length=32, + add_special_tokens=True, + truncation=True, + pad_to_max_length=True, + )["input_ids"] + ) + m.eval() + + count = len(input_ids) + now_count = 0 + with torch.no_grad(): + if tqdm_bar: + pbar = tqdm.tqdm(total=count) + while now_count < count: + input_gpu_0 = torch.LongTensor( + input_ids[now_count : min(now_count + batch_size, count)] + ).to(device) + if summary_method == "CLS": + embed = m(input_gpu_0)[1] + if summary_method == "MEAN": + embed = torch.mean(m(input_gpu_0)[0], dim=1) + if normalize: + embed_norm = torch.norm(embed, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + embed = embed / embed_norm + if now_count == 0: + output = embed + else: + output = torch.cat((output, embed), dim=0) + if tqdm_bar: + pbar.update(min(now_count + batch_size, count) - now_count) + now_count = min(now_count + batch_size, count) + if tqdm_bar: + pbar.close() + return output + + +# Normalisation of words +def normalize(txt): + return unidecode( + txt.lower() + .replace("-", " ") + .replace("ag ", "antigene ") + .replace("ac ", "anticorps ") + .replace("antigenes ", "antigene ") + ) + + +# Loading our data to match +data_df = pd.read_json(DATA_DIR) +data_df["term"] = data_df["term"].apply(normalize) +# Merge same terms and keep all possible loincs +data_df = ( + data_df.groupby("term") + .agg({"term": "first", "annotation": set, "source": set}) + .reset_index(drop=True) +) + +umls_embedding = 
get_bert_embed( + data_df["term"].tolist(), model, tokenizer, tqdm_bar=False +) +torch.save(umls_embedding, EMBEDDINGS_SAVE_DIR) diff --git a/Normalisation/training/generate_term_embeddings.sh b/Normalisation/training/generate_term_embeddings.sh new file mode 100644 index 000000000..277dd37d2 --- /dev/null +++ b/Normalisation/training/generate_term_embeddings.sh @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH --job-name=generate_umls_embeddings.sh +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH -N1-1 +#SBATCH -c2 +#SBATCH --mem=40000 +#SBATCH -p gpuT4 +#SBATCH --output=./log/%x-%j.out +#SBATCH --error=./log/%x-%j.err +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable + +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : +echo starting +conda activate pierrenv +cd /export/home/cse200093/Jacques_Bio/normalisation/py_files +which python +python ./generate_term_embeddings.py +#export PYTHONPATH=/export/home/cse200093/nlstruct-master/:. +#which python +#python ./main.py diff --git a/Normalisation/training/generate_term_embeddings_coder_eds.py b/Normalisation/training/generate_term_embeddings_coder_eds.py new file mode 100644 index 000000000..6624e68ec --- /dev/null +++ b/Normalisation/training/generate_term_embeddings_coder_eds.py @@ -0,0 +1,104 @@ +import os +import sys + +from gensim import models + +sys.path.append("/export/home/cse200093/Jacques_Bio/normalisation/py_files") +import pickle + +import numpy as np +import pandas as pd +import torch +import tqdm +from load_umls import UMLS +from transformers import AutoConfig, AutoModel, AutoTokenizer +from unidecode import unidecode + +batch_size = 128 +device = "cuda:0" + +# Defining the model +# coder_all +model_checkpoint = ( + "/export/home/cse200093/Jacques_Bio/data_bio/coder_output/model_150000.pth" +) +tokenizer_path = "/export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29" +model = torch.load(model_checkpoint).to(device) +tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) + +# Defining data paths +DATA_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/json_annotated_normalisation_formatted/annotated_normalisation_formatted_train_umls_snomed.json" +EMBEDDINGS_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/data_embeddings_normalized_snomed_coder_eds.pt" + +# Method to generate embeddings +def get_bert_embed( + phrase_list, m, tok, normalize=True, summary_method="CLS", tqdm_bar=False +): + input_ids = [] + for phrase in phrase_list: + input_ids.append( + tok.encode_plus( + phrase, + max_length=32, + add_special_tokens=True, + truncation=True, + pad_to_max_length=True, + )["input_ids"] + ) + m.eval() + + count = len(input_ids) + now_count = 0 + with torch.no_grad(): + if tqdm_bar: + pbar = tqdm.tqdm(total=count) + while now_count < count: + input_gpu_0 = torch.LongTensor( + input_ids[now_count : min(now_count + batch_size, count)] + ).to(device) + if summary_method == "CLS": + embed = m.bert(input_gpu_0)[1] + if summary_method == "MEAN": + embed = torch.mean(m.bert(input_gpu_0)[0], dim=1) + if normalize: + embed_norm = torch.norm(embed, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + embed = embed / embed_norm + if now_count == 0: + output = embed + else: + output = torch.cat((output, embed), dim=0) + if tqdm_bar: + pbar.update(min(now_count + batch_size, count) - now_count) + now_count = 
min(now_count + batch_size, count) + if tqdm_bar: + pbar.close() + return output + + +# Normalisation of words +def normalize(txt): + return unidecode( + txt.lower() + .replace("-", " ") + .replace("ag ", "antigene ") + .replace("ac ", "anticorps ") + .replace("antigenes ", "antigene ") + ) + + +# Loading our data to match +data_df = pd.read_json(DATA_DIR) +data_df["term"] = data_df["term"].apply(normalize) +# Merge same terms and keep all possible loincs +data_df = ( + data_df.groupby("term") + .agg({"term": "first", "annotation": set, "source": set}) + .reset_index(drop=True) +) + +umls_embedding = get_bert_embed( + data_df["term"].tolist(), model, tokenizer, tqdm_bar=False +) +torch.save(umls_embedding, EMBEDDINGS_SAVE_DIR) diff --git a/Normalisation/training/generate_term_embeddings_coder_eds.sh b/Normalisation/training/generate_term_embeddings_coder_eds.sh new file mode 100644 index 000000000..979085094 --- /dev/null +++ b/Normalisation/training/generate_term_embeddings_coder_eds.sh @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH --job-name=generate_umls_embeddings_coder_eds.sh +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH -N1-1 +#SBATCH -c2 +#SBATCH --mem=40000 +#SBATCH -p gpuT4 +#SBATCH --output=./log/%x-%j.out +#SBATCH --error=./log/%x-%j.err +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable + +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : +echo starting +conda activate pierrenv +cd /export/home/cse200093/Jacques_Bio/normalisation/py_files +which python +python ./generate_term_embeddings_coder_eds.py +#export PYTHONPATH=/export/home/cse200093/nlstruct-master/:. 
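+# (the commented PYTHONPATH export above is presumably a leftover from an
+# nlstruct-based run of this pipeline)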
+#which python +#python ./main.py diff --git a/Normalisation/training/generate_term_embeddings_coder_eds_cased.py b/Normalisation/training/generate_term_embeddings_coder_eds_cased.py new file mode 100644 index 000000000..1c4f95bcb --- /dev/null +++ b/Normalisation/training/generate_term_embeddings_coder_eds_cased.py @@ -0,0 +1,106 @@ +import os +import sys + +from gensim import models + +sys.path.append("/export/home/cse200093/Jacques_Bio/normalisation/py_files") +import pickle + +import numpy as np +import pandas as pd +import torch +import tqdm +from load_umls import UMLS +from transformers import AutoConfig, AutoModel, AutoTokenizer +from unidecode import unidecode + +batch_size = 128 +device = "cuda:0" + +# Defining the model +# coder_all +model_checkpoint = ( + "/export/home/cse200093/Jacques_Bio/data_bio/coder_output/model_150000.pth" +) +tokenizer_path = "/export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29" +model = torch.load(model_checkpoint).to(device) +tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) + +# Defining data paths +DATA_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/json_annotated_normalisation_formatted/annotated_normalisation_formatted_train_umls_snomed.json" +EMBEDDINGS_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/data_embeddings_normalized_snomed_coder_eds_2_cased.pt" + +# Method to generate embeddings +def get_bert_embed( + phrase_list, m, tok, normalize=True, summary_method="CLS", tqdm_bar=False +): + input_ids = [] + for phrase in phrase_list: + input_ids.append( + tok.encode_plus( + phrase, + max_length=32, + add_special_tokens=True, + truncation=True, + pad_to_max_length=True, + )["input_ids"] + ) + m.eval() + + count = len(input_ids) + now_count = 0 + with torch.no_grad(): + if tqdm_bar: + pbar = tqdm.tqdm(total=count) + while now_count < count: + input_gpu_0 = torch.LongTensor( + input_ids[now_count : min(now_count + batch_size, count)] + ).to(device) + if summary_method == "CLS": + embed = m.bert(input_gpu_0)[1] + if summary_method == "MEAN": + embed = torch.mean(m.bert(input_gpu_0)[0], dim=1) + if normalize: + embed_norm = torch.norm(embed, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + embed = embed / embed_norm + if now_count == 0: + output = embed + else: + output = torch.cat((output, embed), dim=0) + if tqdm_bar: + pbar.update(min(now_count + batch_size, count) - now_count) + now_count = min(now_count + batch_size, count) + if tqdm_bar: + pbar.close() + return output + + +# Normalisation of words +def normalize(txt): + return unidecode( + txt.replace("-", " ") + .replace("ag ", "antigene ") + .replace("ac ", "anticorps ") + .replace("antigenes ", "antigene ") + .replace("Ag ", "Antigene ") + .replace("Ac ", "Anticorps ") + .replace("Antigenes ", "Antigene ") + ) + + +# Loading our data to match +data_df = pd.read_json(DATA_DIR) +data_df["term"] = data_df["term"].apply(normalize) +# Merge same terms and keep all possible loincs +data_df = ( + data_df.groupby("term") + .agg({"term": "first", "annotation": set, "source": set}) + .reset_index(drop=True) +) + +umls_embedding = get_bert_embed( + data_df["term"].tolist(), model, tokenizer, tqdm_bar=False +) +torch.save(umls_embedding, EMBEDDINGS_SAVE_DIR) diff --git a/Normalisation/training/generate_term_embeddings_coder_eds_cased.sh b/Normalisation/training/generate_term_embeddings_coder_eds_cased.sh new file mode 100644 index 000000000..b921d9de4 --- /dev/null +++ b/Normalisation/training/generate_term_embeddings_coder_eds_cased.sh @@ 
-0,0 +1,22 @@ +#!/bin/bash +#SBATCH --job-name=generate_umls_embeddings_coder_eds_cased.sh +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH -N1-1 +#SBATCH -c2 +#SBATCH --mem=40000 +#SBATCH -p gpuT4 +#SBATCH --output=./log/%x-%j.out +#SBATCH --error=./log/%x-%j.err +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable + +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : +echo starting +conda activate pierrenv +cd /export/home/cse200093/Jacques_Bio/normalisation/py_files +which python +python ./generate_term_embeddings_coder_eds_cased.py +#export PYTHONPATH=/export/home/cse200093/nlstruct-master/:. +#which python +#python ./main.py diff --git a/Normalisation/training/generate_umls_embeddings.py b/Normalisation/training/generate_umls_embeddings.py new file mode 100644 index 000000000..ca35acf08 --- /dev/null +++ b/Normalisation/training/generate_umls_embeddings.py @@ -0,0 +1,115 @@ +print("BONJOUR") + +import os +import sys + +from gensim import models + +sys.path.append("/export/home/cse200093/Jacques_Bio/normalisation/py_files") +import pickle + +import numpy as np +import torch +import tqdm +from load_umls import UMLS +from transformers import AutoConfig, AutoModel, AutoTokenizer + +batch_size = 128 +device = "cuda:0" + +# Defining the model +# coder_all +model_checkpoint = "/export/home/cse200093/coder_all" +tokenizer = AutoTokenizer.from_pretrained(model_checkpoint) +model = AutoModel.from_pretrained(model_checkpoint).to(device) + +# Defining save paths +DES_SAVE_DIR = ( + "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_des.pkl" +) +LABEL_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_label.pkl" +EMBEDDINGS_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_embeddings.pt" + +# Method to generate embeddings +def get_bert_embed( + phrase_list, m, tok, normalize=True, summary_method="CLS", tqdm_bar=False +): + input_ids = [] + for phrase in phrase_list: + input_ids.append( + tok.encode_plus( + phrase, + max_length=32, + add_special_tokens=True, + truncation=True, + pad_to_max_length=True, + )["input_ids"] + ) + m.eval() + + count = len(input_ids) + now_count = 0 + with torch.no_grad(): + if tqdm_bar: + pbar = tqdm.tqdm(total=count) + while now_count < count: + input_gpu_0 = torch.LongTensor( + input_ids[now_count : min(now_count + batch_size, count)] + ).to(device) + if summary_method == "CLS": + embed = m(input_gpu_0)[1] + if summary_method == "MEAN": + embed = torch.mean(m(input_gpu_0)[0], dim=1) + if normalize: + embed_norm = torch.norm(embed, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + embed = embed / embed_norm + if now_count == 0: + output = embed + else: + output = torch.cat((output, embed), dim=0) + if tqdm_bar: + pbar.update(min(now_count + batch_size, count) - now_count) + now_count = min(now_count + batch_size, count) + if tqdm_bar: + pbar.close() + return output + + +def get_umls(): + umls_label = [] + umls_label_set = set() + umls_des = [] + umls = UMLS( + "/export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB", + lang_range=["ENG", "FRA"], + only_load_dict=True, + ) + umls.load_sty() + umls_sty = umls.cui2sty + for cui in tqdm.tqdm(umls.cui2str): + if not cui in umls_label_set and umls_sty[cui] == "Laboratory Procedure": + tmp_str = list(umls.cui2str[cui]) + 
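+            # one embedding row per synonym string: repeating the CUI keeps the
+            # label list aligned with the rows of the embedding matrix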
umls_label.extend([cui] * len(tmp_str)) + umls_des.extend(tmp_str) + umls_label_set.update([cui]) + print(len(umls_des)) + return umls_label, umls_des + + +umls_label, umls_des = get_umls() +umls_embedding = get_bert_embed(umls_des, model, tokenizer, tqdm_bar=False) + +"""# Save embeddings +torch.save(umls_embedding, EMBEDDINGS_SAVE_DIR) + +# Save umls_des +open_file = open(DES_SAVE_DIR, "wb") +pickle.dump(umls_des, open_file) +open_file.close() + +# Save umls_labels +open_file = open(LABEL_SAVE_DIR, "wb") +pickle.dump(umls_label, open_file) +open_file.close()""" diff --git a/Normalisation/training/generate_umls_embeddings.sh b/Normalisation/training/generate_umls_embeddings.sh new file mode 100644 index 000000000..a22ed4f59 --- /dev/null +++ b/Normalisation/training/generate_umls_embeddings.sh @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH --job-name=generate_umls_embeddings.sh +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH -N1-1 +#SBATCH -c2 +#SBATCH --mem=40000 +#SBATCH -p gpuT4 +#SBATCH --output=./log/%x-%j.out +#SBATCH --error=./log/%x-%j.err +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable + +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : +echo starting +conda activate pierrenv +cd /export/home/cse200093/Jacques_Bio/normalisation/py_files +which python +python ./generate_umls_embeddings.py +#export PYTHONPATH=/export/home/cse200093/nlstruct-master/:. +#which python +#python ./main.py diff --git a/Normalisation/training/generate_umls_normalized_embeddings.py b/Normalisation/training/generate_umls_normalized_embeddings.py new file mode 100644 index 000000000..b40813721 --- /dev/null +++ b/Normalisation/training/generate_umls_normalized_embeddings.py @@ -0,0 +1,174 @@ +import os +import pickle +import re +import sys + +import numpy as np +import pandas as pd +import torch +import tqdm +from gensim import models +from transformers import AutoConfig, AutoModel, AutoTokenizer +from unidecode import unidecode + +batch_size = 128 +device = "cuda:0" + +# Defining the model +# coder_all +model_checkpoint = ( + "/export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29" +) +tokenizer = AutoTokenizer.from_pretrained(model_checkpoint) +model = AutoModel.from_pretrained(model_checkpoint).to(device) + +# UMLS path +UMLS_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_umls_synonyms/bio_str_SNOMEDCT_US.json" + +# Defining save paths +DES_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_des_camembert_eds.pkl" +LABEL_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_label_camembert_eds.pkl" +EMBEDDINGS_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_embeddings_camembert_eds.pt" + +# Method to generate embeddings +def get_bert_embed( + phrase_list, m, tok, normalize=True, summary_method="CLS", tqdm_bar=False +): + input_ids = [] + for phrase in phrase_list: + input_ids.append( + tok.encode_plus( + phrase, + max_length=32, + add_special_tokens=True, + truncation=True, + pad_to_max_length=True, + )["input_ids"] + ) + m.eval() + + count = len(input_ids) + now_count = 0 + with torch.no_grad(): + if tqdm_bar: + pbar = tqdm.tqdm(total=count) + while now_count < count: + input_gpu_0 = torch.LongTensor( + input_ids[now_count : min(now_count 
+ batch_size, count)] + ).to(device) + if summary_method == "CLS": + embed = m(input_gpu_0)[1] + if summary_method == "MEAN": + embed = torch.mean(m(input_gpu_0)[0], dim=1) + if normalize: + embed_norm = torch.norm(embed, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + embed = embed / embed_norm + if now_count == 0: + output = embed + else: + output = torch.cat((output, embed), dim=0) + if tqdm_bar: + pbar.update(min(now_count + batch_size, count) - now_count) + now_count = min(now_count + batch_size, count) + if tqdm_bar: + pbar.close() + return output + + +# Normalisation of words +def normalize(txt): + return unidecode( + txt.lower() + .replace("-", " ") + .replace("ag ", "antigene ") + .replace("ac ", "anticorps ") + .replace("antigenes ", "antigene ") + ) + + +umls_raw = pd.read_json(UMLS_DIR) +umls_raw["STR"] = umls_raw["STR"].apply(normalize) + +words_to_remove = [ + "for", + "assay", + "by", + "tests", + "minute", + "exam", + "with", + "human", + "moyenne", + "in", + "to", + "from", + "analyse", + "test", + "level", + "fluid", + "laboratory", + "determination", + "examination", + "releasing", + "quantitative", + "screening", + "and", + "exploration", + "factor", + "method", + "analysis", + "laboratoire", + "specimen", + "or", + "typing", + "of", + "concentration", + "measurement", + "detection", + "procedure", + "identification", + "numeration", + "hour", + "retired", + "technique", + "count", +] + +# Second step of normalization: we remove stop words and special characters with regex +regex_words_to_remove = r"\b(?:" + "|".join(words_to_remove) + r")\b" +regex_remove_special_characters = "[^a-zA-Z0-9\s]" + +umls_raw["STR"] = umls_raw["STR"].apply( + lambda syn: re.compile(regex_words_to_remove).sub("", syn) +) +umls_raw["STR"] = umls_raw["STR"].apply( + lambda syn: re.compile(regex_remove_special_characters).sub("", syn) +) +umls_raw["STR"] = umls_raw["STR"].apply(lambda syn: re.sub(" +", " ", syn).strip()) + +umls_raw = ( + umls_raw.loc[(~umls_raw["STR"].str.isnumeric()) & (umls_raw["STR"] != "")] + .groupby(["STR"]) + .agg({"CUI": set, "STR": "first"}) + .reset_index(drop=True) +) +umls_label = umls_raw["CUI"] +umls_des = umls_raw["STR"] + +print("Starting embeddings generation...") +umls_embedding = get_bert_embed(umls_des, model, tokenizer, tqdm_bar=True) + +# Save embeddings +torch.save(umls_embedding, EMBEDDINGS_SAVE_DIR) + +# Save umls_des +open_file = open(DES_SAVE_DIR, "wb") +pickle.dump(umls_des, open_file) +open_file.close() + +# Save umls_labels +open_file = open(LABEL_SAVE_DIR, "wb") +pickle.dump(umls_label, open_file) +open_file.close() diff --git a/Normalisation/training/generate_umls_normalized_embeddings.sh b/Normalisation/training/generate_umls_normalized_embeddings.sh new file mode 100644 index 000000000..e8b419dff --- /dev/null +++ b/Normalisation/training/generate_umls_normalized_embeddings.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#SBATCH --job-name=generate_umls_normalized_embeddings.sh +#SBATCH -t 24:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH -N1-1 +#SBATCH -c2 +#SBATCH --mem=40000 +#SBATCH -p gpuT4 +#SBATCH -w bbs-edsg28-p012 +#SBATCH --output=./log/%x-%j.out +#SBATCH --error=./log/%x-%j.err +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable + +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : +echo starting +conda activate pierrenv +#cd 
/export/home/cse200093/Jacques_Bio/normalisation/py_files +which python +pwd +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/generate_umls_normalized_embeddings.py +echo end +#export PYTHONPATH=/export/home/cse200093/nlstruct-master/:. +#which python +#python ./main.py diff --git a/Normalisation/training/generate_umls_normalized_embeddings_coder_eds.py b/Normalisation/training/generate_umls_normalized_embeddings_coder_eds.py new file mode 100644 index 000000000..88e7f2721 --- /dev/null +++ b/Normalisation/training/generate_umls_normalized_embeddings_coder_eds.py @@ -0,0 +1,175 @@ +import os +import pickle +import re +import sys + +import numpy as np +import pandas as pd +import torch +import tqdm +from gensim import models +from transformers import AutoConfig, AutoModel, AutoTokenizer +from unidecode import unidecode + +batch_size = 128 +device = "cuda:0" + +# Defining the model +# coder_eds +model_checkpoint = ( + "/export/home/cse200093/Jacques_Bio/data_bio/coder_output/model_150000.pth" +) +tokenizer_path = "/export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29" +model = torch.load(model_checkpoint).to(device) +tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) + +# UMLS path +UMLS_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_umls_synonyms/bio_str_SNOMEDCT_US.json" + +# Defining save paths +DES_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_des_coder_eds.pkl" +LABEL_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_label_coder_eds.pkl" +EMBEDDINGS_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_embeddings_coder_eds.pt" + +# Method to generate embeddings +def get_bert_embed( + phrase_list, m, tok, normalize=True, summary_method="CLS", tqdm_bar=False +): + input_ids = [] + for phrase in phrase_list: + input_ids.append( + tok.encode_plus( + phrase, + max_length=32, + add_special_tokens=True, + truncation=True, + pad_to_max_length=True, + )["input_ids"] + ) + m.eval() + + count = len(input_ids) + now_count = 0 + with torch.no_grad(): + if tqdm_bar: + pbar = tqdm.tqdm(total=count) + while now_count < count: + input_gpu_0 = torch.LongTensor( + input_ids[now_count : min(now_count + batch_size, count)] + ).to(device) + if summary_method == "CLS": + embed = m.bert(input_gpu_0)[1] + if summary_method == "MEAN": + embed = torch.mean(m.bert(input_gpu_0)[0], dim=1) + if normalize: + embed_norm = torch.norm(embed, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + embed = embed / embed_norm + if now_count == 0: + output = embed + else: + output = torch.cat((output, embed), dim=0) + if tqdm_bar: + pbar.update(min(now_count + batch_size, count) - now_count) + now_count = min(now_count + batch_size, count) + if tqdm_bar: + pbar.close() + return output + + +# Normalisation of words +def normalize(txt): + return unidecode( + txt.lower() + .replace("-", " ") + .replace("ag ", "antigene ") + .replace("ac ", "anticorps ") + .replace("antigenes ", "antigene ") + ) + + +umls_raw = pd.read_json(UMLS_DIR) +umls_raw["STR"] = umls_raw["STR"].apply(normalize) + +words_to_remove = [ + "for", + "assay", + "by", + "tests", + "minute", + "exam", + "with", + "human", + "moyenne", + "in", + "to", + "from", + "analyse", + "test", + "level", + "fluid", + "laboratory", + "determination", + "examination", + "releasing", + "quantitative", + "screening", + "and", + "exploration", + "factor", + "method", + "analysis", + 
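+    # (the same hand-built English/French stop-word list is duplicated in each
+    # generate_umls_normalized_embeddings* script)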
"laboratoire", + "specimen", + "or", + "typing", + "of", + "concentration", + "measurement", + "detection", + "procedure", + "identification", + "numeration", + "hour", + "retired", + "technique", + "count", +] + +# Second step of normalization: we remove stop words and special characters with regex +regex_words_to_remove = r"\b(?:" + "|".join(words_to_remove) + r")\b" +regex_remove_special_characters = "[^a-zA-Z0-9\s]" + +umls_raw["STR"] = umls_raw["STR"].apply( + lambda syn: re.compile(regex_words_to_remove).sub("", syn) +) +umls_raw["STR"] = umls_raw["STR"].apply( + lambda syn: re.compile(regex_remove_special_characters).sub("", syn) +) +umls_raw["STR"] = umls_raw["STR"].apply(lambda syn: re.sub(" +", " ", syn).strip()) + +umls_raw = ( + umls_raw.loc[(~umls_raw["STR"].str.isnumeric()) & (umls_raw["STR"] != "")] + .groupby(["STR"]) + .agg({"CUI": set, "STR": "first"}) + .reset_index(drop=True) +) +umls_label = umls_raw["CUI"] +umls_des = umls_raw["STR"] + +print("Starting embeddings generation...") +umls_embedding = get_bert_embed(umls_des, model, tokenizer, tqdm_bar=True) + +# Save embeddings +torch.save(umls_embedding, EMBEDDINGS_SAVE_DIR) + +# Save umls_des +open_file = open(DES_SAVE_DIR, "wb") +pickle.dump(umls_des, open_file) +open_file.close() + +# Save umls_labels +open_file = open(LABEL_SAVE_DIR, "wb") +pickle.dump(umls_label, open_file) +open_file.close() diff --git a/Normalisation/training/generate_umls_normalized_embeddings_coder_eds.sh b/Normalisation/training/generate_umls_normalized_embeddings_coder_eds.sh new file mode 100644 index 000000000..b9535ac5d --- /dev/null +++ b/Normalisation/training/generate_umls_normalized_embeddings_coder_eds.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#SBATCH --job-name=generate_umls_normalized_embeddings_coder_eds.sh +#SBATCH -t 24:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH -N1-1 +#SBATCH -c2 +#SBATCH --mem=40000 +#SBATCH -p gpuT4 +#SBATCH -w bbs-edsg28-p009 +#SBATCH --output=./log/%x-%j.out +#SBATCH --error=./log/%x-%j.err +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable + +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : +echo starting +conda activate pierrenv +#cd /export/home/cse200093/Jacques_Bio/normalisation/py_files +which python +pwd +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/generate_umls_normalized_embeddings_coder_eds.py +echo end +#export PYTHONPATH=/export/home/cse200093/nlstruct-master/:. 
+#which python +#python ./main.py diff --git a/Normalisation/training/generate_umls_normalized_embeddings_coder_eds_cased.py b/Normalisation/training/generate_umls_normalized_embeddings_coder_eds_cased.py new file mode 100644 index 000000000..0a3e89943 --- /dev/null +++ b/Normalisation/training/generate_umls_normalized_embeddings_coder_eds_cased.py @@ -0,0 +1,177 @@ +import os +import pickle +import re +import sys + +import numpy as np +import pandas as pd +import torch +import tqdm +from gensim import models +from transformers import AutoConfig, AutoModel, AutoTokenizer +from unidecode import unidecode + +batch_size = 128 +device = "cuda:0" + +# Defining the model +# coder_eds +model_checkpoint = ( + "/export/home/cse200093/Jacques_Bio/data_bio/coder_output/model_150000.pth" +) +tokenizer_path = "/export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29" +model = torch.load(model_checkpoint).to(device) +tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) + +# UMLS path +UMLS_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_umls_synonyms/bio_str_SNOMEDCT_US.json" + +# Defining save paths +DES_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_des_coder_eds_2_cased.pkl" +LABEL_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_label_coder_eds_2_cased.pkl" +EMBEDDINGS_SAVE_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_embeddings_coder_eds_2_cased.pt" + +# Method to generate embeddings +def get_bert_embed( + phrase_list, m, tok, normalize=True, summary_method="CLS", tqdm_bar=False +): + input_ids = [] + for phrase in phrase_list: + input_ids.append( + tok.encode_plus( + phrase, + max_length=32, + add_special_tokens=True, + truncation=True, + pad_to_max_length=True, + )["input_ids"] + ) + m.eval() + + count = len(input_ids) + now_count = 0 + with torch.no_grad(): + if tqdm_bar: + pbar = tqdm.tqdm(total=count) + while now_count < count: + input_gpu_0 = torch.LongTensor( + input_ids[now_count : min(now_count + batch_size, count)] + ).to(device) + if summary_method == "CLS": + embed = m.bert(input_gpu_0)[1] + if summary_method == "MEAN": + embed = torch.mean(m.bert(input_gpu_0)[0], dim=1) + if normalize: + embed_norm = torch.norm(embed, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + embed = embed / embed_norm + if now_count == 0: + output = embed + else: + output = torch.cat((output, embed), dim=0) + if tqdm_bar: + pbar.update(min(now_count + batch_size, count) - now_count) + now_count = min(now_count + batch_size, count) + if tqdm_bar: + pbar.close() + return output + + +# Normalisation of words +def normalize(txt): + return unidecode( + txt.replace("-", " ") + .replace("ag ", "antigene ") + .replace("ac ", "anticorps ") + .replace("antigenes ", "antigene ") + .replace("Ag ", "Antigene ") + .replace("Ac ", "Anticorps ") + .replace("Antigenes ", "Antigene ") + ) + + +umls_raw = pd.read_json(UMLS_DIR) +umls_raw["STR"] = umls_raw["STR"].apply(normalize) + +words_to_remove = [ + "for", + "assay", + "by", + "tests", + "minute", + "exam", + "with", + "human", + "moyenne", + "in", + "to", + "from", + "analyse", + "test", + "level", + "fluid", + "laboratory", + "determination", + "examination", + "releasing", + "quantitative", + "screening", + "and", + "exploration", + "factor", + "method", + "analysis", + "laboratoire", + "specimen", + "or", + "typing", + "of", + "concentration", + "measurement", + "detection", + "procedure", + 
"identification", + "numeration", + "hour", + "retired", + "technique", + "count", +] + +# Second step of normalization: we remove stop words and special characters with regex +regex_words_to_remove = r"\b(?:" + "|".join(words_to_remove) + r")\b" +regex_remove_special_characters = "[^a-zA-Z0-9\s]" + +umls_raw["STR"] = umls_raw["STR"].apply( + lambda syn: re.compile(regex_words_to_remove, re.IGNORECASE).sub("", syn) +) +umls_raw["STR"] = umls_raw["STR"].apply( + lambda syn: re.compile(regex_remove_special_characters, re.IGNORECASE).sub("", syn) +) +umls_raw["STR"] = umls_raw["STR"].apply(lambda syn: re.sub(" +", " ", syn).strip()) + +umls_raw = ( + umls_raw.loc[(~umls_raw["STR"].str.isnumeric()) & (umls_raw["STR"] != "")] + .groupby(["STR"]) + .agg({"CUI": set, "STR": "first"}) + .reset_index(drop=True) +) +umls_label = umls_raw["CUI"] +umls_des = umls_raw["STR"] + +print("Starting embeddings generation...") +umls_embedding = get_bert_embed(umls_des, model, tokenizer, tqdm_bar=True) + +# Save embeddings +torch.save(umls_embedding, EMBEDDINGS_SAVE_DIR) + +# Save umls_des +open_file = open(DES_SAVE_DIR, "wb") +pickle.dump(umls_des, open_file) +open_file.close() + +# Save umls_labels +open_file = open(LABEL_SAVE_DIR, "wb") +pickle.dump(umls_label, open_file) +open_file.close() diff --git a/Normalisation/training/generate_umls_normalized_embeddings_coder_eds_cased.sh b/Normalisation/training/generate_umls_normalized_embeddings_coder_eds_cased.sh new file mode 100644 index 000000000..6edb1868a --- /dev/null +++ b/Normalisation/training/generate_umls_normalized_embeddings_coder_eds_cased.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#SBATCH --job-name=generate_umls_normalized_embeddings_coder_eds_cased.sh +#SBATCH -t 24:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH -N1-1 +#SBATCH -c2 +#SBATCH --mem=40000 +#SBATCH -p gpuT4 +#SBATCH -w bbs-edsg28-p009 +#SBATCH --output=./log/%x-%j.out +#SBATCH --error=./log/%x-%j.err +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable + +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : +echo starting +conda activate pierrenv +#cd /export/home/cse200093/Jacques_Bio/normalisation/py_files +which python +pwd +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/generate_umls_normalized_embeddings_coder_eds_cased.py +echo end +#export PYTHONPATH=/export/home/cse200093/nlstruct-master/:. 
+#which python +#python ./main.py diff --git a/Normalisation/training/get_matches.py b/Normalisation/training/get_matches.py new file mode 100644 index 000000000..734bed0dd --- /dev/null +++ b/Normalisation/training/get_matches.py @@ -0,0 +1,35 @@ +import os +import pickle +import sys + +import numpy as np +import pandas as pd +import torch +import tqdm +from gensim import models +from transformers import AutoConfig, AutoModel, AutoTokenizer + +device = "cpu" +EMBEDDINGS_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_embeddings_coder_eds.pt" +RES_EMBEDDINGS_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/data_embeddings_normalized_snomed_coder_eds.pt" +LABEL_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_label_coder_eds.pkl" +DES_DIR = "/export/home/cse200093/Jacques_Bio/data_bio/normalisation_embeddings/umls_normalized_des_coder_eds.pkl" + +# LOAD UMLS EMBEDDINGS ALREADY GENERATED +umls_embeddings = torch.load(EMBEDDINGS_DIR, map_location=torch.device(device)) +# LOAD EMBEDDINGS FROM ANNOTATED DATASET ALREADY GENERATED +res_embeddings = torch.load(RES_EMBEDDINGS_DIR, map_location=torch.device(device)) + +# LOAD CORRESPONDANCE FILE BETWEEN EMBEDDINGS AND CUIS AND DES +with open(LABEL_DIR, "rb") as f: + umls_labels = pickle.load(f) + +with open(DES_DIR, "rb") as f: + umls_des = pickle.load(f) + +sim = torch.matmul(res_embeddings, umls_embeddings.t()) +most_similar = torch.max(sim, dim=1)[1].tolist() +label_matches = [umls_labels[idx] for idx in most_similar] +des_matches = [umls_des[idx] for idx in most_similar] +print(label_matches) +print(des_matches) diff --git a/Normalisation/training/get_matches.sh b/Normalisation/training/get_matches.sh new file mode 100644 index 000000000..61a5375d0 --- /dev/null +++ b/Normalisation/training/get_matches.sh @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH --job-name=get_matches.sh +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH -N1-1 +#SBATCH -c2 +#SBATCH --mem=40000 +#SBATCH -p gpuT4 +#SBATCH --output=./log/%x-%j.out +#SBATCH --error=./log/%x-%j.err +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable + +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : +echo starting +conda activate pierrenv +cd /export/home/cse200093/Jacques_Bio/normalisation/py_files +which python +python ./get_matches.py +#export PYTHONPATH=/export/home/cse200093/nlstruct-master/:. 
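+# note: get_matches.py itself runs on CPU (device = "cpu"), so the T4 reservation
+# above is presumably unnecessary for this job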
+#which python +#python ./main.py diff --git a/Normalisation/training/load_umls.py b/Normalisation/training/load_umls.py new file mode 100644 index 000000000..6f0c7fe14 --- /dev/null +++ b/Normalisation/training/load_umls.py @@ -0,0 +1,186 @@ +import os +import re +from random import shuffle + +from tqdm import tqdm + +# import ipdb + + +def byLineReader(filename): + with open(filename, "r", encoding="utf-8") as f: + line = f.readline() + while line: + yield line + line = f.readline() + return + + +class UMLS(object): + def __init__( + self, umls_path, source_range=None, lang_range=["ENG"], only_load_dict=False + ): + self.umls_path = umls_path + self.source_range = source_range + self.lang_range = lang_range + self.detect_type() + self.load() + if not only_load_dict: + self.load_rel() + self.load_sty() + + def detect_type(self): + if os.path.exists(os.path.join(self.umls_path, "MRCONSO.RRF")): + self.type = "RRF" + else: + self.type = "txt" + + def load(self): + reader = byLineReader(os.path.join(self.umls_path, "MRCONSO." + self.type)) + self.lui_set = set() + self.cui2str = {} + self.str2cui = {} + self.code2cui = {} + # self.lui_status = {} + read_count = 0 + for line in tqdm(reader, ascii=True): + if self.type == "txt": + l = [t.replace('"', "") for t in line.split(",")] + else: + l = line.strip().split("|") + if len(l) < 3: + continue + cui = l[0] + lang = l[1] + # lui_status = l[2].lower() # p -> preferred + lui = l[3] + source = l[11] + code = l[13] + string = l[14] + + if (self.source_range is None or source in self.source_range) and ( + self.lang_range is None or lang in self.lang_range + ): + if not lui in self.lui_set: + read_count += 1 + self.str2cui[string] = cui + self.str2cui[string.lower()] = cui + clean_string = self.clean(string) + self.str2cui[clean_string] = cui + + if not cui in self.cui2str: + self.cui2str[cui] = set() + self.cui2str[cui].update([clean_string]) + self.code2cui[code] = cui + self.lui_set.update([lui]) + + # For debug + # if read_count > 1000: + # break + + self.cui = list(self.cui2str.keys()) + shuffle(self.cui) + self.cui_count = len(self.cui) + + print("cui count:", self.cui_count) + print("str2cui count:", len(self.str2cui)) + print("MRCONSO count:", read_count) + + def load_rel(self): + reader = byLineReader(os.path.join(self.umls_path, "MRREL." + self.type)) + self.rel = set() + for line in tqdm(reader, ascii=True): + if self.type == "txt": + l = [t.replace('"', "") for t in line.split(",")] + else: + l = line.strip().split("|") + cui0 = l[0] + re = l[3] + cui1 = l[4] + rel = l[7] + if cui0 in self.cui2str and cui1 in self.cui2str: + str_rel = "\t".join([cui0, cui1, re, rel]) + if not str_rel in self.rel and cui0 != cui1: + self.rel.update([str_rel]) + + # For debug + # if len(self.rel) > 1000: + # break + self.rel = list(self.rel) + + print("rel count:", len(self.rel)) + + def load_sty(self): + reader = byLineReader(os.path.join(self.umls_path, "MRSTY." 
+ self.type)) + self.cui2sty = {} + for line in tqdm(reader, ascii=True): + if self.type == "txt": + l = [t.replace('"', "") for t in line.split(",")] + else: + l = line.strip().split("|") + cui = l[0] + sty = l[3] + if cui in self.cui2str: + self.cui2sty[cui] = sty + + print("sty count:", len(self.cui2sty)) + + def clean( + self, term, lower=True, clean_NOS=True, clean_bracket=True, clean_dash=True + ): + term = " " + term + " " + if lower: + term = term.lower() + if clean_NOS: + term = term.replace(" NOS ", " ").replace(" nos ", " ") + if clean_bracket: + term = re.sub("\\(.*?\\)", "", term) + if clean_dash: + term = term.replace("-", " ") + term = " ".join([w for w in term.split() if w]) + return term + + def search_by_code(self, code): + if code in self.cui2str: + return list(self.cui2str[code]) + if code in self.code2cui: + return list(self.cui2str[self.code2cui[code]]) + return None + + def search_by_string_list(self, string_list): + for string in string_list: + if string in self.str2cui: + find_string = self.cui2str[self.str2cui[string]] + return [string for string in find_string if not string in string_list] + if string.lower() in self.str2cui: + find_string = self.cui2str[self.str2cui[string.lower()]] + return [string for string in find_string if not string in string_list] + return None + + def search(self, code=None, string_list=None, max_number=-1): + result_by_code = self.search_by_code(code) + if result_by_code is not None: + if max_number > 0: + return result_by_code[0 : min(len(result_by_code), max_number)] + return result_by_code + return None + result_by_string = self.search_by_string_list(string_list) + if result_by_string is not None: + if max_number > 0: + return result_by_string[0 : min(len(result_by_string), max_number)] + return result_by_string + return None + + +if __name__ == "__main__": + umls = UMLS("E:\\code\\research\\umls") + # print(umls.search_by_code("282299006")) + # print(umls.search_by_string_list(["Backache", "aching muscles in back"])) + # print(umls.search(code="95891005", max_number=10)) + # ipdb.set_trace() + +""" +['unable to balance', 'loss of balance'] +['backache', 'back pain', 'dorsalgi', 'dorsodynia', 'pain over the back', 'back pain [disease/finding]', 'back ache', 'dorsal back pain', 'backach', 'dorsalgia', 'dorsal pain', 'notalgia', 'unspecified back pain', 'backpain', 'backache symptom'] +['influenza like illness', 'flu-like illness', 'influenza-like illness'] +""" diff --git a/Normalisation/training/load_umls_normalized.py b/Normalisation/training/load_umls_normalized.py new file mode 100644 index 000000000..277aa9a6c --- /dev/null +++ b/Normalisation/training/load_umls_normalized.py @@ -0,0 +1,208 @@ +import os +import re +from random import shuffle + +from tqdm import tqdm + +# import ipdb + +### THIS LOADING PROCESS DEFERS FROM load_umls.py IN THE WAY THAT source_range LETS YOU +### SELECT ALL SYNONYMS FROM ONE CUI IF THIS CUI HAVE AT LEAST ONE SYNONYM +### FROM A SOURCE OF source_range +### THUS, ALL SYNONYMS ARE NOT FROM THE SOURCES OF source_range + + +def byLineReader(filename): + with open(filename, "r", encoding="utf-8") as f: + line = f.readline() + while line: + yield line + line = f.readline() + return + + +class UMLS(object): + def __init__( + self, umls_path, source_range=None, lang_range=["ENG"], only_load_dict=False + ): + self.umls_path = umls_path + self.source_range = source_range + self.lang_range = lang_range + self.detect_type() + self.load() + if not only_load_dict: + self.load_rel() + self.load_sty() + + def 
detect_type(self):
+        if os.path.exists(os.path.join(self.umls_path, "MRCONSO.RRF")):
+            self.type = "RRF"
+        else:
+            self.type = "txt"
+
+    def load(self):
+        reader = byLineReader(os.path.join(self.umls_path, "MRCONSO." + self.type))
+        self.lui_set = set()
+        self.cui2str = {}
+        self.str2cui = {}
+        self.code2cui = {}
+        # self.lui_status = {}
+
+        # First pass: collect every CUI that has at least one synonym in source_range
+        cuis2keep = set()
+        if self.source_range is not None:
+            for line in tqdm(reader, ascii=True):
+                if self.type == "txt":
+                    l = [t.replace('"', "") for t in line.split(",")]
+                else:
+                    l = line.strip().split("|")
+                if len(l) < 3:
+                    continue
+                cui = l[0]
+                source = l[11]
+                if source in self.source_range:
+                    cuis2keep.add(cui)
+
+        reader = byLineReader(os.path.join(self.umls_path, "MRCONSO." + self.type))
+        read_count = 0
+        for line in tqdm(reader, ascii=True):
+            if self.type == "txt":
+                l = [t.replace('"', "") for t in line.split(",")]
+            else:
+                l = line.strip().split("|")
+            if len(l) < 3:
+                continue
+            cui = l[0]
+            lang = l[1]
+            # lui_status = l[2].lower()  # p -> preferred
+            lui = l[3]
+            source = l[11]
+            code = l[13]
+            string = l[14]
+
+            # Second pass keeps every synonym of a retained CUI, whatever its source.
+            # (Fix: the original tested "source in cuis2keep", which compares a source
+            # abbreviation against CUIs and never matches.)
+            if (self.source_range is None or cui in cuis2keep) and (
+                self.lang_range is None or lang in self.lang_range
+            ):
+                if not lui in self.lui_set:
+                    read_count += 1
+                    self.str2cui[string] = cui
+                    self.str2cui[string.lower()] = cui
+                    clean_string = self.clean(string)
+                    self.str2cui[clean_string] = cui
+
+                    if not cui in self.cui2str:
+                        self.cui2str[cui] = set()
+                    self.cui2str[cui].update([clean_string])
+                    self.code2cui[code] = cui
+                    self.lui_set.update([lui])
+
+            # For debug
+            # if read_count > 1000:
+            #     break
+
+        self.cui = list(self.cui2str.keys())
+        shuffle(self.cui)
+        self.cui_count = len(self.cui)
+
+        print("cui count:", self.cui_count)
+        print("str2cui count:", len(self.str2cui))
+        print("MRCONSO count:", read_count)
+
+    def load_rel(self):
+        reader = byLineReader(os.path.join(self.umls_path, "MRREL." + self.type))
+        self.rel = set()
+        for line in tqdm(reader, ascii=True):
+            if self.type == "txt":
+                l = [t.replace('"', "") for t in line.split(",")]
+            else:
+                l = line.strip().split("|")
+            cui0 = l[0]
+            re = l[3]
+            cui1 = l[4]
+            rel = l[7]
+            if cui0 in self.cui2str and cui1 in self.cui2str:
+                str_rel = "\t".join([cui0, cui1, re, rel])
+                if not str_rel in self.rel and cui0 != cui1:
+                    self.rel.update([str_rel])
+
+            # For debug
+            # if len(self.rel) > 1000:
+            #     break
+        self.rel = list(self.rel)
+
+        print("rel count:", len(self.rel))
+
+    def load_sty(self):
+        reader = byLineReader(os.path.join(self.umls_path, "MRSTY." 
+ def load_sty(self): + reader = byLineReader(os.path.join(self.umls_path, "MRSTY." + self.type)) + self.cui2sty = {} + for line in tqdm(reader, ascii=True): + if self.type == "txt": + l = [t.replace('"', "") for t in line.split(",")] + else: + l = line.strip().split("|") + cui = l[0] + sty = l[3] + if cui in self.cui2str: + self.cui2sty[cui] = sty + + print("sty count:", len(self.cui2sty)) + + def clean( + self, term, lower=True, clean_NOS=True, clean_bracket=True, clean_dash=True + ): + term = " " + term + " " + if lower: + term = term.lower() + if clean_NOS: + term = term.replace(" NOS ", " ").replace(" nos ", " ") + if clean_bracket: + term = re.sub("\\(.*?\\)", "", term) + if clean_dash: + term = term.replace("-", " ") + term = " ".join([w for w in term.split() if w]) + return term + + def search_by_code(self, code): + if code in self.cui2str: + return list(self.cui2str[code]) + if code in self.code2cui: + return list(self.cui2str[self.code2cui[code]]) + return None + + def search_by_string_list(self, string_list): + for string in string_list: + if string in self.str2cui: + find_string = self.cui2str[self.str2cui[string]] + return [string for string in find_string if not string in string_list] + if string.lower() in self.str2cui: + find_string = self.cui2str[self.str2cui[string.lower()]] + return [string for string in find_string if not string in string_list] + return None + + def search(self, code=None, string_list=None, max_number=-1): + result_by_code = self.search_by_code(code) + if result_by_code is not None: + if max_number > 0: + return result_by_code[0 : min(len(result_by_code), max_number)] + return result_by_code + result_by_string = self.search_by_string_list(string_list) + if result_by_string is not None: + if max_number > 0: + return result_by_string[0 : min(len(result_by_string), max_number)] + return result_by_string + return None + + +if __name__ == "__main__": + umls = UMLS("E:\\code\\research\\umls") + # print(umls.search_by_code("282299006")) + # print(umls.search_by_string_list(["Backache", "aching muscles in back"])) + # print(umls.search(code="95891005", max_number=10)) + # ipdb.set_trace() + +""" +['unable to balance', 'loss of balance'] +['backache', 'back pain', 'dorsalgi', 'dorsodynia', 'pain over the back', 'back pain [disease/finding]', 'back ache', 'dorsal back pain', 'backach', 'dorsalgia', 'dorsal pain', 'notalgia', 'unspecified back pain', 'backpain', 'backache symptom'] +['influenza like illness', 'flu-like illness', 'influenza-like illness'] +""" diff --git a/Normalisation/training/log/generate_umls_embeddings_coder_eds_cased.sh-11575.err b/Normalisation/training/log/generate_umls_embeddings_coder_eds_cased.sh-11575.err new file mode 100644 index 000000000..6d050726f --- /dev/null +++ b/Normalisation/training/log/generate_umls_embeddings_coder_eds_cased.sh-11575.err @@ -0,0 +1,2 @@ +/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/lib/python3.7/site-packages/transformers/tokenization_utils_base.py:2345: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).
+ FutureWarning, diff --git a/Normalisation/training/log/generate_umls_embeddings_coder_eds_cased.sh-11575.out b/Normalisation/training/log/generate_umls_embeddings_coder_eds_cased.sh-11575.out new file mode 100644 index 000000000..9bb6e53d0 --- /dev/null +++ b/Normalisation/training/log/generate_umls_embeddings_coder_eds_cased.sh-11575.out @@ -0,0 +1,2 @@ +starting +/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python diff --git a/Normalisation/training/loss.py b/Normalisation/training/loss.py new file mode 100644 index 000000000..4b638f612 --- /dev/null +++ b/Normalisation/training/loss.py @@ -0,0 +1,124 @@ +import torch +import torch.nn as nn + + +class AMSoftmax(nn.Module): + # Additive-margin softmax: the margin m is subtracted from the target-class + # cosine before scaling by s, which tightens each class cluster + def __init__(self, in_feats, n_classes=10, m=0.35, s=30): + super(AMSoftmax, self).__init__() + self.m = m + self.s = s + self.in_feats = in_feats + self.W = torch.nn.Parameter( + torch.randn(in_feats, n_classes), requires_grad=True + ) + self.ce = nn.CrossEntropyLoss() + nn.init.xavier_normal_(self.W, gain=1) + + def forward(self, x, label): + # returns (loss, scaled margin-adjusted cosine logits) + # print(x.shape, lb.shape, self.in_feats) + # assert x.size()[0] == label.size()[0] + # assert x.size()[1] == self.in_feats + x_norm = torch.norm(x, p=2, dim=1, keepdim=True).clamp(min=1e-12) + x_norm = torch.div(x, x_norm) + w_norm = torch.norm(self.W, p=2, dim=0, keepdim=True).clamp(min=1e-12) + w_norm = torch.div(self.W, w_norm) + costh = torch.mm(x_norm, w_norm) + # print(x_norm.shape, w_norm.shape, costh.shape) + lb_view = label.view(-1, 1).to(x.device) + delt_costh = torch.zeros(costh.size()).to(x.device).scatter_(1, lb_view, self.m) + costh_m = costh - delt_costh + costh_m_s = self.s * costh_m + loss = self.ce(costh_m_s, label) + return loss, costh_m_s + + def predict(self, x): + x_norm = torch.norm(x, p=2, dim=1, keepdim=True).clamp(min=1e-12) + x_norm = torch.div(x, x_norm) + w_norm = torch.norm(self.W, p=2, dim=0, keepdim=True).clamp(min=1e-12) + w_norm = torch.div(self.W, w_norm) + costh = torch.mm(x_norm, w_norm) + return costh
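+ # Illustrative (not in the original file): at inference time the cosine scores + # returned by predict() can be mapped to class ids, e.g.: + # scores = criteria.predict(x)  # (batch, n_classes) cosine similarities + # preds = scores.argmax(dim=1)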
+ + +class MultiSimilarityLoss(nn.Module): + def __init__(self): + super(MultiSimilarityLoss, self).__init__() + self.thresh = 0.5 + self.margin = 0.1 + + self.scale_pos = 2.0 + self.scale_neg = 50.0 + + def forward(self, feats, labels): + # assert feats.size(0) == labels.size(0), \ + # f"feats.size(0): {feats.size(0)} is not equal to labels.size(0): {labels.size(0)}" + batch_size = feats.size(0) + + # Feature normalize + x_norm = torch.norm(feats, p=2, dim=1, keepdim=True).clamp(min=1e-12) + x_norm = torch.div(feats, x_norm) + + sim_mat = torch.matmul(x_norm, torch.t(x_norm)) + + epsilon = 1e-5 + loss = [] + + # unique_label, inverse_indices = torch.unique_consecutive(labels, return_inverse=True) + + for i in range(batch_size): + pos_pair_ = sim_mat[i][labels == labels[i]] + pos_pair_ = pos_pair_[pos_pair_ < 1 - epsilon] + neg_pair_ = sim_mat[i][labels != labels[i]] + + # print(pos_pair_) + # print(neg_pair_) + + if len(neg_pair_) >= 1: + pos_pair = pos_pair_[pos_pair_ - self.margin < max(neg_pair_)] + if len(pos_pair) >= 1: + pos_loss = ( + 1.0 + / self.scale_pos + * torch.log( + 1 + + torch.sum( + torch.exp(-self.scale_pos * (pos_pair - self.thresh)) + ) + ) + ) + loss.append(pos_loss) + + if len(pos_pair_) >= 1: + neg_pair = neg_pair_[neg_pair_ + self.margin > min(pos_pair_)] + if len(neg_pair) >= 1: + neg_loss = ( + 1.0 + / self.scale_neg + * torch.log( + 1 + + torch.sum( + torch.exp(self.scale_neg * (neg_pair - self.thresh)) + ) + ) + ) + loss.append(neg_loss) + + # print(labels, len(loss)) + if len(loss) == 0: + return torch.zeros([], requires_grad=True).to(feats.device) + + loss = sum(loss) / batch_size + return loss + + +if __name__ == "__main__": + criteria = AMSoftmax(20, 5) + a = torch.randn(10, 20) + lb = torch.randint(0, 5, (10,), dtype=torch.long) + # forward returns a (loss, logits) tuple, so unpack before calling backward() + loss, logits = criteria(a, lb) + loss.backward() + + print(loss.detach().numpy()) + print(list(criteria.parameters())[0].shape) + print(type(next(criteria.parameters()))) + print(lb) diff --git a/Normalisation/training/model.py b/Normalisation/training/model.py new file mode 100644 index 000000000..4fe752953 --- /dev/null +++ b/Normalisation/training/model.py @@ -0,0 +1,195 @@ +# from transformers import BertConfig, BertPreTrainedModel, BertTokenizer, BertModel +import torch +import torch.nn.functional as F +from loss import AMSoftmax +from pytorch_metric_learning import losses, miners +from torch import nn +from trans import TransE +from transformers import AutoConfig, AutoModel, AutoModelForPreTraining, AutoTokenizer +from transformers.modeling_utils import SequenceSummary + + +class UMLSPretrainedModel(nn.Module): + def __init__( + self, + device, + model_name_or_path, + cui_label_count, + rel_label_count, + sty_label_count, + re_weight=1.0, + sty_weight=0.1, + cui_loss_type="ms_loss", + trans_loss_type="TransE", + trans_margin=1.0, + ): + super(UMLSPretrainedModel, self).__init__() + + self.device = device + self.model_name_or_path = model_name_or_path + if self.model_name_or_path.find("large") >= 0: + self.feature_dim = 1024 + else: + self.feature_dim = 768 + self.bert = AutoModel.from_pretrained(model_name_or_path) + self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + self.dropout = nn.Dropout(0.1) + + self.rel_label_count = rel_label_count + self.re_weight = re_weight + + self.sty_label_count = sty_label_count + self.linear_sty = nn.Linear(self.feature_dim, self.sty_label_count) + self.sty_loss_fn = nn.CrossEntropyLoss() + self.sty_weight = sty_weight + + self.cui_loss_type = cui_loss_type + self.cui_label_count = cui_label_count + + if self.cui_loss_type == "softmax": + self.cui_loss_fn = nn.CrossEntropyLoss() + self.linear = nn.Linear(self.feature_dim, self.cui_label_count) + if self.cui_loss_type == "am_softmax": + self.cui_loss_fn = AMSoftmax(self.feature_dim, self.cui_label_count) + if self.cui_loss_type == "ms_loss": + self.cui_loss_fn = losses.MultiSimilarityLoss(alpha=2, beta=50) + self.miner = miners.MultiSimilarityMiner(epsilon=0.1) + + self.trans_loss_type = trans_loss_type + if self.trans_loss_type == "TransE": + self.re_loss_fn = TransE(trans_margin) + self.re_embedding = nn.Embedding(self.rel_label_count, self.feature_dim) + + self.standard_dataloader = None + + self.sequence_summary = SequenceSummary( + AutoConfig.from_pretrained(model_name_or_path) + ) # Now only used for XLNet + + def softmax(self, logits, label): + loss = self.cui_loss_fn(logits, label) + return loss + + def am_softmax(self, pooled_output, label): + loss, _ = self.cui_loss_fn(pooled_output, label) + return loss + + def ms_loss(self, pooled_output, label): + pairs = self.miner(pooled_output, label) + loss = self.cui_loss_fn(pooled_output, label, pairs) + return loss + + def calculate_loss(self, pooled_output=None, logits=None, label=None): + if self.cui_loss_type == "softmax": + return self.softmax(logits, label) + if self.cui_loss_type == "am_softmax": + return self.am_softmax(pooled_output, label) + if self.cui_loss_type == "ms_loss": + return self.ms_loss(pooled_output, label) + + def get_sentence_feature(self, input_ids): + # bert, albert, 
roberta + if self.model_name_or_path.find("xlnet") < 0: + outputs = self.bert(input_ids) + pooled_output = outputs[1] + return pooled_output + + # xlnet + outputs = self.bert(input_ids) + pooled_output = self.sequence_summary(outputs[0]) + return pooled_output + + # @profile + def forward( + self, + input_ids_0, + input_ids_1, + input_ids_2, + cui_label_0, + cui_label_1, + cui_label_2, + sty_label_0, + sty_label_1, + sty_label_2, + re_label, + ): + input_ids = torch.cat((input_ids_0, input_ids_1, input_ids_2), 0) + cui_label = torch.cat((cui_label_0, cui_label_1, cui_label_2)) + sty_label = torch.cat((sty_label_0, sty_label_1, sty_label_2)) + # print(input_ids.shape, cui_label.shape, sty_label.shape) + + use_len = input_ids_0.shape[0] + + pooled_output = self.get_sentence_feature(input_ids) # (3 * pair) * re_label + logits_sty = self.linear_sty(pooled_output) + sty_loss = self.sty_loss_fn(logits_sty, sty_label) + + if self.cui_loss_type == "softmax": + logits = self.linear(pooled_output) + else: + logits = None + cui_loss = self.calculate_loss(pooled_output, logits, cui_label) + + cui_0_output = pooled_output[0:use_len] + cui_1_output = pooled_output[use_len : 2 * use_len] + cui_2_output = pooled_output[2 * use_len :] + re_output = self.re_embedding(re_label) + re_loss = self.re_loss_fn(cui_0_output, cui_1_output, cui_2_output, re_output) + + loss = self.sty_weight * sty_loss + cui_loss + self.re_weight * re_loss + # print(sty_loss.device, cui_loss.device, re_loss.device) + + return loss, (sty_loss, cui_loss, re_loss) + + """ + def predict(self, input_ids): + if self.loss_type == "softmax": + return self.predict_by_softmax(input_ids) + if self.loss_type == "am_softmax": + return self.predict_by_amsoftmax(input_ids) + + def predict_by_softmax(self, input_ids): + pooled_output = self.get_sentence_feature(input_ids) + logits = self.linear(pooled_output) + return torch.max(logits, dim=1)[1], logits + + def predict_by_amsoftmax(self, input_ids): + pooled_output = self.get_sentence_feature(input_ids) + logits = self.loss_fn.predict(pooled_output) + return torch.max(logits, dim=1)[1], logits + """ + + def init_standard_feature(self): + if self.standard_dataloader is not None: + for index, batch in enumerate(self.standard_dataloader): + input_ids = batch[0].to(self.device) + outputs = self.get_sentence_feature(input_ids) + normalized_standard_feature = torch.norm( + outputs, p=2, dim=1, keepdim=True + ).clamp(min=1e-12) + normalized_standard_feature = torch.div( + outputs, normalized_standard_feature + ) + if index == 0: + self.standard_feature = normalized_standard_feature + else: + self.standard_feature = torch.cat( + (self.standard_feature, normalized_standard_feature), 0 + ) + assert self.standard_feature.shape == ( + self.num_label, + self.feature_dim, + ), self.standard_feature.shape + return None + + def predict_by_cosine(self, input_ids): + pooled_output = self.get_sentence_feature(input_ids) + + normalized_feature = torch.norm(pooled_output, p=2, dim=1, keepdim=True).clamp( + min=1e-12 + ) + normalized_feature = torch.div(pooled_output, normalized_feature) + sim_mat = torch.matmul( + normalized_feature, torch.t(self.standard_feature) + ) # batch_size * num_label + return torch.max(sim_mat, dim=1)[1], sim_mat diff --git a/Normalisation/training/sampler_util.py b/Normalisation/training/sampler_util.py new file mode 100644 index 000000000..379457bdf --- /dev/null +++ b/Normalisation/training/sampler_util.py @@ -0,0 +1,55 @@ +import torch +from torch.utils.data import DataLoader, Dataset 
+from torch.utils.data.sampler import RandomSampler, Sampler + +""" +class TmpDataset(Dataset): + def __init__(self, m=10): + self.len = m + + def __getitem__(self, index): + return (list(range(10)) * index, [0] * index) + + def __len__(self): + return self.len +""" + + +class FixedLengthBatchSampler(Sampler): + # Groups indices until the summed sample length would exceed fixed_length, so + # every batch carries a roughly constant number of tokens rather than a + # constant number of samples + def __init__(self, sampler, fixed_length, drop_last): + self.sampler = sampler + self.fixed_length = fixed_length + self.drop_last = drop_last + self.rel_sampler_count = 0 + + def __iter__(self): + batch = [] + now_length = 0 + for idx in self.sampler: + # print(batch, now_length) + sample_length = len(self.sampler.data_source[idx][-1]) * 3 + if now_length + sample_length > self.fixed_length: + # print(batch, now_length) + if batch:  # avoid yielding an empty first batch + yield batch + batch = [] + now_length = 0 + batch.append(idx) + now_length += sample_length + self.rel_sampler_count += 1 + if len(batch) > 0 and not self.drop_last: + yield batch + + +def my_collate_fn(batch): + type_count = len(batch[0]) + batch_size = sum([len(item[-1]) for item in batch]) + output = () + for i in range(type_count): + tmp = [] + for item in batch: + tmp.extend(item[i]) + if len(tmp) <= batch_size: + output += (torch.LongTensor(tmp),) + else: + output += (torch.LongTensor(tmp).reshape(batch_size, -1),) + return output diff --git a/Normalisation/training/train.py b/Normalisation/training/train.py new file mode 100644 index 000000000..ebb3a43be --- /dev/null +++ b/Normalisation/training/train.py @@ -0,0 +1,418 @@ +import argparse +import os +import pathlib +import time + +import numpy as np +import torch +from data_util import UMLSDataset, fixed_length_dataloader +from model import UMLSPretrainedModel + +# import ipdb +# try: +# from torch.utils.tensorboard import SummaryWriter +# except: +from tensorboardX import SummaryWriter +from torch import nn +from tqdm import tqdm, trange +from transformers import ( + AdamW, + get_constant_schedule_with_warmup, + get_cosine_schedule_with_warmup, + get_linear_schedule_with_warmup, +) + + +def train(args, model, train_dataloader, umls_dataset): + writer = SummaryWriter(comment="umls") + + t_total = args.max_steps + + no_decay = ["bias", "LayerNorm.weight"] + optimizer_grouped_parameters = [ + { + "params": [ + p + for n, p in model.named_parameters() + if not any(nd in n for nd in no_decay) + ], + "weight_decay": args.weight_decay, + }, + { + "params": [ + p + for n, p in model.named_parameters() + if any(nd in n for nd in no_decay) + ], + "weight_decay": 0.0, + }, + ] + + optimizer = AdamW( + optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon + ) + args.warmup_steps = int(args.warmup_steps) + if args.schedule == "linear": + scheduler = get_linear_schedule_with_warmup( + optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total + ) + if args.schedule == "constant": + scheduler = get_constant_schedule_with_warmup( + optimizer, num_warmup_steps=args.warmup_steps + ) + if args.schedule == "cosine": + scheduler = get_cosine_schedule_with_warmup( + optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total + ) + + print("***** Running training *****") + print(" Total Steps =", t_total) + print(" Steps remaining to train =", t_total - args.shift) + print(" Instantaneous batch size per GPU =", args.train_batch_size) + print( + " Total train batch size (w. 
parallel, distributed & accumulation) =", + args.train_batch_size * args.gradient_accumulation_steps, + ) + print(" Gradient Accumulation steps =", args.gradient_accumulation_steps) + + model.zero_grad() + + for i in range(args.shift): + scheduler.step() + global_step = args.shift + + best_batch_loss = 0.033 + + while True: + model.train() + epoch_iterator = tqdm(train_dataloader, desc="Iteration", ascii=True) + batch_loss = 0.0 + batch_sty_loss = 0.0 + batch_cui_loss = 0.0 + batch_re_loss = 0.0 + for _, batch in enumerate(epoch_iterator): + input_ids_0 = batch[0].to(args.device) + input_ids_1 = batch[1].to(args.device) + input_ids_2 = batch[2].to(args.device) + cui_label_0 = batch[3].to(args.device) + cui_label_1 = batch[4].to(args.device) + cui_label_2 = batch[5].to(args.device) + sty_label_0 = batch[6].to(args.device) + sty_label_1 = batch[7].to(args.device) + sty_label_2 = batch[8].to(args.device) + # use batch[9] for re, use batch[10] for rel + if args.use_re: + re_label = batch[9].to(args.device) + else: + re_label = batch[10].to(args.device) + # for item in batch: + # print(item.shape) + + loss, (sty_loss, cui_loss, re_loss) = model( + input_ids_0, + input_ids_1, + input_ids_2, + cui_label_0, + cui_label_1, + cui_label_2, + sty_label_0, + sty_label_1, + sty_label_2, + re_label, + ) + batch_loss = float(loss.item()) + batch_sty_loss = float(sty_loss.item()) + batch_cui_loss = float(cui_loss.item()) + batch_re_loss = float(re_loss.item()) + + # tensorboardX + writer.add_scalar( + "rel_count", + train_dataloader.batch_sampler.rel_sampler_count, + global_step=global_step, + ) + writer.add_scalar("batch_loss", batch_loss, global_step=global_step) + writer.add_scalar("batch_sty_loss", batch_sty_loss, global_step=global_step) + writer.add_scalar("batch_cui_loss", batch_cui_loss, global_step=global_step) + writer.add_scalar("batch_re_loss", batch_re_loss, global_step=global_step) + + if args.gradient_accumulation_steps > 1: + loss = loss / args.gradient_accumulation_steps + loss.backward() + + epoch_iterator.set_description( + "Rel_count: %s, Loss: %0.4f, Sty: %0.4f, Cui: %0.4f, Re: %0.4f" + % ( + train_dataloader.batch_sampler.rel_sampler_count, + batch_loss, + batch_sty_loss, + batch_cui_loss, + batch_re_loss, + ) + ) + + if (global_step + 1) % args.gradient_accumulation_steps == 0: + torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) + optimizer.step() + scheduler.step() # Update learning rate schedule + model.zero_grad() + + global_step += 1 + if batch_loss < best_batch_loss: + best_batch_loss = batch_loss + save_path = os.path.join(args.output_dir, f"model_{global_step}.pth") + torch.save(model, save_path) + + # re_embedding + if args.use_re: + writer.add_embedding( + model.re_embedding.weight, + metadata=umls_dataset.re2id.keys(), + global_step=global_step, + tag="re embedding", + ) + else: + # print(len(umls_dataset.rel2id)) + # print(model.re_embedding.weight.shape) + writer.add_embedding( + model.re_embedding.weight, + metadata=umls_dataset.rel2id.keys(), + global_step=global_step, + tag="rel embedding", + ) + + # sty_parameter + writer.add_embedding( + model.linear_sty.weight, + metadata=umls_dataset.sty2id.keys(), + global_step=global_step, + tag="sty weight", + ) + + if global_step % args.save_step == 0 and global_step > 0: + save_path = os.path.join(args.output_dir, f"model_{global_step}.pth") + torch.save(model, save_path) + + # re_embedding + if args.use_re: + writer.add_embedding( + model.re_embedding.weight, + metadata=umls_dataset.re2id.keys(), + 
global_step=global_step, + tag="re embedding", + ) + else: + # print(len(umls_dataset.rel2id)) + # print(model.re_embedding.weight.shape) + writer.add_embedding( + model.re_embedding.weight, + metadata=umls_dataset.rel2id.keys(), + global_step=global_step, + tag="rel embedding", + ) + + # sty_parameter + writer.add_embedding( + model.linear_sty.weight, + metadata=umls_dataset.sty2id.keys(), + global_step=global_step, + tag="sty weight", + ) + + if args.max_steps > 0 and global_step > args.max_steps: + return None + + return None + + +def run(args): + torch.manual_seed(args.seed) # cpu + torch.cuda.manual_seed(args.seed) # gpu + np.random.seed(args.seed) # numpy + torch.backends.cudnn.deterministic = True # cudnn + + # args.output_dir = args.output_dir + "_" + str(int(time.time())) + + # dataloader + if args.lang == "eng": + lang = ["ENG"] + if args.lang == "all": + lang = None + if args.lang == "eng_fr": + lang = ["ENG", "FRE"] + # assert args.model_name_or_path.find("bio") == -1, "Should use multi-language model" + umls_dataset = UMLSDataset( + umls_folder=args.umls_dir, + model_name_or_path=args.model_name_or_path, + lang=lang, + json_save_path=args.output_dir, + ) + umls_dataloader = fixed_length_dataloader( + umls_dataset, fixed_length=args.train_batch_size, num_workers=args.num_workers + ) + + if args.use_re: + rel_label_count = len(umls_dataset.re2id) + else: + rel_label_count = len(umls_dataset.rel2id) + + model_load = False + if os.path.exists(args.output_dir): + save_list = [] + for f in os.listdir(args.output_dir): + if f[0:5] == "model" and f[-4:] == ".pth": + save_list.append(int(f[6:-4])) + if len(save_list) > 0: + args.shift = max(save_list) + if os.path.exists(os.path.join(args.output_dir, "last_model.pth")): + model = torch.load(os.path.join(args.output_dir, "last_model.pth")).to( + args.device + ) + model_load = True + else: + model = torch.load( + os.path.join(args.output_dir, f"model_{max(save_list)}.pth") + ).to(args.device) + model_load = True + if not model_load: + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) + model = UMLSPretrainedModel( + device=args.device, + model_name_or_path=args.model_name_or_path, + cui_label_count=len(umls_dataset.cui2id), + rel_label_count=rel_label_count, + sty_label_count=len(umls_dataset.sty2id), + re_weight=args.re_weight, + sty_weight=args.sty_weight, + ).to(args.device) + args.shift = 0 + model_load = True + + if args.do_train: + torch.save(args, os.path.join(args.output_dir, "training_args.bin")) + train(args, model, umls_dataloader, umls_dataset) + torch.save(model, os.path.join(args.output_dir, "last_model.pth")) + + return None + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--umls_dir", + default="../umls", + type=str, + help="UMLS dir", + ) + parser.add_argument( + "--model_name_or_path", + default="../biobert_v1.1", + type=str, + help="Path to pre-trained model or shortcut name selected in the list: ", + ) + parser.add_argument( + "--output_dir", + default="output", + type=str, + help="The output directory where the model predictions and checkpoints will be written.", + ) + parser.add_argument( + "--save_step", + default=10000, + type=int, + help="Save step", + ) + + # Other parameters + parser.add_argument( + "--max_seq_length", + default=32, + type=int, + help="The maximum total input sequence length after tokenization. 
Sequences longer " + "than this will be truncated, sequences shorter will be padded.", + ) + parser.add_argument( + "--do_train", default=True, type=bool, help="Whether to run training." + ) + parser.add_argument( + "--train_batch_size", + default=256, + type=int, + help="Batch size per GPU/CPU for training.", + ) + parser.add_argument( + "--gradient_accumulation_steps", + type=int, + default=8, + help="Number of updates steps to accumulate before performing a backward/update pass.", + ) + parser.add_argument( + "--learning_rate", + default=2e-5, + type=float, + help="The initial learning rate for Adam.", + ) + parser.add_argument( + "--weight_decay", + default=0.01, + type=float, + help="Weight decay if we apply some.", + ) + parser.add_argument( + "--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer." + ) + parser.add_argument( + "--max_grad_norm", default=1.0, type=float, help="Max gradient norm." + ) + parser.add_argument( + "--max_steps", + default=1000000, + type=int, + help="If > 0: set total number of training steps to perform. Override num_train_epochs.", + ) + parser.add_argument( + "--warmup_steps", + default=10000, + help="Linear warmup over warmup_steps or a float.", + ) + parser.add_argument("--device", type=str, default="cuda:0", help="device") + parser.add_argument( + "--seed", type=int, default=72, help="random seed for initialization" + ) + parser.add_argument( + "--schedule", + type=str, + default="linear", + choices=["linear", "cosine", "constant"], + help="Schedule.", + ) + parser.add_argument( + "--trans_margin", type=float, default=1.0, help="Margin of TransE." + ) + parser.add_argument( + "--use_re", default=False, type=bool, help="Whether to use re or rel." + ) + parser.add_argument( + "--num_workers", + default=1, + type=int, + help="Num workers for data loader, only 0 can be used for Windows", + ) + parser.add_argument( + "--lang", + default="eng", + type=str, + choices=["eng", "all", "eng_fr"], + help="language range, eng or all", + ) + parser.add_argument("--sty_weight", type=float, default=0.0, help="Weight of sty.") + parser.add_argument("--re_weight", type=float, default=1.0, help="Weight of re.") + + args = parser.parse_args() + + run(args) + + +if __name__ == "__main__": + main() diff --git a/Normalisation/training/train_coder_slurm.cfg b/Normalisation/training/train_coder_slurm.cfg new file mode 100644 index 000000000..af65aa9bb --- /dev/null +++ b/Normalisation/training/train_coder_slurm.cfg @@ -0,0 +1,7 @@ +[slurm] +gpu_type = v100 +log_path = "BioMedics/bash_scripts/Coder_model/log_train_coder" +mem = 40G +job_duration = "72:00:00" +n_gpu = 1 +n_cpu = 5 diff --git a/Normalisation/training/trans.py b/Normalisation/training/trans.py new file mode 100644 index 000000000..cc2ec75ca --- /dev/null +++ b/Normalisation/training/trans.py @@ -0,0 +1,18 @@ +import torch +import torch.nn.functional as F +from torch import nn + + +class TransE(nn.Module): + def __init__(self, margin=1.0): + super(TransE, self).__init__() + self.margin = margin + + def forward(self, cui_0, cui_1, cui_2, re): + pos = cui_0 + re - cui_1 + neg = cui_0 + re - cui_2 + return torch.mean( + F.relu( + self.margin + torch.norm(pos, p=2, dim=1) - torch.norm(neg, p=2, dim=1) + ) + ) diff --git a/bash_scripts/NER_model/expe_data_size.sh b/bash_scripts/NER_model/expe_data_size.sh new file mode 100644 index 000000000..3b395e0e9 --- /dev/null +++ b/bash_scripts/NER_model/expe_data_size.sh @@ -0,0 +1,55 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 
24:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuV100 +#SBATCH --output=logs/slurm-%j-stdout.log +#SBATCH --error=logs/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script + +cd 'data/scratch/cse200093/BioMedics/NER_model' +source ../.venv/bin/activate +conda deactivate + +for i in 5 10 15 20 25 30 35 40 45 50 55 60 62 +do + echo ----------------- + echo CONVERT $i DOCS + echo ----------------- + + python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/train --output-path ./corpus/expe_data_size/train_$i.spacy --n-limit $i + + echo ----------------- + echo TRAIN ON $i DOCS + echo ----------------- + + python -m spacy train configs/config.cfg --output ./training/expe_data_size/model_$i/ --paths.train ./corpus/expe_data_size/train_$i.spacy --paths.dev ./corpus/dev.spacy --nlp.lang eds --gpu-id 0 + + + echo ----------------- + echo REMOVE MODEL LAST + echo ----------------- + + rm -rf ./training/expe_data_size/model_$i/model-last + + echo ----------------- + echo INFER TEST DOCS WITH MODEL TRAINED ON $i DOCS + echo ----------------- + + python ./scripts/infer.py --model ./training/expe_data_size/model_$i/model-best/ --input ./data/NLP_diabeto/test/ --output ./data/NLP_diabeto/expe_data_size/pred_$i/ --format brat + + echo ----------------- + echo EVALUATE MODEL TRAINED ON $i DOCS + echo ----------------- + + python ./scripts/evaluate.py ./training/expe_data_size/model_$i/model-best ./corpus/test.spacy --output ./training/expe_data_size/model_$i/test_metrics.json --docbin ./data/NLP_diabeto/expe_data_size/pred_$i.spacy --gpu-id 0 + +done + + +echo --Training_done--- + +echo --------------- diff --git a/bash_scripts/NER_model/expe_hyperparams.sh b/bash_scripts/NER_model/expe_hyperparams.sh new file mode 100644 index 000000000..569d22a8e --- /dev/null +++ b/bash_scripts/NER_model/expe_hyperparams.sh @@ -0,0 +1,46 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 24:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuV100 +#SBATCH --output=logs/slurm-%j-stdout.log +#SBATCH --error=logs/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script + +cd 'data/scratch/cse200093/BioMedics/NER_model' +source ../.venv/bin/activate +conda deactivate + +echo ----------------- +echo CONVERT DOCS +echo ----------------- + +python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/train --output-path ./corpus/train.spacy +python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/test --output-path ./corpus/test.spacy +python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/val --output-path ./corpus/dev.spacy + +echo ----------------- +echo TRAIN ON DOCS +echo ----------------- + +python -m spacy train ./configs/config.cfg --output ./training/expe_section/model_all_labels/ --paths.train ./corpus/train.spacy --paths.dev ./corpus/dev.spacy --nlp.lang eds --gpu-id 0 + +echo ----------------- +echo INFER TEST DOCS +echo ----------------- + +python 
./scripts/infer.py --model ./training/expe_section/model_all_labels/model-best --input ./data/NLP_diabeto/test/ --output ./data/NLP_diabeto/expe_section/pred_model_all_labels/ --format brat + + +echo ----------------- +echo EVALUATE MODEL +echo ----------------- + +python ./scripts/evaluate.py ./training/expe_section/model_all_labels/model-best ./corpus/test.spacy --output ./training/expe_section/model_all_labels/test_metrics.json --docbin ./data/NLP_diabeto/expe_section/pred_model_all_labels.spacy --gpu-id 0 + +echo --Training_done--- + +echo --------------- diff --git a/bash_scripts/NER_model/expe_model_lang.sh b/bash_scripts/NER_model/expe_model_lang.sh new file mode 100644 index 000000000..ec447e7e3 --- /dev/null +++ b/bash_scripts/NER_model/expe_model_lang.sh @@ -0,0 +1,60 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 24:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuV100 +#SBATCH --output=logs/slurm-%j-stdout.log +#SBATCH --error=logs/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script + +cd 'data/scratch/cse200093/BioMedics/NER_model' +source ../.venv/bin/activate +conda deactivate + + + +# echo ----------------- +# echo CONVERT DOCS +# echo ----------------- + +# python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/train --output-path ./corpus/train.spacy +# python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/test --output-path ./corpus/test.spacy +# python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/val --output-path ./corpus/dev.spacy + +for lang_model in "camembert_base" "DrBert" "camembert_bio" "eds_finetune" "eds_scratch" +do + + echo ----------------- + echo TRAIN $lang_model ON DOCS + echo ----------------- + + python -m spacy train ./configs/expe_lang_model/config_$lang_model.cfg --output ./training/expe_lang_model/model_$lang_model/ --paths.train ./corpus/train.spacy --paths.dev ./corpus/dev.spacy --nlp.lang eds --gpu-id 0 + + + echo ----------------- + echo REMOVE $lang_model MODEL LAST + echo ----------------- + + rm -rf ./training/expe_lang_model/model_$lang_model/model-last + + echo ----------------- + echo INFER $lang_model TEST DOCS + echo ----------------- + + python ./scripts/infer.py --model ./training/expe_lang_model/model_$lang_model/model-best --input ./data/NLP_diabeto/test/ --output ./data/NLP_diabeto/expe_lang_model/pred_model_$lang_model/ --format brat + + + echo ----------------- + echo EVALUATE $lang_model MODEL + echo ----------------- + + python ./scripts/evaluate.py ./training/expe_lang_model/model_$lang_model/model-best ./corpus/test.spacy --output ./training/expe_lang_model/model_$lang_model/test_metrics.json --docbin ./data/NLP_diabeto/expe_lang_model/pred_model_$lang_model.spacy --gpu-id 0 + +done + +echo --Training_done--- + +echo --------------- diff --git a/bash_scripts/NER_model/infer.sh b/bash_scripts/NER_model/infer.sh new file mode 100644 index 000000000..e047369e9 --- /dev/null +++ b/bash_scripts/NER_model/infer.sh @@ -0,0 +1,21 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 24:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuT4 +#SBATCH --output=logs/slurm-%j-stdout.log +#SBATCH 
--error=logs/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : + +echo starting +conda activate pierrenv + +cd '/export/home/cse200093/Jacques_Bio/BioMedics/eds-medic' + +#python ./scripts/infer.py --model ~/RV_Inter_conf/model-best/ --input ~/RV_Inter_conf/unnested_sosydiso_qualifiers_final/test_ --output ~/RV_Inter_conf/usqf_pred/ --format brat + +python ./scripts/infer.py --model ./training/model-best/ --input ../data/lupus_erythemateux_dissemine_raw --output ../data/lupus_erythemateux_dissemine_pred/ --format brat diff --git a/bash_scripts/NER_model/save.sh b/bash_scripts/NER_model/save.sh new file mode 100644 index 000000000..c9f4b9c9f --- /dev/null +++ b/bash_scripts/NER_model/save.sh @@ -0,0 +1,26 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 24:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuV100 +#SBATCH --output=logs/slurm-%j-stdout.log +#SBATCH --error=logs/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : + +echo starting +conda activate pierrenv +cd '/export/home/cse200093/Jacques_Bio/BioMedics/eds-medic' + +echo ---- Building dvc.yaml ---- + +python -m spacy project dvc + +echo ---- Saving Brat files ---- + +python -m spacy project run save_to_brat --force + +echo --------------- diff --git a/bash_scripts/NER_model/test.sh b/bash_scripts/NER_model/test.sh new file mode 100644 index 000000000..9ea204864 --- /dev/null +++ b/bash_scripts/NER_model/test.sh @@ -0,0 +1,26 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 1:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuT4 +#SBATCH --output=logs/slurm-%j-stdout.log +#SBATCH --error=logs/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script +# your code here : + +echo starting +conda activate pierrenv +cd '/export/home/cse200093/Jacques_Bio/BioMedics/eds-medic' + +echo ---- Building dvc.yaml ---- + +python -m spacy project dvc + +echo ---- Testing model ---- + +python -m spacy project run evaluate --force + +echo --------------- diff --git a/bash_scripts/NER_model/train.sh b/bash_scripts/NER_model/train.sh new file mode 100644 index 000000000..8a595abde --- /dev/null +++ b/bash_scripts/NER_model/train.sh @@ -0,0 +1,35 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 1:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuV100 +#SBATCH --output=logs/slurm-%j-stdout.log +#SBATCH --error=logs/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source 
$HOME/.user_conda/miniconda/etc/profile.d/conda.sh # sourcing this script + +conda activate pierrenv +cd '/export/home/cse200093/Jacques_Bio/BioMedics/eds-medic' + + +python -m spacy project dvc + +echo dvc.yaml built successfully + +echo ----------------- +echo CONVERT +echo ----------------- + +python -m spacy project run convert + +echo ----------------- +echo TRAIN +echo ----------------- + +dvc repro -f 2>&1 | tee training/train.log + +echo --Training_done--- + +echo --------------- diff --git a/bash_scripts/NER_model/train_v1.sh b/bash_scripts/NER_model/train_v1.sh new file mode 100644 index 000000000..ba40c59c8 --- /dev/null +++ b/bash_scripts/NER_model/train_v1.sh @@ -0,0 +1,57 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuV100 +#SBATCH --output=logs/slurm-%j-stdout.log +#SBATCH --error=logs/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # sourcing this script + +cd 'data/scratch/cse200093/BioMedics/NER_model' +source ../.venv/bin/activate +conda deactivate + +echo ----------------- +echo CONVERT DOCS +echo ----------------- + +python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/train_test --output-path ./corpus/train_test.spacy +python scripts/convert.py --lang eds --input-path ./data/NLP_diabeto/val --output-path ./corpus/dev.spacy + +echo ----------------- +echo TRAIN ON DOCS +echo ----------------- + +python -m spacy train ./configs/config_v1.cfg --output ./training/model_v1/ --paths.train ./corpus/train_test.spacy --paths.dev ./corpus/dev.spacy --nlp.lang eds --gpu-id 0 + +echo ----------------- +echo INFER LUPUS DOCS +echo ----------------- + +python ./scripts/infer.py --model ./training/model_v1/model-best/ --input ../data/CRH/raw/lupus_erythemateux_dissemine/ --output ../data/CRH/pred/lupus_erythemateux_dissemine/ --format brat + +echo ----------------- +echo INFER MALADIE TAKAYASU DOCS +echo ----------------- + +python ./scripts/infer.py --model ./training/model_v1/model-best/ --input ../data/CRH/raw/maladie_de_takayasu/ --output ../data/CRH/pred/maladie_de_takayasu/ --format brat + +echo ----------------- +echo INFER SCLERODERMIE SYSTEMIQUE DOCS +echo ----------------- + +python ./scripts/infer.py --model ./training/model_v1/model-best/ --input ../data/CRH/raw/sclerodermie_systemique/ --output ../data/CRH/pred/sclerodermie_systemique/ --format brat + +echo ----------------- +echo INFER SAPL DOCS +echo ----------------- + +python ./scripts/infer.py --model ./training/model_v1/model-best/ --input ../data/CRH/raw/syndrome_des_anti-phospholipides/ --output ../data/CRH/pred/syndrome_des_anti-phospholipides/ --format brat + + +echo --Inference_done--- + +echo --------------- diff --git a/bash_scripts/Normalisation/extract_measurement.sh b/bash_scripts/Normalisation/extract_measurement.sh new file mode 100644 index 000000000..e4cf11408 --- /dev/null +++ b/bash_scripts/Normalisation/extract_measurement.sh @@ -0,0 +1,39 @@ +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh + +cd ~/scratch/BioMedics +source .venv/bin/activate +conda deactivate + + +echo ----------------- +echo EXTRACTING MEASUREMENT VALUES AND UNITS USING BIO_COMP LABEL AND RULES.
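+# Note (illustrative rewrite, not in the original script): the four per-cohort +# calls below could equally be written as a loop: +# for cohort in maladie_de_takayasu lupus_erythemateux_dissemine sclerodermie_systemique syndrome_des_anti-phospholipides; do +#     python extract_measurement/main.py ./data/CRH/pred/$cohort ./data/bio_results/$cohort +# done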
+echo ----------------- + +echo ----------------- +echo EXTRACT MEASUREMENT FROM MALADIE TAKAYASU +echo ----------------- + +python extract_measurement/main.py ./data/CRH/pred/maladie_de_takayasu ./data/bio_results/maladie_de_takayasu + +echo ----------------- +echo EXTRACT MEASUREMENT FROM LUPUS +echo ----------------- + +python extract_measurement/main.py ./data/CRH/pred/lupus_erythemateux_dissemine ./data/bio_results/lupus_erythemateux_dissemine + +echo ----------------- +echo EXTRACT MEASUREMENT FROM SCLERODERMIE SYSTEMIQUE +echo ----------------- + +python extract_measurement/main.py ./data/CRH/pred/sclerodermie_systemique ./data/bio_results/sclerodermie_systemique + +echo ----------------- +echo EXTRACT MEASUREMENT FROM SAPL +echo ----------------- + +python extract_measurement/main.py ./data/CRH/pred/syndrome_des_anti-phospholipides ./data/bio_results/syndrome_des_anti-phospholipides + + +echo --EXTRACTION_FINISHED--- + +echo --------------- diff --git a/bash_scripts/Normalisation/infer_coder.sh b/bash_scripts/Normalisation/infer_coder.sh new file mode 100644 index 000000000..2e9a09a26 --- /dev/null +++ b/bash_scripts/Normalisation/infer_coder.sh @@ -0,0 +1,43 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuV100 +#SBATCH --output=log_infer_coder/slurm-%j-stdout.log +#SBATCH --error=log_infer_coder/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script + +cd 'data/scratch/cse200093/BioMedics' +source .venv/bin/activate +conda deactivate + +echo ----------------- +echo NORMALIZE LUPUS DOCS +echo ----------------- + +python normalisation/inference/main.py data/bio_results/lupus_erythemateux_dissemine/pred_with_extraction.json data/bio_results/lupus_erythemateux_dissemine/norm_coder_all.json + +echo ----------------- +echo NORMALIZE MALADIE TAKAYASU DOCS +echo ----------------- + +python normalisation/inference/main.py data/bio_results/maladie_de_takayasu/pred_with_extraction.json data/bio_results/maladie_de_takayasu/norm_coder_all.json + +echo ----------------- +echo NORMALIZE SCLERODERMIE SYSTEMIQUE DOCS +echo ----------------- + +python normalisation/inference/main.py data/bio_results/sclerodermie_systemique/pred_with_extraction.json data/bio_results/sclerodermie_systemique/norm_coder_all.json + +echo ----------------- +echo NORMALIZE SAPL DOCS +echo ----------------- + +python normalisation/inference/main.py data/bio_results/syndrome_des_anti-phospholipides/pred_with_extraction.json data/bio_results/syndrome_des_anti-phospholipides/norm_coder_all.json + +echo --NORMALIZATION_FINISHED--- + +echo --------------- diff --git a/bash_scripts/Normalisation/infer_coder_quaero.sh b/bash_scripts/Normalisation/infer_coder_quaero.sh new file mode 100644 index 000000000..1d079150c --- /dev/null +++ b/bash_scripts/Normalisation/infer_coder_quaero.sh @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=ner_med_training +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem=20000 +#SBATCH --partition gpuV100 +#SBATCH --output=log_infer_coder/slurm-%j-stdout.log +#SBATCH --error=log_infer_coder/slurm-%j-stderr.log +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh 
--container-mounts=/export/home/$USER:/export/home/$USER,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh # appel de ce script + +cd 'data/scratch/cse200093/BioMedics' +source .venv/bin/activate +conda deactivate + + +echo ----------------- +echo NORMALIZE ANNOTATED DOCS +echo ----------------- + +python normalisation/inference/main.py normalisation/data/CRH/annotated_umls_snomed_full.json normalisation/data/pred_coder_eds/annotated_bio_micro.json + +echo ----------------- +echo NORMALIZE QUAERO DOCS +echo ----------------- + +python normalisation/inference/main.py normalisation/data/quaero_bio_micro.json normalisation/data/pred_coder_eds/quaero_bio_micro.json + +echo --NORMALIZATION_FINISHED--- + +echo --------------- diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:03:31/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:03:31/config.cfg new file mode 100644 index 000000000..b306974ac --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:03:31/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-06-16_15:03:31" +stdout_path = "log_train_coder/2023-06-16_15:03:31/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-06-16_15:03:31/slurm-%j-stderr.log" +copy_scratch = true +python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:03:31/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:03:31/sbatch.sh new file mode 100644 index 000000000..fe3e9629a --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:03:31/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-06-16_15:03:31/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-06-16_15:03:31/slurm-%j-stderr.log +export 
PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 2 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:07:58/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:07:58/config.cfg new file mode 100644 index 000000000..6929599b8 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:07:58/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "t4" +gpu_type_upper = "T4" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-06-16_15:07:58" +stdout_path = "log_train_coder/2023-06-16_15:07:58/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-06-16_15:07:58/slurm-%j-stderr.log" +copy_scratch = true +python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:07:58/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:07:58/sbatch.sh new file mode 100644 index 000000000..d7e97a535 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:07:58/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:t4:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuT4 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-06-16_15:07:58/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-06-16_15:07:58/slurm-%j-stderr.log +export 
PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 2 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:08/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:08/config.cfg
new file mode 100644
index 000000000..aa4607ccb
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:08/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "t4"
+gpu_type_upper = "T4"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-16_15:09:08"
+stdout_path = "log_train_coder/2023-06-16_15:09:08/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-16_15:09:08/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
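The timestamped config.cfg/sbatch.sh pairs in this hunk and the ones that follow are auto-generated Slurm submission logs, one directory per launch attempt. As a minimal usage sketch (not part of the diff, and assuming the job is submitted from bash_scripts/Normalisation so the relative --output/--error paths resolve), such a run could be launched and followed with standard Slurm tooling:

    sbatch log_train_coder/2023-06-16_15:09:08/sbatch.sh             # submit the generated script
    squeue -u cse200093                                              # watch the job state
    tail -f log_train_coder/2023-06-16_15:09:08/slurm-*-stdout.log   # follow training output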
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:08/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:08/sbatch.sh
new file mode 100644
index 000000000..1d549b2e4
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:08/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:t4:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuT4
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-16_15:09:08/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-16_15:09:08/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 2 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:57/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:57/config.cfg
new file mode 100644
index 000000000..3d9017273
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:57/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "t4"
+gpu_type_upper = "T4"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-16_15:09:57"
+stdout_path = "log_train_coder/2023-06-16_15:09:57/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-16_15:09:57/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:57/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:57/sbatch.sh
new file mode 100644
index 000000000..c9ff7bea2
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:09:57/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:t4:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuT4
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-16_15:09:57/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-16_15:09:57/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 2 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:11:04/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:11:04/config.cfg
new file mode 100644
index 000000000..263b2a090
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:11:04/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "t4"
+gpu_type_upper = "T4"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-16_15:11:04"
+stdout_path = "log_train_coder/2023-06-16_15:11:04/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-16_15:11:04/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
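Note that the PATH exports captured in these generated scripts had accreted the same HDP and JupyterHub entries several times over (they are condensed to a single pass above). Purely as an illustrative shell idiom, unrelated to the committed scripts, duplicates can be stripped while preserving order before such an export is written:

    # keep the first occurrence of each PATH entry, drop later repeats
    export PATH="$(printf '%s' "$PATH" | awk -v RS=: -v ORS=: '!seen[$0]++' | sed 's/:$//')"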
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:11:04/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:11:04/sbatch.sh
new file mode 100644
index 000000000..bc62fdd39
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:11:04/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:t4:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuT4
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-16_15:11:04/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-16_15:11:04/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 2 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:16:23/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:16:23/config.cfg
new file mode 100644
index 000000000..0ed036c20
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:16:23/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "t4"
+gpu_type_upper = "T4"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-16_15:16:23"
+stdout_path = "log_train_coder/2023-06-16_15:16:23/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-16_15:16:23/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:16:23/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:16:23/sbatch.sh
new file mode 100644
index 000000000..28b201c13
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:16:23/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:t4:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuT4
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-16_15:16:23/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-16_15:16:23/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:20:28/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:20:28/config.cfg
new file mode 100644
index 000000000..5fe1c7048
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:20:28/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "t4"
+gpu_type_upper = "T4"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-16_15:20:28"
+stdout_path = "log_train_coder/2023-06-16_15:20:28/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-16_15:20:28/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
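From the 15:16:23 attempt onward, the launches trade --train_batch_size against --gradient_accumulation_steps. Under the common convention that one optimizer step accumulates gradient_accumulation_steps micro-batches (an assumption about train.py worth verifying, since some training scripts instead divide the batch size by the accumulation steps), the effective batch per optimizer update works out as:

    echo $((1024 * 2))   # earlier runs: 2048 examples per optimizer step
    echo $((1024 * 8))   # 15:16:23 run: 8192
    echo $((256 * 8))    # later runs: back to 2048, with a smaller per-step memory footprint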
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:20:28/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:20:28/sbatch.sh
new file mode 100644
index 000000000..0c54f6bbb
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:20:28/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:t4:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuT4
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-16_15:20:28/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-16_15:20:28/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 256 --lang eng_fr
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:24:27/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:24:27/config.cfg
new file mode 100644
index 000000000..434cc5593
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:24:27/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "t4"
+gpu_type_upper = "T4"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-16_15:24:27"
+stdout_path = "log_train_coder/2023-06-16_15:24:27/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-16_15:24:27/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:24:27/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:24:27/sbatch.sh
new file mode 100644
index 000000000..37a0d5bf0
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-16_15:24:27/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:t4:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuT4
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-16_15:24:27/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-16_15:24:27/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 256 --lang eng_fr
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-19_12:50:04/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-19_12:50:04/config.cfg
new file mode 100644
index 000000000..088c843c7
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-19_12:50:04/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "t4"
+gpu_type_upper = "T4"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-19_12:50:04"
+stdout_path = "log_train_coder/2023-06-19_12:50:04/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-19_12:50:04/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-19_12:50:04/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-19_12:50:04/sbatch.sh
new file mode 100644
index 000000000..9e50d458d
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-19_12:50:04/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:t4:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuT4
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-19_12:50:04/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-19_12:50:04/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 256 --lang eng_fr --do_train False
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:33:55/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:33:55/config.cfg
new file mode 100644
index 000000000..ffb6a1cd1
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:33:55/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "v100"
+gpu_type_upper = "V100"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-23_15:33:55"
+stdout_path = "log_train_coder/2023-06-23_15:33:55/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-23_15:33:55/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
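The 2023-06-19 run above adds --do_train False, presumably to skip training and only rerun the downstream steps. One caution, assuming train.py parses its booleans naively: with argparse, type=bool converts via bool(str), and any non-empty string, including "False", is truthy, so a flag written this way may not do what it reads as:

    # demonstration of the pitfall only, not of train.py's actual behaviour
    python -c 'print(bool("False"))'   # prints True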
PATH=/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/export/home/cse200093/.user_conda/miniconda/bin:/export/home/cse200093/.user_conda/miniconda/bin:/export/home/cse200093/.user_conda/miniconda/bin:/export/home/cse200093/.user_conda/miniconda/condabin:/export/home/cse200093/.user_conda/miniconda/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/hom
e/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:34:32/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:34:32/config.cfg new file mode 100644 index 000000000..2725c1d82 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:34:32/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-06-23_15:34:32" +stdout_path = "log_train_coder/2023-06-23_15:34:32/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-06-23_15:34:32/slurm-%j-stderr.log" +copy_scratch = true +python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:34:32/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:34:32/sbatch.sh new file mode 100644 index 000000000..1e18d936d --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:34:32/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh 
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:35:28/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:35:28/config.cfg
new file mode 100644
index 000000000..fa8c39962
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:35:28/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "t4"
+gpu_type_upper = "T4"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-23_15:35:28"
+stdout_path = "log_train_coder/2023-06-23_15:35:28/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-23_15:35:28/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:35:28/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:35:28/sbatch.sh
new file mode 100644
index 000000000..100f2afec
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:35:28/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:t4:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuT4
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-23_15:35:28/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-23_15:35:28/slurm-%j-stderr.log
+export PATH=/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/export/home/cse200093/.user_conda/miniconda/bin:/export/home/cse200093/.user_conda/miniconda/condabin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
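The 15:35:28 run differs from the 15:34:32 one only in the GPU request: gpu_type switches from v100 to t4 in config.cfg, and the generated sbatch.sh carries the two Slurm directives that encode it:

    #SBATCH --gres=gpu:t4:1
    #SBATCH --partition gpuT4

Both values appear to be derived from the same gpu_type/gpu_type_upper pair in config.cfg, so changing that one field retargets the job.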
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:36:41/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:36:41/config.cfg
new file mode 100644
index 000000000..22df67a2f
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:36:41/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "v100"
+gpu_type_upper = "V100"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-06-23_15:36:41"
+stdout_path = "log_train_coder/2023-06-23_15:36:41/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-06-23_15:36:41/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:36:41/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:36:41/sbatch.sh
new file mode 100644
index 000000000..46f72a5b9
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-06-23_15:36:41/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:v100:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuV100
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-06-23_15:36:41/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-06-23_15:36:41/slurm-%j-stderr.log
+export PATH=/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/export/home/cse200093/.user_conda/miniconda/bin:/export/home/cse200093/.user_conda/miniconda/condabin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29 --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:09:43/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:09:43/config.cfg
new file mode 100644
index 000000000..8bd254a0f
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:09:43/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "v100"
+gpu_type_upper = "V100"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-07-06_15:09:43"
+stdout_path = "log_train_coder/2023-07-06_15:09:43/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-07-06_15:09:43/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:09:43/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:09:43/sbatch.sh
new file mode 100644
index 000000000..d66224380
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:09:43/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:v100:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuV100
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-07-06_15:09:43/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-07-06_15:09:43/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/CODER/pretrain/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
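From the 2023-07-06_15:09:43 run onward, --model_name_or_path no longer points at the fine-tuned CamemBERT checkpoint but at the previous run's output directory, so training resumes from the last CODER checkpoint; since --output_dir is the same directory, each run writes its checkpoints back into the place it loaded them from:

    --model_name_or_path /export/home/cse200093/word-embedding/finetuning-camembert-2021-07-29   # June runs: start from the fine-tuned CamemBERT
    --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output                # July runs: resume from the previous CODER output

The July scripts also capture a different PATH snapshot (the HDP client directories now precede the pierrenv environment), which is why the export lines change shape from this run on.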
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/config.cfg
new file mode 100644
index 000000000..40731a6a2
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/config.cfg
@@ -0,0 +1,18 @@
+[slurm]
+job_name = "slurm-job-cse200093"
+job_duration = "72:00:00"
+gpu_type = "v100"
+gpu_type_upper = "V100"
+n_gpu = 1
+n_node = 1
+n_cpu = 5
+mem = 40000
+hdfs = false
+log_path = "log_train_coder/2023-07-06_15:16:12"
+stdout_path = "log_train_coder/2023-07-06_15:16:12/slurm-%j-stdout.log"
+stderr_path = "log_train_coder/2023-07-06_15:16:12/slurm-%j-stderr.log"
+copy_scratch = true
+python = "/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python"
+conda = null
+
+[slurm.conf]
\ No newline at end of file
diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/sbatch.sh
new file mode 100644
index 000000000..27e43ce78
--- /dev/null
+++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/sbatch.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --job-name=slurm-job-cse200093
+#SBATCH --time 72:00:00
+#SBATCH --gres=gpu:v100:1
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=5
+#SBATCH --mem=40000
+#SBATCH --partition gpuV100
+#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/
+#SBATCH --output=log_train_coder/2023-07-06_15:16:12/slurm-%j-stdout.log
+#SBATCH --error=log_train_coder/2023-07-06_15:16:12/slurm-%j-stderr.log
+export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH
+cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
+python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr
\ No newline at end of file
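Starting with the 15:16:12 run, the entry point also moves from CODER/pretrain/train.py to a local copy under Jacques_Bio/normalisation/py_files/train.py, which is the same directory the script cd's into. Since the working directory already matches, the call could equally be written relative to it; a minimal equivalent sketch, assuming train.py has no other dependence on being called by absolute path:

    cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files'
    python train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr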
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/sbatch.sh new file mode 100644 index 000000000..27e43ce78 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:16:12/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-06_15:16:12/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-06_15:16:12/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:20:05/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:20:05/config.cfg new file mode 100644 index 000000000..8def8edbb --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:20:05/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-06_15:20:05" +stdout_path = "log_train_coder/2023-07-06_15:20:05/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-06_15:20:05/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:20:05/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:20:05/sbatch.sh new file mode 100644 index 000000000..c808e7b5a --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:20:05/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-06_15:20:05/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-06_15:20:05/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:31:19/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:31:19/config.cfg new file mode 100644 index 000000000..ced3ce2f2 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:31:19/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-06_15:31:19" +stdout_path = "log_train_coder/2023-07-06_15:31:19/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-06_15:31:19/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:31:19/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:31:19/sbatch.sh new file mode 100644 index 000000000..ccda82d23 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-06_15:31:19/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-06_15:31:19/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-06_15:31:19/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-07_09:59:30/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-07_09:59:30/config.cfg new file mode 100644 index 000000000..615370d61 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-07_09:59:30/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-07_09:59:30" +stdout_path = "log_train_coder/2023-07-07_09:59:30/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-07_09:59:30/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-07_09:59:30/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-07_09:59:30/sbatch.sh new file mode 100644 index 000000000..f6a2bb776 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-07_09:59:30/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-07_09:59:30/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-07_09:59:30/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-10_14:47:39/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-10_14:47:39/config.cfg new file mode 100644 index 000000000..50b82a4e7 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-10_14:47:39/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-10_14:47:39" +stdout_path = "log_train_coder/2023-07-10_14:47:39/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-10_14:47:39/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-10_14:47:39/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-10_14:47:39/sbatch.sh new file mode 100644 index 000000000..b16c5fa41 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-10_14:47:39/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-10_14:47:39/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-10_14:47:39/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-11_15:31:59/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-11_15:31:59/config.cfg new file mode 100644 index 000000000..01404eb60 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-11_15:31:59/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-11_15:31:59" +stdout_path = "log_train_coder/2023-07-11_15:31:59/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-11_15:31:59/slurm-%j-stderr.log" +copy_scratch = true +python = 
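Each timestamped log directory pairs a `config.cfg` with the `sbatch.sh` generated from it. As a reading aid, here is a minimal sketch of how the `[slurm]` section above could be rendered into that sbatch header; the function name and the use of `configparser` are illustrative assumptions, not the repository's actual generator.

```python
# Hypothetical sketch (not the actual generator in this repo): rebuild the
# sbatch.sh header from the [slurm] section of a config.cfg like the one above.
import configparser


def render_sbatch_header(cfg_path: str) -> str:
    # Interpolation must be disabled: values contain literal '%j' placeholders.
    cfg = configparser.ConfigParser(interpolation=None)
    cfg.read(cfg_path)
    s = cfg["slurm"]
    q = lambda key: s[key].strip('"')  # string values are stored quoted
    return "\n".join([
        "#!/bin/bash",
        f"#SBATCH --job-name={q('job_name')}",
        f"#SBATCH --time {q('job_duration')}",
        f"#SBATCH --gres=gpu:{q('gpu_type')}:{q('n_gpu')}",
        f"#SBATCH --nodes={q('n_node')}",
        f"#SBATCH --cpus-per-task={q('n_cpu')}",
        f"#SBATCH --mem={q('mem')}",
        f"#SBATCH --partition gpu{q('gpu_type_upper')}",
        f"#SBATCH --output={q('stdout_path')}",
        f"#SBATCH --error={q('stderr_path')}",
    ])


# Example: print(render_sbatch_header("log_train_coder/2023-07-10_14:47:39/config.cfg"))
```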
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-11_15:31:59/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-11_15:31:59/sbatch.sh new file mode 100644 index 000000000..efae43f97 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-11_15:31:59/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-11_15:31:59/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-11_15:31:59/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-12_15:42:51/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-12_15:42:51/config.cfg new file mode 100644 index 000000000..991526d27 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-12_15:42:51/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-12_15:42:51" +stdout_path = "log_train_coder/2023-07-12_15:42:51/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-12_15:42:51/slurm-%j-stderr.log" +copy_scratch = true +python = 
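The generated scripts re-export the submitting shell's full `PATH`, so repeated submissions from the same session tend to stack the same Hadoop tool directories several times. An order-preserving deduplication helper, sketched as a hypothetical utility (nothing like it exists in this diff):

```python
# Hypothetical utility: collapse duplicate PATH entries while keeping the
# first occurrence of each directory, so lookup order is unchanged.
import os


def dedupe_path(path: str) -> str:
    seen = set()
    kept = []
    for entry in path.split(os.pathsep):
        if entry and entry not in seen:
            seen.add(entry)
            kept.append(entry)
    return os.pathsep.join(kept)


# Example:
# os.environ["PATH"] = dedupe_path(os.environ["PATH"])
```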
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-12_15:42:51/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-12_15:42:51/sbatch.sh new file mode 100644 index 000000000..7692efd8d --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-12_15:42:51/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-12_15:42:51/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-12_15:42:51/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-17_12:17:49/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-17_12:17:49/config.cfg new file mode 100644 index 000000000..8ce67533a --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-17_12:17:49/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-17_12:17:49" +stdout_path = "log_train_coder/2023-07-17_12:17:49/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-17_12:17:49/slurm-%j-stderr.log" +copy_scratch = true +python = 
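Every one of these runs invokes the same `train.py` entry point with identical flags. A plausible `argparse` skeleton limited to the flags visible in these scripts (the real `train.py` is not part of this diff, its defaults are not visible here, and it surely defines more options):

```python
# Sketch of an argument parser matching the train.py invocation above;
# only the flags visible in these sbatch scripts are declared.
import argparse


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="Train a CODER-style normalisation model")
    parser.add_argument("--umls_dir", required=True, help="UMLS release directory (e.g. 2021AB)")
    parser.add_argument("--model_name_or_path", required=True, help="base model or checkpoint to resume from")
    parser.add_argument("--output_dir", required=True, help="where checkpoints are written")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=8)
    parser.add_argument("--train_batch_size", type=int, default=1024)
    parser.add_argument("--lang", default="eng_fr", help="languages of UMLS terms to keep")
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
```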
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-17_12:17:49/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-17_12:17:49/sbatch.sh new file mode 100644 index 000000000..0dab12cc4 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-17_12:17:49/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-17_12:17:49/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-17_12:17:49/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-19_15:24:15/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-19_15:24:15/config.cfg new file mode 100644 index 000000000..abd5ddae8 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-19_15:24:15/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-19_15:24:15" +stdout_path = "log_train_coder/2023-07-19_15:24:15/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-19_15:24:15/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-19_15:24:15/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-19_15:24:15/sbatch.sh new file mode 100644 index 000000000..6b503dc36 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-19_15:24:15/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-19_15:24:15/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-19_15:24:15/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-21_09:07:59/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-21_09:07:59/config.cfg new file mode 100644 index 000000000..ed7765f3b --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-21_09:07:59/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-21_09:07:59" +stdout_path = "log_train_coder/2023-07-21_09:07:59/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-21_09:07:59/slurm-%j-stderr.log" +copy_scratch = true +python = 
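For context, the wrapper that produced these directories would only need to hand each generated script to Slurm's standard CLI. A hypothetical submission helper (no such helper appears in this diff):

```python
# Hypothetical submission helper: hand a generated sbatch.sh to Slurm.
import subprocess


def submit(script_path: str) -> str:
    # On success, sbatch prints e.g. "Submitted batch job 12345".
    out = subprocess.run(
        ["sbatch", script_path],
        check=True, capture_output=True, text=True,
    ).stdout
    return out.strip()
```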
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-21_09:07:59/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-21_09:07:59/sbatch.sh new file mode 100644 index 000000000..730e5ea2b --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-21_09:07:59/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-21_09:07:59/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-21_09:07:59/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-24_07:41:05/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-24_07:41:05/config.cfg new file mode 100644 index 000000000..ce41a51d0 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-24_07:41:05/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-24_07:41:05" +stdout_path = "log_train_coder/2023-07-24_07:41:05/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-24_07:41:05/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-24_07:41:05/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-24_07:41:05/sbatch.sh new file mode 100644 index 000000000..3f57f42dd --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-24_07:41:05/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-24_07:41:05/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-24_07:41:05/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-24_09:35:24/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-24_09:35:24/config.cfg new file mode 100644 index 000000000..b32d76599 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-24_09:35:24/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-24_09:35:24" +stdout_path = "log_train_coder/2023-07-24_09:35:24/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-24_09:35:24/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-24_09:35:24/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-24_09:35:24/sbatch.sh new file mode 100644 index 000000000..91fa89096 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-24_09:35:24/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-24_09:35:24/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-24_09:35:24/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:30/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:30/config.cfg new file mode 100644 index 000000000..210c588e2 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:30/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-25_14:10:30" +stdout_path = "log_train_coder/2023-07-25_14:10:30/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-25_14:10:30/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:30/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:30/sbatch.sh new file mode 100644 index 000000000..d3be7688e --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:30/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-25_14:10:30/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-25_14:10:30/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:58/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:58/config.cfg new file mode 100644 index 000000000..7978f212e --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:58/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-25_14:10:58" +stdout_path = "log_train_coder/2023-07-25_14:10:58/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-25_14:10:58/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:58/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:58/sbatch.sh new file mode 100644 index 000000000..49ba426da --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:10:58/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-25_14:10:58/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-25_14:10:58/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:13:54/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:13:54/config.cfg new file mode 100644 index 000000000..fe8789d09 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:13:54/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "log_train_coder/2023-07-25_14:13:54" +stdout_path = "log_train_coder/2023-07-25_14:13:54/slurm-%j-stdout.log" +stderr_path = "log_train_coder/2023-07-25_14:13:54/slurm-%j-stderr.log" +copy_scratch = true +python = 
"/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:13:54/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:13:54/sbatch.sh new file mode 100644 index 000000000..333d467bf --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-07-25_14:13:54/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=log_train_coder/2023-07-25_14:13:54/slurm-%j-stdout.log +#SBATCH --error=log_train_coder/2023-07-25_14:13:54/slurm-%j-stderr.log +export PATH=/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/envs/pierrenv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/cur
rent/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/export/home/cse200093/Jacques_Bio/normalisation/py_files' +python /export/home/cse200093/Jacques_Bio/normalisation/py_files/train.py --umls_dir /export/home/cse200093/deep_mlg_normalization/resources/umls/2021AB/ --model_name_or_path /export/home/cse200093/Jacques_Bio/data_bio/coder_output --output_dir /export/home/cse200093/Jacques_Bio/data_bio/coder_output --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:14:27/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:14:27/config.cfg new file mode 100644 index 000000000..98c52d2eb --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:14:27/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:14:27" +stdout_path = "BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:14:27/slurm-%j-stdout.log" +stderr_path = 
"BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:14:27/slurm-%j-stderr.log" +copy_scratch = true +python = "/data/scratch/cse200093/BioMedics/.venv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:14:27/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:14:27/sbatch.sh new file mode 100644 index 000000000..8944674d7 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:14:27/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:14:27/slurm-%j-stdout.log +#SBATCH --error=BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:14:27/slurm-%j-stderr.log +export PATH=/data/scratch/cse200093/BioMedics/.venv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/condabin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/curre
nt/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/data/scratch/cse200093' +python BioMedics/normalisation/py_files/train.py --umls_dir BioMedics/data/umls/2021AB/ --model_name_or_path word-embedding/coder_eds --output_dir word-embedding/coder_eds --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:17:42/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:17:42/config.cfg new file mode 100644 index 000000000..f13fb4dd4 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:17:42/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:17:42" +stdout_path = "BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:17:42/slurm-%j-stdout.log" +stderr_path = 
"BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:17:42/slurm-%j-stderr.log" +copy_scratch = true +python = "/data/scratch/cse200093/BioMedics/.venv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:17:42/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:17:42/sbatch.sh new file mode 100644 index 000000000..7986f55ea --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:17:42/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:17:42/slurm-%j-stdout.log +#SBATCH --error=BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:17:42/slurm-%j-stderr.log +export PATH=/data/scratch/cse200093/BioMedics/.venv/bin:/data/scratch/cse200093/BioMedics/.venv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/condabin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr
/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/data/scratch/cse200093' +python BioMedics/normalisation/py_files/train.py --umls_dir BioMedics/data/umls/2021AB/ --model_name_or_path word-embedding/coder_eds --output_dir word-embedding/coder_eds --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:19:16/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:19:16/config.cfg new file mode 100644 index 000000000..48dba8d5a --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:19:16/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:19:16" +stdout_path = "BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:19:16/slurm-%j-stdout.log" 
+stderr_path = "BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:19:16/slurm-%j-stderr.log" +copy_scratch = true +python = "/data/scratch/cse200093/BioMedics/.venv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:19:16/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:19:16/sbatch.sh new file mode 100644 index 000000000..3e59882f9 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-11-23_19:19:16/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:19:16/slurm-%j-stdout.log +#SBATCH --error=BioMedics/normalisation/py_files/log_train_coder/2023-11-23_19:19:16/slurm-%j-stderr.log +export PATH=/data/scratch/cse200093/BioMedics/.venv/bin:/data/scratch/cse200093/BioMedics/.venv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/condabin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-
client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/data/scratch/cse200093' +python BioMedics/normalisation/py_files/train.py --umls_dir BioMedics/data/umls/2021AB/ --model_name_or_path word-embedding/coder_eds --output_dir word-embedding/coder_eds --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-11-27_13:49:17/config.cfg b/bash_scripts/Normalisation/log_train_coder/2023-11-27_13:49:17/config.cfg new file mode 100644 index 000000000..03c7eb448 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-11-27_13:49:17/config.cfg @@ -0,0 +1,18 @@ +[slurm] +job_name = "slurm-job-cse200093" +job_duration = "72:00:00" +gpu_type = "v100" +gpu_type_upper = "V100" +n_gpu = 1 +n_node = 1 +n_cpu = 5 +mem = 40000 +hdfs = false +log_path = "BioMedics/bash_scripts/Coder_model/log_train_coder/2023-11-27_13:49:17" +stdout_path = 
"BioMedics/bash_scripts/Coder_model/log_train_coder/2023-11-27_13:49:17/slurm-%j-stdout.log" +stderr_path = "BioMedics/bash_scripts/Coder_model/log_train_coder/2023-11-27_13:49:17/slurm-%j-stderr.log" +copy_scratch = true +python = "/data/scratch/cse200093/BioMedics/.venv/bin/python" +conda = null + +[slurm.conf] \ No newline at end of file diff --git a/bash_scripts/Normalisation/log_train_coder/2023-11-27_13:49:17/sbatch.sh b/bash_scripts/Normalisation/log_train_coder/2023-11-27_13:49:17/sbatch.sh new file mode 100644 index 000000000..3f4b2b0e9 --- /dev/null +++ b/bash_scripts/Normalisation/log_train_coder/2023-11-27_13:49:17/sbatch.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=slurm-job-cse200093 +#SBATCH --time 72:00:00 +#SBATCH --gres=gpu:v100:1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=5 +#SBATCH --mem=40000 +#SBATCH --partition gpuV100 +#SBATCH --container-image /scratch/images/sparkhadoop.sqsh --container-mounts=/export/home/$USER:/export/home/$USER,/export/home/share:/export/home/share,/data/scratch/$USER:/data/scratch/$USER --container-mount-home --container-writable --container-workdir=/ +#SBATCH --output=BioMedics/bash_scripts/Coder_model/log_train_coder/2023-11-27_13:49:17/slurm-%j-stdout.log +#SBATCH --error=BioMedics/bash_scripts/Coder_model/log_train_coder/2023-11-27_13:49:17/slurm-%j-stderr.log +export PATH=/data/scratch/cse200093/BioMedics/.venv/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/export/home/cse200093/.user_conda/miniconda/condabin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/n
ode/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/accumulo-client/bin:/usr/hdp/current/atlas-server/bin:/usr/hdp/current/beacon-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/falcon-client/bin:/usr/hdp/current/flume-server/bin:/usr/hdp/current/hadoop-client/bin:/usr/hdp/current/hbase-client/bin:/usr/hdp/current/hadoop-hdfs-client/bin:/usr/hdp/current/hadoop-mapreduce-client/bin:/usr/hdp/current/hadoop-yarn-client/bin:/usr/hdp/current/hive-client/bin:/usr/hdp/current/hive-hcatalog/bin:/usr/hdp/current/hive-server2/bin:/usr/hdp/current/kafka-broker/bin:/usr/hdp/current/mahout-client/bin:/usr/hdp/current/oozie-client/bin:/usr/hdp/current/oozie-server/bin:/usr/hdp/current/phoenix-client/bin:/usr/hdp/current/pig-client/bin:/usr/hdp/share/hst/hst-agent/python-wrap:/usr/hdp/current/slider-client/bin:/usr/hdp/current/sqoop-client/bin:/usr/hdp/current/sqoop-server/bin:/usr/hdp/current/storm-slider-client/bin:/usr/hdp/current/zookeeper-client/bin:/usr/hdp/current/zookeeper-server/bin:/export/home/opt/jupyterhub/conda/bin:/export/home/opt/jupyterhub/node/bin:/export/home/opt/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/hdp/current/spark-2.4.3-client/bin:/usr/local/hadoop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/opt/apps/texlive-20190227/2018/bin/x86_64-linux:/export/home/cse200093/.local/bin:/export/home/cse200093/bin:$PATH +cd '/data/scratch/cse200093' +python BioMedics/normalisation/training/train.py --umls_dir BioMedics/data/umls/2021AB/ --model_name_or_path word-embedding/coder_eds --output_dir word-embedding/coder_eds --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr \ No newline at end of file diff --git a/bash_scripts/Normalisation/train_coder.sh b/bash_scripts/Normalisation/train_coder.sh new file mode 100755 index 000000000..934075b76 --- /dev/null +++ b/bash_scripts/Normalisation/train_coder.sh @@ -0,0 +1,7 @@ +source $HOME/.user_conda/miniconda/etc/profile.d/conda.sh + +cd ~/scratch +source BioMedics/.venv/bin/activate +conda deactivate + +eds-toolbox slurm submit --config BioMedics/normalisation/training/train_coder_slurm.cfg -c "python BioMedics/normalisation/training/train.py --umls_dir BioMedics/data/umls/2021AB/ --model_name_or_path word-embedding/coder_eds --output_dir 
word-embedding/coder_eds --gradient_accumulation_steps 8 --train_batch_size 1024 --lang eng_fr"
diff --git a/demo/requirements.txt b/demo/requirements.txt
deleted file mode 100644
index 49895c26c..000000000
--- a/demo/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-git+https://github.com/aphp/edsnlp.git
-streamlit
diff --git a/docs/pipelines/misc/measurements.md b/docs/pipelines/misc/measurements.md
deleted file mode 100644
index 4630f993c..000000000
--- a/docs/pipelines/misc/measurements.md
+++ /dev/null
@@ -1,135 +0,0 @@
-# Measurements
-
-The `eds.measurements` pipeline's role is to detect and normalise numerical measurements within a medical document.
-We use simple regular expressions to extract and normalise measurements, and use `Measurement` classes to store them.
-
-!!! warning
-
-    The `measurements` pipeline is still in active development and has not been rigorously validated.
-    If you come across a measurement expression that goes undetected, please file an issue!
-
-## Scope
-
-The `eds.measurements` pipeline can extract simple (eg `3cm`) measurements.
-It can detect elliptic enumerations (eg `32, 33 et 34kg`) of measurements of the same type and split the measurements accordingly.
-
-The normalised value can then be accessed via the `span._.value` attribute and converted on the fly to a desired unit.
-
-The current pipeline annotates the following measurements out of the box:
-
-| Measurement name | Example                |
-| ---------------- | ---------------------- |
-| `eds.size`       | `1m50`, `1.50m`        |
-| `eds.weight`     | `12kg`, `1kg300`       |
-| `eds.bmi`        | `BMI: 24`, `24 kg.m-2` |
-| `eds.volume`     | `2 cac`, `8ml`         |
-
-## Usage
-
-```python
-import spacy
-
-nlp = spacy.blank("eds")
-nlp.add_pipe(
-    "eds.measurements", config=dict(measurements=["eds.size", "eds.weight", "eds.bmi"])
-)
-
-text = (
-    "Le patient est admis hier, fait 1m78 pour 76kg. "
-    "Les deux nodules bénins sont larges de 1,2 et 2.4mm. "
-    "BMI: 24 "
-)
-
-doc = nlp(text)
-
-measurements = doc.spans["measurements"]
-
-measurements
-# Out: [1m78, 76kg, 1,2, 2.4mm, 24]
-
-measurements[0]
-# Out: 1m78
-
-str(measurements[0]._.value)
-# Out: '1.78 m'
-
-measurements[0]._.value.cm
-# Out: 178.0
-
-measurements[2]
-# Out: 1,2
-
-str(measurements[2]._.value)
-# Out: '1.2 mm'
-
-measurements[2]._.value.mm
-# Out: 1.2
-
-measurements[4]
-# Out: 24
-
-str(measurements[4]._.value)
-# Out: '24.0 kg_per_m2'
-
-measurements[4]._.value.kg_per_m2
-# Out: 24.0
-```
-
-## Custom measurement
-
-You can declare custom measurements by changing the patterns:
-
-```python
-import spacy
-
-nlp = spacy.blank("eds")
-nlp.add_pipe(
-    "eds.measurements",
-    config=dict(
-        measurements={
-            # this name will be used to define the labels of the matched entities
-            "my_custom_surface_measurement": {
-                # This measurement unit is homogeneous to square meters
-                "unit": "m2",
-                # To handle cases like "surface: 1.8" (implied m2), we can use
-                # unitless patterns
-                "unitless_patterns": [
-                    {
-                        "terms": ["surface", "aire"],
-                        "ranges": [
-                            {
-                                "unit": "m2",
-                                "min": 0,
-                                "max": 9,
-                            }
-                        ],
-                    }
-                ],
-            },
-        }
-    ),
-)
-```
-
-## Declared extensions
-
-The `eds.measurements` pipeline declares a single [spaCy extension](https://spacy.io/usage/processing-pipelines#custom-components-attributes) on the `Span` object,
-the `value` attribute that is a `Measurement` instance.
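As a quick illustration of this declared extension, here is a minimal sketch based on the usage example above (the exact spans and parsed values depend on the pipeline version): the `Measurement` object supports both string rendering and on-the-fly unit conversion.

```python
import spacy

# Assumes edsnlp is installed, so that the "eds" language and the
# "eds.measurements" factory are registered through entry points.
nlp = spacy.blank("eds")
nlp.add_pipe("eds.measurements", config=dict(measurements=["eds.size"]))

doc = nlp("Le patient mesure 1m78.")

for span in doc.spans["measurements"]:
    measurement = span._.value  # a `Measurement` instance
    print(str(measurement))  # string rendering, e.g. '1.78 m'
    print(measurement.cm)  # on-the-fly unit conversion, e.g. 178.0
```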
-
-## Configuration
-
-The pipeline can be configured using the following parameters:
-
-| Parameter         | Explanation                                                                                        | Default                                   |
-| ----------------- | -------------------------------------------------------------------------------------------------- | ----------------------------------------- |
-| `measurements`    | A list or dict of the measurements to extract                                                      | `["eds.size", "eds.weight", "eds.angle"]` |
-| `units_config`    | A dict describing the units with lexical patterns, dimensions, scales, ...                         | ...                                       |
-| `number_terms`    | A dict describing the textual forms of common numbers                                              | ...                                       |
-| `stopwords`       | A list of stopwords that do not matter when placed between a unitless trigger and the measurement  | ...                                       |
-| `unit_divisors`   | A list of terms used to divide two units (like: m / s)                                             | ...                                       |
-| `ignore_excluded` | Whether to ignore excluded tokens for matching                                                     | `False`                                   |
-| `attr`            | spaCy attribute to match on, eg `NORM` or `TEXT`                                                   | `"NORM"`                                  |
-
-## Authors and citation
-
-The `eds.measurements` pipeline was developed by AP-HP's Data Science team.
diff --git a/CITATION.cff b/edsnlp/CITATION.cff
similarity index 100%
rename from CITATION.cff
rename to edsnlp/CITATION.cff
diff --git a/LICENSE b/edsnlp/LICENSE
similarity index 100%
rename from LICENSE
rename to edsnlp/LICENSE
diff --git a/Makefile b/edsnlp/Makefile
similarity index 100%
rename from Makefile
rename to edsnlp/Makefile
diff --git a/README.md b/edsnlp/README.md
similarity index 100%
rename from README.md
rename to edsnlp/README.md
diff --git a/edsnlp/__init__.py b/edsnlp/__init__.py
deleted file mode 100644
index 3a232eac8..000000000
--- a/edsnlp/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-"""
-EDS-NLP
-"""
-
-from . import patch_spacy_dot_components  # isort: skip
-from pathlib import Path
-
-from . import extensions
-from .language import *
-
-__version__ = "0.8.0"
-
-BASE_DIR = Path(__file__).parent
diff --git a/changelog.md b/edsnlp/changelog.md
similarity index 100%
rename from changelog.md
rename to edsnlp/changelog.md
diff --git a/contributing.md b/edsnlp/contributing.md
similarity index 100%
rename from contributing.md
rename to edsnlp/contributing.md
diff --git a/demo/app.py b/edsnlp/demo/app.py
similarity index 100%
rename from demo/app.py
rename to edsnlp/demo/app.py
diff --git a/docs/advanced-tutorials/fastapi.md b/edsnlp/docs/advanced-tutorials/fastapi.md
similarity index 100%
rename from docs/advanced-tutorials/fastapi.md
rename to edsnlp/docs/advanced-tutorials/fastapi.md
diff --git a/docs/advanced-tutorials/index.md b/edsnlp/docs/advanced-tutorials/index.md
similarity index 100%
rename from docs/advanced-tutorials/index.md
rename to edsnlp/docs/advanced-tutorials/index.md
diff --git a/docs/advanced-tutorials/word-vectors.md b/edsnlp/docs/advanced-tutorials/word-vectors.md
similarity index 100%
rename from docs/advanced-tutorials/word-vectors.md
rename to edsnlp/docs/advanced-tutorials/word-vectors.md
diff --git a/docs/assets/logo/aphp-blue.svg b/edsnlp/docs/assets/logo/aphp-blue.svg
similarity index 100%
rename from docs/assets/logo/aphp-blue.svg
rename to edsnlp/docs/assets/logo/aphp-blue.svg
diff --git a/docs/assets/logo/aphp-white.svg b/edsnlp/docs/assets/logo/aphp-white.svg
similarity index 100%
rename from docs/assets/logo/aphp-white.svg
rename to edsnlp/docs/assets/logo/aphp-white.svg
diff --git a/docs/assets/logo/edsnlp.svg b/edsnlp/docs/assets/logo/edsnlp.svg
similarity index 100%
rename from docs/assets/logo/edsnlp.svg
rename to edsnlp/docs/assets/logo/edsnlp.svg
diff --git a/docs/assets/stylesheets/extra.css
b/edsnlp/docs/assets/stylesheets/extra.css
similarity index 100%
rename from docs/assets/stylesheets/extra.css
rename to edsnlp/docs/assets/stylesheets/extra.css
diff --git a/docs/assets/templates/python/material/docstring.html b/edsnlp/docs/assets/templates/python/material/docstring.html
similarity index 100%
rename from docs/assets/templates/python/material/docstring.html
rename to edsnlp/docs/assets/templates/python/material/docstring.html
diff --git a/docs/assets/templates/python/material/docstring/parameters.html b/edsnlp/docs/assets/templates/python/material/docstring/parameters.html
similarity index 100%
rename from docs/assets/templates/python/material/docstring/parameters.html
rename to edsnlp/docs/assets/templates/python/material/docstring/parameters.html
diff --git a/docs/assets/templates/python/material/function.html b/edsnlp/docs/assets/templates/python/material/function.html
similarity index 100%
rename from docs/assets/templates/python/material/function.html
rename to edsnlp/docs/assets/templates/python/material/function.html
diff --git a/docs/assets/termynal/termynal.css b/edsnlp/docs/assets/termynal/termynal.css
similarity index 100%
rename from docs/assets/termynal/termynal.css
rename to edsnlp/docs/assets/termynal/termynal.css
diff --git a/docs/assets/termynal/termynal.js b/edsnlp/docs/assets/termynal/termynal.js
similarity index 100%
rename from docs/assets/termynal/termynal.js
rename to edsnlp/docs/assets/termynal/termynal.js
diff --git a/edsnlp/docs/changelog.md b/edsnlp/docs/changelog.md
new file mode 100644
index 000000000..0e156fb04
--- /dev/null
+++ b/edsnlp/docs/changelog.md
@@ -0,0 +1,304 @@
+# Changelog
+
+## Unreleased
+
+### Added
+
+- Add `eds.spaces` (or `eds.normalizer` with `spaces=True`) to detect space tokens, and add `ignore_space_tokens` to `EDSPhraseMatcher` and `SimstringMatcher` to skip them
+- Add the `ignore_space_tokens` option in most components
+
+## v0.8.0 (2023-03-09)
+
+### Added
+- Tokenization exceptions (`Mr.`, `Dr.`, `Mrs.`) and non-end-of-sentence periods are now tokenized with the next letter in the `eds` tokenizer
+
+### Changed
+
+- Disable `EDSMatcher` preprocessing auto progress tracking by default
+- Moved dependencies to a single pyproject.toml: support for `pip install -e '.[dev,docs,setup]'`
+- The ADICAP matcher now allows dot separators (e.g. `B.H.HP.A7A0`)
+
+### Fixed
+
+- `eds.adicap`: reparsed the dictionary used to decode the ADICAP codes (some of them were wrongly decoded)
+
+## v0.7.4 (2022-12-12)
+
+### Added
+- `eds.history`: Add the option to consider only the closest dates in the sentence (dates inside the boundaries; if there are none, the closest date in the entire sentence is used).
+- `eds.negation`: It takes into account following past participles and preceding infinitives.
+- `eds.hypothesis`: It takes into account following past participles of hypothesis verbs.
+- `eds.negation` & `eds.hypothesis`: Introduce new patterns and remove unnecessary patterns.
+- `eds.dates`: Add a pattern for preceding relative dates (e.g. l'embolie qui est survenue **à 10 jours**).
+- Improve patterns in the `eds.pollution` component to account for multiline footers
+- Add `QuickExample` object to quickly try a pipeline.
+- Add UMLS terminology matcher `eds.umls`
+- New `RegexMatcher` method to create spans from groupdicts
+- New `eds.dates` option to disable time detection
+
+### Changed
+
+- Improve date detection by removing false positives
+
+### Fixed
+
+- `eds.hypothesis`: Remove overly generic patterns.
+- `EDSTokenizer`: It now tokenizes `"recherche d'"` as `["recherche", "d'"]`, instead of `["recherche", "d", "'"]`.
+- Fix small typos in the documentation and in the docstrings.
+- Harmonize processing utils (distributed custom_pipe) to have the same API for Pandas and Pyspark
+- Fix BratConnector file loading issues with complex file hierarchies
+
+## v0.7.2 (2022-10-26)
+
+### Added
+
+- Improve the `eds.history` component by taking into account the date extracted from the `eds.dates` component.
+- New pop-up when you click on the copy icon in the termynal widget (docs).
+- Add NER `eds.elston-ellis` pipeline to identify Elston-Ellis scores
+- Add flags=re.MULTILINE to `eds.pollution` and change the footer pattern
+
+### Fixed
+
+- Remove the warning in `eds.sections` when `eds.normalizer` is in the pipe.
+- Fix filter_spans for strictly nested entities
+- Fill the eds.remove-lowercase "assign" metadata to run the pipeline during EDSPhraseMatcher preprocessing
+- Re-allow spaCy components whose names contain a dot (forbidden since spaCy v3.4.2) for backward compatibility.
+
+## v0.7.1 (2022-10-13)
+
+### Added
+
+- Add new patterns (footer, web entities, biology tables, coding sections) to the normalisation (pollution) pipeline
+
+### Changed
+
+- Improved TNM detection algorithm
+- Account for more modifiers in ADICAP code detection
+
+### Fixed
+
+- Add nephew, niece and daughter to family qualifier patterns
+- EDSTokenizer (`spacy.blank('eds')`) now recognizes non-breaking whitespaces as spaces and does not split float numbers
+- The `eds.dates` pipeline now allows new lines as space separators in dates
+
+## v0.7.0 (2022-09-06)
+
+### Added
+
+- New trainable nested NER pipeline component `nested_ner`
+- Support for nested entities and attributes in BratDataConnector
+- Pytorch wrappers and experimental training utils
+- Add attribute `section` to entities
+- Add new cases for the separator pattern when components of the TNM score are separated by a forward slash
+- Add NER `eds.adicap` pipeline to identify ADICAP codes
+- Add patterns to the `pollution` pipeline and simplify activating or deactivating specific patterns
+
+### Changed
+- Simplified the configuration scheme of the `pollution` pipeline
+- Update of the `ContextualMatcher` (and all pipelines depending on it), rendering it more flexible to use
+- Rename the R component of the TNM score to "resection_completeness"
+
+### Fixed
+
+- Prevent section titles from capturing surrounding tokens, causing overlaps (#113)
+- Enhance existing patterns for section detection and add patterns for previously ignored sections (introduction, evolution, modalites de sortie, vaccination).
+- Fix explain mode, which was always triggered, in the `eds.history` factory.
+- Fix test in `eds.sections`. Previously, no check was done.
+- Remove spurious span suffixes from SOFA scores
+
+## v0.6.2 (2022-08-02)
+
+### Added
+
+- New `SimstringMatcher` matcher to perform fuzzy term matching, and `algorithm` parameter in terminology components and the `eds.matcher` component
+- Makefile to install and test the application and browse the documentation
+
+### Changed
+
+- Add consultation date pattern "CS", and false-positive patterns for dates (namely phone numbers and pagination).
+- Update the `eds.TNM` score pipeline.
Now it is possible to return a dictionary where the results are either `str` or `int` values.
+
+### Fixed
+
+- Add new patterns to the negation qualifier
+- Numpy header issues with binary distributed packages
+- Simstring dependency on Windows
+
+## v0.6.1 (2022-07-11)
+
+### Added
+
+- Now possible to provide regex flags when using the RegexMatcher
+- New `ContextualMatcher` pipe, aiming at replacing the `AdvancedRegex` pipe.
+- New `as_ents` parameter for `eds.dates`, to save detected dates as entities
+
+### Changed
+
+- Faster `eds.sentences` pipeline component with Cython
+- Bump version of Pydantic in `requirements.txt` to 1.8.2 to handle an incompatibility with the ContextualMatcher
+- Optimise space requirements by using `.csv.gz` compression for verbs
+
+### Fixed
+
+- `eds.sentences` behaviour with dot-delimited dates (eg `02.07.2022`, which counted as three sentences)
+
+## v0.6.0 (2022-06-17)
+
+### Added
+
+- Complete revamp of the measurements detection pipeline, with better parsing and more exhaustive matching
+- Add new functionality to the method `Span._.date.to_datetime()` to return a result inferred from context for those cases with missing information.
+- Force a batch size of 2000 when distributing a pipeline with Spark
+- New patterns in the `eds.dates` pipeline to identify cases where only the month is mentioned
+- New `eds.terminology` component for generic terminology matching, using the `kb_id_` attribute to store fine-grained entity labels
+- New `eds.cim10` terminology matching pipeline
+- New `eds.drugs` terminology pipeline that maps brand names and active ingredients to a unique [ATC](https://en.wikipedia.org/wiki/Anatomical_Therapeutic_Chemical_Classification_System) code
+
+## v0.5.3 (2022-05-04)
+
+### Added
+
+- Support for strings in the example utility
+- [TNM](https://en.wikipedia.org/wiki/TNM_staging_system) detection and normalisation with the `eds.TNM` pipeline
+- Support for an arbitrary callback for Pandas multiprocessing, with the `callback` argument
+
+## v0.5.2 (2022-04-29)
+
+### Added
+
+- Support for chained attributes in the `processing` pipelines
+- Colour utility with the category20 colour palette
+
+### Fixed
+
+- Correct a regex in the date detector (both `nov` and `nov.` are now detected, as are all other months)
+
+## v0.5.1 (2022-04-11)
+
+### Fixed
+
+- Updated Numpy requirements to be compatible with the `EDSPhraseMatcher`
+
+## v0.5.0 (2022-04-08)
+
+### Added
+
+- New `eds` language to better fit French clinical documents and improve speed
+- Testing for markdown codeblocks to make sure the documentation is actually executable
+
+### Changed
+
+- Complete revamp of the date detection pipeline, with better parsing and more exhaustive matching
+- Reimplementation of the EDSPhraseMatcher in Cython, leading to a 15x speed increase
+
+## v0.4.4
+
+- Add `measures` pipeline
+- Cap Jinja2 version to fix mkdocs
+- Add the possibility to add context in the processing module
+- Improve the speed of char replacement pipelines (accents and quotes)
+- Improve the speed of the regex matcher
+
+## v0.4.3
+
+- Fix regex matching on spans.
+- Add `fast_parse` to the date pipeline.
+- Add `relative_date` information parsing
+
+## v0.4.2
+
+- Fix issue with the `dateparser` library (see scrapinghub/dateparser#1045)
+- Fix `attr` issue in the `advanced-regex` pipeline
+- Add documentation for `eds.covid`
+- Update the demo with an explanation for the regex
+
+## v0.4.1
+
+- Added support for Koalas DataFrames in the `edsnlp.processing` pipe.
+- Added `eds.covid` NER pipeline for detecting COVID-19 mentions.
+
+## v0.4.0
+
+- Profound rewrite of the normalisation:
+  - The custom attribute `CUSTOM_NORM` is completely abandoned in favour of a more _spacyfic_ alternative
+  - The `normalizer` pipeline modifies the `NORM` attribute in place
+  - Other pipelines can modify the `Token._.excluded` custom attribute
+- EDS regex and term matchers can ignore excluded tokens during matching, effectively adding a second dimension to normalisation (choice of the attribute and possibility to skip _pollution_ tokens regardless of the attribute)
+- Matching can be performed on custom attributes more easily
+- Qualifiers are regrouped together within the `edsnlp.qualifiers` submodule; the inheritance from the `GenericMatcher` is dropped.
+- `edsnlp.utils.filter.filter_spans` now accepts a `label_to_remove` parameter. If set, only corresponding spans are removed, along with overlapping spans. Primary use case: removing pseudo cues for qualifiers.
+- Generalise the naming convention for extensions, which keep the same name as the pipeline that created them (eg `Span._.negation` for the `eds.negation` pipeline). The previous convention is kept for now, but using it issues a warning.
+- The `dates` pipeline underwent some light refactoring to increase robustness and fix a few issues
+- A new `consultation_dates` pipeline was added, which looks for dates preceded by expressions specific to consultation dates
+- In rule-based processing, the `terms.py` submodule is replaced by `patterns.py` to reflect the possible presence of regular expressions
+- Refactoring of the architecture:
+  - pipelines are now regrouped by type (`core`, `ner`, `misc`, `qualifiers`)
+  - the `matchers` submodule contains the `RegexMatcher` and `PhraseMatcher` classes, which interact with the normalisation
+  - the `multiprocessing` submodule contains `spark` and `local` multiprocessing tools
+  - `connectors` contains `Brat`, `OMOP` and `LabelTool` connectors
+  - `utils` contains various utilities
+- Add entry points to make pipelines usable directly, removing the need to import `edsnlp.components`.
+- Add an `eds` namespace for components: for instance, `negation` becomes `eds.negation`. Using the former pipeline name still works, but issues a deprecation warning.
+- Add 3 score pipelines related to emergency
+- Add a helper function to use a spaCy pipeline as a Spark UDF.
+- Fix alignment issues in the `RegexMatcher`
+- Change the alignment procedure, dropping the clumsy `numpy` dependency in favour of `bisect`
+- Change the name of `eds.antecedents` to `eds.history`.
+  Calling `eds.antecedents` still works, but issues a deprecation warning and support will be removed in a future version.
+- Add an `eds.covid` component that identifies mentions of COVID
+- Change the demo to include NER components
+
+## v0.3.2
+
+- Major revamp of the normalisation.
+  - The `normalizer` pipeline **now adds atomic components** (`lowercase`, `accents`, `quotes`, `pollution` & `endlines`) to the processing pipeline, and compiles the results into a new `Doc._.normalized` extension. The latter is itself a spaCy `Doc` object, wherein tokens are normalised and pollution tokens are removed altogether. Components that match on the `CUSTOM_NORM` attribute process the `normalized` document, and matches are brought back to the original document using a token-wise mapping.
+  - Update the `RegexMatcher` to use the `CUSTOM_NORM` attribute
+  - Add an `EDSPhraseMatcher`, wrapping spaCy's `PhraseMatcher` to enable matching on `CUSTOM_NORM`.
+  - Update the `matcher` and `advanced` pipelines to enable matching on the `CUSTOM_NORM` attribute.
+- Add an OMOP connector, to help go back and forth between OMOP-formatted pandas dataframes and spaCy documents.
+- Add a `reason` pipeline that extracts the reason for the visit.
+- Add an `endlines` pipeline that classifies newline characters as either spaces or actual ends of line.
+- Add the possibility to annotate within entities for qualifiers (`negation`, `hypothesis`, etc.), ie when the cue is within the entity. Disabled by default.
+
+## v0.3.1
+
+- Update `dates` to remove miscellaneous bugs.
+- Add `isort` pre-commit hook.
+- Improve performance for `negation`, `hypothesis`, `antecedents`, `family` and `rspeech` by using spaCy's `filter_spans` and our `consume_spans` methods.
+- Add proposition segmentation to `hypothesis` and `family`, enhancing results.
+
+## v0.3.0
+
+- Renamed `generic` to `matcher`. This is a non-breaking change for the average user; adding the pipeline is still:
+
+  ```python
+  nlp.add_pipe("matcher", config=dict(terms=dict(maladie="maladie")))
+  ```
+
+- Removed the `quickumls` pipeline. It was untested and unmaintained; it will be added back in a future release.
+- Add `score` pipeline, and `charlson`.
+- Add `advanced-regex` pipeline
+- Corrected bugs in the `negation` pipeline
+
+## v0.2.0
+
+- Add `negation` pipeline
+- Add `family` pipeline
+- Add `hypothesis` pipeline
+- Add `antecedents` pipeline
+- Add `rspeech` pipeline
+- Refactor the library:
+  - Remove the `rules` folder
+  - Add a `pipelines` folder, containing one subdirectory per component
+  - Every component subdirectory contains a module defining the component, and a module defining a factory, plus any other utilities (eg `terms.py`)
+
+## v0.1.0
+
+First working version. Available pipelines:
+
+- `section`
+- `sentences`
+- `normalization`
+- `pollution`
diff --git a/edsnlp/docs/contributing.md b/edsnlp/docs/contributing.md
new file mode 100644
index 000000000..2ca7634f2
--- /dev/null
+++ b/edsnlp/docs/contributing.md
@@ -0,0 +1,127 @@
+# Contributing to EDS-NLP
+
+We welcome contributions! There are many ways to help. For example, you can:
+
+1. Help us track bugs by filing issues
+2. Suggest and help prioritise new functionalities
+3. Develop a new pipeline! Fork the project and propose a new functionality through a pull request
+4. Help us make the library as straightforward as possible, by simply asking questions on whatever does not seem clear to you.
+
+## Development installation
+
+To be able to run the test suite, run the example notebooks and develop your own pipeline, you should clone the repo and install it locally.
+
+<div class="termy">
+
+```console
+# Clone the repository and change directory
+$ git clone https://github.com/aphp/edsnlp.git
+---> 100%
+$ cd edsnlp
+
+# Optional: create a virtual environment
+$ python -m venv venv
+$ source venv/bin/activate
+
+# Install the package with dev, docs and setup dependencies in editable mode
+$ pip install -e '.[dev,docs,setup]'
+# And build resources
+$ python scripts/conjugate_verbs.py
+```
+
+</div>
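+
+Once installed, a quick sanity check is to import the package; if the editable install succeeded, the command below should exit without error (a minimal check, not a substitute for the test suite):
+
+<div class="termy">
+
+```console
+$ python -c "import edsnlp"
+```
+
+</div>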
+ +To make sure the pipeline will not fail because of formatting errors, we added pre-commit hooks using the `pre-commit` Python library. To use it, simply install it: + +
+ +```console +$ pre-commit install +``` + +
+
+The pre-commit hooks defined in the [configuration](https://github.com/aphp/edsnlp/blob/master/.pre-commit-config.yaml) will run automatically when you commit your changes, letting you know if something went wrong.
+
+The hooks only run on staged changes. To force-run them on all files, run:
+
+<div class="termy">
+
+```console
+$ pre-commit run --all-files
+---> 100%
+color:green All good!
+```
+
+</div>
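+
+You can also run a single hook on specific files while iterating on a fix. A minimal sketch, assuming a `flake8` hook is defined in the pre-commit configuration; the file path is only an example:
+
+<div class="termy">
+
+```console
+$ pre-commit run flake8 --files edsnlp/pipelines/base.py
+```
+
+</div>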
+
+## Proposing a merge request
+
+At the very least, your changes should:
+
+- Be well documented;
+- Pass every test, and preferably add their own;
+- Follow the style guide.
+
+### Testing your code
+
+We use the Pytest test suite.
+
+The following command will run the test suite. Writing your own tests is encouraged!
+
+```shell
+python -m pytest
+```
+
+!!! warning "Testing Cython code"
+
+    Make sure the package is [installed in editable mode](#development-installation).
+    Otherwise `Pytest` won't be able to find the Cython modules.
+
+Should your contribution propose a bug fix, we require that the bug be thoroughly tested.
+
+### Architecture of a pipeline
+
+Pipelines should follow the same pattern:
+
+```
+edsnlp/pipelines/<pipeline>
+   |-- <pipeline>.py     # Defines the component logic
+   |-- patterns.py       # Defines matched patterns
+   |-- factory.py        # Declares the pipeline to spaCy
+```
+
+### Style Guide
+
+We use [Black](https://github.com/psf/black) to reformat the code. While other formatters only enforce PEP8 compliance, Black also makes the code uniform. In short:
+
+> Black reformats entire files in place. It is not configurable.
+
+Moreover, the CI/CD pipeline enforces a number of checks on the "quality" of the code. To wit, code that is not Black-formatted will make the test pipeline fail. We use `pre-commit` to keep our codebase clean.
+
+Refer to the [development install tutorial](#development-installation) for tips on how to format your files automatically.
+Most modern editors propose extensions that will format files on save.
+
+### Documentation
+
+Make sure to document your improvements, both within the code with comprehensive docstrings,
+as well as in the documentation itself if need be.
+
+We use `MkDocs` for EDS-NLP's documentation. You can check out the changes you make with:
+
+<div class="termy">
+ +```console +# Install the requirements +$ pip install -e '.[docs]' +---> 100% +color:green Installation successful + +# Run the documentation +$ mkdocs serve +``` + +
+ +Go to [`localhost:8000`](http://localhost:8000) to see your changes. MkDocs watches for changes in the documentation folder +and automatically reloads the page. diff --git a/docs/index.md b/edsnlp/docs/index.md similarity index 100% rename from docs/index.md rename to edsnlp/docs/index.md diff --git a/docs/pipelines/architecture.md b/edsnlp/docs/pipelines/architecture.md similarity index 100% rename from docs/pipelines/architecture.md rename to edsnlp/docs/pipelines/architecture.md diff --git a/docs/pipelines/core/contextual-matcher.md b/edsnlp/docs/pipelines/core/contextual-matcher.md similarity index 100% rename from docs/pipelines/core/contextual-matcher.md rename to edsnlp/docs/pipelines/core/contextual-matcher.md diff --git a/docs/pipelines/core/endlines.md b/edsnlp/docs/pipelines/core/endlines.md similarity index 100% rename from docs/pipelines/core/endlines.md rename to edsnlp/docs/pipelines/core/endlines.md diff --git a/docs/pipelines/core/index.md b/edsnlp/docs/pipelines/core/index.md similarity index 100% rename from docs/pipelines/core/index.md rename to edsnlp/docs/pipelines/core/index.md diff --git a/docs/pipelines/core/matcher.md b/edsnlp/docs/pipelines/core/matcher.md similarity index 100% rename from docs/pipelines/core/matcher.md rename to edsnlp/docs/pipelines/core/matcher.md diff --git a/docs/pipelines/core/normalisation.md b/edsnlp/docs/pipelines/core/normalisation.md similarity index 100% rename from docs/pipelines/core/normalisation.md rename to edsnlp/docs/pipelines/core/normalisation.md diff --git a/docs/pipelines/core/resources/alignment.svg b/edsnlp/docs/pipelines/core/resources/alignment.svg similarity index 100% rename from docs/pipelines/core/resources/alignment.svg rename to edsnlp/docs/pipelines/core/resources/alignment.svg diff --git a/docs/pipelines/core/resources/span-alignment.svg b/edsnlp/docs/pipelines/core/resources/span-alignment.svg similarity index 100% rename from docs/pipelines/core/resources/span-alignment.svg rename to edsnlp/docs/pipelines/core/resources/span-alignment.svg diff --git a/docs/pipelines/core/sentences.md b/edsnlp/docs/pipelines/core/sentences.md similarity index 100% rename from docs/pipelines/core/sentences.md rename to edsnlp/docs/pipelines/core/sentences.md diff --git a/docs/pipelines/core/terminology.md b/edsnlp/docs/pipelines/core/terminology.md similarity index 100% rename from docs/pipelines/core/terminology.md rename to edsnlp/docs/pipelines/core/terminology.md diff --git a/docs/pipelines/index.md b/edsnlp/docs/pipelines/index.md similarity index 100% rename from docs/pipelines/index.md rename to edsnlp/docs/pipelines/index.md diff --git a/docs/pipelines/misc/consultation-dates.md b/edsnlp/docs/pipelines/misc/consultation-dates.md similarity index 100% rename from docs/pipelines/misc/consultation-dates.md rename to edsnlp/docs/pipelines/misc/consultation-dates.md diff --git a/docs/pipelines/misc/dates.md b/edsnlp/docs/pipelines/misc/dates.md similarity index 100% rename from docs/pipelines/misc/dates.md rename to edsnlp/docs/pipelines/misc/dates.md diff --git a/docs/pipelines/misc/index.md b/edsnlp/docs/pipelines/misc/index.md similarity index 100% rename from docs/pipelines/misc/index.md rename to edsnlp/docs/pipelines/misc/index.md diff --git a/edsnlp/docs/pipelines/misc/measurements.md b/edsnlp/docs/pipelines/misc/measurements.md new file mode 100644 index 000000000..5aeb5a850 --- /dev/null +++ b/edsnlp/docs/pipelines/misc/measurements.md @@ -0,0 +1,167 @@ +# Measurements + +The `eds.measurements` pipeline's role is 
to detect and normalise numerical measurements within a medical document.
+We use simple regular expressions to extract and normalise measurements, and `SimpleMeasurement` classes to store them.
+
+## Scope
+
+By default, the `eds.measurements` pipeline matches all measurements, ie measurements in most units as well as unitless measurements. If a unit is missing from our registry,
+you can add it manually; otherwise the measurement will be matched without its unit.
+
+If you prefer matching specific measurements only, you can create your own measurement config and set the `all_measurements` parameter to `False`. Some default measurement configs are already provided out of the box:
+
+| Measurement name | Example |
+| ---------------- | ---------------------- |
+| `eds.size` | `1m50`, `1.50m` |
+| `eds.weight` | `12kg`, `1kg300` |
+| `eds.bmi` | `BMI: 24`, `24 kg.m-2` |
+| `eds.volume` | `2 cac`, `8ml` |
+| `eds.bool` | `positive`, `negatif` |
+
+The normalised value can then be accessed via the `span._.value` attribute and converted on the fly to a desired unit (eg `span._.value.g_per_cl` or `span._.value.kg_per_m3` for a density).
+
+The extracted measurements can have one or more of the following characteristics:
+
+- Unitless measurements
+- Measurements with a unit
+- Measurements with a range indication (especially < or >)
+- Measurements with a power
+
+Measurements can be written in many complex forms. Among them, this pipe can detect:
+
+- Measurements with range indication, numerical value, power and units in many different orders, separated by customisable stop words
+- Composed units (eg `1m50`)
+- Measurements with "unitless patterns", ie textual information next to a numerical value that allows us to retrieve a unit even if it is not written (eg in the text `Weight: 80`, this pipe will detect the numerical value `80` and match it to the unit `kg`)
+- Elliptic enumerations (eg `32, 33 et 34mol`) of measurements of the same type, which are split into separate measurements accordingly
+
+## Usage
+
+This pipe works best together with the `eds.dates` and `eds.tables` pipes. They let `eds.measurements` avoid matching dates as measurements and perform a specific matching for each table, benefiting from the structured data.
+
+Matched measurements are labeled with a default measurement name when one is available (eg `eds.size`); otherwise, when `all_measurements` is set to `True`, they are labeled `eds.measurement` and linked to the dimension of their unit.
+
+As said before, each matched measurement can be accessed via `span._.value`. This gives you a `SimpleMeasurement` object with the following attributes:
+
+- `value_range` ("<", "=" or ">")
+- `value`
+- `unit`
+- `registry` (stores the entire unit config, eg the links between units and their dimensions such as `length` or `quantity of matter`)
+
+`SimpleMeasurement` objects are especially useful for converting measurements to another unit of the same dimension (eg densities stay densities). To do so, simply access the attribute named after the usual unit abbreviation, using `per` and `_` as separators (eg `object.kg_per_dm3`, `mol_per_l`, `g_per_cm2`).
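+
+For instance, here is a minimal sketch of this naming convention, assuming `span` is a hypothetical matched density span:
+
+```python
+density = span._.value  # hypothetical SimpleMeasurement, eg 1.2 g/L
+
+density.g_per_l  # 1.2
+density.kg_per_m3  # 1.2 (1 g/L equals 1 kg/m3)
+density.g_per_cl  # 0.012 (1 L equals 100 cL)
+```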
+
+Moreover, for now, `SimpleMeasurement` objects can be manipulated with the following operations:
+
+- compared to another `SimpleMeasurement` object of the same dimension, with automatic conversion (eg a density in `kg_per_m3` and a density in `g_per_l`)
+- added to another `SimpleMeasurement` object of the same dimension, with automatic conversion
+- subtracted from another `SimpleMeasurement` object of the same dimension, with automatic conversion
+
+Note that for all operations listed above, differing `value_range` attributes between the two measurements do not matter: by default, the `value_range` of the first measurement is kept.
+
+Below is a complete example where we extract size, weight and BMI measurements from a simple text.
+
+```python
+import spacy
+
+nlp = spacy.blank("eds")
+nlp.add_pipe(
+    "eds.measurements", config=dict(measurements=["eds.size", "eds.weight", "eds.bmi"])
+)
+
+text = (
+    "Le patient est admis hier, fait 1m78 pour 76kg. "
+    "Les deux nodules bénins sont larges de 1,2 et 2.4mm. "
+    "BMI: 24 "
+)
+
+doc = nlp(text)
+
+measurements = doc.spans["measurements"]
+
+measurements
+# Out: [1m78, 76kg, 1,2, 2.4mm, 24]
+
+measurements[0]
+# Out: 1m78
+
+str(measurements[0]._.value)
+# Out: '1.78 m'
+
+measurements[0]._.value.cm
+# Out: 178.0
+
+measurements[2]
+# Out: 1,2
+
+str(measurements[2]._.value)
+# Out: '1.2 mm'
+
+measurements[2]._.value.mm
+# Out: 1.2
+
+measurements[4]
+# Out: 24
+
+str(measurements[4]._.value)
+# Out: '24.0 kg_per_m2'
+
+measurements[4]._.value.kg_per_m2
+# Out: 24.0
+```
+
+## Custom measurement
+
+You can declare custom measurements by changing the patterns.
+
+```python
+import spacy
+
+nlp = spacy.blank("eds")
+nlp.add_pipe(
+    "eds.measurements",
+    config=dict(
+        measurements={
+            # this name will be used to define the labels of the matched entities
+            "my_custom_surface_measurement": {
+                # This measurement unit is homogenous to square meters
+                "unit": "m2",
+                # To handle cases like "surface: 1.8" (implied m2), we can use
+                # unitless patterns
+                "unitless_patterns": [
+                    {
+                        "terms": ["surface", "aire"],
+                        "ranges": [
+                            {
+                                "unit": "m2",
+                                "min": 0,
+                                "max": 9,
+                            }
+                        ],
+                    }
+                ],
+            },
+        }
+    ),
+)
+```
+
+## Declared extensions
+
+The `eds.measurements` pipeline declares a single [spaCy extension](https://spacy.io/usage/processing-pipelines#custom-components-attributes) on the `Span` object:
+the `value` attribute, which is a `SimpleMeasurement` instance.
+
+## Configuration
+
+The pipeline can be configured using the following parameters:
+
+| Parameter | Explanation | Default |
+| ------------------------ | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
+| `measurements` | A list or dict of the measurements to extract | `None` # Extract measurements from all units |
+| `units_config` | A dict describing the units with lexical patterns, dimensions, scales, ... | ... # Config of mostly all commonly used units |
+| `number_terms` | A dict describing the textual forms of common numbers | ... # Config of mostly all commonly used textual forms of common numbers |
+| `value_range_terms` | A dict describing the textual forms of ranges ("<", "=" or ">") | ... # Config of mostly all commonly used range terms |
+| `stopwords_unitless` | A list of stopwords that do not matter when placed between a unitless trigger and a value | `["par", "sur", "de", "a", ":", ",", "et"]` |
+| `stopwords_measure_unit` | A list of stopwords that do not matter when placed between a measure and a unit | `["|", "¦", "…", "."]` |
+| `measure_before_unit` | A bool to tell if the numerical value is usually placed before the unit | `True` |
+| `unit_divisors` | A list of terms used to divide two units (like: m / s) | `["/", "par"]` |
+| `ignore_excluded` | Whether to ignore excluded tokens for matching | `False` |
+| `attr` | spaCy attribute to match on, eg `NORM` or `TEXT` | `"NORM"` |
+
+## Authors and citation
+
+The `eds.measurements` pipeline was developed by AP-HP's Data Science team.
diff --git a/docs/pipelines/misc/reason.md b/edsnlp/docs/pipelines/misc/reason.md
similarity index 100%
rename from docs/pipelines/misc/reason.md
rename to edsnlp/docs/pipelines/misc/reason.md
diff --git a/docs/pipelines/misc/sections.md b/edsnlp/docs/pipelines/misc/sections.md
similarity index 100%
rename from docs/pipelines/misc/sections.md
rename to edsnlp/docs/pipelines/misc/sections.md
diff --git a/edsnlp/docs/pipelines/misc/tables.md b/edsnlp/docs/pipelines/misc/tables.md
new file mode 100644
index 000000000..0acc54f9d
--- /dev/null
+++ b/edsnlp/docs/pipelines/misc/tables.md
@@ -0,0 +1,105 @@
+# Tables
+
+The `eds.tables` pipeline's role is to detect tables present in a medical document.
+We use simple regular expressions to extract table-like text.
+
+## Usage
+
+This pipe lets you match different forms of tables: they may or may not have a frame, and rows can be spread over multiple consecutive lines (for example, in case of bad parsing). You can also indicate the presence of headers with the `col_names` and `row_names` boolean parameters.
+
+Each matched table is returned as a `Span` object. You can then access an equivalent dictionary-formatted table through the `table` extension, or use `to_pd_table()` to get the equivalent pandas DataFrame. The keys of the dictionary are determined as follows:
+
+- If `col_names` is True, the dictionary keys are the column names (str).
+- Otherwise, if `row_names` is True, the dictionary keys are the row names (str).
+- Otherwise, the dictionary keys are the column indexes (int).
+
+`to_pd_table()` can be customised with the `as_spans` parameter: if set to `True`, the pandas DataFrame will contain the cells as spans; otherwise it will contain the cells as raw strings.
+
+```python
+import spacy
+
+nlp = spacy.blank("fr")
+nlp.add_pipe("eds.normalizer")
+nlp.add_pipe("eds.tables")
+
+text = """
+SERVICE
+MEDECINE INTENSIVE –
+REANIMATION
+Réanimation / Surveillance Continue
+Médicale
+
+COMPTE RENDU D'HOSPITALISATION du 05/06/2020 au 10/06/2020
+Madame DUPONT Marie, née le 16/05/1900, âgée de 20 ans, a été hospitalisée en réanimation du
+05/06/1920 au 10/06/1920 pour intoxication médicamenteuse volontaire.
+
+
+Examens complémentaires
+Hématologie
+Numération
+Leucocytes ¦x10*9/L ¦4.97 ¦4.09-11
+Hématies ¦x10*12/L¦4.68 ¦4.53-5.79
+Hémoglobine ¦g/dL ¦14.8 ¦13.4-16.7
+Hématocrite ¦% ¦44.2 ¦39.2-48.6
+VGM ¦fL ¦94.4 + ¦79.6-94
+TCMH ¦pg ¦31.6 ¦27.3-32.8
+CCMH ¦g/dL ¦33.5 ¦32.4-36.3
+Plaquettes ¦x10*9/L ¦191 ¦172-398
+VMP ¦fL ¦11.5 + ¦7.4-10.8
+
+Sur le plan neurologique : Devant la persistance d'une confusion à distance de l'intoxication au
+...
+
+2/2Pat : |F | | |Intitulé RCP
+
+"""
+
+doc = nlp(text)
+
+# A table span
+table = doc.spans["tables"][0]
+# Leucocytes ¦x10*9/L ¦4.97 ¦4.09-11
+# Hématies ¦x10*12/L¦4.68 ¦4.53-5.79
+# Hémoglobine ¦g/dL ¦14.8 ¦13.4-16.7
+# Hématocrite ¦% ¦44.2 ¦39.2-48.6
+# VGM ¦fL ¦94.4 + ¦79.6-94
+# TCMH ¦pg ¦31.6 ¦27.3-32.8
+# CCMH ¦g/dL ¦33.5 ¦32.4-36.3
+# Plaquettes ¦x10*9/L ¦191 ¦172-398
+# VMP ¦fL ¦11.5 + ¦7.4-10.8
+
+# Convert the span to a pandas table
+df = table._.to_pd_table(as_spans=False)
+type(df)
+# >> pandas.core.frame.DataFrame
+```
+
+The resulting pandas DataFrame:
+
+| | 0 | 1 | 2 | 3 |
+| ---: | :---------- | :------- | :----- | :-------- |
+| 0 | Leucocytes | x10*9/L | 4.97 | 4.09-11 |
+| 1 | Hématies | x10*12/L | 4.68 | 4.53-5.79 |
+| 2 | Hémoglobine | g/dL | 14.8 | 13.4-16.7 |
+| 3 | Hématocrite | % | 44.2 | 39.2-48.6 |
+| 4 | VGM | fL | 94.4 + | 79.6-94 |
+| 5 | TCMH | pg | 31.6 | 27.3-32.8 |
+| 6 | CCMH | g/dL | 33.5 | 32.4-36.3 |
+| 7 | Plaquettes | x10*9/L | 191 | 172-398 |
+| 8 | VMP | fL | 11.5 + | 7.4-10.8 |
+
+## Declared extensions
+
+The `eds.tables` pipeline declares two [spaCy extensions](https://spacy.io/usage/processing-pipelines#custom-components-attributes) on the `Span` object. The first is the `to_pd_table()` method, which returns a parsed pandas version of the table. The second is `table`, which stores the table as a dictionary whose cells are `Span` objects.
+
+## Configuration
+
+The pipeline can be configured using the following parameters:
+
+| Parameter | Explanation | Default |
+| ----------------- | ------------------------------------------------ | ---------------------- |
+| `tables_pattern` | Pattern to identify table spans | `rf"(\b.*{sep}.*\n)+"` |
+| `sep_pattern` | Pattern to identify column separation | `r"¦"` |
+| `ignore_excluded` | Ignore excluded tokens | `True` |
+| `attr` | spaCy attribute to match on, eg `NORM` or `TEXT` | `"TEXT"` |
+
+## Authors and citation
+
+The `eds.tables` pipeline was developed by AP-HP's Data Science team.
diff --git a/docs/pipelines/ner/adicap.md b/edsnlp/docs/pipelines/ner/adicap.md similarity index 100% rename from docs/pipelines/ner/adicap.md rename to edsnlp/docs/pipelines/ner/adicap.md diff --git a/docs/pipelines/ner/cim10.md b/edsnlp/docs/pipelines/ner/cim10.md similarity index 100% rename from docs/pipelines/ner/cim10.md rename to edsnlp/docs/pipelines/ner/cim10.md diff --git a/docs/pipelines/ner/covid.md b/edsnlp/docs/pipelines/ner/covid.md similarity index 100% rename from docs/pipelines/ner/covid.md rename to edsnlp/docs/pipelines/ner/covid.md diff --git a/docs/pipelines/ner/drugs.md b/edsnlp/docs/pipelines/ner/drugs.md similarity index 100% rename from docs/pipelines/ner/drugs.md rename to edsnlp/docs/pipelines/ner/drugs.md diff --git a/docs/pipelines/ner/index.md b/edsnlp/docs/pipelines/ner/index.md similarity index 100% rename from docs/pipelines/ner/index.md rename to edsnlp/docs/pipelines/ner/index.md diff --git a/docs/pipelines/ner/score.md b/edsnlp/docs/pipelines/ner/score.md similarity index 100% rename from docs/pipelines/ner/score.md rename to edsnlp/docs/pipelines/ner/score.md diff --git a/docs/pipelines/ner/umls.md b/edsnlp/docs/pipelines/ner/umls.md similarity index 100% rename from docs/pipelines/ner/umls.md rename to edsnlp/docs/pipelines/ner/umls.md diff --git a/docs/pipelines/qualifiers/family.md b/edsnlp/docs/pipelines/qualifiers/family.md similarity index 100% rename from docs/pipelines/qualifiers/family.md rename to edsnlp/docs/pipelines/qualifiers/family.md diff --git a/docs/pipelines/qualifiers/history.md b/edsnlp/docs/pipelines/qualifiers/history.md similarity index 100% rename from docs/pipelines/qualifiers/history.md rename to edsnlp/docs/pipelines/qualifiers/history.md diff --git a/docs/pipelines/qualifiers/hypothesis.md b/edsnlp/docs/pipelines/qualifiers/hypothesis.md similarity index 100% rename from docs/pipelines/qualifiers/hypothesis.md rename to edsnlp/docs/pipelines/qualifiers/hypothesis.md diff --git a/docs/pipelines/qualifiers/index.md b/edsnlp/docs/pipelines/qualifiers/index.md similarity index 100% rename from docs/pipelines/qualifiers/index.md rename to edsnlp/docs/pipelines/qualifiers/index.md diff --git a/docs/pipelines/qualifiers/negation.md b/edsnlp/docs/pipelines/qualifiers/negation.md similarity index 100% rename from docs/pipelines/qualifiers/negation.md rename to edsnlp/docs/pipelines/qualifiers/negation.md diff --git a/docs/pipelines/qualifiers/reported-speech.md b/edsnlp/docs/pipelines/qualifiers/reported-speech.md similarity index 100% rename from docs/pipelines/qualifiers/reported-speech.md rename to edsnlp/docs/pipelines/qualifiers/reported-speech.md diff --git a/docs/pipelines/trainable/edsnlp-ner.svg b/edsnlp/docs/pipelines/trainable/edsnlp-ner.svg similarity index 100% rename from docs/pipelines/trainable/edsnlp-ner.svg rename to edsnlp/docs/pipelines/trainable/edsnlp-ner.svg diff --git a/docs/pipelines/trainable/index.md b/edsnlp/docs/pipelines/trainable/index.md similarity index 100% rename from docs/pipelines/trainable/index.md rename to edsnlp/docs/pipelines/trainable/index.md diff --git a/docs/pipelines/trainable/ner.md b/edsnlp/docs/pipelines/trainable/ner.md similarity index 100% rename from docs/pipelines/trainable/ner.md rename to edsnlp/docs/pipelines/trainable/ner.md diff --git a/edsnlp/docs/reference/components.md b/edsnlp/docs/reference/components.md new file mode 100644 index 000000000..ac3814956 --- /dev/null +++ b/edsnlp/docs/reference/components.md @@ -0,0 +1,3 @@ +# `edsnlp.components` + +::: edsnlp.components 
diff --git a/edsnlp/docs/reference/conjugator.md b/edsnlp/docs/reference/conjugator.md new file mode 100644 index 000000000..cb00455ce --- /dev/null +++ b/edsnlp/docs/reference/conjugator.md @@ -0,0 +1,3 @@ +# `edsnlp.conjugator` + +::: edsnlp.conjugator diff --git a/edsnlp/docs/reference/connectors/brat.md b/edsnlp/docs/reference/connectors/brat.md new file mode 100644 index 000000000..42b3ff59d --- /dev/null +++ b/edsnlp/docs/reference/connectors/brat.md @@ -0,0 +1,3 @@ +# `edsnlp.connectors.brat` + +::: edsnlp.connectors.brat diff --git a/edsnlp/docs/reference/connectors/index.md b/edsnlp/docs/reference/connectors/index.md new file mode 100644 index 000000000..a43078957 --- /dev/null +++ b/edsnlp/docs/reference/connectors/index.md @@ -0,0 +1,3 @@ +# `edsnlp.connectors` + +::: edsnlp.connectors diff --git a/edsnlp/docs/reference/connectors/labeltool.md b/edsnlp/docs/reference/connectors/labeltool.md new file mode 100644 index 000000000..70a0d11ba --- /dev/null +++ b/edsnlp/docs/reference/connectors/labeltool.md @@ -0,0 +1,3 @@ +# `edsnlp.connectors.labeltool` + +::: edsnlp.connectors.labeltool diff --git a/edsnlp/docs/reference/connectors/omop.md b/edsnlp/docs/reference/connectors/omop.md new file mode 100644 index 000000000..c114883f4 --- /dev/null +++ b/edsnlp/docs/reference/connectors/omop.md @@ -0,0 +1,3 @@ +# `edsnlp.connectors.omop` + +::: edsnlp.connectors.omop diff --git a/edsnlp/docs/reference/extensions.md b/edsnlp/docs/reference/extensions.md new file mode 100644 index 000000000..bd7f97d1d --- /dev/null +++ b/edsnlp/docs/reference/extensions.md @@ -0,0 +1,3 @@ +# `edsnlp.extensions` + +::: edsnlp.extensions diff --git a/edsnlp/docs/reference/index.md b/edsnlp/docs/reference/index.md new file mode 100644 index 000000000..89d0457c5 --- /dev/null +++ b/edsnlp/docs/reference/index.md @@ -0,0 +1,3 @@ +# `edsnlp` + +::: edsnlp diff --git a/edsnlp/docs/reference/language.md b/edsnlp/docs/reference/language.md new file mode 100644 index 000000000..73a67a809 --- /dev/null +++ b/edsnlp/docs/reference/language.md @@ -0,0 +1,3 @@ +# `edsnlp.language` + +::: edsnlp.language diff --git a/edsnlp/docs/reference/matchers/index.md b/edsnlp/docs/reference/matchers/index.md new file mode 100644 index 000000000..6c7e24e6c --- /dev/null +++ b/edsnlp/docs/reference/matchers/index.md @@ -0,0 +1,3 @@ +# `edsnlp.matchers` + +::: edsnlp.matchers diff --git a/edsnlp/docs/reference/matchers/regex.md b/edsnlp/docs/reference/matchers/regex.md new file mode 100644 index 000000000..f94457403 --- /dev/null +++ b/edsnlp/docs/reference/matchers/regex.md @@ -0,0 +1,3 @@ +# `edsnlp.matchers.regex` + +::: edsnlp.matchers.regex diff --git a/edsnlp/docs/reference/matchers/simstring.md b/edsnlp/docs/reference/matchers/simstring.md new file mode 100644 index 000000000..7c59d9a97 --- /dev/null +++ b/edsnlp/docs/reference/matchers/simstring.md @@ -0,0 +1,3 @@ +# `edsnlp.matchers.simstring` + +::: edsnlp.matchers.simstring diff --git a/edsnlp/docs/reference/matchers/utils/index.md b/edsnlp/docs/reference/matchers/utils/index.md new file mode 100644 index 000000000..58dcfd9bb --- /dev/null +++ b/edsnlp/docs/reference/matchers/utils/index.md @@ -0,0 +1,3 @@ +# `edsnlp.matchers.utils` + +::: edsnlp.matchers.utils diff --git a/edsnlp/docs/reference/matchers/utils/offset.md b/edsnlp/docs/reference/matchers/utils/offset.md new file mode 100644 index 000000000..da96a727f --- /dev/null +++ b/edsnlp/docs/reference/matchers/utils/offset.md @@ -0,0 +1,3 @@ +# `edsnlp.matchers.utils.offset` + +::: edsnlp.matchers.utils.offset diff 
--git a/edsnlp/docs/reference/matchers/utils/text.md b/edsnlp/docs/reference/matchers/utils/text.md new file mode 100644 index 000000000..3d5f05796 --- /dev/null +++ b/edsnlp/docs/reference/matchers/utils/text.md @@ -0,0 +1,3 @@ +# `edsnlp.matchers.utils.text` + +::: edsnlp.matchers.utils.text diff --git a/edsnlp/docs/reference/models/index.md b/edsnlp/docs/reference/models/index.md new file mode 100644 index 000000000..72c9adc2a --- /dev/null +++ b/edsnlp/docs/reference/models/index.md @@ -0,0 +1,3 @@ +# `edsnlp.models` + +::: edsnlp.models diff --git a/edsnlp/docs/reference/models/pytorch_wrapper.md b/edsnlp/docs/reference/models/pytorch_wrapper.md new file mode 100644 index 000000000..f2817cd46 --- /dev/null +++ b/edsnlp/docs/reference/models/pytorch_wrapper.md @@ -0,0 +1,3 @@ +# `edsnlp.models.pytorch_wrapper` + +::: edsnlp.models.pytorch_wrapper diff --git a/edsnlp/docs/reference/models/stack_crf_ner.md b/edsnlp/docs/reference/models/stack_crf_ner.md new file mode 100644 index 000000000..414094c62 --- /dev/null +++ b/edsnlp/docs/reference/models/stack_crf_ner.md @@ -0,0 +1,3 @@ +# `edsnlp.models.stack_crf_ner` + +::: edsnlp.models.stack_crf_ner diff --git a/edsnlp/docs/reference/models/torch/crf.md b/edsnlp/docs/reference/models/torch/crf.md new file mode 100644 index 000000000..8863c1ab8 --- /dev/null +++ b/edsnlp/docs/reference/models/torch/crf.md @@ -0,0 +1,3 @@ +# `edsnlp.models.torch.crf` + +::: edsnlp.models.torch.crf diff --git a/edsnlp/docs/reference/models/torch/index.md b/edsnlp/docs/reference/models/torch/index.md new file mode 100644 index 000000000..c186df0c6 --- /dev/null +++ b/edsnlp/docs/reference/models/torch/index.md @@ -0,0 +1,3 @@ +# `edsnlp.models.torch` + +::: edsnlp.models.torch diff --git a/edsnlp/docs/reference/patch_spacy_dot_components.md b/edsnlp/docs/reference/patch_spacy_dot_components.md new file mode 100644 index 000000000..a94d83c1a --- /dev/null +++ b/edsnlp/docs/reference/patch_spacy_dot_components.md @@ -0,0 +1,3 @@ +# `edsnlp.patch_spacy_dot_components` + +::: edsnlp.patch_spacy_dot_components diff --git a/edsnlp/docs/reference/pipelines/base.md b/edsnlp/docs/reference/pipelines/base.md new file mode 100644 index 000000000..9717e2467 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/base.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.base` + +::: edsnlp.pipelines.base diff --git a/edsnlp/docs/reference/pipelines/core/context/context.md b/edsnlp/docs/reference/pipelines/core/context/context.md new file mode 100644 index 000000000..edcf07e7f --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/context/context.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.context.context` + +::: edsnlp.pipelines.core.context.context diff --git a/edsnlp/docs/reference/pipelines/core/context/factory.md b/edsnlp/docs/reference/pipelines/core/context/factory.md new file mode 100644 index 000000000..096b5d364 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/context/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.context.factory` + +::: edsnlp.pipelines.core.context.factory diff --git a/edsnlp/docs/reference/pipelines/core/context/index.md b/edsnlp/docs/reference/pipelines/core/context/index.md new file mode 100644 index 000000000..eebe999fe --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/context/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.context` + +::: edsnlp.pipelines.core.context diff --git a/edsnlp/docs/reference/pipelines/core/contextual_matcher/contextual_matcher.md 
b/edsnlp/docs/reference/pipelines/core/contextual_matcher/contextual_matcher.md new file mode 100644 index 000000000..6eea02499 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/contextual_matcher/contextual_matcher.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.contextual_matcher.contextual_matcher` + +::: edsnlp.pipelines.core.contextual_matcher.contextual_matcher diff --git a/edsnlp/docs/reference/pipelines/core/contextual_matcher/factory.md b/edsnlp/docs/reference/pipelines/core/contextual_matcher/factory.md new file mode 100644 index 000000000..eebca8f65 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/contextual_matcher/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.contextual_matcher.factory` + +::: edsnlp.pipelines.core.contextual_matcher.factory diff --git a/edsnlp/docs/reference/pipelines/core/contextual_matcher/index.md b/edsnlp/docs/reference/pipelines/core/contextual_matcher/index.md new file mode 100644 index 000000000..e69d6b620 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/contextual_matcher/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.contextual_matcher` + +::: edsnlp.pipelines.core.contextual_matcher diff --git a/edsnlp/docs/reference/pipelines/core/contextual_matcher/models.md b/edsnlp/docs/reference/pipelines/core/contextual_matcher/models.md new file mode 100644 index 000000000..c11554814 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/contextual_matcher/models.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.contextual_matcher.models` + +::: edsnlp.pipelines.core.contextual_matcher.models diff --git a/edsnlp/docs/reference/pipelines/core/endlines/endlines.md b/edsnlp/docs/reference/pipelines/core/endlines/endlines.md new file mode 100644 index 000000000..bcc1b2e30 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/endlines/endlines.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.endlines.endlines` + +::: edsnlp.pipelines.core.endlines.endlines diff --git a/edsnlp/docs/reference/pipelines/core/endlines/endlinesmodel.md b/edsnlp/docs/reference/pipelines/core/endlines/endlinesmodel.md new file mode 100644 index 000000000..23b1f79f4 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/endlines/endlinesmodel.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.endlines.endlinesmodel` + +::: edsnlp.pipelines.core.endlines.endlinesmodel diff --git a/edsnlp/docs/reference/pipelines/core/endlines/factory.md b/edsnlp/docs/reference/pipelines/core/endlines/factory.md new file mode 100644 index 000000000..634905a3e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/endlines/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.endlines.factory` + +::: edsnlp.pipelines.core.endlines.factory diff --git a/edsnlp/docs/reference/pipelines/core/endlines/functional.md b/edsnlp/docs/reference/pipelines/core/endlines/functional.md new file mode 100644 index 000000000..33045b3df --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/endlines/functional.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.endlines.functional` + +::: edsnlp.pipelines.core.endlines.functional diff --git a/edsnlp/docs/reference/pipelines/core/endlines/index.md b/edsnlp/docs/reference/pipelines/core/endlines/index.md new file mode 100644 index 000000000..36c0db206 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/endlines/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.endlines` + +::: edsnlp.pipelines.core.endlines diff --git a/edsnlp/docs/reference/pipelines/core/index.md b/edsnlp/docs/reference/pipelines/core/index.md new file mode 100644 index 
000000000..e611fff2d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core` + +::: edsnlp.pipelines.core diff --git a/edsnlp/docs/reference/pipelines/core/matcher/factory.md b/edsnlp/docs/reference/pipelines/core/matcher/factory.md new file mode 100644 index 000000000..657b6205d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/matcher/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.matcher.factory` + +::: edsnlp.pipelines.core.matcher.factory diff --git a/edsnlp/docs/reference/pipelines/core/matcher/index.md b/edsnlp/docs/reference/pipelines/core/matcher/index.md new file mode 100644 index 000000000..fca207785 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/matcher/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.matcher` + +::: edsnlp.pipelines.core.matcher diff --git a/edsnlp/docs/reference/pipelines/core/matcher/matcher.md b/edsnlp/docs/reference/pipelines/core/matcher/matcher.md new file mode 100644 index 000000000..2ca0e5145 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/matcher/matcher.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.matcher.matcher` + +::: edsnlp.pipelines.core.matcher.matcher diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/accents/accents.md b/edsnlp/docs/reference/pipelines/core/normalizer/accents/accents.md new file mode 100644 index 000000000..1089d7d4d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/accents/accents.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.accents.accents` + +::: edsnlp.pipelines.core.normalizer.accents.accents diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/accents/factory.md b/edsnlp/docs/reference/pipelines/core/normalizer/accents/factory.md new file mode 100644 index 000000000..4f26e980e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/accents/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.accents.factory` + +::: edsnlp.pipelines.core.normalizer.accents.factory diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/accents/index.md b/edsnlp/docs/reference/pipelines/core/normalizer/accents/index.md new file mode 100644 index 000000000..06a49942b --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/accents/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.accents` + +::: edsnlp.pipelines.core.normalizer.accents diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/accents/patterns.md b/edsnlp/docs/reference/pipelines/core/normalizer/accents/patterns.md new file mode 100644 index 000000000..23e986ebe --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/accents/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.accents.patterns` + +::: edsnlp.pipelines.core.normalizer.accents.patterns diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/factory.md b/edsnlp/docs/reference/pipelines/core/normalizer/factory.md new file mode 100644 index 000000000..85109de2d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.factory` + +::: edsnlp.pipelines.core.normalizer.factory diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/index.md b/edsnlp/docs/reference/pipelines/core/normalizer/index.md new file mode 100644 index 000000000..f276c371e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer` + +::: edsnlp.pipelines.core.normalizer diff --git 
a/edsnlp/docs/reference/pipelines/core/normalizer/lowercase/factory.md b/edsnlp/docs/reference/pipelines/core/normalizer/lowercase/factory.md new file mode 100644 index 000000000..b9c832c21 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/lowercase/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.lowercase.factory` + +::: edsnlp.pipelines.core.normalizer.lowercase.factory diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/lowercase/index.md b/edsnlp/docs/reference/pipelines/core/normalizer/lowercase/index.md new file mode 100644 index 000000000..b537ad71e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/lowercase/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.lowercase` + +::: edsnlp.pipelines.core.normalizer.lowercase diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/normalizer.md b/edsnlp/docs/reference/pipelines/core/normalizer/normalizer.md new file mode 100644 index 000000000..562186b14 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/normalizer.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.normalizer` + +::: edsnlp.pipelines.core.normalizer.normalizer diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/pollution/factory.md b/edsnlp/docs/reference/pipelines/core/normalizer/pollution/factory.md new file mode 100644 index 000000000..edde5b58e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/pollution/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.pollution.factory` + +::: edsnlp.pipelines.core.normalizer.pollution.factory diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/pollution/index.md b/edsnlp/docs/reference/pipelines/core/normalizer/pollution/index.md new file mode 100644 index 000000000..5f26500c5 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/pollution/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.pollution` + +::: edsnlp.pipelines.core.normalizer.pollution diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/pollution/patterns.md b/edsnlp/docs/reference/pipelines/core/normalizer/pollution/patterns.md new file mode 100644 index 000000000..652be29b8 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/pollution/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.pollution.patterns` + +::: edsnlp.pipelines.core.normalizer.pollution.patterns diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/pollution/pollution.md b/edsnlp/docs/reference/pipelines/core/normalizer/pollution/pollution.md new file mode 100644 index 000000000..f443bbadf --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/pollution/pollution.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.pollution.pollution` + +::: edsnlp.pipelines.core.normalizer.pollution.pollution diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/quotes/factory.md b/edsnlp/docs/reference/pipelines/core/normalizer/quotes/factory.md new file mode 100644 index 000000000..e14bd2802 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/quotes/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.quotes.factory` + +::: edsnlp.pipelines.core.normalizer.quotes.factory diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/quotes/index.md b/edsnlp/docs/reference/pipelines/core/normalizer/quotes/index.md new file mode 100644 index 000000000..65fcf969d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/quotes/index.md @@ -0,0 
+1,3 @@ +# `edsnlp.pipelines.core.normalizer.quotes` + +::: edsnlp.pipelines.core.normalizer.quotes diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/quotes/patterns.md b/edsnlp/docs/reference/pipelines/core/normalizer/quotes/patterns.md new file mode 100644 index 000000000..1129dfc86 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/quotes/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.quotes.patterns` + +::: edsnlp.pipelines.core.normalizer.quotes.patterns diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/quotes/quotes.md b/edsnlp/docs/reference/pipelines/core/normalizer/quotes/quotes.md new file mode 100644 index 000000000..775fcc1b8 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/quotes/quotes.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.quotes.quotes` + +::: edsnlp.pipelines.core.normalizer.quotes.quotes diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/spaces/factory.md b/edsnlp/docs/reference/pipelines/core/normalizer/spaces/factory.md new file mode 100644 index 000000000..206c573a3 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/spaces/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.spaces.factory` + +::: edsnlp.pipelines.core.normalizer.spaces.factory diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/spaces/index.md b/edsnlp/docs/reference/pipelines/core/normalizer/spaces/index.md new file mode 100644 index 000000000..fecfcfc29 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/spaces/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.spaces` + +::: edsnlp.pipelines.core.normalizer.spaces diff --git a/edsnlp/docs/reference/pipelines/core/normalizer/spaces/spaces.md b/edsnlp/docs/reference/pipelines/core/normalizer/spaces/spaces.md new file mode 100644 index 000000000..94fa6da1b --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/normalizer/spaces/spaces.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.normalizer.spaces.spaces` + +::: edsnlp.pipelines.core.normalizer.spaces.spaces diff --git a/edsnlp/docs/reference/pipelines/core/sentences/factory.md b/edsnlp/docs/reference/pipelines/core/sentences/factory.md new file mode 100644 index 000000000..346b9f476 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/sentences/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.sentences.factory` + +::: edsnlp.pipelines.core.sentences.factory diff --git a/edsnlp/docs/reference/pipelines/core/sentences/index.md b/edsnlp/docs/reference/pipelines/core/sentences/index.md new file mode 100644 index 000000000..a415052ac --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/sentences/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.sentences` + +::: edsnlp.pipelines.core.sentences diff --git a/edsnlp/docs/reference/pipelines/core/sentences/terms.md b/edsnlp/docs/reference/pipelines/core/sentences/terms.md new file mode 100644 index 000000000..c586f143e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/sentences/terms.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.sentences.terms` + +::: edsnlp.pipelines.core.sentences.terms diff --git a/edsnlp/docs/reference/pipelines/core/terminology/factory.md b/edsnlp/docs/reference/pipelines/core/terminology/factory.md new file mode 100644 index 000000000..0afca89a1 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/terminology/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.terminology.factory` + +::: edsnlp.pipelines.core.terminology.factory diff --git 
a/edsnlp/docs/reference/pipelines/core/terminology/index.md b/edsnlp/docs/reference/pipelines/core/terminology/index.md new file mode 100644 index 000000000..269e84b43 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/terminology/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.terminology` + +::: edsnlp.pipelines.core.terminology diff --git a/edsnlp/docs/reference/pipelines/core/terminology/terminology.md b/edsnlp/docs/reference/pipelines/core/terminology/terminology.md new file mode 100644 index 000000000..476048021 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/core/terminology/terminology.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.core.terminology.terminology` + +::: edsnlp.pipelines.core.terminology.terminology diff --git a/edsnlp/docs/reference/pipelines/factories.md b/edsnlp/docs/reference/pipelines/factories.md new file mode 100644 index 000000000..bd043e2b6 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/factories.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.factories` + +::: edsnlp.pipelines.factories diff --git a/edsnlp/docs/reference/pipelines/index.md b/edsnlp/docs/reference/pipelines/index.md new file mode 100644 index 000000000..687bcaca5 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines` + +::: edsnlp.pipelines diff --git a/edsnlp/docs/reference/pipelines/misc/consultation_dates/consultation_dates.md b/edsnlp/docs/reference/pipelines/misc/consultation_dates/consultation_dates.md new file mode 100644 index 000000000..a7dfca1ab --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/consultation_dates/consultation_dates.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.consultation_dates.consultation_dates` + +::: edsnlp.pipelines.misc.consultation_dates.consultation_dates diff --git a/edsnlp/docs/reference/pipelines/misc/consultation_dates/factory.md b/edsnlp/docs/reference/pipelines/misc/consultation_dates/factory.md new file mode 100644 index 000000000..43f45b013 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/consultation_dates/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.consultation_dates.factory` + +::: edsnlp.pipelines.misc.consultation_dates.factory diff --git a/edsnlp/docs/reference/pipelines/misc/consultation_dates/index.md b/edsnlp/docs/reference/pipelines/misc/consultation_dates/index.md new file mode 100644 index 000000000..1873b2e24 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/consultation_dates/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.consultation_dates` + +::: edsnlp.pipelines.misc.consultation_dates diff --git a/edsnlp/docs/reference/pipelines/misc/consultation_dates/patterns.md b/edsnlp/docs/reference/pipelines/misc/consultation_dates/patterns.md new file mode 100644 index 000000000..fa527fed3 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/consultation_dates/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.consultation_dates.patterns` + +::: edsnlp.pipelines.misc.consultation_dates.patterns diff --git a/edsnlp/docs/reference/pipelines/misc/dates/dates.md b/edsnlp/docs/reference/pipelines/misc/dates/dates.md new file mode 100644 index 000000000..bb05936b6 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/dates.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.dates` + +::: edsnlp.pipelines.misc.dates.dates diff --git a/edsnlp/docs/reference/pipelines/misc/dates/factory.md b/edsnlp/docs/reference/pipelines/misc/dates/factory.md new file mode 100644 index 000000000..33b431cb6 --- /dev/null +++ 
b/edsnlp/docs/reference/pipelines/misc/dates/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.factory` + +::: edsnlp.pipelines.misc.dates.factory diff --git a/edsnlp/docs/reference/pipelines/misc/dates/index.md b/edsnlp/docs/reference/pipelines/misc/dates/index.md new file mode 100644 index 000000000..614ea1ee3 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates` + +::: edsnlp.pipelines.misc.dates diff --git a/edsnlp/docs/reference/pipelines/misc/dates/models.md b/edsnlp/docs/reference/pipelines/misc/dates/models.md new file mode 100644 index 000000000..d79a7eeff --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/models.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.models` + +::: edsnlp.pipelines.misc.dates.models diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/absolute.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/absolute.md new file mode 100644 index 000000000..77e04e7ed --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/absolute.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.absolute` + +::: edsnlp.pipelines.misc.dates.patterns.absolute diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/days.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/days.md new file mode 100644 index 000000000..b928b78c1 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/days.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.days` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.days diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/delimiters.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/delimiters.md new file mode 100644 index 000000000..a21f41b3e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/delimiters.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.delimiters` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.delimiters diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/directions.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/directions.md new file mode 100644 index 000000000..40304ce02 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/directions.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.directions` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.directions diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/index.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/index.md new file mode 100644 index 000000000..80c069235 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic` + +::: edsnlp.pipelines.misc.dates.patterns.atomic diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/modes.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/modes.md new file mode 100644 index 000000000..c31a86b02 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/modes.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.modes` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.modes diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/months.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/months.md new file mode 100644 index 000000000..585983903 --- /dev/null +++ 
b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/months.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.months` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.months diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/numbers.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/numbers.md new file mode 100644 index 000000000..901944da6 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/numbers.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.numbers` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.numbers diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/time.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/time.md new file mode 100644 index 000000000..64ad27b4d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/time.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.time` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.time diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/units.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/units.md new file mode 100644 index 000000000..32a7029f5 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/units.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.units` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.units diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/years.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/years.md new file mode 100644 index 000000000..6120d228d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/atomic/years.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.atomic.years` + +::: edsnlp.pipelines.misc.dates.patterns.atomic.years diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/current.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/current.md new file mode 100644 index 000000000..378aad259 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/current.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.current` + +::: edsnlp.pipelines.misc.dates.patterns.current diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/duration.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/duration.md new file mode 100644 index 000000000..b9a832562 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/duration.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.duration` + +::: edsnlp.pipelines.misc.dates.patterns.duration diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/false_positive.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/false_positive.md new file mode 100644 index 000000000..1f943d60c --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/false_positive.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.false_positive` + +::: edsnlp.pipelines.misc.dates.patterns.false_positive diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/index.md b/edsnlp/docs/reference/pipelines/misc/dates/patterns/index.md new file mode 100644 index 000000000..49c355615 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns` + +::: edsnlp.pipelines.misc.dates.patterns diff --git a/edsnlp/docs/reference/pipelines/misc/dates/patterns/relative.md 
b/edsnlp/docs/reference/pipelines/misc/dates/patterns/relative.md new file mode 100644 index 000000000..20151b8dc --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/dates/patterns/relative.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.dates.patterns.relative` + +::: edsnlp.pipelines.misc.dates.patterns.relative diff --git a/edsnlp/docs/reference/pipelines/misc/index.md b/edsnlp/docs/reference/pipelines/misc/index.md new file mode 100644 index 000000000..6496d081d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc` + +::: edsnlp.pipelines.misc diff --git a/edsnlp/docs/reference/pipelines/misc/measurements/factory.md b/edsnlp/docs/reference/pipelines/misc/measurements/factory.md new file mode 100644 index 000000000..094289408 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/measurements/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.measurements.factory` + +::: edsnlp.pipelines.misc.measurements.factory diff --git a/edsnlp/docs/reference/pipelines/misc/measurements/index.md b/edsnlp/docs/reference/pipelines/misc/measurements/index.md new file mode 100644 index 000000000..b2faab473 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/measurements/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.measurements` + +::: edsnlp.pipelines.misc.measurements diff --git a/edsnlp/docs/reference/pipelines/misc/measurements/measurements.md b/edsnlp/docs/reference/pipelines/misc/measurements/measurements.md new file mode 100644 index 000000000..3f342691f --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/measurements/measurements.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.measurements.measurements` + +::: edsnlp.pipelines.misc.measurements.measurements diff --git a/edsnlp/docs/reference/pipelines/misc/measurements/patterns.md b/edsnlp/docs/reference/pipelines/misc/measurements/patterns.md new file mode 100644 index 000000000..f578fc732 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/measurements/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.measurements.patterns` + +::: edsnlp.pipelines.misc.measurements.patterns diff --git a/edsnlp/docs/reference/pipelines/misc/reason/factory.md b/edsnlp/docs/reference/pipelines/misc/reason/factory.md new file mode 100644 index 000000000..20ab1f43a --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/reason/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.reason.factory` + +::: edsnlp.pipelines.misc.reason.factory diff --git a/edsnlp/docs/reference/pipelines/misc/reason/index.md b/edsnlp/docs/reference/pipelines/misc/reason/index.md new file mode 100644 index 000000000..08f734858 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/reason/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.reason` + +::: edsnlp.pipelines.misc.reason diff --git a/edsnlp/docs/reference/pipelines/misc/reason/patterns.md b/edsnlp/docs/reference/pipelines/misc/reason/patterns.md new file mode 100644 index 000000000..3c152f437 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/reason/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.reason.patterns` + +::: edsnlp.pipelines.misc.reason.patterns diff --git a/edsnlp/docs/reference/pipelines/misc/reason/reason.md b/edsnlp/docs/reference/pipelines/misc/reason/reason.md new file mode 100644 index 000000000..2c5ed9f55 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/reason/reason.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.reason.reason` + +::: edsnlp.pipelines.misc.reason.reason diff --git 
a/edsnlp/docs/reference/pipelines/misc/sections/factory.md b/edsnlp/docs/reference/pipelines/misc/sections/factory.md new file mode 100644 index 000000000..4f571c56d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/sections/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.sections.factory` + +::: edsnlp.pipelines.misc.sections.factory diff --git a/edsnlp/docs/reference/pipelines/misc/sections/index.md b/edsnlp/docs/reference/pipelines/misc/sections/index.md new file mode 100644 index 000000000..e379c60c3 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/sections/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.sections` + +::: edsnlp.pipelines.misc.sections diff --git a/edsnlp/docs/reference/pipelines/misc/sections/patterns.md b/edsnlp/docs/reference/pipelines/misc/sections/patterns.md new file mode 100644 index 000000000..0465ddbf5 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/sections/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.sections.patterns` + +::: edsnlp.pipelines.misc.sections.patterns diff --git a/edsnlp/docs/reference/pipelines/misc/sections/sections.md b/edsnlp/docs/reference/pipelines/misc/sections/sections.md new file mode 100644 index 000000000..72e3fff03 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/misc/sections/sections.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.misc.sections.sections` + +::: edsnlp.pipelines.misc.sections.sections diff --git a/edsnlp/docs/reference/pipelines/ner/adicap/adicap.md b/edsnlp/docs/reference/pipelines/ner/adicap/adicap.md new file mode 100644 index 000000000..6ff11b0e1 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/adicap/adicap.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.adicap.adicap` + +::: edsnlp.pipelines.ner.adicap.adicap diff --git a/edsnlp/docs/reference/pipelines/ner/adicap/factory.md b/edsnlp/docs/reference/pipelines/ner/adicap/factory.md new file mode 100644 index 000000000..e0e4d89e3 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/adicap/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.adicap.factory` + +::: edsnlp.pipelines.ner.adicap.factory diff --git a/edsnlp/docs/reference/pipelines/ner/adicap/index.md b/edsnlp/docs/reference/pipelines/ner/adicap/index.md new file mode 100644 index 000000000..db502968c --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/adicap/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.adicap` + +::: edsnlp.pipelines.ner.adicap diff --git a/edsnlp/docs/reference/pipelines/ner/adicap/models.md b/edsnlp/docs/reference/pipelines/ner/adicap/models.md new file mode 100644 index 000000000..74b431fc4 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/adicap/models.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.adicap.models` + +::: edsnlp.pipelines.ner.adicap.models diff --git a/edsnlp/docs/reference/pipelines/ner/adicap/patterns.md b/edsnlp/docs/reference/pipelines/ner/adicap/patterns.md new file mode 100644 index 000000000..fcc047f46 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/adicap/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.adicap.patterns` + +::: edsnlp.pipelines.ner.adicap.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/cim10/factory.md b/edsnlp/docs/reference/pipelines/ner/cim10/factory.md new file mode 100644 index 000000000..46a66d27e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/cim10/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.cim10.factory` + +::: edsnlp.pipelines.ner.cim10.factory diff --git a/edsnlp/docs/reference/pipelines/ner/cim10/index.md 
b/edsnlp/docs/reference/pipelines/ner/cim10/index.md new file mode 100644 index 000000000..96a3edec8 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/cim10/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.cim10` + +::: edsnlp.pipelines.ner.cim10 diff --git a/edsnlp/docs/reference/pipelines/ner/cim10/patterns.md b/edsnlp/docs/reference/pipelines/ner/cim10/patterns.md new file mode 100644 index 000000000..0e592c4b7 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/cim10/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.cim10.patterns` + +::: edsnlp.pipelines.ner.cim10.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/covid/factory.md b/edsnlp/docs/reference/pipelines/ner/covid/factory.md new file mode 100644 index 000000000..0ac87f5a0 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/covid/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.covid.factory` + +::: edsnlp.pipelines.ner.covid.factory diff --git a/edsnlp/docs/reference/pipelines/ner/covid/index.md b/edsnlp/docs/reference/pipelines/ner/covid/index.md new file mode 100644 index 000000000..ea3539e02 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/covid/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.covid` + +::: edsnlp.pipelines.ner.covid diff --git a/edsnlp/docs/reference/pipelines/ner/covid/patterns.md b/edsnlp/docs/reference/pipelines/ner/covid/patterns.md new file mode 100644 index 000000000..0298a6dab --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/covid/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.covid.patterns` + +::: edsnlp.pipelines.ner.covid.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/drugs/factory.md b/edsnlp/docs/reference/pipelines/ner/drugs/factory.md new file mode 100644 index 000000000..8555428d5 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/drugs/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.drugs.factory` + +::: edsnlp.pipelines.ner.drugs.factory diff --git a/edsnlp/docs/reference/pipelines/ner/drugs/index.md b/edsnlp/docs/reference/pipelines/ner/drugs/index.md new file mode 100644 index 000000000..562836792 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/drugs/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.drugs` + +::: edsnlp.pipelines.ner.drugs diff --git a/edsnlp/docs/reference/pipelines/ner/drugs/patterns.md b/edsnlp/docs/reference/pipelines/ner/drugs/patterns.md new file mode 100644 index 000000000..8ef57b99d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/drugs/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.drugs.patterns` + +::: edsnlp.pipelines.ner.drugs.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/index.md b/edsnlp/docs/reference/pipelines/ner/index.md new file mode 100644 index 000000000..2bca642d3 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner` + +::: edsnlp.pipelines.ner diff --git a/edsnlp/docs/reference/pipelines/ner/scores/base_score.md b/edsnlp/docs/reference/pipelines/ner/scores/base_score.md new file mode 100644 index 000000000..845a40294 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/base_score.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.base_score` + +::: edsnlp.pipelines.ner.scores.base_score diff --git a/edsnlp/docs/reference/pipelines/ner/scores/charlson/factory.md b/edsnlp/docs/reference/pipelines/ner/scores/charlson/factory.md new file mode 100644 index 000000000..e50b1d768 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/charlson/factory.md @@ -0,0 +1,3 @@ +# 
`edsnlp.pipelines.ner.scores.charlson.factory` + +::: edsnlp.pipelines.ner.scores.charlson.factory diff --git a/edsnlp/docs/reference/pipelines/ner/scores/charlson/index.md b/edsnlp/docs/reference/pipelines/ner/scores/charlson/index.md new file mode 100644 index 000000000..cb0f203ee --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/charlson/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.charlson` + +::: edsnlp.pipelines.ner.scores.charlson diff --git a/edsnlp/docs/reference/pipelines/ner/scores/charlson/patterns.md b/edsnlp/docs/reference/pipelines/ner/scores/charlson/patterns.md new file mode 100644 index 000000000..bbabb6137 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/charlson/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.charlson.patterns` + +::: edsnlp.pipelines.ner.scores.charlson.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/factory.md b/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/factory.md new file mode 100644 index 000000000..434be1c5e --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.elstonellis.factory` + +::: edsnlp.pipelines.ner.scores.elstonellis.factory diff --git a/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/index.md b/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/index.md new file mode 100644 index 000000000..4e02a6d94 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.elstonellis` + +::: edsnlp.pipelines.ner.scores.elstonellis diff --git a/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/patterns.md b/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/patterns.md new file mode 100644 index 000000000..53d717bf4 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/elstonellis/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.elstonellis.patterns` + +::: edsnlp.pipelines.ner.scores.elstonellis.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/factory.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/factory.md new file mode 100644 index 000000000..a93cc1455 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.ccmu.factory` + +::: edsnlp.pipelines.ner.scores.emergency.ccmu.factory diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/index.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/index.md new file mode 100644 index 000000000..3bb9ee9d0 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.ccmu` + +::: edsnlp.pipelines.ner.scores.emergency.ccmu diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/patterns.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/patterns.md new file mode 100644 index 000000000..8a5b93a89 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/ccmu/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.ccmu.patterns` + +::: edsnlp.pipelines.ner.scores.emergency.ccmu.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/factory.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/factory.md new file mode 100644 index 000000000..35aed7169 --- /dev/null +++ 
b/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.gemsa.factory` + +::: edsnlp.pipelines.ner.scores.emergency.gemsa.factory diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/index.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/index.md new file mode 100644 index 000000000..3d79bc9b0 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.gemsa` + +::: edsnlp.pipelines.ner.scores.emergency.gemsa diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/patterns.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/patterns.md new file mode 100644 index 000000000..e4f8e331d --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/gemsa/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.gemsa.patterns` + +::: edsnlp.pipelines.ner.scores.emergency.gemsa.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/index.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/index.md new file mode 100644 index 000000000..58493d38a --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency` + +::: edsnlp.pipelines.ner.scores.emergency diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/factory.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/factory.md new file mode 100644 index 000000000..b47b10288 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.priority.factory` + +::: edsnlp.pipelines.ner.scores.emergency.priority.factory diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/index.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/index.md new file mode 100644 index 000000000..fea97464c --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.priority` + +::: edsnlp.pipelines.ner.scores.emergency.priority diff --git a/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/patterns.md b/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/patterns.md new file mode 100644 index 000000000..df5d28842 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/emergency/priority/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.emergency.priority.patterns` + +::: edsnlp.pipelines.ner.scores.emergency.priority.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/scores/factory.md b/edsnlp/docs/reference/pipelines/ner/scores/factory.md new file mode 100644 index 000000000..0671ca1b6 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.factory` + +::: edsnlp.pipelines.ner.scores.factory diff --git a/edsnlp/docs/reference/pipelines/ner/scores/index.md b/edsnlp/docs/reference/pipelines/ner/scores/index.md new file mode 100644 index 000000000..300d26fd0 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores` + +::: edsnlp.pipelines.ner.scores diff --git a/edsnlp/docs/reference/pipelines/ner/scores/sofa/factory.md b/edsnlp/docs/reference/pipelines/ner/scores/sofa/factory.md new file mode 100644 index 
000000000..216c643bd --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/sofa/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.sofa.factory` + +::: edsnlp.pipelines.ner.scores.sofa.factory diff --git a/edsnlp/docs/reference/pipelines/ner/scores/sofa/index.md b/edsnlp/docs/reference/pipelines/ner/scores/sofa/index.md new file mode 100644 index 000000000..ff8780440 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/sofa/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.sofa` + +::: edsnlp.pipelines.ner.scores.sofa diff --git a/edsnlp/docs/reference/pipelines/ner/scores/sofa/patterns.md b/edsnlp/docs/reference/pipelines/ner/scores/sofa/patterns.md new file mode 100644 index 000000000..a3664dd5b --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/sofa/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.sofa.patterns` + +::: edsnlp.pipelines.ner.scores.sofa.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/scores/sofa/sofa.md b/edsnlp/docs/reference/pipelines/ner/scores/sofa/sofa.md new file mode 100644 index 000000000..ef88dce94 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/sofa/sofa.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.sofa.sofa` + +::: edsnlp.pipelines.ner.scores.sofa.sofa diff --git a/edsnlp/docs/reference/pipelines/ner/scores/tnm/factory.md b/edsnlp/docs/reference/pipelines/ner/scores/tnm/factory.md new file mode 100644 index 000000000..e90e031e1 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/tnm/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.tnm.factory` + +::: edsnlp.pipelines.ner.scores.tnm.factory diff --git a/edsnlp/docs/reference/pipelines/ner/scores/tnm/index.md b/edsnlp/docs/reference/pipelines/ner/scores/tnm/index.md new file mode 100644 index 000000000..65134b9ca --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/tnm/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.tnm` + +::: edsnlp.pipelines.ner.scores.tnm diff --git a/edsnlp/docs/reference/pipelines/ner/scores/tnm/models.md b/edsnlp/docs/reference/pipelines/ner/scores/tnm/models.md new file mode 100644 index 000000000..dc6c97b64 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/tnm/models.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.tnm.models` + +::: edsnlp.pipelines.ner.scores.tnm.models diff --git a/edsnlp/docs/reference/pipelines/ner/scores/tnm/patterns.md b/edsnlp/docs/reference/pipelines/ner/scores/tnm/patterns.md new file mode 100644 index 000000000..82d0fb890 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/tnm/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.tnm.patterns` + +::: edsnlp.pipelines.ner.scores.tnm.patterns diff --git a/edsnlp/docs/reference/pipelines/ner/scores/tnm/tnm.md b/edsnlp/docs/reference/pipelines/ner/scores/tnm/tnm.md new file mode 100644 index 000000000..38ce86f1c --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/scores/tnm/tnm.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.scores.tnm.tnm` + +::: edsnlp.pipelines.ner.scores.tnm.tnm diff --git a/edsnlp/docs/reference/pipelines/ner/umls/factory.md b/edsnlp/docs/reference/pipelines/ner/umls/factory.md new file mode 100644 index 000000000..73f8c3a44 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/umls/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.umls.factory` + +::: edsnlp.pipelines.ner.umls.factory diff --git a/edsnlp/docs/reference/pipelines/ner/umls/index.md b/edsnlp/docs/reference/pipelines/ner/umls/index.md new file mode 100644 index 
000000000..b6a0a4593 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/umls/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.umls` + +::: edsnlp.pipelines.ner.umls diff --git a/edsnlp/docs/reference/pipelines/ner/umls/patterns.md b/edsnlp/docs/reference/pipelines/ner/umls/patterns.md new file mode 100644 index 000000000..72f781ac1 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/ner/umls/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.ner.umls.patterns` + +::: edsnlp.pipelines.ner.umls.patterns diff --git a/edsnlp/docs/reference/pipelines/qualifiers/base.md b/edsnlp/docs/reference/pipelines/qualifiers/base.md new file mode 100644 index 000000000..89b6aff03 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/base.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.base` + +::: edsnlp.pipelines.qualifiers.base diff --git a/edsnlp/docs/reference/pipelines/qualifiers/factories.md b/edsnlp/docs/reference/pipelines/qualifiers/factories.md new file mode 100644 index 000000000..451b0d57b --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/factories.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.factories` + +::: edsnlp.pipelines.qualifiers.factories diff --git a/edsnlp/docs/reference/pipelines/qualifiers/family/factory.md b/edsnlp/docs/reference/pipelines/qualifiers/family/factory.md new file mode 100644 index 000000000..1c1579e87 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/family/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.family.factory` + +::: edsnlp.pipelines.qualifiers.family.factory diff --git a/edsnlp/docs/reference/pipelines/qualifiers/family/family.md b/edsnlp/docs/reference/pipelines/qualifiers/family/family.md new file mode 100644 index 000000000..8c88ab215 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/family/family.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.family.family` + +::: edsnlp.pipelines.qualifiers.family.family diff --git a/edsnlp/docs/reference/pipelines/qualifiers/family/index.md b/edsnlp/docs/reference/pipelines/qualifiers/family/index.md new file mode 100644 index 000000000..ffd044399 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/family/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.family` + +::: edsnlp.pipelines.qualifiers.family diff --git a/edsnlp/docs/reference/pipelines/qualifiers/family/patterns.md b/edsnlp/docs/reference/pipelines/qualifiers/family/patterns.md new file mode 100644 index 000000000..5310ceb08 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/family/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.family.patterns` + +::: edsnlp.pipelines.qualifiers.family.patterns diff --git a/edsnlp/docs/reference/pipelines/qualifiers/history/factory.md b/edsnlp/docs/reference/pipelines/qualifiers/history/factory.md new file mode 100644 index 000000000..8a5c7cdc7 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/history/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.history.factory` + +::: edsnlp.pipelines.qualifiers.history.factory diff --git a/edsnlp/docs/reference/pipelines/qualifiers/history/history.md b/edsnlp/docs/reference/pipelines/qualifiers/history/history.md new file mode 100644 index 000000000..87020b5e7 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/history/history.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.history.history` + +::: edsnlp.pipelines.qualifiers.history.history diff --git a/edsnlp/docs/reference/pipelines/qualifiers/history/index.md 
b/edsnlp/docs/reference/pipelines/qualifiers/history/index.md new file mode 100644 index 000000000..42d8de8e6 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/history/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.history` + +::: edsnlp.pipelines.qualifiers.history diff --git a/edsnlp/docs/reference/pipelines/qualifiers/history/patterns.md b/edsnlp/docs/reference/pipelines/qualifiers/history/patterns.md new file mode 100644 index 000000000..6369de836 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/history/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.history.patterns` + +::: edsnlp.pipelines.qualifiers.history.patterns diff --git a/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/factory.md b/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/factory.md new file mode 100644 index 000000000..f51bcf705 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.hypothesis.factory` + +::: edsnlp.pipelines.qualifiers.hypothesis.factory diff --git a/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/hypothesis.md b/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/hypothesis.md new file mode 100644 index 000000000..aab79687a --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/hypothesis.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.hypothesis.hypothesis` + +::: edsnlp.pipelines.qualifiers.hypothesis.hypothesis diff --git a/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/index.md b/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/index.md new file mode 100644 index 000000000..0c9b50508 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.hypothesis` + +::: edsnlp.pipelines.qualifiers.hypothesis diff --git a/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/patterns.md b/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/patterns.md new file mode 100644 index 000000000..41ac3e0d0 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/hypothesis/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.hypothesis.patterns` + +::: edsnlp.pipelines.qualifiers.hypothesis.patterns diff --git a/edsnlp/docs/reference/pipelines/qualifiers/index.md b/edsnlp/docs/reference/pipelines/qualifiers/index.md new file mode 100644 index 000000000..4c3cf698a --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers` + +::: edsnlp.pipelines.qualifiers diff --git a/edsnlp/docs/reference/pipelines/qualifiers/negation/factory.md b/edsnlp/docs/reference/pipelines/qualifiers/negation/factory.md new file mode 100644 index 000000000..867b4462b --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/negation/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.negation.factory` + +::: edsnlp.pipelines.qualifiers.negation.factory diff --git a/edsnlp/docs/reference/pipelines/qualifiers/negation/index.md b/edsnlp/docs/reference/pipelines/qualifiers/negation/index.md new file mode 100644 index 000000000..4e20cb2e3 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/negation/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.negation` + +::: edsnlp.pipelines.qualifiers.negation diff --git a/edsnlp/docs/reference/pipelines/qualifiers/negation/negation.md b/edsnlp/docs/reference/pipelines/qualifiers/negation/negation.md new file mode 100644 index 000000000..d16e56171 --- 
/dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/negation/negation.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.negation.negation` + +::: edsnlp.pipelines.qualifiers.negation.negation diff --git a/edsnlp/docs/reference/pipelines/qualifiers/negation/patterns.md b/edsnlp/docs/reference/pipelines/qualifiers/negation/patterns.md new file mode 100644 index 000000000..fbc504d97 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/negation/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.negation.patterns` + +::: edsnlp.pipelines.qualifiers.negation.patterns diff --git a/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/factory.md b/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/factory.md new file mode 100644 index 000000000..07a124716 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/factory.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.reported_speech.factory` + +::: edsnlp.pipelines.qualifiers.reported_speech.factory diff --git a/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/index.md b/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/index.md new file mode 100644 index 000000000..9e5200189 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.reported_speech` + +::: edsnlp.pipelines.qualifiers.reported_speech diff --git a/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/patterns.md b/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/patterns.md new file mode 100644 index 000000000..cd50eecd6 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/patterns.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.reported_speech.patterns` + +::: edsnlp.pipelines.qualifiers.reported_speech.patterns diff --git a/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/reported_speech.md b/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/reported_speech.md new file mode 100644 index 000000000..e75ae1495 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/qualifiers/reported_speech/reported_speech.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.qualifiers.reported_speech.reported_speech` + +::: edsnlp.pipelines.qualifiers.reported_speech.reported_speech diff --git a/edsnlp/docs/reference/pipelines/terminations.md b/edsnlp/docs/reference/pipelines/terminations.md new file mode 100644 index 000000000..1cb07f9e6 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/terminations.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.terminations` + +::: edsnlp.pipelines.terminations diff --git a/edsnlp/docs/reference/pipelines/trainable/index.md b/edsnlp/docs/reference/pipelines/trainable/index.md new file mode 100644 index 000000000..dce41bd34 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/trainable/index.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.trainable` + +::: edsnlp.pipelines.trainable diff --git a/edsnlp/docs/reference/pipelines/trainable/nested_ner.md b/edsnlp/docs/reference/pipelines/trainable/nested_ner.md new file mode 100644 index 000000000..79a3349d8 --- /dev/null +++ b/edsnlp/docs/reference/pipelines/trainable/nested_ner.md @@ -0,0 +1,3 @@ +# `edsnlp.pipelines.trainable.nested_ner` + +::: edsnlp.pipelines.trainable.nested_ner diff --git a/edsnlp/docs/reference/processing/distributed.md b/edsnlp/docs/reference/processing/distributed.md new file mode 100644 index 000000000..b209fbe98 --- /dev/null +++ b/edsnlp/docs/reference/processing/distributed.md @@ -0,0 +1,3 @@ +# 
`edsnlp.processing.distributed` + +::: edsnlp.processing.distributed diff --git a/edsnlp/docs/reference/processing/helpers.md b/edsnlp/docs/reference/processing/helpers.md new file mode 100644 index 000000000..48657b2bd --- /dev/null +++ b/edsnlp/docs/reference/processing/helpers.md @@ -0,0 +1,3 @@ +# `edsnlp.processing.helpers` + +::: edsnlp.processing.helpers diff --git a/edsnlp/docs/reference/processing/index.md b/edsnlp/docs/reference/processing/index.md new file mode 100644 index 000000000..626e52c2c --- /dev/null +++ b/edsnlp/docs/reference/processing/index.md @@ -0,0 +1,3 @@ +# `edsnlp.processing` + +::: edsnlp.processing diff --git a/edsnlp/docs/reference/processing/parallel.md b/edsnlp/docs/reference/processing/parallel.md new file mode 100644 index 000000000..52c0349a4 --- /dev/null +++ b/edsnlp/docs/reference/processing/parallel.md @@ -0,0 +1,3 @@ +# `edsnlp.processing.parallel` + +::: edsnlp.processing.parallel diff --git a/edsnlp/docs/reference/processing/simple.md b/edsnlp/docs/reference/processing/simple.md new file mode 100644 index 000000000..cbc825c01 --- /dev/null +++ b/edsnlp/docs/reference/processing/simple.md @@ -0,0 +1,3 @@ +# `edsnlp.processing.simple` + +::: edsnlp.processing.simple diff --git a/edsnlp/docs/reference/processing/utils.md b/edsnlp/docs/reference/processing/utils.md new file mode 100644 index 000000000..ad415738c --- /dev/null +++ b/edsnlp/docs/reference/processing/utils.md @@ -0,0 +1,3 @@ +# `edsnlp.processing.utils` + +::: edsnlp.processing.utils diff --git a/edsnlp/docs/reference/processing/wrapper.md b/edsnlp/docs/reference/processing/wrapper.md new file mode 100644 index 000000000..caefa8d2d --- /dev/null +++ b/edsnlp/docs/reference/processing/wrapper.md @@ -0,0 +1,3 @@ +# `edsnlp.processing.wrapper` + +::: edsnlp.processing.wrapper diff --git a/edsnlp/docs/reference/utils/blocs.md b/edsnlp/docs/reference/utils/blocs.md new file mode 100644 index 000000000..559d9e814 --- /dev/null +++ b/edsnlp/docs/reference/utils/blocs.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.blocs` + +::: edsnlp.utils.blocs diff --git a/edsnlp/docs/reference/utils/colors.md b/edsnlp/docs/reference/utils/colors.md new file mode 100644 index 000000000..822c8fce0 --- /dev/null +++ b/edsnlp/docs/reference/utils/colors.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.colors` + +::: edsnlp.utils.colors diff --git a/edsnlp/docs/reference/utils/deprecation.md b/edsnlp/docs/reference/utils/deprecation.md new file mode 100644 index 000000000..116dc9d5c --- /dev/null +++ b/edsnlp/docs/reference/utils/deprecation.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.deprecation` + +::: edsnlp.utils.deprecation diff --git a/edsnlp/docs/reference/utils/examples.md b/edsnlp/docs/reference/utils/examples.md new file mode 100644 index 000000000..a07b55697 --- /dev/null +++ b/edsnlp/docs/reference/utils/examples.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.examples` + +::: edsnlp.utils.examples diff --git a/edsnlp/docs/reference/utils/extensions.md b/edsnlp/docs/reference/utils/extensions.md new file mode 100644 index 000000000..d0575eebd --- /dev/null +++ b/edsnlp/docs/reference/utils/extensions.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.extensions` + +::: edsnlp.utils.extensions diff --git a/edsnlp/docs/reference/utils/filter.md b/edsnlp/docs/reference/utils/filter.md new file mode 100644 index 000000000..8e6ce836f --- /dev/null +++ b/edsnlp/docs/reference/utils/filter.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.filter` + +::: edsnlp.utils.filter diff --git a/edsnlp/docs/reference/utils/inclusion.md b/edsnlp/docs/reference/utils/inclusion.md new file 
mode 100644 index 000000000..81bd9f33c --- /dev/null +++ b/edsnlp/docs/reference/utils/inclusion.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.inclusion` + +::: edsnlp.utils.inclusion diff --git a/edsnlp/docs/reference/utils/index.md b/edsnlp/docs/reference/utils/index.md new file mode 100644 index 000000000..042f9f2cc --- /dev/null +++ b/edsnlp/docs/reference/utils/index.md @@ -0,0 +1,3 @@ +# `edsnlp.utils` + +::: edsnlp.utils diff --git a/edsnlp/docs/reference/utils/lists.md b/edsnlp/docs/reference/utils/lists.md new file mode 100644 index 000000000..a323c30eb --- /dev/null +++ b/edsnlp/docs/reference/utils/lists.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.lists` + +::: edsnlp.utils.lists diff --git a/edsnlp/docs/reference/utils/merge_configs.md b/edsnlp/docs/reference/utils/merge_configs.md new file mode 100644 index 000000000..930f356b4 --- /dev/null +++ b/edsnlp/docs/reference/utils/merge_configs.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.merge_configs` + +::: edsnlp.utils.merge_configs diff --git a/edsnlp/docs/reference/utils/regex.md b/edsnlp/docs/reference/utils/regex.md new file mode 100644 index 000000000..1eb13aca0 --- /dev/null +++ b/edsnlp/docs/reference/utils/regex.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.regex` + +::: edsnlp.utils.regex diff --git a/edsnlp/docs/reference/utils/resources.md b/edsnlp/docs/reference/utils/resources.md new file mode 100644 index 000000000..5f5b4873b --- /dev/null +++ b/edsnlp/docs/reference/utils/resources.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.resources` + +::: edsnlp.utils.resources diff --git a/edsnlp/docs/reference/utils/training.md b/edsnlp/docs/reference/utils/training.md new file mode 100644 index 000000000..c3df27135 --- /dev/null +++ b/edsnlp/docs/reference/utils/training.md @@ -0,0 +1,3 @@ +# `edsnlp.utils.training` + +::: edsnlp.utils.training diff --git a/edsnlp/docs/reference/viz/index.md b/edsnlp/docs/reference/viz/index.md new file mode 100644 index 000000000..6355f6f36 --- /dev/null +++ b/edsnlp/docs/reference/viz/index.md @@ -0,0 +1,3 @@ +# `edsnlp.viz` + +::: edsnlp.viz diff --git a/edsnlp/docs/reference/viz/quick_examples.md b/edsnlp/docs/reference/viz/quick_examples.md new file mode 100644 index 000000000..88311e26a --- /dev/null +++ b/edsnlp/docs/reference/viz/quick_examples.md @@ -0,0 +1,3 @@ +# `edsnlp.viz.quick_examples` + +::: edsnlp.viz.quick_examples diff --git a/docs/references.bib b/edsnlp/docs/references.bib similarity index 100% rename from docs/references.bib rename to edsnlp/docs/references.bib diff --git a/docs/resources/sections.svg b/edsnlp/docs/resources/sections.svg similarity index 100% rename from docs/resources/sections.svg rename to edsnlp/docs/resources/sections.svg diff --git a/docs/scripts/plugin.py b/edsnlp/docs/scripts/plugin.py similarity index 100% rename from docs/scripts/plugin.py rename to edsnlp/docs/scripts/plugin.py diff --git a/docs/tokenizers.md b/edsnlp/docs/tokenizers.md similarity index 100% rename from docs/tokenizers.md rename to edsnlp/docs/tokenizers.md diff --git a/docs/tutorials/detecting-dates.md b/edsnlp/docs/tutorials/detecting-dates.md similarity index 100% rename from docs/tutorials/detecting-dates.md rename to edsnlp/docs/tutorials/detecting-dates.md diff --git a/docs/tutorials/endlines.md b/edsnlp/docs/tutorials/endlines.md similarity index 100% rename from docs/tutorials/endlines.md rename to edsnlp/docs/tutorials/endlines.md diff --git a/docs/tutorials/index.md b/edsnlp/docs/tutorials/index.md similarity index 100% rename from docs/tutorials/index.md rename to edsnlp/docs/tutorials/index.md diff --git 
a/docs/tutorials/matching-a-terminology.md b/edsnlp/docs/tutorials/matching-a-terminology.md similarity index 100% rename from docs/tutorials/matching-a-terminology.md rename to edsnlp/docs/tutorials/matching-a-terminology.md diff --git a/docs/tutorials/multiple-texts.md b/edsnlp/docs/tutorials/multiple-texts.md similarity index 100% rename from docs/tutorials/multiple-texts.md rename to edsnlp/docs/tutorials/multiple-texts.md diff --git a/docs/tutorials/qualifying-entities.md b/edsnlp/docs/tutorials/qualifying-entities.md similarity index 100% rename from docs/tutorials/qualifying-entities.md rename to edsnlp/docs/tutorials/qualifying-entities.md diff --git a/docs/tutorials/quick-examples.md b/edsnlp/docs/tutorials/quick-examples.md similarity index 100% rename from docs/tutorials/quick-examples.md rename to edsnlp/docs/tutorials/quick-examples.md diff --git a/docs/tutorials/reason.md b/edsnlp/docs/tutorials/reason.md similarity index 100% rename from docs/tutorials/reason.md rename to edsnlp/docs/tutorials/reason.md diff --git a/docs/tutorials/spacy101.md b/edsnlp/docs/tutorials/spacy101.md similarity index 100% rename from docs/tutorials/spacy101.md rename to edsnlp/docs/tutorials/spacy101.md diff --git a/docs/utilities/connectors/brat.md b/edsnlp/docs/utilities/connectors/brat.md similarity index 100% rename from docs/utilities/connectors/brat.md rename to edsnlp/docs/utilities/connectors/brat.md diff --git a/docs/utilities/connectors/index.md b/edsnlp/docs/utilities/connectors/index.md similarity index 100% rename from docs/utilities/connectors/index.md rename to edsnlp/docs/utilities/connectors/index.md diff --git a/docs/utilities/connectors/labeltool.md b/edsnlp/docs/utilities/connectors/labeltool.md similarity index 100% rename from docs/utilities/connectors/labeltool.md rename to edsnlp/docs/utilities/connectors/labeltool.md diff --git a/docs/utilities/connectors/omop.md b/edsnlp/docs/utilities/connectors/omop.md similarity index 100% rename from docs/utilities/connectors/omop.md rename to edsnlp/docs/utilities/connectors/omop.md diff --git a/docs/utilities/evaluation.md b/edsnlp/docs/utilities/evaluation.md similarity index 100% rename from docs/utilities/evaluation.md rename to edsnlp/docs/utilities/evaluation.md diff --git a/docs/utilities/index.md b/edsnlp/docs/utilities/index.md similarity index 100% rename from docs/utilities/index.md rename to edsnlp/docs/utilities/index.md diff --git a/docs/utilities/matchers.md b/edsnlp/docs/utilities/matchers.md similarity index 100% rename from docs/utilities/matchers.md rename to edsnlp/docs/utilities/matchers.md diff --git a/docs/utilities/processing/index.md b/edsnlp/docs/utilities/processing/index.md similarity index 100% rename from docs/utilities/processing/index.md rename to edsnlp/docs/utilities/processing/index.md diff --git a/docs/utilities/processing/multi.md b/edsnlp/docs/utilities/processing/multi.md similarity index 100% rename from docs/utilities/processing/multi.md rename to edsnlp/docs/utilities/processing/multi.md diff --git a/docs/utilities/processing/single.md b/edsnlp/docs/utilities/processing/single.md similarity index 100% rename from docs/utilities/processing/single.md rename to edsnlp/docs/utilities/processing/single.md diff --git a/docs/utilities/processing/spark.md b/edsnlp/docs/utilities/processing/spark.md similarity index 100% rename from docs/utilities/processing/spark.md rename to edsnlp/docs/utilities/processing/spark.md diff --git a/docs/utilities/regex.md b/edsnlp/docs/utilities/regex.md similarity 
index 100% rename from docs/utilities/regex.md rename to edsnlp/docs/utilities/regex.md diff --git a/docs/utilities/tests/blocs.md b/edsnlp/docs/utilities/tests/blocs.md similarity index 100% rename from docs/utilities/tests/blocs.md rename to edsnlp/docs/utilities/tests/blocs.md diff --git a/docs/utilities/tests/examples.md b/edsnlp/docs/utilities/tests/examples.md similarity index 100% rename from docs/utilities/tests/examples.md rename to edsnlp/docs/utilities/tests/examples.md diff --git a/docs/utilities/tests/index.md b/edsnlp/docs/utilities/tests/index.md similarity index 100% rename from docs/utilities/tests/index.md rename to edsnlp/docs/utilities/tests/index.md diff --git a/edsnlp/edsnlp/__init__.py b/edsnlp/edsnlp/__init__.py new file mode 100644 index 000000000..4175a4257 --- /dev/null +++ b/edsnlp/edsnlp/__init__.py @@ -0,0 +1,26 @@ +""" +EDS-NLP +""" + +from pathlib import Path + +import spacy + +from . import extensions +from .evaluate import evaluate +from .language import * + +from . import patch_spacy_dot_components # isort: skip + + +__version__ = "0.8.0" + +BASE_DIR = Path(__file__).parent + + +for ext in ["Allergie", "Action", "Certainty", "Temporality", "Negation", "Family"]: + if not spacy.tokens.Span.has_extension(ext): + spacy.tokens.Span.set_extension(ext, default=None) + +print("Monkey patching spacy.Language.evaluate") +spacy.Language.evaluate = evaluate diff --git a/edsnlp/components.py b/edsnlp/edsnlp/components.py similarity index 100% rename from edsnlp/components.py rename to edsnlp/edsnlp/components.py diff --git a/edsnlp/conjugator.py b/edsnlp/edsnlp/conjugator.py similarity index 100% rename from edsnlp/conjugator.py rename to edsnlp/edsnlp/conjugator.py diff --git a/edsnlp/connectors/__init__.py b/edsnlp/edsnlp/connectors/__init__.py similarity index 100% rename from edsnlp/connectors/__init__.py rename to edsnlp/edsnlp/connectors/__init__.py diff --git a/edsnlp/connectors/brat.py b/edsnlp/edsnlp/connectors/brat.py similarity index 95% rename from edsnlp/connectors/brat.py rename to edsnlp/edsnlp/connectors/brat.py index b69ac37ff..6dceecd2a 100644 --- a/edsnlp/connectors/brat.py +++ b/edsnlp/edsnlp/connectors/brat.py @@ -226,18 +226,17 @@ def export_to_brat(doc, txt_filename, overwrite_txt=False, overwrite_ann=False): ): idx = fragment["begin"] entity_text = doc["text"][fragment["begin"] : fragment["end"]] - for part in entity_text.split("\n"): - begin = idx - end = idx + len(part) - idx = end + 1 - if begin != end: - spans.append((begin, end)) + # eg: "mon entité \n est problématique" + for match in re.finditer( + r"\s*(.+?)(?:( *\n+)+ *|$)", entity_text, flags=re.DOTALL + ): + spans.append((idx + match.start(1), idx + match.end(1))) print( "{}\t{} {}\t{}".format( brat_entity_id, str(entity["label"]), ";".join(" ".join(map(str, span)) for span in spans), - entity_text.replace("\n", " "), + " ".join(doc["text"][begin:end] for begin, end in spans), ), file=f, ) @@ -313,20 +312,16 @@ def load_brat(self) -> List[Dict]: """ Transforms a BRAT folder to a list of spaCy documents. - Parameters - ---------- - nlp: - A spaCy pipeline. - Returns ------- - docs: + List[Dict] List of spaCy documents, with annotations in the `ents` attribute. """ filenames = [ - path.relative_to(self.directory) for path in self.directory.rglob("*.txt") + path.relative_to(self.directory) + for path in self.directory.rglob("*.txt") + if "checkpoint" not in path.stem ] - assert len(filenames), f"BRAT directory {self.directory} is empty!" 
         logger.info(
@@ -371,7 +366,6 @@ def brat2docs(self, nlp: Language, run_pipe=False) -> List[Doc]:
         """
 
         annotations = self.load_brat()
-        texts = [doc["text"] for doc in annotations]
 
         docs = []
 
@@ -390,7 +384,6 @@ def brat2docs(self, nlp: Language, run_pipe=False) -> List[Doc]:
         ):
             doc._.note_id = doc_annotations["note_id"]
-
             spans = []
             span_groups = defaultdict(lambda: [])
 
@@ -400,6 +393,7 @@ def brat2docs(self, nlp: Language, run_pipe=False) -> List[Doc]:
                     Span.set_extension(dst, default=None)
 
             encountered_attributes = set()
+            encountered_span_groups = set()
             for ent in doc_annotations["entities"]:
                 if self.attr_map is None:
                     for a in ent["attributes"]:
@@ -423,15 +417,8 @@ def brat2docs(self, nlp: Language, run_pipe=False) -> List[Doc]:
                     )
                     span._.set(new_name, a["value"] if a is not None else True)
                 spans.append(span)
-
-                if self.span_groups is None or ent["label"] in self.span_groups:
-                    span_groups[ent["label"]].append(span)
-
-            if self.attr_map is None:
-                self.attr_map = {k: k for k in encountered_attributes}
-
-            if self.span_groups is None:
-                self.span_groups = sorted(span_groups.keys())
+                span_groups[ent["label"]].append(span)
+                encountered_span_groups.add(ent["label"])
 
             doc.ents = filter_spans(spans)
             for group_name, group in span_groups.items():
@@ -439,6 +426,10 @@ def brat2docs(self, nlp: Language, run_pipe=False) -> List[Doc]:
 
             docs.append(doc)
 
+        if self.span_groups is None:
+            self.span_groups = sorted(list(encountered_span_groups))
+        if self.attr_map is None:
+            self.attr_map = {k: k for k in encountered_attributes}
         return docs
 
     def doc2brat(self, doc: Doc) -> None:
diff --git a/edsnlp/connectors/labeltool.py b/edsnlp/edsnlp/connectors/labeltool.py
similarity index 100%
rename from edsnlp/connectors/labeltool.py
rename to edsnlp/edsnlp/connectors/labeltool.py
diff --git a/edsnlp/connectors/omop.py b/edsnlp/edsnlp/connectors/omop.py
similarity index 100%
rename from edsnlp/connectors/omop.py
rename to edsnlp/edsnlp/connectors/omop.py
diff --git a/edsnlp/edsnlp/corpus_reader.py b/edsnlp/edsnlp/corpus_reader.py
new file mode 100644
index 000000000..74098e622
--- /dev/null
+++ b/edsnlp/edsnlp/corpus_reader.py
@@ -0,0 +1,175 @@
+import random
+from pathlib import Path
+from typing import Callable, Iterable, Iterator, Optional, Union
+
+import spacy
+from spacy import Errors, Vocab
+from spacy.language import Language
+from spacy.tokens import Doc, DocBin, Span
+from spacy.training import Corpus, Example, dont_augment
+from spacy.training.corpus import FILE_TYPE, walk_corpus
+
+if not Doc.has_extension("context"):
+    Doc.set_extension("context", default=dict())
+if not Doc.has_extension("note_id"):
+    Doc.set_extension("note_id", default=None)
+if not Doc.has_extension("note_datetime"):
+    Doc.set_extension("note_datetime", default=None)
+if not Doc.has_extension("note_class_source_value"):
+    Doc.set_extension("note_class_source_value", default=None)
+if not Doc.has_extension("split"):
+    Doc.set_extension("split", default=None)
+
+
+@spacy.registry.readers("eds.Corpus.v1")
+class Corpus(Corpus):
+    def __init__(
+        self,
+        path: Union[str, Path],
+        *,
+        limit: int = 0,
+        gold_preproc: bool = False,
+        max_length: int = 0,
+        augmenter: Optional[Callable] = None,
+        shuffle: bool = False,
+        filter_expr: Optional[str] = None,
+        seed: Optional[int] = None,
+    ) -> None:
+        if path is None:
+            raise ValueError(Errors.E913)
+        spacy.util.logger.debug(f"Loading corpus from path: {path}")
+        self.path = spacy.util.ensure_path(path)
+        self.gold_preproc = gold_preproc
+        self.max_length = max_length
+        self.limit = limit
+        self.augmenter = augmenter if augmenter is not None else dont_augment
+        self.shuffle = shuffle
+        self.filter_fn = eval(f"lambda doc: {filter_expr}") if filter_expr else None
+        if filter_expr is not None:
+            spacy.util.logger.info(f"Filtering corpus with expression: {filter_expr}")
+        self.seed = seed
+
+    def __call__(self, nlp: "Language") -> Iterator[Example]:
+        """Yield examples from the data.
+
+        A difference with the standard spacy.Corpus object is that we:
+
+        - first shuffle the data,
+        - then subset it.
+
+        nlp (Language): The current nlp object.
+        YIELDS (Example): The examples.
+        DOCS: https://spacy.io/api/corpus#call
+        """
+        ref_docs = self.read_docbin(nlp.vocab, walk_corpus(self.path, FILE_TYPE))
+        if self.shuffle:
+            ref_docs = list(ref_docs)  # type: ignore
+            random.Random(self.seed).shuffle(ref_docs)  # type: ignore
+
+        if self.limit >= 1:
+            ref_docs = ref_docs[: self.limit]
+
+        if self.gold_preproc:
+            examples = self.make_examples_gold_preproc(nlp, ref_docs)
+        else:
+            examples = self.make_examples(nlp, ref_docs)
+        for real_eg in examples:
+            if len(real_eg) and len(real_eg.reference.ents):
+                for augmented_eg in self.augmenter(
+                    nlp, real_eg
+                ):  # type: ignore[operator]
+                    yield augmented_eg
+
+    def subset_doc(self, doc, start, end):
+        new_doc = doc[start:end].as_doc(copy_user_data=True)
+        for name, group in doc.spans.items():
+            new_doc.spans[name] = [
+                Span(
+                    new_doc,
+                    max(0, span.start - start),
+                    min(end, span.end) - start,
+                    span.label,
+                )
+                for span in group
+                if span.end > start and span.start < end
+            ]
+
+        return new_doc
+
+    def make_examples(
+        self, nlp: "Language", reference_docs: Iterable[Doc]
+    ) -> Iterator[Example]:
+        for reference in reference_docs:
+            if len(reference) == 0:
+                continue
+            elif self.max_length == 0 or len(reference) < self.max_length:
+                yield self._make_example(nlp, reference, False)
+            else:
+                start = 0
+                end = 0
+                for sent in (
+                    reference.sents
+                    if reference.has_annotation("SENT_START")
+                    else (reference[:],)
+                ):
+                    if len(sent) == 0:
+                        continue
+                    # If the sentence adds too many tokens
+                    if sent.end - start > self.max_length:
+                        # and the current buffer is already too large,
+                        # flush it in max_length-sized chunks
+                        while end - start > self.max_length:
+                            yield self._make_example(
+                                nlp,
+                                self.subset_doc(
+                                    reference, start, start + self.max_length
+                                ),
+                                False,
+                            )
+                            start = start + self.max_length
+                        yield self._make_example(
+                            nlp, self.subset_doc(reference, start, end), False
+                        )
+                        start = end
+
+                    # Otherwise, extend the current buffer
+                    end = sent.end
+
+                while end - start > self.max_length:
+                    yield self._make_example(
+                        nlp,
+                        self.subset_doc(reference, start, start + self.max_length),
+                        False,
+                    )
+                    start = start + self.max_length
+                yield self._make_example(
+                    nlp, self.subset_doc(reference, start, end), False
+                )
+
+    def _make_example(
+        self, nlp: "Language", reference: Doc, gold_preproc: bool
+    ) -> Example:
+        eg = super()._make_example(nlp, reference, gold_preproc)
+        eg.predicted._.note_id = eg.reference._.note_id
+        eg.predicted._.note_datetime = eg.reference._.note_datetime
+        eg.predicted._.note_class_source_value = eg.reference._.note_class_source_value
+        eg.predicted._.context = eg.reference._.context
+        eg.predicted._.split = eg.reference._.split
+
+        eg.predicted.ents = eg.reference.ents
+        for name in eg.predicted.spans:
+            eg.predicted.spans[name] = eg.reference.spans[name]
+
+        return eg
+
+    def read_docbin(
+        self, vocab: Vocab, locs: Iterable[Union[str, Path]]
+    ) -> Iterator[Doc]:
+        """Yield the corpus `Doc` objects, optionally filtered by `filter_expr`"""
+        self.not_called_twice = False
+        for loc in locs:
+            loc = spacy.util.ensure_path(loc)
= spacy.util.ensure_path(loc) + if loc.parts[-1].endswith(FILE_TYPE): # type: ignore[union-attr] + doc_bin = DocBin().from_disk(loc) + docs = doc_bin.get_docs(vocab) + for doc in docs: + if len(doc) and (self.filter_fn is None or self.filter_fn(doc)): + yield doc diff --git a/edsnlp/edsnlp/evaluate.py b/edsnlp/edsnlp/evaluate.py new file mode 100644 index 000000000..985055041 --- /dev/null +++ b/edsnlp/edsnlp/evaluate.py @@ -0,0 +1,454 @@ +import time +from copy import deepcopy +from timeit import default_timer as timer +from typing import Any, Dict, Iterable, List, Optional + +import numpy as np +from spacy.language import _copy_examples +from spacy.tokens import Doc +from spacy.training import Example, validate_examples +from tqdm import tqdm + + +def get_annotation(docs): + full_annot = [] + for doc in docs: + annotation = [doc._.note_id] + for label, ents in doc.spans.items(): + for ent in ents: + annotation.append([ent.text, label, ent.start_char, ent.end_char]) + full_annot.append(annotation) + return full_annot + + +def overlap(start_g, end_g, start_p, end_p, exact): + if exact == False: + if start_p <= start_g and end_p >= end_g: + return 1 + else: + return 0 + + if exact == True: + if start_g == start_p and end_g == end_p: + return 1 + else: + return 0 + + +def compute_scores( + ents_gold, + ents_pred, + boostrap_level="entity", + exact=True, + n_draw=500, + alpha=0.05, + digits=2, +): + docs = [doc[0] for doc in ents_gold] + gold_labels = [ + [ent[1] for ent in doc[1:]] for doc in ents_gold + ] # get all the entities from the various documents of ents_gold + gold_labels = set( + [item for sublist in gold_labels for item in sublist] + ) # flatten and transform it to a set to get unique values + pred_labels = [ + [ent[1] for ent in doc[1:]] for doc in ents_pred + ] # get all the entities from the various documents of ents_gold + pred_labels = set( + [item for sublist in pred_labels for item in sublist] + ) # flatten and transform it to a set to get unique values + results = { # we create a dic with the labels of the dataset (CHEM, BIO...) 
+ label: {} for label in pred_labels.union(gold_labels) + } + # COMPUTATION OF TRUE POSITIVE / FALSE POSITIVE / FALSE NEGATIVE + for label in results.keys(): + results[label]["TP"] = 0 + results[label]["FP"] = 0 + results[label]["FN"] = 0 + results_by_doc = {doc: deepcopy(results) for doc in docs} + + for i in range(len(ents_gold)): # iterate through doc + # list of doc, inside each of them is a quadrupet ['text','label','start_char','stop_char'] + doc_id = ents_gold[i][0] + ents_gold_doc = [(ent[1], ent[2], ent[3]) for ent in ents_gold[i][1:]] + ents_pred_doc = [(ent[1], ent[2], ent[3]) for ent in ents_pred[i][1:]] + + for ent in ents_gold_doc: + label_g = ent[0] + start_g = ent[1] + stop_g = ent[2] + r = False + + for ent in ents_pred_doc: + label_p = ent[0] + start_p = ent[1] + stop_p = ent[2] + + # exact is given as parameter because the overlap function take into account if we want an exact match or an inclusive match + if ( + label_g == label_p + and overlap(start_g, stop_g, start_p, stop_p, exact) > 0 + ): + r = True + + if r: + results_by_doc[doc_id][label_g]["TP"] += 1 + results[label_g]["TP"] += 1 + else: + results_by_doc[doc_id][label_g]["FN"] += 1 + results[label_g]["FN"] += 1 + + for ent in ents_pred_doc: + label_p = ent[0] + start_p = ent[1] + stop_p = ent[2] + r = True + + for ent in ents_gold_doc: + label_g = ent[0] + start_g = ent[1] + stop_g = ent[2] + + if ( + label_g == label_p + and overlap(start_g, stop_g, start_p, stop_p, exact) > 0 + ): + r = False + if r == True: + results_by_doc[doc_id][label_p]["FP"] += 1 + results[label_p]["FP"] += 1 + + if exact == True: + print("Exact match") + else: + print("Inclusive match") + + # we will use this copy of the results dataframe + results_list = deepcopy(results) + + # We transform the result dictionnary value from int to list to be able to append the new ones + for key, value in results_list.items(): + for k, v in value.items(): + results_list[key][k] = [v] + + total_words = 0 + for entity in results_list.keys(): + total_words += results[entity]["TP"] + total_words += results[entity]["FN"] + total_words += results[entity]["FP"] + label_to_draw = [] + proba = [] + for entity in results_list.keys(): + for test in ["TP", "FN", "FP"]: + label_to_draw.append(entity + "-" + test) + proba.append(results[entity][test] / total_words) + + micro_avg = { + "TP": [sum(results[entity]["TP"] for entity in results.keys())], + "FN": [sum(results[entity]["FN"] for entity in results.keys())], + "FP": [sum(results[entity]["FP"] for entity in results.keys())], + } + + # Bootstrap per doc + if boostrap_level == "doc": + for i in tqdm(range(1, n_draw)): + draw = np.random.choice( + docs, + size=len(docs), + replace=True, + ) + micro_avg_draw = {"TP": 0, "FN": 0, "FP": 0} + results_draw = ( + { # we create a dic with the labels of the dataset (CHEM, BIO...) 
+ label: {} for label in pred_labels.union(gold_labels) + } + ) + for label in results_draw.keys(): + results_draw[label]["Precision"] = 0 + results_draw[label]["TP"] = 0 + results_draw[label]["FP"] = 0 + results_draw[label]["FN"] = 0 + for doc in draw: + for label in results_by_doc[doc].keys(): + micro_avg_draw["TP"] += results_by_doc[doc][label]["TP"] + results_draw[label]["TP"] += results_by_doc[doc][label]["TP"] + micro_avg_draw["FN"] += results_by_doc[doc][label]["FN"] + results_draw[label]["FN"] += results_by_doc[doc][label]["FN"] + micro_avg_draw["FP"] += results_by_doc[doc][label]["FP"] + results_draw[label]["FP"] += results_by_doc[doc][label]["FP"] + for entity in results_list.keys(): + results_list[entity]["TP"].append(results_draw[entity]["TP"]) + results_list[entity]["FN"].append(results_draw[entity]["FN"]) + results_list[entity]["FP"].append(results_draw[entity]["FP"]) + micro_avg["TP"].append(micro_avg_draw["TP"]) + micro_avg["FN"].append(micro_avg_draw["FN"]) + micro_avg["FP"].append(micro_avg_draw["FP"]) + + # Bootstrap per entities + if boostrap_level == "entity": + for i in tqdm(range(1, n_draw)): + draw = np.random.choice( + label_to_draw, + size=total_words, + p=proba, + replace=True, + ) + draw = np.stack( + np.char.split(draw, "-"), + axis=0, + ) + micro_avg["TP"].append(len(draw[(draw[:, 1] == "TP")])) + micro_avg["FN"].append(len(draw[(draw[:, 1] == "FN")])) + micro_avg["FP"].append(len(draw[(draw[:, 1] == "FP")])) + for entity in results_list.keys(): + results_list[entity]["TP"].append( + len(draw[(draw[:, 0] == entity) & (draw[:, 1] == "TP")]) + ) + results_list[entity]["FN"].append( + len(draw[(draw[:, 0] == entity) & (draw[:, 1] == "FN")]) + ) + results_list[entity]["FP"].append( + len(draw[(draw[:, 0] == entity) & (draw[:, 1] == "FP")]) + ) + + results_list["Overall"] = micro_avg + for entity in results_list.keys(): + results_list[entity]["N_entity"] = [] + results_list[entity]["Precision"] = [] + results_list[entity]["Recall"] = [] + results_list[entity]["F1"] = [] + for i in range(n_draw): + results_list[entity]["N_entity"].append( + results_list[entity]["TP"][i] + + results_list[entity]["FP"][i] + + results_list[entity]["FN"][i] + ) + if results_list[entity]["TP"][i] + results_list[entity]["FP"][i] != 0: + results_list[entity]["Precision"].append( + results_list[entity]["TP"][i] + / (results_list[entity]["TP"][i] + results_list[entity]["FP"][i]) + * 100 + ) + else: + results_list[entity]["Precision"].append( + int(results_list[entity]["TP"][i] == 0) * 100 + ) + if (results_list[entity]["TP"][i] + results_list[entity]["FN"][i]) != 0: + results_list[entity]["Recall"].append( + results_list[entity]["TP"][i] + / (results_list[entity]["TP"][i] + results_list[entity]["FN"][i]) + * 100 + ) + else: + results_list[entity]["Recall"].append( + int(results_list[entity]["TP"][i] == 0) * 100 + ) + if ( + results_list[entity]["Precision"][i] + results_list[entity]["Recall"][i] + ) != 0: + results_list[entity]["F1"].append( + 2 + * ( + results_list[entity]["Precision"][i] + * results_list[entity]["Recall"][i] + ) + / ( + results_list[entity]["Precision"][i] + + results_list[entity]["Recall"][i] + ) + ) + else: + results_list[entity]["F1"].append(0) + # we aim at displaying the "true" observe value with confidence interval corresponding to the top 5 and 95% of the bootstrapped data + lower_confidence_interval = { + label: { + k: round(np.quantile(v, alpha / 2), digits) + for k, v in results_list[label].items() + if k in ["Precision", "Recall", "F1", "N_entity"] + } + for 
label in results_list.keys() + } + upper_confidence_interval = { + label: { + k: round(np.quantile(v, (1 - alpha / 2)), digits) + for k, v in results_list[label].items() + if k in ["Precision", "Recall", "F1", "N_entity"] + } + for label in results_list.keys() + } + + # we create a dict result_panel with the same keys as results_list but with the values of the nested dict being empty + result_panel = { + label: { + k: "" + for k, v in results_list[label].items() + if k in ["Precision", "Recall", "F1", "N_entity"] + } + for label in results_list.keys() + } + + # we take the value to build the result panel and the confidence interval + # we take value['Precision'][0] because it is the original draw + for key, value in results_list.items(): + precision = value["Precision"][0] + precision_up = upper_confidence_interval[key]["Precision"] + precision_down = lower_confidence_interval[key]["Precision"] + recall = value["Recall"][0] + recall_up = upper_confidence_interval[key]["Recall"] + recall_down = lower_confidence_interval[key]["Recall"] + f1 = value["F1"][0] + f1_up = upper_confidence_interval[key]["F1"] + f1_down = lower_confidence_interval[key]["F1"] + n_entity = value["N_entity"][0] + n_entity_up = upper_confidence_interval[key]["N_entity"] + n_entity_down = lower_confidence_interval[key]["N_entity"] + + result_panel[key]["Precision"] = ( + str(round(precision, digits)) + + " (" + + str(precision_down) + + "-" + + str(precision_up) + + ")" + ) + result_panel[key]["Recall"] = ( + str(round(recall, digits)) + + " (" + + str(recall_down) + + "-" + + str(recall_up) + + ")" + ) + result_panel[key]["F1"] = ( + str(round(f1, digits)) + " (" + str(f1_down) + "-" + str(f1_up) + ")" + ) + result_panel[key]["N_entity"] = ( + str(n_entity) + + " (" + + str(int(n_entity_up)) + + "-" + + str(int(n_entity_down)) + + ")" + ) + print(f"With alpha = {alpha} and {n_draw} draws") + output = f"With alpha = {alpha} and {n_draw} draws\n" + for key, value in result_panel.items(): + if "SECTION" not in key: + output += f"\nLabel: {key}\n" + for metric, metric_value in value.items(): + output += f"{metric}: {metric_value}\n" + output += "-" * 30 + + # print(output) + result_panel["ents_per_type"] = { + label: { + "p": value["Precision"], + "r": value["Recall"], + "f": value["F1"], + "n_entity": value["N_entity"], + } + for label, value in result_panel.items() + } + return result_panel + + +def evaluate_test( + gold_docs: List[Doc], + pred_docs: List[Doc], + boostrap_level: str = "entity", + exact: bool = True, + n_draw: int = 500, + alpha: float = 0.05, + digits: int = 2, +) -> Dict[str, Any]: + """ + Evaluate a model's pipeline components. + + Parameters + ---------- + gold_docs : List[Doc] + `Doc` objects. + pred_docs : List[Doc] + `Doc` objects. + + Returns + ------- + Dict[str, Any] + The evaluation results. + """ + ents_pred, ents_gold = get_annotation(pred_docs), get_annotation(gold_docs) + ents_pred.sort(key=lambda l: l[0]) + ents_gold.sort(key=lambda l: l[0]) + + scores = compute_scores( + ents_gold, + ents_pred, + boostrap_level=boostrap_level, + exact=exact, + n_draw=n_draw, + alpha=alpha, + digits=digits, + ) + + return scores + + +def evaluate( + self, + examples: Iterable[Example], + *, + batch_size: Optional[int] = None, + **kwargs: Any, +) -> Dict[str, Any]: + """ + Evaluate a model's pipeline components. + + Parameters + ---------- + examples : Iterable[Example] + `Example` objects. + batch_size : Optional[int] + Batch size to use. + + Returns + ------- + Dict[str, Any] + The evaluation results. 
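+
+    Examples
+    --------
+    A minimal usage sketch, assuming `nlp` is a loaded pipeline on which this
+    method has been patched and `examples` is a list of `Example` objects:
+
+    >>> scores = nlp.evaluate(examples, batch_size=32)
+    >>> scores["speed"]  # tokens processed per second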
+ """ + examples = list(examples) + validate_examples(examples, "Language.evaluate") + examples = _copy_examples(examples) + if batch_size is None: + batch_size = self.batch_size + + scores = {} + + total_time = 0 + + begin_time = timer() + # this is purely for timing + for eg in examples: + self.make_doc(eg.reference.text) + total_time += timer() - begin_time + + n_words = sum(len(eg.predicted) for eg in examples) + + predictions = [eg.predicted for eg in examples] + + for name, component in self.pipeline: + begin_time = timer() + docs = [doc.copy() for doc in predictions] + docs = list(component.pipe(docs, batch_size=batch_size)) + total_time += timer() - begin_time + + if name == "tok2vec": + predictions = docs + if hasattr(component, "score"): + scores.update( + component.score( + [Example(doc, eg.reference) for doc, eg in zip(docs, examples)] + ) + ) + + scores["speed"] = n_words / total_time + return scores diff --git a/edsnlp/extensions.py b/edsnlp/edsnlp/extensions.py similarity index 100% rename from edsnlp/extensions.py rename to edsnlp/edsnlp/extensions.py diff --git a/edsnlp/language.py b/edsnlp/edsnlp/language.py similarity index 100% rename from edsnlp/language.py rename to edsnlp/edsnlp/language.py diff --git a/edsnlp/models/__init__.py b/edsnlp/edsnlp/matchers/__init__.py similarity index 100% rename from edsnlp/models/__init__.py rename to edsnlp/edsnlp/matchers/__init__.py diff --git a/edsnlp/matchers/phrase.pxd b/edsnlp/edsnlp/matchers/phrase.pxd similarity index 100% rename from edsnlp/matchers/phrase.pxd rename to edsnlp/edsnlp/matchers/phrase.pxd diff --git a/edsnlp/matchers/phrase.pyx b/edsnlp/edsnlp/matchers/phrase.pyx similarity index 100% rename from edsnlp/matchers/phrase.pyx rename to edsnlp/edsnlp/matchers/phrase.pyx diff --git a/edsnlp/matchers/regex.py b/edsnlp/edsnlp/matchers/regex.py similarity index 100% rename from edsnlp/matchers/regex.py rename to edsnlp/edsnlp/matchers/regex.py diff --git a/edsnlp/matchers/simstring.py b/edsnlp/edsnlp/matchers/simstring.py similarity index 100% rename from edsnlp/matchers/simstring.py rename to edsnlp/edsnlp/matchers/simstring.py diff --git a/edsnlp/matchers/utils/__init__.py b/edsnlp/edsnlp/matchers/utils/__init__.py similarity index 100% rename from edsnlp/matchers/utils/__init__.py rename to edsnlp/edsnlp/matchers/utils/__init__.py diff --git a/edsnlp/matchers/utils/offset.py b/edsnlp/edsnlp/matchers/utils/offset.py similarity index 100% rename from edsnlp/matchers/utils/offset.py rename to edsnlp/edsnlp/matchers/utils/offset.py diff --git a/edsnlp/matchers/utils/text.py b/edsnlp/edsnlp/matchers/utils/text.py similarity index 100% rename from edsnlp/matchers/utils/text.py rename to edsnlp/edsnlp/matchers/utils/text.py diff --git a/edsnlp/patch_spacy_dot_components.py b/edsnlp/edsnlp/patch_spacy_dot_components.py similarity index 100% rename from edsnlp/patch_spacy_dot_components.py rename to edsnlp/edsnlp/patch_spacy_dot_components.py diff --git a/edsnlp/models/torch/__init__.py b/edsnlp/edsnlp/pipelines/__init__.py similarity index 100% rename from edsnlp/models/torch/__init__.py rename to edsnlp/edsnlp/pipelines/__init__.py diff --git a/edsnlp/edsnlp/pipelines/base.py b/edsnlp/edsnlp/pipelines/base.py new file mode 100644 index 000000000..685406307 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/base.py @@ -0,0 +1,318 @@ +from collections import defaultdict +from operator import attrgetter +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Sequence, + Tuple, + Union, +) 
+ +from spacy import Language +from spacy.tokens import Doc, Span + +from edsnlp.utils.filter import filter_spans + + +class BaseComponent: + """ + The `BaseComponent` adds a `set_extensions` method, + called at the creation of the object. + + It helps decouple the initialisation of the pipeline from + the creation of extensions, and is particularly usefull when + distributing EDSNLP on a cluster, since the serialisation mechanism + imposes that the extensions be reset. + """ + + def __init__(self, nlp: Language = None, name: str = None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.nlp = nlp + self.name = name + self.set_extensions() + + def set_extensions(self): + """ + Set `Doc`, `Span` and `Token` extensions. + """ + Span.set_extension( + "value", + getter=lambda span: span._.get(span.label_) + if span._.has(span.label_) + else None, + force=True, + ) + + def get_spans(self, doc: Doc): + """ + Returns sorted spans of interest according to the + possible value of `on_ents_only`. + Includes `doc.ents` by default, and adds eventual SpanGroups. + """ + ents = list(doc.ents) + list(doc.spans.get("discarded", [])) + + on_ents_only = getattr(self, "on_ents_only", None) + + if isinstance(on_ents_only, str): + on_ents_only = [on_ents_only] + if isinstance(on_ents_only, (set, list)): + for spankey in set(on_ents_only) & set(doc.spans.keys()): + ents.extend(doc.spans.get(spankey, [])) + + return sorted(list(set(ents)), key=(attrgetter("start", "end"))) + + def _boundaries( + self, doc: Doc, terminations: Optional[List[Span]] = None + ) -> List[Tuple[int, int]]: + """ + Create sub sentences based sentences and terminations found in text. + + Parameters + ---------- + doc: + spaCy Doc object + terminations: + List of tuples with (match_id, start, end) + + Returns + ------- + boundaries: + List of tuples with (start, end) of spans + """ + + if terminations is None: + terminations = [] + + sent_starts = [sent.start for sent in doc.sents] + termination_starts = [t.start for t in terminations] + + starts = sent_starts + termination_starts + [len(doc)] + + # Remove duplicates + starts = list(set(starts)) + + # Sort starts + starts.sort() + + boundaries = [(start, end) for start, end in zip(starts[:-1], starts[1:])] + + return boundaries + + +SeqStr = Union[str, Sequence[str]] +SpanFilter = Union[bool, SeqStr] + +SpanSetterMapping = Dict[str, SpanFilter] +SpanGetterMapping = Dict[str, SpanFilter] + +SpanGetter = Union[ + SpanGetterMapping, + Callable[[Doc], Iterable[Span]], +] +SpanSetter = Union[ + SpanSetterMapping, + Callable[[Doc, Iterable[Span]], Any], +] + + +def get_spans(doc, span_getter): + if callable(span_getter): + yield from span_getter(doc) + return + for key, span_filter in span_getter.items(): + candidates = doc.spans.get(key, ()) if key != "ents" else doc.ents + if span_filter is True: + yield from candidates + else: + for span in candidates: + if span.label_ in span_filter: + yield span + + +def validate_span_setter(value: Union[SeqStr, Dict[str, SpanFilter]]) -> SpanSetter: + if callable(value): + return value + if isinstance(value, str): + return {value: True} + if isinstance(value, list): + return {group: True for group in value} + elif isinstance(value, dict): + new_value = {} + for k, v in value.items(): + if isinstance(v, bool): + new_value[k] = v + elif isinstance(v, str): + new_value[k] = [v] + elif isinstance(v, list) and all(isinstance(i, str) for i in v): + new_value[k] = v + else: + raise TypeError( + f"Invalid entry {value} ({type(value)}) for SpanSetterArg, 
" + f"expected bool/string(s), dict of bool/string(s) or callable" + ) + return new_value + else: + raise TypeError( + f"Invalid entry {value} ({type(value)}) for SpanSetterArg, " + f"expected bool/string(s), dict of bool/string(s) or callable" + ) + + +def validate_span_getter( + value: Union[SeqStr, Dict[str, SpanFilter]], optional: bool = False +) -> SpanSetter: + if value is None: + if optional: + return None + raise ValueError( + "Mising entry for SpanGetterArg, " + "expected bool/string(s), dict of bool/string(s) or callable" + ) + if callable(value): + return value + if isinstance(value, str): + return {value: True} + if isinstance(value, list): + return {group: True for group in value} + elif isinstance(value, dict): + new_value = {} + for k, v in value.items(): + if isinstance(v, bool): + new_value[k] = v + elif isinstance(v, str): + new_value[k] = [v] + elif isinstance(v, list) and all(isinstance(i, str) for i in v): + new_value[k] = v + else: + raise TypeError( + f"Invalid entry {value} ({type(value)}) for SpanGetterArg, " + f"expected bool/string(s), dict of bool/string(s) or callable" + ) + return new_value + else: + raise TypeError( + f"Invalid entry {value} ({type(value)}) for SpanGetterArg, " + f"expected bool/string(s), dict of bool/string(s) or callable" + ) + + +class SpanSetterArg: + """ + Valid values for the `span_setter` argument of a component can be : + + - a (doc, matches) -> None callable + - a span group name + - a list of span group names + - a dict of group name to True or list of labels + + The group name `"ents"` is a special case, and will add the matches to `doc.ents` + + Examples + -------- + - `span_setter=["ents", "ckd"]` will add the matches to both `doc.ents` and + `doc.spans["ckd"]`. It is equivalent to `{"ents": True, "ckd": True}`. + - `span_setter={"ents": ["foo", "bar"]}` will add the matches with label + "foo" and "bar" to `doc.ents`. + - `span_setter="ents"` will add all matches only to `doc.ents`. + - `span_setter="ckd"` will add all matches only to `doc.spans["ckd"]`. + """ + + @classmethod + def __get_validators__(cls): + yield cls.validate + + @classmethod + def validate(cls, value: Union[SeqStr, Dict[str, SpanFilter]]) -> SpanSetter: + return validate_span_setter(value) + + +class SpanGetterArg: + """ + Valid values for the `span_getter` argument of a component can be : + + - a (doc) -> spans callable + - a span group name + - a list of span group names + - a dict of group name to True or list of labels + + The group name `"ents"` is a special case, and will get the matches from `doc.ents` + + Examples + -------- + - `span_getter=["ents", "ckd"]` will get the matches from both `doc.ents` and + `doc.spans["ckd"]`. It is equivalent to `{"ents": True, "ckd": True}`. + - `span_getter={"ents": ["foo", "bar"]}` will get the matches with label + "foo" and "bar" from `doc.ents`. + - `span_getter="ents"` will get all matches from `doc.ents`. + - `span_getter="ckd"` will get all matches from `doc.spans["ckd"]`. 
+ """ + + @classmethod + def __get_validators__(cls): + yield cls.validate + + @classmethod + def validate(cls, value: Union[SeqStr, Dict[str, SpanFilter]]) -> SpanSetter: + return validate_span_setter(value) + + +class BaseNERComponent(BaseComponent): + def __init__( + self, + nlp: Language = None, + name: str = None, + *args, + span_setter: SpanSetterArg, + **kwargs, + ): + super().__init__(nlp, name, *args, **kwargs) + self.span_setter: SpanSetter = validate_span_setter(span_setter) # type: ignore + + def set_spans(self, doc, matches): + if callable(self.span_setter): + self.span_setter(doc, matches) + else: + + match_all = [] + label_to_group = defaultdict(list) + for name, spans_filter in self.span_setter.items(): + if name != "ents": + doc.spans.setdefault(name, []) + if spans_filter: + if spans_filter is True: + match_all.append(name) + else: + for label in spans_filter: + label_to_group[label].append(name) + + new_ents = [] if "ents" in self.span_setter else None + + for span in matches: + for group in match_all + label_to_group[span.label_]: + if group == "ents": + new_ents.append(span) + else: + doc.spans[group].append(span) + if new_ents is not None: + doc.ents = filter_spans((*new_ents, *doc.ents)) + return doc + + +if TYPE_CHECKING: + SpanGetterArg = Union[ # noqa: F811 + str, + Sequence[str], + SpanGetterMapping, + Callable[[Doc], Iterable[Span]], + ] + SpanSetterArg = Union[ # noqa: F811 + str, + Sequence[str], + SpanSetterMapping, + Callable[[Doc, Iterable[Span]], Any], + ] diff --git a/edsnlp/edsnlp/pipelines/clean_entities.py b/edsnlp/edsnlp/pipelines/clean_entities.py new file mode 100644 index 000000000..dba742694 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/clean_entities.py @@ -0,0 +1,47 @@ +import re +import string +from typing import Callable, Optional + +from spacy.language import Language +from spacy.tokens import Doc + +DEFAULT_CONFIG = dict( + scorer={"@scorers": "eds.nested_ner_scorer.v1"}, +) + + +@Language.factory("clean-entities", default_config=DEFAULT_CONFIG) +class CleanEntities: + def __init__( + self, + nlp: Language, + name: str, + scorer: Optional[Callable], + ): + """ + Removes empty entities from the document and clean entity boundaries + """ + self.scorer = scorer + + def score(self, examples, **kwargs): + return self.scorer(examples, **kwargs) + + def __call__(self, doc: Doc) -> Doc: + new_ents = [] + for ent in doc.ents: + if len(ent.text.strip(string.punctuation)) == 0: + continue + m = re.match(r"^\s*(.*?)\s*$", ent.text, flags=re.DOTALL) + new_begin = m.start(1) + new_end = m.end(1) + new_ent = doc.char_span( + ent[0].idx + new_begin, + ent[0].idx + new_end, + label=ent.label_, + alignment_mode="expand", + ) + if new_ent is not None: + new_ents.append(new_ent) + + doc.ents = new_ents + return doc diff --git a/edsnlp/pipelines/__init__.py b/edsnlp/edsnlp/pipelines/core/__init__.py similarity index 100% rename from edsnlp/pipelines/__init__.py rename to edsnlp/edsnlp/pipelines/core/__init__.py diff --git a/edsnlp/pipelines/core/context/__init__.py b/edsnlp/edsnlp/pipelines/core/context/__init__.py similarity index 100% rename from edsnlp/pipelines/core/context/__init__.py rename to edsnlp/edsnlp/pipelines/core/context/__init__.py diff --git a/edsnlp/pipelines/core/context/context.py b/edsnlp/edsnlp/pipelines/core/context/context.py similarity index 100% rename from edsnlp/pipelines/core/context/context.py rename to edsnlp/edsnlp/pipelines/core/context/context.py diff --git a/edsnlp/pipelines/core/context/factory.py 
b/edsnlp/edsnlp/pipelines/core/context/factory.py similarity index 100% rename from edsnlp/pipelines/core/context/factory.py rename to edsnlp/edsnlp/pipelines/core/context/factory.py diff --git a/edsnlp/pipelines/core/contextual_matcher/__init__.py b/edsnlp/edsnlp/pipelines/core/contextual_matcher/__init__.py similarity index 100% rename from edsnlp/pipelines/core/contextual_matcher/__init__.py rename to edsnlp/edsnlp/pipelines/core/contextual_matcher/__init__.py diff --git a/edsnlp/pipelines/core/contextual_matcher/contextual_matcher.py b/edsnlp/edsnlp/pipelines/core/contextual_matcher/contextual_matcher.py similarity index 100% rename from edsnlp/pipelines/core/contextual_matcher/contextual_matcher.py rename to edsnlp/edsnlp/pipelines/core/contextual_matcher/contextual_matcher.py diff --git a/edsnlp/pipelines/core/contextual_matcher/factory.py b/edsnlp/edsnlp/pipelines/core/contextual_matcher/factory.py similarity index 100% rename from edsnlp/pipelines/core/contextual_matcher/factory.py rename to edsnlp/edsnlp/pipelines/core/contextual_matcher/factory.py diff --git a/edsnlp/pipelines/core/contextual_matcher/models.py b/edsnlp/edsnlp/pipelines/core/contextual_matcher/models.py similarity index 100% rename from edsnlp/pipelines/core/contextual_matcher/models.py rename to edsnlp/edsnlp/pipelines/core/contextual_matcher/models.py diff --git a/edsnlp/pipelines/core/endlines/__init__.py b/edsnlp/edsnlp/pipelines/core/endlines/__init__.py similarity index 100% rename from edsnlp/pipelines/core/endlines/__init__.py rename to edsnlp/edsnlp/pipelines/core/endlines/__init__.py diff --git a/edsnlp/pipelines/core/endlines/endlines.py b/edsnlp/edsnlp/pipelines/core/endlines/endlines.py similarity index 100% rename from edsnlp/pipelines/core/endlines/endlines.py rename to edsnlp/edsnlp/pipelines/core/endlines/endlines.py diff --git a/edsnlp/pipelines/core/endlines/endlinesmodel.py b/edsnlp/edsnlp/pipelines/core/endlines/endlinesmodel.py similarity index 100% rename from edsnlp/pipelines/core/endlines/endlinesmodel.py rename to edsnlp/edsnlp/pipelines/core/endlines/endlinesmodel.py diff --git a/edsnlp/pipelines/core/endlines/factory.py b/edsnlp/edsnlp/pipelines/core/endlines/factory.py similarity index 100% rename from edsnlp/pipelines/core/endlines/factory.py rename to edsnlp/edsnlp/pipelines/core/endlines/factory.py diff --git a/edsnlp/pipelines/core/endlines/functional.py b/edsnlp/edsnlp/pipelines/core/endlines/functional.py similarity index 100% rename from edsnlp/pipelines/core/endlines/functional.py rename to edsnlp/edsnlp/pipelines/core/endlines/functional.py diff --git a/edsnlp/pipelines/core/matcher/__init__.py b/edsnlp/edsnlp/pipelines/core/matcher/__init__.py similarity index 100% rename from edsnlp/pipelines/core/matcher/__init__.py rename to edsnlp/edsnlp/pipelines/core/matcher/__init__.py diff --git a/edsnlp/pipelines/core/matcher/factory.py b/edsnlp/edsnlp/pipelines/core/matcher/factory.py similarity index 100% rename from edsnlp/pipelines/core/matcher/factory.py rename to edsnlp/edsnlp/pipelines/core/matcher/factory.py diff --git a/edsnlp/pipelines/core/matcher/matcher.py b/edsnlp/edsnlp/pipelines/core/matcher/matcher.py similarity index 100% rename from edsnlp/pipelines/core/matcher/matcher.py rename to edsnlp/edsnlp/pipelines/core/matcher/matcher.py diff --git a/edsnlp/pipelines/core/normalizer/__init__.py b/edsnlp/edsnlp/pipelines/core/normalizer/__init__.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/__init__.py rename to 
edsnlp/edsnlp/pipelines/core/normalizer/__init__.py diff --git a/edsnlp/pipelines/core/normalizer/accents/__init__.py b/edsnlp/edsnlp/pipelines/core/normalizer/accents/__init__.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/accents/__init__.py rename to edsnlp/edsnlp/pipelines/core/normalizer/accents/__init__.py diff --git a/edsnlp/pipelines/core/normalizer/accents/accents.py b/edsnlp/edsnlp/pipelines/core/normalizer/accents/accents.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/accents/accents.py rename to edsnlp/edsnlp/pipelines/core/normalizer/accents/accents.py diff --git a/edsnlp/pipelines/core/normalizer/accents/factory.py b/edsnlp/edsnlp/pipelines/core/normalizer/accents/factory.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/accents/factory.py rename to edsnlp/edsnlp/pipelines/core/normalizer/accents/factory.py diff --git a/edsnlp/pipelines/core/normalizer/accents/patterns.py b/edsnlp/edsnlp/pipelines/core/normalizer/accents/patterns.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/accents/patterns.py rename to edsnlp/edsnlp/pipelines/core/normalizer/accents/patterns.py diff --git a/edsnlp/pipelines/core/normalizer/factory.py b/edsnlp/edsnlp/pipelines/core/normalizer/factory.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/factory.py rename to edsnlp/edsnlp/pipelines/core/normalizer/factory.py diff --git a/edsnlp/pipelines/core/normalizer/lowercase/__init__.py b/edsnlp/edsnlp/pipelines/core/normalizer/lowercase/__init__.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/lowercase/__init__.py rename to edsnlp/edsnlp/pipelines/core/normalizer/lowercase/__init__.py diff --git a/edsnlp/pipelines/core/normalizer/lowercase/factory.py b/edsnlp/edsnlp/pipelines/core/normalizer/lowercase/factory.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/lowercase/factory.py rename to edsnlp/edsnlp/pipelines/core/normalizer/lowercase/factory.py diff --git a/edsnlp/pipelines/core/normalizer/normalizer.py b/edsnlp/edsnlp/pipelines/core/normalizer/normalizer.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/normalizer.py rename to edsnlp/edsnlp/pipelines/core/normalizer/normalizer.py diff --git a/edsnlp/pipelines/core/normalizer/pollution/__init__.py b/edsnlp/edsnlp/pipelines/core/normalizer/pollution/__init__.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/pollution/__init__.py rename to edsnlp/edsnlp/pipelines/core/normalizer/pollution/__init__.py diff --git a/edsnlp/pipelines/core/normalizer/pollution/factory.py b/edsnlp/edsnlp/pipelines/core/normalizer/pollution/factory.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/pollution/factory.py rename to edsnlp/edsnlp/pipelines/core/normalizer/pollution/factory.py diff --git a/edsnlp/pipelines/core/normalizer/pollution/patterns.py b/edsnlp/edsnlp/pipelines/core/normalizer/pollution/patterns.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/pollution/patterns.py rename to edsnlp/edsnlp/pipelines/core/normalizer/pollution/patterns.py diff --git a/edsnlp/pipelines/core/normalizer/pollution/pollution.py b/edsnlp/edsnlp/pipelines/core/normalizer/pollution/pollution.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/pollution/pollution.py rename to edsnlp/edsnlp/pipelines/core/normalizer/pollution/pollution.py diff --git a/edsnlp/pipelines/core/normalizer/quotes/__init__.py 
b/edsnlp/edsnlp/pipelines/core/normalizer/quotes/__init__.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/quotes/__init__.py rename to edsnlp/edsnlp/pipelines/core/normalizer/quotes/__init__.py diff --git a/edsnlp/pipelines/core/normalizer/quotes/factory.py b/edsnlp/edsnlp/pipelines/core/normalizer/quotes/factory.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/quotes/factory.py rename to edsnlp/edsnlp/pipelines/core/normalizer/quotes/factory.py diff --git a/edsnlp/pipelines/core/normalizer/quotes/patterns.py b/edsnlp/edsnlp/pipelines/core/normalizer/quotes/patterns.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/quotes/patterns.py rename to edsnlp/edsnlp/pipelines/core/normalizer/quotes/patterns.py diff --git a/edsnlp/pipelines/core/normalizer/quotes/quotes.py b/edsnlp/edsnlp/pipelines/core/normalizer/quotes/quotes.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/quotes/quotes.py rename to edsnlp/edsnlp/pipelines/core/normalizer/quotes/quotes.py diff --git a/edsnlp/pipelines/core/normalizer/spaces/__init__.py b/edsnlp/edsnlp/pipelines/core/normalizer/spaces/__init__.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/spaces/__init__.py rename to edsnlp/edsnlp/pipelines/core/normalizer/spaces/__init__.py diff --git a/edsnlp/pipelines/core/normalizer/spaces/factory.py b/edsnlp/edsnlp/pipelines/core/normalizer/spaces/factory.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/spaces/factory.py rename to edsnlp/edsnlp/pipelines/core/normalizer/spaces/factory.py diff --git a/edsnlp/pipelines/core/normalizer/spaces/spaces.py b/edsnlp/edsnlp/pipelines/core/normalizer/spaces/spaces.py similarity index 100% rename from edsnlp/pipelines/core/normalizer/spaces/spaces.py rename to edsnlp/edsnlp/pipelines/core/normalizer/spaces/spaces.py diff --git a/edsnlp/pipelines/core/sentences/__init__.py b/edsnlp/edsnlp/pipelines/core/sentences/__init__.py similarity index 100% rename from edsnlp/pipelines/core/sentences/__init__.py rename to edsnlp/edsnlp/pipelines/core/sentences/__init__.py diff --git a/edsnlp/pipelines/core/sentences/factory.py b/edsnlp/edsnlp/pipelines/core/sentences/factory.py similarity index 100% rename from edsnlp/pipelines/core/sentences/factory.py rename to edsnlp/edsnlp/pipelines/core/sentences/factory.py diff --git a/edsnlp/pipelines/core/sentences/sentences.pxd b/edsnlp/edsnlp/pipelines/core/sentences/sentences.pxd similarity index 100% rename from edsnlp/pipelines/core/sentences/sentences.pxd rename to edsnlp/edsnlp/pipelines/core/sentences/sentences.pxd diff --git a/edsnlp/pipelines/core/sentences/sentences.pyx b/edsnlp/edsnlp/pipelines/core/sentences/sentences.pyx similarity index 100% rename from edsnlp/pipelines/core/sentences/sentences.pyx rename to edsnlp/edsnlp/pipelines/core/sentences/sentences.pyx diff --git a/edsnlp/pipelines/core/sentences/terms.py b/edsnlp/edsnlp/pipelines/core/sentences/terms.py similarity index 100% rename from edsnlp/pipelines/core/sentences/terms.py rename to edsnlp/edsnlp/pipelines/core/sentences/terms.py diff --git a/edsnlp/pipelines/core/terminology/__init__.py b/edsnlp/edsnlp/pipelines/core/terminology/__init__.py similarity index 100% rename from edsnlp/pipelines/core/terminology/__init__.py rename to edsnlp/edsnlp/pipelines/core/terminology/__init__.py diff --git a/edsnlp/pipelines/core/terminology/factory.py b/edsnlp/edsnlp/pipelines/core/terminology/factory.py similarity index 100% rename from 
edsnlp/pipelines/core/terminology/factory.py rename to edsnlp/edsnlp/pipelines/core/terminology/factory.py diff --git a/edsnlp/pipelines/core/terminology/terminology.py b/edsnlp/edsnlp/pipelines/core/terminology/terminology.py similarity index 100% rename from edsnlp/pipelines/core/terminology/terminology.py rename to edsnlp/edsnlp/pipelines/core/terminology/terminology.py diff --git a/edsnlp/pipelines/factories.py b/edsnlp/edsnlp/pipelines/factories.py similarity index 91% rename from edsnlp/pipelines/factories.py rename to edsnlp/edsnlp/pipelines/factories.py index c3c226112..76a52ddf7 100644 --- a/edsnlp/pipelines/factories.py +++ b/edsnlp/edsnlp/pipelines/factories.py @@ -15,6 +15,7 @@ from .misc.measurements.factory import create_component as measurements from .misc.reason.factory import create_component as reason from .misc.sections.factory import create_component as sections +from .misc.tables.factory import create_component as tables from .ner.adicap.factory import create_component as adicap from .ner.cim10.factory import create_component as cim10 from .ner.covid.factory import create_component as covid @@ -32,4 +33,5 @@ from .qualifiers.hypothesis.factory import create_component as hypothesis from .qualifiers.negation.factory import create_component as negation from .qualifiers.reported_speech.factory import create_component as rspeech -from .trainable.nested_ner import create_component as nested_ner +from .trainable.nested_ner.factory import create_component as nested_ner +from .trainable.span_qualifier.factory import create_component as span_qualifier diff --git a/edsnlp/pipelines/core/__init__.py b/edsnlp/edsnlp/pipelines/misc/__init__.py similarity index 100% rename from edsnlp/pipelines/core/__init__.py rename to edsnlp/edsnlp/pipelines/misc/__init__.py diff --git a/edsnlp/pipelines/misc/consultation_dates/__init__.py b/edsnlp/edsnlp/pipelines/misc/consultation_dates/__init__.py similarity index 100% rename from edsnlp/pipelines/misc/consultation_dates/__init__.py rename to edsnlp/edsnlp/pipelines/misc/consultation_dates/__init__.py diff --git a/edsnlp/pipelines/misc/consultation_dates/consultation_dates.py b/edsnlp/edsnlp/pipelines/misc/consultation_dates/consultation_dates.py similarity index 100% rename from edsnlp/pipelines/misc/consultation_dates/consultation_dates.py rename to edsnlp/edsnlp/pipelines/misc/consultation_dates/consultation_dates.py diff --git a/edsnlp/pipelines/misc/consultation_dates/factory.py b/edsnlp/edsnlp/pipelines/misc/consultation_dates/factory.py similarity index 100% rename from edsnlp/pipelines/misc/consultation_dates/factory.py rename to edsnlp/edsnlp/pipelines/misc/consultation_dates/factory.py diff --git a/edsnlp/pipelines/misc/consultation_dates/patterns.py b/edsnlp/edsnlp/pipelines/misc/consultation_dates/patterns.py similarity index 100% rename from edsnlp/pipelines/misc/consultation_dates/patterns.py rename to edsnlp/edsnlp/pipelines/misc/consultation_dates/patterns.py diff --git a/edsnlp/pipelines/misc/dates/__init__.py b/edsnlp/edsnlp/pipelines/misc/dates/__init__.py similarity index 100% rename from edsnlp/pipelines/misc/dates/__init__.py rename to edsnlp/edsnlp/pipelines/misc/dates/__init__.py diff --git a/edsnlp/pipelines/misc/dates/dates.py b/edsnlp/edsnlp/pipelines/misc/dates/dates.py similarity index 100% rename from edsnlp/pipelines/misc/dates/dates.py rename to edsnlp/edsnlp/pipelines/misc/dates/dates.py diff --git a/edsnlp/pipelines/misc/dates/factory.py b/edsnlp/edsnlp/pipelines/misc/dates/factory.py similarity index 100% 
rename from edsnlp/pipelines/misc/dates/factory.py rename to edsnlp/edsnlp/pipelines/misc/dates/factory.py diff --git a/edsnlp/pipelines/misc/dates/models.py b/edsnlp/edsnlp/pipelines/misc/dates/models.py similarity index 100% rename from edsnlp/pipelines/misc/dates/models.py rename to edsnlp/edsnlp/pipelines/misc/dates/models.py diff --git a/edsnlp/pipelines/misc/dates/patterns/__init__.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/__init__.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/__init__.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/__init__.py diff --git a/edsnlp/pipelines/misc/dates/patterns/absolute.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/absolute.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/absolute.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/absolute.py diff --git a/edsnlp/pipelines/misc/__init__.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/__init__.py similarity index 100% rename from edsnlp/pipelines/misc/__init__.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/__init__.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/days.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/days.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/days.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/days.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/delimiters.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/delimiters.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/delimiters.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/delimiters.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/directions.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/directions.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/directions.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/directions.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/modes.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/modes.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/modes.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/modes.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/months.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/months.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/months.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/months.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/numbers.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/numbers.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/numbers.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/numbers.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/time.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/time.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/time.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/time.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/units.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/units.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/units.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/units.py diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/years.py 
b/edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/years.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/years.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/atomic/years.py diff --git a/edsnlp/pipelines/misc/dates/patterns/current.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/current.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/current.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/current.py diff --git a/edsnlp/pipelines/misc/dates/patterns/duration.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/duration.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/duration.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/duration.py diff --git a/edsnlp/pipelines/misc/dates/patterns/false_positive.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/false_positive.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/false_positive.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/false_positive.py diff --git a/edsnlp/pipelines/misc/dates/patterns/relative.py b/edsnlp/edsnlp/pipelines/misc/dates/patterns/relative.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/relative.py rename to edsnlp/edsnlp/pipelines/misc/dates/patterns/relative.py diff --git a/edsnlp/pipelines/misc/measurements/__init__.py b/edsnlp/edsnlp/pipelines/misc/measurements/__init__.py similarity index 100% rename from edsnlp/pipelines/misc/measurements/__init__.py rename to edsnlp/edsnlp/pipelines/misc/measurements/__init__.py diff --git a/edsnlp/edsnlp/pipelines/misc/measurements/factory.py b/edsnlp/edsnlp/pipelines/misc/measurements/factory.py new file mode 100644 index 000000000..d21ad9a06 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/misc/measurements/factory.py @@ -0,0 +1,84 @@ +from typing import Dict, List, Optional, Tuple, Union + +from spacy.language import Language +from typing_extensions import Literal + +import edsnlp.pipelines.misc.measurements.patterns as patterns +from edsnlp.pipelines.base import SpanGetterArg, SpanSetterArg +from edsnlp.pipelines.misc.measurements.measurements import ( + MeasureConfig, + MeasurementsMatcher, + UnitConfig, +) +from edsnlp.utils.deprecation import deprecated_factory + +DEFAULT_CONFIG = dict( + attr="NORM", + ignore_excluded=True, + units_config=patterns.units_config, + number_terms=patterns.number_terms, + value_range_terms=patterns.value_range_terms, + unit_divisors=patterns.unit_divisors, + measurements=None, + stopwords_unitless=patterns.stopwords_unitless, + stopwords_measure_unit=patterns.stopwords_measure_unit, + measure_before_unit=False, + parse_doc=True, + parse_tables=True, + all_measurements=True, + extract_ranges=False, + range_patterns=patterns.range_patterns, + span_setter=None, + span_getter=None, + merge_mode="intersect", + as_ents=False, +) + + +@Language.factory("eds.measurements", default_config=DEFAULT_CONFIG) +@deprecated_factory("eds.measures", "eds.measurements", default_config=DEFAULT_CONFIG) +def create_component( + nlp: Language, + name: str, + measurements: Optional[Union[Dict[str, MeasureConfig], List[str]]], + units_config: Dict[str, UnitConfig], + number_terms: Dict[str, List[str]], + value_range_terms: Dict[str, List[str]], + all_measurements: bool, + parse_tables: bool, + parse_doc: bool, + stopwords_unitless: List[str], + stopwords_measure_unit: List[str], + measure_before_unit: bool, + unit_divisors: List[str], + ignore_excluded: bool, + attr: str, + span_setter: Optional[SpanSetterArg], 
+ span_getter: Optional[SpanGetterArg], + merge_mode: Literal["intersect", "align", "union"], + extract_ranges: bool, + range_patterns: List[Tuple[Optional[str], Optional[str]]], + as_ents: bool, +): + return MeasurementsMatcher( + nlp, + units_config=units_config, + number_terms=number_terms, + value_range_terms=value_range_terms, + all_measurements=all_measurements, + parse_tables=parse_tables, + parse_doc=parse_doc, + unit_divisors=unit_divisors, + measurements=measurements, + stopwords_unitless=stopwords_unitless, + stopwords_measure_unit=stopwords_measure_unit, + measure_before_unit=measure_before_unit, + attr=attr, + ignore_excluded=ignore_excluded, + extract_ranges=extract_ranges, + range_patterns=range_patterns, + span_setter=span_setter, + span_getter=span_getter, + merge_mode=merge_mode, + as_ents=as_ents, + ) diff --git a/edsnlp/edsnlp/pipelines/misc/measurements/measurements.py b/edsnlp/edsnlp/pipelines/misc/measurements/measurements.py new file mode 100644 index 000000000..b095ea4c2 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/misc/measurements/measurements.py @@ -0,0 +1,1595 @@ +import abc +import re +import unicodedata +from collections import defaultdict +from functools import lru_cache +from itertools import repeat +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union + +import regex +import spacy +from spacy.tokens import Doc, Span +from typing_extensions import Literal, NotRequired, TypedDict + +from edsnlp.matchers.phrase import EDSPhraseMatcher +from edsnlp.matchers.regex import RegexMatcher +from edsnlp.pipelines.base import ( + BaseNERComponent, + SpanGetterArg, + SpanSetterArg, + get_spans, + validate_span_getter, +) +from edsnlp.pipelines.misc.measurements import patterns +from edsnlp.pipelines.misc.measurements.patterns import common_measurements +from edsnlp.utils.filter import align_spans, filter_spans + +__all__ = ["MeasurementsMatcher"] + +AFTER_SNIPPET_LIMIT = 8 +BEFORE_SNIPPET_LIMIT = 10 + + +class UnitConfig(TypedDict): + scale: float + terms: List[str] + followed_by: Optional[str] = None + ui_decomposition: Dict[str, int] + + +class SimpleMeasurementConfigWithoutRegistry(TypedDict): + value_range: str + value: Union[float, int] + unit: str + + +class UnitlessRange(TypedDict): + min: NotRequired[int] + max: NotRequired[int] + unit: str + + +class UnitlessPatternConfig(TypedDict): + terms: List[str] + ranges: List[UnitlessRange] + + +class UnitlessPatternConfigWithName(TypedDict): + terms: List[str] + ranges: List[UnitlessRange] + name: str + + +class ValuelessPatternConfig(TypedDict): + terms: NotRequired[List[str]] + regex: NotRequired[List[str]] + measurement: SimpleMeasurementConfigWithoutRegistry + + +class MeasureConfig(TypedDict): + unit: str + unitless_patterns: NotRequired[List[UnitlessPatternConfig]] + valueless_patterns: NotRequired[List[ValuelessPatternConfig]] + + +class Measurement(abc.ABC): + @abc.abstractmethod + def __iter__(self) -> Iterable["SimpleMeasurement"]: + """ + Iter over items of the measure (only one for SimpleMeasurement) + + Returns + ------- + iterable : Iterable["SimpleMeasurement"] + """ + + @abc.abstractmethod + def __getitem__(self, item) -> "SimpleMeasurement": + """ + Access items of the measure (only one for SimpleMeasurement) + + Parameters + ---------- + item : int + + Returns + ------- + measure : SimpleMeasurement + """ + + +class UnitRegistry: + def __init__(self, config: Dict[str, UnitConfig]): + def generate_inverse_terms(unit_terms): + for unit_term in unit_terms: + yield "/" + unit_term + 
yield unit_term + "⁻¹" + yield unit_term + "-1" + + self.config = {unicodedata.normalize("NFKC", k): v for k, v in config.items()} + for unit, unit_config in list(self.config.items()): + if not unit.startswith("per_") and "per_" + unit not in self.config: + self.config["per_" + unit] = { + "scale": 1 / unit_config["scale"], + "terms": list(generate_inverse_terms(unit_config["terms"])), + "followed_by": None, + "ui_decomposition": { + dim: -degree + for dim, degree in unit_config["ui_decomposition"].items() + }, + } + + @lru_cache(maxsize=-1) + def parse_unit(self, unit: str) -> Tuple[str, float]: + degrees = defaultdict(lambda: 0) + scale = 1 + for part in regex.split("(? 1,26 cm + The unit can also be positioned in place of the decimal dot/comma + > 1 cm 26 + Some measurements can be composite + > 1,26 cm x 2,34 mm + And sometimes they are factorized + > Les trois kystes mesurent 1, 2 et 3cm. + + The recognized measurements are stored in the "measurements" SpanGroup. + Each span has a `Measurement` object stored in the "value" extension attribute. + + Parameters + ---------- + nlp : Language + The SpaCy object. + measurements : Union[List[str], Tuple[str], Dict[str, MeasureConfig]] + A mapping from measure names to MeasureConfig + Each measure's configuration has the following shape: + { + "unit": str, # the unit of the measure (like "kg"), + "unitless_patterns": List[ + # optional patterns to handle unitless cases + { + "terms": List[str], # list of preceding terms used to trigger the + measure + # Mapping from ranges to unit to handle cases like + # ("Taille: 1.2" -> 1.20 m vs "Taille: 120" -> 120cm) + "ranges": List[{ + "min": int, + "max": int, + "unit": str, + }, { + "min": int, + "unit": str, + }, ...], + } + ], + "valueless_patterns": List[ + # optional patterns to handle unmatched measures by + # this pipe. The measures are hardcoded with this option. + # It can be useful to detect measures such as "positive" + # or "negative" and store it as booleans. + { + "regex": List[str], + "terms": List[str], + "measurement": { + "value_range": str, + "value": Union[int, float], + "unit": str + } + } + ], + number_terms: Dict[str, List[str]] + A mapping of numbers to their lexical variants + value_range_terms: Dict[str, List[str] + A mapping of range terms (=, <, >) to their lexical variants + all_measurements: bool + Whether to keep all measurements or only the one specified in measurements. + If True, matched measurements not mentionned in measurements variable + will be labeled "eds.measurement", while the ones mentionned in + measurements variable will be labeled with the specified name. + parse_tables: bool + Whether to parse the tables of the doc detected with "eds.tables" pipe. + parse_doc: bool + Whether to parse the doc without the tables. If parse_tables and parse_doc + are both False, anything is parsed. + stopwords_unitless: List[str] + A list of stopwords that do not matter when placed between a unitless + trigger and a number + stopwords_measure_unit: List[str] + A list of stopwords that do not matter when placed between a unit and + a number + These stopwords do not matter only in one of the following pattern: + unit - stopwords - measure or measure - stopwords - unit, + according to measure_before_unit parameter. + measure_before_unit: bool + Set It True if the measure is generally before the unit, False + in the other case. 
+ This parameter will indicate if the stopwords in + stopwords_measure_unit should not matter in the unit-stopwords-measure + patterns only (False) or in the measure-stopwords- unit patterns + only (True) + unit_divisors: List[str] + A list of terms used to divide two units (like: m / s) + attr : str + Whether to match on the text ('TEXT') or on the normalized text ('NORM') + ignore_excluded : bool + Whether to exclude pollution patterns when matching in the text + span_setter: Optional[SpanSetterArg] + How to set the spans in the document. By default, each measurement will + be assigned to its own span group (using either the "name" field of the + config, or the key if you passed a dict), and to the "measurements" group. + span_getter : SpanGetterArg + Where to look for measurements in the doc. By default, look in the whole doc. + You can combine this with the `merge_mode` argument for interesting results. + merge_mode : Literal["intersect", "align"] + How to merge matches with the spans from `span_getter`, if given: + + - `intersect`: return only the matches that fall in the `span_getter` spans + - `align`: if a match overlaps a span from `span_getter` (e.g. a match + extracted by a machine learning model), return the `span_getter` span + instead, and assign all the parsed information (`._.date` / `._.duration`) + to it. Otherwise, don't return the date. + - `union`: extract measurements regardless of whether they fall within an existing span + extract_ranges: bool + Whether to extract ranges (like "entre 1 et 2 cm") + range_patterns: List[Tuple[str, str]] + A list of "{FROM} xx {TO} yy" patterns to match range measurements + """ + + if measurements is None: + measurements = common_measurements + elif isinstance(measurements, (list, tuple)): + measurements = {m: common_measurements[m] for m in measurements} + + self.nlp = nlp + self.name = name + self.unit_registry = UnitRegistry(units_config) + self.regex_matcher = RegexMatcher(attr=attr, ignore_excluded=True) + self.term_matcher = EDSPhraseMatcher(nlp.vocab, attr=attr, ignore_excluded=True) + self.unitless_patterns: Dict[str, UnitlessPatternConfigWithName] = {} + self.valueless_patterns: Dict[str, SimpleMeasurement] = {} + self.value_range_label_hashes: Set[int] = set() + self.unit_part_label_hashes: Set[int] = set() + self.unitless_label_hashes: Set[int] = set() + self.valueless_label_hashes: Set[int] = set() + self.unit_followers: Dict[str, str] = {} + self.measure_names: Dict[str, str] = {} + self.measure_before_unit = measure_before_unit + self.all_measurements = all_measurements + self.parse_tables = parse_tables + self.parse_doc = parse_doc + self.span_getter = ( + validate_span_getter(span_getter) if span_getter is not None else None + ) + self.merge_mode = merge_mode + self.extract_ranges = extract_ranges + self.range_patterns = range_patterns + + if span_setter is None: + span_setter = { + "ents": as_ents, + "measurements": True, + **{name: [name] for name in self.measure_names.values()}, + } + + super().__init__(nlp=nlp, name=name, span_setter=span_setter) + + # INTERVALS + self.regex_matcher.add( + "interval", + [r"-?\s*\d+(?:[.,]\d+)?\s*-\s*-?\s*\d+(?:[.,]\d+)?"], + ) + + # POWERS OF 10 + self.regex_matcher.add( + "pow10", + [ + ( + r"(?:(?:\s*x?\s*10\s*(?:\*{1,2}|\^)\s*)|" + r"(?:\s*\*\s*10\s*(?:\*{2}|\^)\s*))(-?\d+)" + ), + ], + ) + + # MEASUREMENT VALUE RANGES + for value_range, terms in value_range_terms.items(): + self.term_matcher.build_patterns(nlp, {value_range: terms}) + 
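+            # value_range_terms maps a comparison symbol to its lexical
+            # variants, e.g. (illustrative French triggers; the real ones
+            # live in patterns.py):
+            #     {"<": ["moins de"], ">": ["plus de"], "=": ["égal à"]}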
self.value_range_label_hashes.add(nlp.vocab.strings[value_range]) + + # NUMBER PATTERNS + self.regex_matcher.add( + "number", + [ + r"(? None: + """ + Set extensions for the measurements pipeline. + """ + + if not Span.has_extension("value"): + Span.set_extension("value", default=None) + + def extract_units(self, term_matches: Iterable[Span]) -> Iterable[Span]: + """ + Extracts unit spans from the document by extracting unit atoms (declared in the + units_config parameter) and aggregating them automatically + Ex: "il faut 2 g par jour" + => we extract [g]=unit(g), [par]=divisor(per), [jour]=unit(day) + => we aggregate these adjacent matches together to compose a new unit g_per_day + + + Parameters + ---------- + term_matches: Iterable[Span] + + Returns + ------- + Iterable[Span] + """ + last = None + units = [] + current = [] + unit_label_hashes = set() + for unit_part in filter_spans(term_matches): + if unit_part.label not in self.unit_part_label_hashes: + continue + if last is not None and unit_part.start != last.end and len(current): + doc = current[0].doc + # Last non "per" match: we don't want our units to be like `g_per` + end = next( + (i for i, e in list(enumerate(current))[::-1] if e.label_ != "per"), + None, + ) + if end is not None: + unit = "_".join(part.label_ for part in current[: end + 1]) + units.append(Span(doc, current[0].start, current[end].end, unit)) + unit_label_hashes.add(units[-1].label) + current = [] + last = None + current.append(unit_part) + last = unit_part + + end = next( + (i for i, e in list(enumerate(current))[::-1] if e.label_ != "per"), None + ) + if end is not None: + doc = current[0].doc + unit = "_".join(part.label_ for part in current[: end + 1]) + units.append(Span(doc, current[0].start, current[end].end, unit)) + unit_label_hashes.add(units[-1].label) + return units + + @classmethod + def make_pseudo_sentence( + cls, + doc: Doc, + matches: List[Tuple[Span, bool]], + pseudo_mapping: Dict[int, str], + ) -> Tuple[str, List[int]]: + """ + Creates a pseudo sentence (one letter per entity) + to extract higher order patterns + Ex: the sentence + "Il font {1}{,} {2} {et} {3} {cm} de long{.}" is transformed into "wn,n,nuw." 
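+        where, with the mapping used by `extract_measurements_from_doc` below,
+        "w" stands for an unrelated word, "n" for a number, "u" for a unit,
+        "," for a separator, and "." for a sentence end.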
+ + Parameters + ---------- + doc: Doc + matches: List[(Span, bool)] + List of tuple of span and whether the span represents a sentence end + pseudo_mapping: Dict[int, str] + A mapping from label to char in the pseudo sentence + + Returns + ------- + (str, List[int]) + - the pseudo sentence + - a list of offsets to convert match indices into pseudo sent char indices + """ + pseudo = [] + last = 0 + offsets = [] + for ent, is_sent_split in matches: + if ent.start != last: + pseudo.append("w") + offsets.append(len(pseudo)) + if is_sent_split: + pseudo.append(".") + else: + pseudo.append(pseudo_mapping.get(ent.label, "w")) + last = ent.end + if len(doc) != last: + pseudo.append("w") + pseudo = "".join(pseudo) + + return pseudo, offsets + + @classmethod + def combine_measure_pow10( + cls, + measure: float, + pow10_text: str, + ) -> float: + """ + Return a float based on the measure (float) and the power of + 10 extracted with regex (string) + + Parameters + ---------- + measure: float + pow10_text: str + + Returns + ------- + float + """ + pow10 = int( + re.fullmatch( + ( + r"(?:(?:\s*x?\s*10\s*(?:\*{1,2}|\^)\s*)|" + r"(?:\s*\*\s*10\s*(?:\*{2}|\^)\s*))(-?\d+)" + ), + pow10_text, + ).group(1) + ) + return measure * 10**pow10 + + def get_matches(self, doc: Union[Doc, Span]): + """ + Extract and filter regex and phrase matches in the document + to prepare the measurement extraction. + Returns the matches and a list of hashes to quickly find unit matches + + Parameters + ---------- + doc: Union[Doc, Span] + + Returns + ------- + Tuple[List[(Span, bool)], Set[int]] + - List of tuples of spans and whether the spans represents a sentence end + - List of hash label to distinguish unit from other matches + """ + sent_ends = [doc[i : i + 1] for i in range(len(doc)) if doc[i].is_sent_end] + + regex_matches = list(self.regex_matcher(doc, as_spans=True)) + term_matches = list(self.term_matcher(doc, as_spans=True)) + + # Detect unit parts and compose them into units + units = self.extract_units(term_matches) + unit_label_hashes = {unit.label for unit in units} + + # Filter matches to prevent matches over dates or doc entities + non_unit_terms = [ + term + for term in term_matches + if term.label not in self.unit_part_label_hashes + ] + + # Filter out measurement-related spans that overlap already matched + # entities (in doc.ents or doc.spans["dates"] or doc.spans["tables"]) + # Tables are considered in a separate step + # Note: we also include sentence ends tokens as 1-token spans in those matches + if type(doc) == Doc: + spans__keep__is_sent_end = filter_spans( + [ + # Tuples (span, keep = is measurement related, is sentence end) + *zip(doc.spans.get("dates", ()), repeat(False), repeat(False)), + *zip(doc.spans.get("tables", ()), repeat(False), repeat(False)), + *zip(regex_matches, repeat(True), repeat(False)), + *zip(non_unit_terms, repeat(True), repeat(False)), + *zip(units, repeat(True), repeat(False)), + *zip(doc.ents, repeat(False), repeat(False)), + *zip(sent_ends, repeat(True), repeat(True)), + ], + # filter entities to keep only the ... + sort_key=measurements_match_tuples_sort_key, + ) + else: + spans__keep__is_sent_end = filter_spans( + [ + # Tuples (span, keep = is measurement related, is sentence end) + *zip(regex_matches, repeat(True), repeat(False)), + *zip(non_unit_terms, repeat(True), repeat(False)), + *zip(units, repeat(True), repeat(False)), + *zip(doc.ents, repeat(False), repeat(False)), + *zip(sent_ends, repeat(True), repeat(True)), + ], + # filter entities to keep only the ... 
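+            # (For reference, the power-of-10 combination defined above:
+            # combine_measure_pow10(1.2, "x 10^3") -> 1200.0, since the regex
+            # captures the exponent 3 and the value is scaled by 10**3.)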
+ sort_key=measurements_match_tuples_sort_key, + ) + + # Remove non-measurement related spans (keep = False) and sort the matches + matches_and_is_sentence_end: List[(Span, bool)] = sorted( + [ + (span, is_sent_end) + for span, keep, is_sent_end in spans__keep__is_sent_end + # and remove entities that are not relevant to this pipeline + if keep + ] + ) + + return matches_and_is_sentence_end, unit_label_hashes + + def extract_measurements_from_doc(self, doc: Doc): + """ + Extracts measure entities from the filtered document + + Parameters + ---------- + doc: Doc + + Returns + ------- + List[Span] + """ + matches, unit_label_hashes = self.get_matches(doc) + + # Make match slice function to query them + def get_matches_after(i): + anchor = matches[i][0] + for j, (ent, is_sent_end) in enumerate(matches[i + 1 :]): + if not is_sent_end and ent.start > anchor.end + AFTER_SNIPPET_LIMIT: + return + yield j + i + 1, ent + + def get_matches_before(i): + anchor = matches[i][0] + for j, (ent, is_sent_end) in enumerate(matches[i::-1]): + if not is_sent_end and ent.end < anchor.start - BEFORE_SNIPPET_LIMIT: + return + yield i - j, ent + + # Make a pseudo sentence to query higher order patterns in the main loop + # `offsets` is a mapping from matches indices (ie match n°i) to + # char indices in the pseudo sentence + pseudo, offsets = self.make_pseudo_sentence( + doc, + matches, + { + self.nlp.vocab.strings["stopword"]: "o", + self.nlp.vocab.strings["interval"]: ",", + self.nlp.vocab.strings["stopword_unitless"]: ",", + self.nlp.vocab.strings["stopword_measure_unit"]: "s", + self.nlp.vocab.strings["number"]: "n", + self.nlp.vocab.strings["pow10"]: "p", + **{name: "u" for name in unit_label_hashes}, + **{name: "n" for name in self.number_label_hashes}, + **{name: "r" for name in self.value_range_label_hashes}, + }, + ) + + measurements = [] + matched_unit_indices = set() + + # Iterate through the number matches + for number_idx, (number, is_sent_split) in enumerate(matches): + if not is_sent_split and number.label not in self.number_label_hashes: + # Check if we have a valueless pattern + if number.label in self.valueless_label_hashes: + ent = number + ent._.value = self.valueless_patterns[number.label_] + ent.label_ = self.measure_names[ + self.unit_registry.parse_unit(ent._.value.unit)[0] + ] + measurements.append(ent) + continue + + # Detect the measure value + try: + if number.label_ == "number": + value = float( + number.text.replace(" ", "").replace(",", ".").replace(" ", "") + ) + else: + value = float(number.label_) + except ValueError: + continue + + # Link It to Its adjacent power if available + try: + pow10_idx, pow10_ent = next( + (j, ent) + for j, ent in get_matches_after(number_idx) + if ent.label == self.nlp.vocab.strings["pow10"] + ) + pseudo_sent = pseudo[offsets[number_idx] + 1 : offsets[pow10_idx]] + if re.fullmatch(r"[,o]*", pseudo_sent): + pow10_text = pow10_ent.text + value = self.combine_measure_pow10(value, pow10_text) + except (AttributeError, StopIteration): + pass + + # Check if the measurement is an =, < or > measurement + try: + if pseudo[offsets[number_idx] - 1] == "r": + value_range = matches[number_idx - 1][0].label_ + else: + value_range = "=" + except (KeyError, AttributeError, IndexError): + value_range = "=" + + unit_idx = unit_text = unit_norm = None + + # Find the closest unit after the number + try: + unit_idx, unit_text = next( + (j, ent) + for j, ent in get_matches_after(number_idx) + if ent.label in unit_label_hashes + ) + unit_norm = unit_text.label_ + except 
(AttributeError, StopIteration): + pass + + # Try to pair the number with this next unit if the two are only separated + # by numbers (with or without powers of 10) and separators + # (as in [1][,] [2] [and] [3] cm) + try: + pseudo_sent = pseudo[offsets[number_idx] + 1 : offsets[unit_idx]] + if not re.fullmatch(r"[,o]*p?([,o]n?p?)*", pseudo_sent): + unit_text, unit_norm = None, None + except TypeError: + pass + + # Otherwise, try to infer the unit from the preceding unit to handle cases + # like (1 meter 50) + if unit_norm is None and number_idx - 1 in matched_unit_indices: + try: + unit_before = matches[number_idx - 1][0] + if ( + unit_before.end == number.start + and pseudo[offsets[number_idx] - 2] == "n" + ): + unit_norm = self.unit_followers[unit_before.label_] + except (KeyError, AttributeError, IndexError): + pass + + # If no unit was matched, try to detect unitless patterns before + # the number to handle cases like ("Weight: 63, Height: 170") + if not unit_norm: + try: + (unitless_idx, unitless_text) = next( + (j, e) + for j, e in get_matches_before(number_idx) + if e.label in self.unitless_label_hashes + ) + unit_norm = None + if re.fullmatch( + r"[,onr]*", + pseudo[offsets[unitless_idx] + 1 : offsets[number_idx]], + ): + unitless_pattern = self.unitless_patterns[unitless_text.label_] + unit_norm = next( + scope["unit"] + for scope in unitless_pattern["ranges"] + if ( + "min" not in scope + or value >= scope["min"] + or value_range == ">" + ) + and ( + "max" not in scope + or value < scope["max"] + or value_range == "<" + ) + ) + except StopIteration: + pass + + # If no unit was matched, take the nearest unit, but only if + # it is separated from the number by stopwords from + # stopwords_measure_unit and / or a value_range term. + # Whether we look before or after the measure is controlled + # by the measure_before_unit parameter + if not unit_norm: + try: + if not self.measure_before_unit: + (unit_before_idx, unit_before_text) = next( + (j, e) + for j, e in get_matches_before(number_idx) + if e.label in unit_label_hashes + ) + if re.fullmatch( + r"[sor]*", + pseudo[offsets[unit_before_idx] + 1 : offsets[number_idx]], + ): + unit_norm = unit_before_text.label_ + # Check if there is a power of 10 before the unit + if ( + offsets[unit_before_idx] >= 1 + and pseudo[offsets[unit_before_idx] - 1] == "p" + ): + pow10_text = matches[unit_before_idx - 1][0].text + value = self.combine_measure_pow10(value, pow10_text) + else: + (unit_after_idx, unit_after_text) = next( + (j, e) + for j, e in get_matches_after(number_idx) + if e.label in unit_label_hashes + ) + if re.fullmatch( + r"[sop]*", + pseudo[offsets[number_idx] + 1 : offsets[unit_after_idx]], + ): + unit_norm = unit_after_text.label_ + except (AttributeError, StopIteration): + pass + + # Otherwise, default the unit to "nounit", unless the value + # was written with letters, in which case the match is skipped + if not unit_norm: + if number.label_ == "number": + unit_norm = "nounit" + else: + continue + + # Compute the final entity + if type(doc) == Doc: + if unit_text and unit_text.end == number.start: + ent = doc[unit_text.start : number.end] + elif unit_text and unit_text.start == number.end: + ent = doc[number.start : unit_text.end] + else: + ent = number + else: + if unit_text and unit_text.end == number.start: + ent = doc[unit_text.start - doc.start : number.end - doc.start] + elif unit_text and unit_text.start == number.end: + ent = doc[number.start - doc.start : unit_text.end - doc.start] + else: + ent = number + + # If the measure was not requested, dismiss it + # Otherwise, relabel the entity and create the value attribute + # Compute
the dimensionality of the parsed unit + try: + dims = self.unit_registry.parse_unit(unit_norm)[0] + ent._.value = SimpleMeasurement( + value_range, value, unit_norm, self.unit_registry + ) + if dims not in self.measure_names: + if self.all_measurements: + ent.label_ = "eds.measurement" + else: + continue + else: + ent.label_ = self.measure_names[dims] + except KeyError: + continue + + measurements.append(ent) + + if unit_idx is not None: + matched_unit_indices.add(unit_idx) + + return measurements + + def extract_measurements_from_tables(self, doc: Doc): + """ + Extracts measure entities from the document tables + + Parameters + ---------- + doc: Doc + + Returns + ------- + List[Span] + """ + + tables = doc.spans.get("tables", None) + measurements = [] + + if not tables: + return [] + + def get_distance_between_columns(column1_key, column2_key): + return abs(keys.index(column1_key) - keys.index(column2_key)) + + for table in tables: + # Try to retrieve columns linked to values + # or columns linked to units + # or columns linked to powers of 10 + # And then iter through the value columns + # to recreate measurements + keys = list(table._.table.keys()) + + unit_column_keys = [] + value_column_keys = [] + pow10_column_keys = [] + # Table with measurements related labellisation + table_labeled = {key: [] for key in keys} + unit_label_hashes = set() + + for key, column in list(table._.table.items()): + # We link the column to values, powers of 10 or units + # if more than half of the cells contain the said object + + # Cell counters + n_unit = 0 + n_value = 0 + n_pow10 = 0 + + for term in column: + + matches_in_term, unit_label_hashes_in_term = self.get_matches(term) + unit_label_hashes = unit_label_hashes.union( + unit_label_hashes_in_term + ) + + measurement_matches = [] + is_unit = False + is_value = False + is_pow10 = False + + for match, _ in matches_in_term: + + if ( + match.label in self.number_label_hashes + or match.label in self.valueless_label_hashes + ): + is_value = True + measurement_matches.append(match) + elif match.label in unit_label_hashes_in_term: + is_unit = True + measurement_matches.append(match) + elif match.label == self.nlp.vocab.strings["pow10"]: + is_pow10 = True + measurement_matches.append(match) + elif match.label in self.value_range_label_hashes: + measurement_matches.append(match) + + if is_unit: + n_unit += 1 + if is_value: + n_value += 1 + if is_pow10: + n_pow10 += 1 + + table_labeled[key].append(measurement_matches) + + # Checking if half of the cells contain units, values + # or powers of 10 + if n_unit > len(column) / 2: + unit_column_keys.append(key) + if n_value > len(column) / 2: + value_column_keys.append(key) + if n_pow10 > len(column) / 2: + pow10_column_keys.append(key) + + # Iter through the value keys to create measurements + for value_column_key in value_column_keys: + + # If the table contains a unit column, + # try to pair the value to the unit of + # the nearest unit column + if len(unit_column_keys): + # Prevent same distance conflict + # For example is a table is organised as + # "header, unit1, value1, unit2, value2" + # value1 is at equal distance of unit1 and unit2 columns + # To solve this problem, we try to detect if we have a + # value - unit pattern or unit - value pattern by checking + # the first column that appears. 
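# Worked illustration of the tie-break below, with hypothetical keys:
#     keys              = ["nom", "unite_1", "valeur_1", "unite_2", "valeur_2"]
#     unit_column_keys  = ["unite_1", "unite_2"]
#     value_column_keys = ["valeur_1", "valeur_2"]
# "valeur_1" is at distance 1 from both "unite_1" and "unite_2". The first
# unit column (index 1) appears before the first value column (index 2),
# so the layout is read as unit-then-value and the tie is resolved in
# favour of the unit column on the left ("unite_1").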
+ if keys.index(unit_column_keys[0]) > keys.index( + value_column_keys[0] + ): + measure_before_unit_in_table = True + else: + measure_before_unit_in_table = False + + # We only consider the nearest unit column when It + # is not a value column at the same time + # except if It is the column that we are considering + try: + unit_column_key = sorted( + [ + ( + unit_column_key, + get_distance_between_columns( + unit_column_key, value_column_key + ), + ) + for unit_column_key in unit_column_keys + if unit_column_key + not in [ + v + for v in value_column_keys + if v != value_column_key + ] + ], + key=lambda unit_column_key_tuple: unit_column_key_tuple[1], + )[0 : min(2, len(unit_column_keys))] + if ( + len(unit_column_key) == 1 + or unit_column_key[0][1] != unit_column_key[1][1] + ): + unit_column_key = unit_column_key[0][0] + else: + unit_column_key = unit_column_key[ + 0 * (not measure_before_unit_in_table) + - 1 * measure_before_unit_in_table + ][0] + except IndexError: + unit_column_key = value_column_key + else: + unit_column_key = value_column_key + + # If the table contains a power column, + # try to pair the value to the power of + # the nearest power column + if len(pow10_column_keys): + # Same distance conflict as for unit columns + if keys.index(pow10_column_keys[0]) > keys.index( + value_column_keys[0] + ): + measure_before_power_in_table = True + else: + measure_before_power_in_table = False + + try: + pow10_column_key = sorted( + [ + ( + pow10_column_key, + get_distance_between_columns( + pow10_column_key, value_column_key + ), + ) + for pow10_column_key in pow10_column_keys + if pow10_column_key + not in [ + v + for v in value_column_keys + if v != value_column_key + ] + + [u for u in unit_column_keys if u != unit_column_key] + # The pow10_column_key cannot be a unit_column_key + # other than the one selected + ], + key=lambda pow10_column_key_tuple: pow10_column_key_tuple[ + 1 + ], + )[0 : min(2, len(pow10_column_keys))] + if ( + len(pow10_column_key) == 1 + or pow10_column_key[0][1] != pow10_column_key[1][1] + ): + pow10_column_key = pow10_column_key[0][0] + else: + pow10_column_key = pow10_column_key[ + 0 * (not measure_before_power_in_table) + - 1 * measure_before_power_in_table + ][0] + except IndexError: + pow10_column_key = value_column_key + else: + pow10_column_key = value_column_key + + # If unit column is the same as value column, extract + # measurement in this column with the + # extract_measurements_from_doc method + + if unit_column_key == value_column_key: + # Consider possible pow10 column + if pow10_column_key != value_column_key: + for term, pow10_list in zip( + table._.table[value_column_key], + table_labeled[pow10_column_key], + ): + measurements_part = self.extract_measurements_from_doc(term) + try: + pow10_text = [ + p.text + for p in pow10_list + if p.label == self.nlp.vocab.strings["pow10"] + ][0] + for measurement in measurements_part: + measurement._.value.value = ( + self.combine_measure_pow10( + measurement._.value.value, pow10_text + ) + ) + except IndexError: + pass + measurements += measurements_part + else: + for term in table._.table[value_column_key]: + measurements += self.extract_measurements_from_doc(term) + continue + + # If unit column is different from value column + # Iter through the value column to create the measurement + # Iter through the units and powers columns + # at the same time if they exist, else value column + for unit_list, value_list, pow10_list in zip( + table_labeled[unit_column_key], + table_labeled[value_column_key], 
+ table_labeled[pow10_column_key], + ): + # Check if there is really a value + try: + ent = [ + v + for v in value_list + if v.label in self.number_label_hashes + or v.label in self.valueless_label_hashes + ][0] + # Take the value linked to valueless pattern if + # we have a valueless pattern + if ent.label in self.valueless_label_hashes: + ent._.value = self.valueless_patterns[ent.label_] + ent.label_ = self.measure_names[ + self.unit_registry.parse_unit(ent._.value.unit)[0] + ] + measurements.append(ent) + continue + # Else try to parse the number + if ent.label_ == "number": + value = float( + ent.text.replace(" ", "") + .replace(",", ".") + .replace(" ", "") + ) + else: + value = float(ent.label_) + # Sometimes the value column contains a power. + # It may not be common enough to reach 50% + # of the cells, that's why + # It may not be labeled as pow10_column. + # Still, we should retrieve these powers. + try: + pow10_text = [ + p.text + for p in value_list + if p.label == self.nlp.vocab.strings["pow10"] + ][0] + value = self.combine_measure_pow10(value, pow10_text) + except IndexError: + pass + except (IndexError, ValueError): + continue + + # Check for value range terms + try: + value_range = [ + v_r.label_ + for v_r in value_list + if v_r.label in self.value_range_label_hashes + ][0] + except IndexError: + value_range = "=" + + # Check for units and powers in the unit column + # (for same reasons as described before) + # in units column + try: + unit_norm = [ + u.label_ for u in unit_list if u.label in unit_label_hashes + ][0] + # To avoid duplicates + if unit_column_key != value_column_key: + try: + pow10_text = [ + p.text + for p in unit_list + if p.label == self.nlp.vocab.strings["pow10"] + ][0] + value = self.combine_measure_pow10(value, pow10_text) + except IndexError: + pass + except IndexError: + unit_norm = "nounit" + + if unit_norm == "nounit": + # Try to retrieve a possible unit in the header + # of the value column + try: + unit_norm = [ + u.label_ + for u in self.extract_units( + list( + self.term_matcher( + self.nlp(str(value_column_key)), + as_spans=True, + ) + ) + ) + ][0] + except IndexError: + pass + + # Check for powers in power column + try: + if ( + pow10_column_key != value_column_key + and pow10_column_key != unit_column_key + ): + pow10_text = [ + p.text + for p in pow10_list + if p.label == self.nlp.vocab.strings["pow10"] + ][0] + value = self.combine_measure_pow10(value, pow10_text) + except IndexError: + pass + + if self.all_measurements: + ent._.value = SimpleMeasurement( + value_range, value, unit_norm, self.unit_registry + ) + ent.label_ = "eds.measurement" + else: + # If the measure was not requested, dismiss it + # Otherwise, relabel the entity and create the value attribute + # Compute the dimensionality of the parsed unit + try: + dims = self.unit_registry.parse_unit(unit_norm)[0] + if dims not in self.measure_names: + continue + ent._.value = SimpleMeasurement( + value_range, value, unit_norm, self.unit_registry + ) + ent.label_ = self.measure_names[dims] + except KeyError: + continue + + measurements.append(ent) + + return measurements + + def extract_measurements(self, doc: Doc): + """ + Extracts measure entities from the document + + Parameters + ---------- + doc: Doc + + Returns + ------- + List[Span] + """ + measurements = [] + if self.parse_doc: + measurements += self.extract_measurements_from_doc(doc) + if self.parse_tables: + measurements += self.extract_measurements_from_tables(doc) + measurements = filter_spans(measurements) + return 
measurements + + @classmethod + def merge_adjacent_measurements(cls, measurements: List[Span]) -> List[Span]: + """ + Aggregates extracted measurements together when they are adjacent to handle + cases like + - 1 meter 50 cm + - 30° 4' 54" + + Parameters + ---------- + measurements: List[Span] + + Returns + ------- + List[Span] + """ + merged = measurements[:1] + for ent in measurements[1:]: + last = merged[-1] + + if last.end == ent.start and last._.value.unit != ent._.value.unit: + try: + new_value = last._.value + ent._.value + merged[-1] = last = last.doc[last.start : ent.end] + last._.value = new_value + last.label_ = ent.label_ + except (AttributeError, TypeError): + merged.append(ent) + else: + merged.append(ent) + + return merged + + def merge_measurements_in_ranges(self, measurements: List[Span]) -> List[Span]: + """ + Aggregates extracted measurements together when they are adjacent to handle + cases like + - 1 meter 50 cm + - 30° 4' 54" + + Parameters + ---------- + measurements: List[Span] + + Returns + ------- + List[Span] + """ + if not self.extract_ranges or not self.range_patterns: + return measurements + + merged = measurements[:1] + for ent in measurements[1:]: + last = merged[-1] + + from_text = last.doc[last.start - 1].norm_ if last.start > 0 else None + to_text = get_text(last.doc[last.end : ent.start], "NORM", True) + matching_patterns = [ + (a, b) + for a, b in self.range_patterns + if b == to_text and (a is None or a == from_text) + ] + if len(matching_patterns): + try: + new_value = RangeMeasurement.from_measurements( + last._.value, ent._.value + ) + merged[-1] = last = last.doc[ + last.start + if matching_patterns[0][0] is None + else last.start - 1 : ent.end + ] + last.label_ = ent.label_ + last._.set(last.label_, new_value) + except (AttributeError, TypeError): + merged.append(ent) + else: + merged.append(ent) + + return merged + + def merge_with_existing( + self, + extracted: List[Span], + existing: List[Span], + ) -> List[Span]: + """ + Merges the extracted measurements with the existing measurements in the + document. + + Parameters + ---------- + extracted: List[Span] + The extracted measurements + existing: List[Span] + The existing measurements in the document + + Returns + ------- + List[Span] + """ + if self.merge_mode == "align": + spans_measurements = align_spans(extracted, existing, sort_by_overlap=True) + + extracted = [] + for span, span_measurements in zip(existing, spans_measurements): + if len(span_measurements): + span._.set(span.label_, span_measurements[0]._.get(span.label_)) + extracted.append(span) + + elif self.merge_mode == "intersect": + spans_measurements = align_spans(extracted, existing) + extracted = [] + for span, span_measurements in zip(existing, spans_measurements): + extracted.extend(span_measurements) + extracted = list(dict.fromkeys(extracted)) + + elif self.merge_mode == "union": + extracted = [*extracted, *existing] + + return extracted + + def __call__(self, doc): + """ + Adds measurements to document's "measurements" SpanGroup. + + Parameters + ---------- + doc: + spaCy Doc object + + Returns + ------- + doc: + spaCy Doc object, annotated for extracted measurements. 
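Examples
--------
A standalone sketch of the range test applied during this call by
merge_measurements_in_ranges (toy subset of range_patterns; the real
method reads from_text / to_text from the tokens around and between
the two measurement spans):

    range_patterns = [("De", "à"), ("de", "à"), ("entre", "et"), (None, "à")]
    from_text, to_text = "de", "à"  # as in "mesurant de 3 à 5 cm"
    matching = [
        (a, b)
        for a, b in range_patterns
        if b == to_text and (a is None or a == from_text)
    ]
    assert matching == [("de", "à"), (None, "à")]

A non-empty result means the two measurements are fused into a single
span carrying a RangeMeasurement; because the first match here has a
non-None left part ("de"), the merged span is widened to include that
token.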
+ """ + measurements = self.extract_measurements(doc) + measurements = self.merge_adjacent_measurements(measurements) + measurements = self.merge_measurements_in_ranges(measurements) + + if self.span_getter is not None: + existing = list(get_spans(doc, self.span_getter)) + measurements = self.merge_with_existing(measurements, existing) + + doc.spans["measurements"] = measurements + + # for backward compatibility + doc.spans["measures"] = doc.spans["measurements"] + + return doc + + +def measurements_match_tuples_sort_key( + span__keep__is_sent_end: Tuple[Span, bool, bool] +) -> Tuple[int, int, bool]: + span, _, is_sent_end = span__keep__is_sent_end + + length = span.end - span.start + + return length, span.end, not is_sent_end diff --git a/edsnlp/edsnlp/pipelines/misc/measurements/patterns.py b/edsnlp/edsnlp/pipelines/misc/measurements/patterns.py new file mode 100644 index 000000000..4e5e6e06b --- /dev/null +++ b/edsnlp/edsnlp/pipelines/misc/measurements/patterns.py @@ -0,0 +1,2393 @@ +number_terms = { + "1": ["un", "une"], + "2": ["deux"], + "3": ["trois"], + "4": ["quatre"], + "5": ["cinq"], + "6": ["six"], + "7": ["sept"], + "8": ["huit"], + "9": ["neuf"], + "10": ["dix"], + "11": ["onze"], + "12": ["douze"], + "13": ["treize"], + "14": ["quatorze"], + "15": ["quinze"], + "16": ["seize"], + "17": ["dix-sept", "dix sept"], + "18": ["dix-huit", "dix huit"], + "19": ["dix-neuf", "dix neuf"], + "20": ["vingt", "vingts"], + "30": ["trente"], + "40": ["quarante"], + "50": ["cinquante"], + "60": ["soixante"], + "70": ["soixante dix", "soixante-dix"], + "80": ["quatre vingt", "quatre-vingt", "quatre vingts", "quatre-vingts"], + "90": ["quatre vingt dix", "quatre-vingt-dix"], + "100": ["cent"], + "500": ["cinq cent", "cinq-cent"], + "1000": ["mille", "milles"], +} + + +value_range_terms = { + "<": ["<", "<=", "inferieure a", "inferieur a", "inf a", "inf"], + ">": [">", ">=", "superieure a", "superieur a", "sup a", "sup"], +} + + +common_measurements = { + "eds.weight": { + "unit": "kg", + "unitless_patterns": [ + { + "terms": ["poids", "poid", "pese", "pesant", "pesait", "pesent"], + "ranges": [ + {"min": 0, "max": 200, "unit": "kg"}, + {"min": 200, "unit": "g"}, + ], + } + ], + }, + "eds.size": { + "unit": "m", + "unitless_patterns": [ + { + "terms": [ + "mesure", + "taille", + "mesurant", + "mesurent", + "mesurait", + "mesuree", + "hauteur", + "largeur", + "longueur", + ], + "ranges": [ + {"min": 0, "max": 3, "unit": "m"}, + {"min": 3, "unit": "cm"}, + ], + } + ], + }, + "eds.bmi": { + "unit": "kg_per_m2", + "unitless_patterns": [ + {"terms": ["imc", "bmi"], "ranges": [{"unit": "kg_per_m2"}]} + ], + }, + "eds.volume": {"unit": "m3", "unitless_patterns": []}, + "eds.bool": { + "unit": "bool", + "valueless_patterns": [ + { + "terms": ["positif", "positifs", "positive", "positives"], + "measurement": { + "value_range": "=", + "value": 1, + "unit": "bool", + }, + }, + { + "terms": ["negatif", "negatifs", "negative", "negatives"], + "measurement": { + "value_range": "=", + "value": 0, + "unit": "bool", + }, + }, + ], + }, +} + + +unit_divisors = ["/", "par"] + + +stopwords_unitless = ["par", "sur", "de", "a", ":", ",", "et"] + + +stopwords_measure_unit = ["|", "¦", "…", "."] + + +units_config = { + "fm": { + "scale": 1e-15, + "terms": ["femtometre", "femtometres", "femto-metre", "femto-metres", "fm"], + "followed_by": None, + "ui_decomposition": {"length": 1}, + }, + "pm": { + "scale": 1e-12, + "terms": ["picometre", "picometres", "pico-metre", "pico-metres", "pm"], + "followed_by": None, + 
"ui_decomposition": {"length": 1}, + }, + "nm": { + "scale": 1e-09, + "terms": ["nanometre", "nanometres", "nano-metre", "nano-metres", "nm"], + "followed_by": None, + "ui_decomposition": {"length": 1}, + }, + "μm": { + "scale": 1e-06, + "terms": [ + "micrometre", + "micrometres", + "micro-metre", + "micrometres", + "µm", + "um", + ], + "followed_by": None, + "ui_decomposition": {"length": 1}, + }, + "mm": { + "scale": 0.001, + "terms": ["millimetre", "millimetres", "milimetre", "milimetres", "mm"], + "followed_by": None, + "ui_decomposition": {"length": 1}, + }, + "cm": { + "scale": 0.01, + "terms": ["centimetre", "centimetres", "cm"], + "followed_by": None, + "ui_decomposition": {"length": 1}, + }, + "dm": { + "scale": 0.1, + "terms": ["decimetre", "decimetres", "dm"], + "followed_by": None, + "ui_decomposition": {"length": 1}, + }, + "m": { + "scale": 1.0, + "terms": ["metre", "metres", "m"], + "followed_by": "cm", + "ui_decomposition": {"length": 1}, + }, + "dam": { + "scale": 10.0, + "terms": ["decametre", "decametres", "dam"], + "followed_by": None, + "ui_decomposition": {"length": 1}, + }, + "hm": { + "scale": 100.0, + "terms": ["hectometre", "hectometres", "hm"], + "followed_by": None, + "ui_decomposition": {"length": 1}, + }, + "km": { + "scale": 1000.0, + "terms": ["kilometre", "kilometres", "km"], + "followed_by": "m", + "ui_decomposition": {"length": 1}, + }, + "fg": { + "scale": 1e-18, + "terms": [ + "femtogramme", + "femtogrammes", + "femto-gramme", + "femto-grammes", + "fgr", + "fg", + ], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "pg": { + "scale": 1e-15, + "terms": [ + "picogramme", + "picogrammes", + "pico-gramme", + "pico-grammes", + "pgr", + "pg", + ], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "ng": { + "scale": 1e-12, + "terms": [ + "nanogramme", + "nanogrammes", + "nano-gramme", + "nano-grammes", + "ngr", + "ng", + ], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "µg": { + "scale": 1e-9, + "terms": [ + "microgramme", + "microgrammes", + "micro-gramme", + "micro-grammes", + "µgr", + "µg", + "ugr", + "ug", + ], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "mg": { + "scale": 1e-6, + "terms": [ + "milligramme", + "miligramme", + "milligrammes", + "miligrammes", + "mgr", + "mg", + ], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "cg": { + "scale": 1e-5, + "terms": ["centigramme", "centigrammes", "cg", "cgr"], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "dg": { + "scale": 1e-4, + "terms": ["decigramme", "decigrammes", "dgr", "dg"], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "g": { + "scale": 0.001, + "terms": ["gramme", "grammes", "gr", "g"], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "dag": { + "scale": 0.01, + "terms": ["decagramme", "decagrammes", "dagr", "dag"], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "hg": { + "scale": 0.1, + "terms": ["hectogramme", "hectogrammes", "hgr", "hg"], + "followed_by": None, + "ui_decomposition": {"mass": 1}, + }, + "kg": { + "scale": 1.0, + "terms": ["kilo", "kilogramme", "kilogrammes", "kgr", "kg"], + "followed_by": "g", + "ui_decomposition": {"mass": 1}, + }, + "fs": { + "scale": 1e-15, + "terms": [ + "femtoseconde", + "femtosecondes", + "femto-seconde", + "femto-secondes", + "fs", + ], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "ps": { + "scale": 1e-12, + "terms": ["picoseconde", "picosecondes", "pico-seconde", 
"pico-secondes", "ps"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "ns": { + "scale": 1e-09, + "terms": ["nanoseconde", "nanosecondes", "nano-seconde", "nano-secondes", "ns"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "µs": { + "scale": 1e-06, + "terms": [ + "microseconde", + "microsecondes", + "micro-seconde", + "micro-secondes", + "µs", + "us", + ], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "ms": { + "scale": 0.001, + "terms": ["milliseconde", "millisecondes", "miliseconde", "milisecondes", "ms"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "s": { + "scale": 1, + "terms": ["seconde", "secondes", "s"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "min": { + "scale": 60, + "terms": ["mn", "min", "minute", "minutes"], + "followed_by": "second", + "ui_decomposition": {"time": 1}, + }, + "h": { + "scale": 3600, + "terms": ["heure", "heures", "h"], + "followed_by": "minute", + "ui_decomposition": {"time": 1}, + }, + "day": { + "scale": 3600, + "terms": ["jour", "jours", "j"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "month": { + "scale": 109500.12, + "terms": ["mois"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "week": { + "scale": 25200, + "terms": ["semaine", "semaines"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "year": { + "scale": 1314900.0, + "terms": ["an", "année", "ans", "années"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "arc-second": { + "scale": 0.03333333333333333, + "terms": ['"', "''"], + "followed_by": None, + "ui_decomposition": {"time": 1}, + }, + "arc-minute": { + "scale": 2, + "terms": ["'"], + "followed_by": "arc-second", + "ui_decomposition": {"time": 1}, + }, + "degree": { + "scale": 120, + "terms": ["degre", "°", "deg"], + "followed_by": "arc-minute", + "ui_decomposition": {"time": 1}, + }, + "celcius": { + "scale": 1, + "terms": ["°C", "° celcius", "celcius"], + "followed_by": None, + "ui_decomposition": {"temperature": 1}, + }, + "fl": { + "scale": 1e-18, + "terms": ["femtolitre", "femtolitres", "femto-litre", "femto-litres", "fl"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "pl": { + "scale": 1e-15, + "terms": ["picolitre", "picolitres", "pico-litre", "pico-litres", "pl"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "nl": { + "scale": 1e-12, + "terms": ["nanolitre", "nanolitres", "nano-litre", "nano-litres", "nl"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "µl": { + "scale": 1e-09, + "terms": [ + "microlitre", + "microlitres", + "micro-litre", + "micro-litres", + "µl", + "ul", + ], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "ml": { + "scale": 1e-06, + "terms": ["mililitre", "millilitre", "mililitres", "millilitres", "ml"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "cl": { + "scale": 1e-05, + "terms": ["centilitre", "centilitres", "cl"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "dl": { + "scale": 0.0001, + "terms": ["decilitre", "decilitres", "dl"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "l": { + "scale": 0.001, + "terms": ["litre", "litres", "l"], + "followed_by": "ml", + "ui_decomposition": {"length": 3}, + }, + "dal": { + "scale": 0.01, + "terms": ["decalitre", "decalitres", "dal"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "hl": { + "scale": 0.1, + "terms": 
["hectolitre", "hectolitres", "hl"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "kl": { + "scale": 1.0, + "terms": ["kilolitre", "kilolitres", "kl"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "cac": { + "scale": 5e-09, + "terms": ["cac", "c.a.c", "cuillere à café", "cuillères à café"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "goutte": { + "scale": 5e-11, + "terms": ["gt", "goutte"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "fm3": { + "scale": 1e-45, + "terms": ["fm3", "fm³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "pm3": { + "scale": 1e-36, + "terms": ["pm3", "pm³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "nm3": { + "scale": 1e-27, + "terms": ["nm3", "nm³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "µm3": { + "scale": 1e-18, + "terms": ["um3", "um³", "µm3", "µm³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "mm3": { + "scale": 1e-09, + "terms": ["mm3", "mm³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "cm3": { + "scale": 1e-06, + "terms": ["cm3", "cm³", "cc"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "dm3": { + "scale": 0.001, + "terms": ["dm3", "dm³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "m3": { + "scale": 1, + "terms": ["m3", "m³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "dam3": { + "scale": 1000.0, + "terms": ["dam3", "dam³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "hm3": { + "scale": 1000000.0, + "terms": ["hm3", "hm³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "km3": { + "scale": 1000000000.0, + "terms": ["km3", "km³"], + "followed_by": None, + "ui_decomposition": {"length": 3}, + }, + "fm2": { + "scale": 1e-30, + "terms": ["fm2", "fm²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "pm2": { + "scale": 1e-24, + "terms": ["fm2", "fm²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "nm2": { + "scale": 1e-18, + "terms": ["nm2", "nm²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "μm2": { + "scale": 1e-12, + "terms": ["µm2", "µm²", "um2", "um²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "mm2": { + "scale": 1e-06, + "terms": ["mm2", "mm²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "cm2": { + "scale": 0.0001, + "terms": ["cm2", "cm²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "dm2": { + "scale": 0.01, + "terms": ["dm2", "dm²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "m2": { + "scale": 1.0, + "terms": ["m2", "m²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "dam2": { + "scale": 100.0, + "terms": ["dam2", "dam²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "hm2": { + "scale": 10000.0, + "terms": ["hm2", "hm²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "km2": { + "scale": 1000000.0, + "terms": ["km2", "km²"], + "followed_by": None, + "ui_decomposition": {"length": 2}, + }, + "fui": { + "scale": 1e-15, + "terms": ["fui", "f ui", "fu", "f u"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "pui": { + "scale": 1e-12, + "terms": ["pui", "p ui", "pu", "p u"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "nui": { + "scale": 1e-09, + "terms": ["nui", "n ui", 
"nu", "n u"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "µui": { + "scale": 1e-06, + "terms": ["µui", "µ ui", "uui", "u ui", "µu", "µ u", "uu", "u u"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "mui": { + "scale": 0.001, + "terms": ["mui", "m ui", "mu", "m u"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "cui": { + "scale": 0.01, + "terms": ["cui", "c ui", "cu", "c u"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "dui": { + "scale": 0.1, + "terms": ["dui", "d ui", "du", "d u"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "ui": { + "scale": 1.0, + "terms": ["ui", "u"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "daui": { + "scale": 10.0, + "terms": ["daui", "dau"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "hui": { + "scale": 100.0, + "terms": ["hui", "hu"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "kui": { + "scale": 1000.0, + "terms": ["kui", "ku"], + "followed_by": None, + "ui_decomposition": {"ui": 1}, + }, + "fmol": { + "scale": 1e-15, + "terms": ["fmol", "f mol", "fmole", "f mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "pmol": { + "scale": 1e-12, + "terms": ["pmol", "p mol", "pmole", "p mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "nmol": { + "scale": 1e-09, + "terms": ["nmol", "n mol", "nmole", "n mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "µmol": { + "scale": 1e-06, + "terms": ["µmol", "µ mol", "umole", "u mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "mmol": { + "scale": 0.001, + "terms": ["mmol", "m mol", "mmole", "m mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "cmol": { + "scale": 0.01, + "terms": ["cmol", "c mol", "cmole", "c mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "dmol": { + "scale": 0.1, + "terms": ["dmol", "d mol", "dmole", "d mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "mol": { + "scale": 1.0, + "terms": ["mol", "mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "damol": { + "scale": 10.0, + "terms": ["damol", "da mol", "damole", "da mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "hmol": { + "scale": 100.0, + "terms": ["hmol", "h mol", "hmole", "h mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "kmol": { + "scale": 1000.0, + "terms": ["kmol", "k mol", "kmole", "k mole"], + "followed_by": None, + "ui_decomposition": {"nsubstance": 1}, + }, + "per_fm": { + "scale": 1.0e15, + "terms": [ + "/femtometre", + "femtometre⁻¹", + "femtometre-1", + "/femtometres", + "femtometres⁻¹", + "femtometres-1", + "/femto-metre", + "femto-metre⁻¹", + "femto-metre-1", + "/femto-metres", + "femto-metres⁻¹", + "femto-metres-1", + "/fm", + "fm⁻¹", + "fm-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_pm": { + "scale": 1.0e12, + "terms": [ + "/picometre", + "picometre⁻¹", + "picometre-1", + "/picometres", + "picometres⁻¹", + "picometres-1", + "/pico-metre", + "pico-metre⁻¹", + "pico-metre-1", + "/pico-metres", + "pico-metres⁻¹", + "pico-metres-1", + "/pm", + "pm⁻¹", + "pm-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_nm": { + "scale": 1.0e9, + "terms": [ + "/nanometre", + "nanometre⁻¹", + "nanometre-1", + "/nanometres", + "nanometres⁻¹", + 
"nanometres-1", + "/nano-metre", + "nano-metre⁻¹", + "nano-metre-1", + "/nano-metres", + "nano-metres⁻¹", + "nano-metres-1", + "/nm", + "nm⁻¹", + "nm-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_μm": { + "scale": 1.0e6, + "terms": [ + "/micrometre", + "micrometre⁻¹", + "micrometre-1", + "/micrometres", + "micrometres⁻¹", + "micrometres-1", + "/micro-metre", + "micro-metre⁻¹", + "micro-metre-1", + "/micrometres", + "micrometres⁻¹", + "micrometres-1", + "/µm", + "µm⁻¹", + "µm-1", + "/um", + "um⁻¹", + "um-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_mm": { + "scale": 1000.0, + "terms": [ + "/millimetre", + "millimetre⁻¹", + "millimetre-1", + "/millimetres", + "millimetres⁻¹", + "millimetres-1", + "/milimetre", + "milimetre⁻¹", + "milimetre-1", + "/milimetres", + "milimetres⁻¹", + "milimetres-1", + "/mm", + "mm⁻¹", + "mm-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_cm": { + "scale": 100.0, + "terms": [ + "/centimetre", + "centimetre⁻¹", + "centimetre-1", + "/centimetres", + "centimetres⁻¹", + "centimetres-1", + "/cm", + "cm⁻¹", + "cm-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_dm": { + "scale": 10.0, + "terms": [ + "/decimetre", + "decimetre⁻¹", + "decimetre-1", + "/decimetres", + "decimetres⁻¹", + "decimetres-1", + "/dm", + "dm⁻¹", + "dm-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_m": { + "scale": 1.0, + "terms": [ + "/metre", + "metre⁻¹", + "metre-1", + "/metres", + "metres⁻¹", + "metres-1", + "/m", + "m⁻¹", + "m-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_dam": { + "scale": 0.1, + "terms": [ + "/decametre", + "decametre⁻¹", + "decametre-1", + "/decametres", + "decametres⁻¹", + "decametres-1", + "/dam", + "dam⁻¹", + "dam-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_hm": { + "scale": 0.01, + "terms": [ + "/hectometre", + "hectometre⁻¹", + "hectometre-1", + "/hectometres", + "hectometres⁻¹", + "hectometres-1", + "/hm", + "hm⁻¹", + "hm-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_km": { + "scale": 0.001, + "terms": [ + "/kilometre", + "kilometre⁻¹", + "kilometre-1", + "/kilometres", + "kilometres⁻¹", + "kilometres-1", + "/km", + "km⁻¹", + "km-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -1}, + }, + "per_fg": { + "scale": 1.0e18, + "terms": [ + "/femtogramme", + "femtogramme⁻¹", + "femtogramme-1", + "/femtogrammes", + "femtogrammes⁻¹", + "femtogrammes-1", + "/femto-gramme", + "femto-gramme⁻¹", + "femto-gramme-1", + "/femto-grammes", + "femto-grammes⁻¹", + "femto-grammes-1", + "/fgr", + "fgr⁻¹", + "fgr-1", + "/fg", + "fg⁻¹", + "fg-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_pg": { + "scale": 1.0e15, + "terms": [ + "/picogramme", + "picogramme⁻¹", + "picogramme-1", + "/picogrammes", + "picogrammes⁻¹", + "picogrammes-1", + "/pico-gramme", + "pico-gramme⁻¹", + "pico-gramme-1", + "/pico-grammes", + "pico-grammes⁻¹", + "pico-grammes-1", + "/pgr", + "pgr⁻¹", + "pgr-1", + "/pg", + "pg⁻¹", + "pg-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_ng": { + "scale": 1.0e12, + "terms": [ + "/nanogramme", + "nanogramme⁻¹", + "nanogramme-1", + "/nanogrammes", + "nanogrammes⁻¹", + "nanogrammes-1", + "/nano-gramme", + "nano-gramme⁻¹", + "nano-gramme-1", + "/nano-grammes", + "nano-grammes⁻¹", + "nano-grammes-1", + "/ngr", + "ngr⁻¹", + 
"ngr-1", + "/ng", + "ng⁻¹", + "ng-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_µg": { + "scale": 1.0e9, + "terms": [ + "/microgramme", + "microgramme⁻¹", + "microgramme-1", + "/microgrammes", + "microgrammes⁻¹", + "microgrammes-1", + "/micro-gramme", + "micro-gramme⁻¹", + "micro-gramme-1", + "/micro-grammes", + "micro-grammes⁻¹", + "micro-grammes-1", + "/µgr", + "µgr⁻¹", + "µgr-1", + "/µg", + "µg⁻¹", + "µg-1", + "/ugr", + "ugr⁻¹", + "ugr-1", + "/ug", + "ug⁻¹", + "ug-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_mg": { + "scale": 1.0e6, + "terms": [ + "/milligramme", + "milligramme⁻¹", + "milligramme-1", + "/miligramme", + "miligramme⁻¹", + "miligramme-1", + "/milligrammes", + "milligrammes⁻¹", + "milligrammes-1", + "/miligrammes", + "miligrammes⁻¹", + "miligrammes-1", + "/mgr", + "mgr⁻¹", + "mgr-1", + "/mg", + "mg⁻¹", + "mg-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_cg": { + "scale": 1.0e5, + "terms": [ + "/centigramme", + "centigramme⁻¹", + "centigramme-1", + "/centigrammes", + "centigrammes⁻¹", + "centigrammes-1", + "/cg", + "cg⁻¹", + "cg-1", + "/cgr", + "cgr⁻¹", + "cgr-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_dg": { + "scale": 1.0e4, + "terms": [ + "/decigramme", + "decigramme⁻¹", + "decigramme-1", + "/decigrammes", + "decigrammes⁻¹", + "decigrammes-1", + "/dgr", + "dgr⁻¹", + "dgr-1", + "/dg", + "dg⁻¹", + "dg-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_g": { + "scale": 1000.0, + "terms": [ + "/gramme", + "gramme⁻¹", + "gramme-1", + "/grammes", + "grammes⁻¹", + "grammes-1", + "/gr", + "gr⁻¹", + "gr-1", + "/g", + "g⁻¹", + "g-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_dag": { + "scale": 100.0, + "terms": [ + "/decagramme", + "decagramme⁻¹", + "decagramme-1", + "/decagrammes", + "decagrammes⁻¹", + "decagrammes-1", + "/dagr", + "dagr⁻¹", + "dagr-1", + "/dag", + "dag⁻¹", + "dag-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_hg": { + "scale": 10.0, + "terms": [ + "/hectogramme", + "hectogramme⁻¹", + "hectogramme-1", + "/hectogrammes", + "hectogrammes⁻¹", + "hectogrammes-1", + "/hgr", + "hgr⁻¹", + "hgr-1", + "/hg", + "hg⁻¹", + "hg-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_kg": { + "scale": 1.0, + "terms": [ + "/kilo", + "kilo⁻¹", + "kilo-1", + "/kilogramme", + "kilogramme⁻¹", + "kilogramme-1", + "/kilogrammes", + "kilogrammes⁻¹", + "kilogrammes-1", + "/kgr", + "kgr⁻¹", + "kgr-1", + "/kg", + "kg⁻¹", + "kg-1", + ], + "followed_by": None, + "ui_decomposition": {"mass": -1}, + }, + "per_fs": { + "scale": 1.0e15, + "terms": [ + "/femtoseconde", + "femtoseconde⁻¹", + "femtoseconde-1", + "/femtosecondes", + "femtosecondes⁻¹", + "femtosecondes-1", + "/femto-seconde", + "femto-seconde⁻¹", + "femto-seconde-1", + "/femto-secondes", + "femto-secondes⁻¹", + "femto-secondes-1", + "/fs", + "fs⁻¹", + "fs-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_ps": { + "scale": 1.0e12, + "terms": [ + "/picoseconde", + "picoseconde⁻¹", + "picoseconde-1", + "/picosecondes", + "picosecondes⁻¹", + "picosecondes-1", + "/pico-seconde", + "pico-seconde⁻¹", + "pico-seconde-1", + "/pico-secondes", + "pico-secondes⁻¹", + "pico-secondes-1", + "/ps", + "ps⁻¹", + "ps-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_ns": { + "scale": 1.0e9, + "terms": [ + "/nanoseconde", + 
"nanoseconde⁻¹", + "nanoseconde-1", + "/nanosecondes", + "nanosecondes⁻¹", + "nanosecondes-1", + "/nano-seconde", + "nano-seconde⁻¹", + "nano-seconde-1", + "/nano-secondes", + "nano-secondes⁻¹", + "nano-secondes-1", + "/ns", + "ns⁻¹", + "ns-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_µs": { + "scale": 1.0e6, + "terms": [ + "/microseconde", + "microseconde⁻¹", + "microseconde-1", + "/microsecondes", + "microsecondes⁻¹", + "microsecondes-1", + "/micro-seconde", + "micro-seconde⁻¹", + "micro-seconde-1", + "/micro-secondes", + "micro-secondes⁻¹", + "micro-secondes-1", + "/µs", + "µs⁻¹", + "µs-1", + "/us", + "us⁻¹", + "us-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_ms": { + "scale": 1000.0, + "terms": [ + "/milliseconde", + "milliseconde⁻¹", + "milliseconde-1", + "/millisecondes", + "millisecondes⁻¹", + "millisecondes-1", + "/miliseconde", + "miliseconde⁻¹", + "miliseconde-1", + "/milisecondes", + "milisecondes⁻¹", + "milisecondes-1", + "/ms", + "ms⁻¹", + "ms-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_s": { + "scale": 1.0, + "terms": [ + "/seconde", + "seconde⁻¹", + "seconde-1", + "/secondes", + "secondes⁻¹", + "secondes-1", + "/s", + "s⁻¹", + "s-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_min": { + "scale": 0.016666666666666666, + "terms": [ + "/mn", + "mn⁻¹", + "mn-1", + "/min", + "min⁻¹", + "min-1", + "/minute", + "minute⁻¹", + "minute-1", + "/minutes", + "minutes⁻¹", + "minutes-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_h": { + "scale": 0.0002777777777777778, + "terms": [ + "/heure", + "heure⁻¹", + "heure-1", + "/heures", + "heures⁻¹", + "heures-1", + "/h", + "h⁻¹", + "h-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_day": { + "scale": 0.0002777777777777778, + "terms": [ + "/jour", + "jour⁻¹", + "jour-1", + "/jours", + "jours⁻¹", + "jours-1", + "/j", + "j⁻¹", + "j-1", + "/24h", + "24h⁻¹", + "24h-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_month": { + "scale": 9.132410083203562e-06, + "terms": ["/mois", "mois⁻¹", "mois-1"], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_week": { + "scale": 3.968253968253968e-05, + "terms": [ + "/semaine", + "semaine⁻¹", + "semaine-1", + "/semaines", + "semaines⁻¹", + "semaines-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_year": { + "scale": 7.605141075366948e-07, + "terms": [ + "/an", + "an⁻¹", + "an-1", + "/année", + "année⁻¹", + "année-1", + "/ans", + "ans⁻¹", + "ans-1", + "/années", + "années⁻¹", + "années-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_arc-second": { + "scale": 30.0, + "terms": ['/"', '"⁻¹', '"-1', "/''", "''⁻¹", "''-1"], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_arc-minute": { + "scale": 0.5, + "terms": ["/'", "'⁻¹", "'-1"], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_degree": { + "scale": 0.008333333333333333, + "terms": [ + "/degre", + "degre⁻¹", + "degre-1", + "/°", + "°⁻¹", + "°-1", + "/deg", + "deg⁻¹", + "deg-1", + ], + "followed_by": None, + "ui_decomposition": {"time": -1}, + }, + "per_celcius": { + "scale": 1.0, + "terms": [ + "/°C", + "°C⁻¹", + "°C-1", + "/° celcius", + "° celcius⁻¹", + "° celcius-1", + "/celcius", + "celcius⁻¹", + "celcius-1", + ], + "followed_by": None, + "ui_decomposition": {"temperature": -1}, + }, + "per_fl": { + 
"scale": 1.0e18, + "terms": [ + "/femtolitre", + "femtolitre⁻¹", + "femtolitre-1", + "/femtolitres", + "femtolitres⁻¹", + "femtolitres-1", + "/femto-litre", + "femto-litre⁻¹", + "femto-litre-1", + "/femto-litres", + "femto-litres⁻¹", + "femto-litres-1", + "/fl", + "fl⁻¹", + "fl-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_pl": { + "scale": 1.0e15, + "terms": [ + "/picolitre", + "picolitre⁻¹", + "picolitre-1", + "/picolitres", + "picolitres⁻¹", + "picolitres-1", + "/pico-litre", + "pico-litre⁻¹", + "pico-litre-1", + "/pico-litres", + "pico-litres⁻¹", + "pico-litres-1", + "/pl", + "pl⁻¹", + "pl-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_nl": { + "scale": 1.0e12, + "terms": [ + "/nanolitre", + "nanolitre⁻¹", + "nanolitre-1", + "/nanolitres", + "nanolitres⁻¹", + "nanolitres-1", + "/nano-litre", + "nano-litre⁻¹", + "nano-litre-1", + "/nano-litres", + "nano-litres⁻¹", + "nano-litres-1", + "/nl", + "nl⁻¹", + "nl-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_µl": { + "scale": 1.0e9, + "terms": [ + "/microlitre", + "microlitre⁻¹", + "microlitre-1", + "/microlitres", + "microlitres⁻¹", + "microlitres-1", + "/micro-litre", + "micro-litre⁻¹", + "micro-litre-1", + "/micro-litres", + "micro-litres⁻¹", + "micro-litres-1", + "/µl", + "µl⁻¹", + "µl-1", + "/ul", + "ul⁻¹", + "ul-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_ml": { + "scale": 1.0e6, + "terms": [ + "/mililitre", + "mililitre⁻¹", + "mililitre-1", + "/millilitre", + "millilitre⁻¹", + "millilitre-1", + "/mililitres", + "mililitres⁻¹", + "mililitres-1", + "/millilitres", + "millilitres⁻¹", + "millilitres-1", + "/ml", + "ml⁻¹", + "ml-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_cl": { + "scale": 1.0e5, + "terms": [ + "/centilitre", + "centilitre⁻¹", + "centilitre-1", + "/centilitres", + "centilitres⁻¹", + "centilitres-1", + "/cl", + "cl⁻¹", + "cl-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_dl": { + "scale": 1.0e4, + "terms": [ + "/decilitre", + "decilitre⁻¹", + "decilitre-1", + "/decilitres", + "decilitres⁻¹", + "decilitres-1", + "/dl", + "dl⁻¹", + "dl-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_l": { + "scale": 1000.0, + "terms": [ + "/litre", + "litre⁻¹", + "litre-1", + "/litres", + "litres⁻¹", + "litres-1", + "/l", + "l⁻¹", + "l-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_dal": { + "scale": 100.0, + "terms": [ + "/decalitre", + "decalitre⁻¹", + "decalitre-1", + "/decalitres", + "decalitres⁻¹", + "decalitres-1", + "/dal", + "dal⁻¹", + "dal-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_hl": { + "scale": 10.0, + "terms": [ + "/hectolitre", + "hectolitre⁻¹", + "hectolitre-1", + "/hectolitres", + "hectolitres⁻¹", + "hectolitres-1", + "/hl", + "hl⁻¹", + "hl-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_kl": { + "scale": 1.0, + "terms": [ + "/kilolitre", + "kilolitre⁻¹", + "kilolitre-1", + "/kilolitres", + "kilolitres⁻¹", + "kilolitres-1", + "/kl", + "kl⁻¹", + "kl-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_cac": { + "scale": 200000000.0, + "terms": [ + "/cac", + "cac⁻¹", + "cac-1", + "/c.a.c", + "c.a.c⁻¹", + "c.a.c-1", + "/cuillere à café", + "cuillere à café⁻¹", + "cuillere à café-1", + "/cuillères à café", + "cuillères à café⁻¹", + "cuillères à 
café-1", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_goutte": { + "scale": 20000000.0, + "terms": ["/gt", "gt⁻¹", "gt-1", "/goutte", "goutte⁻¹", "goutte-1"], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_fm3": { + "scale": 1.0e45, + "terms": [ + "/fm3", + "fm3⁻¹", + "fm3-1", + "/fm³", + "fm⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_pm3": { + "scale": 1.0e36, + "terms": [ + "/pm3", + "pm3⁻¹", + "pm3-1", + "/pm³", + "pm⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_nm3": { + "scale": 1.0e27, + "terms": [ + "/nm3", + "nm3⁻¹", + "nm3-1", + "/nm³", + "nm⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_µm3": { + "scale": 1.0e18, + "terms": [ + "/um3", + "um3⁻¹", + "um3-1", + "/um³", + "um⁻³", + "/µm3", + "µm3⁻¹", + "µm3-1", + "/µm³", + "µm⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_mm3": { + "scale": 1.0e9, + "terms": [ + "/mm3", + "mm3⁻¹", + "mm3-1", + "/mm³", + "mm⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_cm3": { + "scale": 1.0e6, + "terms": ["/cm3", "cm3⁻¹", "cm3-1", "/cm³", "cm⁻³", "/cc", "cc⁻¹", "cc-1"], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_dm3": { + "scale": 1000.0, + "terms": [ + "/dm3", + "dm3⁻¹", + "dm3-1", + "/dm³", + "dm⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_m3": { + "scale": 1.0, + "terms": [ + "/m3", + "m3⁻¹", + "m3-1", + "/m³", + "m⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_dam3": { + "scale": 0.001, + "terms": [ + "/dam3", + "dam3⁻¹", + "dam3-1", + "/dam³", + "dam⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_hm3": { + "scale": 1e-06, + "terms": [ + "/hm3", + "hm3⁻¹", + "hm3-1", + "/hm³", + "hm⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_km3": { + "scale": 1e-09, + "terms": [ + "/km3", + "km3⁻¹", + "km3-1", + "/km³", + "km⁻³", + ], + "followed_by": None, + "ui_decomposition": {"length": -3}, + }, + "per_fm2": { + "scale": 1.0e30, + "terms": [ + "/fm2", + "fm2⁻¹", + "fm2-1", + "/fm²", + "fm⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_pm2": { + "scale": 1.0e24, + "terms": [ + "/pm2", + "pm2⁻¹", + "pm2-1", + "/pm²", + "pm⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_nm2": { + "scale": 1.0e18, + "terms": [ + "/nm2", + "nm2⁻¹", + "nm2-1", + "/nm²", + "nm⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_μm2": { + "scale": 1.0e12, + "terms": [ + "/µm2", + "µm2⁻¹", + "µm2-1", + "/µm²", + "µm⁻²", + "/um2", + "um2⁻¹", + "um2-1", + "/um²", + "um⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_mm2": { + "scale": 1.0e6, + "terms": [ + "/mm2", + "mm2⁻¹", + "mm2-1", + "/mm²", + "mm⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_cm2": { + "scale": 1.0e4, + "terms": [ + "/cm2", + "cm2⁻¹", + "cm2-1", + "/cm²", + "cm⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_dm2": { + "scale": 100.0, + "terms": [ + "/dm2", + "dm2⁻¹", + "dm2-1", + "/dm²", + "dm⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_m2": { + "scale": 1.0, + "terms": [ + "/m2", + "m2⁻¹", + "m2-1", + "/m²", + "m⁻²", + ], + "followed_by": None, +
"ui_decomposition": {"length": -2}, + }, + "per_dam2": { + "scale": 0.01, + "terms": [ + "/dam2", + "dam2⁻¹", + "dam2-1", + "/dam²", + "dam⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_hm2": { + "scale": 1.0e-4, + "terms": [ + "/hm2", + "hm2⁻¹", + "hm2-1", + "/hm²", + "hm⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_km2": { + "scale": 1e-06, + "terms": [ + "/km2", + "km2⁻¹", + "km2-1", + "/km²", + "km⁻²", + ], + "followed_by": None, + "ui_decomposition": {"length": -2}, + }, + "per_fui": { + "scale": 1.0e15, + "terms": [ + "/fui", + "fui⁻¹", + "fui-1", + "/f ui", + "f ui⁻¹", + "f ui-1", + "/fu", + "fu⁻¹", + "fu-1", + "/f u", + "f u⁻¹", + "f u-1", + ], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_pui": { + "scale": 1.0e12, + "terms": [ + "/pui", + "pui⁻¹", + "pui-1", + "/p ui", + "p ui⁻¹", + "p ui-1", + "/pu", + "pu⁻¹", + "pu-1", + "/p u", + "p u⁻¹", + "p u-1", + ], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_nui": { + "scale": 1.0e9, + "terms": [ + "/nui", + "nui⁻¹", + "nui-1", + "/n ui", + "n ui⁻¹", + "n ui-1", + "/nu", + "nu⁻¹", + "nu-1", + "/n u", + "n u⁻¹", + "n u-1", + ], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_µui": { + "scale": 1.0e6, + "terms": [ + "/µui", + "µui⁻¹", + "µui-1", + "/µ ui", + "µ ui⁻¹", + "µ ui-1", + "/uui", + "uui⁻¹", + "uui-1", + "/u ui", + "u ui⁻¹", + "u ui-1", + "/µu", + "µu⁻¹", + "µu-1", + "/µ u", + "µ u⁻¹", + "µ u-1", + "/uu", + "uu⁻¹", + "uu-1", + "/u u", + "u u⁻¹", + "u u-1", + ], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_mui": { + "scale": 1000.0, + "terms": [ + "/mui", + "mui⁻¹", + "mui-1", + "/m ui", + "m ui⁻¹", + "m ui-1", + "/mu", + "mu⁻¹", + "mu-1", + "/m u", + "m u⁻¹", + "m u-1", + ], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_cui": { + "scale": 100.0, + "terms": [ + "/cui", + "cui⁻¹", + "cui-1", + "/c ui", + "c ui⁻¹", + "c ui-1", + "/cu", + "cu⁻¹", + "cu-1", + "/c u", + "c u⁻¹", + "c u-1", + ], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_dui": { + "scale": 10.0, + "terms": [ + "/dui", + "dui⁻¹", + "dui-1", + "/d ui", + "d ui⁻¹", + "d ui-1", + "/du", + "du⁻¹", + "du-1", + "/d u", + "d u⁻¹", + "d u-1", + ], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_ui": { + "scale": 1.0, + "terms": ["/ui", "ui⁻¹", "ui-1", "/u", "u⁻¹", "u-1"], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_daui": { + "scale": 0.1, + "terms": ["/daui", "daui⁻¹", "daui-1", "/dau", "dau⁻¹", "dau-1"], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_hui": { + "scale": 0.01, + "terms": ["/hui", "hui⁻¹", "hui-1", "/hu", "hu⁻¹", "hu-1"], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_kui": { + "scale": 0.001, + "terms": ["/kui", "kui⁻¹", "kui-1", "/ku", "ku⁻¹", "ku-1"], + "followed_by": None, + "ui_decomposition": {"ui": -1}, + }, + "per_fmol": { + "scale": 1.0e15, + "terms": [ + "/fmol", + "fmol⁻¹", + "fmol-1", + "/f mol", + "f mol⁻¹", + "f mol-1", + "/fmole", + "fmole⁻¹", + "fmole-1", + "/f mole", + "f mole⁻¹", + "f mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_pmol": { + "scale": 1.0e12, + "terms": [ + "/pmol", + "pmol⁻¹", + "pmol-1", + "/p mol", + "p mol⁻¹", + "p mol-1", + "/pmole", + "pmole⁻¹", + "pmole-1", + "/p mole", + "p mole⁻¹", + "p mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + 
"per_nmol": { + "scale": 1.0e9, + "terms": [ + "/nmol", + "nmol⁻¹", + "nmol-1", + "/n mol", + "n mol⁻¹", + "n mol-1", + "/nmole", + "nmole⁻¹", + "nmole-1", + "/n mole", + "n mole⁻¹", + "n mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_µmol": { + "scale": 1.0e6, + "terms": [ + "/µmol", + "µmol⁻¹", + "µmol-1", + "/µ mol", + "µ mol⁻¹", + "µ mol-1", + "/umole", + "umole⁻¹", + "umole-1", + "/u mole", + "u mole⁻¹", + "u mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_mmol": { + "scale": 1000.0, + "terms": [ + "/mmol", + "mmol⁻¹", + "mmol-1", + "/m mol", + "m mol⁻¹", + "m mol-1", + "/mmole", + "mmole⁻¹", + "mmole-1", + "/m mole", + "m mole⁻¹", + "m mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_cmol": { + "scale": 100.0, + "terms": [ + "/cmol", + "cmol⁻¹", + "cmol-1", + "/c mol", + "c mol⁻¹", + "c mol-1", + "/cmole", + "cmole⁻¹", + "cmole-1", + "/c mole", + "c mole⁻¹", + "c mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_dmol": { + "scale": 10.0, + "terms": [ + "/dmol", + "dmol⁻¹", + "dmol-1", + "/d mol", + "d mol⁻¹", + "d mol-1", + "/dmole", + "dmole⁻¹", + "dmole-1", + "/d mole", + "d mole⁻¹", + "d mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_mol": { + "scale": 1.0, + "terms": ["/mol", "mol⁻¹", "mol-1", "/mole", "mole⁻¹", "mole-1"], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_damol": { + "scale": 0.1, + "terms": [ + "/damol", + "damol⁻¹", + "damol-1", + "/da mol", + "da mol⁻¹", + "da mol-1", + "/damole", + "damole⁻¹", + "damole-1", + "/da mole", + "da mole⁻¹", + "da mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_hmol": { + "scale": 0.01, + "terms": [ + "/hmol", + "hmol⁻¹", + "hmol-1", + "/h mol", + "h mol⁻¹", + "h mol-1", + "/hmole", + "hmole⁻¹", + "hmole-1", + "/h mole", + "h mole⁻¹", + "h mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "per_kmol": { + "scale": 0.001, + "terms": [ + "/kmol", + "kmol⁻¹", + "kmol-1", + "/k mol", + "k mol⁻¹", + "k mol-1", + "/kmole", + "kmole⁻¹", + "kmole-1", + "/k mole", + "k mole⁻¹", + "k mole-1", + ], + "followed_by": None, + "ui_decomposition": {"nsubstance": -1}, + }, + "nounit": { + "scale": 1, + "terms": [], + "followed_by": None, + "ui_decomposition": {"nounit": 1}, + }, + "percent": { + "scale": 0.01, + "terms": [ + "%", + "pourcent", + "pourcents", + ], + "followed_by": None, + "ui_decomposition": {"nounit": 1}, + }, + "permille": { + "scale": 0.001, + "terms": [ + "‰", + "pourmille", + "pour mille", + "pourmilles", + "pour milles", + ], + "followed_by": None, + "ui_decomposition": {"nounit": 1}, + }, + "mmhg": { + "scale": 133.3224, + "terms": [ + "mmhg", + "torr", + ], + "followed_by": None, + "ui_decomposition": {"mass": 1, "length": -1, "time": -2}, + }, + "bool": { + "scale": 1, + "terms": [], + "followed_by": None, + "ui_decomposition": {"bool": 1}, + }, +} + +range_patterns = [ + ("De", "à"), + ("De", "a"), + ("de", "à"), + ("de", "a"), + ("Entre", "et"), + ("entre", "et"), + (None, "à"), +] diff --git a/edsnlp/pipelines/misc/reason/__init__.py b/edsnlp/edsnlp/pipelines/misc/reason/__init__.py similarity index 100% rename from edsnlp/pipelines/misc/reason/__init__.py rename to edsnlp/edsnlp/pipelines/misc/reason/__init__.py diff --git a/edsnlp/pipelines/misc/reason/factory.py 
b/edsnlp/edsnlp/pipelines/misc/reason/factory.py similarity index 100% rename from edsnlp/pipelines/misc/reason/factory.py rename to edsnlp/edsnlp/pipelines/misc/reason/factory.py diff --git a/edsnlp/pipelines/misc/reason/patterns.py b/edsnlp/edsnlp/pipelines/misc/reason/patterns.py similarity index 100% rename from edsnlp/pipelines/misc/reason/patterns.py rename to edsnlp/edsnlp/pipelines/misc/reason/patterns.py diff --git a/edsnlp/pipelines/misc/reason/reason.py b/edsnlp/edsnlp/pipelines/misc/reason/reason.py similarity index 100% rename from edsnlp/pipelines/misc/reason/reason.py rename to edsnlp/edsnlp/pipelines/misc/reason/reason.py diff --git a/edsnlp/pipelines/misc/sections/__init__.py b/edsnlp/edsnlp/pipelines/misc/sections/__init__.py similarity index 100% rename from edsnlp/pipelines/misc/sections/__init__.py rename to edsnlp/edsnlp/pipelines/misc/sections/__init__.py diff --git a/edsnlp/pipelines/misc/sections/factory.py b/edsnlp/edsnlp/pipelines/misc/sections/factory.py similarity index 100% rename from edsnlp/pipelines/misc/sections/factory.py rename to edsnlp/edsnlp/pipelines/misc/sections/factory.py diff --git a/edsnlp/pipelines/misc/sections/patterns.py b/edsnlp/edsnlp/pipelines/misc/sections/patterns.py similarity index 100% rename from edsnlp/pipelines/misc/sections/patterns.py rename to edsnlp/edsnlp/pipelines/misc/sections/patterns.py diff --git a/edsnlp/pipelines/misc/sections/sections.py b/edsnlp/edsnlp/pipelines/misc/sections/sections.py similarity index 100% rename from edsnlp/pipelines/misc/sections/sections.py rename to edsnlp/edsnlp/pipelines/misc/sections/sections.py diff --git a/edsnlp/edsnlp/pipelines/misc/tables/__init__.py b/edsnlp/edsnlp/pipelines/misc/tables/__init__.py new file mode 100644 index 000000000..861ef7628 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/misc/tables/__init__.py @@ -0,0 +1,2 @@ +from .patterns import regex, sep +from .tables import TablesMatcher diff --git a/edsnlp/edsnlp/pipelines/misc/tables/factory.py b/edsnlp/edsnlp/pipelines/misc/tables/factory.py new file mode 100644 index 000000000..d796a56c5 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/misc/tables/factory.py @@ -0,0 +1,38 @@ +from typing import List, Optional + +from spacy.language import Language + +from edsnlp.pipelines.misc.tables import TablesMatcher +from edsnlp.utils.deprecation import deprecated_factory + +DEFAULT_CONFIG = dict( + tables_pattern=None, + sep_pattern=None, + attr="TEXT", + ignore_excluded=True, + col_names=False, + row_names=False, +) + + +@deprecated_factory("tables", "eds.tables", default_config=DEFAULT_CONFIG) +@Language.factory("eds.tables", default_config=DEFAULT_CONFIG) +def create_component( + nlp: Language, + name: str, + tables_pattern: Optional[List[str]], + sep_pattern: Optional[List[str]], + attr: str, + ignore_excluded: bool, + col_names: Optional[bool] = False, + row_names: Optional[bool] = False, +): + return TablesMatcher( + nlp, + tables_pattern=tables_pattern, + sep_pattern=sep_pattern, + attr=attr, + ignore_excluded=ignore_excluded, + col_names=col_names, + row_names=row_names, + ) diff --git a/edsnlp/edsnlp/pipelines/misc/tables/patterns.py b/edsnlp/edsnlp/pipelines/misc/tables/patterns.py new file mode 100644 index 000000000..233bc47b6 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/misc/tables/patterns.py @@ -0,0 +1,2 @@ +sep = [r"¦", r"|"] +regex = [r"(?:¦?(?:[^¦\n]*¦)+[^¦\n]*¦?\n)+", r"(?:\|?(?:[^\|\n]*\|)+[^\|\n]*\|?\n)+"] diff --git a/edsnlp/edsnlp/pipelines/misc/tables/tables.py b/edsnlp/edsnlp/pipelines/misc/tables/tables.py new 
file mode 100644
index 000000000..a9f82b562
--- /dev/null
+++ b/edsnlp/edsnlp/pipelines/misc/tables/tables.py
@@ -0,0 +1,268 @@
+from typing import List, Optional
+
+import pandas as pd
+from spacy.language import Language
+from spacy.tokens import Doc, Span
+
+from edsnlp.matchers.phrase import EDSPhraseMatcher
+from edsnlp.matchers.regex import RegexMatcher
+from edsnlp.pipelines.misc.tables import patterns
+
+
+class TablesMatcher:
+    """
+    Pipeline to identify tables in a document.
+
+    It adds the key `tables` to doc.spans.
+
+    Parameters
+    ----------
+    nlp : Language
+        spaCy nlp pipeline to use for matching.
+    tables_pattern : Optional[List[str]]
+        The regex patterns to identify tables.
+    sep_pattern : Optional[List[str]]
+        The regex patterns to identify separators
+        in the detected tables.
+    col_names : Optional[bool]
+        Whether the tables_pattern matches column names
+    row_names : Optional[bool]
+        Whether the tables_pattern matches row names
+    attr : str
+        spaCy's attribute to use:
+        a string with the value "TEXT" or "NORM", or a dict with
+        the key 'term_attr'. We can also add a key for each regex.
+    ignore_excluded : bool
+        Whether to skip excluded tokens.
+    """
+
+    def __init__(
+        self,
+        nlp: Language,
+        tables_pattern: Optional[List[str]],
+        sep_pattern: Optional[List[str]],
+        attr: str,
+        ignore_excluded: bool,
+        col_names: Optional[bool] = False,
+        row_names: Optional[bool] = False,
+    ):
+        if tables_pattern is None:
+            tables_pattern = patterns.regex
+
+        if sep_pattern is None:
+            sep_pattern = patterns.sep
+
+        self.regex_matcher = RegexMatcher(attr=attr, ignore_excluded=ignore_excluded)
+        self.regex_matcher.add("table", tables_pattern)
+
+        self.term_matcher = EDSPhraseMatcher(
+            nlp.vocab, attr=attr, ignore_excluded=ignore_excluded
+        )
+        self.term_matcher.build_patterns(
+            nlp,
+            {
+                "eol_pattern": "\n",
+                "sep_pattern": sep_pattern,
+            },
+        )
+
+        self.col_names = col_names
+        self.row_names = row_names
+
+        if not Span.has_extension("to_pd_table"):
+            Span.set_extension("to_pd_table", method=self.to_pd_table)
+
+        self.set_extensions()
+
+    @classmethod
+    def set_extensions(cls) -> None:
+        """
+        Set extensions for the tables pipeline.
+        """
+
+        if not Span.has_extension("table"):
+            Span.set_extension("table", default=None)
+
+    def get_tables(self, matches):
+        """
+        Convert table spans to dictionaries
+
+        Parameters
+        ----------
+        matches : List[Span]
+
+        Returns
+        -------
+        List[Span]
+        """
+
+        # Dictionaries linked to each table.
+        # They have the following format:
+        # List[Dict[Union[str, int], List[Span]]]
+        # - List of dictionaries containing the tables. Keys are
+        #   column names (str) if col_names is set to True, else row
+        #   names (str) if row_names is set to True, else index of
+        #   column (int)
+        tables_list = []
+
+        # Returned list
+        tables = []
+
+        # Iterate over matches to consider each table individually
+        for table in matches:
+            # We store each row in a list and store each of these lists
+            # in processed_table for post-processing, taking the
+            # self.col_names and self.row_names variables into account
+            processed_table = []
+            delimiters = [
+                delimiter
+                for delimiter in self.term_matcher(table, as_spans=True)
+                if delimiter.start >= table.start and delimiter.end <= table.end
+            ]
+
+            last = table.start
+            row = []
+            # Parse the table, using the delimiters to extract each cell
+            for delimiter in delimiters:
+                row.append(table[last - table.start : delimiter.start - table.start])
+                last = delimiter.end
+
+                # End the current row if there is an end of line
+                if delimiter.label_ == "eol_pattern":
+                    processed_table.append(row)
+                    row = []
+
+            # Remove the first or last column in case the separator pattern is
+            # also used in the raw table to draw the outlines
+            if all(row[0].start == row[0].end for row in processed_table):
+                processed_table = [row[1:] for row in processed_table]
+            if all(row[-1].start == row[-1].end for row in processed_table):
+                processed_table = [row[:-1] for row in processed_table]
+
+            # Check that all rows have the same length.
+            # If not, try to merge neighbouring rows
+            # to recover a consistent table
+            row_len = len(processed_table[0])
+            if not all(len(row) == row_len for row in processed_table):
+                # Helper returning the divisors of n, i.e. the candidate
+                # numbers of consecutive rows to merge
+                def divisors(n):
+                    result = set()
+                    for i in range(1, int(n**0.5) + 1):
+                        if n % i == 0:
+                            result.add(i)
+                            result.add(n // i)
+                    return sorted(result)
+
+                # Do not count the column names when splitting the table
+                if self.col_names:
+                    n_rows = len(processed_table) - 1
+                else:
+                    n_rows = len(processed_table)
+
+                for n_rows_to_merge in divisors(n_rows):
+                    row_len = sum(len(row) for row in processed_table[:n_rows_to_merge])
+                    if all(
+                        sum(
+                            len(row)
+                            for row in processed_table[
+                                i * n_rows_to_merge : (i + 1) * n_rows_to_merge
+                            ]
+                        )
+                        == row_len
+                        for i in range(n_rows // n_rows_to_merge)
+                    ):
+                        new_table = []
+                        for i in range(n_rows // n_rows_to_merge):
+                            # Init new_row with the first subrow
+                            new_row = processed_table[i * n_rows_to_merge]
+                            if new_row:
+                                for subrow in processed_table[
+                                    i * n_rows_to_merge + 1 : (i + 1) * n_rows_to_merge
+                                ]:
+                                    if subrow:
+                                        new_row = (
+                                            new_row[:-1]
+                                            + [
+                                                table[
+                                                    new_row[-1].start
+                                                    - table.start : subrow[0].end
+                                                    - table.start
+                                                ]
+                                            ]
+                                            + subrow[1:]
+                                        )
+                            new_table.append(new_row)
+                        tables_list.append(new_table)
+                        break
+                continue
+            else:
+                tables_list.append(processed_table)
+
+        # Convert to dictionaries according to self.col_names
+        # and self.row_names
+        if self.col_names:
+            for table_index in range(len(tables_list)):
+                tables_list[table_index] = {
+                    tables_list[table_index][0][column_index].text: [
+                        tables_list[table_index][row_index][column_index]
+                        for row_index in range(1, len(tables_list[table_index]))
+                    ]
+                    for column_index in range(len(tables_list[table_index][0]))
+                }
+        elif self.row_names:
+            for table_index in range(len(tables_list)):
+                tables_list[table_index] = {
+                    tables_list[table_index][row_index][0].text: [
+                        tables_list[table_index][row_index][column_index]
+                        for column_index in range(1, len(tables_list[table_index][0]))
+                    ]
+                    for row_index in range(len(tables_list[table_index]))
+                }
+        else:
+            for table_index in range(len(tables_list)):
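+                # A sketch of the expected orientation (assuming a 2x2 table):
+                # {0: [row0_col0, row1_col0], 1: [row0_col1, row1_col1]},
+                # i.e. each column index maps to the list of that column's cells.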
tables_list[table_index] = { + column_index: [ + tables_list[table_index][row_index][column_index] + for row_index in range(len(tables_list[table_index])) + ] + for column_index in range(len(tables_list[table_index][0])) + } + + for i in range(len(matches)): + ent = matches[i] + ent._.table = tables_list[i] + tables.append(ent) + + return tables + + def __call__(self, doc: Doc) -> Doc: + """ + Find spans that contain tables + + Parameters + ---------- + doc : Doc + + Returns + ------- + Doc + """ + matches = list(self.regex_matcher(doc, as_spans=True)) + tables = self.get_tables(matches) + doc.spans["tables"] = tables + + return doc + + def to_pd_table(self, span, as_spans=True) -> pd.DataFrame: + """ + Return pandas DataFrame + """ + if as_spans: + table = span._.table + else: + table = { + key: [str(cell) for cell in data] + for key, data in list(span._.table.items()) + } + if self.row_names: + return pd.DataFrame.from_dict(table, orient="index") + else: + return pd.DataFrame.from_dict(table) diff --git a/edsnlp/pipelines/misc/dates/patterns/atomic/__init__.py b/edsnlp/edsnlp/pipelines/ner/__init__.py similarity index 100% rename from edsnlp/pipelines/misc/dates/patterns/atomic/__init__.py rename to edsnlp/edsnlp/pipelines/ner/__init__.py diff --git a/edsnlp/pipelines/ner/__init__.py b/edsnlp/edsnlp/pipelines/ner/adicap/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/__init__.py rename to edsnlp/edsnlp/pipelines/ner/adicap/__init__.py diff --git a/edsnlp/pipelines/ner/adicap/adicap.py b/edsnlp/edsnlp/pipelines/ner/adicap/adicap.py similarity index 100% rename from edsnlp/pipelines/ner/adicap/adicap.py rename to edsnlp/edsnlp/pipelines/ner/adicap/adicap.py diff --git a/edsnlp/pipelines/ner/adicap/factory.py b/edsnlp/edsnlp/pipelines/ner/adicap/factory.py similarity index 100% rename from edsnlp/pipelines/ner/adicap/factory.py rename to edsnlp/edsnlp/pipelines/ner/adicap/factory.py diff --git a/edsnlp/pipelines/ner/adicap/models.py b/edsnlp/edsnlp/pipelines/ner/adicap/models.py similarity index 100% rename from edsnlp/pipelines/ner/adicap/models.py rename to edsnlp/edsnlp/pipelines/ner/adicap/models.py diff --git a/edsnlp/pipelines/ner/adicap/patterns.py b/edsnlp/edsnlp/pipelines/ner/adicap/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/adicap/patterns.py rename to edsnlp/edsnlp/pipelines/ner/adicap/patterns.py diff --git a/edsnlp/pipelines/ner/adicap/__init__.py b/edsnlp/edsnlp/pipelines/ner/cim10/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/adicap/__init__.py rename to edsnlp/edsnlp/pipelines/ner/cim10/__init__.py diff --git a/edsnlp/pipelines/ner/cim10/factory.py b/edsnlp/edsnlp/pipelines/ner/cim10/factory.py similarity index 100% rename from edsnlp/pipelines/ner/cim10/factory.py rename to edsnlp/edsnlp/pipelines/ner/cim10/factory.py diff --git a/edsnlp/pipelines/ner/cim10/patterns.py b/edsnlp/edsnlp/pipelines/ner/cim10/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/cim10/patterns.py rename to edsnlp/edsnlp/pipelines/ner/cim10/patterns.py diff --git a/edsnlp/pipelines/ner/cim10/__init__.py b/edsnlp/edsnlp/pipelines/ner/covid/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/cim10/__init__.py rename to edsnlp/edsnlp/pipelines/ner/covid/__init__.py diff --git a/edsnlp/pipelines/ner/covid/factory.py b/edsnlp/edsnlp/pipelines/ner/covid/factory.py similarity index 100% rename from edsnlp/pipelines/ner/covid/factory.py rename to edsnlp/edsnlp/pipelines/ner/covid/factory.py diff --git 
a/edsnlp/pipelines/ner/covid/patterns.py b/edsnlp/edsnlp/pipelines/ner/covid/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/covid/patterns.py rename to edsnlp/edsnlp/pipelines/ner/covid/patterns.py diff --git a/edsnlp/pipelines/ner/covid/__init__.py b/edsnlp/edsnlp/pipelines/ner/drugs/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/covid/__init__.py rename to edsnlp/edsnlp/pipelines/ner/drugs/__init__.py diff --git a/edsnlp/pipelines/ner/drugs/factory.py b/edsnlp/edsnlp/pipelines/ner/drugs/factory.py similarity index 100% rename from edsnlp/pipelines/ner/drugs/factory.py rename to edsnlp/edsnlp/pipelines/ner/drugs/factory.py diff --git a/edsnlp/pipelines/ner/drugs/patterns.py b/edsnlp/edsnlp/pipelines/ner/drugs/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/drugs/patterns.py rename to edsnlp/edsnlp/pipelines/ner/drugs/patterns.py diff --git a/edsnlp/pipelines/ner/scores/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/__init__.py diff --git a/edsnlp/pipelines/ner/scores/base_score.py b/edsnlp/edsnlp/pipelines/ner/scores/base_score.py similarity index 100% rename from edsnlp/pipelines/ner/scores/base_score.py rename to edsnlp/edsnlp/pipelines/ner/scores/base_score.py diff --git a/edsnlp/pipelines/ner/drugs/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/charlson/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/drugs/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/charlson/__init__.py diff --git a/edsnlp/pipelines/ner/scores/charlson/factory.py b/edsnlp/edsnlp/pipelines/ner/scores/charlson/factory.py similarity index 100% rename from edsnlp/pipelines/ner/scores/charlson/factory.py rename to edsnlp/edsnlp/pipelines/ner/scores/charlson/factory.py diff --git a/edsnlp/pipelines/ner/scores/charlson/patterns.py b/edsnlp/edsnlp/pipelines/ner/scores/charlson/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/scores/charlson/patterns.py rename to edsnlp/edsnlp/pipelines/ner/scores/charlson/patterns.py diff --git a/edsnlp/pipelines/ner/scores/charlson/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/elstonellis/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/charlson/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/elstonellis/__init__.py diff --git a/edsnlp/pipelines/ner/scores/elstonellis/factory.py b/edsnlp/edsnlp/pipelines/ner/scores/elstonellis/factory.py similarity index 100% rename from edsnlp/pipelines/ner/scores/elstonellis/factory.py rename to edsnlp/edsnlp/pipelines/ner/scores/elstonellis/factory.py diff --git a/edsnlp/pipelines/ner/scores/elstonellis/patterns.py b/edsnlp/edsnlp/pipelines/ner/scores/elstonellis/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/scores/elstonellis/patterns.py rename to edsnlp/edsnlp/pipelines/ner/scores/elstonellis/patterns.py diff --git a/edsnlp/pipelines/ner/scores/elstonellis/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/elstonellis/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/__init__.py diff --git a/edsnlp/pipelines/ner/scores/emergency/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/ccmu/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/ccmu/__init__.py 
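To make the `eds.tables` component added above concrete, here is a minimal usage sketch. Assumptions: edsnlp is installed so that the "eds" language and the "eds.tables" factory are registered, and the sample text is invented.

import spacy

nlp = spacy.blank("eds")  # the "eds" language is registered by edsnlp
nlp.add_pipe("eds.tables")  # default patterns come from tables/patterns.py

text = (
    "Paramètre | Valeur | Unité\n"
    "Hémoglobine | 12.3 | g/dl\n"
    "Leucocytes | 4.2 | g/l\n"
)
doc = nlp(text)
for table in doc.spans["tables"]:
    # `to_pd_table` is the Span extension registered in TablesMatcher.__init__;
    # as_spans=False converts every cell to its text before building the frame.
    print(table._.to_pd_table(as_spans=False))

With the default `col_names=False, row_names=False`, the resulting DataFrame is keyed by column index; passing `col_names=True` in the factory config would key it by the header row instead.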
diff --git a/edsnlp/pipelines/ner/scores/emergency/ccmu/factory.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/ccmu/factory.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/ccmu/factory.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/ccmu/factory.py diff --git a/edsnlp/pipelines/ner/scores/emergency/ccmu/patterns.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/ccmu/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/ccmu/patterns.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/ccmu/patterns.py diff --git a/edsnlp/pipelines/ner/scores/emergency/ccmu/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/gemsa/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/ccmu/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/gemsa/__init__.py diff --git a/edsnlp/pipelines/ner/scores/emergency/gemsa/factory.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/gemsa/factory.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/gemsa/factory.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/gemsa/factory.py diff --git a/edsnlp/pipelines/ner/scores/emergency/gemsa/patterns.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/gemsa/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/gemsa/patterns.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/gemsa/patterns.py diff --git a/edsnlp/pipelines/ner/scores/emergency/gemsa/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/priority/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/gemsa/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/priority/__init__.py diff --git a/edsnlp/pipelines/ner/scores/emergency/priority/factory.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/priority/factory.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/priority/factory.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/priority/factory.py diff --git a/edsnlp/pipelines/ner/scores/emergency/priority/patterns.py b/edsnlp/edsnlp/pipelines/ner/scores/emergency/priority/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/priority/patterns.py rename to edsnlp/edsnlp/pipelines/ner/scores/emergency/priority/patterns.py diff --git a/edsnlp/pipelines/ner/scores/factory.py b/edsnlp/edsnlp/pipelines/ner/scores/factory.py similarity index 100% rename from edsnlp/pipelines/ner/scores/factory.py rename to edsnlp/edsnlp/pipelines/ner/scores/factory.py diff --git a/edsnlp/pipelines/ner/scores/sofa/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/sofa/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/sofa/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/sofa/__init__.py diff --git a/edsnlp/pipelines/ner/scores/sofa/factory.py b/edsnlp/edsnlp/pipelines/ner/scores/sofa/factory.py similarity index 100% rename from edsnlp/pipelines/ner/scores/sofa/factory.py rename to edsnlp/edsnlp/pipelines/ner/scores/sofa/factory.py diff --git a/edsnlp/pipelines/ner/scores/sofa/patterns.py b/edsnlp/edsnlp/pipelines/ner/scores/sofa/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/scores/sofa/patterns.py rename to edsnlp/edsnlp/pipelines/ner/scores/sofa/patterns.py diff --git a/edsnlp/pipelines/ner/scores/sofa/sofa.py b/edsnlp/edsnlp/pipelines/ner/scores/sofa/sofa.py similarity index 100% rename from edsnlp/pipelines/ner/scores/sofa/sofa.py rename 
to edsnlp/edsnlp/pipelines/ner/scores/sofa/sofa.py diff --git a/edsnlp/pipelines/ner/scores/tnm/__init__.py b/edsnlp/edsnlp/pipelines/ner/scores/tnm/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/tnm/__init__.py rename to edsnlp/edsnlp/pipelines/ner/scores/tnm/__init__.py diff --git a/edsnlp/pipelines/ner/scores/tnm/factory.py b/edsnlp/edsnlp/pipelines/ner/scores/tnm/factory.py similarity index 100% rename from edsnlp/pipelines/ner/scores/tnm/factory.py rename to edsnlp/edsnlp/pipelines/ner/scores/tnm/factory.py diff --git a/edsnlp/pipelines/ner/scores/tnm/models.py b/edsnlp/edsnlp/pipelines/ner/scores/tnm/models.py similarity index 100% rename from edsnlp/pipelines/ner/scores/tnm/models.py rename to edsnlp/edsnlp/pipelines/ner/scores/tnm/models.py diff --git a/edsnlp/pipelines/ner/scores/tnm/patterns.py b/edsnlp/edsnlp/pipelines/ner/scores/tnm/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/scores/tnm/patterns.py rename to edsnlp/edsnlp/pipelines/ner/scores/tnm/patterns.py diff --git a/edsnlp/pipelines/ner/scores/tnm/tnm.py b/edsnlp/edsnlp/pipelines/ner/scores/tnm/tnm.py similarity index 100% rename from edsnlp/pipelines/ner/scores/tnm/tnm.py rename to edsnlp/edsnlp/pipelines/ner/scores/tnm/tnm.py diff --git a/edsnlp/pipelines/ner/scores/emergency/priority/__init__.py b/edsnlp/edsnlp/pipelines/ner/umls/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/scores/emergency/priority/__init__.py rename to edsnlp/edsnlp/pipelines/ner/umls/__init__.py diff --git a/edsnlp/pipelines/ner/umls/factory.py b/edsnlp/edsnlp/pipelines/ner/umls/factory.py similarity index 100% rename from edsnlp/pipelines/ner/umls/factory.py rename to edsnlp/edsnlp/pipelines/ner/umls/factory.py diff --git a/edsnlp/pipelines/ner/umls/patterns.py b/edsnlp/edsnlp/pipelines/ner/umls/patterns.py similarity index 100% rename from edsnlp/pipelines/ner/umls/patterns.py rename to edsnlp/edsnlp/pipelines/ner/umls/patterns.py diff --git a/edsnlp/pipelines/ner/umls/__init__.py b/edsnlp/edsnlp/pipelines/qualifiers/__init__.py similarity index 100% rename from edsnlp/pipelines/ner/umls/__init__.py rename to edsnlp/edsnlp/pipelines/qualifiers/__init__.py diff --git a/edsnlp/pipelines/qualifiers/base.py b/edsnlp/edsnlp/pipelines/qualifiers/base.py similarity index 100% rename from edsnlp/pipelines/qualifiers/base.py rename to edsnlp/edsnlp/pipelines/qualifiers/base.py diff --git a/edsnlp/pipelines/qualifiers/factories.py b/edsnlp/edsnlp/pipelines/qualifiers/factories.py similarity index 100% rename from edsnlp/pipelines/qualifiers/factories.py rename to edsnlp/edsnlp/pipelines/qualifiers/factories.py diff --git a/edsnlp/pipelines/qualifiers/family/__init__.py b/edsnlp/edsnlp/pipelines/qualifiers/family/__init__.py similarity index 100% rename from edsnlp/pipelines/qualifiers/family/__init__.py rename to edsnlp/edsnlp/pipelines/qualifiers/family/__init__.py diff --git a/edsnlp/pipelines/qualifiers/family/factory.py b/edsnlp/edsnlp/pipelines/qualifiers/family/factory.py similarity index 100% rename from edsnlp/pipelines/qualifiers/family/factory.py rename to edsnlp/edsnlp/pipelines/qualifiers/family/factory.py diff --git a/edsnlp/pipelines/qualifiers/family/family.py b/edsnlp/edsnlp/pipelines/qualifiers/family/family.py similarity index 100% rename from edsnlp/pipelines/qualifiers/family/family.py rename to edsnlp/edsnlp/pipelines/qualifiers/family/family.py diff --git a/edsnlp/pipelines/qualifiers/family/patterns.py b/edsnlp/edsnlp/pipelines/qualifiers/family/patterns.py 
similarity index 100% rename from edsnlp/pipelines/qualifiers/family/patterns.py rename to edsnlp/edsnlp/pipelines/qualifiers/family/patterns.py diff --git a/edsnlp/pipelines/qualifiers/history/__init__.py b/edsnlp/edsnlp/pipelines/qualifiers/history/__init__.py similarity index 100% rename from edsnlp/pipelines/qualifiers/history/__init__.py rename to edsnlp/edsnlp/pipelines/qualifiers/history/__init__.py diff --git a/edsnlp/pipelines/qualifiers/history/factory.py b/edsnlp/edsnlp/pipelines/qualifiers/history/factory.py similarity index 100% rename from edsnlp/pipelines/qualifiers/history/factory.py rename to edsnlp/edsnlp/pipelines/qualifiers/history/factory.py diff --git a/edsnlp/pipelines/qualifiers/history/history.py b/edsnlp/edsnlp/pipelines/qualifiers/history/history.py similarity index 100% rename from edsnlp/pipelines/qualifiers/history/history.py rename to edsnlp/edsnlp/pipelines/qualifiers/history/history.py diff --git a/edsnlp/pipelines/qualifiers/history/patterns.py b/edsnlp/edsnlp/pipelines/qualifiers/history/patterns.py similarity index 100% rename from edsnlp/pipelines/qualifiers/history/patterns.py rename to edsnlp/edsnlp/pipelines/qualifiers/history/patterns.py diff --git a/edsnlp/pipelines/qualifiers/hypothesis/__init__.py b/edsnlp/edsnlp/pipelines/qualifiers/hypothesis/__init__.py similarity index 100% rename from edsnlp/pipelines/qualifiers/hypothesis/__init__.py rename to edsnlp/edsnlp/pipelines/qualifiers/hypothesis/__init__.py diff --git a/edsnlp/pipelines/qualifiers/hypothesis/factory.py b/edsnlp/edsnlp/pipelines/qualifiers/hypothesis/factory.py similarity index 100% rename from edsnlp/pipelines/qualifiers/hypothesis/factory.py rename to edsnlp/edsnlp/pipelines/qualifiers/hypothesis/factory.py diff --git a/edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py b/edsnlp/edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py similarity index 100% rename from edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py rename to edsnlp/edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py diff --git a/edsnlp/pipelines/qualifiers/hypothesis/patterns.py b/edsnlp/edsnlp/pipelines/qualifiers/hypothesis/patterns.py similarity index 100% rename from edsnlp/pipelines/qualifiers/hypothesis/patterns.py rename to edsnlp/edsnlp/pipelines/qualifiers/hypothesis/patterns.py diff --git a/edsnlp/pipelines/qualifiers/negation/__init__.py b/edsnlp/edsnlp/pipelines/qualifiers/negation/__init__.py similarity index 100% rename from edsnlp/pipelines/qualifiers/negation/__init__.py rename to edsnlp/edsnlp/pipelines/qualifiers/negation/__init__.py diff --git a/edsnlp/pipelines/qualifiers/negation/factory.py b/edsnlp/edsnlp/pipelines/qualifiers/negation/factory.py similarity index 100% rename from edsnlp/pipelines/qualifiers/negation/factory.py rename to edsnlp/edsnlp/pipelines/qualifiers/negation/factory.py diff --git a/edsnlp/pipelines/qualifiers/negation/negation.py b/edsnlp/edsnlp/pipelines/qualifiers/negation/negation.py similarity index 100% rename from edsnlp/pipelines/qualifiers/negation/negation.py rename to edsnlp/edsnlp/pipelines/qualifiers/negation/negation.py diff --git a/edsnlp/pipelines/qualifiers/negation/patterns.py b/edsnlp/edsnlp/pipelines/qualifiers/negation/patterns.py similarity index 100% rename from edsnlp/pipelines/qualifiers/negation/patterns.py rename to edsnlp/edsnlp/pipelines/qualifiers/negation/patterns.py diff --git a/edsnlp/pipelines/qualifiers/reported_speech/__init__.py b/edsnlp/edsnlp/pipelines/qualifiers/reported_speech/__init__.py similarity index 100% rename from 
edsnlp/pipelines/qualifiers/reported_speech/__init__.py rename to edsnlp/edsnlp/pipelines/qualifiers/reported_speech/__init__.py diff --git a/edsnlp/pipelines/qualifiers/reported_speech/factory.py b/edsnlp/edsnlp/pipelines/qualifiers/reported_speech/factory.py similarity index 100% rename from edsnlp/pipelines/qualifiers/reported_speech/factory.py rename to edsnlp/edsnlp/pipelines/qualifiers/reported_speech/factory.py diff --git a/edsnlp/pipelines/qualifiers/reported_speech/patterns.py b/edsnlp/edsnlp/pipelines/qualifiers/reported_speech/patterns.py similarity index 100% rename from edsnlp/pipelines/qualifiers/reported_speech/patterns.py rename to edsnlp/edsnlp/pipelines/qualifiers/reported_speech/patterns.py diff --git a/edsnlp/pipelines/qualifiers/reported_speech/reported_speech.py b/edsnlp/edsnlp/pipelines/qualifiers/reported_speech/reported_speech.py similarity index 100% rename from edsnlp/pipelines/qualifiers/reported_speech/reported_speech.py rename to edsnlp/edsnlp/pipelines/qualifiers/reported_speech/reported_speech.py diff --git a/edsnlp/pipelines/terminations.py b/edsnlp/edsnlp/pipelines/terminations.py similarity index 100% rename from edsnlp/pipelines/terminations.py rename to edsnlp/edsnlp/pipelines/terminations.py diff --git a/edsnlp/pipelines/qualifiers/__init__.py b/edsnlp/edsnlp/pipelines/trainable/__init__.py similarity index 100% rename from edsnlp/pipelines/qualifiers/__init__.py rename to edsnlp/edsnlp/pipelines/trainable/__init__.py diff --git a/edsnlp/pipelines/trainable/__init__.py b/edsnlp/edsnlp/pipelines/trainable/layers/__init__.py similarity index 100% rename from edsnlp/pipelines/trainable/__init__.py rename to edsnlp/edsnlp/pipelines/trainable/layers/__init__.py diff --git a/edsnlp/models/torch/crf.py b/edsnlp/edsnlp/pipelines/trainable/layers/crf.py similarity index 99% rename from edsnlp/models/torch/crf.py rename to edsnlp/edsnlp/pipelines/trainable/layers/crf.py index 7e7bf0d49..800b5dd90 100644 --- a/edsnlp/models/torch/crf.py +++ b/edsnlp/edsnlp/pipelines/trainable/layers/crf.py @@ -47,7 +47,7 @@ def __init__( Shape: n_tags Impossible transitions at the start of a sequence end_forbidden_transitions Optional[torch.BoolTensor] - Shape: n_tags + Shape is (n_tags,) Impossible transitions at the end of a sequence learnable_transitions: bool Should we learn transition scores to complete the @@ -410,8 +410,6 @@ def tags_to_spans(tags): ---------- tags: torch.LongTensor Shape: n_samples * n_labels * n_tokens - mask: torch.BoolTensor - Shape: n_samples * n_labels * n_tokens Returns ------- diff --git a/edsnlp/edsnlp/pipelines/trainable/nested_ner/__init__.py b/edsnlp/edsnlp/pipelines/trainable/nested_ner/__init__.py new file mode 100644 index 000000000..5cd383edd --- /dev/null +++ b/edsnlp/edsnlp/pipelines/trainable/nested_ner/__init__.py @@ -0,0 +1 @@ +from .factory import create_component, create_scorer diff --git a/edsnlp/edsnlp/pipelines/trainable/nested_ner/factory.py b/edsnlp/edsnlp/pipelines/trainable/nested_ner/factory.py new file mode 100644 index 000000000..7be0d1db2 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/trainable/nested_ner/factory.py @@ -0,0 +1,83 @@ +from spacy import Language +from thinc.api import Model +from thinc.config import Config + +from .nested_ner import TrainableNer +from .nested_ner import make_nested_ner_scorer as create_scorer # noqa: F401 + +nested_ner_default_config = """ +[model] + @architectures = "eds.stack_crf_ner_model.v1" + mode = "joint" + + [model.tok2vec] + @architectures = "spacy.Tok2Vec.v1" + + [model.tok2vec.embed] 
+ @architectures = "spacy.MultiHashEmbed.v1" + width = 96 + rows = [5000, 2000, 1000, 1000] + attrs = ["ORTH", "PREFIX", "SUFFIX", "SHAPE"] + include_static_vectors = false + + [model.tok2vec.encode] + @architectures = "spacy.MaxoutWindowEncoder.v1" + width = ${model.tok2vec.embed.width} + window_size = 1 + maxout_pieces = 3 + depth = 4 + +[scorer] + @scorers = "eds.nested_ner_scorer.v1" +""" + +NESTED_NER_DEFAULTS = Config().from_str(nested_ner_default_config) + + +@Language.factory( + "nested_ner", + default_config=NESTED_NER_DEFAULTS, + requires=["doc.ents", "doc.spans"], + assigns=["doc.ents", "doc.spans"], + default_score_weights={ + "ents_f": 1.0, + "ents_p": 0.0, + "ents_r": 0.0, + }, +) +def create_component( + nlp: Language, + name: str, + model: Model, + ent_labels=None, + spans_labels=None, + scorer=None, +): + """ + Initialize a general named entity recognizer (with or without nested or + overlapping entities). + + Parameters + ---------- + nlp: Language + The current nlp object + name: str + Name of the component + model: Model + The model to extract the spans + ent_labels: Iterable[str] + list of labels to filter entities for in `doc.ents` + spans_labels: Mapping[str, Iterable[str]] + Mapping from span group names to list of labels to look for entities + and assign the predicted entities + scorer: Optional[Callable] + Method to call to score predictions + """ + return TrainableNer( + vocab=nlp.vocab, + model=model, + name=name, + ent_labels=ent_labels, + spans_labels=spans_labels, + scorer=scorer, + ) diff --git a/edsnlp/pipelines/trainable/nested_ner.py b/edsnlp/edsnlp/pipelines/trainable/nested_ner/nested_ner.py similarity index 82% rename from edsnlp/pipelines/trainable/nested_ner.py rename to edsnlp/edsnlp/pipelines/trainable/nested_ner/nested_ner.py index b2359d027..144fe5a80 100644 --- a/edsnlp/pipelines/trainable/nested_ner.py +++ b/edsnlp/edsnlp/pipelines/trainable/nested_ner/nested_ner.py @@ -10,7 +10,6 @@ from spacy.vocab import Vocab from thinc.api import Model, Optimizer from thinc.backends import NumpyOps -from thinc.config import Config from thinc.model import set_dropout_rate from thinc.types import Ints2d from wasabi import Printer @@ -19,70 +18,11 @@ msg = Printer() - NUM_INITIALIZATION_EXAMPLES = 1000 - -nested_ner_default_config = """ -[model] - @architectures = "eds.stack_crf_ner_model.v1" - mode = "joint" - - [model.tok2vec] - @architectures = "spacy.Tok2Vec.v1" - - [model.tok2vec.embed] - @architectures = "spacy.MultiHashEmbed.v1" - width = 96 - rows = [5000, 2000, 1000, 1000] - attrs = ["ORTH", "PREFIX", "SUFFIX", "SHAPE"] - include_static_vectors = false - - [model.tok2vec.encode] - @architectures = "spacy.MaxoutWindowEncoder.v1" - width = ${model.tok2vec.embed.width} - window_size = 1 - maxout_pieces = 3 - depth = 4 - -[scorer] - @scorers = "eds.nested_ner_scorer.v1" -""" - -NESTED_NER_DEFAULTS = Config().from_str(nested_ner_default_config) np_ops = NumpyOps() -@Language.factory( - "nested_ner", - default_config=NESTED_NER_DEFAULTS, - requires=["doc.ents", "doc.spans"], - assigns=["doc.ents", "doc.spans"], - default_score_weights={ - "ents_f": 1.0, - "ents_p": 0.0, - "ents_r": 0.0, - }, -) -def create_component( - nlp: Language, - name: str, - model: Model, - ent_labels=None, - spans_labels=None, - scorer=None, -): - """Construct a TrainableQualifier component.""" - return TrainableNer( - vocab=nlp.vocab, - model=model, - name=name, - ent_labels=ent_labels, - spans_labels=spans_labels, - scorer=scorer, - ) - - def nested_ner_scorer(examples: 
     """
     Scores the extracted entities that may be overlapping or nested
@@ -102,21 +42,31 @@ def nested_ner_scorer(examples: Iterable[Example], **cfg):
     labels = set(cfg["labels"]) if "labels" in cfg is not None else None
     spans_labels = cfg["spans_labels"]
-    pred_spans = set()
-    gold_spans = set()
+    total_pred_spans = set()
+    total_gold_spans = set()
+    if labels is not None:
+        pred_spans = {}
+        gold_spans = {}
     for eg_idx, eg in enumerate(examples):
         for span in (
             *eg.predicted.ents,
             *(
                 span
                 for name in (
-                    spans_labels if spans_labels is not None else eg.reference.spans
+                    spans_labels if spans_labels is not None else eg.predicted.spans
                 )
                 for span in eg.predicted.spans.get(name, ())
            ),
         ):
-            if labels is None or span.label_ in labels:
-                pred_spans.add((eg_idx, span.start, span.end, span.label_))
+            if labels is None:
+                total_pred_spans.add((eg_idx, span.start, span.end, span.label_))
+            elif span.label_ in labels:
+                if span.label_ not in pred_spans.keys():
+                    pred_spans[span.label_] = set()
+                if span.label_ not in gold_spans.keys():
+                    gold_spans[span.label_] = set()
+                pred_spans[span.label_].add((eg_idx, span.start, span.end, span.label_))
+                total_pred_spans.add((eg_idx, span.start, span.end, span.label_))

         for span in (
             *eg.reference.ents,
@@ -128,18 +78,50 @@
                 for span in eg.reference.spans.get(name, ())
            ),
         ):
-            if labels is None or span.label_ in labels:
-                gold_spans.add((eg_idx, span.start, span.end, span.label_))
-
-    tp = len(pred_spans & gold_spans)
-
-    return {
-        "ents_p": tp / len(pred_spans) if pred_spans else float(tp == len(pred_spans)),
-        "ents_r": tp / len(gold_spans) if gold_spans else float(tp == len(gold_spans)),
-        "ents_f": 2 * tp / (len(pred_spans) + len(gold_spans))
-        if pred_spans or gold_spans
-        else float(len(pred_spans) == len(gold_spans)),
-    }
+            if labels is None:
+                total_gold_spans.add((eg_idx, span.start, span.end, span.label_))
+            elif span.label_ in labels:
+                if span.label_ not in pred_spans.keys():
+                    pred_spans[span.label_] = set()
+                if span.label_ not in gold_spans.keys():
+                    gold_spans[span.label_] = set()
+                gold_spans[span.label_].add((eg_idx, span.start, span.end, span.label_))
+                total_gold_spans.add((eg_idx, span.start, span.end, span.label_))
+
+    results = {}
+    tp = len(total_pred_spans & total_gold_spans)
+    results["ents_p"] = (
+        tp / len(total_pred_spans)
+        if total_pred_spans
+        else float(tp == len(total_pred_spans))
+    )
+    results["ents_r"] = (
+        tp / len(total_gold_spans)
+        if total_gold_spans
+        else float(tp == len(total_gold_spans))
+    )
+    results["ents_f"] = (
+        2 * tp / (len(total_pred_spans) + len(total_gold_spans))
+        if total_pred_spans or total_gold_spans
+        else float(len(total_pred_spans) == len(total_gold_spans))
+    )
+
+    if labels is not None:
+        results["ents_per_type"] = {}
+        for label in gold_spans.keys():
+            tp = len(pred_spans[label] & gold_spans[label])
+            results["ents_per_type"][label] = {
+                "p": tp / len(pred_spans[label])
+                if pred_spans[label]
+                else float(tp == len(pred_spans[label])),
+                "r": tp / len(gold_spans[label])
+                if gold_spans[label]
+                else float(tp == len(gold_spans[label])),
+                "f": 2 * tp / (len(pred_spans[label]) + len(gold_spans[label]))
+                if pred_spans[label] or gold_spans[label]
+                else float(len(pred_spans[label]) == len(gold_spans[label])),
+            }
+    return results


 @spacy.registry.scorers("eds.nested_ner_scorer.v1")
diff --git a/edsnlp/models/stack_crf_ner.py b/edsnlp/edsnlp/pipelines/trainable/nested_ner/stack_crf_ner.py similarity index 87% rename from
edsnlp/models/stack_crf_ner.py rename to edsnlp/edsnlp/pipelines/trainable/nested_ner/stack_crf_ner.py index 62f8ae4f2..967c1b056 100644 --- a/edsnlp/models/stack_crf_ner.py +++ b/edsnlp/edsnlp/pipelines/trainable/nested_ner/stack_crf_ner.py @@ -8,8 +8,11 @@ from thinc.model import Model from thinc.types import Floats1d, Floats2d, Ints2d -from edsnlp.models.pytorch_wrapper import PytorchWrapperModule, wrap_pytorch_model -from edsnlp.models.torch.crf import IMPOSSIBLE, MultiLabelBIOULDecoder +from edsnlp.pipelines.trainable.layers.crf import IMPOSSIBLE, MultiLabelBIOULDecoder +from edsnlp.pipelines.trainable.pytorch_wrapper import ( + PytorchWrapperModule, + wrap_pytorch_model, +) class CRFMode(str, Enum): @@ -176,11 +179,29 @@ def forward( def create_model( tok2vec: Model[List[Doc], List[Floats2d]], mode: CRFMode, - n_labels: int = None, + n_labels: Optional[int] = None, ) -> Model[ Tuple[Iterable[Doc], Optional[Ints2d], Optional[bool]], Tuple[Floats1d, Ints2d], ]: + """ + Parameters + ---------- + tok2vec: Model[List[Doc], List[Floats2d]] + The tok2vec embedding model used to generate the word embeddings + mode: CRFMode + How the CRF loss is computed + + - `joint`: Loss accounts for CRF transitions + - `independent`: Loss does not account for CRF transitions (softmax loss) + - `marginal`: Tag scores are smoothly updated with CRF transitions, and softmax loss is applied + n_labels: Optional[int] + Number of labels. This will be automatically set later during initialization + + Returns + ------- + Model + """ # noqa: E501 return wrap_pytorch_model( # noqa encoder=tok2vec, pt_model=StackedCRFNERModule( diff --git a/edsnlp/models/pytorch_wrapper.py b/edsnlp/edsnlp/pipelines/trainable/pytorch_wrapper.py similarity index 95% rename from edsnlp/models/pytorch_wrapper.py rename to edsnlp/edsnlp/pipelines/trainable/pytorch_wrapper.py index de7d7f8f3..e69b6ee0a 100644 --- a/edsnlp/models/pytorch_wrapper.py +++ b/edsnlp/edsnlp/pipelines/trainable/pytorch_wrapper.py @@ -1,5 +1,5 @@ import typing -from typing import Any, Callable, Iterable, List, Optional, OrderedDict, Tuple +from typing import Any, Callable, Iterable, List, Optional, OrderedDict, Sequence, Tuple import torch from spacy.tokens import Doc @@ -27,7 +27,7 @@ def __init__( """ Pytorch wrapping module for Spacy. Models that expect to be wrapped with - [wrap_pytorch_model][edsnlp.models.pytorch_wrapper.wrap_pytorch_model] + [wrap_pytorch_model][edsnlp.pipelines.trainable.pytorch_wrapper.wrap_pytorch_model] should inherit from this module. 
Parameters @@ -62,7 +62,7 @@ def load_state_dict( """ self.cfg = state_dict.pop("cfg") self.initialize() - super().load_state_dict(state_dict, strict) + super().load_state_dict(state_dict, False) def state_dict(self, destination=None, prefix="", keep_vars=False): """ @@ -245,7 +245,7 @@ def instance_init(model: Model, X: List[Doc] = None, Y: Ints2d = None) -> Model: Returns ------- - + Model """ encoder = model.get_ref("encoder") if X is not None: @@ -296,6 +296,7 @@ def wrap_pytorch_model( def wrap_pytorch_model( encoder: Model[List[Doc], List[Floats2d]], pt_model: PytorchWrapperModule, + attrs: Sequence[str] = ("set_n_labels",), ) -> Model[ Tuple[Iterable[Doc], Optional[PredT], Optional[bool]], Tuple[Floats1d, PredT], @@ -311,6 +312,8 @@ def wrap_pytorch_model( The Thinc document token embedding layer pt_model: PytorchWrapperModule The Pytorch model + attrs: Sequence[str] + The attributes of the Pytorch model that should be copied to the Thinc model Returns ------- @@ -323,8 +326,8 @@ def wrap_pytorch_model( "pytorch", pytorch_forward, attrs={ - "set_n_labels": pt_model.set_n_labels, "pt_model": pt_model, + **{attr: getattr(pt_model, attr) for attr in attrs}, }, layers=[encoder], shims=[PyTorchShim(pt_model)], diff --git a/edsnlp/edsnlp/pipelines/trainable/span_qualifier/__init__.py b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/__init__.py new file mode 100644 index 000000000..a7a3a99ee --- /dev/null +++ b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/__init__.py @@ -0,0 +1 @@ +from .factory import create_candidate_getter, create_component, create_scorer diff --git a/edsnlp/edsnlp/pipelines/trainable/span_qualifier/factory.py b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/factory.py new file mode 100644 index 000000000..2fa85c5b6 --- /dev/null +++ b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/factory.py @@ -0,0 +1,124 @@ +from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union + +from spacy import Language +from spacy.tokens import Doc +from thinc.api import Model +from thinc.config import Config + +from .span_qualifier import TrainableSpanQualifier +from .span_qualifier import make_span_qualifier_scorer as create_scorer # noqa: F401 +from .utils import SpanGroups, Spans +from .utils import make_candidate_getter as create_candidate_getter + +span_qualifier_default_config = """ +[model] + @architectures = "eds.span_multi_classifier.v1" + projection_mode = "dot" + pooler_mode = "max" + [model.tok2vec] + @architectures = "spacy.Tok2Vec.v1" + + [model.tok2vec.embed] + @architectures = "spacy.MultiHashEmbed.v1" + width = 96 + rows = [5000, 2000, 1000, 1000] + attrs = ["ORTH", "PREFIX", "SUFFIX", "SHAPE"] + include_static_vectors = false + + [model.tok2vec.encode] + @architectures = "spacy.MaxoutWindowEncoder.v1" + width = ${model.tok2vec.embed.width} + window_size = 1 + maxout_pieces = 3 + depth = 4 +""" + +SPAN_QUALIFIER_DEFAULTS = Config().from_str(span_qualifier_default_config) + + +@Language.factory( + "eds.span_qualifier", + default_config=SPAN_QUALIFIER_DEFAULTS, + requires=["doc.ents", "doc.spans"], + assigns=["doc.ents", "doc.spans"], + default_score_weights={ + "qual_f": 1.0, + }, +) +def create_component( + nlp, + model: Model, + on_ents: Optional[Union[bool, Sequence[str]]] = None, + on_span_groups: Union[ + bool, Sequence[str], Mapping[str, Union[bool, Sequence[str]]] + ] = False, + qualifiers: Optional[Sequence[str]] = None, + label_constraints: Optional[Dict[str, List[str]]] = None, + candidate_getter: Optional[ + Callable[[Doc], 
Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]]
+    ] = None,
+    name: str = "span_qualifier",
+    scorer: Optional[Callable] = None,
+) -> TrainableSpanQualifier:
+    """
+    Create a generic span classification component
+
+    Parameters
+    ----------
+    nlp: Language
+        The current nlp object
+    model: Model
+        The model to extract the spans
+    name: str
+        Name of the component
+    on_ents: Union[bool, Sequence[str]]
+        Whether to look into `doc.ents` for spans to classify. If a list of strings
+        is provided, only the spans of the given labels will be considered. If None
+        and `on_span_groups` is False, labels mentioned in `label_constraints`
+        will be used, and all ents will be used if `label_constraints` is None.
+    on_span_groups: Union[bool, Sequence[str], Mapping[str, Sequence[str]]]
+        Whether to look into `doc.spans` for spans to classify:
+
+        - If True, all span groups will be considered
+        - If False, no span group will be considered
+        - If a list of str is provided, only these span groups will be kept
+        - If a mapping is provided, the keys are the span group names and the values
+          are either a list of allowed labels in the group or True to keep them all
+    qualifiers: Optional[Sequence[str]]
+        The qualifiers to predict or train on. If None, keys from the
+        `label_constraints` will be used
+    label_constraints: Optional[Dict[str, List[str]]]
+        Constraints to select qualifiers for each span depending on their labels.
+        Keys of the dict are the qualifiers and values are the labels for which
+        the qualifier is allowed. If None, all qualifiers will be used for all spans
+    candidate_getter: Optional[Callable[[Doc], Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]]]
+        Optional method to call to extract the candidate spans and the qualifiers
+        to predict or train on. If None, a candidate getter will be created from
+        the other parameters: `on_ents`, `on_span_groups`, `qualifiers` and
+        `label_constraints`.
+    scorer: Optional[Callable]
+        Optional method to call to score predictions
+    """  # noqa: E501
+    do_make_candidate_getter = bool(
+        on_ents or on_span_groups or qualifiers or label_constraints
+    )
+    if (candidate_getter is not None) == do_make_candidate_getter:
+        raise ValueError(
+            "You must either provide a candidate getter or the parameters to "
+            "make one, but not both."
+        )
+    if do_make_candidate_getter:
+        candidate_getter = create_candidate_getter(
+            on_ents=on_ents,
+            on_span_groups=on_span_groups,
+            qualifiers=qualifiers,
+            label_constraints=label_constraints,
+        )
+
+    return TrainableSpanQualifier(
+        vocab=nlp.vocab,
+        model=model,
+        candidate_getter=candidate_getter,
+        name=name,
+        scorer=scorer,
+    )
diff --git a/edsnlp/edsnlp/pipelines/trainable/span_qualifier/span_multi_classifier.py b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/span_multi_classifier.py
new file mode 100644
index 000000000..ca4fc459d
--- /dev/null
+++ b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/span_multi_classifier.py
@@ -0,0 +1,240 @@
+from enum import Enum
+from typing import Any, Dict, Iterable, List, Optional, OrderedDict, Tuple
+
+import torch
+from spacy import registry
+from spacy.tokens import Doc
+from thinc.model import Model
+from thinc.types import Floats1d, Floats2d, Ints2d
+
+from edsnlp.pipelines.trainable.pytorch_wrapper import (
+    PytorchWrapperModule,
+    wrap_pytorch_model,
+)
+
+
+class ProjectionMode(str, Enum):
+    dot = "dot"
+
+
+class PoolerMode(str, Enum):
+    max = "max"
+    sum = "sum"
+    mean = "mean"
+
+
+class SpanMultiClassifier(PytorchWrapperModule):
+    def __init__(
+        self,
+        input_size: Optional[int] = None,
+        n_labels: Optional[int] = None,
+        pooler_mode: PoolerMode = "max",
+        projection_mode: ProjectionMode = "dot",
+    ):
+        """
+        Pytorch module for constrained multi-label & multi-class span classification
+
+        Parameters
+        ----------
+        input_size: int
+            Size of the input embeddings
+        n_labels: int
+            Number of labels predicted by the module
+        pooler_mode: PoolerMode
+            How embeddings are aggregated
+        projection_mode: ProjectionMode
+            How embeddings are converted into logits
+        """
+        super().__init__(input_size, n_labels)
+
+        self.cfg["projection_mode"] = projection_mode
+        self.cfg["pooler_mode"] = pooler_mode
+
+        if projection_mode != "dot":
+            raise Exception(
+                "Only scalar product is supported for label classification."
+            )
+
+        self.groups_indices = None
+        self.classifier = None
+
+    def initialize(self):
+        """
+        Once the number of labels n_labels is known, this method
+        initializes the torch linear layer.
+        """
+        if self.cfg["projection_mode"] == "dot":
+            self.classifier = torch.nn.Linear(self.input_size, self.n_labels)
+
+    def state_dict(self, destination=None, prefix="", keep_vars=False):
+        sd = super().state_dict()
+
+        sd["groups_indices"] = self.groups_indices
+        sd["combinations"] = list(self.combinations)
+        return sd
+
+    def load_state_dict(
+        self, state_dict: OrderedDict[str, torch.Tensor], strict: bool = True
+    ):
+        if state_dict.get("combinations", None) is not None:
+            self.set_label_groups(
+                groups_combinations=state_dict.pop("combinations"),
+                groups_indices=state_dict.pop("groups_indices"),
+            )
+
+        super().load_state_dict(state_dict, strict)
+
+    def set_label_groups(
+        self,
+        groups_combinations,
+        groups_indices,
+    ):
+        """
+        Set the label groups matrices.
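+
+        For example (an illustrative sketch): with two qualifier groups whose
+        allowed value combinations span 2 and 3 rows respectively,
+        `groups_combinations` would hold two boolean arrays of shapes
+        (2, n_labels_in_group_0) and (3, n_labels_in_group_1), and
+        `groups_indices` would list, for each group, the classifier output
+        columns that belong to it.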
+ """ + + # To make the buffers discoverable by pytorch (for device moving operations), + # we need to register them as buffer, and then we can group them in a + # single list of tensors + self.groups_indices = groups_indices + for i, group_combinations in enumerate(groups_combinations): + # n_combinations_in_group * n_labels_in_group + self.register_buffer( + f"combinations_{i}", + torch.as_tensor(group_combinations, dtype=torch.bool), + ) + + @property + def combinations(self): + for i in range(len(self.groups_indices)): + yield getattr(self, f"combinations_{i}") + + def forward( + self, + embeds: torch.FloatTensor, + mask: torch.BoolTensor, + spans: Optional[torch.LongTensor], + targets: Optional[torch.LongTensor], + additional_outputs: Dict[str, Any] = None, + is_train: bool = False, + is_predict: bool = False, + ) -> Optional[torch.FloatTensor]: + """ + Apply the span classifier module to the document embeddings and given spans to: + - compute the loss + - and/or predict the labels of spans + If labels are predicted, they are assigned to the `additional_outputs` + dictionary. + + Parameters + ---------- + embeds: torch.FloatTensor + Token embeddings to predict the tags from + mask: torch.BoolTensor + Mask of the sequences + spans: Optional[torch.LongTensor] + 2d tensor of n_spans * (doc_idx, ner_label_idx, begin, end) + targets: Optional[List[torch.LongTensor]] + list of 2d tensor of n_spans * n_combinations (1 hot) + additional_outputs: Dict[str, Any] + Additional outputs that should not / cannot be back-propped through + This dict will contain the predicted 2d tensor of labels + is_train: bool=False + Are we training the model (defaults to True) + is_predict: bool=False + Are we predicting the model (defaults to False) + + Returns + ------- + Optional[torch.FloatTensor] + Optional 0d loss (shape = [1]) to train the model + """ + n_samples, n_words = embeds.shape[:2] + device = embeds.device + (sample_idx, span_begins, span_ends) = spans.unbind(1) + if len(span_begins) == 0: + loss = None + if is_train: + loss = embeds.sum().unsqueeze(0) * 0 + else: + additional_outputs["labels"] = torch.zeros( + 0, self.n_labels, device=embeds.device, dtype=torch.int + ) + return loss + + flat_begins = n_words * sample_idx + span_begins + flat_ends = n_words * sample_idx + span_ends + flat_embeds = embeds.view(-1, embeds.shape[-1]) + flat_indices = torch.cat( + [ + torch.arange(b, e, device=device) + for b, e in zip(flat_begins.cpu().tolist(), flat_ends.cpu().tolist()) + ] + ).to(embeds.device) + offsets = (flat_ends - flat_begins).cumsum(0).roll(1) + offsets[0] = 0 + span_embeds = torch.nn.functional.embedding_bag( + input=flat_indices, + weight=flat_embeds, + offsets=offsets, + mode=self.cfg["pooler_mode"], + ) + + scores = self.classifier(span_embeds) + + groups_combinations_scores = [ + # ([e]ntities * [b]indings) * ([c]ombinations * [b]indings) + torch.einsum("eb,cb->ec", scores[:, grp_ids], grp_combinations.float()) + for grp_combinations, grp_ids in zip(self.combinations, self.groups_indices) + ] # -> list of ([e]ntities * [c]ombinations) + + loss = None + if is_train: + loss = sum( + [ + -grp_combinations_scores.log_softmax(-1) + .masked_fill(~grp_gold_combinations.to(device).bool(), 0) + .sum() + for grp_combinations_scores, grp_gold_combinations in zip( + groups_combinations_scores, targets + ) + ] + ) + loss = loss.unsqueeze(0) # for the thinc-pytorch shim + if is_predict: + pred = torch.cat( + [ + group_combinations[group_scores.argmax(-1)] + for group_scores, group_combinations in zip( 
+                        groups_combinations_scores, self.combinations
+                    )
+                ],
+                dim=-1,
+            )
+            additional_outputs["labels"] = pred.int()
+        return loss
+
+
+@registry.layers("eds.span_multi_classifier.v1")
+def create_model(
+    tok2vec: Model[List[Doc], List[Floats2d]],
+    projection_mode: ProjectionMode = ProjectionMode.dot,
+    pooler_mode: PoolerMode = PoolerMode.max,
+    n_labels: Optional[int] = None,
+) -> Model[
+    Tuple[Iterable[Doc], Optional[Ints2d], Optional[bool]],
+    Tuple[Floats1d, Ints2d],
+]:
+    return wrap_pytorch_model(  # noqa
+        encoder=tok2vec,
+        pt_model=SpanMultiClassifier(
+            input_size=None,  # will be set later during initialization
+            n_labels=n_labels,  # will likely be set later during initialization
+            projection_mode=projection_mode,
+            pooler_mode=pooler_mode,
+        ),
+        attrs=[
+            "set_n_labels",
+            "set_label_groups",
+        ],
+    )
diff --git a/edsnlp/edsnlp/pipelines/trainable/span_qualifier/span_qualifier.py b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/span_qualifier.py
new file mode 100644
index 000000000..a4e64ec8e
--- /dev/null
+++ b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/span_qualifier.py
@@ -0,0 +1,538 @@
+import pickle
+from collections import defaultdict
+from itertools import islice
+from typing import Callable, Dict, Iterable, List, Optional, Tuple
+
+import numpy as np
+import spacy
+from spacy import Language
+from spacy.pipeline import TrainablePipe
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
+from thinc.api import Model, Optimizer
+from thinc.backends import NumpyOps
+from thinc.model import set_dropout_rate
+from thinc.types import Ints2d
+from wasabi import Printer
+
+from edsnlp.pipelines.trainable.span_qualifier.utils import (
+    Binding,
+    SpanGroups,
+    Spans,
+    keydefaultdict,
+    make_binding_getter,
+    make_binding_setter,
+)
+
+NUM_INITIALIZATION_EXAMPLES = 10
+
+msg = Printer()
+np_ops = NumpyOps()
+
+
+@spacy.registry.scorers("eds.span_qualifier_scorer.v1")
+def make_span_qualifier_scorer(candidate_getter: Callable):
+    def span_qualifier_scorer(examples: Iterable[Example], **cfg):
+        """
+        Scores the qualifier values predicted on the spans returned by the
+        candidate getter.
+
+        Parameters
+        ----------
+        examples: Iterable[Example]
+            The examples to score
+        cfg: Dict[str, Any]
+            The configuration dict of the component
+
+        Returns
+        -------
+        Dict[str, float]
+        """
+        labels = defaultdict(lambda: ([], []))
+        labels["ALL"] = ([], [])
+        for eg_idx, eg in enumerate(examples):
+            doc_spans, *_, doc_qlf = candidate_getter(eg.predicted)
+            for span_idx, (span, span_qualifiers) in enumerate(zip(doc_spans, doc_qlf)):
+                for qualifier in span_qualifiers:
+                    value = BINDING_GETTERS[qualifier](span)
+                    if value:
+                        labels["ALL"][0].append((eg_idx, span_idx, qualifier, value))
+                        key_str = (
+                            f"{qualifier[2:]}"
+                            if value is True
+                            else f"{qualifier[2:]}-{value}"
+                        )
+                        labels[key_str][0].append((eg_idx, span_idx, value))
+
+            doc_spans, *_, doc_qlf = candidate_getter(eg.reference)
+            for span_idx, (span, span_qualifiers) in enumerate(zip(doc_spans, doc_qlf)):
+                for qualifier in span_qualifiers:
+                    value = BINDING_GETTERS[qualifier](span)
+                    if value:
+                        labels["ALL"][1].append((eg_idx, span_idx, qualifier, value))
+                        key_str = (
+                            f"{qualifier[2:]}"
+                            if value is True
+                            else f"{qualifier[2:]}-{value}"
+                        )
+                        labels[key_str][1].append((eg_idx, span_idx, value))
+
+        def prf(pred, gold):
+            # num_pred / num_gold instead of np / ng: avoids shadowing the
+            # module-level numpy alias
+            tp = len(set(pred) & set(gold))
+            num_pred = len(pred)
+            num_gold = len(gold)
+            return {
+                "f": 2 * tp / max(1, num_pred + num_gold),
+                "p": 1 if tp == num_pred else (tp / num_pred),
+                "r": 1 if tp == num_gold else (tp / num_gold),
+            }
+
+        results = {name: prf(pred, gold) for name, (pred, gold) in labels.items()}
+        results = dict(sorted(results.items()))
+        return {"qual_f": results["ALL"]["f"], "qual_per_type": results}
+
+    return span_qualifier_scorer
+
+
+BINDING_GETTERS = keydefaultdict(make_binding_getter)
+BINDING_SETTERS = keydefaultdict(make_binding_setter)
+
+
+# noinspection PyMethodOverriding
+class TrainableSpanQualifier(TrainablePipe):
+    """Create a generic span classification component"""
+
+    def __init__(
+        self,
+        vocab: Vocab,
+        model: Model,
+        candidate_getter: Callable[
+            [Doc], Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]
+        ],
+        name: str = "span_qualifier",
+        scorer: Optional[Callable] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        vocab: Vocab
+            Spacy vocabulary
+        model: Model
+            The model to extract the spans
+        name: str
+            Name of the component
+        candidate_getter: Callable[[Doc], Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]]
+            Method to call to extract the candidate spans and the qualifiers
+            to predict or train on.
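+            Typically built with the `eds.candidate_span_qualifier_getter`
+            misc factory registered in this package.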
+        scorer: Optional[Callable]
+            Method to call to score predictions
+        """  # noqa: E501
+
+        super().__init__(vocab, model, name)
+
+        self.cfg["qualifiers"]: Optional[Tuple[str]] = ()
+        self.candidate_getter = candidate_getter
+
+        self.bindings: List[Binding] = []
+        self.ner_labels_indices: Optional[Dict[str, int]] = None
+
+        if scorer is None:
+            self.scorer = make_span_qualifier_scorer(candidate_getter)
+        else:
+            self.scorer = scorer
+
+    def to_disk(self, path, *, exclude=tuple()):
+        # This will receive the directory path + /my_component
+        super().to_disk(path, exclude=exclude)
+        data_path = path / "data.pkl"
+        with open(data_path, "wb") as f:
+            pickle.dump(
+                {
+                    "bindings": self.bindings,
+                },
+                f,
+            )
+
+    def from_disk(self, path, exclude=tuple()):
+        super().from_disk(path, exclude=exclude)
+        # This will receive the directory path + /my_component
+        data_path = path / "data.pkl"
+        with open(data_path, "rb") as f:
+            data = pickle.load(f)
+        self.bindings = data["bindings"]
+        return self
+
+    @property
+    def qualifiers(self) -> Tuple[str]:
+        """Return the qualifiers predicted by the component"""
+        return self.cfg["qualifiers"]
+
+    @property
+    def labels(self) -> List[str]:
+        return ["{}={}".format(a, b) for a, b in self.bindings]
+
+    def add_label(self, label: str) -> int:
+        """Add a new label to the pipe."""
+        raise Exception("Cannot add a new label to the pipe")
+
+    def predict(
+        self, docs: List[Doc]
+    ) -> Tuple[
+        Dict[str, Ints2d],
+        Spans,
+        List[Optional[Spans]],
+        List[SpanGroups],
+        List[List[str]],
+    ]:
+        """
+        Apply the pipeline's model to a batch of docs, without modifying them.
+
+        Parameters
+        ----------
+        docs: List[Doc]
+
+        Returns
+        -------
+        Tuple[Dict[str, Ints2d], Spans, List[Optional[Spans]], List[SpanGroups], List[List[str]]]
+            The predicted one-hot label tensor that represents the labels of
+            the spans of the whole batch, the list of all spans, and the ents
+            and span groups to reassign in case the "label_" qualifier is
+            updated
+        """  # noqa: E501
+        spans, ents, span_groups, spans_qlf, spans_array = self._get_span_data(docs)
+
+        return (
+            self.model.predict(
+                (
+                    docs,
+                    self.model.ops.asarray(spans_array),
+                    None,
+                    True,
+                )
+            )[1],
+            spans,
+            ents,
+            span_groups,
+            spans_qlf,
+        )
+
+    def set_annotations(
+        self,
+        docs: List[Doc],
+        predictions: Tuple[
+            Dict[str, Ints2d],
+            Spans,
+            List[Optional[Spans]],
+            List[SpanGroups],
+            List[List[str]],
+        ],
+        **kwargs,
+    ) -> None:
+        """
+        Modify the spans of a batch of `spacy.tokens.Span` objects, using the
+        predicted labels.
+
+        Parameters
+        ----------
+        docs: List[Doc]
+            The docs to update, not used in this function
+        predictions: Tuple[Dict[str, Ints2d], Spans, List[Optional[Spans]], List[SpanGroups], List[List[str]]]
+            Tuple returned by the `predict` method, containing:
+            - the label predictions. This is a 2d boolean tensor of shape
+              (`batch_size`, `len(self.bindings)`)
+            - the spans to update
+            - the ents to reassign if the "label_" qualifier is updated
+            - the span groups dicts to reassign if the "label_" qualifier
+              is updated
+            - the qualifiers for each span
+        """  # noqa: E501
+        output, spans, ents, span_groups, spans_qlf = predictions
+        one_hot = output["labels"]
+        for span, span_one_hot, span_qualifiers in zip(spans, one_hot, spans_qlf):
+            for binding, is_present in zip(self.bindings, span_one_hot):
+                if is_present and binding[0] in span_qualifiers:
+                    BINDING_SETTERS[binding](span)
+
+        # Because of the specific nature of the ".label_" attribute, we need to
+        # reassign the ents on `doc.ents` (if `span_getter.from_ents`) and the
+        # span groups mentioned in `span_getter.from_spans_groups` on `doc.spans`
+        if "label_" in self.qualifiers or "label" in self.qualifiers:
+            if ents is not None:
+                for doc, doc_ents in zip(docs, ents):
+                    if doc_ents is not None:
+                        doc.ents = doc_ents
+            if span_groups is not None:
+                for doc, doc_span_groups in zip(docs, span_groups):
+                    doc.spans.update(doc_span_groups)
+
+    def update(
+        self,
+        examples: Iterable[Example],
+        *,
+        drop: float = 0.0,
+        set_annotations: bool = False,
+        sgd: Optional[Optimizer] = None,
+        losses: Optional[Dict[str, float]] = None,
+    ) -> Dict[str, float]:
+        """
+        Learn from a batch of documents and gold-standard information,
+        updating the pipe's model. Delegates to begin_update and get_loss.
+
+        Unlike standard TrainablePipe components, the discrete ops (best
+        selection of labels) are performed by the model directly
+        (`begin_update` returns the loss and the predictions)
+
+        Parameters
+        ----------
+        examples: Iterable[Example]
+            The examples to learn from
+        drop: float = 0.0
+            Dropout rate
+        set_annotations: bool
+            Whether to update the document with predicted spans
+        sgd: Optional[Optimizer]
+            Optimizer
+        losses: Optional[Dict[str, float]]
+            Dict of loss, updated in place
+
+        Returns
+        -------
+        Dict[str, float]
+            Updated losses dict
+        """
+
+        if losses is None:
+            losses = {}
+        losses.setdefault(self.name, 0.0)
+        set_dropout_rate(self.model, drop)
+        examples = list(examples)
+
+        # run the model
+        docs = [eg.predicted for eg in examples]
+        (
+            spans,
+            ents,
+            span_groups,
+            spans_qlf,
+            spans_array,
+            targets,
+        ) = self.examples_to_truth(examples)
+        (loss, predictions), backprop = self.model.begin_update(
+            (docs, spans_array, targets, set_annotations)
+        )
+        loss, gradient = self.get_loss(examples, loss)
+        backprop(gradient)
+        if sgd is not None:
+            self.model.finish_update(sgd)
+        if set_annotations:
+            self.set_annotations(
+                docs,
+                (
+                    predictions,
+                    spans,
+                    ents,
+                    span_groups,
+                    spans_qlf,
+                ),
+            )
+
+        losses[self.name] += loss
+
+        return losses
+
+    def get_loss(self, examples: Iterable[Example], loss) -> Tuple[float, Any]:
+        """Find the loss and gradient of loss for the batch of documents and
+        their predicted scores."""
+        return float(loss.item()), self.model.ops.xp.array([1])
+
+    def initialize(
+        self,
+        get_examples: Callable[[], Iterable[Example]],
+        *,
+        nlp: Optional[Language] = None,
+        labels: Optional[List[str]] = None,
+    ):
+        """
+        Initialize the pipe for training, using a representative set
+        of data examples.
+
+        Gather the qualifier values by iterating on the spans and their
+        qualifiers matching the rules defined in the `candidate_getter`, and
+        retrieving the values of the qualifiers.
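+        Each qualifier then yields a group of mutually exclusive bindings
+        (e.g. `("_.negated", True)` and `("_.negated", False)`), from which
+        the one-hot combination targets given to the model are built.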
+
+        Parameters
+        ----------
+        get_examples: Callable[[], Iterable[Example]]
+            Method to sample some examples
+        nlp: spacy.Language
+            Unused spacy model
+        labels
+            Unused list of labels
+        """
+        qualifier_values = defaultdict(set)
+        for eg in get_examples():
+            spans, *_, spans_qualifiers = self.candidate_getter(eg.reference)
+            for span, span_qualifiers in zip(spans, spans_qualifiers):
+                for qualifier in span_qualifiers:
+                    value = BINDING_GETTERS[qualifier](span)
+                    qualifier_values[qualifier].add(value)
+
+        qualifier_values = {
+            key: sorted(values, key=str) for key, values in qualifier_values.items()
+        }
+
+        self.cfg["qualifiers"] = sorted(qualifier_values.keys())
+        # groups:
+        #   num binding groups (e.g. ["events", "negation"])
+        #   * num label combinations in this group
+        #   * positive bindings in this combination
+        self.cfg["groups"] = [
+            [((key, value),) for value in sorted(values, key=str)]
+            for key, values in qualifier_values.items()
+        ]
+        groups_bindings = [
+            list(
+                dict.fromkeys(
+                    [
+                        binding
+                        for combination_bindings in group_combinations
+                        for binding in combination_bindings
+                    ]
+                )
+            )
+            for group_combinations in self.cfg["groups"]
+        ]
+        self.bindings = [
+            binding for group_bindings in groups_bindings for binding in group_bindings
+        ]
+        self.model.attrs["set_n_labels"](len(self.bindings))
+
+        # combinations_one_hot: list of bool arrays of shape
+        #   num binding groups (e.g. ["events", "negation"])
+        #   * num label combinations in this group
+        #   * presence or absence (bool) of each of the group's bindings
+        #     in the combination
+        combinations_one_hot: List[List[List[bool]]] = [
+            [
+                [binding in combination_bindings for binding in group_bindings]
+                for combination_bindings in group_combinations
+            ]
+            for group_combinations, group_bindings in zip(
+                self.cfg["groups"], groups_bindings
+            )
+        ]
+        # groups_bindings_indices: list of int lists of shape
+        #   num binding groups
+        #   * num bindings in this group (e.g. ["start", "stop"] or
+        #     [True, False]), as indices into `self.bindings`
+        groups_bindings_indices = [
+            [self.bindings.index(binding) for binding in group_bindings]
+            for group_bindings in groups_bindings
+        ]
+
+        self.model.attrs["set_label_groups"](
+            combinations_one_hot,
+            groups_bindings_indices,
+        )
+
+        # Neural network initialization
+        sub_batch = list(islice(get_examples(), NUM_INITIALIZATION_EXAMPLES))
+        doc_sample = [eg.reference for eg in sub_batch]
+        spans, *_, spans_array, targets = self.examples_to_truth(sub_batch)
+        if len(spans) == 0:
+            raise ValueError(
+                "Call begin_training with relevant entities "
+                "and qualifiers annotated in "
+                "at least a few reference examples!"
+            )
+
+        self.model.initialize(X=doc_sample, Y=spans_array)
+
+    def _get_span_data(
+        self, docs: List[Doc]
+    ) -> Tuple[
+        Spans,
+        List[Optional[Spans]],
+        List[SpanGroups],
+        List[List[str]],
+        np.ndarray,
+    ]:
+        spans = []
+        ents, span_groups = [], []
+        spans_qualifiers = []
+        for doc_idx, doc in enumerate(docs):
+            doc_spans, doc_ents, doc_span_groups, qlf = self.candidate_getter(doc)
+            ents.append(doc_ents)
+            span_groups.append(doc_span_groups)
+            spans_qualifiers.extend(qlf)
+            spans.extend([(doc_idx, span) for span in doc_spans])
+        spans_array = np.zeros((len(spans), 3), dtype=int)
+        for i, (doc_idx, span) in enumerate(spans):
+            spans_array[i] = (
+                doc_idx,
+                span.start,
+                span.end,
+            )
+
+        return (
+            [span for i, span in spans],
+            ents,
+            span_groups,
+            spans_qualifiers,
+            spans_array,
+        )
+
+    def examples_to_truth(
+        self, examples: List[Example]
+    ) -> Tuple[
+        Spans,
+        List[Spans],
+        List[SpanGroups],
+        List[List[str]],
+        Ints2d,
+        List[Ints2d],
+    ]:
+        """
+        Converts the spans of the examples into a 2d tensor of
+        (doc_idx, begin, end) rows, and the labels of the spans into one-hot
+        combination targets, one per binding group.
+
+        Parameters
+        ----------
+        examples: List[Example]
+
+        Returns
+        -------
+        Tuple[Spans, List[Spans], List[SpanGroups], List[List[str]], Ints2d, List[Ints2d]]
+            The list of spans, the entities and span groups to reassign if the
+            `label_` attribute is part of the updated qualifiers, the
+            qualifiers of each span, the spans tensor and the targets tensors
+        """  # noqa: E501
+        spans, ents, span_groups, spans_qualifiers, spans_array = self._get_span_data(
+            [eg.reference for eg in examples]
+        )
+        targets = [
+            np.zeros((len(spans), len(group_combinations)), dtype=int)
+            for group_combinations in self.cfg["groups"]
+        ]
+        for span_idx, span in enumerate(spans):
+            span_bindings = []
+            for j, binding in enumerate(self.bindings):
+                if binding[0] in spans_qualifiers[span_idx] and BINDING_GETTERS[
+                    binding
+                ](span):
+                    span_bindings.append(binding)
+            for group_idx, group in enumerate(self.cfg["groups"]):
+                for comb_idx, group_combination in enumerate(group):
+                    if set(group_combination).issubset(set(span_bindings)):
+                        targets[group_idx][span_idx, comb_idx] = 1
+
+        return (
+            spans,
+            ents,
+            span_groups,
+            spans_qualifiers,
+            self.model.ops.asarray(spans_array),
+            [self.model.ops.asarray(arr) for arr in targets],
+        )
diff --git a/edsnlp/edsnlp/pipelines/trainable/span_qualifier/utils.py b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/utils.py
new file mode 100644
index 000000000..2ffc6a96f
--- /dev/null
+++ b/edsnlp/edsnlp/pipelines/trainable/span_qualifier/utils.py
@@ -0,0 +1,184 @@
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+    TypeVar,
+    Union,
+)
+
+from spacy import registry
+from spacy.tokens import Doc, Span
+
+from edsnlp.utils.span_getters import make_span_getter
+
+Binding = Tuple[str, Any]
+Spans = List[Span]
+SpanGroups = Dict[str, Spans]
+
+
+class make_candidate_getter:
+    def __init__(
+        self,
+        on_ents: Optional[Union[bool, Sequence[str]]] = None,
+        on_span_groups: Union[
+            bool, Sequence[str], Mapping[str, Union[bool, Sequence[str]]]
+        ] = False,
+        qualifiers: Optional[Sequence[str]] = None,
+        label_constraints: Optional[Dict[str, List[str]]] = None,
+    ):
+        """
+        Make a span qualifier candidate getter function.
+
+        Parameters
+        ----------
+        on_ents: Union[bool, Sequence[str]]
+            Whether to look into `doc.ents` for spans to classify. If a list of strings
+            is provided, only the spans with the given labels will be
+            considered. If None and `on_span_groups` is False, labels mentioned
+            in `label_constraints` will be used.
+        on_span_groups: Union[bool, Sequence[str], Mapping[str, Sequence[str]]]
+            Whether to look into `doc.spans` for spans to classify:
+
+            - If True, all span groups will be considered
+            - If False, no span group will be considered
+            - If a list of str is provided, only these span groups will be kept
+            - If a mapping is provided, the keys are the span group names and
+              the values are either a list of allowed labels in the group or
+              True to keep them all
+        qualifiers: Optional[Sequence[str]]
+            The qualifiers to predict or train on. If None, keys from the
+            `label_constraints` will be used
+        label_constraints: Optional[Dict[str, List[str]]]
+            Constraints to select qualifiers for each span depending on their
+            labels. Keys of the dict are the qualifiers and values are the
+            labels for which the qualifier is allowed. If None, all qualifiers
+            will be used for all spans
+
+        Returns
+        -------
+        Callable[[Doc], Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]]
+        """
+        if qualifiers is None and label_constraints is None:
+            raise ValueError(
+                "Either `qualifiers` or `label_constraints` must be given to "
+                "provide the qualifiers to predict / train on."
+            )
+        elif qualifiers is None:
+            qualifiers = list(label_constraints.keys())
+
+        if not on_span_groups and on_ents is None:
+            if label_constraints is None:
+                on_ents = True
+            else:
+                on_ents = sorted(
+                    set(
+                        label
+                        for qualifier in label_constraints
+                        for label in label_constraints[qualifier]
+                    )
+                )
+
+        self.span_getter = make_span_getter(on_ents, on_span_groups)
+        self.label_constraints = label_constraints
+        self.qualifiers = qualifiers
+
+    def __call__(
+        self,
+        doc: Doc,
+    ) -> Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]:
+        flattened_spans, ents, span_groups = self.span_getter(
+            doc,
+            return_origin=True,
+        )
+
+        if self.label_constraints:
+            span_qualifiers = [
+                [
+                    qualifier
+                    for qualifier in self.qualifiers
+                    if qualifier not in self.label_constraints
+                    or span.label_ in self.label_constraints[qualifier]
+                ]
+                for span in flattened_spans
+            ]
+        else:
+            span_qualifiers = [self.qualifiers] * len(flattened_spans)
+        return flattened_spans, ents, span_groups, span_qualifiers
+
+
+registry.misc("eds.candidate_span_qualifier_getter")(make_candidate_getter)
+
+
+def _check_path(path: str):
+    assert all(
+        letter.isalnum() or letter in ("_", ".")
+        for letter in path
+    ), (
+        "The label must be a path of valid Python identifiers to be used as "
+        "a getter in the following template: span.<YOUR_LABEL>, "
+        "such as `label_` or `_.negated`"
+    )
+
+
+def make_binding_getter(qualifier: Union[str, Binding]):
+    """
+    Make a qualifier getter
+
+    Parameters
+    ----------
+    qualifier: Union[str, Binding]
+        Either one of the following:
+        - a path to a nested attribute of the span, such as "qualifier_" or
+          "_.negated"
+        - a tuple of (key, value) equality, such as `("_.date.mode", "PASSED")`
+
+    Returns
+    -------
+    Callable[[Span], bool]
+        The qualifier getter
+    """
+    if isinstance(qualifier, tuple):
+        path, value = qualifier
+        _check_path(path)
+        return eval(f"lambda span: span.{path} == value", {"value": value}, {})
+    else:
+        _check_path(qualifier)
+        return eval(f"lambda span: span.{qualifier}")
+
+
+def make_binding_setter(binding: Binding):
+    """
+    Make a qualifier setter
+
+    Parameters
+    ----------
+    binding: Binding
+        A pair of
+        - a path to a nested attribute of the span, such as `qualifier_` or
+          `_.negated`
+        - a value to assign
+
+    Returns
+    -------
+    Callable[[Span], None]
+        The qualifier setter
+    """
+    path, value = binding
+    _check_path(path)
+    fn_string = f"""def fn(span): span.{path} = value"""
+    loc = {"value": value}
+    exec(fn_string, loc, loc)
+    return loc["fn"]
+
+
+K = TypeVar("K")
+V = TypeVar("V")
+
+
+class keydefaultdict(dict):
+    def __init__(self, default_factory: Callable[[K], V]):
+        super().__init__()
+        self.default_factory = default_factory
+
+    def __missing__(self, key: K) -> V:
+        ret = self[key] = self.default_factory(key)
+        return ret
diff --git a/edsnlp/processing/__init__.py b/edsnlp/edsnlp/processing/__init__.py
similarity index 100%
rename from edsnlp/processing/__init__.py
rename to edsnlp/edsnlp/processing/__init__.py
diff --git a/edsnlp/processing/distributed.py b/edsnlp/edsnlp/processing/distributed.py
similarity index 100%
rename from edsnlp/processing/distributed.py
rename to edsnlp/edsnlp/processing/distributed.py
diff --git a/edsnlp/processing/helpers.py b/edsnlp/edsnlp/processing/helpers.py
similarity index 100%
rename from edsnlp/processing/helpers.py
rename to edsnlp/edsnlp/processing/helpers.py
diff --git a/edsnlp/processing/parallel.py b/edsnlp/edsnlp/processing/parallel.py
similarity index 100%
rename from edsnlp/processing/parallel.py
rename to edsnlp/edsnlp/processing/parallel.py
diff --git a/edsnlp/processing/simple.py b/edsnlp/edsnlp/processing/simple.py
similarity index 100%
rename from edsnlp/processing/simple.py
rename to edsnlp/edsnlp/processing/simple.py
diff --git a/edsnlp/processing/utils.py b/edsnlp/edsnlp/processing/utils.py
similarity index 100%
rename from edsnlp/processing/utils.py
rename to edsnlp/edsnlp/processing/utils.py
diff --git a/edsnlp/processing/wrapper.py b/edsnlp/edsnlp/processing/wrapper.py
similarity index 100%
rename from edsnlp/processing/wrapper.py
rename to edsnlp/edsnlp/processing/wrapper.py
diff --git a/edsnlp/resources/adicap.json.gz b/edsnlp/edsnlp/resources/adicap.json.gz
similarity index 100%
rename from edsnlp/resources/adicap.json.gz
rename to edsnlp/edsnlp/resources/adicap.json.gz
diff --git a/edsnlp/resources/cim10.csv.gz b/edsnlp/edsnlp/resources/cim10.csv.gz
similarity index 100%
rename from edsnlp/resources/cim10.csv.gz
rename to edsnlp/edsnlp/resources/cim10.csv.gz
diff --git a/edsnlp/resources/drugs.json b/edsnlp/edsnlp/resources/drugs.json
similarity index 100%
rename from edsnlp/resources/drugs.json
rename to edsnlp/edsnlp/resources/drugs.json
diff --git a/edsnlp/edsnlp/resources/verbs.csv.gz b/edsnlp/edsnlp/resources/verbs.csv.gz
new file mode 100644
index 000000000..370d9d3c1
Binary files /dev/null and b/edsnlp/edsnlp/resources/verbs.csv.gz differ
diff --git a/edsnlp/utils/__init__.py b/edsnlp/edsnlp/utils/__init__.py
similarity index 100%
rename from edsnlp/utils/__init__.py
rename to edsnlp/edsnlp/utils/__init__.py
diff --git a/edsnlp/utils/blocs.py b/edsnlp/edsnlp/utils/blocs.py
similarity index 100%
rename from edsnlp/utils/blocs.py
rename to edsnlp/edsnlp/utils/blocs.py
diff --git a/edsnlp/utils/colors.py b/edsnlp/edsnlp/utils/colors.py
similarity index 100%
rename from edsnlp/utils/colors.py
rename to edsnlp/edsnlp/utils/colors.py
diff --git a/edsnlp/utils/deprecation.py b/edsnlp/edsnlp/utils/deprecation.py
similarity index 100%
rename from edsnlp/utils/deprecation.py
rename to edsnlp/edsnlp/utils/deprecation.py
diff --git a/edsnlp/utils/examples.py b/edsnlp/edsnlp/utils/examples.py
similarity index 100%
rename from edsnlp/utils/examples.py
rename to edsnlp/edsnlp/utils/examples.py
diff --git a/edsnlp/utils/extensions.py b/edsnlp/edsnlp/utils/extensions.py
similarity index 100%
rename from edsnlp/utils/extensions.py
rename to edsnlp/edsnlp/utils/extensions.py
diff --git a/edsnlp/utils/filter.py b/edsnlp/edsnlp/utils/filter.py
similarity index 69%
rename from edsnlp/utils/filter.py
rename to edsnlp/edsnlp/utils/filter.py
index d849f7074..4fc6d5a69 100644
--- a/edsnlp/utils/filter.py
+++ b/edsnlp/edsnlp/utils/filter.py
@@ -1,6 +1,7 @@
-from typing import Any, Callable, Iterable, List, Optional, Tuple, Union
+from typing import Any, Callable, Iterable, List, Optional, Sequence, Tuple, Union
 
 from spacy.tokens import Span
+from spacy.tokens.doc import Doc
 
 
 def default_sort_key(span: Span) -> Tuple[int, int]:
@@ -206,3 +207,100 @@ def get_spans(spans: List[Span], label: Union[int, str]) -> List[Span]:
         return [span for span in spans if span.label == label]
     else:
         return [span for span in spans if span.label_ == label]
+
+
+def span_f1(a: Span, b: Span) -> float:
+    """
+    Computes the F1/Dice overlap between two spans.
+
+    Parameters
+    ----------
+    a: Span
+        First span
+    b: Span
+        Second span
+
+    Returns
+    -------
+    float
+        F1 overlap
+    """
+    start_a, end_a = a.start, a.end
+    start_b, end_b = b.start, b.end
+    overlap = max(0, min(end_a, end_b) - max(start_a, start_b))
+    return 2 * overlap / (end_a - start_a + end_b - start_b)
+
+
+def align_spans(
+    source: Sequence[Span],
+    target: Sequence[Span],
+    sort_by_overlap: bool = False,
+) -> List[List[Span]]:
+    """
+    Aligns two lists of spans by matching source spans that overlap target
+    spans. This function is optimized to avoid quadratic complexity.
+
+    Parameters
+    ----------
+    source : List[Span]
+        Spans to align onto the targets.
+    target : List[Span]
+        Target spans.
+    sort_by_overlap : bool
+        Whether to sort the aligned spans by maximum dice/f1 overlap
+        with the target span.
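+        The overlap is measured with `span_f1`, i.e. the Dice coefficient
+        between the two token ranges.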
+
+    Returns
+    -------
+    List[List[Span]]
+        Subset of `source` spans for each target span
+    """
+    source = sorted(source, key=lambda x: (x.start, x.end))
+    target = sorted(target, key=lambda x: (x.start, x.end))
+
+    aligned = [set() for _ in target]
+    source_idx = 0
+    for target_idx in range(len(target)):
+        while (
+            source_idx < len(source)
+            and source[source_idx].end <= target[target_idx].start
+        ):
+            source_idx += 1
+        i = source_idx
+        while i < len(source) and source[i].start < target[target_idx].end:
+            if source[i].end > target[target_idx].start:
+                aligned[target_idx].add(source[i])
+            i += 1
+
+    aligned = [list(span_set) for span_set in aligned]
+
+    # Sort the aligned spans by maximum dice/f1 overlap with the target span
+    if sort_by_overlap:
+        aligned = [
+            sorted(span_set, key=lambda x: span_f1(x, y), reverse=True)
+            for span_set, y in zip(aligned, target)
+        ]
+
+    return aligned
+
+
+def get_span_group(doclike: Union[Doc, Span], group: str) -> List[Span]:
+    """
+    Get the spans of a span group that are contained inside a doclike object.
+
+    Parameters
+    ----------
+    doclike : Union[Doc, Span]
+        Doclike object to act as a mask.
+    group : str
+        Group name from which to get the spans.
+
+    Returns
+    -------
+    List[Span]
+        List of spans.
+    """
+    if isinstance(doclike, Doc):
+        return list(doclike.spans.get(group, ()))
+    else:
+        return [
+            span
+            for span in doclike.doc.spans.get(group, ())
+            if span.start >= doclike.start and span.end <= doclike.end
+        ]
diff --git a/edsnlp/utils/inclusion.py b/edsnlp/edsnlp/utils/inclusion.py
similarity index 100%
rename from edsnlp/utils/inclusion.py
rename to edsnlp/edsnlp/utils/inclusion.py
diff --git a/edsnlp/utils/lists.py b/edsnlp/edsnlp/utils/lists.py
similarity index 100%
rename from edsnlp/utils/lists.py
rename to edsnlp/edsnlp/utils/lists.py
diff --git a/edsnlp/utils/merge_configs.py b/edsnlp/edsnlp/utils/merge_configs.py
similarity index 100%
rename from edsnlp/utils/merge_configs.py
rename to edsnlp/edsnlp/utils/merge_configs.py
diff --git a/edsnlp/utils/regex.py b/edsnlp/edsnlp/utils/regex.py
similarity index 100%
rename from edsnlp/utils/regex.py
rename to edsnlp/edsnlp/utils/regex.py
diff --git a/edsnlp/utils/resources.py b/edsnlp/edsnlp/utils/resources.py
similarity index 100%
rename from edsnlp/utils/resources.py
rename to edsnlp/edsnlp/utils/resources.py
diff --git a/edsnlp/edsnlp/utils/span_getters.py b/edsnlp/edsnlp/utils/span_getters.py
new file mode 100644
index 000000000..0a6b9c431
--- /dev/null
+++ b/edsnlp/edsnlp/utils/span_getters.py
@@ -0,0 +1,96 @@
+from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union
+
+from spacy import registry
+from spacy.tokens import Doc, Span
+
+Spans = List[Span]
+SpanGroups = Dict[str, Spans]
+
+
+class make_span_getter:
+    def __init__(
+        self,
+        on_ents: Optional[Union[bool, Sequence[str]]] = None,
+        on_spans_groups: Union[
+            bool, Sequence[str], Mapping[str, Union[bool, Sequence[str]]]
+        ] = False,
+    ):
+        """
+        Make a span getter function.
+
+        Parameters
+        ----------
+        on_ents: Union[bool, Sequence[str]]
+            Whether to look into `doc.ents` for spans to return. If a list of
+            strings is provided, only the spans with the given labels will be
+            considered. If None and `on_spans_groups` is False, defaults to
+            True, i.e. all of `doc.ents`.
+        on_spans_groups: Union[bool, Sequence[str], Mapping[str, Sequence[str]]]
+            Whether to look into `doc.spans` for spans to return:
+
+            - If True, all span groups will be considered
+            - If False, no span group will be considered
+            - If a list of str is provided, only these span groups will be kept
+            - If a mapping is provided, the keys are the span group names and
+              the values are either a list of allowed labels in the group or
+              True to keep them all
+        """
+        if not on_spans_groups and on_ents is None:
+            on_ents = True
+
+        self.on_ents = on_ents
+        self.on_spans_groups = on_spans_groups
+
+    def __call__(
+        self,
+        doc: Doc,
+        return_origin: bool = False,
+    ) -> Union[Spans, Tuple[Spans, Optional[Spans], SpanGroups]]:
+        flattened_spans = []
+        span_groups = {}
+        ents = None
+        if self.on_ents:
+            # /!\ doc.ents is not a plain list but a tuple recreated at each
+            # access, so to ensure referential equality between the spans of
+            # `flattened_spans` and `ents`, we convert it to a list to
+            # "extract" the spans first
+            ents = list(doc.ents)
+            if isinstance(self.on_ents, Sequence):
+                flattened_spans.extend(
+                    span for span in ents if span.label_ in self.on_ents
+                )
+            else:
+                flattened_spans.extend(ents)
+
+        if self.on_spans_groups:
+            if isinstance(self.on_spans_groups, Mapping):
+                for name, labels in self.on_spans_groups.items():
+                    if labels:
+                        span_groups[name] = list(doc.spans.get(name, ()))
+                        if isinstance(labels, Sequence):
+                            flattened_spans.extend(
+                                span
+                                for span in span_groups[name]
+                                if span.label_ in labels
+                            )
+                        else:
+                            flattened_spans.extend(span_groups[name])
+            elif isinstance(self.on_spans_groups, Sequence):
+                for name in self.on_spans_groups:
+                    span_groups[name] = list(doc.spans.get(name, ()))
+                    flattened_spans.extend(span_groups[name])
+            else:
+                for name, spans_ in doc.spans.items():
+                    # /!\ spans_ is not a list but a SpanGroup, so to ensure
+                    # referential equality between the spans of
+                    # `flattened_spans` and `span_groups`, we convert it to a
+                    # list to "extract" the spans first
+                    span_groups[name] = list(spans_)
+                    flattened_spans.extend(span_groups[name])
+
+        if return_origin:
+            return flattened_spans, ents, span_groups
+        else:
+            return flattened_spans
+
+
+registry.misc("eds.span_getter")(make_span_getter)
diff --git a/edsnlp/utils/training.py b/edsnlp/edsnlp/utils/training.py
similarity index 100%
rename from edsnlp/utils/training.py
rename to edsnlp/edsnlp/utils/training.py
diff --git a/edsnlp/viz/__init__.py b/edsnlp/edsnlp/viz/__init__.py
similarity index 100%
rename from edsnlp/viz/__init__.py
rename to edsnlp/edsnlp/viz/__init__.py
diff --git a/edsnlp/viz/quick_examples.py b/edsnlp/edsnlp/viz/quick_examples.py
similarity index 100%
rename from edsnlp/viz/quick_examples.py
rename to edsnlp/edsnlp/viz/quick_examples.py
diff --git a/mkdocs.yml b/edsnlp/mkdocs.yml
similarity index 100%
rename from mkdocs.yml
rename to edsnlp/mkdocs.yml
diff --git a/notebooks/README.md b/edsnlp/notebooks/README.md
similarity index 100%
rename from notebooks/README.md
rename to edsnlp/notebooks/README.md
diff --git a/notebooks/connectors/context.py b/edsnlp/notebooks/connectors/context.py
similarity index 100%
rename from notebooks/connectors/context.py
rename to edsnlp/notebooks/connectors/context.py
diff --git a/notebooks/connectors/omop.md b/edsnlp/notebooks/connectors/omop.md
similarity index 100%
rename from notebooks/connectors/omop.md
rename to edsnlp/notebooks/connectors/omop.md
diff --git a/notebooks/context.py b/edsnlp/notebooks/context.py
similarity index 100%
rename from
notebooks/context.py rename to edsnlp/notebooks/context.py diff --git a/notebooks/dates/context.py b/edsnlp/notebooks/dates/context.py similarity index 100% rename from notebooks/dates/context.py rename to edsnlp/notebooks/dates/context.py diff --git a/notebooks/dates/prototype.md b/edsnlp/notebooks/dates/prototype.md similarity index 100% rename from notebooks/dates/prototype.md rename to edsnlp/notebooks/dates/prototype.md diff --git a/notebooks/dates/user-guide.md b/edsnlp/notebooks/dates/user-guide.md similarity index 100% rename from notebooks/dates/user-guide.md rename to edsnlp/notebooks/dates/user-guide.md diff --git a/notebooks/endlines/endlines-example.md b/edsnlp/notebooks/endlines/endlines-example.md similarity index 100% rename from notebooks/endlines/endlines-example.md rename to edsnlp/notebooks/endlines/endlines-example.md diff --git a/notebooks/normalizer/context.py b/edsnlp/notebooks/normalizer/context.py similarity index 100% rename from notebooks/normalizer/context.py rename to edsnlp/notebooks/normalizer/context.py diff --git a/notebooks/normalizer/profiling.md b/edsnlp/notebooks/normalizer/profiling.md similarity index 100% rename from notebooks/normalizer/profiling.md rename to edsnlp/notebooks/normalizer/profiling.md diff --git a/notebooks/normalizer/prototype.md b/edsnlp/notebooks/normalizer/prototype.md similarity index 100% rename from notebooks/normalizer/prototype.md rename to edsnlp/notebooks/normalizer/prototype.md diff --git a/notebooks/pipeline.md b/edsnlp/notebooks/pipeline.md similarity index 100% rename from notebooks/pipeline.md rename to edsnlp/notebooks/pipeline.md diff --git a/notebooks/premier-pipeline.md b/edsnlp/notebooks/premier-pipeline.md similarity index 100% rename from notebooks/premier-pipeline.md rename to edsnlp/notebooks/premier-pipeline.md diff --git a/notebooks/sections/context.py b/edsnlp/notebooks/sections/context.py similarity index 100% rename from notebooks/sections/context.py rename to edsnlp/notebooks/sections/context.py diff --git a/notebooks/sections/section-dataset.md b/edsnlp/notebooks/sections/section-dataset.md similarity index 100% rename from notebooks/sections/section-dataset.md rename to edsnlp/notebooks/sections/section-dataset.md diff --git a/notebooks/sections/testing.md b/edsnlp/notebooks/sections/testing.md similarity index 100% rename from notebooks/sections/testing.md rename to edsnlp/notebooks/sections/testing.md diff --git a/notebooks/sentences/context.py b/edsnlp/notebooks/sentences/context.py similarity index 100% rename from notebooks/sentences/context.py rename to edsnlp/notebooks/sentences/context.py diff --git a/notebooks/sentences/sentences.md b/edsnlp/notebooks/sentences/sentences.md similarity index 100% rename from notebooks/sentences/sentences.md rename to edsnlp/notebooks/sentences/sentences.md diff --git a/notebooks/tnm/prototype.md b/edsnlp/notebooks/tnm/prototype.md similarity index 100% rename from notebooks/tnm/prototype.md rename to edsnlp/notebooks/tnm/prototype.md diff --git a/notebooks/tokenizer/context.py b/edsnlp/notebooks/tokenizer/context.py similarity index 100% rename from notebooks/tokenizer/context.py rename to edsnlp/notebooks/tokenizer/context.py diff --git a/notebooks/tokenizer/tokenizer.md b/edsnlp/notebooks/tokenizer/tokenizer.md similarity index 100% rename from notebooks/tokenizer/tokenizer.md rename to edsnlp/notebooks/tokenizer/tokenizer.md diff --git a/notebooks/utilities/brat.md b/edsnlp/notebooks/utilities/brat.md similarity index 100% rename from 
notebooks/utilities/brat.md rename to edsnlp/notebooks/utilities/brat.md diff --git a/notebooks/utilities/context.py b/edsnlp/notebooks/utilities/context.py similarity index 100% rename from notebooks/utilities/context.py rename to edsnlp/notebooks/utilities/context.py diff --git a/edsnlp/pipelines/base.py b/edsnlp/pipelines/base.py deleted file mode 100644 index a891113b9..000000000 --- a/edsnlp/pipelines/base.py +++ /dev/null @@ -1,64 +0,0 @@ -from typing import List, Optional, Tuple - -from spacy.tokens import Doc, Span - - -class BaseComponent(object): - """ - The `BaseComponent` adds a `set_extensions` method, - called at the creation of the object. - - It helps decouple the initialisation of the pipeline from - the creation of extensions, and is particularly usefull when - distributing EDSNLP on a cluster, since the serialisation mechanism - imposes that the extensions be reset. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.set_extensions() - - @classmethod - def set_extensions(cls) -> None: - """ - Set `Doc`, `Span` and `Token` extensions. - """ - pass - - def _boundaries( - self, doc: Doc, terminations: Optional[List[Span]] = None - ) -> List[Tuple[int, int]]: - """ - Create sub sentences based sentences and terminations found in text. - - Parameters - ---------- - doc: - spaCy Doc object - terminations: - List of tuples with (match_id, start, end) - - Returns - ------- - boundaries: - List of tuples with (start, end) of spans - """ - - if terminations is None: - terminations = [] - - sent_starts = [sent.start for sent in doc.sents] - termination_starts = [t.start for t in terminations] - - starts = sent_starts + termination_starts + [len(doc)] - - # Remove duplicates - starts = list(set(starts)) - - # Sort starts - starts.sort() - - boundaries = [(start, end) for start, end in zip(starts[:-1], starts[1:])] - - return boundaries diff --git a/edsnlp/pipelines/misc/measurements/factory.py b/edsnlp/pipelines/misc/measurements/factory.py deleted file mode 100644 index 2e9aefc07..000000000 --- a/edsnlp/pipelines/misc/measurements/factory.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Dict, List, Optional, Union - -from spacy.language import Language - -import edsnlp.pipelines.misc.measurements.patterns as patterns -from edsnlp.pipelines.misc.measurements.measurements import ( - MeasureConfig, - MeasurementsMatcher, - UnitConfig, -) -from edsnlp.utils.deprecation import deprecated_factory - -DEFAULT_CONFIG = dict( - attr="NORM", - ignore_excluded=True, - units_config=patterns.units_config, - number_terms=patterns.number_terms, - unit_divisors=patterns.unit_divisors, - measurements=None, - stopwords=patterns.stopwords, -) - - -@Language.factory("eds.measurements", default_config=DEFAULT_CONFIG) -@deprecated_factory("eds.measures", "eds.measurements", default_config=DEFAULT_CONFIG) -def create_component( - nlp: Language, - name: str, - measurements: Optional[Union[Dict[str, MeasureConfig], List[str]]], - units_config: Dict[str, UnitConfig], - number_terms: Dict[str, List[str]], - stopwords: List[str], - unit_divisors: List[str], - ignore_excluded: bool, - attr: str, -): - return MeasurementsMatcher( - nlp, - units_config=units_config, - number_terms=number_terms, - unit_divisors=unit_divisors, - measurements=measurements, - stopwords=stopwords, - attr=attr, - ignore_excluded=ignore_excluded, - ) diff --git a/edsnlp/pipelines/misc/measurements/measurements.py b/edsnlp/pipelines/misc/measurements/measurements.py deleted file mode 100644 
index a80b88169..000000000 --- a/edsnlp/pipelines/misc/measurements/measurements.py +++ /dev/null @@ -1,680 +0,0 @@ -import abc -import re -import unicodedata -from collections import defaultdict -from functools import lru_cache -from itertools import repeat -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union - -import regex -import spacy -from spacy.tokens import Doc, Span -from typing_extensions import TypedDict - -from edsnlp.matchers.phrase import EDSPhraseMatcher -from edsnlp.matchers.regex import RegexMatcher -from edsnlp.pipelines.misc.measurements.patterns import common_measurements -from edsnlp.utils.filter import filter_spans - -__all__ = ["MeasurementsMatcher"] - - -AFTER_SNIPPET_LIMIT = 6 -BEFORE_SNIPPET_LIMIT = 10 - - -class UnitConfig(TypedDict): - dim: str - degree: int - scale: float - terms: List[str] - followed_by: Optional[str] = None - - -class UnitlessRange(TypedDict): - min: int - max: int - unit: str - - -class UnitlessPatternConfig(TypedDict): - terms: List[str] - ranges: List[UnitlessRange] - - -class UnitlessPatternConfigWithName(TypedDict): - terms: List[str] - ranges: List[UnitlessRange] - name: str - - -class MeasureConfig(TypedDict): - unit: str - unitless_patterns: List[UnitlessPatternConfig] - - -class Measurement(abc.ABC): - @abc.abstractmethod - def __iter__(self) -> Iterable["SimpleMeasurement"]: - """ - Iter over items of the measure (only one for SimpleMeasurement) - - Returns - ------- - iterable : Iterable["SimpleMeasurement"] - """ - - @abc.abstractmethod - def __getitem__(self, item) -> "SimpleMeasurement": - """ - Access items of the measure (only one for SimpleMeasurement) - - Parameters - ---------- - item : int - - Returns - ------- - measure : SimpleMeasurement - """ - - -class UnitRegistry: - def __init__(self, config: Dict[str, UnitConfig]): - self.config = {unicodedata.normalize("NFKC", k): v for k, v in config.items()} - for unit, unit_config in list(self.config.items()): - if not unit.startswith("per_") and "per_" + unit not in unit_config: - self.config["per_" + unit] = { - "dim": unit_config["dim"], - "degree": -unit_config["degree"], - "scale": 1 / unit_config["scale"], - } - - @lru_cache(maxsize=-1) - def parse_unit(self, unit: str) -> Tuple[str, float]: - degrees = defaultdict(lambda: 0) - scale = 1 - for part in regex.split("(? 1,26 cm - The unit can also be positioned in place of the decimal dot/comma - > 1 cm 26 - Some measurements can be composite - > 1,26 cm x 2,34 mm - And sometimes they are factorized - > Les trois kystes mesurent 1, 2 et 3cm. - - The recognized measurements are stored in the "measurements" SpanGroup. - Each span has a `Measurement` object stored in the "value" extension attribute. - - Parameters - ---------- - nlp : Language - The SpaCy object. 
- measurements : Dict[str, MeasureConfig] - A mapping from measure names to MeasureConfig - Each measure's configuration has the following shape: - { - "unit": str, # the unit of the measure (like "kg"), - "unitless_patterns": { # optional patterns to handle unitless cases - "terms": List[str], # list of preceding terms used to trigger the - measure - # Mapping from ranges to unit to handle cases like - # ("Taille: 1.2" -> 1.20 m vs "Taille: 120" -> 120cm) - "ranges": List[{ - "min": int, - "max": int, - "unit": str, - }, { - "min": int, - "unit": str, - }, ...], - } - number_terms: Dict[str, List[str] - A mapping of numbers to their lexical variants - stopwords: List[str] - A list of stopwords that do not matter when placed between a unitless - trigger - and a number - unit_divisors: List[str] - A list of terms used to divide two units (like: m / s) - attr : str - Whether to match on the text ('TEXT') or on the normalized text ('NORM') - ignore_excluded : bool - Whether to exclude pollution patterns when matching in the text - """ - - if measurements is None: - measurements = common_measurements - elif isinstance(measurements, (list, tuple)): - measurements = {m: common_measurements[m] for m in measurements} - - self.nlp = nlp - self.name = name - self.unit_registry = UnitRegistry(units_config) - self.regex_matcher = RegexMatcher(attr=attr, ignore_excluded=True) - self.term_matcher = EDSPhraseMatcher(nlp.vocab, attr=attr, ignore_excluded=True) - self.unitless_patterns: Dict[str, UnitlessPatternConfigWithName] = {} - self.unit_part_label_hashes: Set[int] = set() - self.unitless_label_hashes: Set[int] = set() - self.unit_followers: Dict[str, str] = {} - self.measure_names: Dict[str, str] = {} - - # NUMBER PATTERNS - self.regex_matcher.add( - "number", - [ - r"(? None: - """ - Set extensions for the measurements pipeline. 
- """ - - if not Span.has_extension("value"): - Span.set_extension("value", default=None) - - def extract_units(self, term_matches: Iterable[Span]) -> Iterable[Span]: - """ - Extracts unit spans from the document by extracting unit atoms (declared in the - units_config parameter) and aggregating them automatically - Ex: "il faut 2 g par jour" - => we extract [g]=unit(g), [par]=divisor(per), [jour]=unit(day) - => we aggregate these adjacent matches together to compose a new unit g_per_day - - - Parameters - ---------- - term_matches: Iterable[Span] - - Returns - ------- - Iterable[Span] - """ - last = None - units = [] - current = [] - unit_label_hashes = set() - for unit_part in filter_spans(term_matches): - if unit_part.label not in self.unit_part_label_hashes: - continue - if last is not None and unit_part.start != last.end and len(current): - doc = current[0].doc - # Last non "per" match: we don't want our units to be like `g_per` - end = next( - (i for i, e in list(enumerate(current))[::-1] if e.label_ != "per"), - None, - ) - if end is not None: - unit = "_".join(part.label_ for part in current[: end + 1]) - units.append(Span(doc, current[0].start, current[end].end, unit)) - unit_label_hashes.add(units[-1].label) - current = [] - last = None - if len(current) > 0 or unit_part.label_ != "per": - current.append(unit_part) - last = unit_part - - end = next( - (i for i, e in list(enumerate(current))[::-1] if e.label_ != "per"), None - ) - if end is not None: - doc = current[0].doc - unit = "_".join(part.label_ for part in current[: end + 1]) - units.append(Span(doc, current[0].start, current[end].end, unit)) - unit_label_hashes.add(units[-1].label) - - return units - - @classmethod - def make_pseudo_sentence( - cls, - doc: Doc, - matches: List[Tuple[Span, bool]], - pseudo_mapping: Dict[int, str], - ) -> Tuple[str, List[int]]: - """ - Creates a pseudo sentence (one letter per entity) - to extract higher order patterns - Ex: the sentence - "Il font {1}{,} {2} {et} {3} {cm} de long{.}" is transformed into "wn,n,nuw." - - Parameters - ---------- - doc: Doc - matches: List[(Span, bool)] - List of tuple of span and whether the span represents a sentence end - pseudo_mapping: Dict[int, str] - A mapping from label to char in the pseudo sentence - - Returns - ------- - (str, List[int]) - - the pseudo sentence - - a list of offsets to convert match indices into pseudo sent char indices - """ - pseudo = [] - last = 0 - offsets = [] - for ent, is_sent_split in matches: - if ent.start != last: - pseudo.append("w") - offsets.append(len(pseudo)) - if is_sent_split: - pseudo.append(".") - else: - pseudo.append(pseudo_mapping.get(ent.label, "w")) - last = ent.end - if len(doc) != last: - pseudo.append("w") - pseudo = "".join(pseudo) - - return pseudo, offsets - - def get_matches(self, doc): - """ - Extract and filter regex and phrase matches in the document - to prepare the measurement extraction. 
- Returns the matches and a list of hashes to quickly find unit matches - - Parameters - ---------- - doc: Doc - - Returns - ------- - Tuple[List[(Span, bool)], Set[int]] - - List of tuples of spans and whether the spans represents a sentence end - - List of hash label to distinguish unit from other matches - """ - sent_ends = [doc[i : i + 1] for i in range(len(doc)) if doc[i].is_sent_end] - - regex_matches = list(self.regex_matcher(doc, as_spans=True)) - term_matches = list(self.term_matcher(doc, as_spans=True)) - - # Detect unit parts and compose them into units - units = self.extract_units(term_matches) - unit_label_hashes = {unit.label for unit in units} - - # Filter matches to prevent matches over dates or doc entities - non_unit_terms = [ - term - for term in term_matches - if term.label not in self.unit_part_label_hashes - ] - - # Filter out measurement-related spans that overlap already matched - # entities (in doc.ents or doc.spans["dates"]) - # Note: we also include sentence ends tokens as 1-token spans in those matches - spans__keep__is_sent_end = filter_spans( - [ - # Tuples (span, keep = is measurement related, is sentence end) - *zip(doc.spans.get("dates", ()), repeat(False), repeat(False)), - *zip(regex_matches, repeat(True), repeat(False)), - *zip(non_unit_terms, repeat(True), repeat(False)), - *zip(units, repeat(True), repeat(False)), - *zip(doc.ents, repeat(False), repeat(False)), - *zip(sent_ends, repeat(True), repeat(True)), - ], - # filter entities to keep only the ... - sort_key=measurements_match_tuples_sort_key, - ) - - # Remove non-measurement related spans (keep = False) and sort the matches - matches_and_is_sentence_end: List[(Span, bool)] = sorted( - [ - (span, is_sent_end) - for span, keep, is_sent_end in spans__keep__is_sent_end - # and remove entities that are not relevant to this pipeline - if keep - ] - ) - - return matches_and_is_sentence_end, unit_label_hashes - - def extract_measurements(self, doc: Doc): - """ - Extracts measure entities from the document - - Parameters - ---------- - doc: Doc - - Returns - ------- - List[Span] - """ - matches, unit_label_hashes = self.get_matches(doc) - - # Make match slice function to query them - def get_matches_after(i): - anchor = matches[i][0] - for j, (ent, is_sent_end) in enumerate(matches[i + 1 :]): - if not is_sent_end and ent.start > anchor.end + AFTER_SNIPPET_LIMIT: - return - yield j + i + 1, ent - - def get_matches_before(i): - anchor = matches[i][0] - for j, (ent, is_sent_end) in enumerate(matches[i::-1]): - if not is_sent_end and ent.end < anchor.start - BEFORE_SNIPPET_LIMIT: - return - yield i - j, ent - - # Make a pseudo sentence to query higher order patterns in the main loop - # `offsets` is a mapping from matches indices (ie match n°i) to - # char indices in the pseudo sentence - pseudo, offsets = self.make_pseudo_sentence( - doc, - matches, - { - self.nlp.vocab.strings["stopword"]: ",", - self.nlp.vocab.strings["number"]: "n", - **{name: "u" for name in unit_label_hashes}, - **{name: "n" for name in self.number_label_hashes}, - }, - ) - - measurements = [] - matched_unit_indices = set() - - # Iterate through the number matches - for number_idx, (number, is_sent_split) in enumerate(matches): - if not is_sent_split and number.label not in self.number_label_hashes: - continue - - # Detect the measure value - try: - if number.label_ == "number": - value = float( - number.text.replace(" ", "").replace(",", ".").replace(" ", "") - ) - else: - value = float(number.label_) - except ValueError: - continue - 
- unit_idx = unit_text = unit_norm = None - - # Find the closest unit after the number - try: - unit_idx, unit_text = next( - (j, ent) - for j, ent in get_matches_after(number_idx) - if ent.label in unit_label_hashes - ) - unit_norm = unit_text.label_ - except (AttributeError, StopIteration): - pass - - # Try to pair the number with this next unit if the two are only separated - # by numbers and separators alternatively (as in [1][,] [2] [and] [3] cm) - try: - pseudo_sent = pseudo[offsets[number_idx] + 1 : offsets[unit_idx]] - if not re.fullmatch(r"(,n)*", pseudo_sent): - unit_text, unit_norm = None, None - except TypeError: - pass - - # Otherwise, try to infer the unit from the preceding unit to handle cases - # like (1 meter 50) - if unit_norm is None and number_idx - 1 in matched_unit_indices: - try: - unit_before = matches[number_idx - 1][0] - if unit_before.end == number.start: - unit_norm = self.unit_followers[unit_before.label_] - except (KeyError, AttributeError, IndexError): - pass - - # If no unit was matched, try to detect unitless patterns before - # the number to handle cases like ("Weight: 63, Height: 170") - if not unit_norm: - try: - (unitless_idx, unitless_text) = next( - (j, e) - for j, e in get_matches_before(number_idx) - if e.label in self.unitless_label_hashes - ) - unit_norm = None - if re.fullmatch( - r"[,n]*", - pseudo[offsets[unitless_idx] + 1 : offsets[number_idx]], - ): - unitless_pattern = self.unitless_patterns[unitless_text.label_] - unit_norm = next( - scope["unit"] - for scope in unitless_pattern["ranges"] - if ("min" not in scope or value >= scope["min"]) - and ("max" not in scope or value < scope["max"]) - ) - except StopIteration: - pass - - # Otherwise, skip this number - if not unit_norm: - continue - - # Compute the final entity - if unit_text and unit_text.end == number.start: - ent = doc[unit_text.start : number.end] - elif unit_text and unit_text.start == number.end: - ent = doc[number.start : unit_text.end] - else: - ent = number - - # Compute the dimensionality of the parsed unit - try: - dims = self.unit_registry.parse_unit(unit_norm)[0] - except KeyError: - continue - - # If the measure was not requested, dismiss it - # Otherwise, relabel the entity and create the value attribute - if dims not in self.measure_names: - continue - - ent._.value = SimpleMeasurement(value, unit_norm, self.unit_registry) - ent.label_ = self.measure_names[dims] - - measurements.append(ent) - - if unit_idx is not None: - matched_unit_indices.add(unit_idx) - - return measurements - - @classmethod - def merge_adjacent_measurements(cls, measurements: List[Span]) -> List[Span]: - """ - Aggregates extracted measurements together when they are adjacent to handle - cases like - - 1 meter 50 cm - - 30° 4' 54" - - Parameters - ---------- - measurements: List[Span] - - Returns - ------- - List[Span] - """ - merged = measurements[:1] - for ent in measurements[1:]: - last = merged[-1] - - if last.end == ent.start and last._.value.unit != ent._.value.unit: - try: - new_value = last._.value + ent._.value - merged[-1] = last = last.doc[last.start : ent.end] - last._.value = new_value - last.label_ = ent.label_ - except (AttributeError, TypeError): - merged.append(ent) - else: - merged.append(ent) - - return merged - - def __call__(self, doc): - """ - Adds measurements to document's "measurements" SpanGroup. - - Parameters - ---------- - doc: - spaCy Doc object - - Returns - ------- - doc: - spaCy Doc object, annotated for extracted measurements. 
- """ - measurements = self.extract_measurements(doc) - measurements = self.merge_adjacent_measurements(measurements) - - doc.spans["measurements"] = measurements - - # for backward compatibility - doc.spans["measures"] = doc.spans["measurements"] - - return doc - - -def measurements_match_tuples_sort_key( - span__keep__is_sent_end: Tuple[Span, bool, bool] -) -> Tuple[int, int, bool]: - span, _, is_sent_end = span__keep__is_sent_end - - length = span.end - span.start - - return length, span.end, not is_sent_end diff --git a/edsnlp/pipelines/misc/measurements/patterns.py b/edsnlp/pipelines/misc/measurements/patterns.py deleted file mode 100644 index b2f0830a8..000000000 --- a/edsnlp/pipelines/misc/measurements/patterns.py +++ /dev/null @@ -1,577 +0,0 @@ -number_terms = { - "1": ["un", "une"], - "2": ["deux"], - "3": ["trois"], - "4": ["quatre"], - "5": ["cinq"], - "6": ["six"], - "7": ["sept"], - "8": ["huit"], - "9": ["neuf"], - "10": ["dix"], - "11": ["onze"], - "12": ["douze"], - "13": ["treize"], - "14": ["quatorze"], - "15": ["quinze"], - "16": ["seize"], - "17": ["dix-sept", "dix sept"], - "18": ["dix-huit", "dix huit"], - "19": ["dix-neuf", "dix neuf"], - "20": ["vingt", "vingts"], - "30": ["trente"], - "40": ["quarante"], - "50": ["cinquante"], - "60": ["soixante"], - "70": ["soixante dix", "soixante-dix"], - "80": ["quatre vingt", "quatre-vingt", "quatre vingts", "quatre-vingts"], - "90": ["quatre vingt dix", "quatre-vingt-dix"], - "100": ["cent"], - "500": ["cinq cent", "cinq-cent"], - "1000": ["mille", "milles"], -} - -units_config = { - # Lengths - "µm": { - "dim": "length", - "degree": 1, - "scale": 1e-4, - "terms": [ - "micrometre", - "micrometres", - "micro-metre", - "micrometres", - "µm", - "um", - ], - "followed_by": None, - }, - "mm": { - "dim": "length", - "degree": 1, - "scale": 1e-1, - "terms": ["millimetre", "millimetres", "milimetre", "milimetres", "mm"], - "followed_by": None, - }, - "cm": { - "dim": "length", - "degree": 1, - "scale": 1e0, - "terms": ["centimetre", "centimetres", "cm"], - "followed_by": None, - }, - "dm": { - "dim": "length", - "degree": 1, - "scale": 1e1, - "terms": ["decimetre", "decimetres", "dm"], - "followed_by": None, - }, - "m": { - "dim": "length", - "degree": 1, - "scale": 1e2, - "terms": ["metre", "metres", "m"], - "followed_by": "cm", - }, - # Weights - "mg": { - "dim": "mass", - "degree": 1, - "scale": 1e0, - "terms": [ - "milligramme", - "miligramme", - "milligrammes", - "miligrammes", - "mgr", - "mg", - ], - "followed_by": None, - }, - "cg": { - "dim": "mass", - "degree": 1, - "scale": 1e1, - "terms": ["centigramme", "centigrammes", "cg", "cgr"], - "followed_by": None, - }, - "dg": { - "dim": "mass", - "degree": 1, - "scale": 1e2, - "terms": ["decigramme", "decigrammes", "dgr", "dg"], - "followed_by": None, - }, - "g": { - "dim": "mass", - "degree": 1, - "scale": 1e3, - "terms": ["gramme", "grammes", "gr", "g"], - "followed_by": None, - }, - "kg": { - "dim": "mass", - "degree": 1, - "scale": 1e6, - "terms": ["kilo", "kilogramme", "kilogrammes", "kgr", "kg"], - "followed_by": "g", - }, - # Durations - "second": { - "dim": "time", - "degree": 1, - "scale": 1, - "terms": ["seconde", "secondes", "s"], - "followed_by": None, - }, - "minute": { - "dim": "time", - "degree": 1, - "scale": 60, - "terms": ["mn", "min", "minute", "minutes"], - "followed_by": "second", - }, - "hour": { - "dim": "time", - "degree": 1, - "scale": 3600, - "terms": ["heure", "h"], - "followed_by": "minute", - }, - "day": { - "dim": "time", - "degree": 1, - "scale": 
3600 * 1, - "terms": ["jour", "jours", "j"], - "followed_by": None, - }, - "month": { - "dim": "time", - "degree": 1, - "scale": 3600 * 30.4167, - "terms": ["mois"], - "followed_by": None, - }, - "week": { - "dim": "time", - "degree": 1, - "scale": 3600 * 7, - "terms": ["semaine", "semaines"], - "followed_by": None, - }, - "year": { - "dim": "time", - "degree": 1, - "scale": 3600 * 365.25, - "terms": ["an", "année", "ans", "années"], - "followed_by": None, - }, - # Angle - "arc-second": { - "dim": "time", - "degree": 1, - "scale": 2 / 60.0, - "terms": ['"', "''"], - "followed_by": None, - }, - "arc-minute": { - "dim": "time", - "degree": 1, - "scale": 2, - "terms": ["'"], - "followed_by": "arc-second", - }, - "degree": { - "dim": "time", - "degree": 1, - "scale": 120, - "terms": ["degre", "°", "deg"], - "followed_by": "arc-minute", - }, - # Temperature - "celcius": { - "dim": "temperature", - "degree": 1, - "scale": 1, - "terms": ["°C", "° celcius", "celcius"], - "followed_by": None, - }, - # Volumes - "ml": { - "dim": "length", - "degree": 3, - "scale": 1e0, - "terms": ["mililitre", "millilitre", "mililitres", "millilitres", "ml"], - "followed_by": None, - }, - "cl": { - "dim": "length", - "degree": 3, - "scale": 1e1, - "terms": ["centilitre", "centilitres", "cl"], - "followed_by": None, - }, - "dl": { - "dim": "length", - "degree": 3, - "scale": 1e2, - "terms": ["decilitre", "decilitres", "dl"], - "followed_by": None, - }, - "l": { - "dim": "length", - "degree": 3, - "scale": 1e3, - "terms": ["litre", "litres", "l", "dm3"], - "followed_by": "ml", - }, - "cac": { - "dim": "length", - "degree": 3, - "scale": 5e-3, - "terms": ["cac", "c.a.c", "cuillere à café", "cuillères à café"], - "followed_by": None, - }, - "goutte": { - "dim": "length", - "degree": 3, - "scale": 5e-5, - "terms": ["gt", "goutte"], - "followed_by": None, - }, - "mm3": { - "dim": "length", - "degree": 3, - "scale": 1e-3, - "terms": ["mm3", "mm³"], - "followed_by": None, - }, - "cm3": { - "dim": "length", - "degree": 3, - "scale": 1e0, - "terms": ["cm3", "cm³", "cc"], - "followed_by": None, - }, - "dm3": { - "dim": "length", - "degree": 3, - "scale": 1e3, - "terms": ["dm3", "dm³"], - "followed_by": None, - }, - "m3": { - "dim": "length", - "degree": 3, - "scale": 1e6, - "terms": ["m3", "m³"], - "followed_by": None, - }, - # Surfaces - "µm2": { - "dim": "length", - "degree": 2, - "scale": 1e-8, - "terms": ["µm2", "µm²"], - "followed_by": None, - }, - "mm2": { - "dim": "length", - "degree": 2, - "scale": 1e-2, - "terms": ["mm2", "mm²"], - "followed_by": None, - }, - "cm2": { - "dim": "length", - "degree": 2, - "scale": 1e0, - "terms": ["cm2", "cm²"], - "followed_by": None, - }, - "dm2": { - "dim": "length", - "degree": 2, - "scale": 1e2, - "terms": ["dm2", "dm²"], - "followed_by": None, - }, - "m2": { - "dim": "length", - "degree": 2, - "scale": 1e4, - "terms": ["m2", "m²"], - "followed_by": None, - }, - # International units - "mui": { - "dim": "ui", - "degree": 1, - "scale": 1e0, - "terms": ["mui", "m ui"], - "followed_by": None, - }, - "dui": { - "dim": "ui", - "degree": 1, - "scale": 1e1, - "terms": ["dui", "d ui"], - "followed_by": None, - }, - "cui": { - "dim": "ui", - "degree": 1, - "scale": 1e2, - "terms": ["cui", "c ui"], - "followed_by": None, - }, - "ui": { - "dim": "ui", - "degree": 1, - "scale": 1e3, - "terms": ["ui"], - "followed_by": None, - }, - # Inverse - "per_µm": { - "dim": "length", - "degree": -1, - "scale": 1e4, - "terms": ["µm-1"], - "followed_by": None, - }, - "per_mm": { - "dim": "length", - 
"degree": -1, - "scale": 1e1, - "terms": ["mm-1"], - "followed_by": None, - }, - "per_cm": { - "dim": "length", - "degree": -1, - "scale": 1e0, - "terms": ["cm-1"], - "followed_by": None, - }, - "per_dm": { - "dim": "length", - "degree": -1, - "scale": 1e-1, - "terms": ["dm-1"], - "followed_by": None, - }, - "per_m": { - "dim": "length", - "degree": -1, - "scale": 1e-3, - "terms": ["m-1"], - "followed_by": None, - }, - "per_mg": { - "dim": "mass", - "degree": -1, - "scale": 1e-0, - "terms": ["mgr-1", "mg-1", "mgr⁻¹", "mg⁻¹"], - "followed_by": None, - }, - "per_cg": { - "dim": "mass", - "degree": -1, - "scale": 1e-1, - "terms": ["cg-1", "cgr-1", "cg⁻¹", "cgr⁻¹"], - "followed_by": None, - }, - "per_dg": { - "dim": "mass", - "degree": -1, - "scale": 1e-2, - "terms": ["dgr-1", "dg-1", "dgr⁻¹", "dg⁻¹"], - "followed_by": None, - }, - "per_g": { - "dim": "mass", - "degree": -1, - "scale": 1e-3, - "terms": ["gr-1", "g-1", "gr⁻¹", "g⁻¹"], - "followed_by": None, - }, - "per_kg": { - "dim": "mass", - "degree": -1, - "scale": 1e-6, - "terms": ["kgr-1", "kg-1", "kgr⁻¹", "kg⁻¹"], - "followed_by": None, - }, - "per_ml": { - "dim": "length", - "degree": -3, - "scale": 1e-0, - "terms": ["ml-1", "ml⁻¹"], - "followed_by": None, - }, - "per_cl": { - "dim": "length", - "degree": -3, - "scale": 1e-1, - "terms": ["cl-1", "cl⁻¹"], - "followed_by": None, - }, - "per_dl": { - "dim": "length", - "degree": -3, - "scale": 1e-2, - "terms": ["dl-1", "dl⁻¹"], - "followed_by": None, - }, - "per_l": { - "dim": "length", - "degree": -3, - "scale": 1e-3, - "terms": ["l-1", "l⁻¹"], - "followed_by": None, - }, - "per_mm3": { - "dim": "length", - "degree": -3, - "scale": 1e3, - "terms": ["mm-3", "mm⁻³"], - "followed_by": None, - }, - "per_cm3": { - "dim": "length", - "degree": -3, - "scale": 1e-0, - "terms": ["cm-3", "cm⁻³", "cc-1", "cc⁻¹"], - "followed_by": None, - }, - "per_dm3": { - "dim": "length", - "degree": -3, - "scale": 1e-3, - "terms": ["dm-3", "dm⁻³"], - "followed_by": None, - }, - "per_m3": { - "dim": "length", - "degree": -3, - "scale": 1e-6, - "terms": ["m-3", "m⁻³"], - "followed_by": None, - }, - "per_mui": { - "dim": "ui", - "degree": -1, - "scale": 1e-0, - "terms": ["mui-1", "mui⁻¹"], - "followed_by": None, - }, - "per_dui": { - "dim": "ui", - "degree": -1, - "scale": 1e-1, - "terms": ["dui-1", "dui⁻¹"], - "followed_by": None, - }, - "per_cui": { - "dim": "ui", - "degree": -1, - "scale": 1e-2, - "terms": ["cui-1", "cui⁻¹"], - "followed_by": None, - }, - "per_ui": { - "dim": "ui", - "degree": -1, - "scale": 1e-3, - "terms": ["ui-1", "ui⁻¹"], - "followed_by": None, - }, - # Surfaces - "per_µm2": { - "dim": "length", - "degree": -2, - "scale": 1e8, - "terms": ["µm-2", "µm⁻²"], - "followed_by": None, - }, - "per_mm2": { - "dim": "length", - "degree": -2, - "scale": 1e2, - "terms": ["mm-2", "mm⁻²"], - "followed_by": None, - }, - "per_cm2": { - "dim": "length", - "degree": -2, - "scale": 1e-0, - "terms": ["cm-2", "cm⁻²"], - "followed_by": None, - }, - "per_dm2": { - "dim": "length", - "degree": -2, - "scale": 1e-2, - "terms": ["dm-2", "dm⁻²"], - "followed_by": None, - }, - "per_m2": { - "dim": "length", - "degree": -2, - "scale": 1e-4, - "terms": ["m-2", "m⁻²"], - "followed_by": None, - }, -} - - -common_measurements = { - "eds.weight": { - "unit": "kg", - "unitless_patterns": [ - { - "terms": ["poids", "poid", "pese", "pesant", "pesait", "pesent"], - "ranges": [ - {"min": 0, "max": 200, "unit": "kg"}, - {"min": 200, "unit": "g"}, - ], - } - ], - }, - "eds.size": { - "unit": "m", - "unitless_patterns": [ - { - 
"terms": [ - "mesure", - "taille", - "mesurant", - "mesurent", - "mesurait", - "mesuree", - "hauteur", - "largeur", - "longueur", - ], - "ranges": [ - {"min": 0, "max": 3, "unit": "m"}, - {"min": 3, "unit": "cm"}, - ], - } - ], - }, - "eds.bmi": { - "unit": "kg_per_m2", - "unitless_patterns": [ - {"terms": ["imc", "bmi"], "ranges": [{"unit": "kg_per_m2"}]} - ], - }, - "eds.volume": {"unit": "m3", "unitless_patterns": []}, -} - -unit_divisors = ["/", "par"] - -stopwords = ["par", "sur", "de", "a", ":", ",", "et"] diff --git a/pyproject.toml b/edsnlp/pyproject.toml similarity index 87% rename from pyproject.toml rename to edsnlp/pyproject.toml index 5f65bfd6c..376635b5f 100644 --- a/pyproject.toml +++ b/edsnlp/pyproject.toml @@ -111,14 +111,24 @@ where = ["."] "measurements" = "edsnlp.components:measurements" "drugs" = "edsnlp.components:drugs" "nested_ner" = "edsnlp.components:nested_ner" +"span_qualifier" = "edsnlp.components:span_qualifier" "adicap" = "edsnlp.components:adicap" "umls" = "edsnlp.components:umls" +"clean-entities" = "edsnlp.pipelines.clean_entities:CleanEntities" + +[project.entry-points."spacy_readers"] +"eds.Corpus.v1" = "edsnlp.corpus_reader:Corpus" [project.entry-points."spacy_architectures"] -"eds.stack_crf_ner_model.v1" = "edsnlp.models.stack_crf_ner:create_model" +"eds.stack_crf_ner_model.v1" = "edsnlp.pipelines.trainable.nested_ner.stack_crf_ner:create_model" +"eds.span_multi_classifier.v1" = "edsnlp.pipelines.trainable.span_qualifier.span_multi_classifier:create_model" [project.entry-points."spacy_scorers"] -"eds.nested_ner_scorer.v1" = "edsnlp.pipelines.trainable.nested_ner:make_nested_ner_scorer" +"eds.nested_ner_scorer.v1" = "edsnlp.pipelines.trainable.nested_ner.nested_ner:make_nested_ner_scorer" +"eds.span_qualifier_scorer.v1" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_scorer" + +[project.entry-points."spacy_misc"] +"eds.candidate_span_qualifier_getter" = "edsnlp.pipelines.trainable.span_qualifier.factory:create_candidate_getter" [project.entry-points."spacy_languages"] "eds" = "edsnlp.language:EDSLanguage" diff --git a/edsnlp/resources/verbs.csv.gz b/edsnlp/resources/verbs.csv.gz deleted file mode 100644 index b05fb4eef..000000000 Binary files a/edsnlp/resources/verbs.csv.gz and /dev/null differ diff --git a/scripts/adicap.py b/edsnlp/scripts/adicap.py similarity index 100% rename from scripts/adicap.py rename to edsnlp/scripts/adicap.py diff --git a/scripts/cim10.py b/edsnlp/scripts/cim10.py similarity index 100% rename from scripts/cim10.py rename to edsnlp/scripts/cim10.py diff --git a/scripts/conjugate_verbs.py b/edsnlp/scripts/conjugate_verbs.py similarity index 100% rename from scripts/conjugate_verbs.py rename to edsnlp/scripts/conjugate_verbs.py diff --git a/scripts/context.py b/edsnlp/scripts/context.py similarity index 100% rename from scripts/context.py rename to edsnlp/scripts/context.py diff --git a/scripts/serve.py b/edsnlp/scripts/serve.py similarity index 100% rename from scripts/serve.py rename to edsnlp/scripts/serve.py diff --git a/setup.py b/edsnlp/setup.py similarity index 100% rename from setup.py rename to edsnlp/setup.py diff --git a/tests/conftest.py b/edsnlp/tests/conftest.py similarity index 100% rename from tests/conftest.py rename to edsnlp/tests/conftest.py diff --git a/tests/connectors/test_brat.py b/edsnlp/tests/connectors/test_brat.py similarity index 100% rename from tests/connectors/test_brat.py rename to edsnlp/tests/connectors/test_brat.py diff --git a/tests/connectors/test_labeltool.py 
b/edsnlp/tests/connectors/test_labeltool.py similarity index 100% rename from tests/connectors/test_labeltool.py rename to edsnlp/tests/connectors/test_labeltool.py diff --git a/tests/connectors/test_omop.py b/edsnlp/tests/connectors/test_omop.py similarity index 100% rename from tests/connectors/test_omop.py rename to edsnlp/tests/connectors/test_omop.py diff --git a/tests/matchers/test_phrase.py b/edsnlp/tests/matchers/test_phrase.py similarity index 100% rename from tests/matchers/test_phrase.py rename to edsnlp/tests/matchers/test_phrase.py diff --git a/tests/matchers/test_regex.py b/edsnlp/tests/matchers/test_regex.py similarity index 100% rename from tests/matchers/test_regex.py rename to edsnlp/tests/matchers/test_regex.py diff --git a/tests/matchers/test_simstring.py b/edsnlp/tests/matchers/test_simstring.py similarity index 100% rename from tests/matchers/test_simstring.py rename to edsnlp/tests/matchers/test_simstring.py diff --git a/tests/pipelines/core/test_contextual_matcher.py b/edsnlp/tests/pipelines/core/test_contextual_matcher.py similarity index 100% rename from tests/pipelines/core/test_contextual_matcher.py rename to edsnlp/tests/pipelines/core/test_contextual_matcher.py diff --git a/tests/pipelines/core/test_endlines.py b/edsnlp/tests/pipelines/core/test_endlines.py similarity index 100% rename from tests/pipelines/core/test_endlines.py rename to edsnlp/tests/pipelines/core/test_endlines.py diff --git a/tests/pipelines/core/test_matcher.py b/edsnlp/tests/pipelines/core/test_matcher.py similarity index 100% rename from tests/pipelines/core/test_matcher.py rename to edsnlp/tests/pipelines/core/test_matcher.py index c1be81b23..5dda35bc5 100644 --- a/tests/pipelines/core/test_matcher.py +++ b/edsnlp/tests/pipelines/core/test_matcher.py @@ -1,9 +1,9 @@ import pytest from pytest import fixture +from tests.conftest import text from thinc.config import ConfigValidationError from edsnlp.pipelines.core.matcher import GenericMatcher -from tests.conftest import text @fixture diff --git a/tests/pipelines/core/test_normalisation.py b/edsnlp/tests/pipelines/core/test_normalisation.py similarity index 100% rename from tests/pipelines/core/test_normalisation.py rename to edsnlp/tests/pipelines/core/test_normalisation.py diff --git a/tests/pipelines/core/test_sentences.py b/edsnlp/tests/pipelines/core/test_sentences.py similarity index 100% rename from tests/pipelines/core/test_sentences.py rename to edsnlp/tests/pipelines/core/test_sentences.py diff --git a/tests/pipelines/core/test_terminology.py b/edsnlp/tests/pipelines/core/test_terminology.py similarity index 100% rename from tests/pipelines/core/test_terminology.py rename to edsnlp/tests/pipelines/core/test_terminology.py diff --git a/tests/pipelines/misc/test_consultation_date.py b/edsnlp/tests/pipelines/misc/test_consultation_date.py similarity index 100% rename from tests/pipelines/misc/test_consultation_date.py rename to edsnlp/tests/pipelines/misc/test_consultation_date.py diff --git a/tests/pipelines/misc/test_dates.py b/edsnlp/tests/pipelines/misc/test_dates.py similarity index 100% rename from tests/pipelines/misc/test_dates.py rename to edsnlp/tests/pipelines/misc/test_dates.py diff --git a/tests/pipelines/misc/test_measurements.py b/edsnlp/tests/pipelines/misc/test_measurements.py similarity index 100% rename from tests/pipelines/misc/test_measurements.py rename to edsnlp/tests/pipelines/misc/test_measurements.py diff --git a/tests/pipelines/misc/test_reason.py b/edsnlp/tests/pipelines/misc/test_reason.py similarity 
index 100% rename from tests/pipelines/misc/test_reason.py rename to edsnlp/tests/pipelines/misc/test_reason.py diff --git a/tests/pipelines/misc/test_sections.py b/edsnlp/tests/pipelines/misc/test_sections.py similarity index 100% rename from tests/pipelines/misc/test_sections.py rename to edsnlp/tests/pipelines/misc/test_sections.py diff --git a/tests/pipelines/ner/test_adicap.py b/edsnlp/tests/pipelines/ner/test_adicap.py similarity index 100% rename from tests/pipelines/ner/test_adicap.py rename to edsnlp/tests/pipelines/ner/test_adicap.py diff --git a/tests/pipelines/ner/test_adicap_decoder.py b/edsnlp/tests/pipelines/ner/test_adicap_decoder.py similarity index 100% rename from tests/pipelines/ner/test_adicap_decoder.py rename to edsnlp/tests/pipelines/ner/test_adicap_decoder.py diff --git a/tests/pipelines/ner/test_cim10.py b/edsnlp/tests/pipelines/ner/test_cim10.py similarity index 100% rename from tests/pipelines/ner/test_cim10.py rename to edsnlp/tests/pipelines/ner/test_cim10.py diff --git a/tests/pipelines/ner/test_covid.py b/edsnlp/tests/pipelines/ner/test_covid.py similarity index 100% rename from tests/pipelines/ner/test_covid.py rename to edsnlp/tests/pipelines/ner/test_covid.py diff --git a/tests/pipelines/ner/test_drugs.py b/edsnlp/tests/pipelines/ner/test_drugs.py similarity index 100% rename from tests/pipelines/ner/test_drugs.py rename to edsnlp/tests/pipelines/ner/test_drugs.py diff --git a/tests/pipelines/ner/test_score.py b/edsnlp/tests/pipelines/ner/test_score.py similarity index 100% rename from tests/pipelines/ner/test_score.py rename to edsnlp/tests/pipelines/ner/test_score.py diff --git a/tests/pipelines/ner/test_tnm.py b/edsnlp/tests/pipelines/ner/test_tnm.py similarity index 100% rename from tests/pipelines/ner/test_tnm.py rename to edsnlp/tests/pipelines/ner/test_tnm.py diff --git a/tests/pipelines/ner/test_umls.py b/edsnlp/tests/pipelines/ner/test_umls.py similarity index 100% rename from tests/pipelines/ner/test_umls.py rename to edsnlp/tests/pipelines/ner/test_umls.py diff --git a/tests/pipelines/qualifiers/conftest.py b/edsnlp/tests/pipelines/qualifiers/conftest.py similarity index 100% rename from tests/pipelines/qualifiers/conftest.py rename to edsnlp/tests/pipelines/qualifiers/conftest.py diff --git a/tests/pipelines/qualifiers/test_family.py b/edsnlp/tests/pipelines/qualifiers/test_family.py similarity index 100% rename from tests/pipelines/qualifiers/test_family.py rename to edsnlp/tests/pipelines/qualifiers/test_family.py diff --git a/tests/pipelines/qualifiers/test_history.py b/edsnlp/tests/pipelines/qualifiers/test_history.py similarity index 100% rename from tests/pipelines/qualifiers/test_history.py rename to edsnlp/tests/pipelines/qualifiers/test_history.py diff --git a/tests/pipelines/qualifiers/test_hypothesis.py b/edsnlp/tests/pipelines/qualifiers/test_hypothesis.py similarity index 100% rename from tests/pipelines/qualifiers/test_hypothesis.py rename to edsnlp/tests/pipelines/qualifiers/test_hypothesis.py diff --git a/tests/pipelines/qualifiers/test_negation.py b/edsnlp/tests/pipelines/qualifiers/test_negation.py similarity index 100% rename from tests/pipelines/qualifiers/test_negation.py rename to edsnlp/tests/pipelines/qualifiers/test_negation.py diff --git a/tests/pipelines/qualifiers/test_reported_speech.py b/edsnlp/tests/pipelines/qualifiers/test_reported_speech.py similarity index 100% rename from tests/pipelines/qualifiers/test_reported_speech.py rename to edsnlp/tests/pipelines/qualifiers/test_reported_speech.py diff --git 
a/tests/pipelines/test_pipelines.py b/edsnlp/tests/pipelines/test_pipelines.py
similarity index 100%
rename from tests/pipelines/test_pipelines.py
rename to edsnlp/tests/pipelines/test_pipelines.py
diff --git a/tests/pipelines/trainable/test_nested_ner.py b/edsnlp/tests/pipelines/trainable/test_nested_ner.py
similarity index 100%
rename from tests/pipelines/trainable/test_nested_ner.py
rename to edsnlp/tests/pipelines/trainable/test_nested_ner.py
diff --git a/tests/processing/test_processing.py b/edsnlp/tests/processing/test_processing.py
similarity index 100%
rename from tests/processing/test_processing.py
rename to edsnlp/tests/processing/test_processing.py
diff --git a/tests/readme.md b/edsnlp/tests/readme.md
similarity index 100%
rename from tests/readme.md
rename to edsnlp/tests/readme.md
diff --git a/tests/test_conjugator.py b/edsnlp/tests/test_conjugator.py
similarity index 100%
rename from tests/test_conjugator.py
rename to edsnlp/tests/test_conjugator.py
diff --git a/tests/test_docs.py b/edsnlp/tests/test_docs.py
similarity index 100%
rename from tests/test_docs.py
rename to edsnlp/tests/test_docs.py
diff --git a/tests/test_language.py b/edsnlp/tests/test_language.py
similarity index 100%
rename from tests/test_language.py
rename to edsnlp/tests/test_language.py
diff --git a/tests/utils/test_examples.py b/edsnlp/tests/utils/test_examples.py
similarity index 100%
rename from tests/utils/test_examples.py
rename to edsnlp/tests/utils/test_examples.py
diff --git a/tests/utils/test_filter.py b/edsnlp/tests/utils/test_filter.py
similarity index 100%
rename from tests/utils/test_filter.py
rename to edsnlp/tests/utils/test_filter.py
diff --git a/tests/utils/test_quick_examples.py b/edsnlp/tests/utils/test_quick_examples.py
similarity index 100%
rename from tests/utils/test_quick_examples.py
rename to edsnlp/tests/utils/test_quick_examples.py
diff --git a/notebooks/example.txt b/notebooks/example.txt
deleted file mode 100644
index cbd694c9b..000000000
--- a/notebooks/example.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-Motif :
-Le patient est admis le 29 août pour des difficultés respiratoires.
-
-Antécédents familiaux :
-Le père est asthmatique, sans traitement particulier.
-
-HISTOIRE DE LA MALADIE
-Le patient dit avoir de la toux depuis trois jours. Elle a empiré jusqu'à nécessiter un passage aux urgences.
-
-Conclusion
-Possible infection au coronavirus
diff --git a/notebooks/export_pandas_to_brat.py b/notebooks/export_pandas_to_brat.py
new file mode 100644
index 000000000..bd370325e
--- /dev/null
+++ b/notebooks/export_pandas_to_brat.py
@@ -0,0 +1,90 @@
+import re
+
+import pandas as pd
+
+
+def export_pandas_to_brat(
+    ann_path,
+    txt_path,
+    df_to_convert,
+    label_column_name,
+    span_column_name,
+    term_column_name,
+    annotation_column_name=None,
+):
+    """
+    - ann_path: str, path of the ann file to write.
+    - txt_path: str, path of the txt file the ann file refers to. It is read to
+      check whether a span crosses a newline.
+    - df_to_convert: pandas DataFrame containing at least a column of labels,
+      a column of spans and a column of terms.
+    - label_column_name: str, name of the column of df_to_convert containing
+      the labels. This column should only contain str values.
+    - span_column_name: str, name of the column of df_to_convert containing
+      the spans. This column should only contain lists, the first element of
+      each list being the start of the span and the second element its end.
+    - term_column_name: str, name of the column of df_to_convert containing
+      the raw str taken from the raw text. This column should only contain
+      str values.
+    - annotation_column_name: OPTIONAL str, name of the column of
+      df_to_convert containing the annotations. This column should only
+      contain str values. If None, no annotation is saved.
+    """
+
+    SEP = "\t"
+    ANNOTATION_LABEL = "AnnotatorNotes"
+    brat_raw = ""
+    n_annotation = 0
+
+    with open(txt_path, "r") as f:
+        txt_raw = f.read()
+
+    if annotation_column_name:
+        df_to_convert = df_to_convert[
+            [
+                label_column_name,
+                span_column_name,
+                term_column_name,
+                annotation_column_name,
+            ]
+        ]
+    else:
+        # No annotation column was given: add an empty placeholder column so
+        # that each row still unpacks into four values in the loop below.
+        df_to_convert = df_to_convert[
+            [label_column_name, span_column_name, term_column_name]
+        ].copy()
+        df_to_convert["annotation"] = ""
+
+    # Iterate through the dataframe to write each line of the ann file
+    for index, (label, span, term, annotation) in df_to_convert.iterrows():
+        term_raw = txt_raw[span[0] : span[1]]
+        if "\n" in term_raw:
+            # Brat spans cannot cross a newline: split the span into
+            # discontinuous fragments around each newline character
+            span_str = (
+                str(span[0])
+                + "".join(
+                    " "
+                    + str(span[0] + newline_index.start())
+                    + ";"
+                    + str(span[0] + newline_index.start() + 1)
+                    for newline_index in re.finditer("\n", term_raw)
+                )
+                + " "
+                + str(span[1])
+            )
+        else:
+            span_str = str(span[0]) + " " + str(span[1])
+        brat_raw += (
+            "T" + str(index + 1) + SEP + label + " " + span_str + SEP + term + "\n"
+        )
+        if len(annotation):
+            n_annotation += 1
+            brat_raw += (
+                "#"
+                + str(n_annotation)
+                + SEP
+                + ANNOTATION_LABEL
+                + " "
+                + "T"
+                + str(index + 1)
+                + SEP
+                + annotation
+                + "\n"
            )
+
+    # Strip only the final newline; print() adds one back when writing
+    brat_raw = brat_raw[:-1]
+    with open(ann_path, "w") as f:
+        print(brat_raw, file=f)
diff --git a/notebooks/get_stats_by_section_on_cim10.md b/notebooks/get_stats_by_section_on_cim10.md
new file mode 100644
index 000000000..21e26beea
--- /dev/null
+++ b/notebooks/get_stats_by_section_on_cim10.md
@@ -0,0 +1,1828 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+    jupytext_version: 1.15.0
+  kernelspec:
+    display_name: BioMedics_client
+    language: python
+    name: biomedics_client
+---
+
+## TODO
+- [x] Check the threshold used for positivity
+- [x] Look at the patients the cohorts have in common
+- [x] Look at Hémoglobine and DFG
+- [x] Finish fine-tuning CODER-EDS. Just execute the `/export/home/cse200093/Jacques_Bio/normalisation/py_files/train_coder.sh` file up to 1M iterations (to know the number of iterations, just look at where the weights of CODER-EDS are saved, i.e. at `/export/home/cse200093/Jacques_Bio/data_bio/coder_output`: the files are saved with the number of iterations in their names). Then evaluate this model, for example with the files in `/export/home/cse200093/Jacques_Bio/normalisation/notebooks/coder`.
+- [X] Query the drugs from the structured data!
+- [X] Finish the normalisation of the drugs extracted by NER
+- [ ] Clean the code and put it on GitHub
+- [ ] Collect the figures
+- [ ] Start writing
+
+
+```python
+%reload_ext autoreload
+%autoreload 2
+%reload_ext jupyter_black
+# Cancel jobs left over from a previous session; this assumes a Spark
+# context `sc` already exists (it is (re)created two cells below).
+sc.cancelAllJobs()
+```
+
+```python
+import os
+
+os.environ["OMP_NUM_THREADS"] = "16"
+```
+
+```python
+from edsteva import improve_performances
+
+spark, sc, sql = improve_performances(
+    to_add_conf=[
+        ("spark.yarn.max.executor.failures", "10"),
+        ("spark.executor.memory", "32g"),
+        ("spark.driver.memory", "32g"),
+        ("spark.driver.maxResultSize", "16g"),
+        ("spark.default.parallelism", 160),
+        ("spark.shuffle.service.enabled", "true"),
+        ("spark.sql.shuffle.partitions", 160),
+        ("spark.yarn.am.memory", "4g"),
+        ("spark.dynamicAllocation.enabled", True),
+        ("spark.dynamicAllocation.minExecutors", "20"),
+        ("spark.dynamicAllocation.maxExecutors", "20"),
+        ("spark.executor.cores", "8"),
+    ]
+)
+```
+
+```python
+import pandas as pd
+from os.path import isfile, isdir, join, basename
+from os import listdir, mkdir
+import spacy
+from edsnlp.processing import pipe
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib_venn import venn3, venn2
+import altair as alt
+from functools import reduce
+from knowledge import TO_BE_MATCHED
+
+import sys
+
+BRAT_DIR = "/export/home/cse200093/scratch/BioMedics/data/CRH"
+RES_DIR = "/export/home/cse200093/scratch/BioMedics/data/bio_results"
+RES_DRUG_DIR = "/export/home/cse200093/scratch/BioMedics/data/drug_results"
+```
+
+
+# Only execute the following cells if you want to recreate the inference dataset (i.e. the dataset based on CIM10). PLEASE USE THE ENV `[2.4.3] K8s Py3 client` FOR THIS DATASET CREATION PART!
+
+
+### Functions
+
+```python
+### CELLS TO CREATE THE DATASET CONTAINING ALL TXT FILES WE WANT TO STUDY:
+### ALL PATIENTS WITH ONE LINE AT LEAST IN:
+# - i2b2_observation_cim10 with correct CIM10 according to `TO_BE_MATCHED`
+# - i2b2_observation_doc
+# - i2b2_observation_lab (OPTIONAL)
+
+# SHOW DATASETS
+sql("USE cse_200093_20210402")
+sql("SHOW tables").show(10, False)
+
+
+# Save txt function
+def save_to_txt(path, txt):
+    with open(path, "w") as f:
+        print(txt, file=f)
+
+
+def get_docs_df(cim10_list, min_len=1000):
+    ### If we filter on `i2b2_observation_lab`
+    # docs = sql("""SELECT doc.instance_num, doc.observation_blob, cim10.concept_cd FROM i2b2_observation_doc AS doc
+    # JOIN i2b2_observation_cim10 AS cim10 ON doc.encounter_num = cim10.encounter_num
+    # WHERE ((doc.concept_cd == 'CR:CRH-HOSPI' OR doc.concept_cd == 'CR:CRH-S')
+    # AND EXISTS (SELECT lab.encounter_num FROM i2b2_observation_lab AS lab
+    # WHERE lab.encounter_num = doc.encounter_num))""")
+
+    ### If we don't filter on `i2b2_observation_lab`
+    docs = sql(
+        """SELECT doc.instance_num, doc.observation_blob, doc.encounter_num, doc.patient_num, visit.age_visit_in_years_num, visit.start_date, cim10.concept_cd FROM i2b2_observation_doc AS doc
+        JOIN i2b2_observation_cim10 AS cim10 ON doc.encounter_num = cim10.encounter_num JOIN i2b2_visit AS visit ON doc.encounter_num = visit.encounter_num
+        WHERE (doc.concept_cd == 'CR:CRH-HOSPI' OR doc.concept_cd == 'CR:CRH-S')
+        """
+    )
+    ### Filter on cim10_list and export to Pandas
+    docs_df = docs.filter(docs.concept_cd.isin(cim10_list)).toPandas().dropna()
+    ### Keep only documents with a minimum amount of content
+    docs_df = docs_df.loc[docs_df["observation_blob"].apply(len) > min_len].reset_index(
+        drop=True
+    )
+    docs_df = (
docs_df.groupby("observation_blob") + .agg( + { + "instance_num": set, + "encounter_num": "first", + "patient_num": "first", + "age_visit_in_years_num": "first", + "start_date": "first", + "observation_blob": "first", + } + ) + .reset_index(drop=True) + ) + docs_df["instance_num"] = docs_df["instance_num"].apply( + lambda instance_num: "_".join(list(instance_num)) + ) + return docs_df + + +def get_bio_df(summary_docs): + bio = sql( + """SELECT bio.instance_num AS bio_id, bio.concept_cd, bio.units_cd, bio.nval_num, bio.tval_char, bio.quantity_num, bio.confidence_num, bio.encounter_num, bio.patient_num, bio.start_date, concept.name_char + FROM i2b2_observation_lab AS bio JOIN i2b2_concept AS concept ON bio.concept_cd = concept.concept_cd""" + ) + bio_dfs = {} + for disease in summary_docs.disease.unique(): + unique_visit = summary_docs[summary_docs.disease == disease][ + ["encounter_num"] + ].drop_duplicates() + unique_visit = spark.createDataFrame(unique_visit).hint("broadcast") + bio_df = bio.join(unique_visit, on="encounter_num").toPandas() + bio_df["disease"] = disease + bio_df["terms_linked_to_measurement"] = bio_df["name_char"].apply( + _get_term_from_c_name + ) + bio_df.loc[bio_df["units_cd"].isna(), "units_cd"] = "nounit" + bio_df = bio_df[~((bio_df.nval_num.isna()) & (bio_df.tval_char.isna()))] + display(bio_df) + bio_dfs[disease] = bio_df + + return bio_dfs + + +def get_med_df(summary_docs): + med = sql( + """SELECT med.instance_num AS med_id, med.concept_cd, med.valueflag_cd, med.encounter_num, med.patient_num, med.start_date, concept.name_char + FROM i2b2_observation_med AS med JOIN i2b2_concept AS concept ON med.concept_cd = concept.concept_cd""" + ) + med_dfs = {} + for disease in summary_docs.disease.unique(): + unique_visit = summary_docs[summary_docs.disease == disease][ + ["encounter_num"] + ].drop_duplicates() + unique_visit = spark.createDataFrame(unique_visit).hint("broadcast") + med_df = med.join(unique_visit, on="encounter_num").toPandas() + med_df["disease"] = disease + display(med_df) + med_dfs[disease] = med_df + + return med_dfs + + +def _get_term_from_c_name(c_name): + return c_name[c_name.index(":") + 1 :].split("_")[0].strip() +``` + +### Get Docs and Bio and Med + +```python +# Get docs and save It for each disease +docs_all_diseases = [] +for disease, disease_data in TO_BE_MATCHED.items(): + path_to_brat = join(BRAT_DIR, "raw", disease) + if not os.path.exists(path_to_brat): + mkdir(path_to_brat) + docs_df = get_docs_df(["CIM10:" + cim10 for cim10 in disease_data["CIM10"]]) + docs_df.apply(lambda row: save_to_txt(join(path_to_brat, row["instance_num"] + ".txt"), row["observation_blob"]), axis=1) + for file in os.listdir(path_to_brat): + if file.endswith(".txt"): + ann_file = os.path.join(path_to_brat, file[:-3] + "ann") + open(ann_file, mode='a').close() + print(disease + " processed and saved") + docs_df["disease"] = disease + docs_all_diseases.append(docs_df) +summary_df_docs = pd.concat(docs_all_diseases) +bio_from_structured_data = get_bio_df(summary_df_docs) +bio_from_structured_data = pd.concat(list(bio_from_structured_data.values())) +med_from_structured_data = get_med_df(summary_df_docs) +med_from_structured_data = pd.concat(list(med_from_structured_data.values())) +display(summary_df_docs) +display(bio_from_structured_data) +display(med_from_structured_data) +bio_from_structured_data.to_pickle(join(RES_DIR, "bio_from_structured_data.pkl")) +med_from_structured_data.to_pickle(join(RES_DRUG_DIR, "med_from_structured_data.pkl")) 
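+# These pickles are reloaded in the "Summary description of the data" section
+# below, so this dataset creation part does not need to be re-run each session.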
+summary_df_docs.to_pickle(join(BRAT_DIR, "summary_df_docs.pkl")) +``` + +```python +bio_from_structured_data["found"] = bio_from_structured_data["nval_num"].mask( + bio_from_structured_data["nval_num"].isna(), bio_from_structured_data["tval_char"] +) +bio_from_structured_data["gold"] = ( + bio_from_structured_data["found"].astype(str) + " " + bio_from_structured_data["units_cd"] +) +bio_from_structured_data = bio_from_structured_data.groupby( + ["disease", "encounter_num", "patient_num", "terms_linked_to_measurement"], + as_index=False, +).agg({"name_char": list, "gold": list}) +bio_from_structured_data.to_json(join(RES_DIR, "bio_from_structured_data.json")) +``` + + +# Summary description of the data + + +```python +import altair as alt + +summary_df_docs = pd.read_pickle(join(BRAT_DIR, "summary_df_docs.pkl")) +bio_from_structured_data = pd.read_pickle(join(RES_DIR, "bio_from_structured_data.pkl")) +med_from_structured_data = pd.read_pickle( + join(RES_DRUG_DIR, "med_from_structured_data.pkl") +) +``` + +## Number of docs/visit/patients + +```python +summary_df_docs.groupby("disease").agg( + {"instance_num": "nunique", "encounter_num": "nunique", "patient_num": "nunique"} +) +``` + +## Number of Bio/visit/patient + +```python +bio_from_structured_data.groupby("disease").agg( + {"bio_id": "nunique", "encounter_num": "nunique", "patient_num": "nunique"} +) +``` + +## Number of Med/visit/patient + +```python +med_from_structured_data.groupby("disease").agg( + {"med_id": "nunique", "encounter_num": "nunique", "patient_num": "nunique"} +) +``` + +## Age histogram + +```python +summary_df_docs["round_age"] = (summary_df_docs["age_visit_in_years_num"] * 2).round( + -1 +) / 2 +age_summary = summary_df_docs.groupby( + ["disease", "age_visit_in_years_num"], as_index=False +).agg({"patient_num": "nunique"}) +round_age_summary = summary_df_docs.groupby( + ["disease", "round_age"], as_index=False +).agg({"patient_num": "nunique"}) +total_patient = ( + summary_df_docs.groupby("disease", as_index=False) + .agg({"patient_num": "nunique"}) + .rename(columns={"patient_num": "total_patient"}) +) +age_summary = age_summary.merge(total_patient, on="disease") +age_summary["density"] = age_summary["patient_num"] / age_summary["total_patient"] +display(age_summary) +``` + +```python +alt.data_transformers.disable_max_rows() + +alt.Chart(round_age_summary).mark_bar(size=12, align="left").encode( + alt.X("round_age:Q").title("Age at stay"), + alt.Y("patient_num:Q").title("Number of patients"), + alt.Row("disease:N"), +).resolve_scale(y="independent").properties(height=200) +``` + +```python +alt.data_transformers.disable_max_rows() + +alt.Chart(round_age_summary).mark_area(interpolate="step-after").encode( + alt.X("round_age:Q").title("Age at stay"), + alt.Y("patient_num:Q").title("Number of patients"), + alt.Row("disease:N"), +).resolve_scale(y="independent").properties(height=200) +``` + +```python +alt.data_transformers.disable_max_rows() + +alt.Chart(age_summary).mark_area().encode( + alt.X("age_visit_in_years_num:Q").title("Age at stay"), + alt.Y("patient_num:Q").title("Number of patients"), + alt.Row("disease:N"), +).resolve_scale(y="independent").properties(height=200) +``` + +```python +alt.data_transformers.disable_max_rows() + +alt.Chart(age_summary).mark_area(interpolate="basis").encode( + alt.X("age_visit_in_years_num:Q").title("Age at stay"), + alt.Y("density:Q").title("Density"), + alt.Row("disease:N"), +).properties(height=200) +``` + +```python +alt.data_transformers.disable_max_rows() + 
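+# Altair embeds the data in the chart spec; disable_max_rows() lifts the
+# default 5000-row limit that would otherwise raise MaxRowsError here.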
+alt.Chart(age_summary).mark_bar().encode(
+    alt.X("age_visit_in_years_num:Q"),
+    alt.Y("density:Q"),
+    alt.Row("disease:N"),
+    color="disease:N",
+).properties(height=200)
+```
+
+```python
+alt.data_transformers.disable_max_rows()
+
+alt.Chart(age_summary).mark_area(opacity=0.4).encode(
+    alt.X("age_visit_in_years_num:Q"), alt.Y("density:Q").stack(None), color="disease:N"
+).properties(height=200)
+```
+
+```python
+alt.data_transformers.disable_max_rows()
+
+alt.Chart(age_summary).mark_area().encode(
+    alt.X("age_visit_in_years_num:Q"),
+    alt.Y("density:Q").stack(True),
+    color="disease:N",
+).properties(height=200)
+```
+
+## Stay start histogram
+
+```python
+summary_df_docs["month_date"] = (
+    summary_df_docs["start_date"].dt.strftime("%Y-%m").astype("datetime64[ns]")
+)
+month_date_summary = summary_df_docs.groupby(
+    ["disease", "month_date"], as_index=False
+).agg({"encounter_num": "nunique"})
+total_visit = (
+    summary_df_docs.groupby("disease", as_index=False)
+    .agg({"encounter_num": "nunique"})
+    .rename(columns={"encounter_num": "total_visit"})
+)
+month_date_summary = month_date_summary.merge(total_visit, on="disease")
+month_date_summary["density"] = (
+    month_date_summary["encounter_num"] / month_date_summary["total_visit"]
+)
+display(month_date_summary)
+```
+
+```python
+alt.data_transformers.disable_max_rows()
+alt.Chart(month_date_summary).mark_bar(align="left").encode(
+    alt.X("yearquarter(month_date):T")
+    .title("Time (Year)")
+    .axis(tickCount="year", labelAngle=0, grid=True, format="%Y"),
+    alt.Y("sum(encounter_num):Q").title("Number of stays"),
+    alt.Row("disease:N"),
+).resolve_scale(y="independent").properties(height=200, width=600)
+```
+
+```python
+alt.data_transformers.disable_max_rows()
+
+alt.Chart(month_date_summary).mark_area(interpolate="basis").encode(
+    alt.X("month_date:T").title("Time (Year)"),
+    alt.Y("density:Q").title("Density"),
+    alt.Row("disease:N"),
+).properties(height=200, width=600)
+```
+
+```python
+alt.data_transformers.disable_max_rows()
+
+alt.Chart(month_date_summary).mark_bar().encode(
+    alt.X("month_date:T").title("Time (Year)"),
+    alt.Y("density:Q").title("Density"),
+    alt.Row("disease:N"),
+    color="disease:N",
+).properties(height=200)
+```
+
+```python
+alt.data_transformers.disable_max_rows()
+
+alt.Chart(month_date_summary).mark_area(opacity=0.4).encode(
+    alt.X("month_date:T"), alt.Y("density:Q").stack(None), color="disease:N"
+).properties(height=200, width=600)
+```
+
+```python
+alt.data_transformers.disable_max_rows()
+
+alt.Chart(month_date_summary).mark_area().encode(
+    alt.X("month_date:T"),
+    alt.Y("density:Q").stack(True),
+    color="disease:N",
+).properties(height=200)
+```
+
+# Please run super_pipe inference on the `BRAT_DIR` subfolders. Use the sbatch file in `/export/home/cse200093/Jacques_Bio/super_pipe/py_files/sbatch/main.sh`. The results path should be `RES_DIR`. NOW, PLEASE USE `jacques-spark` FOR THE NEXT CELLS.
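+Before running the next cells, it can help to check that the expected super_pipe outputs are actually in place. A minimal sanity check, assuming the per-disease file names read later in this notebook (`norm_lev_match.pkl` for drugs, `norm_coder_all.json` for biology):
+
+```python
+from os.path import exists, join
+
+# Report which expected super_pipe outputs are present for each disease
+for disease in TO_BE_MATCHED:
+    for expected in [
+        join(RES_DRUG_DIR, disease, "norm_lev_match.pkl"),
+        join(RES_DIR, disease, "norm_coder_all.json"),
+    ]:
+        print(expected, "OK" if exists(expected) else "MISSING")
+```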
+ + +## MED STRUCTURED + +```python +med_from_structured_data = pd.read_pickle( + join(RES_DRUG_DIR, "med_from_structured_data.pkl") +) +codes_to_keep = {"disease": [], "valueflag_cd": [], "med": []} +for disease, disease_data in TO_BE_MATCHED.items(): + for label, code_list in disease_data["ATC_codes"].items(): + for code in code_list: + codes_to_keep["disease"].append(disease) + codes_to_keep["valueflag_cd"].append(code) + codes_to_keep["med"].append(label) +filtered_med = med_from_structured_data.merge( + pd.DataFrame(codes_to_keep), on=["disease", "valueflag_cd"] +) +for disease in TO_BE_MATCHED.keys(): + path_to_res = join(RES_DRUG_DIR, disease) + if not os.path.exists(path_to_res): + mkdir(path_to_res) + filtered_med[filtered_med.disease == disease].to_pickle( + join(path_to_res, "filtered_med_from_structured_data.pkl") + ) +display(filtered_med) +filtered_med.to_pickle(join(RES_DRUG_DIR, "filtered_med_from_structured_data.pkl")) +``` + +## BIO STRUCTURED + +```python +bio_from_structured_data = pd.read_pickle(join(RES_DIR, "bio_from_structured_data.pkl")) +codes_to_keep = {"disease": [], "concept_cd": [], "bio": []} +for disease, disease_data in TO_BE_MATCHED.items(): + for label, code_list in disease_data["ANABIO_codes"].items(): + for code in code_list: + codes_to_keep["disease"].append(disease) + codes_to_keep["concept_cd"].append(f"LAB:{code}") + codes_to_keep["bio"].append(label) +filtered_bio = bio_from_structured_data.merge( + pd.DataFrame(codes_to_keep), on=["disease", "concept_cd"] +) +for disease in TO_BE_MATCHED.keys(): + path_to_res = join(RES_DIR, disease) + if not os.path.exists(path_to_res): + mkdir(path_to_res) + filtered_bio[filtered_bio.disease == disease].to_pickle( + join(path_to_res, "filtered_bio_from_structured_data.pkl") + ) +display(filtered_bio) +filtered_bio.to_pickle(join(RES_DIR, "filtered_bio_from_structured_data.pkl")) +``` + +```python +bio_from_structured_data = pd.read_json( + join(RES_DIR, "bio_from_structured_data.json"), + dtype={"encounter_num": str, "patient_num": str}, +).explode("label") +cuis_to_keep = {"disease": [], "label": [], "bio": []} +for disease, disease_data in TO_BE_MATCHED.items(): + for cui_dic in disease_data["CUI_per_section"].values(): + for cui_label, cui_list in cui_dic.items(): + for cui in cui_list: + print(cui) + cuis_to_keep["disease"].append(disease) + cuis_to_keep["label"].append(cui) + cuis_to_keep["bio"].append(cui_label) +filtered_bio_from_structured = bio_from_structured_data.merge( + pd.DataFrame(cuis_to_keep), on=["disease", "label"] +) +for disease in TO_BE_MATCHED.keys(): + path_to_res = join(RES_DIR, disease) + if not os.path.exists(path_to_res): + mkdir(path_to_res) + filtered_bio_from_structured[filtered_bio_from_structured.disease == disease].to_pickle( + join(path_to_res, "filtered_bio_from_structured_data.pkl") + ) +display(filtered_bio_from_structured) +filtered_bio_from_structured.to_pickle(join(RES_DIR, "filtered_bio_from_structured_data.pkl")) +``` + +## MED NLP + +```python +from tqdm import tqdm + + +# Check if we have to keep a match or not based on section and CUI +def keep_match(atc, atcs, atcs_to_keep): + if atc not in atcs_to_keep: + return None + for drug, atc_list in atcs.items(): + if atc in atc_list: + return drug + return None + + +# List of df by disease for concatenation +res_part_filtered_list = [] +res_part_df_list = [] +for disease, disease_data in TO_BE_MATCHED.items(): + ### Load each res dataset to concat them in one unique df + res_part_df = pd.read_pickle(join(RES_DRUG_DIR, 
disease, "norm_lev_match.pkl")) + res_part_df["disease"] = disease + res_part_df["source"] = res_part_df["source"] + ".ann" + + ### Filter ATC to keep + codes_to_keep = {"label": [], "med": []} + for label, code_list in disease_data["ATC_codes"].items(): + for code in code_list: + codes_to_keep["label"].append(code) + codes_to_keep["med"].append(label) + res_part_filtered = ( + res_part_df.explode("label") + .merge(pd.DataFrame(codes_to_keep), on="label") + .drop_duplicates( + subset=["term", "source", "span_converted", "norm_term", "disease"] + ) + ) + + ### Save for future concatenation + res_part_filtered.to_pickle(join(RES_DRUG_DIR, disease, "res_final_filtered.pkl")) + res_part_filtered_list.append(res_part_filtered) +res_filtered_df = pd.concat(res_part_filtered_list) +res_filtered_df.to_pickle(join(RES_DRUG_DIR, "res_final_filtered.pkl")) +display(res_filtered_df) +``` + +## BIO NLP WITHOUT SECTION + +```python +from tqdm import tqdm + + +# Check if we have to keep a match or not based on section and CUI +def keep_match(cui, cui_per_section, cuis_to_keep): + if cui not in cuis_to_keep: + return None + for bio, cui_list in cui_per_section["all"].items(): + if cui in cui_list: + return bio + return None + + +# List of df by disease for concatenation +res_part_filtered_list = [] +res_part_df_list = [] +for disease, disease_data in TO_BE_MATCHED.items(): + ### Load each res dataset to concat them in one unique df + res_part_df = pd.read_json(join(RES_DIR, disease, "norm_coder_all.json")) + res_part_df["disease"] = disease + + ### Filter CUIS to keep + cuis_to_keep = [ + cui + for cui_dic in disease_data["CUI_per_section"].values() + for cui_list in cui_dic.values() + for cui in cui_list + ] + res_part_filtered = [] + for source in tqdm(res_part_df["source"].unique()): + for _, row in res_part_df.loc[res_part_df["source"] == source].iterrows(): + for cui in row["label"]: + to_keep = keep_match( + cui, + disease_data["CUI_per_section"], + cuis_to_keep, + ) + if to_keep: + row["bio"] = to_keep + res_part_filtered.append(row) + + ### Save for future concatenation + res_part_df.to_pickle(join(RES_DIR, disease, "res_final.pkl")) + res_part_df_list.append(res_part_df) + pd.DataFrame(res_part_filtered).to_pickle( + join(RES_DIR, disease, "res_final_filtered.pkl") + ) + res_part_filtered_list += res_part_filtered +res_df = pd.concat(res_part_df_list) +res_filtered_df = pd.DataFrame(res_part_filtered_list) +res_df.to_pickle(join(RES_DIR, "res_final.pkl")) +res_filtered_df.to_pickle(join(RES_DIR, "res_final_filtered.pkl")) +display(res_df) +display(res_filtered_df) +``` + +## BIO NLP WITH SECTION + +```python +from tqdm import tqdm + +rule_based_section = False + +if rule_based_section: + # Load nlp pipe to detect sections + nlp_sections = spacy.blank("eds") + nlp_sections.add_pipe("eds.normalizer") + nlp_sections.add_pipe("eds.sections") + + +# Check if two spans are overlapping for section detection +def is_overlapping(a, b): + # Return true if a segment is overlapping b + # else False + return min(a[1], b[1]) > max(a[0], b[0]) + + +# Check if we have to keep a match or not based on section and CUI +def keep_match(cui, span, txt_section_part_df, cui_per_section, cuis_to_keep): + if cui not in cuis_to_keep: + return None + for section in cui_per_section.keys(): + if section == "all": + for bio, cui_list in cui_per_section["all"].items(): + if cui in cui_list: + return bio + elif section not in txt_section_part_df["label"].tolist(): + continue + else: + section_spans = ( + 
+                txt_section_part_df.loc[txt_section_part_df["label"] == section]
+                .apply(lambda row: [row["start"], row["end"]], axis=1)
+                .tolist()
+            )
+            for section_span in section_spans:
+                if is_overlapping(span, section_span):
+                    for bio, cui_list in cui_per_section[section].items():
+                        if cui in cui_list:
+                            return bio
+                else:
+                    continue
+    return None
+
+
+# List of df by disease for concatenation
+res_part_filtered_list = []
+txt_sections_part_df_list = []
+res_part_df_list = []
+for disease, disease_data in TO_BE_MATCHED.items():
+    ### Load each res dataset to concat them into one unique df
+    res_part_df = pd.read_json(join(RES_DIR, disease, "norm_coder_all.json"))
+    res_part_df["disease"] = disease
+
+    if rule_based_section:
+        ### Load the txt files, detect sections and store them in a df
+        # Load txt files in a DataFrame
+        txt_files_part = [
+            f
+            for f in listdir(join(BRAT_DIR, "raw", disease))
+            if isfile(join(BRAT_DIR, "raw", disease, f))
+            if f.endswith(".txt")
+        ]
+        txt_list_part = []
+        for txt_file in txt_files_part:
+            with open(join(BRAT_DIR, "raw", disease, txt_file), "r") as file:
+                text = file.read()
+                txt_list_part.append([text, txt_file[:-3] + "ann"])
+        txt_sections_part_df = pd.DataFrame(
+            txt_list_part, columns=["note_text", "note_id"]
+        )
+
+        # Run the nlp pipe to detect sections
+        txt_sections_part_df = pipe(
+            note=txt_sections_part_df,
+            nlp=nlp_sections,
+            n_jobs=-2,
+            additional_spans=["sections"],
+        ).drop(columns=["span_type", "lexical_variant"])
+    else:
+        ### Load the predicted .ann files and rebuild the section spans from
+        ### the SECTION entities they contain
+        txt_files_part = [
+            f
+            for f in listdir(join(BRAT_DIR, "pred", disease))
+            if isfile(join(BRAT_DIR, "pred", disease, f))
+            if f.endswith(".ann")
+        ]
+        txt_list_part = []
+        for txt_file in txt_files_part:
+            with open(join(BRAT_DIR, "pred", disease, txt_file), "r") as file:
+                lines = file.readlines()
+                start = 0
+                section = "introduction"
+                for line in lines:
+                    # .ann lines are tab-separated:
+                    # "T1<TAB>SECTION_label start end<TAB>text"
+                    if "SECTION" in line and not (
+                        line.split("\t")[1].split(" ")[0] == section
+                    ):
+                        end = int(line.split("\t")[1].split(" ")[1])
+                        txt_list_part.append([txt_file, section, start, end])
+                        section = line.split("\t")[1].split(" ")[0]
+                        start = end
+        txt_sections_part_df = pd.DataFrame(
+            txt_list_part, columns=["note_id", "label", "start", "end"]
+        )
+    txt_sections_part_df["disease"] = disease
+
+    ### Filter CUIS to keep
+    sections_to_keep = list(disease_data["CUI_per_section"].keys())
+    cuis_to_keep = [
+        cui
+        for cui_dic in disease_data["CUI_per_section"].values()
+        for cui_list in cui_dic.values()
+        for cui in cui_list
+    ]
+    print(cuis_to_keep)
+    res_part_filtered = []
+    for source in tqdm(res_part_df["source"].unique()):
+        txt_sections_part_source_df = txt_sections_part_df.loc[
+            (txt_sections_part_df["note_id"] == source)
+            # & (txt_sections_part_df["label"].isin(sections_to_keep))
+        ]
+        for _, row in res_part_df.loc[res_part_df["source"] == source].iterrows():
+            for cui in row["label"]:
+                to_keep = keep_match(
+                    cui,
+                    row["span_converted"],
+                    txt_sections_part_source_df,
+                    disease_data["CUI_per_section"],
+                    cuis_to_keep,
+                )
+                if to_keep:
+                    row["bio"] = to_keep
+                    res_part_filtered.append(row)
+
+    ### Save for future concatenation
+    res_part_df.to_pickle(join(RES_DIR, disease, "res_final.pkl"))
+    res_part_df_list.append(res_part_df)
+    pd.DataFrame(res_part_filtered).to_pickle(
+        join(RES_DIR, disease, "res_final_filtered.pkl")
+    )
+    res_part_filtered_list += res_part_filtered
+    txt_sections_part_df.to_pickle(join(RES_DIR, disease, "txt_sections_df.pkl"))
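+    # Keep the per-disease section table in memory as well: all diseases are
+    # concatenated into a single txt_sections_df right after this loop.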
txt_sections_part_df_list.append(txt_sections_part_df) + +res_df = pd.concat(res_part_df_list) +res_filtered_df = pd.DataFrame(res_part_filtered_list) +txt_sections_df = pd.concat(txt_sections_part_df_list) +txt_sections_df.to_pickle(join(RES_DIR, "txt_sections_df.pkl")) +res_df.to_pickle(join(RES_DIR, "res_final.pkl")) +res_filtered_df.to_pickle(join(RES_DIR, "res_final_filtered.pkl")) +display(res_df) +display(res_filtered_df) +``` + +# Vizualize phenotype + +```python +def prepare_structured_CODERE_label_df(disease): + structured_filtered_res = pd.read_pickle( + join(RES_DIR, disease, "structured_filtered_res.pkl") + ) + summary_df_docs = pd.read_pickle(join(BRAT_DIR, "summary_df_docs.pkl")) + summary_df_docs = summary_df_docs[summary_df_docs.disease == disease] + structured_filtered_res = structured_filtered_res.merge( + summary_df_docs[["encounter_num", "patient_num"]], + on=["encounter_num", "patient_num"], + how="right", + ) + structured_filtered_res = structured_filtered_res.explode("gold") + structured_filtered_res["value"] = pd.to_numeric( + structured_filtered_res["gold"].str.split(" ").str.get(0), errors="coerce" + ) + structured_filtered_res["unit"] = ( + structured_filtered_res["gold"].str.split(" ").str.get(-1).str.lower() + ) + structured_patient_group = None + if len(TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys()) > 0: + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys(): + structured_filtered_res[bio] = structured_filtered_res.bio == bio + structured_filtered_res[f"{bio} positif"] = ( + structured_filtered_res.bio == bio + ) & (structured_filtered_res.value >= 1.0) + structured_patient_group = structured_filtered_res.groupby( + "patient_num", as_index=False + ).agg( + { + **{ + bio: "sum" + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys() + }, + **{ + f"{bio} positif": "sum" + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys() + }, + } + ) + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys(): + structured_patient_group[bio] = structured_patient_group[bio] >= 1 + structured_patient_group[f"{bio} positif"] = ( + structured_patient_group[f"{bio} positif"] >= 1 + ) + return structured_filtered_res, structured_patient_group +``` + +```python +def prepare_structured_df(disease): + summary_filtered_res = pd.read_pickle( + join(RES_DIR, disease, "filtered_bio_from_structured_data.pkl") + ) + summary_df_docs = pd.read_pickle(join(BRAT_DIR, "summary_df_docs.pkl")) + summary_df_docs = summary_df_docs[summary_df_docs.disease == disease] + summary_filtered_res = summary_filtered_res.merge( + summary_df_docs[["encounter_num", "patient_num"]], + on=["encounter_num", "patient_num"], + how="right", + ) + summary_filtered_res = summary_filtered_res.rename( + columns={"nval_num": "value", "units_cd": "unit"} + ) + summary_patient_group = None + if len(TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys()) > 0: + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys(): + summary_filtered_res[bio] = summary_filtered_res.bio == bio + summary_filtered_res[f"{bio} positif"] = ( + summary_filtered_res.bio == bio + ) & ( + (summary_filtered_res.value >= summary_filtered_res.confidence_num) + | (summary_filtered_res.tval_char.str.contains("posi", case=False)) + ) + summary_patient_group = summary_filtered_res.groupby( + "patient_num", as_index=False + ).agg( + { + **{ + bio: "sum" + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys() + }, + **{ + f"{bio} positif": "sum" + for bio in 
TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys() + }, + } + ) + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys(): + summary_patient_group[bio] = summary_patient_group[bio] >= 1 + summary_patient_group[f"{bio} positif"] = ( + summary_patient_group[f"{bio} positif"] >= 1 + ) + + return summary_filtered_res, summary_patient_group +``` + +```python +def prepare_structured_med_df(disease): + summary_filtered_res = pd.read_pickle( + join(RES_DRUG_DIR, disease, "filtered_med_from_structured_data.pkl") + ) + summary_df_docs = pd.read_pickle(join(BRAT_DIR, "summary_df_docs.pkl")) + summary_df_docs = summary_df_docs[summary_df_docs.disease == disease] + summary_filtered_res = summary_filtered_res.merge( + summary_df_docs[["encounter_num", "patient_num"]], + on=["encounter_num", "patient_num"], + how="right", + ) + summary_patient_group = None + for med in TO_BE_MATCHED[disease]["ATC_codes"].keys(): + summary_filtered_res[med] = summary_filtered_res.med == med + summary_patient_group = summary_filtered_res.groupby( + "patient_num", as_index=False + ).agg( + { + **{med: "sum" for med in TO_BE_MATCHED[disease]["ATC_codes"].keys()}, + } + ) + for med in TO_BE_MATCHED[disease]["ATC_codes"].keys(): + summary_patient_group[med] = summary_patient_group[med] >= 1 + + return summary_filtered_res, summary_patient_group +``` + +```python +def prepare_nlp_med_df(disease): + res_filtered_df = pd.read_pickle( + join(RES_DRUG_DIR, disease, "res_final_filtered.pkl") + ) + res_filtered_df["instance_num"] = ( + res_filtered_df.source.str.split(".").str.get(0).str.split("_").str.get(0) + ) + summary_df_docs = pd.read_pickle(join(BRAT_DIR, "summary_df_docs.pkl")) + summary_df_docs = summary_df_docs[summary_df_docs.disease == disease] + summary_df_docs["instance_num"] = summary_df_docs.instance_num.str.split("_") + summary_df_docs = summary_df_docs.explode("instance_num") + res_filtered_df = res_filtered_df.merge( + summary_df_docs[["instance_num", "encounter_num", "patient_num"]], + on="instance_num", + how="right", + ) + patient_group = None + for med in TO_BE_MATCHED[disease]["ATC_codes"].keys(): + res_filtered_df[med] = res_filtered_df.med == med + patient_group = res_filtered_df.groupby("patient_num", as_index=False).agg( + { + **{med: "sum" for med in TO_BE_MATCHED[disease]["ATC_codes"].keys()}, + } + ) + for med in TO_BE_MATCHED[disease]["ATC_codes"].keys(): + patient_group[med] = patient_group[med] >= 1 + + return res_filtered_df, patient_group +``` + +```python +def prepare_nlp_df(disease): + res_filtered_df = pd.read_pickle(join(RES_DIR, disease, "res_final_filtered.pkl")) + res_filtered_df["instance_num"] = ( + res_filtered_df.source.str.split(".").str.get(0).str.split("_").str.get(0) + ) + summary_df_docs = pd.read_pickle(join(BRAT_DIR, "summary_df_docs.pkl")) + summary_df_docs = summary_df_docs[summary_df_docs.disease == disease] + summary_df_docs["instance_num"] = summary_df_docs.instance_num.str.split("_") + summary_df_docs = summary_df_docs.explode("instance_num") + res_filtered_df = res_filtered_df.merge( + summary_df_docs[["instance_num", "encounter_num", "patient_num"]], + on="instance_num", + how="right", + ) + res_filtered_df = res_filtered_df.explode("found") + res_filtered_df["comparator"] = res_filtered_df["found"].str.split(" ").str.get(0) + res_filtered_df["value"] = ( + res_filtered_df["found"].str.split(" ").str.get(1).astype(float) + ) + res_filtered_df["unit"] = res_filtered_df["found"].str.split(" ").str.get(2) + patient_group = None + if 
len(TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys()) > 0: + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys(): + res_filtered_df[bio] = res_filtered_df.bio == bio + res_filtered_df[f"{bio} positif"] = (res_filtered_df.bio == bio) & ( + res_filtered_df.value >= 1.0 + ) + patient_group = res_filtered_df.groupby("patient_num", as_index=False).agg( + { + **{ + bio: "sum" + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys() + }, + **{ + f"{bio} positif": "sum" + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys() + }, + } + ) + for bio in TO_BE_MATCHED[disease]["CUI_per_section"]["all"].keys(): + patient_group[bio] = patient_group[bio] >= 1 + patient_group[f"{bio} positif"] = patient_group[f"{bio} positif"] >= 1 + + return res_filtered_df, patient_group +``` + +```python +def plot_hist(unit_convert, possible_values, res_filtered_df, title: bool = False): + alt.data_transformers.disable_max_rows() + res_hists = [] + for bio, units in unit_convert.items(): + filtered_bio = res_filtered_df[["bio", "unit", "value"]][ + (res_filtered_df.bio == bio) & (res_filtered_df.unit.isin(units.keys())) + ].copy() + if not filtered_bio.empty: + for unit, rate in units.items(): + filtered_bio["value"] = filtered_bio["value"].mask( + filtered_bio["unit"] == unit, filtered_bio["value"] * rate + ) + outliers = filtered_bio[ + (filtered_bio["value"] > possible_values[bio]) + | (filtered_bio["value"] < 0) + ].copy() + outliers["Percentage"] = len(outliers) / len(filtered_bio) + outliers["MaxValue"] = possible_values[bio] + outliers["value"] = outliers["value"].mask( + outliers["value"] > outliers["MaxValue"], outliers["MaxValue"] + ) + outliers["value"] = outliers["value"].mask(outliers["value"] < 0, 0) + filtered_bio = filtered_bio[ + (filtered_bio.value >= 0) & (filtered_bio.value <= possible_values[bio]) + ] + res_density = ( + alt.Chart(filtered_bio) + .transform_density( + "value", + counts=True, + extent=[0, possible_values[bio]], + as_=["value", "density"], + ) + .mark_area() + .encode( + alt.X("value:Q"), + alt.Y("density:Q"), + alt.Tooltip(["value:Q", "density:Q"]), + ) + ) + res_box_plot = ( + alt.Chart(filtered_bio) + .mark_boxplot() + .encode(alt.X("value:Q").scale(domainMin=0)) + ) + res_outliers = ( + alt.Chart(outliers) + .mark_bar(color="grey") + .encode( + alt.X("value:Q"), + alt.Y("count()").title("Smoothed count"), + tooltip=[ + alt.Tooltip( + "MaxValue:Q", + title="Upper bound", + format=",", + ), + alt.Tooltip( + "count():Q", + title="Frequency over the maximum", + ), + alt.Tooltip( + "Percentage:Q", + format=".2%", + ), + ], + ) + ) + res_hist = ( + (res_density).properties(width=400, height=300) & res_box_plot + ).resolve_scale(x="shared") + else: + res_hist = ( + alt.Chart(pd.DataFrame([])) + .mark_text() + .properties(width=400, height=300) + ) + if title: + res_hist = res_hist.properties( + title=alt.TitleParams(text=bio, orient="top") + ) + res_hists.append(res_hist) + chart = reduce( + lambda bar_chart_1, bar_chart_2: (bar_chart_1 | bar_chart_2) + .resolve_scale(x="independent") + .resolve_scale(y="independent"), + res_hists, + ) + return chart +``` + +```python +def plot_venn(patient_group, bio_venn, english_title, method): + if len(bio_venn) == 2: + subsets = ( + ((patient_group[bio_venn["A"]]) & ~(patient_group[bio_venn["B"]])).sum(), + (~(patient_group[bio_venn["A"]]) & (patient_group[bio_venn["B"]])).sum(), + ((patient_group[bio_venn["A"]]) & (patient_group[bio_venn["B"]])).sum(), + ) + venn = venn2(subsets=subsets, 
set_labels=bio_venn.values()) + elif len(bio_venn) == 3: + subsets = ( + ( + (patient_group[bio_venn["A"]]) + & ~(patient_group[bio_venn["B"]]) + & ~(patient_group[bio_venn["C"]]) + ).sum(), + ( + ~(patient_group[bio_venn["A"]]) + & (patient_group[bio_venn["B"]]) + & ~(patient_group[bio_venn["C"]]) + ).sum(), + ( + (patient_group[bio_venn["A"]]) + & (patient_group[bio_venn["B"]]) + & ~(patient_group[bio_venn["C"]]) + ).sum(), + ( + ~(patient_group[bio_venn["A"]]) + & ~(patient_group[bio_venn["B"]]) + & (patient_group[bio_venn["C"]]) + ).sum(), + ( + (patient_group[bio_venn["A"]]) + & ~(patient_group[bio_venn["B"]]) + & (patient_group[bio_venn["C"]]) + ).sum(), + ( + ~(patient_group[bio_venn["A"]]) + & (patient_group[bio_venn["B"]]) + & (patient_group[bio_venn["C"]]) + ).sum(), + ( + (patient_group[bio_venn["A"]]) + & (patient_group[bio_venn["B"]]) + & (patient_group[bio_venn["C"]]) + ).sum(), + ) + venn = venn3(subsets=subsets, set_labels=bio_venn.values()) + + total_patients = patient_group.patient_num.nunique() + if len(bio_venn) == 3: + total_pos = patient_group[ + patient_group[bio_venn["A"]] + | patient_group[bio_venn["B"]] + | patient_group[bio_venn["C"]] + ].patient_num.nunique() + elif len(bio_venn) == 2: + total_pos = patient_group[ + patient_group[bio_venn["A"]] | patient_group[bio_venn["B"]] + ].patient_num.nunique() + for idx, subset in enumerate(venn.subset_labels): + if subset: + subset.set_text( + f"{subset.get_text()}\n{int(subset.get_text())/total_patients*100:.1f}%" + ) + plt.title( + f"N = {total_patients} patients studied with a {english_title} \n Detected from {method} = {total_pos} ({total_pos/total_patients * 100:.1f} %)" + ) + # plt.show() +``` + +```python +def plot_summary_med(nlp_patient_group, structured_patient_group, english_title): + nlp_summary = pd.DataFrame( + nlp_patient_group.sum().drop("patient_num"), columns=["Detected"] + ) + nlp_summary["Total"] = len(nlp_patient_group) + nlp_summary["Percentage"] = ( + nlp_summary["Detected"] / nlp_summary["Total"] * 100 + ).astype(float).round(2).astype(str) + " %" + nlp_summary.columns = pd.MultiIndex.from_product( + [ + ["NLP"], + nlp_summary.columns, + ] + ) + structued_summary = pd.DataFrame( + structured_patient_group.sum().drop("patient_num"), columns=["Detected"] + ) + structued_summary["Total"] = len(structured_patient_group) + structued_summary["Percentage"] = ( + (structued_summary["Detected"] / structued_summary["Total"] * 100) + .astype(float) + .round(2) + ).astype(str) + " %" + structued_summary.columns = pd.MultiIndex.from_product( + [ + ["Structured Data"], + structued_summary.columns, + ] + ) + nlp_structured_patient_group = ( + pd.concat([nlp_patient_group, structured_patient_group]) + .groupby("patient_num", as_index=False) + .max() + ) + nlp_structued_summary = pd.DataFrame( + nlp_structured_patient_group.sum().drop("patient_num"), columns=["Detected"] + ) + nlp_structued_summary["Total"] = len(nlp_structured_patient_group) + nlp_structued_summary["Percentage"] = ( + (nlp_structued_summary["Detected"] / nlp_structued_summary["Total"] * 100) + .astype(float) + .round(2) + ).astype(str) + " %" + nlp_structued_summary.columns = pd.MultiIndex.from_product( + [ + ["NLP + Structured Data"], + nlp_structued_summary.columns, + ] + ) + return pd.concat( + [structued_summary, nlp_summary, nlp_structued_summary], axis=1 + ).style.set_caption(english_title.capitalize()) +``` + +```python +Biology_nlp_hist = [] +Biology_structured_hist = [] +Biology_nlp_structured_hist = [] +unit_convert = { + 
"Créatininémie": {"µmol_per_l": 1, "µmol/l": 1, "nounit": 1}, + "Hémoglobine": {"g_per_dl": 1, "g/dl": 1}, + "CRP": {"mg_per_l": 1, "µg_per_l": 0.001, "ui_per_l": 1, "nounit": 1, "mg/l": 1}, + "INR": {"nounit": 1}, + "DFG": {"ml_per_min": 1, "ml/min": 1, "nounit": 1, "mL/min/1,73m²": 1}, +} +possible_values = { + "Créatininémie": 1000, + "Hémoglobine": 30, + "CRP": 300, + "INR": 10, + "DFG": 200, +} +``` + +## syndrome_des_anti-phospholipides + +```python +disease = "syndrome_des_anti-phospholipides" +english_title = "Antiphospholipid syndrome" +nlp_filtered_res, nlp_patient_group = prepare_nlp_df(disease) +structured_filtered_res, structured_patient_group = prepare_structured_df(disease) +_, nlp_patient_med_group = prepare_nlp_med_df(disease) +_, structured_patient_med_group = prepare_structured_med_df(disease) +structured_filtered_res["method"] = "structured_knowledge" +nlp_filtered_res["method"] = "nlp" +nlp_structured_filtered_res = pd.concat( + [ + structured_filtered_res[ + ["encounter_num", "patient_num", "value", "unit", "bio", "method"] + ], + nlp_filtered_res[ + ["encounter_num", "patient_num", "value", "unit", "bio", "method"] + ], + ] +) +nlp_structured_patient_group = ( + pd.concat([nlp_patient_group, structured_patient_group]) + .groupby("patient_num", as_index=False) + .max() +) +nlp_structured_patient_med_group = ( + pd.concat([nlp_patient_med_group, structured_patient_med_group]) + .groupby("patient_num", as_index=False) + .max() +) +``` + +```python +med_venn = dict(A="Héparine", B="Anticoagulants oraux") +plot_venn(nlp_patient_med_group, med_venn, english_title, method="discharge summaries") +plt.savefig(f"figures/{disease}/venn_nlp_med.jpeg") +plt.show() +plot_venn( + structured_patient_med_group, med_venn, english_title, method="strctured data" +) +plt.savefig(f"figures/{disease}/venn_structured_med.jpeg") +plt.show() +plot_venn( + nlp_structured_patient_med_group, + med_venn, + english_title, + method="discharge summaries and structured data", +) +plt.savefig(f"figures/{disease}/venn_nlp_structured_med.jpeg") +plt.show() +``` + +```python +plot_summary_med(nlp_patient_med_group, structured_patient_med_group, english_title) +``` + +```python +nlp_hists = plot_hist(unit_convert, possible_values, nlp_filtered_res, True).properties( + title=english_title + " (NLP)" +) +strctured_hists = plot_hist( + unit_convert, possible_values, structured_filtered_res, False +).properties(title=english_title + " (structured data)") +nlp_strctured_hists = plot_hist( + unit_convert, possible_values, nlp_structured_filtered_res, False +).properties(title=english_title + " (NLP + structured data)") +chart = ( + (nlp_hists & strctured_hists & nlp_strctured_hists) + .resolve_scale(x="independent") + .resolve_scale(y="independent") + .configure_title(anchor="middle", fontSize=20, orient="left") +) +if not os.path.exists(f"figures/{disease}"): + os.makedirs(f"figures/{disease}") +chart.save(f"figures/{disease}/histogram.png") +chart.save(f"figures/{disease}/histogram.html") +# display(chart) +``` + +```python +Biology_nlp_hist.append( + plot_hist(unit_convert, possible_values, nlp_filtered_res, True).properties( + title=english_title + " (NLP)" + ) +) +Biology_structured_hist.append( + plot_hist(unit_convert, possible_values, structured_filtered_res, True).properties( + title=english_title + " (structured data)" + ) +) +Biology_nlp_structured_hist.append( + plot_hist( + unit_convert, possible_values, nlp_structured_filtered_res, True + ).properties(title=english_title + " (NLP + structured 
data)") +) +``` + +```python +bio_venn = dict( + A="Anti-cardiolipides", B="anti_B2GP1", C="anticoagulant_circulant_lupique" +) +plot_venn(nlp_patient_group, bio_venn, english_title, method="discharge summaries") +plt.savefig(f"figures/{disease}/venn_nlp.jpeg") +plt.show() +plot_venn(structured_patient_group, bio_venn, english_title, method="strctured data") +plt.savefig(f"figures/{disease}/venn_structured.jpeg") +plt.show() +plot_venn( + nlp_structured_patient_group, + bio_venn, + english_title, + method="discharge summaries and structured data", +) +plt.savefig(f"figures/{disease}/venn_nlp_structured.jpeg") +plt.show() +``` + +```python +bio_venn = dict( + A="Anti-cardiolipides positif", + B="anti_B2GP1 positif", + C="anticoagulant_circulant_lupique positif", +) +plot_venn(nlp_patient_group, bio_venn, english_title, method="discharge summaries") +plt.savefig(f"figures/{disease}/venn_pos_nlp.jpeg") +plt.show() +plot_venn(structured_patient_group, bio_venn, english_title, method="strctured data") +plt.savefig(f"figures/{disease}/venn_pos_structured.jpeg") +plt.show() +plot_venn( + nlp_structured_patient_group, + bio_venn, + english_title, + method="discharge summaries and structured data", +) +plt.savefig(f"figures/{disease}/venn_pos_nlp_structured.jpeg") +plt.show() +``` + +## Lupus + +FAN/AAN (C0587178), Anti-DNA Natif (C1262035) +Anti-Sm (C0201357) + +```python +disease = "lupus_erythemateux_dissemine" +english_title = "Lupus" +nlp_filtered_res, nlp_patient_group = prepare_nlp_df(disease) +structured_filtered_res, structured_patient_group = prepare_structured_df(disease) +_, nlp_patient_med_group = prepare_nlp_med_df(disease) +_, structured_patient_med_group = prepare_structured_med_df(disease) +structured_filtered_res["method"] = "structured_knowledge" +nlp_filtered_res["method"] = "nlp" +nlp_structured_filtered_res = pd.concat( + [ + structured_filtered_res[ + ["encounter_num", "patient_num", "value", "unit", "bio", "method"] + ], + nlp_filtered_res[ + ["encounter_num", "patient_num", "value", "unit", "bio", "method"] + ], + ] +) +nlp_structured_patient_group = ( + pd.concat([nlp_patient_group, structured_patient_group]) + .groupby("patient_num", as_index=False) + .max() +) +``` + +```python +plot_summary_med(nlp_patient_med_group, structured_patient_med_group, english_title) +``` + +```python +nlp_hists = plot_hist(unit_convert, possible_values, nlp_filtered_res, True).properties( + title=english_title + " (NLP)" +) +strctured_hists = plot_hist( + unit_convert, possible_values, structured_filtered_res, False +).properties(title=english_title + " (structured data)") +nlp_strctured_hists = plot_hist( + unit_convert, possible_values, nlp_structured_filtered_res, False +).properties(title=english_title + " (NLP + structured data)") +chart = ( + (nlp_hists & strctured_hists & nlp_strctured_hists) + .resolve_scale(x="independent") + .resolve_scale(y="independent") + .configure_title(anchor="middle", fontSize=20, orient="left") +) +if not os.path.exists(f"figures/{disease}"): + os.makedirs(f"figures/{disease}") +chart.save(f"figures/{disease}/histogram.png") +chart.save(f"figures/{disease}/histogram.html") +# display(chart) +``` + +```python +Biology_nlp_hist.append( + plot_hist(unit_convert, possible_values, nlp_filtered_res).properties( + title=english_title + " (NLP)" + ) +) +Biology_structured_hist.append( + plot_hist(unit_convert, possible_values, structured_filtered_res).properties( + title=english_title + " (structured data)" + ) +) +Biology_nlp_structured_hist.append( + 
+
+```python
+bio_venn = dict(A="Facteur anti-nucléaire", B="Anti-DNA natif", C="Anti-Sm")
+plot_venn(nlp_patient_group, bio_venn, english_title, method="discharge summaries")
+plt.savefig(f"figures/{disease}/venn_nlp.jpeg")
+plt.show()
+plot_venn(structured_patient_group, bio_venn, english_title, method="structured data")
+plt.savefig(f"figures/{disease}/venn_structured.jpeg")
+plt.show()
+plot_venn(
+    nlp_structured_patient_group,
+    bio_venn,
+    english_title,
+    method="discharge summaries and structured data",
+)
+plt.savefig(f"figures/{disease}/venn_nlp_structured.jpeg")
+plt.show()
+```
+
+```python
+bio_venn = dict(
+    A="Facteur anti-nucléaire positif", B="Anti-DNA natif positif", C="Anti-Sm positif"
+)
+plot_venn(nlp_patient_group, bio_venn, english_title, method="discharge summaries")
+plt.savefig(f"figures/{disease}/venn_pos_nlp.jpeg")
+plt.show()
+plot_venn(structured_patient_group, bio_venn, english_title, method="structured data")
+plt.savefig(f"figures/{disease}/venn_pos_structured.jpeg")
+plt.show()
+plot_venn(
+    nlp_structured_patient_group,
+    bio_venn,
+    english_title,
+    method="discharge summaries and structured data",
+)
+plt.savefig(f"figures/{disease}/venn_pos_nlp_structured.jpeg")
+plt.show()
+```
+
+```python
+bio_venn = dict(A="Facteur anti-nucléaire", B="Anti-DNA natif")
+plot_venn(nlp_patient_group, bio_venn, english_title, method="discharge summaries")
+plt.savefig(f"figures/{disease}/venn_2_nlp.jpeg")
+plt.show()
+plot_venn(structured_patient_group, bio_venn, english_title, method="structured data")
+plt.savefig(f"figures/{disease}/venn_2_structured.jpeg")
+plt.show()
+plot_venn(
+    nlp_structured_patient_group,
+    bio_venn,
+    english_title,
+    method="discharge summaries and structured data",
+)
+plt.savefig(f"figures/{disease}/venn_2_nlp_structured.jpeg")
+plt.show()
+```
+
+```python
+bio_venn = dict(A="Facteur anti-nucléaire positif", B="Anti-DNA natif positif")
+plot_venn(nlp_patient_group, bio_venn, english_title, method="discharge summaries")
+plt.savefig(f"figures/{disease}/venn_pos_2_nlp.jpeg")
+plt.show()
+plot_venn(structured_patient_group, bio_venn, english_title, method="structured data")
+plt.savefig(f"figures/{disease}/venn_pos_2_structured.jpeg")
+plt.show()
+plot_venn(
+    nlp_structured_patient_group,
+    bio_venn,
+    english_title,
+    method="discharge summaries and structured data",
+)
+plt.savefig(f"figures/{disease}/venn_pos_2_nlp_structured.jpeg")
+plt.show()
+```
+
+## Systemic sclerosis
+
+```python
+disease = "sclerodermie_systemique"
+english_title = "Systemic sclerosis"
+nlp_filtered_res, nlp_patient_group = prepare_nlp_df(disease)
+structured_filtered_res, structured_patient_group = prepare_structured_df(disease)
+_, nlp_patient_med_group = prepare_nlp_med_df(disease)
+_, structured_patient_med_group = prepare_structured_med_df(disease)
+structured_filtered_res["method"] = "structured_knowledge"
+nlp_filtered_res["method"] = "nlp"
+nlp_structured_filtered_res = pd.concat(
+    [
+        structured_filtered_res[
+            ["encounter_num", "patient_num", "value", "unit", "bio", "method"]
+        ],
+        nlp_filtered_res[
+            ["encounter_num", "patient_num", "value", "unit", "bio", "method"]
+        ],
+    ]
+)
+nlp_structured_patient_group = (
+    pd.concat([nlp_patient_group, structured_patient_group])
+    .groupby("patient_num", as_index=False)
+    .max()
+)
+```
+
+```python
+plot_summary_med(nlp_patient_med_group, structured_patient_med_group, english_title)
+```
+
+```python
+nlp_hists = plot_hist(unit_convert, possible_values, nlp_filtered_res, True).properties(
+    title=english_title + " (NLP)"
+)
+structured_hists = plot_hist(
+    unit_convert, possible_values, structured_filtered_res, False
+).properties(title=english_title + " (structured data)")
+nlp_structured_hists = plot_hist(
+    unit_convert, possible_values, nlp_structured_filtered_res, False
+).properties(title=english_title + " (NLP + structured data)")
+chart = (
+    (nlp_hists & structured_hists & nlp_structured_hists)
+    .resolve_scale(x="independent")
+    .resolve_scale(y="independent")
+    .configure_title(anchor="middle", fontSize=20, orient="left")
+)
+os.makedirs(f"figures/{disease}", exist_ok=True)
+chart.save(f"figures/{disease}/histogram.png")
+chart.save(f"figures/{disease}/histogram.html")
+# display(chart)
+```
+
+```python
+Biology_nlp_hist.append(
+    plot_hist(unit_convert, possible_values, nlp_filtered_res).properties(
+        title=english_title + " (NLP)"
+    )
+)
+Biology_structured_hist.append(
+    plot_hist(unit_convert, possible_values, structured_filtered_res).properties(
+        title=english_title + " (structured data)"
+    )
+)
+Biology_nlp_structured_hist.append(
+    plot_hist(unit_convert, possible_values, nlp_structured_filtered_res).properties(
+        title=english_title + " (NLP + structured data)"
+    )
+)
+```
+
+```python
+bio_venn = dict(A="Anti-RNA pol 3", B="Anti-SCL 70")
+plot_venn(nlp_patient_group, bio_venn, english_title, method="discharge summaries")
+plt.savefig(f"figures/{disease}/venn_nlp.jpeg")
+plt.show()
+plot_venn(structured_patient_group, bio_venn, english_title, method="structured data")
+plt.savefig(f"figures/{disease}/venn_structured.jpeg")
+plt.show()
+plot_venn(
+    nlp_structured_patient_group,
+    bio_venn,
+    english_title,
+    method="discharge summaries and structured data",
+)
+plt.savefig(f"figures/{disease}/venn_nlp_structured.jpeg")
+plt.show()
+```
+
+```python
+bio_venn = dict(A="Anti-RNA pol 3 positif", B="Anti-SCL 70 positif")
+plot_venn(nlp_patient_group, bio_venn, english_title, method="discharge summaries")
+plt.savefig(f"figures/{disease}/venn_pos_nlp.jpeg")
+plt.show()
+plot_venn(structured_patient_group, bio_venn, english_title, method="structured data")
+plt.savefig(f"figures/{disease}/venn_pos_structured.jpeg")
+plt.show()
+plot_venn(
+    nlp_structured_patient_group,
+    bio_venn,
+    english_title,
+    method="discharge summaries and structured data",
+)
+plt.savefig(f"figures/{disease}/venn_pos_nlp_structured.jpeg")
+plt.show()
+```
+
+## Takayasu's disease
+
+```python
+disease = "maladie_de_takayasu"
+english_title = "Takayasu's disease"
+nlp_filtered_res, _ = prepare_nlp_df(disease)
+structured_filtered_res, _ = prepare_structured_df(disease)
+_, nlp_patient_med_group = prepare_nlp_med_df(disease)
+_, structured_patient_med_group = prepare_structured_med_df(disease)
+structured_filtered_res["method"] = "structured_knowledge"
+nlp_filtered_res["method"] = "nlp"
+nlp_structured_filtered_res = pd.concat(
+    [
+        structured_filtered_res[
+            ["encounter_num", "patient_num", "value", "unit", "bio", "method"]
+        ],
+        nlp_filtered_res[
+            ["encounter_num", "patient_num", "value", "unit", "bio", "method"]
+        ],
+    ]
+)
+```
+
+```python
+plot_summary_med(nlp_patient_med_group, structured_patient_med_group, english_title)
+```
+
+```python
+nlp_hists = plot_hist(unit_convert, possible_values, nlp_filtered_res, True).properties(
+    title=english_title + " (NLP)"
+)
+structured_hists = plot_hist(
+    unit_convert, possible_values, structured_filtered_res, False
+).properties(title=english_title + " (structured data)")
+nlp_structured_hists = plot_hist(
+    unit_convert, possible_values, nlp_structured_filtered_res, False
+).properties(title=english_title + " (NLP + structured data)")
+chart = (
+    (nlp_hists & structured_hists & nlp_structured_hists)
+    .resolve_scale(x="independent")
+    .resolve_scale(y="independent")
+    .configure_title(anchor="middle", fontSize=20, orient="left")
+)
+os.makedirs(f"figures/{disease}", exist_ok=True)
+chart.save(f"figures/{disease}/histogram.png")
+chart.save(f"figures/{disease}/histogram.html")
+# display(chart)
+```
+
+```python
+Biology_nlp_hist.append(
+    plot_hist(unit_convert, possible_values, nlp_filtered_res).properties(
+        title=english_title + " (NLP)"
+    )
+)
+Biology_structured_hist.append(
+    plot_hist(unit_convert, possible_values, structured_filtered_res).properties(
+        title=english_title + " (structured data)"
+    )
+)
+Biology_nlp_structured_hist.append(
+    plot_hist(unit_convert, possible_values, nlp_structured_filtered_res).properties(
+        title=english_title + " (NLP + structured data)"
+    )
+)
+```
+
+```python
+chart = reduce(
+    lambda bar_chart_1, bar_chart_2: (bar_chart_1 & bar_chart_2)
+    .resolve_scale(x="independent")
+    .resolve_scale(y="independent"),
+    Biology_nlp_hist,
+).configure_title(orient="left", anchor="middle", fontSize=20)
+chart.save("figures/histogram_nlp.png")
+chart.save("figures/histogram_nlp.html")
+# display(chart)
+```
+
+```python
+chart = reduce(
+    lambda bar_chart_1, bar_chart_2: (bar_chart_1 & bar_chart_2)
+    .resolve_scale(x="independent")
+    .resolve_scale(y="independent"),
+    Biology_structured_hist,
+).configure_title(orient="left", anchor="middle", fontSize=20)
+chart.save("figures/histogram_structured.png")
+chart.save("figures/histogram_structured.html")
+# display(chart)
+```
+
+```python
+chart = reduce(
+    lambda bar_chart_1, bar_chart_2: (bar_chart_1 & bar_chart_2)
+    .resolve_scale(x="independent")
+    .resolve_scale(y="independent"),
+    Biology_nlp_structured_hist,
+).configure_title(orient="left", anchor="middle", fontSize=20)
+chart.save("figures/histogram_nlp_structured.png")
+chart.save("figures/histogram_nlp_structured.html")
+# display(chart)
+```
+
+```python
+res_filtered_df[res_filtered_df.bio == "DFG"].unit.value_counts()
+```
+
+```python
+anti_B2GP1 = [
+    "E9627",
+    "A7854",
+    "H3772",
+    "E5157",
+    "I2042",
+    "I1882",
+    "X9708",
+    "H6269",
+    "X5898",
+    "X2761",
+    "A7855",
+    "H9650",
+    "E9626",
+    "I2043",
+    "I1883",
+    "X9707",
+    "H6270",
+    "X5899",
+    "I5970",
+    "H5543",
+    "H6271",
+    "J9678",
+    "J9704",
+    "J9705",
+    "K8345",
+]
+```
+
+```python
+structured_filtered_res.explode("name_char").name_char.str.split(":").str.get(0).unique()
+```
+
+```python
+structured_res = pd.read_json(
+    join(BRAT_DIR, "summary_df_bio.json"),
+    dtype={"encounter_num": str, "patient_num": str},
+).explode("label")
+```
+
+```python
+structured_res = structured_res.explode("name_char")
+structured_res[structured_res.name_char.str.contains("F8160")]
+```
diff --git a/notebooks/knowledge.py b/notebooks/knowledge.py
new file mode 100644
index 000000000..efad84253
--- /dev/null
+++ b/notebooks/knowledge.py
@@ -0,0 +1,650 @@
+TO_BE_MATCHED = {
"lupus_erythemateux_dissemine": { + "CIM10": ["M320", "M321", "M328", "M329"], + "ATC_codes": { + "Corticothérapie systémique": ["H02AB"], + "Endoxan": ["L01AA01"], + "Cellcept": ["L04AA06"], + "Rituximab": ["L01XC02"], + "Belimumab": ["L04AA26"], + "Methotrexate": ["L04AX03"], + "Plaquenil": ["P01BA02"], + "Vaccins Prevenar": ["J07AL02"], + "Vaccins Pneumovax": ["J07AL01"], + "Vaccins Grippe": ["J07BB"], + }, + "ANABIO_codes": { + "CRP": ["E6332", "A0248", "F5581", "J7381", "F2631"], + "Créatininémie": [ + "H9412", + "F2621", + "H9411", + "H4038", + "H9421", + "A0094", + "J4687", + "C0697", + "H9427", + "H9426", + "E3180", + "A7813", + "H9417", + "F9410", + "H9424", + "G1975", + "H9413", + "H9418", + "J1002", + "H9419", + "G7834", + "H9415", + "H9425", + "G1974", + "H9422", + "H9416", + "F9409", + "H9414", + "H9423", + "H9420", + ], + "DFG": ["F8160", "G6921"], + "Hémoglobine": [ + "J6562", + "J4764", + "A9882", + "B1946", + "C1549", + "B1947", + "Z0363", + "I4392", + "C8745", + "E9823", + "I7893", + "B1945", + "F8179", + "A0163", + ], + "Facteur anti-nucléaire": [ + "A7947", + "A7941", + "A7953", + "C2707", + "C2757", + "C2762", + "C2758", + "C2763", + "C2759", + "C2764", + "C2760", + "C2765", + "I1951", + "I1952", + "I1953", + "I1954", + "I1955", + "I1956", + "I1839", + ], + "Anti-DNA natif": [ + "A7844", + "A7847", + "C2683", + "C2684", + "A7850", + "B2594", + "E6639", + "B2599", + "H8136", + "C8243", + "C8245", + "I2221", + "I2222", + "L7308", + "C8765", + "A7838", + "H9653", + "C2748", + "A7835", + "B2595", + "C5008", + "C2685", + "X5893", + "B2600", + "A7841", + "L4997", + "L4998", + "D0198", + "X4089", + "X4807", + ], + "Anti-Sm": [ + "E6951", + "C8704", + "H6257", + "B3391", + "A7983", + "C5061", + "C7978", + "H8580", + "B3393", + "L7304", + "B3389", + ], + }, + "CUI_per_section": { + "all": { + "Facteur anti-nucléaire": [ + "C0587178", + "C1271804", + "C1273464", + "C1277811", + ], + "Anti-DNA natif": ["C1262035", "C0282056"], + "Anti-Sm": ["C0201357"], + "Hémoglobine": ["C0518015"], + "CRP": ["C0201657"], + "Créatininémie": ["C0201975"], + "DFG": ["C0017654", "C2733005"], + }, + "SECTION_examen_complementaire": {}, + }, + }, + "syndrome_des_anti-phospholipides": { + "CIM10": ["D686"], + "ATC_codes": { + "Héparine": ["B01AB01"], + "Anticoagulants oraux": ["B01AF02"], + }, + "ANABIO_codes": { + "CRP": ["E6332", "A0248", "F5581", "J7381", "F2631"], + "Créatininémie": [ + "H9412", + "F2621", + "H9411", + "H4038", + "H9421", + "A0094", + "J4687", + "C0697", + "H9427", + "H9426", + "E3180", + "A7813", + "H9417", + "F9410", + "H9424", + "G1975", + "H9413", + "H9418", + "J1002", + "H9419", + "G7834", + "H9415", + "H9425", + "G1974", + "H9422", + "H9416", + "F9409", + "H9414", + "H9423", + "H9420", + ], + "Hémoglobine": [ + "J6562", + "J4764", + "A9882", + "B1946", + "C1549", + "B1947", + "Z0363", + "I4392", + "C8745", + "E9823", + "I7893", + "B1945", + "F8179", + "A0163", + ], + "INR": ["A0269"], + "Anti-cardiolipides": [ + "H5544", + "E2153", + "C3960", + "H3773", + "E2154", + "C2686", + "L5048", + "I1880", + "X9706", + "A7856", + "H6272", + "K9986", + "L7125", + "X5896", + "X8627", + "E2155", + "C6288", + "H3774", + "E2156", + "C6289", + "L5049", + "I1881", + "X9705", + "A7857", + "H6273", + "K9987", + "L7124", + "X5897", + "I5969", + "H5542", + "H6274", + "L5050", + "J9703", + "X4100", + "K8344", + ], + "Anti_phospholipides": [ + "B4504", + "E2293", + "E2297", + "E2294", + "L1016", + "C7890", + "B4505", + "E2295", + "E2298", + "E2296", + "L1017", + "C7891", + "C4781", + "X5900", + 
"F8079", + "I6159", + "F8080", + "I6160", + "L5296", + "F8139", + "I6150", + "L5297", + "F8140", + "I6149", + "L5298", + "X2612", + "X4808", + "B2604", + "I6157", + "B2605", + "I6158", + "K8346", + ], + "anti_B2GP1": [ + "E9627", + "A7854", + "H3772", + "E5157", + "I2042", + "L5051", + "I1882", + "X9708", + "H6269", + "L7127", + "X5898", + "X2761", + "A7855", + "H9650", + "E9626", + "I2043", + "L5052", + "I1883", + "X9707", + "H6270", + "L7126", + "X5899", + "I5970", + "H5543", + "H6271", + "J9678", + "L5053", + "J9704", + "J9705", + "K8345", + ], + "anticoagulant_circulant_lupique": [ + "A7747", + "A7746", + "A7749", + "F0800", + "F0803", + "A1793", + "F0807", + "A7748", + "C4797", + "A7750", + "A1791", + "D0020", + "K9945", + "K9946", + "K9947", + "K9948", + "J7800", + "J7801", + "J7802", + "J7803", + "J7804", + "J7805", + "B3819", + "B5606", + "B3821", + "B3817", + "E2945", + "B9000", + "B9001", + "B9002", + "E5356", + "B8999", + "B3820", + "B5607", + "B3822", + "E5357", + "B3818", + "E5344", + "H5107", + "C5050", + "X2278", + "X5408", + "E5358", + "E5360", + "E5359", + "G1945", + "G1946", + "G1947", + "G1948", + "G1949", + "G1950", + "G1951", + "X4109", + "C2770", + "C2773", + "E5355", + "E5948", + "E5412", + "X4143", + "B9005", + "B9003", + "B9004", + "B8082", + "B8084", + "B8083", + "B9995", + "B9997", + "C1407", + "C1411", + "C1409", + "C1406", + "A1809", + "A0358", + "E5413", + "A0354", + "A1810", + "A3966", + "G1987", + "H5105", + "J3287", + "J3283", + "J3282", + "J3284", + "J3301", + "J3300", + "A0351", + "L8492", + "A0355", + "F0786", + "A0352", + "A0356", + "A3965", + "A3964", + "A0353", + "H5217", + "J3299", + "J3298", + "J3297", + "J3296", + "J3295", + "J3294", + "J3292", + "J3290", + "L8493", + "B4084", + "C2267", + "B9990", + "X4145", + "J3288", + "J2389", + "J2397", + "J2395", + "J2394", + "J2396", + "J2393", + "J2392", + "J2391", + "J2386", + "J2388", + "J2390", + "J2387", + "J2384", + "J2383", + "K2941", + "K2942", + "K2943", + "J2385", + "X2155", + "X4796", + "L3541", + "L7206", + "L7207", + ], + }, + "CUI_per_section": { + "all": { + "Anti-cardiolipides": ["C0201535", "C0455311"], + "Anti_phospholipides": ["C0201534"], + "anti_B2GP1": ["C1295005", "C1303280"], + "anticoagulant_circulant_lupique": [ + "C0455328", + "C1142517", + "C1277823", + "C0522828", + ], + "INR": ["C0525032"], + "Hémoglobine": ["C0518015"], + "CRP": ["C0201657"], + "Créatininémie": ["C0201975"], + }, + "SECTION_examen_complementaire": {}, + }, + }, + "sclerodermie_systemique": { + "CIM10": ["M340", "M341", "M348", "M349"], + "ATC_codes": { + "IgIV": ["J06BA02"], + "Corticothérapie systémique": ["H02AB"], + "Endoxan": ["L01AA01"], + "Cellcept": ["L04AA06"], + "Rituximab": ["L01XC02"], + "Belimumab": ["L04AA26"], + "Methotrexate": ["L04AX03"], + "Plaquenil": ["P01BA02"], + "Vaccins Prevenar": ["J07AL02"], + "Vaccins Pneumovax": ["J07AL01"], + "Vaccins Grippe": ["J07BB"], + }, + "ANABIO_codes": { + "CRP": ["E6332", "A0248", "F5581", "J7381", "F2631"], + "Créatininémie": [ + "H9412", + "F2621", + "H9411", + "H4038", + "H9421", + "A0094", + "J4687", + "C0697", + "H9427", + "H9426", + "E3180", + "A7813", + "H9417", + "F9410", + "H9424", + "G1975", + "H9413", + "H9418", + "J1002", + "H9419", + "G7834", + "H9415", + "H9425", + "G1974", + "H9422", + "H9416", + "F9409", + "H9414", + "H9423", + "H9420", + ], + "DFG": ["F8160", "G6921"], + "Hémoglobine": [ + "J6562", + "J4764", + "A9882", + "B1946", + "C1549", + "B1947", + "Z0363", + "I4392", + "C8745", + "E9823", + "I7893", + "B1945", + "F8179", + "A0163", + 
], + "Anti-RNA pol 3": [ + "C4950", + "C6911", + "E6593", + "I2082", + "K9937", + "L1626", + "E7376", + "H8235", + ], + "Anti-SCL 70": [ + "E6950", + "C8702", + "H6256", + "B4528", + "C5060", + "A7981", + "C9997", + "H8582", + "B4530", + "L7302", + "B4527", + "H8573", + "C2711", + "K5223", + ], + }, + "CUI_per_section": { + "all": { + "Anti-RNA pol 3": ["C1295034"], + "Anti-SCL 70": ["C0523317"], + "Anti-centromères": ["C0201361"], + "Hémoglobine": ["C0518015"], + "CRP": ["C0201657"], + "Créatininémie": ["C0201975"], + "DFG": ["C0017654", "C2733005"], + }, + "SECTION_examen_complementaire": {}, + }, + }, + "maladie_de_takayasu": { + "CIM10": ["M314"], + "ATC_codes": { + "Corticothérapie systémique": ["H02AB"], + "Endoxan": ["L01AA01"], + "Tocilizumab": ["L04AC07"], + "Cellcept": ["L04AA06"], + "Rituximab": ["L01XC02"], + "Belimumab": ["L04AA26"], + "Methotrexate": ["L04AX03"], + "Plaquenil": ["P01BA02"], + "IgIV": ["J06BA02"], + "Vaccins Prevenar": ["J07AL02"], + "Vaccins Pneumovax": ["J07AL01"], + "Vaccins Grippe": ["J07BB"], + }, + "ANABIO_codes": { + "CRP": ["E6332", "A0248", "F5581", "J7381", "F2631"], + "Créatininémie": [ + "H9412", + "F2621", + "H9411", + "H4038", + "H9421", + "A0094", + "J4687", + "C0697", + "H9427", + "H9426", + "E3180", + "A7813", + "H9417", + "F9410", + "H9424", + "G1975", + "H9413", + "H9418", + "J1002", + "H9419", + "G7834", + "H9415", + "H9425", + "G1974", + "H9422", + "H9416", + "F9409", + "H9414", + "H9423", + "H9420", + ], + "DFG": ["F8160", "G6921"], + "Hémoglobine": [ + "J6562", + "J4764", + "A9882", + "B1946", + "C1549", + "B1947", + "Z0363", + "I4392", + "C8745", + "E9823", + "I7893", + "B1945", + "F8179", + "A0163", + ], + }, + "CUI_per_section": { + "all": { + "Hémoglobine": ["C0518015"], + "CRP": ["C0201657"], + "Créatininémie": ["C0201975"], + "DFG": ["C0017654", "C2733005"], + }, + "SECTION_examen_complementaire": {}, + }, + }, +} diff --git a/notebooks/sections/annotated_sections.csv b/notebooks/sections/annotated_sections.csv deleted file mode 100644 index f22f5541c..000000000 --- a/notebooks/sections/annotated_sections.csv +++ /dev/null @@ -1,121 +0,0 @@ -lexical_variant,section -ENTREE, -OBSERVATION, -PRISE EN CHARGE, -CONCLUSION, -CORRESPONDANTS, -Antécédents, -Motif, -Résumé clinique, -Traitement en cours, -Evolution depuis la dernière consultation, -Suivi, -Examen clinique, -Examens complémentaires, -Conclusion, -Synthèse, -Histoire de la maladie actuelle, -Traitement, -Habitus, -Résultats d'examens, -Indication de l'acte, -Facteurs de risque, -Résultat de la coronarographie, -ANTECEDENTS :, -MODE DE VIE :, -TRAITEMENT ACTUEL :, -HISTOIRE DU POIDS :, -EXAMEN CLINIQUE :, -ENQUETE ACTIVITE PHYSIQUE :, -SYNTHESE MEDICALE/CONCLUSION, -ANTÉCÉDENTS :, -ENQUÊTE ACTIVITÉ PHYSIQUE :, -SYNTÈSE MÉDICALE / CONCLUSION, -DIAGNOSTIC, -INTERVENTION, -RAPPEL CLINIQUE, -DESCRIPTION DETAILLEE, -Motif de l'hospitalisation :, -Histoire de la maladie :, -Examen clinique à l'entrée :, -Examen(s) complémentaire(s) :, -Intervention(s) - acte(s) réalisé(s) :, -Suites opératoires :, -Conduite à tenir :, -Prescriptions de sortie :, -Conclusion :, -Antécédents :, -CONCLUSION :, -MOTIF D'HOSPITALISATION, -MODE DE VIE, -HABITUS, -ANTECEDENTS, -HISTOIRE RECENTE, -EXAMEN CLINIQUE A L'ENTREE, -HEMOPATHIE, -EVOLUTION, -CONCLUSION DE SORTIE, -CONSIGNES A LA SORTIE, -RESUME, -TRAITEMENT A L'ENTREE, -POSE DE CATHETER CENTRAL, -STATUT FONCTIONNEL DE SORTIE, -TRAITEMENT DE SORTIE, -FACTEURS DE RISQUES, -Antécédents personnels, -HISTOIRE DE LA MALADIE, -EXAMENS COMPLEMENTAIRES, -AU 
TOTAL, -Motif de l'hospitalisation, -Diagnostics, -Prescriptions médicales de sortie, -Soins infirmiers, -Traitement de sortie :, -EXAMEN CLINIQUE, -Examen clinique , -EXAMENS COMPLÉMENTAIRES, -Actes réalisés, -VACCINATIONS, -Examen clinique :, -Examens complémentaires :, -ANTÉCÉDENTS, -TRAITEMENT À L'ENTRÉE, -EXAMEN CLINIQUE À L'ENTRÉE, -TRAITEMENT A L'ENTRÉE, -EXAMENS COMPLÉMENTAIRES RÉALISÉS PENDANT LE SÉJOUR, -ÉVOLUTION, -PROJET DIAGNOSTIQUE ET THÉRAPEUTIQUE, -Contexte familial et social, -Antécédents familiaux, -Grossesse - Période néonatale, -Histoire de la maladie, -Examen clinique à l'entrée, -Examens complémentaires à l'entrée, -Conclusion à l'entrée, -Attitude thérapeutique initiale, -Dépistages, -Conclusion de sortie, -Destination de sortie, -Traitement de sortie, -Planification des soins, -Scores à l'entrée, -Mode de vie, -Traitements à l'entrée, -Données biométriques et paramètres vitaux à l'entrée, -Evolution, -Scores à la sortie, -"Situation sociale, mode de vie", -Histoire de la maladie - Explorations, -Histoire récente de la maladie, -Examens para-cliniques, -Diagnostic retenu, -Diagnostics :, -Prescriptions médicales de sortie :, -Actes réalisés :, -Antécédents médicaux et chirurgicaux, -Vaccinations, -Mode de vie - Scolarité, -EXAMEN CLINIQUE A L'ENTRÉE, -PARAMÈTRES VITAUX ET DONNÉES BIOMÉTRIQUES A L'ENTRÉE, -PLANIFICATION DES SOINS / SUITES À DONNER, -PARAMETRES VITAUX ET DONNEES BIOMETRIQUES A L'ENTREE, diff --git a/notebooks/sections/sections.xlsx b/notebooks/sections/sections.xlsx deleted file mode 100644 index fc1df11b4..000000000 Binary files a/notebooks/sections/sections.xlsx and /dev/null differ diff --git a/tests/resources/brat_data/subfolder/doc-1.ann b/tests/resources/brat_data/subfolder/doc-1.ann deleted file mode 100644 index b9bc7af1e..000000000 --- a/tests/resources/brat_data/subfolder/doc-1.ann +++ /dev/null @@ -1,24 +0,0 @@ -R1 lieu Arg1:T8 Arg2:T9 -T1 sosy 30 38 douleurs -T2 localisation 39 57 dans le bras droit -T3 anatomie 47 57 bras droit -T4 pathologie 75 83;85 98 problème de locomotion -A1 assertion T4 absent -T5 pathologie 114 117 AVC -A2 etat T5 passé -A3 assertion T5 non-associé -T6 pathologie 159 164 rhume -A4 etat T6 présent -A5 assertion T6 hypothétique -T7 pathologie 291 296 rhume -A6 etat T7 présent -A7 assertion T7 hypothétique -T8 sosy 306 314 Douleurs -T9 localisation 315 333 dans le bras droit -T10 anatomie 323 333 bras droit -T11 sosy 378 386 anomalie -#1 AnnotatorNotes T7 Repetition -R2 lieu Arg1:T1 Arg2:T2 -A8 assertion T11 absent -E1 MyArg1:T3 MyArg2:T1 -E2 MyArg1:T1 MyArg2:E1 diff --git a/tests/resources/brat_data/subfolder/doc-1.txt b/tests/resources/brat_data/subfolder/doc-1.txt deleted file mode 100644 index 37cfaa255..000000000 --- a/tests/resources/brat_data/subfolder/doc-1.txt +++ /dev/null @@ -1,10 +0,0 @@ -Le patient est admis pour des douleurs dans le bras droit, mais n'a pas de problème -de locomotion. -Historique d'AVC dans la famille. pourrait être un cas de rhume. -NBNbWbWbNbWbNBNbNbWbWbNBNbWbNbNbWbNBNbWbNbNBWbWbNbNbNBWbNbWbNbWBNbNbWbNbNBNbWbWbNbWBNbNbWbNBNbWbWbNb -Pourrait être un cas de rhume. -Motif : -Douleurs dans le bras droit. -ANTÉCÉDENTS -Le patient est déjà venu -Pas d'anomalie détectée.
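For reference, the `TO_BE_MATCHED` knowledge base added in `notebooks/knowledge.py` above is keyed by disease; each entry carries `CIM10` billing codes, `ATC_codes` for drug classes, `ANABIO_codes` for local lab-test codes and `CUI_per_section` for UMLS concepts. A minimal sketch of how a consumer can read it; the two helpers are hypothetical, written only to show the shape of the dictionary, and assume `notebooks/` is on the import path.

```python
# Hypothetical accessors over the TO_BE_MATCHED knowledge base (sketch only;
# the actual prepare_*_df functions live in the notebook above).
from knowledge import TO_BE_MATCHED


def anabio_codes(disease: str) -> dict:
    """Local ANABIO codes of each lab test tracked for a disease."""
    return TO_BE_MATCHED[disease]["ANABIO_codes"]


def cuis(disease: str, section: str = "all") -> dict:
    """UMLS CUIs of each lab test for a disease, for a given note section."""
    return TO_BE_MATCHED[disease]["CUI_per_section"][section]


print(anabio_codes("maladie_de_takayasu")["DFG"])  # ['F8160', 'G6921']
print(cuis("lupus_erythemateux_dissemine")["Anti-Sm"])  # ['C0201357']
```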