Skip to content

Commit 568e1cd

Browse files
committed
remove ner/ned code from page2tsv package
1 parent ed90193 commit 568e1cd

File tree

6 files changed: 2 additions (+2) and 283 deletions (-283).

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
 ocrd >= 2.23.2
 pandas
 matplotlib
+qurator-sbb-tools

setup.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
"annotate-tsv=tsvtools.cli:annotate_tsv",
2525
"page2tsv=tsvtools.cli:page2tsv",
2626
"tsv2page=tsvtools.cli:tsv2page",
27-
"find-entities=tsvtools.cli:find_entities",
2827
"make-page2tsv-commands=tsvtools.cli:make_page2tsv_commands"
2928
]
3029
},

tsvtools/cli.py

Lines changed: 1 addition & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
import json
21
import glob
32
import re
43
import os
@@ -14,12 +13,9 @@
1413
from ocrd_models.ocrd_page import parse
1514
from ocrd_utils import bbox_from_points
1615

17-
from .ned import ned
18-
from .ner import ner
19-
from .tsv import read_tsv, write_tsv, extract_doc_links
16+
from qurator.utils.tsv import read_tsv, write_tsv, extract_doc_links
2017
from .ocr import get_conf_color
2118

22-
2319
@click.command()
2420
@click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1)
2521
@click.argument('url-file', type=click.Path(exists=False), required=True, nargs=1)
@@ -218,59 +214,6 @@ def tsv2page(output_filename, keep_words, page_file, tsv_file):
218214
f.write(ET.tostring(tree, pretty_print=True).decode('utf-8'))
219215

220216

221-
@click.command()
222-
@click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1)
223-
@click.argument('tsv-out-file', type=click.Path(), required=True, nargs=1)
224-
@click.option('--ner-rest-endpoint', type=str, default=None,
225-
help="REST endpoint of sbb_ner service. See https://github.com/qurator-spk/sbb_ner for details.")
226-
@click.option('--ned-rest-endpoint', type=str, default=None,
227-
help="REST endpoint of sbb_ned service. See https://github.com/qurator-spk/sbb_ned for details.")
228-
@click.option('--ned-json-file', type=str, default=None)
229-
@click.option('--noproxy', type=bool, is_flag=True, help='disable proxy. default: proxy is enabled.')
230-
@click.option('--ned-threshold', type=float, default=None)
231-
@click.option('--ned-priority', type=int, default=1)
232-
def find_entities(tsv_file, tsv_out_file, ner_rest_endpoint, ned_rest_endpoint, ned_json_file, noproxy, ned_threshold,
233-
ned_priority):
234-
235-
if noproxy:
236-
os.environ['no_proxy'] = '*'
237-
238-
tsv, urls = read_tsv(tsv_file)
239-
240-
try:
241-
if ner_rest_endpoint is not None:
242-
243-
tsv, ner_result = ner(tsv, ner_rest_endpoint)
244-
245-
elif os.path.exists(tsv_file):
246-
247-
print('Using NER information that is already contained in file: {}'.format(tsv_file))
248-
249-
tmp = tsv.copy()
250-
tmp['sen'] = (tmp['No.'] == 0).cumsum()
251-
tmp.loc[~tmp['NE-TAG'].isin(['O', 'B-PER', 'B-LOC', 'B-ORG', 'I-PER', 'I-LOC', 'I-ORG']), 'NE-TAG'] = 'O'
252-
253-
ner_result = [[{'word': str(row.TOKEN), 'prediction': row['NE-TAG']} for _, row in sen.iterrows()]
254-
for _, sen in tmp.groupby('sen')]
255-
else:
256-
raise RuntimeError("Either NER rest endpoint or NER-TAG information within tsv_file required.")
257-
258-
if ned_rest_endpoint is not None:
259-
260-
tsv, ned_result = ned(tsv, ner_result, ned_rest_endpoint, json_file=ned_json_file, threshold=ned_threshold,
261-
priority=ned_priority)
262-
263-
if ned_json_file is not None and not os.path.exists(ned_json_file):
264-
265-
with open(ned_json_file, "w") as fp_json:
266-
json.dump(ned_result, fp_json, indent=2, separators=(',', ': '))
267-
268-
write_tsv(tsv, urls, tsv_out_file)
269-
270-
except requests.HTTPError as e:
271-
print(e)
272-
273-
274217
@click.command()
275218
@click.option('--xls-file', type=click.Path(exists=True), default=None,
276219
help="Read parameters from xls-file. Expected columns: Filename, iiif_url, scale_factor.")

tsvtools/ned.py

Lines changed: 0 additions & 88 deletions
This file was deleted.

tsvtools/ner.py

Lines changed: 0 additions & 49 deletions
This file was deleted.

tsvtools/tsv.py

Lines changed: 0 additions & 87 deletions
This file was deleted.

0 commit comments

Comments
 (0)