Skip to content

Commit d739937

Browse files
committed
enforce kwargs for writer.build_...
1 parent c87ad57 commit d739937

File tree

2 files changed

+218
-81
lines changed

2 files changed

+218
-81
lines changed

src/eynollah/eynollah.py

Lines changed: 136 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
# FIXME: fix all of those...
99
# pyright: reportUnnecessaryTypeIgnoreComment=true
1010
# pyright: reportPossiblyUnboundVariable=false
11-
# pyright: reportMissingImports=false
1211
# pyright: reportCallIssue=false
1312
# pyright: reportOperatorIssue=false
1413
# pyright: reportUnboundVariable=false
@@ -49,9 +48,9 @@
4948

5049
tf_disable_interactive_logs()
5150

52-
import tensorflow as tf
51+
import tensorflow as tf # type: ignore
5352
try:
54-
import torch
53+
import torch # type: ignore
5554
except ImportError:
5655
torch = None
5756
try:
@@ -2979,13 +2978,28 @@ def run_single(self):
29792978
conf_contours_textregions =[0]
29802979

29812980
pcgts = self.writer.build_pagexml_no_full_layout(
2982-
cont_page, page_coord, order_text_new, id_of_texts_tot,
2983-
all_found_textline_polygons, page_coord, [],
2984-
[], [], [], [], [], [],
2985-
slopes, [], [],
2986-
cont_page, [], [],
2981+
found_polygons_text_region=cont_page,
2982+
page_coord=page_coord,
2983+
order_of_texts=order_text_new,
2984+
id_of_texts=id_of_texts_tot,
2985+
all_found_textline_polygons=all_found_textline_polygons,
2986+
all_box_coord=page_coord,
2987+
polygons_of_images=[],
2988+
polygons_of_marginals_left=[],
2989+
polygons_of_marginals_right=[],
2990+
all_found_textline_polygons_marginals_left=[],
2991+
all_found_textline_polygons_marginals_right=[],
2992+
all_box_coord_marginals_left=[],
2993+
all_box_coord_marginals_right=[],
2994+
slopes=slopes,
2995+
slopes_marginals_left=[],
2996+
slopes_marginals_right=[],
2997+
cont_page=cont_page,
2998+
polygons_seplines=[],
2999+
contours_tables=[],
29873000
conf_contours_textregion=conf_contours_textregions,
2988-
skip_layout_reading_order=True)
3001+
skip_layout_reading_order=True
3002+
)
29893003
self.logger.info("Basic processing complete")
29903004
return pcgts
29913005

@@ -3029,8 +3043,26 @@ def run_single(self):
30293043
self.logger.info("No columns detected - generating empty PAGE-XML")
30303044

30313045
pcgts = self.writer.build_pagexml_no_full_layout(
3032-
[], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [],
3033-
cont_page, [], [])
3046+
found_polygons_text_region=[],
3047+
page_coord=page_coord,
3048+
order_of_texts=[],
3049+
id_of_texts=[],
3050+
all_found_textline_polygons=[],
3051+
all_box_coord=[],
3052+
polygons_of_images=[],
3053+
polygons_of_marginals_left=[],
3054+
polygons_of_marginals_right=[],
3055+
all_found_textline_polygons_marginals_left=[],
3056+
all_found_textline_polygons_marginals_right=[],
3057+
all_box_coord_marginals_left=[],
3058+
all_box_coord_marginals_right=[],
3059+
slopes=[],
3060+
slopes_marginals_left=[],
3061+
slopes_marginals_right=[],
3062+
cont_page=cont_page,
3063+
polygons_seplines=[],
3064+
contours_tables=[]
3065+
)
30343066
return pcgts
30353067

30363068
#print("text region early in %.1fs", time.time() - t0)
@@ -3243,22 +3275,53 @@ def deskew(polygon):
32433275
empty_marginals = [[]] * len(polygons_of_marginals)
32443276
if self.full_layout:
32453277
pcgts = self.writer.build_pagexml_full_layout(
3246-
[], [], page_coord, [], [], [], [], [], [],
3247-
polygons_of_images, contours_tables, [],
3248-
polygons_of_marginals, polygons_of_marginals,
3249-
empty_marginals, empty_marginals,
3250-
empty_marginals, empty_marginals,
3251-
[], [], [], [],
3252-
cont_page, polygons_seplines)
3278+
contours_only_text_parent=[],
3279+
contours_only_text_parent_h=[],
3280+
page_coord=page_coord,
3281+
order_of_texts=[],
3282+
id_of_texts=[],
3283+
all_found_textline_polygons=[],
3284+
all_found_textline_polygons_h=[],
3285+
all_box_coord=[],
3286+
all_box_coord_h=[],
3287+
polygons_of_images=polygons_of_images,
3288+
contours_tables=contours_tables,
3289+
polygons_of_drop_capitals=[],
3290+
polygons_of_marginals_left=polygons_of_marginals,
3291+
polygons_of_marginals_right=polygons_of_marginals,
3292+
all_found_textline_polygons_marginals_left=empty_marginals,
3293+
all_found_textline_polygons_marginals_right=empty_marginals,
3294+
all_box_coord_marginals_left=empty_marginals,
3295+
all_box_coord_marginals_right=empty_marginals,
3296+
slopes=[],
3297+
slopes_h=[],
3298+
slopes_marginals_left=[],
3299+
slopes_marginals_right=[],
3300+
cont_page=cont_page,
3301+
polygons_seplines=polygons_seplines
3302+
)
32533303
else:
32543304
pcgts = self.writer.build_pagexml_no_full_layout(
3255-
[], page_coord, [], [], [], [],
3256-
polygons_of_images,
3257-
polygons_of_marginals, polygons_of_marginals,
3258-
empty_marginals, empty_marginals,
3259-
empty_marginals, empty_marginals,
3260-
[], [], [],
3261-
cont_page, polygons_seplines, contours_tables)
3305+
found_polygons_text_region=[],
3306+
page_coord=page_coord,
3307+
order_of_texts=[],
3308+
id_of_texts=[],
3309+
all_found_textline_polygons=[],
3310+
all_box_coord=[],
3311+
polygons_of_images=polygons_of_images,
3312+
polygons_of_marginals_left=polygons_of_marginals,
3313+
polygons_of_marginals_right=polygons_of_marginals,
3314+
all_found_textline_polygons_marginals_left=empty_marginals,
3315+
all_found_textline_polygons_marginals_right=empty_marginals,
3316+
all_box_coord_marginals_left=empty_marginals,
3317+
all_box_coord_marginals_right=empty_marginals,
3318+
slopes=[],
3319+
slopes_marginals_left=[],
3320+
slopes_marginals_right=[],
3321+
cont_page=cont_page,
3322+
polygons_seplines=polygons_seplines,
3323+
contours_tables=contours_tables
3324+
)
32623325
return pcgts
32633326

32643327

@@ -3417,24 +3480,55 @@ def deskew(polygon):
34173480

34183481
if self.full_layout:
34193482
pcgts = self.writer.build_pagexml_full_layout(
3420-
contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
3421-
all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
3422-
polygons_of_images, contours_tables, polygons_of_drop_capitals,
3423-
polygons_of_marginals_left, polygons_of_marginals_right,
3424-
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
3425-
all_box_coord_marginals_left, all_box_coord_marginals_right,
3426-
slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
3427-
cont_page, polygons_seplines,
3428-
conf_contours_textregions, conf_contours_textregions_h)
3483+
found_polygons_text_region=contours_only_text_parent,
3484+
found_polygons_text_region_h=contours_only_text_parent_h,
3485+
page_coord=page_coord,
3486+
order_of_texts=order_text_new,
3487+
id_of_texts=id_of_texts_tot,
3488+
all_found_textline_polygons=all_found_textline_polygons,
3489+
all_found_textline_polygons_h=all_found_textline_polygons_h,
3490+
all_box_coord=all_box_coord,
3491+
all_box_coord_h=all_box_coord_h,
3492+
polygons_of_images=polygons_of_images,
3493+
contours_tables=contours_tables,
3494+
polygons_of_drop_capitals=polygons_of_drop_capitals,
3495+
polygons_of_marginals_left=polygons_of_marginals_left,
3496+
polygons_of_marginals_right=polygons_of_marginals_right,
3497+
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
3498+
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
3499+
all_box_coord_marginals_left=all_box_coord_marginals_left,
3500+
all_box_coord_marginals_right=all_box_coord_marginals_right,
3501+
slopes=slopes,
3502+
slopes_h=slopes_h,
3503+
slopes_marginals_left=slopes_marginals_left,
3504+
slopes_marginals_right=slopes_marginals_right,
3505+
cont_page=cont_page,
3506+
polygons_seplines=polygons_seplines,
3507+
conf_contours_textregions=conf_contours_textregions,
3508+
conf_contours_textregions_h=conf_contours_textregions_h
3509+
)
34293510
else:
34303511
pcgts = self.writer.build_pagexml_no_full_layout(
3431-
contours_only_text_parent, page_coord, order_text_new, id_of_texts_tot,
3432-
all_found_textline_polygons, all_box_coord, polygons_of_images,
3433-
polygons_of_marginals_left, polygons_of_marginals_right,
3434-
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
3435-
all_box_coord_marginals_left, all_box_coord_marginals_right,
3436-
slopes, slopes_marginals_left, slopes_marginals_right,
3437-
cont_page, polygons_seplines, contours_tables,
3438-
conf_contours_textregions=conf_contours_textregions)
3512+
found_polygons_text_region=contours_only_text_parent,
3513+
page_coord=page_coord,
3514+
order_of_texts=order_text_new,
3515+
id_of_texts=id_of_texts_tot,
3516+
all_found_textline_polygons=all_found_textline_polygons,
3517+
all_box_coord=all_box_coord,
3518+
polygons_of_images=polygons_of_images,
3519+
polygons_of_marginals_left=polygons_of_marginals_left,
3520+
polygons_of_marginals_right=polygons_of_marginals_right,
3521+
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
3522+
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
3523+
all_box_coord_marginals_left=all_box_coord_marginals_left,
3524+
all_box_coord_marginals_right=all_box_coord_marginals_right,
3525+
slopes=slopes,
3526+
slopes_marginals_left=slopes_marginals_left,
3527+
slopes_marginals_right=slopes_marginals_right,
3528+
cont_page=cont_page,
3529+
polygons_seplines=polygons_seplines,
3530+
contours_tables=contours_tables,
3531+
conf_contours_textregions=conf_contours_textregions
3532+
)
34393533

34403534
return pcgts

src/eynollah/writer.py

Lines changed: 82 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -81,48 +81,91 @@ def write_pagexml(self, pcgts):
8181
f.write(to_xml(pcgts))
8282

8383
def build_pagexml_no_full_layout(
        self,
        *,
        found_polygons_text_region,
        page_coord,
        order_of_texts,
        id_of_texts,
        all_found_textline_polygons,
        all_box_coord,
        found_polygons_text_region_img,
        found_polygons_marginals_left,
        found_polygons_marginals_right,
        all_found_textline_polygons_marginals_left,
        all_found_textline_polygons_marginals_right,
        all_box_coord_marginals_left,
        all_box_coord_marginals_right,
        slopes,
        slopes_marginals_left,
        slopes_marginals_right,
        cont_page,
        polygons_seplines,
        found_polygons_tables,
        **kwargs,
):
    """Build the PAGE-XML result for the non-full-layout case.

    Thin wrapper around ``build_pagexml_full_layout``: every argument is
    forwarded under the same name, and the inputs that only exist in the
    full-layout signature (``found_polygons_text_region_h``,
    ``all_found_textline_polygons_h``, ``all_box_coord_h``, ``slopes_h``
    and ``found_polygons_drop_capitals``) are supplied as empty lists.

    All parameters are keyword-only, so call sites must name each argument.

    Any additional keyword arguments are passed through unchanged to
    ``build_pagexml_full_layout``. Its optional parameters include
    ``conf_contours_textregions`` and ``skip_layout_reading_order``,
    which callers of this wrapper rely on.

    Returns whatever ``build_pagexml_full_layout`` returns.

    Raises ``TypeError`` (from Python's argument binding) if a required
    keyword is missing here, or if a pass-through keyword is not accepted
    by ``build_pagexml_full_layout``.
    """
    return self.build_pagexml_full_layout(
        found_polygons_text_region=found_polygons_text_region,
        found_polygons_text_region_h=[],
        page_coord=page_coord,
        order_of_texts=order_of_texts,
        id_of_texts=id_of_texts,
        all_found_textline_polygons=all_found_textline_polygons,
        all_found_textline_polygons_h=[],
        all_box_coord=all_box_coord,
        all_box_coord_h=[],
        found_polygons_text_region_img=found_polygons_text_region_img,
        found_polygons_tables=found_polygons_tables,
        found_polygons_drop_capitals=[],
        found_polygons_marginals_left=found_polygons_marginals_left,
        found_polygons_marginals_right=found_polygons_marginals_right,
        all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
        all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
        all_box_coord_marginals_left=all_box_coord_marginals_left,
        all_box_coord_marginals_right=all_box_coord_marginals_right,
        slopes=slopes,
        slopes_h=[],
        slopes_marginals_left=slopes_marginals_left,
        slopes_marginals_right=slopes_marginals_right,
        cont_page=cont_page,
        polygons_seplines=polygons_seplines,
        # Optional extras (e.g. conf_contours_textregions,
        # skip_layout_reading_order) are validated by the callee.
        **kwargs,
    )
108132

109133
def build_pagexml_full_layout(
110-
self,
111-
found_polygons_text_region, found_polygons_text_region_h,
112-
page_coord, order_of_texts, id_of_texts,
113-
all_found_textline_polygons, all_found_textline_polygons_h,
114-
all_box_coord, all_box_coord_h,
115-
found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals,
116-
found_polygons_marginals_left,found_polygons_marginals_right,
117-
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
118-
all_box_coord_marginals_left, all_box_coord_marginals_right,
119-
slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
120-
cont_page, polygons_seplines,
121-
ocr_all_textlines=None, ocr_all_textlines_h=None,
122-
ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None,
123-
ocr_all_textlines_drop=None,
124-
conf_contours_textregions=None, conf_contours_textregions_h=None,
125-
skip_layout_reading_order=False):
134+
self,
135+
*,
136+
found_polygons_text_region,
137+
found_polygons_text_region_h,
138+
page_coord,
139+
order_of_texts,
140+
id_of_texts,
141+
all_found_textline_polygons,
142+
all_found_textline_polygons_h,
143+
all_box_coord,
144+
all_box_coord_h,
145+
found_polygons_text_region_img,
146+
found_polygons_tables,
147+
found_polygons_drop_capitals,
148+
found_polygons_marginals_left,
149+
found_polygons_marginals_right,
150+
all_found_textline_polygons_marginals_left,
151+
all_found_textline_polygons_marginals_right,
152+
all_box_coord_marginals_left,
153+
all_box_coord_marginals_right,
154+
slopes,
155+
slopes_h,
156+
slopes_marginals_left,
157+
slopes_marginals_right,
158+
cont_page,
159+
polygons_seplines,
160+
ocr_all_textlines=None,
161+
ocr_all_textlines_h=None,
162+
ocr_all_textlines_marginals_left=None,
163+
ocr_all_textlines_marginals_right=None,
164+
ocr_all_textlines_drop=None,
165+
conf_contours_textregions=None,
166+
conf_contours_textregions_h=None,
167+
skip_layout_reading_order=False,
168+
):
126169
self.logger.debug('enter build_pagexml')
127170

128171
# create the file structure

0 commit comments

Comments
 (0)