88# FIXME: fix all of those...
99# pyright: reportUnnecessaryTypeIgnoreComment=true
1010# pyright: reportPossiblyUnboundVariable=false
11- # pyright: reportCallIssue=false
1211# pyright: reportOperatorIssue=false
1312# pyright: reportUnboundVariable=false
1413# pyright: reportArgumentType=false
2019import logging
2120import sys
2221
23- # cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
24- if sys .version_info < (3 , 10 ):
25- import importlib_resources
26- else :
27- import importlib .resources as importlib_resources
28-
2922from difflib import SequenceMatcher as sq
3023import math
3124import os
4841
4942tf_disable_interactive_logs ()
5043
51- import tensorflow as tf # type: ignore
44+ import tensorflow as tf
5245try :
53- import torch # type: ignore
46+ import torch
5447except ImportError :
5548 torch = None
5649try :
@@ -2974,19 +2967,17 @@ def run_single(self):
29742967
29752968 order_text_new = [0 ]
29762969 slopes = [0 ]
2977- id_of_texts_tot = ['region_0001' ]
29782970 conf_contours_textregions = [0 ]
29792971
29802972 pcgts = self .writer .build_pagexml_no_full_layout (
29812973 found_polygons_text_region = cont_page ,
29822974 page_coord = page_coord ,
29832975 order_of_texts = order_text_new ,
2984- id_of_texts = id_of_texts_tot ,
29852976 all_found_textline_polygons = all_found_textline_polygons ,
29862977 all_box_coord = page_coord ,
2987- polygons_of_images = [],
2988- polygons_of_marginals_left = [],
2989- polygons_of_marginals_right = [],
2978+ found_polygons_text_region_img = [],
2979+ found_polygons_marginals_left = [],
2980+ found_polygons_marginals_right = [],
29902981 all_found_textline_polygons_marginals_left = [],
29912982 all_found_textline_polygons_marginals_right = [],
29922983 all_box_coord_marginals_left = [],
@@ -2996,9 +2987,7 @@ def run_single(self):
29962987 slopes_marginals_right = [],
29972988 cont_page = cont_page ,
29982989 polygons_seplines = [],
2999- contours_tables = [],
3000- conf_contours_textregion = conf_contours_textregions ,
3001- skip_layout_reading_order = True
2990+ found_polygons_tables = [],
30022991 )
30032992 self .logger .info ("Basic processing complete" )
30042993 return pcgts
@@ -3046,12 +3035,11 @@ def run_single(self):
30463035 found_polygons_text_region = [],
30473036 page_coord = page_coord ,
30483037 order_of_texts = [],
3049- id_of_texts = [],
30503038 all_found_textline_polygons = [],
30513039 all_box_coord = [],
3052- polygons_of_images = [],
3053- polygons_of_marginals_left = [],
3054- polygons_of_marginals_right = [],
3040+ found_polygons_text_region_img = [],
3041+ found_polygons_marginals_left = [],
3042+ found_polygons_marginals_right = [],
30553043 all_found_textline_polygons_marginals_left = [],
30563044 all_found_textline_polygons_marginals_right = [],
30573045 all_box_coord_marginals_left = [],
@@ -3061,7 +3049,7 @@ def run_single(self):
30613049 slopes_marginals_right = [],
30623050 cont_page = cont_page ,
30633051 polygons_seplines = [],
3064- contours_tables = []
3052+ found_polygons_tables = [],
30653053 )
30663054 return pcgts
30673055
@@ -3275,20 +3263,19 @@ def deskew(polygon):
32753263 empty_marginals = [[]] * len (polygons_of_marginals )
32763264 if self .full_layout :
32773265 pcgts = self .writer .build_pagexml_full_layout (
3278- contours_only_text_parent = [],
3279- contours_only_text_parent_h = [],
3266+ found_polygons_text_region = [],
3267+ found_polygons_text_region_h = [],
32803268 page_coord = page_coord ,
32813269 order_of_texts = [],
3282- id_of_texts = [],
32833270 all_found_textline_polygons = [],
32843271 all_found_textline_polygons_h = [],
32853272 all_box_coord = [],
32863273 all_box_coord_h = [],
3287- polygons_of_images = polygons_of_images ,
3288- contours_tables = contours_tables ,
3289- polygons_of_drop_capitals = [],
3290- polygons_of_marginals_left = polygons_of_marginals ,
3291- polygons_of_marginals_right = polygons_of_marginals ,
3274+ found_polygons_text_region_img = polygons_of_images ,
3275+ found_polygons_tables = contours_tables ,
3276+ found_polygons_drop_capitals = [],
3277+ found_polygons_marginals_left = polygons_of_marginals ,
3278+ found_polygons_marginals_right = polygons_of_marginals ,
32923279 all_found_textline_polygons_marginals_left = empty_marginals ,
32933280 all_found_textline_polygons_marginals_right = empty_marginals ,
32943281 all_box_coord_marginals_left = empty_marginals ,
@@ -3305,12 +3292,11 @@ def deskew(polygon):
33053292 found_polygons_text_region = [],
33063293 page_coord = page_coord ,
33073294 order_of_texts = [],
3308- id_of_texts = [],
33093295 all_found_textline_polygons = [],
33103296 all_box_coord = [],
3311- polygons_of_images = polygons_of_images ,
3312- polygons_of_marginals_left = polygons_of_marginals ,
3313- polygons_of_marginals_right = polygons_of_marginals ,
3297+ found_polygons_text_region_img = polygons_of_images ,
3298+ found_polygons_marginals_left = polygons_of_marginals ,
3299+ found_polygons_marginals_right = polygons_of_marginals ,
33143300 all_found_textline_polygons_marginals_left = empty_marginals ,
33153301 all_found_textline_polygons_marginals_right = empty_marginals ,
33163302 all_box_coord_marginals_left = empty_marginals ,
@@ -3320,7 +3306,7 @@ def deskew(polygon):
33203306 slopes_marginals_right = [],
33213307 cont_page = cont_page ,
33223308 polygons_seplines = polygons_seplines ,
3323- contours_tables = contours_tables
3309+ found_polygons_tables = contours_tables
33243310 )
33253311 return pcgts
33263312
@@ -3484,16 +3470,15 @@ def deskew(polygon):
34843470 found_polygons_text_region_h = contours_only_text_parent_h ,
34853471 page_coord = page_coord ,
34863472 order_of_texts = order_text_new ,
3487- id_of_texts = id_of_texts_tot ,
34883473 all_found_textline_polygons = all_found_textline_polygons ,
34893474 all_found_textline_polygons_h = all_found_textline_polygons_h ,
34903475 all_box_coord = all_box_coord ,
34913476 all_box_coord_h = all_box_coord_h ,
3492- polygons_of_images = polygons_of_images ,
3493- contours_tables = contours_tables ,
3494- polygons_of_drop_capitals = polygons_of_drop_capitals ,
3495- polygons_of_marginals_left = polygons_of_marginals_left ,
3496- polygons_of_marginals_right = polygons_of_marginals_right ,
3477+ found_polygons_text_region_img = polygons_of_images ,
3478+ found_polygons_tables = contours_tables ,
3479+ found_polygons_drop_capitals = polygons_of_drop_capitals ,
3480+ found_polygons_marginals_left = polygons_of_marginals_left ,
3481+ found_polygons_marginals_right = polygons_of_marginals_right ,
34973482 all_found_textline_polygons_marginals_left = all_found_textline_polygons_marginals_left ,
34983483 all_found_textline_polygons_marginals_right = all_found_textline_polygons_marginals_right ,
34993484 all_box_coord_marginals_left = all_box_coord_marginals_left ,
@@ -3512,12 +3497,11 @@ def deskew(polygon):
35123497 found_polygons_text_region = contours_only_text_parent ,
35133498 page_coord = page_coord ,
35143499 order_of_texts = order_text_new ,
3515- id_of_texts = id_of_texts_tot ,
35163500 all_found_textline_polygons = all_found_textline_polygons ,
35173501 all_box_coord = all_box_coord ,
3518- polygons_of_images = polygons_of_images ,
3519- polygons_of_marginals_left = polygons_of_marginals_left ,
3520- polygons_of_marginals_right = polygons_of_marginals_right ,
3502+ found_polygons_text_region_img = polygons_of_images ,
3503+ found_polygons_marginals_left = polygons_of_marginals_left ,
3504+ found_polygons_marginals_right = polygons_of_marginals_right ,
35213505 all_found_textline_polygons_marginals_left = all_found_textline_polygons_marginals_left ,
35223506 all_found_textline_polygons_marginals_right = all_found_textline_polygons_marginals_right ,
35233507 all_box_coord_marginals_left = all_box_coord_marginals_left ,
@@ -3527,8 +3511,7 @@ def deskew(polygon):
35273511 slopes_marginals_right = slopes_marginals_right ,
35283512 cont_page = cont_page ,
35293513 polygons_seplines = polygons_seplines ,
3530- contours_tables = contours_tables ,
3531- conf_contours_textregions = conf_contours_textregions
3514+ found_polygons_tables = contours_tables ,
35323515 )
35333516
35343517 return pcgts
0 commit comments