|
8 | 8 | # FIXME: fix all of those... |
9 | 9 | # pyright: reportUnnecessaryTypeIgnoreComment=true |
10 | 10 | # pyright: reportPossiblyUnboundVariable=false |
11 | | -# pyright: reportMissingImports=false |
12 | 11 | # pyright: reportCallIssue=false |
13 | 12 | # pyright: reportOperatorIssue=false |
14 | 13 | # pyright: reportUnboundVariable=false |
|
49 | 48 |
|
50 | 49 | tf_disable_interactive_logs() |
51 | 50 |
|
52 | | -import tensorflow as tf |
| 51 | +import tensorflow as tf # type: ignore |
53 | 52 | try: |
54 | | - import torch |
| 53 | + import torch # type: ignore |
55 | 54 | except ImportError: |
56 | 55 | torch = None |
57 | 56 | try: |
@@ -2979,13 +2978,28 @@ def run_single(self): |
2979 | 2978 | conf_contours_textregions =[0] |
2980 | 2979 |
|
2981 | 2980 | pcgts = self.writer.build_pagexml_no_full_layout( |
2982 | | - cont_page, page_coord, order_text_new, id_of_texts_tot, |
2983 | | - all_found_textline_polygons, page_coord, [], |
2984 | | - [], [], [], [], [], [], |
2985 | | - slopes, [], [], |
2986 | | - cont_page, [], [], |
| 2981 | + found_polygons_text_region=cont_page, |
| 2982 | + page_coord=page_coord, |
| 2983 | + order_of_texts=order_text_new, |
| 2984 | + id_of_texts=id_of_texts_tot, |
| 2985 | + all_found_textline_polygons=all_found_textline_polygons, |
| 2986 | + all_box_coord=page_coord, |
| 2987 | + polygons_of_images=[], |
| 2988 | + polygons_of_marginals_left=[], |
| 2989 | + polygons_of_marginals_right=[], |
| 2990 | + all_found_textline_polygons_marginals_left=[], |
| 2991 | + all_found_textline_polygons_marginals_right=[], |
| 2992 | + all_box_coord_marginals_left=[], |
| 2993 | + all_box_coord_marginals_right=[], |
| 2994 | + slopes=slopes, |
| 2995 | + slopes_marginals_left=[], |
| 2996 | + slopes_marginals_right=[], |
| 2997 | + cont_page=cont_page, |
| 2998 | + polygons_seplines=[], |
| 2999 | + contours_tables=[], |
2987 | 3000 | conf_contours_textregion=conf_contours_textregions, |
2988 | | - skip_layout_reading_order=True) |
| 3001 | + skip_layout_reading_order=True |
| 3002 | + ) |
2989 | 3003 | self.logger.info("Basic processing complete") |
2990 | 3004 | return pcgts |
2991 | 3005 |
|
@@ -3029,8 +3043,26 @@ def run_single(self): |
3029 | 3043 | self.logger.info("No columns detected - generating empty PAGE-XML") |
3030 | 3044 |
|
3031 | 3045 | pcgts = self.writer.build_pagexml_no_full_layout( |
3032 | | - [], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [], |
3033 | | - cont_page, [], []) |
| 3046 | + found_polygons_text_region=[], |
| 3047 | + page_coord=page_coord, |
| 3048 | + order_of_texts=[], |
| 3049 | + id_of_texts=[], |
| 3050 | + all_found_textline_polygons=[], |
| 3051 | + all_box_coord=[], |
| 3052 | + polygons_of_images=[], |
| 3053 | + polygons_of_marginals_left=[], |
| 3054 | + polygons_of_marginals_right=[], |
| 3055 | + all_found_textline_polygons_marginals_left=[], |
| 3056 | + all_found_textline_polygons_marginals_right=[], |
| 3057 | + all_box_coord_marginals_left=[], |
| 3058 | + all_box_coord_marginals_right=[], |
| 3059 | + slopes=[], |
| 3060 | + slopes_marginals_left=[], |
| 3061 | + slopes_marginals_right=[], |
| 3062 | + cont_page=cont_page, |
| 3063 | + polygons_seplines=[], |
| 3064 | + contours_tables=[] |
| 3065 | + ) |
3034 | 3066 | return pcgts |
3035 | 3067 |
|
3036 | 3068 | #print("text region early in %.1fs", time.time() - t0) |
@@ -3243,22 +3275,53 @@ def deskew(polygon): |
3243 | 3275 | empty_marginals = [[]] * len(polygons_of_marginals) |
3244 | 3276 | if self.full_layout: |
3245 | 3277 | pcgts = self.writer.build_pagexml_full_layout( |
3246 | | - [], [], page_coord, [], [], [], [], [], [], |
3247 | | - polygons_of_images, contours_tables, [], |
3248 | | - polygons_of_marginals, polygons_of_marginals, |
3249 | | - empty_marginals, empty_marginals, |
3250 | | - empty_marginals, empty_marginals, |
3251 | | - [], [], [], [], |
3252 | | - cont_page, polygons_seplines) |
| 3278 | + contours_only_text_parent=[], |
| 3279 | + contours_only_text_parent_h=[], |
| 3280 | + page_coord=page_coord, |
| 3281 | + order_of_texts=[], |
| 3282 | + id_of_texts=[], |
| 3283 | + all_found_textline_polygons=[], |
| 3284 | + all_found_textline_polygons_h=[], |
| 3285 | + all_box_coord=[], |
| 3286 | + all_box_coord_h=[], |
| 3287 | + polygons_of_images=polygons_of_images, |
| 3288 | + contours_tables=contours_tables, |
| 3289 | + polygons_of_drop_capitals=[], |
| 3290 | + polygons_of_marginals_left=polygons_of_marginals, |
| 3291 | + polygons_of_marginals_right=polygons_of_marginals, |
| 3292 | + all_found_textline_polygons_marginals_left=empty_marginals, |
| 3293 | + all_found_textline_polygons_marginals_right=empty_marginals, |
| 3294 | + all_box_coord_marginals_left=empty_marginals, |
| 3295 | + all_box_coord_marginals_right=empty_marginals, |
| 3296 | + slopes=[], |
| 3297 | + slopes_h=[], |
| 3298 | + slopes_marginals_left=[], |
| 3299 | + slopes_marginals_right=[], |
| 3300 | + cont_page=cont_page, |
| 3301 | + polygons_seplines=polygons_seplines |
| 3302 | + ) |
3253 | 3303 | else: |
3254 | 3304 | pcgts = self.writer.build_pagexml_no_full_layout( |
3255 | | - [], page_coord, [], [], [], [], |
3256 | | - polygons_of_images, |
3257 | | - polygons_of_marginals, polygons_of_marginals, |
3258 | | - empty_marginals, empty_marginals, |
3259 | | - empty_marginals, empty_marginals, |
3260 | | - [], [], [], |
3261 | | - cont_page, polygons_seplines, contours_tables) |
| 3305 | + found_polygons_text_region=[], |
| 3306 | + page_coord=page_coord, |
| 3307 | + order_of_texts=[], |
| 3308 | + id_of_texts=[], |
| 3309 | + all_found_textline_polygons=[], |
| 3310 | + all_box_coord=[], |
| 3311 | + polygons_of_images=polygons_of_images, |
| 3312 | + polygons_of_marginals_left=polygons_of_marginals, |
| 3313 | + polygons_of_marginals_right=polygons_of_marginals, |
| 3314 | + all_found_textline_polygons_marginals_left=empty_marginals, |
| 3315 | + all_found_textline_polygons_marginals_right=empty_marginals, |
| 3316 | + all_box_coord_marginals_left=empty_marginals, |
| 3317 | + all_box_coord_marginals_right=empty_marginals, |
| 3318 | + slopes=[], |
| 3319 | + slopes_marginals_left=[], |
| 3320 | + slopes_marginals_right=[], |
| 3321 | + cont_page=cont_page, |
| 3322 | + polygons_seplines=polygons_seplines, |
| 3323 | + contours_tables=contours_tables |
| 3324 | + ) |
3262 | 3325 | return pcgts |
3263 | 3326 |
|
3264 | 3327 |
|
@@ -3417,24 +3480,55 @@ def deskew(polygon): |
3417 | 3480 |
|
3418 | 3481 | if self.full_layout: |
3419 | 3482 | pcgts = self.writer.build_pagexml_full_layout( |
3420 | | - contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, |
3421 | | - all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, |
3422 | | - polygons_of_images, contours_tables, polygons_of_drop_capitals, |
3423 | | - polygons_of_marginals_left, polygons_of_marginals_right, |
3424 | | - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, |
3425 | | - all_box_coord_marginals_left, all_box_coord_marginals_right, |
3426 | | - slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, |
3427 | | - cont_page, polygons_seplines, |
3428 | | - conf_contours_textregions, conf_contours_textregions_h) |
| 3483 | + found_polygons_text_region=contours_only_text_parent, |
| 3484 | + found_polygons_text_region_h=contours_only_text_parent_h, |
| 3485 | + page_coord=page_coord, |
| 3486 | + order_of_texts=order_text_new, |
| 3487 | + id_of_texts=id_of_texts_tot, |
| 3488 | + all_found_textline_polygons=all_found_textline_polygons, |
| 3489 | + all_found_textline_polygons_h=all_found_textline_polygons_h, |
| 3490 | + all_box_coord=all_box_coord, |
| 3491 | + all_box_coord_h=all_box_coord_h, |
| 3492 | + polygons_of_images=polygons_of_images, |
| 3493 | + contours_tables=contours_tables, |
| 3494 | + polygons_of_drop_capitals=polygons_of_drop_capitals, |
| 3495 | + polygons_of_marginals_left=polygons_of_marginals_left, |
| 3496 | + polygons_of_marginals_right=polygons_of_marginals_right, |
| 3497 | + all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, |
| 3498 | + all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, |
| 3499 | + all_box_coord_marginals_left=all_box_coord_marginals_left, |
| 3500 | + all_box_coord_marginals_right=all_box_coord_marginals_right, |
| 3501 | + slopes=slopes, |
| 3502 | + slopes_h=slopes_h, |
| 3503 | + slopes_marginals_left=slopes_marginals_left, |
| 3504 | + slopes_marginals_right=slopes_marginals_right, |
| 3505 | + cont_page=cont_page, |
| 3506 | + polygons_seplines=polygons_seplines, |
| 3507 | + conf_contours_textregions=conf_contours_textregions, |
| 3508 | + conf_contours_textregions_h=conf_contours_textregions_h |
| 3509 | + ) |
3429 | 3510 | else: |
3430 | 3511 | pcgts = self.writer.build_pagexml_no_full_layout( |
3431 | | - contours_only_text_parent, page_coord, order_text_new, id_of_texts_tot, |
3432 | | - all_found_textline_polygons, all_box_coord, polygons_of_images, |
3433 | | - polygons_of_marginals_left, polygons_of_marginals_right, |
3434 | | - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, |
3435 | | - all_box_coord_marginals_left, all_box_coord_marginals_right, |
3436 | | - slopes, slopes_marginals_left, slopes_marginals_right, |
3437 | | - cont_page, polygons_seplines, contours_tables, |
3438 | | - conf_contours_textregions=conf_contours_textregions) |
| 3512 | + found_polygons_text_region=contours_only_text_parent, |
| 3513 | + page_coord=page_coord, |
| 3514 | + order_of_texts=order_text_new, |
| 3515 | + id_of_texts=id_of_texts_tot, |
| 3516 | + all_found_textline_polygons=all_found_textline_polygons, |
| 3517 | + all_box_coord=all_box_coord, |
| 3518 | + polygons_of_images=polygons_of_images, |
| 3519 | + polygons_of_marginals_left=polygons_of_marginals_left, |
| 3520 | + polygons_of_marginals_right=polygons_of_marginals_right, |
| 3521 | + all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left, |
| 3522 | + all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right, |
| 3523 | + all_box_coord_marginals_left=all_box_coord_marginals_left, |
| 3524 | + all_box_coord_marginals_right=all_box_coord_marginals_right, |
| 3525 | + slopes=slopes, |
| 3526 | + slopes_marginals_left=slopes_marginals_left, |
| 3527 | + slopes_marginals_right=slopes_marginals_right, |
| 3528 | + cont_page=cont_page, |
| 3529 | + polygons_seplines=polygons_seplines, |
| 3530 | + contours_tables=contours_tables, |
| 3531 | + conf_contours_textregions=conf_contours_textregions |
| 3532 | + ) |
3439 | 3533 |
|
3440 | 3534 | return pcgts |
0 commit comments