|
19 | 19 | - 补充文档扭曲矫正/去模糊/去阴影/二值化方案,可作为前置处理 [RapidUnDistort](https://github.com/Joker1212/RapidUnWrap) |
20 | 20 | - **2025.1.9** |
21 | 21 | - RapidTable支持了 unitable 模型,精度更高支持torch推理,补充测评数据 |
22 | | -- **2025.3.9** |
| 22 | +- **2025.3.30** |
23 | 23 | - 输入输出格式对齐RapidTable |
24 | 24 | - 支持模型自动下载 |
25 | 25 | - 增加来自paddle的新表格分类模型 |
26 | 26 | - 增加最新PaddleX表格识别模型测评值 |
| 27 | + - 支持 rapidocr 2.0,取消重复ocr检测 |
27 | 28 |
|
28 | 29 | ### 简介 |
29 | 30 | 💖该仓库是用来对文档中表格做结构化识别的推理库,包括来自阿里读光有线和无线表格识别模型,llaipython(微信)贡献的有线表格模型,网易Qanything内置表格分类模型等。\ |
@@ -79,71 +80,89 @@ wired_table_rec_v2 对1500px内大小的图片效果最好,所以分辨率超 |
79 | 80 | SLANet-plus/unitable (综合精度最高): 文档场景表格(论文,杂志,期刊中的表格) |
80 | 81 |
|
81 | 82 | ### 安装 |
82 | | - |
| 83 | +rapidocr2.0以上版本支持torch,onnx,paddle,openvino等多引擎切换,详情参考[rapidocr文档](https://rapidai.github.io/RapidOCRDocs/main/install_usage/rapidocr/usage/) |
83 | 84 | ``` python {linenos=table} |
84 | 85 | pip install wired_table_rec lineless_table_rec table_cls |
| 86 | +pip install rapidocr |
85 | 87 | ``` |
86 | 88 |
|
87 | 89 | ### 快速使用 |
88 | 90 | > ⚠️注意:在 `wired_table_rec`/`table_cls` >= 1.2.0、`lineless_table_rec` > 0.1.0 后,采用同RapidTable完全一致格式的输入输出 |
89 | 91 | ``` python {linenos=table} |
90 | 92 | from pathlib import Path |
91 | 93 |
|
92 | | -from wired_table_rec.utils.utils import VisTable |
| 94 | +from demo_wired import viser |
93 | 95 | from table_cls import TableCls |
94 | 96 | from wired_table_rec.main import WiredTableInput, WiredTableRecognition |
95 | 97 | from lineless_table_rec.main import LinelessTableInput, LinelessTableRecognition |
96 | | -from rapidocr_onnxruntime import RapidOCR, VisRes |
97 | | - |
98 | | -# 初始化引擎 |
99 | | -wired_input = WiredTableInput() |
100 | | -lineless_input = LinelessTableInput() |
101 | | -wired_engine = WiredTableRecognition(wired_input) |
102 | | -lineless_engine = LinelessTableRecognition(lineless_input) |
103 | | -# 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s) |
104 | | -table_cls = TableCls() |
105 | | -img_path = f'tests/test_files/table.jpg' |
106 | | - |
107 | | -cls,elasp = table_cls(img_path) |
108 | | -if cls == 'wired': |
109 | | - table_engine = wired_engine |
110 | | -else: |
111 | | - table_engine = lineless_engine |
112 | | - |
113 | | -table_results = table_engine(img_path, enhance_box_line=False) |
114 | | -# 使用RapidOCR输入 |
115 | | -# ocr_engine = RapidOCR() |
116 | | -# ocr_result, _ = ocr_engine(img_path) |
117 | | -# table_results = table_engine(img_path, ocr_result=ocr_result) |
118 | | - |
119 | | -# 可视化并存储结果,包含识别框+行列坐标 |
120 | | -# save_dir = Path("outputs") |
121 | | -# save_dir.mkdir(parents=True, exist_ok=True) |
122 | | -# |
123 | | -# save_html_path = f"outputs/{Path(img_path).stem}.html" |
124 | | -# save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}" |
125 | | -# save_logic_path = ( |
126 | | -# f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}" |
127 | | -# ) |
128 | | -# |
129 | | -# vis_table = VisTable() |
130 | | -# vis_imged = vis_table( |
131 | | -# img_path, table_results, save_html_path, save_drawed_path, save_logic_path |
132 | | -# ) |
| 98 | +from rapidocr import RapidOCR |
| 99 | + |
| 100 | + |
| 101 | +if __name__ == "__main__": |
| 102 | + # Init |
| 103 | + wired_input = WiredTableInput() |
| 104 | + lineless_input = LinelessTableInput() |
| 105 | + wired_engine = WiredTableRecognition(wired_input) |
| 106 | + lineless_engine = LinelessTableRecognition(lineless_input) |
| 107 | + # 默认小yolo模型(0.1s),可切换为精度更高yolox(0.25s),更快的qanything(0.07s)模型或paddle模型(0.03s) |
| 108 | + table_cls = TableCls() |
| 109 | + img_path = f"tests/test_files/table.jpg" |
| 110 | + |
| 111 | + cls, elasp = table_cls(img_path) |
| 112 | + if cls == "wired": |
| 113 | + table_engine = wired_engine |
| 114 | + else: |
| 115 | + table_engine = lineless_engine |
| 116 | + |
| 117 | + # 使用RapidOCR输入 |
| 118 | + ocr_engine = RapidOCR() |
| 119 | + rapid_ocr_output = ocr_engine(img_path, return_word_box=True) |
| 120 | + ocr_result = list(zip(rapid_ocr_output.boxes, rapid_ocr_output.txts, rapid_ocr_output.scores)) |
| 121 | + table_results = table_engine( |
| 122 | + img_path, ocr_result=ocr_result, enhance_box_line=False |
| 123 | + ) |
| 124 | + |
| 125 | + |
| 126 | + # 使用单字识别 |
| 127 | + # word_results = rapid_ocr_output.word_results |
| 128 | + # ocr_result = [ |
| 129 | + # [word_result[2], word_result[0], word_result[1]] for word_result in word_results |
| 130 | + # ] |
| 131 | + # table_results = table_engine( |
| 132 | + # img_path, ocr_result=ocr_result, enhance_box_line=False |
| 133 | + # ) |
| 134 | + |
| 135 | + # Save |
| 136 | + # save_dir = Path("outputs") |
| 137 | + # save_dir.mkdir(parents=True, exist_ok=True) |
| 138 | + # |
| 139 | + # save_html_path = f"outputs/{Path(img_path).stem}.html" |
| 140 | + # save_drawed_path = f"outputs/{Path(img_path).stem}_table_vis{Path(img_path).suffix}" |
| 141 | + # save_logic_path = ( |
| 142 | + # f"outputs/{Path(img_path).stem}_table_vis_logic{Path(img_path).suffix}" |
| 143 | + # ) |
| 144 | + |
| 145 | + # Visualize table rec result |
| 146 | + # vis_imged = viser( |
| 147 | + # img_path, table_results, save_html_path, save_drawed_path, save_logic_path |
| 148 | + # ) |
| 149 | + |
| 150 | + |
133 | 151 |
|
134 | 152 | ``` |
135 | 153 |
|
136 | 154 | #### 单字ocr匹配 |
137 | 155 |
|
138 | 156 | ```python |
139 | 157 | # 将单字box转换为与行识别相同的结构 |
140 | | -from rapidocr_onnxruntime import RapidOCR |
141 | | -from wired_table_rec.utils.utils_table_recover import trans_char_ocr_res |
142 | | - |
| 158 | +from rapidocr import RapidOCR |
143 | 159 | img_path = "tests/test_files/wired/table4.jpg" |
144 | 160 | ocr_engine = RapidOCR() |
145 | | -ocr_res, _ = ocr_engine(img_path, return_word_box=True) |
146 | | -ocr_res = trans_char_ocr_res(ocr_res) |
| 161 | +rapid_ocr_output = ocr_engine(img_path, return_word_box=True) |
| 162 | +word_results = rapid_ocr_output.word_results |
| 163 | +ocr_result = [ |
| 164 | + [word_result[2], word_result[0], word_result[1]] for word_result in word_results |
| 165 | +] |
147 | 166 | ``` |
148 | 167 |
|
149 | 168 | #### 表格旋转及透视修正 |
@@ -230,14 +249,12 @@ table_results = wired_table_rec( |
230 | 249 | row_threshold=10, # 识别框上边界y坐标差值小于row_threshold的默认同行 |
231 | 250 | rotated_fix=True, # wiredV2支持,轻度旋转(-45°~45°)矫正,默认为True |
232 | 251 | need_ocr=True, # 是否进行OCR识别, 默认为True |
233 | | - rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True |
234 | 252 | ) |
235 | 253 | lineless_table_rec = LinelessTableRecognition(LinelessTableInput()) |
236 | 254 | table_results = lineless_table_rec( |
237 | 255 | img, # 图片 Union[str, np.ndarray, bytes, Path, PIL.Image.Image] |
238 | 256 | ocr_result, # 输入rapidOCR识别结果,不传默认使用内部rapidocr模型 |
239 | 257 | need_ocr=True, # 是否进行OCR识别, 默认为True |
240 | | - rec_again=True,# 是否针对未识别到文字的表格框,进行单独截取再识别,默认为True |
241 | 258 | ) |
242 | 259 | ``` |
243 | 260 |
|
@@ -268,7 +285,7 @@ table_results = lineless_table_rec( |
268 | 285 | ```mermaid |
269 | 286 | flowchart TD |
270 | 287 | A[/表格图片/] --> B([表格分类 table_cls]) |
271 | | - B --> C([有线表格识别 wired_table_rec]) & D([无线表格识别 lineless_table_rec]) --> E([文字识别 rapidocr_onnxruntime]) |
| 288 | + B --> C([有线表格识别 wired_table_rec]) & D([无线表格识别 lineless_table_rec]) --> E([文字识别 rapidocr]) |
272 | 289 | E --> F[/html结构化输出/] |
273 | 290 | ``` |
274 | 291 |
|
|
0 commit comments