Skip to content

Commit 22c96a8

Browse files
authored
Merge pull request #34 from RapidAI/more_teds_compare
More teds compare
2 parents 7257c7a + e72fd56 commit 22c96a8

File tree

6 files changed

+37
-20
lines changed

6 files changed

+37
-20
lines changed

README.md

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,12 @@
1212
<a href="https://github.com/RapidAI/TableStructureRec/blob/c41bbd23898cb27a957ed962b0ffee3c74dfeff1/LICENSE"><img alt="GitHub" src="https://img.shields.io/badge/license-Apache 2.0-blue"></a>
1313
</div>
1414

15-
### 简介
15+
### 最近更新
16+
- **2024.9.26**
17+
- 修正RapidTable默认英文模型导致的测评结果错误。
18+
- 补充测评数据集,补充开源社区更多模型的测评结果
1619

20+
### 简介
1721
💖该仓库是用来对文档中表格做结构化识别的推理库,包括来自paddle的表格识别模型,
1822
阿里读光有线和无线表格识别模型,llaipython(微信)贡献的有线表格模型,网易Qanything内置表格分类模型等。
1923

@@ -33,15 +37,20 @@
3337

3438
### 指标结果
3539

36-
[TableRecognitionMetric 评测工具](https://github.com/SWHL/TableRecognitionMetric) [评测数据集](https://huggingface.co/datasets/SWHL/table_rec_test_dataset) [Rapid OCR](https://github.com/RapidAI/RapidOCR)
40+
[TableRecognitionMetric 评测工具](https://github.com/SWHL/TableRecognitionMetric) [huggingface数据集](https://huggingface.co/datasets/SWHL/table_rec_test_dataset) [modelscope 数据集](https://www.modelscope.cn/datasets/jockerK/TEDS_TEST/files) [Rapid OCR](https://github.com/RapidAI/RapidOCR)
41+
42+
注: StructEqTable 输出为 latex,仅对能够成功转换为 html 的结果去除样式标签后进行测评
43+
44+
| 方法 | TEDS | TEDS-only-structure |
45+
|:---------------------------------------------------------------------------------------------------------------------------|:-----------:|:-------------------:|
46+
| [deepdoctection(rag-flow)](https://github.com/deepdoctection/deepdoctection?tab=readme-ov-file) | 0.59975 | 0.69918 |
47+
| [ppstructure_table_master](https://github.com/PaddlePaddle/PaddleOCR/tree/main/ppstructure) | 0.61606 | 0.73892 |
48+
| [ppstructure_table_engine](https://github.com/PaddlePaddle/PaddleOCR/tree/main/ppstructure)                                | 0.67924     |       0.78653       |
49+
| table_cls + wired_table_rec v1 + lineless_table_rec | 0.68507 | 0.75140 |
50+
| [StructEqTable](https://github.com/UniModal4Reasoning/StructEqTable-Deploy) | 0.67310 | **0.81210** |
51+
| [RapidTable](https://github.com/RapidAI/RapidStructure/blob/b800b156015bf5cd6f5429295cdf48be682fd97e/docs/README_Table.md) | 0.71654 | 0.81067 |
52+
| table_cls + wired_table_rec v2 + lineless_table_rec | **0.73702** | 0.80210 |
3753

38-
| 方法 | TEDS | TEDS-only-structure |
39-
|:---------------------------------------------------------------------------------------------------------------------------|:-------:|:-------------------:|
40-
| [RapidTable](https://github.com/RapidAI/RapidStructure/blob/b800b156015bf5cd6f5429295cdf48be682fd97e/docs/README_Table.md) | 0.59765 | 0.68996 |
41-
| ppstructure_table_master | 0.59835 | 0.68996 |
42-
| table_cls + wired_table_rec v1 + lineless_table_rec | 0.74692 | 0.83049 |
43-
| ppsturcture_table_engine | 0.76835 | 0.83296 |
44-
| table_cls + wired_table_rec v2 + lineless_table_rec | 0.80890 | 0.88011 |
4554

4655
### 安装
4756

@@ -69,9 +78,15 @@ if cls == 'wired':
6978
table_engine = wired_engine
7079
else:
7180
table_engine = lineless_engine
81+
7282
html, elasp, polygons, logic_points, ocr_res = table_engine(img_path)
7383
print(f"elasp: {elasp}")
7484

85+
# 使用其他ocr模型
86+
#ocr_engine =RapidOCR(det_model_dir="xxx/det_server_infer.onnx",rec_model_dir="xxx/rec_server_infer.onnx")
87+
#ocr_res, _ = ocr_engine(img_path)
88+
#html, elasp, polygons, logic_points, ocr_res = table_engine(img_path, ocr_result=ocr_res)
89+
7590
# output_dir = f'outputs'
7691
# complete_html = format_html(html)
7792
# os.makedirs(os.path.dirname(f"{output_dir}/table.html"), exist_ok=True)
@@ -105,8 +120,7 @@ cv2.imwrite(f'img_rotated.jpg', img)
105120
- 答:该项目暂时不支持偏移图片识别,请先修正图片,也欢迎提pr来解决这个问题。
106121

107122
2. **问:识别框丢失了内部文字信息**
108-
-
109-
答:默认使用的rapidocr小模型,如果需要更高精度的效果,可以从 [模型列表](https://rapidai.github.io/RapidOCRDocs/model_list/#_1)
123+
- 答:默认使用的是 rapidocr 小模型,如果需要更高精度的效果,可以从 [模型列表](https://rapidai.github.io/RapidOCRDocs/model_list/#_1)
110124
下载更高精度的ocr模型,在执行时传入ocr_result即可
111125

112126
3. **问:模型支持 gpu 加速吗?**
@@ -116,8 +130,9 @@ cv2.imwrite(f'img_rotated.jpg', img)
116130

117131
### TODO List
118132

119-
- [ ] 识别前图片偏移修正(完成有线表格小角度偏移修正)
120-
- [ ] 增加数据集数量,增加更多评测对比
133+
- [x] 图片小角度偏移修正方法补充
134+
- [x] 增加数据集数量,增加更多评测对比
135+
- [ ] 补充复杂场景表格检测和提取,解决旋转和透视导致的低识别率
121136
- [ ] 优化无线表格模型
122137

123138
### 处理流程

lineless_table_rec/utils_table_recover.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,6 @@ def plot_html_table(
421421
grid = [[None] * max_col for _ in range(max_row)]
422422

423423
valid_start_row = (1 << 16) - 1
424-
valid_end_row = 0
425424
valid_start_col = (1 << 16) - 1
426425
valid_end_col = 0
427426
# 将 sorted_logi_points 中的元素填充到 grid 中
@@ -436,7 +435,6 @@ def plot_html_table(
436435
if ocr_rec_text_list and "".join(ocr_rec_text_list):
437436
valid_start_row = min(row_start, valid_start_row)
438437
valid_start_col = min(col_start, valid_start_col)
439-
valid_end_row = max(row_end, valid_end_row)
440438
valid_end_col = max(col_end, valid_end_col)
441439
for row in range(row_start, row_end + 1):
442440
for col in range(col_start, col_end + 1):
@@ -447,7 +445,7 @@ def plot_html_table(
447445

448446
# 遍历每行
449447
for row in range(max_row):
450-
if row < valid_start_row or row > valid_end_row:
448+
if row < valid_start_row:
451449
continue
452450
temp = "<tr>"
453451
# 遍历每一列
81.7 KB
Loading

tests/test_wired_table_rec.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232

3333
def get_td_nums(html: str) -> int:
3434
soup = BeautifulSoup(html, "html.parser")
35+
if not soup.table:
36+
return 0
3537
tds = soup.table.find_all("td")
3638
return len(tds)
3739

@@ -41,7 +43,7 @@ def test_squeeze_bug():
4143
ocr_result, _ = ocr_engine(img_path)
4244
table_str, *_ = table_recog(str(img_path), ocr_result)
4345
td_nums = get_td_nums(table_str)
44-
assert td_nums == 291
46+
assert td_nums == 192
4547

4648

4749
@pytest.mark.parametrize(
@@ -50,6 +52,7 @@ def test_squeeze_bug():
5052
("table_recognition.jpg", 35, "d colsp"),
5153
("table2.jpg", 23, "td><td "),
5254
("row_span.png", 17, "></td><"),
55+
("no_table.jpg", 1, "d colsp"),
5356
],
5457
)
5558
def test_input_normal(img_path, gt_td_nums, gt2):

wired_table_rec/table_recover.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ def get_rows(polygons: np.array) -> Dict[int, List[int]]:
4343
result = {}
4444
thresh = 10.0
4545
split_idxs = np.argwhere(abs(minus_res) > thresh).squeeze()
46+
# 如果都在一行,则将所有下标设置为同一行
47+
if split_idxs.size == 0:
48+
return {0: [i for i in range(len(y_axis))]}
4649
if split_idxs.ndim == 0:
4750
split_idxs = split_idxs[None, ...]
4851

wired_table_rec/utils_table_recover.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,6 @@ def plot_html_table(
572572
grid = [[None] * max_col for _ in range(max_row)]
573573

574574
valid_start_row = (1 << 16) - 1
575-
valid_end_row = 0
576575
valid_start_col = (1 << 16) - 1
577576
valid_end_col = 0
578577
# 将 sorted_logi_points 中的元素填充到 grid 中
@@ -587,7 +586,6 @@ def plot_html_table(
587586
if ocr_rec_text_list and "".join(ocr_rec_text_list):
588587
valid_start_row = min(row_start, valid_start_row)
589588
valid_start_col = min(col_start, valid_start_col)
590-
valid_end_row = max(row_end, valid_end_row)
591589
valid_end_col = max(col_end, valid_end_col)
592590
for row in range(row_start, row_end + 1):
593591
for col in range(col_start, col_end + 1):
@@ -598,7 +596,7 @@ def plot_html_table(
598596

599597
# 遍历每行
600598
for row in range(max_row):
601-
if row < valid_start_row or row > valid_end_row:
599+
if row < valid_start_row:
602600
continue
603601
temp = "<tr>"
604602
# 遍历每一列

0 commit comments

Comments
 (0)