File tree Expand file tree Collapse file tree 2 files changed +11
-6
lines changed
Expand file tree Collapse file tree 2 files changed +11
-6
lines changed Original file line number Diff line number Diff line change 1- sep = r"¦ "
1+ sep = r"'¦|\|' "
22regex = dict (
33 tables = rf"(\b.*{ sep } .*\n)+" ,
44)
Original file line number Diff line number Diff line change @@ -102,6 +102,9 @@ class TablesMatcher(GenericMatcher):
102102 tables_pattern : Optional[Dict[str, str]]
103103 The regex pattern to identify tables.
104104 The key of dictionary should be `tables`
105+ sep_pattern : Optional[str]
106+ The regex pattern to identify the separator pattern.
107+ Used when calling `to_pd_table`.
105108 attr : str
106109 spaCy's attribute to use:
107110 a string with the value "TEXT" or "NORM", or a dict with
@@ -124,14 +127,16 @@ def __init__(
124127 attr : Union [Dict [str , str ], str ] = "TEXT" ,
125128 ignore_excluded : bool = True ,
126129 ):
127- if tables_pattern is None :
130+ if tables_pattern is None and sep_pattern is None :
128131 self .tables_pattern = patterns .regex
129- else :
130- self .tables_pattern = tables_pattern
131-
132- if sep_pattern is None :
133132 self .sep = patterns .sep
133+ elif tables_pattern is None or sep_pattern is None :
134+ raise ValueError (
135+ "Both tables_pattern and sep_pattern must be provided "
136+ "for custom eds.table pipeline."
137+ )
134138 else :
139+ self .tables_pattern = tables_pattern
135140 self .sep = sep_pattern
136141
137142 super ().__init__ (
You can’t perform that action at this time.
0 commit comments