Skip to content

Commit 40b2c0b

Browse files
committed
Fix multiline regex search
1 parent 27b8a41 commit 40b2c0b

File tree

1 file changed

+25
-7
lines changed

1 file changed

+25
-7
lines changed

src/frontEnd/serverSide/controller/WordTree_Controller.java

Lines changed: 25 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,14 @@ public Map<String, Object> getWordTree(List<String> reportIDList, String rootWor
4747
String reportID, reportText;
4848
List<Map<String, Object>> leftList = new ArrayList<>();
4949
List<Map<String, Object>> rightList = new ArrayList<>();
50-
50+
51+
String searchText = rootWord.replaceAll(" ", "\\\\s*");
52+
// Pattern sentencePattern = Pattern.compile("([^.:]*?" + rootWord + "[^.\n]*\\.)");
53+
System.out.println("New search is: " + searchText);
54+
55+
Pattern sentencePattern = Pattern.compile(" ([^.:]*?\\b" + searchText + "\\b[^\n.?!]*)", Pattern.CASE_INSENSITIVE);
5156
// Pattern sentencePattern = Pattern.compile("([^.:]*?" + rootWord + "[^.\n]*\\.)");
52-
Pattern sentencePattern = Pattern.compile(" ([^.:]*?\\b" + rootWord + "\\b[^\n.?!]*)", Pattern.CASE_INSENSITIVE);
57+
5358
Pattern tokenPattern = Pattern.compile("[\\w']+|[.,!?;]");
5459

5560

@@ -68,7 +73,7 @@ public Map<String, Object> getWordTree(List<String> reportIDList, String rootWor
6873
reportText = TextUtil.reconstructSentences(reportText);
6974
int oldCount = matchCount;
7075
matchCount = parseWordTree(reportText, sentencePattern,
71-
tokenPattern, leftList, rightList, reportID, rootWord,
76+
tokenPattern, leftList, rightList, reportID, searchText,
7277
matchCount);
7378
// docCount++;
7479
// find within the pathology report
@@ -81,7 +86,7 @@ public Map<String, Object> getWordTree(List<String> reportIDList, String rootWor
8186
// use heuristic merging sentences
8287
reportText = TextUtil.reconstructSentences(reportText);
8388
matchCount = parseWordTree(reportText, sentencePattern,
84-
tokenPattern, leftList, rightList, reportID, rootWord,
89+
tokenPattern, leftList, rightList, reportID, searchText,
8590
matchCount);
8691
// docCount++;
8792
}
@@ -138,11 +143,24 @@ protected int parseWordTree(String reportText, Pattern sentencePattern,
138143
// left branch
139144
tokenList = new ArrayList<>();
140145

146+
System.out.println(matchedSentence);
147+
Pattern pattern = Pattern.compile(rootWord);
148+
Matcher matcher = pattern.matcher(matchedSentence);
149+
150+
Integer start = 0;
151+
Integer end = 0;
152+
153+
if(matcher.find()){
154+
start = matcher.start();
155+
end = matcher.end();
156+
}
157+
141158
branchMatch = tokenPattern.matcher(matchedSentence.substring(0,
142-
matchedSentence.indexOf(rootWord)).trim());
159+
start).trim());
143160
while (branchMatch.find()) {
144161
tokenList.add(branchMatch.group());
145162
}
163+
146164
matchedItem = new HashMap<>();
147165
matchedItem.put("doc", reportID);
148166
matchedItem.put("id", Integer.toString(matchCount));
@@ -152,8 +170,8 @@ protected int parseWordTree(String reportText, Pattern sentencePattern,
152170
// right branch
153171
tokenList = new ArrayList<>();
154172
branchMatch = tokenPattern.matcher(matchedSentence.substring(
155-
matchedSentence.indexOf(rootWord) + rootWord.length())
156-
.trim());
173+
end).trim());
174+
157175
while (branchMatch.find()) {
158176
tokenList.add(branchMatch.group());
159177
}

0 commit comments

Comments (0)