Skip to content

Commit 3701d43

Browse files
author
Kazuma TAKAOKA
authored
Merge pull request #45 from WorksApplications/fix-pos-filter
Fix following bugs
2 parents 3d00338 + 0cfb15a commit 3701d43

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

src/main/java/com/worksap/nlp/lucene/sudachi/ja/PartOfSpeechTrie.java

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
public class PartOfSpeechTrie {
2424

2525
static final String EMPTY_SYMBOL = "*";
26+
static final String LEAF = "";
2627

2728
Map<String, Object> root = new HashMap<>();
2829

@@ -37,6 +38,7 @@ public void add(String... items) {
3738
(Map<String, Object>)node.computeIfAbsent(item, k -> new HashMap<>());
3839
node = newNode;
3940
}
41+
node.put(LEAF, LEAF);
4042
}
4143

4244
public boolean isPrefixOf(List<String> items, int begin, int end) {
@@ -47,17 +49,17 @@ public boolean isPrefixOf(List<String> items, int begin, int end) {
4749
for (int i = begin; i < end; i++) {
4850
String item = items.get(i);
4951
if (EMPTY_SYMBOL.equals(item)) {
50-
return node.isEmpty();
52+
return node.containsKey(LEAF);
5153
}
5254
@SuppressWarnings("unchecked")
5355
Map<String, Object> newNode = (Map<String, Object>)node.get(item);
5456
node = newNode;
5557
if (node == null) {
5658
return false;
57-
} else if (node.isEmpty()) {
59+
} else if (node.containsKey(LEAF)) {
5860
return true;
5961
}
6062
}
61-
return true;
63+
return node.containsKey(LEAF);
6264
}
6365
}

src/test/java/com/worksap/nlp/lucene/sudachi/ja/TestSudachiPartOfSpeechStopFilter.java

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,4 +91,21 @@ public void testConjugationForm() throws IOException {
9191
assertTokenStreamContents(tokenStream,
9292
new String[] {"東京都", "東京", "都", "に", "行っ"});
9393
}
94+
95+
public void testPrefixWithUnmatchedSubcategory() throws IOException {
96+
String tags = "助詞,格助詞\n助詞,格助詞,引用\n";
97+
factory.inform(new StringResourceLoader(tags));
98+
tokenStream = factory.create(tokenStream);
99+
assertTokenStreamContents(tokenStream,
100+
new String[] {"東京都", "東京", "都", "行っ", "た"});
101+
}
102+
103+
public void testTooLongCategory() throws IOException {
104+
String tags = "名詞,固有名詞,地名,一般,一般\n";
105+
factory.inform(new StringResourceLoader(tags));
106+
tokenStream = factory.create(tokenStream);
107+
assertTokenStreamContents(tokenStream,
108+
new String[] {"東京都", "東京", "都", "に", "行っ", "た"});
109+
}
110+
94111
}

0 commit comments

Comments
 (0)