Skip to content
This repository was archived by the owner on Apr 9, 2025. It is now read-only.

Commit 9b778f6

Browse files
committed
Bugfix in attributions with "that". Implemented new rule.
1 parent 68269be commit 9b778f6

File tree

2 files changed

+91
-0
lines changed

2 files changed

+91
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* ==========================License-Start=============================
3+
* DiscourseSimplification : SubordinationPostAttributionExtractor2
4+
*
5+
* Copyright © 2018 Lambda³
6+
*
7+
* GNU General Public License 3
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* along with this program. If not, see http://www.gnu.org/licenses/.
20+
* ==========================License-End==============================
21+
*/
22+
23+
package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules;
24+
25+
import edu.stanford.nlp.ling.Word;
26+
import edu.stanford.nlp.trees.tregex.TregexMatcher;
27+
import edu.stanford.nlp.trees.tregex.TregexPattern;
28+
import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
29+
import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction;
30+
import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
31+
import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf;
32+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
33+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
34+
import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
35+
36+
import java.util.ArrayList;
37+
import java.util.Arrays;
38+
import java.util.List;
39+
import java.util.Optional;
40+
41+
/**
42+
*
43+
*/
44+
public class SubordinationPostAttributionExtractor2 extends ExtractionRule {
45+
46+
@Override
47+
public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
48+
TregexPattern p = TregexPattern.compile("ROOT <<: (S < (NP $.. (VP=vp <+(VP) (SBAR=sbar <<, /that/ < (S=s)))))");
49+
TregexMatcher matcher = p.matcher(leaf.getParseTree());
50+
51+
while (matcher.findAt(leaf.getParseTree())) {
52+
53+
// the left, !subordinate! constituent
54+
List<Word> leftConstituentWords = new ArrayList<>();
55+
leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
56+
leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false));
57+
58+
// rephrase
59+
leftConstituentWords = rephraseIntraSententialAttribution(leftConstituentWords);
60+
Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords));
61+
leftConstituent.dontAllowSplit();
62+
leftConstituent.setToSimpleContext(true);
63+
64+
// the right, !superordinate! constituent
65+
List<Word> rightConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
66+
Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords));
67+
68+
// relation
69+
Optional<Word> headVerb = getHeadVerb(matcher.getNode("vp"));
70+
71+
// only extract if verb matches
72+
if (headVerb.isPresent() && classifer.checkAttribution(headVerb.get())) {
73+
Relation relation = Relation.ATTRIBUTION;
74+
75+
Extraction res = new Extraction(
76+
getClass().getSimpleName(),
77+
false,
78+
null,
79+
relation,
80+
false,
81+
Arrays.asList(leftConstituent, rightConstituent)
82+
);
83+
84+
return Optional.of(res);
85+
}
86+
}
87+
88+
return Optional.empty();
89+
}
90+
}

src/main/resources/reference.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ discourse-simplification {
2020
org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.PurposePostExtractor,
2121
//org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.SubordinationPostExtractor2,
2222
org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.QuotedAttributionPostExtractor,
23+
org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.SubordinationPostAttributionExtractor2,
2324
org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.SubordinationPostAttributionExtractor,
2425
org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.SubordinationPostExtractor,
2526
org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.QuotedAttributionPreExtractor,

0 commit comments

Comments
 (0)