|
| 1 | +/* |
| 2 | + * ==========================License-Start============================= |
| 3 | + * DiscourseSimplification : SubordinationPostAttributionExtractor2 |
| 4 | + * |
| 5 | + * Copyright © 2018 Lambda³ |
| 6 | + * |
| 7 | + * GNU General Public License 3 |
| 8 | + * This program is free software: you can redistribute it and/or modify |
| 9 | + * it under the terms of the GNU General Public License as published by |
| 10 | + * the Free Software Foundation, either version 3 of the License, or |
| 11 | + * (at your option) any later version. |
| 12 | + * |
| 13 | + * This program is distributed in the hope that it will be useful, |
| 14 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | + * GNU General Public License for more details. |
| 17 | + * |
| 18 | + * You should have received a copy of the GNU General Public License |
| 19 | + * along with this program. If not, see http://www.gnu.org/licenses/. |
| 20 | + * ==========================License-End============================== |
| 21 | + */ |
| 22 | + |
| 23 | +package org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules; |
| 24 | + |
| 25 | +import edu.stanford.nlp.ling.Word; |
| 26 | +import edu.stanford.nlp.trees.tregex.TregexMatcher; |
| 27 | +import edu.stanford.nlp.trees.tregex.TregexPattern; |
| 28 | +import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation; |
| 29 | +import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.Extraction; |
| 30 | +import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule; |
| 31 | +import org.lambda3.text.simplification.discourse.runner.discourse_tree.model.Leaf; |
| 32 | +import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException; |
| 33 | +import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils; |
| 34 | +import org.lambda3.text.simplification.discourse.utils.words.WordsUtils; |
| 35 | + |
| 36 | +import java.util.ArrayList; |
| 37 | +import java.util.Arrays; |
| 38 | +import java.util.List; |
| 39 | +import java.util.Optional; |
| 40 | + |
| 41 | +/** |
| 42 | + * |
| 43 | + */ |
| 44 | +public class SubordinationPostAttributionExtractor2 extends ExtractionRule { |
| 45 | + |
| 46 | + @Override |
| 47 | + public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException { |
| 48 | + TregexPattern p = TregexPattern.compile("ROOT <<: (S < (NP $.. (VP=vp <+(VP) (SBAR=sbar <<, /that/ < (S=s)))))"); |
| 49 | + TregexMatcher matcher = p.matcher(leaf.getParseTree()); |
| 50 | + |
| 51 | + while (matcher.findAt(leaf.getParseTree())) { |
| 52 | + |
| 53 | + // the left, !subordinate! constituent |
| 54 | + List<Word> leftConstituentWords = new ArrayList<>(); |
| 55 | + leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("sbar"), false)); |
| 56 | + leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("sbar"), false)); |
| 57 | + |
| 58 | + // rephrase |
| 59 | + leftConstituentWords = rephraseIntraSententialAttribution(leftConstituentWords); |
| 60 | + Leaf leftConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(leftConstituentWords)); |
| 61 | + leftConstituent.dontAllowSplit(); |
| 62 | + leftConstituent.setToSimpleContext(true); |
| 63 | + |
| 64 | + // the right, !superordinate! constituent |
| 65 | + List<Word> rightConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s")); |
| 66 | + Leaf rightConstituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(rightConstituentWords)); |
| 67 | + |
| 68 | + // relation |
| 69 | + Optional<Word> headVerb = getHeadVerb(matcher.getNode("vp")); |
| 70 | + |
| 71 | + // only extract if verb matches |
| 72 | + if (headVerb.isPresent() && classifer.checkAttribution(headVerb.get())) { |
| 73 | + Relation relation = Relation.ATTRIBUTION; |
| 74 | + |
| 75 | + Extraction res = new Extraction( |
| 76 | + getClass().getSimpleName(), |
| 77 | + false, |
| 78 | + null, |
| 79 | + relation, |
| 80 | + false, |
| 81 | + Arrays.asList(leftConstituent, rightConstituent) |
| 82 | + ); |
| 83 | + |
| 84 | + return Optional.of(res); |
| 85 | + } |
| 86 | + } |
| 87 | + |
| 88 | + return Optional.empty(); |
| 89 | + } |
| 90 | +} |
0 commit comments