diff --git a/src/main/java/com/graphaware/nlp/processor/opennlp/OpenNLPPipeline.java b/src/main/java/com/graphaware/nlp/processor/opennlp/OpenNLPPipeline.java index 2dd85e2..7d9945b 100644 --- a/src/main/java/com/graphaware/nlp/processor/opennlp/OpenNLPPipeline.java +++ b/src/main/java/com/graphaware/nlp/processor/opennlp/OpenNLPPipeline.java @@ -351,13 +351,24 @@ public String train(String alg, String modelId, String fileTrain, String lang, M NERModelTool nerModel = new NERModelTool(fileTrain, modelId, lang, params); nerModel.train(); result = nerModel.validate(); - nerModel.saveModel(fileOut); + /* String.split() takes a regex; quote the separator so the Windows one (a lone backslash) is matched literally instead of raising PatternSyntaxException */ String[] dirPathSplit = fileTrain.split(java.util.regex.Pattern.quote(File.separator)); + String fileOutToUse; + if (dirPathSplit.length > 2) { + StringBuilder sb = new StringBuilder(); + /* keep every path segment except the last two, so fileOut is prefixed with the directory two levels above the last segment of fileTrain */ for (int i = 0; i < dirPathSplit.length -2; ++i) { + sb.append(dirPathSplit[i]).append(File.separator); + } + fileOutToUse = sb.toString() + fileOut; + } else { + fileOutToUse = fileOut; + } + nerModel.saveModel(fileOutToUse); // incorporate this model to the OpenNLPPipeline if (nerModel.getModel() != null) { - customNeModels.put(newKey, fileOut); - /*if (!nameDetectors.containsKey(newKey)) { + customNeModels.put(newKey, fileOutToUse); + if (!nameDetectors.containsKey(newKey)) { nameDetectors.put(newKey, new NameFinderME((TokenNameFinderModel) nerModel.getModel())); - }*/ + } } } else if (alg.toLowerCase().equals("sentiment")) { diff --git a/src/main/java/com/graphaware/nlp/processor/opennlp/model/NERModelTool.java b/src/main/java/com/graphaware/nlp/processor/opennlp/model/NERModelTool.java index 4b53e48..4e6cc4d 100644 --- a/src/main/java/com/graphaware/nlp/processor/opennlp/model/NERModelTool.java +++ b/src/main/java/com/graphaware/nlp/processor/opennlp/model/NERModelTool.java @@ -56,8 +56,9 @@ public void train() { LOG.error("Error while opening training file: " + fileIn, ex); throw new RuntimeException("Error while training " + MODEL_NAME + " model " + this.modelDescr, ex); } catch (Exception ex) { - 
LOG.error("Error while training " + MODEL_NAME + " model " + modelDescr); - throw new RuntimeException("Error while training " + MODEL_NAME + " model " + this.modelDescr, ex); + LOG.error("Error while training " + MODEL_NAME + " model " + modelDescr + ". " + ex.getMessage(), ex); + /* the stack trace is emitted through LOG (previous line) rather than ex.printStackTrace() on stderr */ + throw new RuntimeException(ex); } } @@ -77,10 +78,10 @@ public String validate() { LOG.info("Validation: " + result); } catch (IOException ex) { LOG.error("Error while opening training file: " + fileIn, ex); - throw new RuntimeException("IOError while evaluating " + MODEL_NAME + " model " + modelDescr, ex); + throw new RuntimeException(ex); } catch (Exception ex) { LOG.error("Error while evaluating " + MODEL_NAME + " model.", ex); - throw new RuntimeException("Error while evaluating " + MODEL_NAME + " model " + modelDescr, ex); + throw new RuntimeException(ex); } } else { result = test(this.fileValidate, new NameFinderME((TokenNameFinderModel) model)); diff --git a/src/main/java/com/graphaware/nlp/processor/opennlp/model/OpenNLPGenericModelTool.java b/src/main/java/com/graphaware/nlp/processor/opennlp/model/OpenNLPGenericModelTool.java index e6d4c71..47c103e 100644 --- a/src/main/java/com/graphaware/nlp/processor/opennlp/model/OpenNLPGenericModelTool.java +++ b/src/main/java/com/graphaware/nlp/processor/opennlp/model/OpenNLPGenericModelTool.java @@ -77,11 +77,11 @@ protected ObjectStream openFile(String fileName) { } ObjectStream lStream = null; try { - ImprovisedInputStreamFactory dataIn = new ImprovisedInputStreamFactory(null, "", fileName); + ImprovisedInputStreamFactory dataIn = new ImprovisedInputStreamFactory(new Properties(), "", fileName); lStream = new PlainTextByLineStream(dataIn, "UTF-8"); - } catch (IOException ex) { - LOG.error("Failure while opening file " + fileName, ex); - throw new RuntimeException("Failure while opening file " + fileName, ex); + } catch (Exception ex) { + LOG.error("Unable to open file " + fileName, ex); + throw new 
RuntimeException("Unable to open file " + fileName, ex); } if (lStream == null) @@ -188,7 +188,7 @@ public void saveModel(String file) { modelOut.close(); } catch (IOException ex) { LOG.error("Error saving model to file " + file, ex); - throw new RuntimeException("Error saving model to file " + file, ex); + throw new RuntimeException(ex); } //this.closeInputFile(); diff --git a/src/test/java/com/graphaware/nlp/processor/opennlp/model/CustomNERIntegrationTest.java b/src/test/java/com/graphaware/nlp/processor/opennlp/model/CustomNERIntegrationTest.java new file mode 100644 index 0000000..0f46807 --- /dev/null +++ b/src/test/java/com/graphaware/nlp/processor/opennlp/model/CustomNERIntegrationTest.java @@ -0,0 +1,66 @@ +package com.graphaware.nlp.processor.opennlp.model; + +import com.graphaware.nlp.processor.opennlp.OpenNLPIntegrationTest; +import org.junit.Test; + +import java.util.Collections; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** Integration tests that call the ga.nlp.processor.train procedure with alg "ner": one success path and two expected-failure paths checked by exception message. */ public class CustomNERIntegrationTest extends OpenNLPIntegrationTest { + + /** Trains model "test-ner" from import/ner.train, adds a pipeline named customNER that references it, annotates one sentence, and asserts that a NER_Person node can be matched. */ @Test + public void testTrainNER() { + String p = getClass().getClassLoader().getResource("import/ner.train").getPath(); + String q = "CALL ga.nlp.processor.train({textProcessor: \"com.graphaware.nlp.processor.opennlp.OpenNLPTextProcessor\", modelIdentifier: \"test-ner\", alg: \"ner\", inputFile: \"" + p + "\", trainingParameters: {iter: 10}})"; + executeInTransaction(q, (result -> { + assertTrue(result.hasNext()); + })); + + String addPipelineQuery = "CALL ga.nlp.processor.addPipeline({textProcessor: 'com.graphaware.nlp.processor.opennlp.OpenNLPTextProcessor', name: 'customNER', processingSteps: {tokenize: true, ner: true, sentiment: true, dependency: false, customNER: \"test-ner\"}})"; + executeInTransaction(addPipelineQuery, emptyConsumer()); + + + String textNew = "Mr Kus said he intended to form a government."; + executeInTransaction("CALL ga.nlp.annotate({text: {text}, id:'test-ner', 
pipeline:'customNER', textProcessor: 'com.graphaware.nlp.processor.opennlp.OpenNLPTextProcessor'}) YIELD result RETURN result", Collections.singletonMap("text", textNew), result -> { + assertTrue(result.hasNext()); + }); + + executeInTransaction("MATCH (n:NER_Person) RETURN n.value AS v", (result -> { + assertTrue(result.hasNext()); + })); + } + + /** Passes the input file name "non-exist" to the train procedure and expects an exception whose message contains "Unable to open". */ @Test + public void testTrainOnNonExistingFileThrowsMeaningfulError() { + String p = "non-exist"; + String q = "CALL ga.nlp.processor.train({textProcessor: \"com.graphaware.nlp.processor.opennlp.OpenNLPTextProcessor\", modelIdentifier: \"test-ner\", alg: \"ner\", inputFile: \"" + p + "\", trainingParameters: {iter: 10}})"; + /* the exception is captured rather than propagated so its message can be asserted below */ Exception ex = null; + try { + executeInTransaction(q, (result -> { + assertTrue(result.hasNext()); + })); + } catch (Exception e) { + ex = e; + } + assertNotNull(ex); + assertTrue(ex.getMessage().contains("Unable to open")); + } + + /** Trains on import/ner-small.train and expects an exception whose message contains "Insufficient". */ @Test + public void testTrainingOnSmallSetThrowsInsufficientException() { + String p = getClass().getClassLoader().getResource("import/ner-small.train").getPath(); + String q = "CALL ga.nlp.processor.train({textProcessor: \"com.graphaware.nlp.processor.opennlp.OpenNLPTextProcessor\", modelIdentifier: \"test-ner\", alg: \"ner\", inputFile: \"" + p + "\", trainingParameters: {iter: 10}})"; + Exception ex = null; + try { + executeInTransaction(q, (result -> { + assertTrue(result.hasNext()); + })); + } catch (Exception e) { + ex = e; + } + assertNotNull(ex); + assertTrue(ex.getMessage().contains("Insufficient")); + } +} diff --git a/src/test/java/com/graphaware/nlp/processor/opennlp/model/CustomSentimentModelIntegrationTest.java b/src/test/java/com/graphaware/nlp/processor/opennlp/model/CustomSentimentModelIntegrationTest.java index 9f57de6..428ffe4 100644 --- a/src/test/java/com/graphaware/nlp/processor/opennlp/model/CustomSentimentModelIntegrationTest.java +++ b/src/test/java/com/graphaware/nlp/processor/opennlp/model/CustomSentimentModelIntegrationTest.java @@ -34,5 +34,4 
@@ public void testTrainCustomModelWithProcedure() { } })); } - } diff --git a/src/test/resources/import/ner-small.train b/src/test/resources/import/ner-small.train new file mode 100644 index 0000000..fa7e729 --- /dev/null +++ b/src/test/resources/import/ner-small.train @@ -0,0 +1 @@ +Autism, adhd ever today and night. \ No newline at end of file diff --git a/src/test/resources/import/ner.train b/src/test/resources/import/ner.train new file mode 100644 index 0000000..c16e5e5 --- /dev/null +++ b/src/test/resources/import/ner.train @@ -0,0 +1,64 @@ + Theresa May has said she will form a government with the support of the Democratic Unionists that can provide "certainty" for the future. +Speaking after visiting Buckingham Palace, she said only her party had the "legitimacy" to govern after winning the most seats and votes. +In a short statement outside Downing Street, which followed a 25-minute audience with The Queen , Mrs May said she intended to form a government which could "provide certainty and lead Britain forward at this critical time for our country". + +The BBC's Laura Kuenssberg said the PM had returned to No 10 a "diminished figure", having ended up with 12 fewer seats than when she called the election in April. +She had called the election with the stated reason that it would strengthen her hand in negotiations for the UK to leave the EU - the talks are due to start on 19 June. + +The general election has ended in a hung Parliament, where no party has the 326 seats needed to get an overall majority in the House of Commons. +So what happens now? +Who is the prime minister? + Theresa May remains prime minister. +She aims to form a minority government, working with the Democratic Unionist Party. + +The Labour leader does not have to wait until Mrs May has exhausted all her options before he starts trying to put a deal of his own together. +He can hold talks with potential partners at the same time as Mrs May . +They may even be talking to the same people. 
+ +Labour had a majority of three after the 1974 general election - but this had vanished by 1977, and it stayed in power thanks to a "pact" with the Liberal Party. +And John Major's Conservative government started out with a majority of 21 in 1992 but was a minority government by the 1997 general election. + +On 8 May 2013, one week before the Pakistani election, the third author, in his keynote address at the Sentiment Analysis Symposium, forecast the winner of the Pakistani election. +The chart in Figure 1 shows varying sentiment on the candidates for prime minister of Pakistan in that election. +The next day, the BBC’s Owen Bennett Jones , reporting from Islamabad, wrote an article titled “Pakistan Elections: Five Reasons Why the Vote is Unpredictable.” + +At the moment the first deadline is Tuesday 13 June, when the new Parliament meets for the first time. + Mrs May has until this date to put together a deal to keep herself in power or resign, according to official guidance issued by the Cabinet Office. +If she were to resign, Mrs May must be clear that Jeremy Corbyn can form a government and that she can't. +She is entitled to wait until the new Parliament to see if she has the confidence of the House of Commons. + +Japan's parliament has passed a one-off bill to allow Emperor Akihito to abdicate, the first emperor to do so in 200 years. +The 83-year-old said last year that his age and health were making it hard for him to fulfil his official duties. +But there was no provision under existing law for him to stand down. +The government will now begin the process of arranging his abdication, expected to happen in late 2018, and the handover to Crown Prince Naruhito . + +Germany has called for diplomatic efforts to resolve a growing crisis over Qatar, which is accused by four Arab neighbours of funding terrorism. +Saudi Arabia, the United Arab Emirates (UAE), Egypt and Bahrain cut travel and diplomatic ties with Qatar on Monday. 
+Speaking after hosting his Qatari counterpart on Friday, German Foreign Minister Sigmar Gabriel called for the "sea and air blockades" to be lifted. + +Mr Gabriel met Saudi Foreign Minister Adil al-Ahmad al-Jubayr two days ago, and said all parties were seeking "to avoid further escalation". +Then on Friday, Mr Gabriel spoke to Qatari Foreign Minister Sheikh Mohammed bin Abdulrahman al-Thani in the northern German town of Wolfenbuettel. + +On Friday Saudi Arabia and its three allies issued a list of 49 people - including Muslim Brotherhood spiritual leader Yusuf al-Qaradawi - and 12 Qatar-backed charities and groups accused of links with militants. +On Thursday, Qatar's Sheikh Mohammed said his country had been isolated "because we are successful and progressive" and called his country "a platform for peace not terrorism". + +US President Donald Trump has urged Nato allies to boost defence spending. +Last month German Chancellor Angela Merkel said Europe could no longer "completely depend" on the US and UK , following the election of President Trump and the triggering of Brexit. + +The UK has long been one of the strongest voices in the EU against any moves towards forming a European army. +The UK says the EU must not duplicate Nato's role as the main pillar of European defence. +However, Mr Trump's criticisms of Nato have raised questions about the US commitment to defending Europe. + +The Cabinet Office revealed on Wednesday that Japan's GDP grew by 0.3% during the first quarter of 2017. +Although the reading missed a forecast of 0.6% growth, Japan's economy continued to expand in five consecutive quarters, the country's highest streak in three years. + +The General Administration of Customs revealed on Thursday that China's exports increased by 8.7% year-on-year in May, beating forecasts of a 7.2% increase. +The country's trade surplus now amounts to $40.8bn (£31bn). 
+ +Furthermore, the European Central Bank is scheduled to hold its monetary policy meeting today as well. +Expectations point towards the Bank reaffirming its decision to maintain a loose monetary policy, but spectators will carefully scrutinize ECB President Mario Draghi and his team's rhetoric in order to get an indication of when interest rates could possibly rise. + + James Trescothick , senior global strategist, said there is the potential that Opec's agreement to limit current oil production could collapse. +If that happens oil prices could potentially fall. +However, Qatar is one of the smallest oil producers in Opec, with estimated proven reserves of 25bn barrels which is dwarfed by Saudi Arabia's 266bn barrels. + Trescothick believes the main danger to the region and indeed to oil prices is that increased tension could lead Qatar to reach out even further to Iran for support which would no doubt sour diplomatic relations further.