Skip to content
This repository was archived by the owner on May 6, 2021. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -351,13 +351,24 @@ public String train(String alg, String modelId, String fileTrain, String lang, M
NERModelTool nerModel = new NERModelTool(fileTrain, modelId, lang, params);
nerModel.train();
result = nerModel.validate();
nerModel.saveModel(fileOut);
String[] dirPathSplit = fileTrain.split(File.separator);
String fileOutToUse;
if (dirPathSplit.length > 2) {
StringBuilder sb = new StringBuilder("");
for (int i = 0; i < dirPathSplit.length -2; ++i) {
sb.append(dirPathSplit[i]).append(File.separator);
}
fileOutToUse = sb.toString() + fileOut;
} else {
fileOutToUse = fileOut;
}
nerModel.saveModel(fileOutToUse);
// incorporate this model to the OpenNLPPipeline
if (nerModel.getModel() != null) {
customNeModels.put(newKey, fileOut);
/*if (!nameDetectors.containsKey(newKey)) {
customNeModels.put(newKey, fileOutToUse);
if (!nameDetectors.containsKey(newKey)) {
nameDetectors.put(newKey, new NameFinderME((TokenNameFinderModel) nerModel.getModel()));
}*/
}
}
}
else if (alg.toLowerCase().equals("sentiment")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ public void train() {
LOG.error("Error while opening training file: " + fileIn, ex);
throw new RuntimeException("Error while training " + MODEL_NAME + " model " + this.modelDescr, ex);
} catch (Exception ex) {
LOG.error("Error while training " + MODEL_NAME + " model " + modelDescr);
throw new RuntimeException("Error while training " + MODEL_NAME + " model " + this.modelDescr, ex);
LOG.error("Error while training " + MODEL_NAME + " model " + modelDescr + ". " + ex.getMessage());
ex.printStackTrace();
throw new RuntimeException(ex);
}
}

Expand All @@ -77,10 +78,10 @@ public String validate() {
LOG.info("Validation: " + result);
} catch (IOException ex) {
LOG.error("Error while opening training file: " + fileIn, ex);
throw new RuntimeException("IOError while evaluating " + MODEL_NAME + " model " + modelDescr, ex);
throw new RuntimeException(ex);
} catch (Exception ex) {
LOG.error("Error while evaluating " + MODEL_NAME + " model.", ex);
throw new RuntimeException("Error while evaluating " + MODEL_NAME + " model " + modelDescr, ex);
throw new RuntimeException(ex);
}
} else {
result = test(this.fileValidate, new NameFinderME((TokenNameFinderModel) model));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,11 @@ protected ObjectStream<String> openFile(String fileName) {
}
ObjectStream<String> lStream = null;
try {
ImprovisedInputStreamFactory dataIn = new ImprovisedInputStreamFactory(null, "", fileName);
ImprovisedInputStreamFactory dataIn = new ImprovisedInputStreamFactory(new Properties(), "", fileName);
lStream = new PlainTextByLineStream(dataIn, "UTF-8");
} catch (IOException ex) {
LOG.error("Failure while opening file " + fileName, ex);
throw new RuntimeException("Failure while opening file " + fileName, ex);
} catch (Exception ex) {
LOG.error("Unable to open file " + fileName, ex);
throw new RuntimeException("Unable to open file " + fileName, ex);
}

if (lStream == null)
Expand Down Expand Up @@ -188,7 +188,7 @@ public void saveModel(String file) {
modelOut.close();
} catch (IOException ex) {
LOG.error("Error saving model to file " + file, ex);
throw new RuntimeException("Error saving model to file " + file, ex);
throw new RuntimeException(ex);
}

//this.closeInputFile();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package com.graphaware.nlp.processor.opennlp.model;

import com.graphaware.nlp.processor.opennlp.OpenNLPIntegrationTest;
import org.junit.Test;

import java.util.Collections;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

/**
 * Integration tests that drive custom NER model training through the
 * {@code ga.nlp.processor.train} procedure using the OpenNLP text processor.
 *
 * <p>Covers one successful training run followed by annotation with the
 * trained model, and two failure scenarios whose exception messages are
 * inspected.
 */
public class CustomNERIntegrationTest extends OpenNLPIntegrationTest {

    /**
     * Trains a model from {@code import/ner.train}, registers a pipeline that
     * references it as {@code customNER}, annotates a sentence and checks that
     * at least one {@code NER_Person} node can be matched afterwards.
     */
    @Test
    public void testTrainNER() {
        String trainingFile = resourcePath("import/ner.train");
        executeInTransaction(trainQuery(trainingFile), (rows -> {
            assertTrue(rows.hasNext());
        }));

        // Register a pipeline that points at the model identifier used for training.
        String pipelineQuery = "CALL ga.nlp.processor.addPipeline({textProcessor: 'com.graphaware.nlp.processor.opennlp.OpenNLPTextProcessor', name: 'customNER', processingSteps: {tokenize: true, ner: true, sentiment: true, dependency: false, customNER: \"test-ner\"}})";
        executeInTransaction(pipelineQuery, emptyConsumer());

        String sentence = "Mr Kus said he intended to form a government.";
        String annotateQuery = "CALL ga.nlp.annotate({text: {text}, id:'test-ner', pipeline:'customNER', textProcessor: 'com.graphaware.nlp.processor.opennlp.OpenNLPTextProcessor'}) YIELD result RETURN result";
        executeInTransaction(annotateQuery, Collections.singletonMap("text", sentence), rows -> {
            assertTrue(rows.hasNext());
        });

        executeInTransaction("MATCH (n:NER_Person) RETURN n.value AS v", (rows -> {
            assertTrue(rows.hasNext());
        }));
    }

    /**
     * Training against a path that does not exist must raise an exception
     * whose message contains {@code "Unable to open"}.
     */
    @Test
    public void testTrainOnNonExistingFileThrowsMeaningfulError() {
        Exception failure = captureTrainingFailure("non-exist");
        assertNotNull(failure);
        assertTrue(failure.getMessage().contains("Unable to open"));
    }

    /**
     * Training on {@code import/ner-small.train} must raise an exception whose
     * message contains {@code "Insufficient"}.
     */
    @Test
    public void testTrainingOnSmallSetThrowsInsufficientException() {
        // Resolve the resource before entering the guarded call, as a missing
        // resource should surface directly rather than be captured.
        String trainingFile = resourcePath("import/ner-small.train");
        Exception failure = captureTrainingFailure(trainingFile);
        assertNotNull(failure);
        assertTrue(failure.getMessage().contains("Insufficient"));
    }

    /** Resolves a classpath resource name to the path component of its URL. */
    private String resourcePath(String resourceName) {
        return getClass().getClassLoader().getResource(resourceName).getPath();
    }

    /** Builds the train procedure call for the {@code test-ner} model using the given input file. */
    private static String trainQuery(String inputFile) {
        return "CALL ga.nlp.processor.train({textProcessor: \"com.graphaware.nlp.processor.opennlp.OpenNLPTextProcessor\", modelIdentifier: \"test-ner\", alg: \"ner\", inputFile: \""
                + inputFile
                + "\", trainingParameters: {iter: 10}})";
    }

    /**
     * Runs the train procedure for the given input file and returns the
     * {@link Exception} it raised, or {@code null} when the call completed.
     * Only {@code Exception} is captured, so assertion errors still propagate.
     */
    private Exception captureTrainingFailure(String inputFile) {
        String query = trainQuery(inputFile);
        try {
            executeInTransaction(query, (rows -> {
                assertTrue(rows.hasNext());
            }));
        } catch (Exception e) {
            return e;
        }
        return null;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,4 @@ public void testTrainCustomModelWithProcedure() {
}
}));
}

}
1 change: 1 addition & 0 deletions src/test/resources/import/ner-small.train
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Autism, <START:person> adhd <END> ever today and night.
64 changes: 64 additions & 0 deletions src/test/resources/import/ner.train
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<START:person> Theresa May <END> has said she will form a government with the support of the Democratic Unionists that can provide "certainty" for the future.
Speaking after visiting Buckingham Palace, she said only her party had the "legitimacy" to govern after winning the most seats and votes.
In a short statement outside Downing Street, which followed a 25-minute audience with <START:person> The Queen <END> , Mrs <START:person> May <END> said she intended to form a government which could "provide certainty and lead Britain forward at this critical time for our country".

The BBC's <START:person> Laura Kuenssberg <END> said the PM had returned to No 10 a "diminished figure", having ended up with 12 fewer seats than when she called the election in April.
She had called the election with the stated reason that it would strengthen her hand in negotiations for the UK to leave the EU - the talks are due to start on 19 June.

The general election has ended in a hung Parliament, where no party has the 326 seats needed to get an overall majority in the House of Commons.
So what happens now?
Who is the prime minister?
<START:person> Theresa May <END> remains prime minister.
She aims to form a minority government, working with the Democratic Unionist Party.

The Labour leader does not have to wait until Mrs <START:person> May <END> has exhausted all her options before he starts trying to put a deal of his own together.
He can hold talks with potential partners at the same time as Mrs <START:person> May <END> .
They may even be talking to the same people.

Labour had a majority of three after the 1974 general election - but this had vanished by 1977, and it stayed in power thanks to a "pact" with the Liberal Party.
And <START:person> John Major's <END> Conservative government started out with a majority of 21 in 1992 but was a minority government by the 1997 general election.

On 8 May 2013, one week before the Pakistani election, the third author, in his keynote address at the Sentiment Analysis Symposium, forecast the winner of the Pakistani election.
The chart in Figure 1 shows varying sentiment on the candidates for prime minister of Pakistan in that election.
The next day, the BBC’s <START:person> Owen Bennett Jones <END> , reporting from Islamabad, wrote an article titled “Pakistan Elections: Five Reasons Why the Vote is Unpredictable.”

At the moment the first deadline is Tuesday 13 June, when the new Parliament meets for the first time.
Mrs <START:person> May <END> has until this date to put together a deal to keep herself in power or resign, according to official guidance issued by the Cabinet Office.
If she were to resign, Mrs <START:person> May <END> must be clear that <START:person> Jeremy Corbyn <END> can form a government and that she can't.
She is entitled to wait until the new Parliament to see if she has the confidence of the House of Commons.

Japan's parliament has passed a one-off bill to allow Emperor <START:person> Akihito <END> to abdicate, the first emperor to do so in 200 years.
The 83-year-old said last year that his age and health were making it hard for him to fulfil his official duties.
But there was no provision under existing law for him to stand down.
The government will now begin the process of arranging his abdication, expected to happen in late 2018, and the handover to <START:person> Crown Prince Naruhito <END> .

Germany has called for diplomatic efforts to resolve a growing crisis over Qatar, which is accused by four Arab neighbours of funding terrorism.
Saudi Arabia, the United Arab Emirates (UAE), Egypt and Bahrain cut travel and diplomatic ties with Qatar on Monday.
Speaking after hosting his Qatari counterpart on Friday, German Foreign Minister <START:person> Sigmar Gabriel <END> called for the "sea and air blockades" to be lifted.

Mr <START:person> Gabriel <END> met Saudi Foreign Minister <START:person> Adil al-Ahmad al-Jubayr <END> two days ago, and said all parties were seeking "to avoid further escalation".
Then on Friday, Mr <START:person> Gabriel <END> spoke to Qatari Foreign Minister <START:person> Sheikh Mohammed bin Abdulrahman al-Thani <END> in the northern German town of Wolfenbuettel.

On Friday Saudi Arabia and its three allies issued a list of 49 people - including Muslim Brotherhood spiritual leader <START:person> Yusuf al-Qaradawi <END> - and 12 Qatar-backed charities and groups accused of links with militants.
On Thursday, Qatar's <START:person> Sheikh Mohammed <END> said his country had been isolated "because we are successful and progressive" and called his country "a platform for peace not terrorism".

US President <START:person> Donald Trump <END> has urged Nato allies to boost defence spending.
Last month German Chancellor <START:person> Angela Merkel <END> said Europe could no longer "completely depend" on the US and UK , following the election of President <START:person> Trump <END> and the triggering of Brexit.

The UK has long been one of the strongest voices in the EU against any moves towards forming a European army.
The UK says the EU must not duplicate Nato's role as the main pillar of European defence.
However, Mr <START:person> Trump's <END> criticisms of Nato have raised questions about the US commitment to defending Europe.

The Cabinet Office revealed on Wednesday that Japan's GDP grew by 0.3% during the first quarter of 2017.
Although the reading missed a forecast of 0.6% growth, Japan's economy continued to expand in five consecutive quarters, the country's highest streak in three years.

The General Administration of Customs revealed on Thursday that China's exports increased by 8.7% year-on-year in May, beating forecasts of a 7.2% increase.
The country's trade surplus now amounts to $40.8bn (£31bn).

Furthermore, the European Central Bank is scheduled to hold its monetary policy meeting today as well.
Expectations point towards the Bank reaffirming its decision to maintain a loose monetary policy, but spectators will carefully scrutinize ECB President <START:person> Mario Draghi <END> and his team's rhetoric in order to get an indication of when interest rates could possibly rise.

<START:person> James Trescothick <END> , senior global strategist, said there is the potential that Opec's agreement to limit current oil production could collapse.
If that happens oil prices could potentially fall.
However, Qatar is one of the smallest oil producers in Opec, with estimated proven reserves of 25bn barrels which is dwarfed by Saudi Arabia's 266bn barrels.
<START:person> Trescothick <END> believes the main danger to the region and indeed to oil prices is that increased tension could lead Qatar to reach out even further to Iran for support which would no doubt sour diplomatic relations further.