use of opennlp.tools.postag.POSModel in project stanbol by apache.
the class TextAnalyzer method getPosTagger.
/**
 * Provides the {@link POSTaggerME} for the current language, creating it on
 * the first call that needs it.
 * <p>
 * Once a load attempt has found no model or has failed with an
 * {@link IOException}, <code>posTaggerNotAvailable</code> is set so that
 * later calls do not ask for the model again.
 *
 * @return the POS tagger, or <code>null</code> if POS tagging is disabled
 * in the configuration or no tagger could be created
 */
protected final POSTaggerME getPosTagger() {
    if (!config.enablePosTagger) {
        return null;
    }
    // Either the tagger already exists or an earlier attempt showed that it
    // cannot be created; in both cases there is nothing left to initialise.
    if (posTagger != null || posTaggerNotAvailable) {
        return posTagger;
    }
    try {
        POSModel loadedModel = openNLP.getPartOfSpeechModel(language);
        if (loadedModel == null) {
            log.debug("No POS Model for language '{}'", language);
            posTaggerNotAvailable = true;
        } else {
            posTagger = new POSTaggerME(loadedModel);
        }
    } catch (IOException loadFailure) {
        log.info("Unable to load POS Model for language '" + language + "'", loadFailure);
        posTaggerNotAvailable = true;
    }
    return posTagger;
}
use of opennlp.tools.postag.POSModel in project stanbol by apache.
the class OpenNLPTest method testLoadMissingPOS.
/**
 * Checks that both the POS model lookup and the POS tagger lookup return
 * <code>null</code> for the language code "ru", which this test expects to
 * have no POS model.
 *
 * @throws IOException if one of the lookups fails while reading model data
 */
@Test
public void testLoadMissingPOS() throws IOException {
    final String missingLanguage = "ru";
    // the model lookup must run before the tagger lookup, as in the
    // original test sequence
    Assert.assertNull(openNLP.getPartOfSpeechModel(missingLanguage));
    Assert.assertNull(openNLP.getPartOfSpeechTagger(missingLanguage));
}
use of opennlp.tools.postag.POSModel in project stanbol by apache.
the class OpenNLP method getPartOfSpeechModel.
/**
 * Getter for the "part-of-speech" model for the parsed language.
 * If the model is not yet available a new one is built. The required data
 * are loaded by using the {@link DataFileProvider} service.
 * <p>
 * Two model files are considered: <code>{language}-pos-perceptron.bin</code>
 * is tried first; <code>{language}-pos-maxent.bin</code> is only tried if
 * the perceptron variant could not be loaded (not found or failed). If both
 * attempts fail with an {@link IOException}, the exception of the
 * perceptron attempt is thrown and the one of the maxent attempt is logged.
 * @param language the language
 * @return the model or <code>null</code> if no model data are found
 * @throws InvalidFormatException in case the found model data are in the wrong format
 * @throws IOException on any error while reading the model data
 */
public POSModel getPartOfSpeechModel(String language) throws IOException, InvalidFormatException {
    // typically there are two versions
    // we prefer the perceptron variant but if not available try to build the other
    IOException first = null;
    POSModel model;
    try {
        model = initModel(String.format("%s-pos-perceptron.bin", language), POSModel.class);
    } catch (IOException e) {
        // remember the failure: it is reported to the caller if the maxent
        // fallback fails as well
        first = e;
        log.warn("Unable to load perceptron based POS model for " + language, e);
        model = null;
    }
    if (model == null) {
        log.debug("No perceptron based POS model for language " + language
                + " available. Will try to load maxent model");
        try {
            model = initModel(String.format("%s-pos-maxent.bin", language), POSModel.class);
        } catch (IOException e) {
            if (first != null) {
                // the caller only sees the first failure, so log the second
                // one here instead of dropping it silently
                log.warn("Unable to load maxent based POS model for " + language, e);
                throw first;
            }
            throw e;
        }
    }
    return model;
}
use of opennlp.tools.postag.POSModel in project textdb by TextDB.
the class POSTagexample method main.
/**
 * Runs the OpenNLP POS tagger over every line of a fixed sample file and
 * prints each word together with its tag, followed by the number of printed
 * results and the statistics collected by the {@link PerformanceMonitor}.
 *
 * @param args command line arguments (not used)
 * @throws IOException if the model file or the sample data file cannot be read
 */
public static void main(String[] args) throws IOException {
    POSModel model = new POSModelLoader().load(new File("./src/main/java/edu/uci/ics/texera/sandbox/OpenNLPexample/en-pos-maxent.bin"));
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    POSTaggerME tagger = new POSTaggerME(model);
    String dataFile = "./src/main/resources/abstract_100.txt";
    int counter = 0;
    // try-with-resources closes the scanner even if tokenizing or tagging
    // throws part-way through the file
    try (Scanner scan = new Scanner(new File(dataFile))) {
        perfMon.start();
        while (scan.hasNextLine()) {
            String input = scan.nextLine();
            String[] sentence = Tokenize(input);
            String[] tags = tagger.tag(sentence);
            // one counter increment per processed line
            perfMon.incrementCounter();
            for (int i = 0; i < sentence.length; i++) {
                String word = sentence[i];
                String pos = tags[i];
                // filter out useless results: tokens whose tag is the token
                // itself and tokens tagged as opening/closing quotes
                if (!word.equals(pos) && !pos.equals("``") && !pos.equals("''")) {
                    counter++;
                    System.out.println("word: " + word + " pos: " + pos);
                }
            }
        }
        System.out.println("Total Number of Results: " + counter);
        perfMon.stopAndPrintFinalResult();
    }
}
Aggregations