Use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.
The class SWN3, method score.
/**
 * Computes the score (polarity) of a piece of text.
 * <p>
 * A new CAS is created from the analysis engine, the text is installed as its
 * document text, the engine is run over it, and the processed CAS is handed to
 * the CAS-based {@code score} overload.
 *
 * @param words the text to score
 * @return the score (polarity) for the text
 * @throws Exception if creating or processing the CAS, or scoring it, fails
 */
public double score(String words) throws Exception {
    final CAS document = analysisEngine.newCAS();
    document.setDocumentText(words);
    analysisEngine.process(document);
    final double polarity = score(document);
    return polarity;
}
Use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.
The class TreeParser, method getTreebankTrees.
/**
 * Gets treebank trees from text.
 * <p>
 * The text is first run through the tokenizer engine to segment it into sentences.
 * Each sentence is then processed on its own (tokenizer followed by parser) and the
 * single {@code TopTreebankNode} produced for it is collected.
 * <p>
 * The pooled CAS used for sentence segmentation is always handed back to the pool,
 * including when processing fails part-way through.
 *
 * @param text the text to process
 * @return the list of top-level treebank nodes, one per sentence; an empty list
 *         when {@code text} is empty
 * @throws Exception if tokenizing or parsing fails
 */
public List<TreebankNode> getTreebankTrees(String text) throws Exception {
    if (text.isEmpty())
        return new ArrayList<>();
    CAS c = pool.getCas();
    try {
        c.setDocumentText(text);
        tokenizer.process(c);
        List<TreebankNode> ret = new ArrayList<>();
        for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) {
            // A fresh CAS per sentence: the node added to the result is selected
            // from this CAS, so it is intentionally neither reset nor shared.
            CAS c2 = tokenizer.newCAS();
            c2.setDocumentText(sentence.getCoveredText());
            tokenizer.process(c2);
            parser.process(c2);
            //build the tree based on this
            TopTreebankNode node = JCasUtil.selectSingle(c2.getJCas(), TopTreebankNode.class);
            ret.add(node);
        }
        return ret;
    } finally {
        // Release on every path so a failed parse does not drain the pool.
        if (c != null)
            pool.releaseCas(c);
    }
}
Use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.
The class UimaSentenceIterator, method nextSentence.
/**
 * Returns the next sentence, loading and analysing further documents from the
 * reader whenever the current batch of sentences is exhausted.
 * <p>
 * The result is passed through the pre-processor when one is configured.
 *
 * @return the next sentence; {@code null} when the reader has no more documents;
 *         an empty string when fetching the next document from the reader fails
 * @throws RuntimeException wrapping any exception raised while loading or
 *         analysing a document
 */
@Override
public synchronized String nextSentence() {
    // Fast path: the current batch still has sentences buffered.
    if (sentences != null && sentences.hasNext()) {
        String buffered = sentences.next();
        if (this.getPreProcessor() != null)
            buffered = this.getPreProcessor().preProcess(buffered);
        return buffered;
    }

    try {
        if (!getReader().hasNext())
            return null;

        // NOTE(review): this CAS is never handed back to the resource in this
        // method — confirm whether it should be released once sentences are copied.
        CAS cas = resource.retrieve();
        try {
            getReader().getNext(cas);
        } catch (Exception e) {
            log.warn("Done iterating returning an empty string");
            return "";
        }
        resource.getAnalysisEngine().process(cas);

        List<String> collected = new ArrayList<>();
        for (Sentence found : JCasUtil.select(cas.getJCas(), Sentence.class)) {
            collected.add(found.getCoveredText());
        }
        sentences = collected.iterator();

        //needs to be next cas
        while (!sentences.hasNext()) {
            //sentence is empty; go to another cas
            if (!reader.hasNext())
                return null;
            cas.reset();
            getReader().getNext(cas);
            resource.getAnalysisEngine().process(cas);
            for (Sentence found : JCasUtil.select(cas.getJCas(), Sentence.class)) {
                collected.add(found.getCoveredText());
            }
            sentences = collected.iterator();
        }

        String first = sentences.next();
        if (this.getPreProcessor() != null)
            first = this.getPreProcessor().preProcess(first);
        return first;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.
The class UimaResource, method process.
/**
 * Runs the analysis engine over the given text and returns the resulting CAS.
 * <p>
 * The caller is responsible for releasing the returned CAS. If the engine fails
 * with an {@code AnalysisEngineProcessException}, the failure is logged as a
 * warning and the CAS is still returned.
 *
 * @param text the text to process
 * @return the processed CAS, or {@code null} when no CAS could be retrieved
 */
public CAS process(String text) {
    final CAS target = retrieve();
    if (target != null) {
        target.setDocumentText(text);
        try {
            analysisEngine.process(target);
        } catch (AnalysisEngineProcessException failure) {
            log.warn("Unable to process text " + text, failure);
        }
    }
    return target;
}
Use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.
The class TreeParser, method getTrees.
/**
 * Gets trees from text.
 * <p>
 * The text is first run through the tokenizer engine to segment it into sentences.
 * Each sentence is then tokenized and parsed in a second, reused CAS, and the
 * resulting {@code TopTreebankNode} is converted to a {@code Tree} via
 * {@code TreeFactory.buildTree}.
 * <p>
 * Both pooled CAS instances are always handed back to the pool, including when
 * processing fails part-way through.
 *
 * @param text the text to process
 * @return the list of trees, one per sentence
 * @throws Exception if tokenizing, parsing or tree construction fails
 */
public List<Tree> getTrees(String text) throws Exception {
    CAS c = pool.getCas();
    try {
        c.setDocumentText(text);
        tokenizer.process(c);
        List<Tree> ret = new ArrayList<>();
        CAS c2 = pool.getCas();
        try {
            for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) {
                c2.setDocumentText(sentence.getCoveredText());
                tokenizer.process(c2);
                parser.process(c2);
                //build the tree based on this
                TopTreebankNode node = JCasUtil.selectSingle(c2.getJCas(), TopTreebankNode.class);
                log.info("Tree bank parse " + node.getTreebankParse());
                for (TreebankNode node2 : JCasUtil.select(c2.getJCas(), TreebankNode.class)) {
                    log.info("Node val " + node2.getNodeValue() + " and label " + node2.getNodeType() + " and tags was " + node2.getNodeTags());
                }
                ret.add(TreeFactory.buildTree(node));
                // Clear the per-sentence CAS so it can take the next sentence.
                c2.reset();
            }
        } finally {
            // Release on every path so a failed parse does not drain the pool.
            if (c2 != null)
                pool.releaseCas(c2);
        }
        return ret;
    } finally {
        if (c != null)
            pool.releaseCas(c);
    }
}
Aggregations