Search in sources :

Example 1 with CAS

use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.

the class SWN3 method score.

/**
 * Computes the polarity score of a piece of text.
 * <p>
 * A new CAS is created from the analysis engine, loaded with the text,
 * run through the engine, and then handed to the CAS-based {@code score}
 * overload, whose result is returned unchanged.
 *
 * @param words the text to score
 * @return the score (polarity) for the text
 * @throws Exception if creating the CAS, processing it, or scoring it fails
 */
public double score(String words) throws Exception {
    final CAS document = analysisEngine.newCAS();
    document.setDocumentText(words);
    analysisEngine.process(document);
    final double polarity = score(document);
    return polarity;
}
Also used : CAS(org.apache.uima.cas.CAS)

Example 2 with CAS

use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.

the class TreeParser method getTreebankTrees.

/**
 * Gets treebank trees from text.
 * <p>
 * The text is first run through the tokenizer pipeline on a pooled CAS so that
 * its {@code Sentence} annotations can be iterated. Each sentence is then
 * tokenized and parsed in a CAS of its own, and the single
 * {@code TopTreebankNode} found there is added to the result.
 *
 * @param text the text to process; {@code null} or empty text yields an empty list
 * @return the list of top-level treebank nodes, one per sentence, in document order
 * @throws Exception if tokenizing or parsing fails, or a sentence does not
 *                   produce exactly one {@code TopTreebankNode}
 */
public List<TreebankNode> getTreebankTrees(String text) throws Exception {
    if (text == null || text.isEmpty()) {
        return new ArrayList<>();
    }
    List<TreebankNode> ret = new ArrayList<>();
    CAS c = pool.getCas();
    try {
        c.setDocumentText(text);
        tokenizer.process(c);
        for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) {
            // The node returned to the caller is selected from c2, so c2 is
            // deliberately neither reset nor reused here — presumably the node
            // remains backed by it.
            CAS c2 = tokenizer.newCAS();
            c2.setDocumentText(sentence.getCoveredText());
            tokenizer.process(c2);
            parser.process(c2);
            //build the tree based on this
            TopTreebankNode node = JCasUtil.selectSingle(c2.getJCas(), TopTreebankNode.class);
            ret.add(node);
        }
    } finally {
        // Always hand the pooled CAS back, even when processing throws.
        pool.releaseCas(c);
    }
    return ret;
}
Also used : CAS(org.apache.uima.cas.CAS) TreebankNode(org.cleartk.syntax.constituent.type.TreebankNode) TopTreebankNode(org.cleartk.syntax.constituent.type.TopTreebankNode) TopTreebankNode(org.cleartk.syntax.constituent.type.TopTreebankNode) ArrayList(java.util.ArrayList) Token(org.cleartk.token.type.Token) Sentence(org.cleartk.token.type.Sentence)

Example 3 with CAS

use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.

the class UimaSentenceIterator method nextSentence.

/**
 * Returns the next sentence, pulling and analysing further documents from the
 * collection reader whenever the buffered sentences are exhausted.
 * <p>
 * Documents that yield no sentences are skipped. The configured pre-processor,
 * if any, is applied to the sentence before it is returned.
 *
 * @return the next (pre-processed) sentence; an empty string if reading the
 *         next document from the reader throws; {@code null} when the reader
 *         has no more documents
 * @throws RuntimeException wrapping any other exception raised while reading
 *                          or analysing a document
 */
@Override
public synchronized String nextSentence() {
    // Fast path: sentences from the current document are still buffered.
    if (sentences != null && sentences.hasNext()) {
        return applyPreProcessorIfPresent(sentences.next());
    }
    try {
        if (!getReader().hasNext()) {
            return null;
        }
        // NOTE(review): the CAS obtained here is never handed back in this
        // method — confirm whether the resource expects callers to release it.
        CAS cas = resource.retrieve();
        try {
            getReader().getNext(cas);
        } catch (Exception e) {
            log.warn("Done iterating returning an empty string");
            return "";
        }
        sentences = collectSentenceTexts(cas).iterator();
        //needs to be next cas
        while (!sentences.hasNext()) {
            //sentence is empty; go to another cas
            if (!getReader().hasNext()) {
                return null;
            }
            cas.reset();
            getReader().getNext(cas);
            sentences = collectSentenceTexts(cas).iterator();
        }
        return applyPreProcessorIfPresent(sentences.next());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

/** Runs the resource's analysis engine over the CAS and returns the covered text of every Sentence annotation. */
private List<String> collectSentenceTexts(CAS cas) throws Exception {
    resource.getAnalysisEngine().process(cas);
    List<String> texts = new ArrayList<>();
    for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
        texts.add(sentence.getCoveredText());
    }
    return texts;
}

/** Applies the configured pre-processor to the sentence, or returns it unchanged when none is set. */
private String applyPreProcessorIfPresent(String sentence) {
    if (this.getPreProcessor() != null) {
        return this.getPreProcessor().preProcess(sentence);
    }
    return sentence;
}
Also used : CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) Sentence(org.cleartk.token.type.Sentence) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException)

Example 4 with CAS

use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.

the class UimaResource method process.

/**
 * Runs the analysis engine over the given text on a retrieved CAS.
 * <p>
 * The caller owns the returned CAS and is responsible for releasing it.
 * A processing failure is logged as a warning and the CAS is still returned.
 *
 * @param text the text to process
 * @return the CAS holding the text, or {@code null} if no CAS could be retrieved
 */
public CAS process(String text) {
    final CAS target = retrieve();
    if (target != null) {
        target.setDocumentText(text);
        try {
            analysisEngine.process(target);
        } catch (AnalysisEngineProcessException failure) {
            // Best effort: report the failure but still hand the CAS back.
            log.warn("Unable to process text " + text, failure);
        }
    }
    return target;
}
Also used : CAS(org.apache.uima.cas.CAS) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 5 with CAS

use of org.apache.uima.cas.CAS in project deeplearning4j by deeplearning4j.

the class TreeParser method getTrees.

/**
 * Gets trees from text.
 * <p>
 * The text is first run through the tokenizer pipeline on a pooled CAS so that
 * its {@code Sentence} annotations can be iterated. Each sentence is then
 * tokenized and parsed on a second pooled CAS, and the resulting
 * {@code TopTreebankNode} is converted with {@code TreeFactory.buildTree}.
 *
 * @param text the text to process; {@code null} or empty text yields an empty list
 * @return the list of trees, one per sentence, in document order
 * @throws Exception if tokenizing, parsing, or tree building fails, or a
 *                   sentence does not produce exactly one {@code TopTreebankNode}
 */
public List<Tree> getTrees(String text) throws Exception {
    List<Tree> ret = new ArrayList<>();
    if (text == null || text.isEmpty()) {
        // Nothing to segment; avoid checking CASes out of the pool.
        return ret;
    }
    CAS c = pool.getCas();
    try {
        c.setDocumentText(text);
        tokenizer.process(c);
        CAS c2 = pool.getCas();
        try {
            for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) {
                c2.setDocumentText(sentence.getCoveredText());
                tokenizer.process(c2);
                parser.process(c2);
                //build the tree based on this
                TopTreebankNode node = JCasUtil.selectSingle(c2.getJCas(), TopTreebankNode.class);
                log.info("Tree bank parse " + node.getTreebankParse());
                for (TreebankNode node2 : JCasUtil.select(c2.getJCas(), TreebankNode.class)) {
                    log.info("Node val " + node2.getNodeValue() + " and label " + node2.getNodeType() + " and tags was " + node2.getNodeTags());
                }
                ret.add(TreeFactory.buildTree(node));
                // Clear the per-sentence CAS so it can take the next sentence's text.
                c2.reset();
            }
        } finally {
            // Always hand the per-sentence CAS back, even when processing throws.
            pool.releaseCas(c2);
        }
    } finally {
        // Always hand the document CAS back, even when processing throws.
        pool.releaseCas(c);
    }
    return ret;
}
Also used : CAS(org.apache.uima.cas.CAS) TopTreebankNode(org.cleartk.syntax.constituent.type.TopTreebankNode) TreebankNode(org.cleartk.syntax.constituent.type.TreebankNode) TopTreebankNode(org.cleartk.syntax.constituent.type.TopTreebankNode) ArrayList(java.util.ArrayList) Tree(org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree) Token(org.cleartk.token.type.Token) Sentence(org.cleartk.token.type.Sentence)

Aggregations

CAS (org.apache.uima.cas.CAS): 7, ArrayList (java.util.ArrayList): 5, Sentence (org.cleartk.token.type.Sentence): 5, TopTreebankNode (org.cleartk.syntax.constituent.type.TopTreebankNode): 4, Token (org.cleartk.token.type.Token): 4, Tree (org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree): 3, TreebankNode (org.cleartk.syntax.constituent.type.TreebankNode): 2, MultiDimensionalMap (org.deeplearning4j.util.MultiDimensionalMap): 2, AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 1, ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException): 1, Pair (org.deeplearning4j.berkeley.Pair): 1