Search in sources :

Example 56 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class LuceneNGramCPFE method extract.

/**
 * Builds combined n-gram features from a pair of views.
 *
 * <p>The single {@link TextClassificationTarget} of each view is looked up,
 * document n-grams are computed per view with that view's own min/max n
 * settings, the two distributions are merged by
 * {@code ComboUtils.getCombinedNgrams}, and the result is handed to
 * {@code addToFeatureArray} together with {@code topKSetCombo}.
 *
 * @param view1 first view of the pair
 * @param view2 second view of the pair
 * @return the feature set returned by {@code addToFeatureArray}
 * @throws TextClassificationException declared by the extractor contract
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2) throws TextClassificationException {
    // Both targets are resolved before any n-gram work starts.
    TextClassificationTarget firstTarget = JCasUtil.selectSingle(view1, TextClassificationTarget.class);
    TextClassificationTarget secondTarget = JCasUtil.selectSingle(view2, TextClassificationTarget.class);

    FrequencyDistribution<String> firstNgrams = NGramUtils.getDocumentNgrams(view1, firstTarget, ngramLowerCase, filterPartialStopwordMatches, ngramMinN1, ngramMaxN1, stopwords, Token.class);
    FrequencyDistribution<String> secondNgrams = NGramUtils.getDocumentNgrams(view2, secondTarget, ngramLowerCase, filterPartialStopwordMatches, ngramMinN2, ngramMaxN2, stopwords, Token.class);

    FrequencyDistribution<String> comboNgrams = ComboUtils.getCombinedNgrams(firstNgrams, secondNgrams, ngramMinNCombo, ngramMaxNCombo, ngramUseSymmetricalCombos);

    // NOTE(review): prefix is declared outside this method; presumably
    // addToFeatureArray reads it when naming features - confirm.
    prefix = "comboNG";
    return addToFeatureArray(comboNgrams, topKSetCombo, new HashSet<Feature>());
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Feature(org.dkpro.tc.api.features.Feature) HashSet(java.util.HashSet)

Example 57 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class ComboUtils method getMultipleViewNgrams.

/**
 * Accumulates the n-grams of several views into one frequency distribution.
 *
 * <p>When {@code classificationUnit} is {@code null}, each view contributes
 * its document n-grams, computed for the view's single
 * {@link TextClassificationTarget}. Otherwise each view contributes the
 * n-grams of {@code classificationUnit}; in that mode no target is looked up,
 * so a view without exactly one target no longer makes the call fail.
 *
 * @param jcases views to collect n-grams from
 * @param classificationUnit annotation to restrict n-grams to, or
 *        {@code null} for document mode
 * @param ngramLowerCase passed through to {@code NGramUtils}
 * @param filterPartialStopwords passed through to {@code NGramUtils}
 * @param ngramMinN passed through to {@code NGramUtils}
 * @param ngramMaxN passed through to {@code NGramUtils}
 * @param stopwords passed through to {@code NGramUtils}
 * @return a new distribution holding the summed counts of all views
 * @throws TextClassificationException declared for the n-gram extraction calls
 */
public static FrequencyDistribution<String> getMultipleViewNgrams(List<JCas> jcases, Annotation classificationUnit, boolean ngramLowerCase, boolean filterPartialStopwords, int ngramMinN, int ngramMaxN, Set<String> stopwords) throws TextClassificationException {
    FrequencyDistribution<String> viewNgramsTotal = new FrequencyDistribution<String>();
    for (JCas view : jcases) {
        FrequencyDistribution<String> oneViewsNgrams;
        if (classificationUnit == null) {
            // Only document mode uses the target, so it is resolved here
            // rather than unconditionally for every view.
            TextClassificationTarget aTarget = JCasUtil.selectSingle(view, TextClassificationTarget.class);
            oneViewsNgrams = NGramUtils.getDocumentNgrams(view, aTarget, ngramLowerCase, filterPartialStopwords, ngramMinN, ngramMaxN, stopwords, Token.class);
        } else {
            oneViewsNgrams = NGramUtils.getAnnotationNgrams(view, classificationUnit, ngramLowerCase, filterPartialStopwords, ngramMinN, ngramMaxN, stopwords);
        }
        // Merge key by key: this code has no single call for combining
        // two frequency distributions.
        for (String key : oneViewsNgrams.getKeys()) {
            viewNgramsTotal.addSample(key, oneViewsNgrams.getCount(key));
        }
    }
    return viewNgramsTotal;
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) FrequencyDistribution(de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution)

Example 58 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class LucenePMetaCollectorBase method process.

/**
 * Writes the n-grams of a two-view CAS into the fields of this collector.
 *
 * <p>The views named {@code PART_ONE} and {@code PART_TWO} are fetched,
 * n-gram distributions are computed for each view and for both views
 * together, and every n-gram is passed to {@code addField} once per
 * occurrence under the matching field name.
 *
 * @param jcas CAS that contains both views
 * @throws AnalysisEngineProcessException if a view cannot be obtained or
 *         n-gram extraction raises a {@link TextClassificationException}
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    JCas partOne;
    JCas partTwo;
    try {
        partOne = jcas.getView(PART_ONE);
        partTwo = jcas.getView(PART_TWO);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    List<JCas> bothViews = new ArrayList<JCas>();
    bothViews.add(partOne);
    bothViews.add(partTwo);

    FrequencyDistribution<String> firstViewFd;
    FrequencyDistribution<String> secondViewFd;
    FrequencyDistribution<String> combinedFd;
    try {
        TextClassificationTarget firstTarget = JCasUtil.selectSingle(partOne, TextClassificationTarget.class);
        TextClassificationTarget secondTarget = JCasUtil.selectSingle(partTwo, TextClassificationTarget.class);
        firstViewFd = getNgramsFDView1(partOne, firstTarget);
        secondViewFd = getNgramsFDView2(partTwo, secondTarget);
        combinedFd = getNgramsFD(bothViews);
    } catch (TextClassificationException e) {
        throw new AnalysisEngineProcessException(e);
    }

    // Each n-gram is added as many times as it was counted.
    for (String ngram : combinedFd.getKeys()) {
        int written = 0;
        while (written < combinedFd.getCount(ngram)) {
            addField(getFieldName(), ngram);
            written++;
        }
    }
    for (String ngram : firstViewFd.getKeys()) {
        int written = 0;
        while (written < firstViewFd.getCount(ngram)) {
            addField(getFieldNameView1(), ngram);
            written++;
        }
    }
    for (String ngram : secondViewFd.getKeys()) {
        int written = 0;
        while (written < secondViewFd.getCount(ngram)) {
            addField(getFieldNameView2(), ngram);
            written++;
        }
    }
}
Also used: TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException), ArrayList (java.util.ArrayList), TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget), JCas (org.apache.uima.jcas.JCas), AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 59 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class TcAnnotator method processUnit.

/**
 * Classifies every unit annotation of type {@code nameUnit} separately.
 *
 * <p>For each unit a temporary {@link TextClassificationTarget} and a
 * {@link TextClassificationOutcome} carrying the dummy outcome value are
 * indexed over the unit's span, {@code engine} processes the CAS, and the
 * outcome value then found on the temporary outcome is recorded. The
 * temporary annotations are removed again even if the engine throws, so a
 * failed run leaves no target or dummy outcome behind in the CAS. After all
 * units are processed, one permanent outcome annotation per unit is indexed.
 *
 * @param aJCas CAS holding the unit annotations
 * @throws AnalysisEngineProcessException if {@code engine} fails on a unit
 */
private void processUnit(JCas aJCas) throws AnalysisEngineProcessException {
    Type type = aJCas.getCas().getTypeSystem().getType(nameUnit);
    Collection<AnnotationFS> select = CasUtil.select(aJCas.getCas(), type);
    // Copy the selection so the list is independent of the index changes below.
    List<AnnotationFS> unitAnnotation = new ArrayList<AnnotationFS>(select);
    List<String> outcomes = new ArrayList<String>(unitAnnotation.size());

    // iterate the units and set on each a prepared dummy outcome
    for (AnnotationFS unit : unitAnnotation) {
        TextClassificationTarget tcs = new TextClassificationTarget(aJCas, unit.getBegin(), unit.getEnd());
        tcs.addToIndexes();
        TextClassificationOutcome dummy = new TextClassificationOutcome(aJCas, unit.getBegin(), unit.getEnd());
        dummy.setOutcome(Constants.TC_OUTCOME_DUMMY_VALUE);
        dummy.addToIndexes();
        try {
            engine.process(aJCas);
            // store the outcome
            outcomes.add(dummy.getOutcome());
        } finally {
            // Always undo the temporary indexing, also on failure.
            tcs.removeFromIndexes();
            dummy.removeFromIndexes();
        }
    }

    // iterate again to set for each unit the outcome
    for (int i = 0; i < unitAnnotation.size(); i++) {
        AnnotationFS unit = unitAnnotation.get(i);
        TextClassificationOutcome tco = new TextClassificationOutcome(aJCas, unit.getBegin(), unit.getEnd());
        tco.setOutcome(outcomes.get(i));
        tco.addToIndexes();
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget)

Example 60 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class TcAnnotator method processDocument.

/**
 * Classifies the document as a whole.
 *
 * <p>If the CAS has no {@link TextClassificationTarget}, one spanning the
 * full document text is indexed. If it has no
 * {@link TextClassificationOutcome}, one with an empty outcome string is
 * indexed. The CAS is then passed to {@code engine}.
 *
 * @param aJCas CAS to classify
 * @throws AnalysisEngineProcessException wrapping any exception raised by
 *         {@code engine.process}
 */
private void processDocument(JCas aJCas) throws AnalysisEngineProcessException {
    boolean targetPresent = JCasUtil.exists(aJCas, TextClassificationTarget.class);
    if (!targetPresent) {
        // Cover the text from offset 0 to its full length.
        int textLength = aJCas.getDocumentText().length();
        TextClassificationTarget wholeDocument = new TextClassificationTarget(aJCas, 0, textLength);
        wholeDocument.addToIndexes();
    }

    // we need an outcome annotation to be present
    boolean outcomePresent = JCasUtil.exists(aJCas, TextClassificationOutcome.class);
    if (!outcomePresent) {
        TextClassificationOutcome placeholder = new TextClassificationOutcome(aJCas);
        placeholder.setOutcome("");
        placeholder.addToIndexes();
    }

    // Run the engine; every failure surfaces as AnalysisEngineProcessException.
    try {
        engine.process(aJCas);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used: TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome), TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget), AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException), ResourceInitializationException (org.apache.uima.resource.ResourceInitializationException), MalformedURLException (java.net.MalformedURLException), IOException (java.io.IOException)

Aggregations

TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 61; JCas (org.apache.uima.jcas.JCas): 29; ArrayList (java.util.ArrayList): 22; TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 18; Feature (org.dkpro.tc.api.features.Feature): 16; Test (org.junit.Test): 16; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 12; TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 12; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 11; JCasId (org.dkpro.tc.api.type.JCasId): 11; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 8; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 7; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 7; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 6; CollectionReader (org.apache.uima.collection.CollectionReader): 5; FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase): 5; DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData): 4; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 4; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4