Search in sources :

Example 46 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class CrfSuiteFeatureFormatExtractionIteratorTest method buildFeatures.

/**
 * Populates {@code fs} with five instances forming two sequences.
 *
 * <p>Sequence 0 holds three instances (positions 0 to 2) and sequence 1 holds two
 * instances (positions 0 and 1). Every instance is explicitly assigned JCas id 0.
 * Two feature lists are shared between the instances; they contain the same three
 * feature names but in a different order and with different values.
 *
 * @throws Exception declared by the original signature; the visible code does not show
 *         which call may throw
 */
private void buildFeatures() throws Exception {
    fs = new ArrayList<>();
    List<Feature> features1 = new ArrayList<Feature>();
    features1.add(new Feature("feature1", 1.0, FeatureType.NUMERIC));
    features1.add(new Feature("feature2", 0.0, FeatureType.NUMERIC));
    features1.add(new Feature("feature3", "Water", FeatureType.STRING));
    // Deliberately lists feature2 before feature1, unlike features1.
    List<Feature> features2 = new ArrayList<Feature>();
    features2.add(new Feature("feature2", 0.5, FeatureType.NUMERIC));
    features2.add(new Feature("feature1", 0.5, FeatureType.NUMERIC));
    features2.add(new Feature("feature3", "Fanta", FeatureType.STRING));
    // Sequence 0
    Instance instance1 = new Instance(features1, "1");
    instance1.setJcasId(0);
    instance1.setSequenceId(0);
    instance1.setSequencePosition(0);
    Instance instance2 = new Instance(features2, "2");
    // Set the id on the instance just created (previously instance1 was re-set by mistake).
    instance2.setJcasId(0);
    instance2.setSequenceId(0);
    instance2.setSequencePosition(1);
    Instance instance3 = new Instance(features1, "3");
    instance3.setJcasId(0);
    instance3.setSequenceId(0);
    instance3.setSequencePosition(2);
    // Sequence 1
    Instance instance4 = new Instance(features1, "4");
    instance4.setJcasId(0);
    instance4.setSequenceId(1);
    instance4.setSequencePosition(0);
    Instance instance5 = new Instance(features2, "4");
    instance5.setJcasId(0);
    instance5.setSequenceId(1);
    instance5.setSequencePosition(1);
    fs.add(instance1);
    fs.add(instance2);
    fs.add(instance3);
    fs.add(instance4);
    fs.add(instance5);
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature)

Example 47 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class CrfSuiteLoadModelConnector method getInstancesInSequence.

/**
 * Builds one {@link Instance} for every {@link TextClassificationTarget} covered by the
 * given sequence annotation.
 *
 * <p>Each instance receives the features produced by all supplied extractors, the outcomes
 * returned by {@code getOutcomes}, the id of the JCas, the given sequence id, and the
 * target's own id as its sequence position.
 *
 * @param featureExtractors extractors to run on each target; each one is cast to
 *        {@code FeatureExtractor}
 * @param jcas the CAS containing the sequence, its targets and a single {@code JCasId}
 * @param sequence the sequence annotation whose covered targets are processed
 * @param addInstanceId whether an instance-id feature is added before the extracted features
 * @param sequenceId id recorded on every created instance
 * @return the created instances, in the order the covered targets were returned
 * @throws Exception if feature extraction fails (a {@code TextClassificationException} is
 *         rethrown wrapped in an {@code AnalysisEngineProcessException}) or any other call
 *         throws
 */
private List<Instance> getInstancesInSequence(FeatureExtractorResource_ImplBase[] featureExtractors, JCas jcas, TextClassificationSequence sequence, boolean addInstanceId, int sequenceId) throws Exception {
    List<Instance> collected = new ArrayList<>();
    final int documentId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    for (TextClassificationTarget target : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, sequence)) {
        Instance current = new Instance();
        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, target, sequenceId));
        }
        // Run every extractor on the target and collect its features.
        try {
            for (FeatureExtractorResource_ImplBase resource : featureExtractors) {
                FeatureExtractor extractor = (FeatureExtractor) resource;
                current.addFeatures(extractor.extract(jcas, target));
            }
        } catch (TextClassificationException cause) {
            throw new AnalysisEngineProcessException(cause);
        }
        // Attach the outcome label(s) and the bookkeeping ids.
        current.setOutcomes(getOutcomes(jcas, target));
        current.setJcasId(documentId);
        current.setSequenceId(sequenceId);
        current.setSequencePosition(target.getId());
        collected.add(current);
    }
    return collected;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) Instance(org.dkpro.tc.api.features.Instance) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 48 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class CrfSuiteLoadModelConnector method process.

/**
 * Extracts the instances of every {@link TextClassificationSequence} in the CAS, classifies
 * them in batches via an external command, and writes the predictions back to the CAS.
 *
 * <p>Sequences are numbered consecutively starting at 0. The iterator's chunks are
 * concatenated in batches of at most 5000 before each command run; the output of all runs
 * is accumulated in memory and handed to {@code setPredictedOutcome} once at the end.
 *
 * @param jcas the CAS to classify
 * @throws AnalysisEngineProcessException wrapping any exception raised during processing
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    // Maximum number of iterator chunks sent to the command in one run.
    final int batchSize = 5000;
    try {
        List<Instance> allInstances = new ArrayList<>();
        int nextSequenceId = 0;
        for (TextClassificationSequence sequence : JCasUtil.select(jcas, TextClassificationSequence.class)) {
            allInstances.addAll(getInstancesInSequence(featureExtractors, jcas, sequence, true, nextSequenceId));
            nextSequenceId++;
        }
        CrfSuiteFeatureFormatExtractionIterator chunks = new CrfSuiteFeatureFormatExtractionIterator(allInstances);
        // Classifies the sequences batch by batch; all results are held in memory.
        StringBuilder predictions = new StringBuilder();
        while (chunks.hasNext()) {
            StringBuilder batch = new StringBuilder();
            for (int taken = 0; taken < batchSize && chunks.hasNext(); taken++) {
                batch.append(chunks.next());
            }
            List<String> command = buildCommand();
            predictions.append(runCommand(command, batch.toString()));
        }
        setPredictedOutcome(jcas, predictions.toString());
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) CrfSuiteFeatureFormatExtractionIterator(org.dkpro.tc.ml.crfsuite.writer.CrfSuiteFeatureFormatExtractionIterator) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) IOException(java.io.IOException) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 49 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class CrfSuiteFeatureFormatExtractionIterator method next.

/**
 * Serializes instances starting at the current position until the sequence they belong to
 * ends, and advances the position past them.
 *
 * <p>Each instance becomes one line: the substituted outcome label, a tab, and the
 * tab-separated {@code name=value} pairs of its features. The first line written for a new
 * id additionally ends with a tab and {@code __BOS__}. Lines inside a sequence are separated
 * by {@code "\n"}; after the last instance of the sequence (or of the whole list)
 * {@code appendEOS} is invoked instead.
 *
 * @return the serialized sequence; an empty builder if no instances are left
 * @throws UnsupportedOperationException wrapping any exception raised while serializing
 */
@Override
public StringBuilder next() {
    StringBuilder out = new StringBuilder();
    try {
        String currentSeqId = idInitVal;
        while (insIdx < instances.size()) {
            Instance current = instances.get(insIdx);
            String id = getId(current);
            // An id different from the one seen so far marks the start of a sequence.
            boolean startsSequence = !currentSeqId.equals(id);
            if (startsSequence) {
                currentSeqId = id;
            }
            out.append(LabelSubstitutor.labelReplacement(current.getOutcome()));
            out.append("\t");
            boolean firstFeature = true;
            for (Feature feature : current.getFeatures()) {
                if (!firstFeature) {
                    out.append("\t");
                }
                out.append(feature.getName() + "=" + feature.getValue());
                firstFeature = false;
            }
            // Mark first line of new sequence with an additional __BOS__
            if (startsSequence) {
                out.append("\t");
                out.append("__BOS__");
            }
            // Peek ahead - does the sequence end with this instance?
            int following = insIdx + 1;
            boolean sequenceEnds;
            if (following < instances.size()) {
                sequenceEnds = !currentSeqId.equals(getId(instances.get(following)));
            } else {
                // No further instance: we're done.
                sequenceEnds = following == instances.size();
            }
            if (sequenceEnds) {
                appendEOS(out);
            } else {
                out.append("\n");
            }
            insIdx++;
            if (sequenceEnds) {
                break;
            }
        }
    } catch (Exception e) {
        throw new UnsupportedOperationException(e);
    }
    return out;
}
Also used : Instance(org.dkpro.tc.api.features.Instance) Feature(org.dkpro.tc.api.features.Feature)

Example 50 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class CrfSuiteDataWriterTest method prepareFeatures.

/**
 * Adds five instances forming two sequences to {@code instances}.
 *
 * <p>Sequence 0 consists of three instances (positions 0 to 2) and sequence 1 of two
 * instances (positions 0 and 1). Two feature lists are shared between the instances; they
 * hold the same three feature names in a different order and with different values.
 *
 * @throws Exception declared by the original signature; the visible code does not show
 *         which call may throw
 */
private void prepareFeatures() throws Exception {
    List<Feature> waterFeatures = new ArrayList<>();
    waterFeatures.add(new Feature("feature1", 1.0, FeatureType.NUMERIC));
    waterFeatures.add(new Feature("feature2", 0.0, FeatureType.NUMERIC));
    waterFeatures.add(new Feature("feature3", "Water", FeatureType.STRING));

    // Same feature names as above, but feature2 comes first.
    List<Feature> fantaFeatures = new ArrayList<>();
    fantaFeatures.add(new Feature("feature2", 0.5, FeatureType.NUMERIC));
    fantaFeatures.add(new Feature("feature1", 0.5, FeatureType.NUMERIC));
    fantaFeatures.add(new Feature("feature3", "Fanta", FeatureType.STRING));

    // Sequence 0, positions 0..2
    Instance seq0First = new Instance(waterFeatures, "1");
    seq0First.setSequenceId(0);
    seq0First.setSequencePosition(0);
    instances.add(seq0First);

    Instance seq0Second = new Instance(fantaFeatures, "2");
    seq0Second.setSequenceId(0);
    seq0Second.setSequencePosition(1);
    instances.add(seq0Second);

    Instance seq0Third = new Instance(waterFeatures, "3");
    seq0Third.setSequenceId(0);
    seq0Third.setSequencePosition(2);
    instances.add(seq0Third);

    // Sequence 1, positions 0..1
    Instance seq1First = new Instance(waterFeatures, "4");
    seq1First.setSequenceId(1);
    seq1First.setSequencePosition(0);
    instances.add(seq1First);

    Instance seq1Second = new Instance(fantaFeatures, "4");
    seq1Second.setSequenceId(1);
    seq1Second.setSequencePosition(1);
    instances.add(seq1Second);
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature)

Aggregations

Instance (org.dkpro.tc.api.features.Instance)61 ArrayList (java.util.ArrayList)38 Feature (org.dkpro.tc.api.features.Feature)30 Test (org.junit.Test)27 File (java.io.File)17 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)14 IOException (java.io.IOException)12 Gson (com.google.gson.Gson)8 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)8 Attribute (weka.core.Attribute)8 DenseInstance (weka.core.DenseInstance)8 Instances (weka.core.Instances)8 SparseInstance (weka.core.SparseInstance)8 FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)6 BufferedReader (java.io.BufferedReader)5 FileInputStream (java.io.FileInputStream)5 InputStreamReader (java.io.InputStreamReader)5 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)5 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)5 ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription)5