Search in sources :

Example 91 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In class WekaUtilTest, method tcInstanceToMekaInstanceTest.

/**
 * Converts two TC instances into Meka instances against one shared header and
 * checks that both results report equal headers and seven attributes.
 *
 * The header consists of one binary ("0"/"1") attribute per outcome name,
 * followed by four feature attributes. The instances do not line up with it
 * exactly: both carry "feature3_{{", which the header lacks, and neither of
 * them sets "feature5".
 */
@Test
public void tcInstanceToMekaInstanceTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    // Header: one binary attribute per outcome, then the feature attributes.
    ArrayList<Attribute> header = new ArrayList<>();
    for (String outcomeName : outcomeValues) {
        header.add(new Attribute(outcomeName, Arrays.asList("0", "1")));
    }
    header.add(new Attribute("feature5"));
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    header.add(new Attribute("feature1"));
    Instances trainingData = new Instances("test", header, 0);

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    weka.core.Instance convertedFirst = WekaUtils.tcInstanceToMekaInstance(first, trainingData,
            outcomeValues);
    weka.core.Instance convertedSecond = WekaUtils.tcInstanceToMekaInstance(second, trainingData,
            outcomeValues);

    boolean sameHeaders = convertedFirst.equalHeaders(convertedSecond);
    assertEquals(true, sameHeaders);
    assertEquals(7, convertedFirst.numAttributes());

    // Attach each converted instance to its dataset, then dump the datasets
    // to stdout for manual inspection.
    List<weka.core.Instance> converted = Arrays.asList(convertedFirst, convertedSecond);
    for (weka.core.Instance each : converted) {
        each.dataset().add(each);
    }
    for (weka.core.Instance each : converted) {
        System.out.println(each.dataset() + "\n");
    }
}
Also used : Instances(weka.core.Instances) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 92 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In class WekaUtilTest, method tcInstanceToWekaInstanceRegressionTest.

/**
 * Converts two TC instances into Weka instances against one shared header,
 * passing {@code null} and {@code true} as the last two arguments of the
 * conversion, and checks that both results report equal headers and five
 * attributes.
 *
 * The header holds four feature attributes plus a plain "outcome" attribute.
 * Both instances carry "feature3_{{", which the header lacks, and neither of
 * them sets "feature5".
 */
@Test
public void tcInstanceToWekaInstanceRegressionTest() throws Exception {
    // Header: four feature attributes followed by the outcome attribute.
    ArrayList<Attribute> header = new ArrayList<>();
    header.add(new Attribute("feature5"));
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    header.add(new Attribute("feature1"));
    header.add(new Attribute("outcome"));
    Instances trainingData = new Instances("test", header, 0);

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    weka.core.Instance convertedFirst = WekaUtils.tcInstanceToWekaInstance(first, trainingData,
            null, true);
    weka.core.Instance convertedSecond = WekaUtils.tcInstanceToWekaInstance(second, trainingData,
            null, true);

    boolean sameHeaders = convertedFirst.equalHeaders(convertedSecond);
    assertEquals(true, sameHeaders);
    assertEquals(5, convertedFirst.numAttributes());

    // Attach each converted instance to its dataset, then dump the datasets
    // to stdout for manual inspection.
    List<weka.core.Instance> converted = Arrays.asList(convertedFirst, convertedSecond);
    for (weka.core.Instance each : converted) {
        each.dataset().add(each);
    }
    for (weka.core.Instance each : converted) {
        System.out.println(each.dataset() + "\n");
    }
}
Also used : Instances(weka.core.Instances) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 93 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In class WekaUtilTest, method instanceToArffTest_multiLabel.

/**
 * Writes three multi-label instances to an ARFF file via
 * {@code WekaUtils.instanceListToArffFileMultiLabel} and prints the resulting
 * file content to stdout.
 *
 * The test contains no assertions; it passes when the export and the
 * subsequent read complete without throwing.
 *
 * NOTE(review): "feature4" is built from a {@code Values} constant but is
 * declared as {@code FeatureType.NUMERIC} - confirm that this is intended.
 */
@Test
public void instanceToArffTest_multiLabel() throws Exception {
    Instance i1 = new Instance();
    i1.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));
    i1.addFeature(new Feature("feature4", Values.VALUE_1, FeatureType.NUMERIC));
    i1.setOutcomes("1", "2");

    Instance i2 = new Instance();
    i2.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i2.addFeature(new Feature("feature4", Values.VALUE_2, FeatureType.NUMERIC));
    i2.setOutcomes("2", "3");

    Instance i3 = new Instance();
    i3.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i3.addFeature(new Feature("feature4", Values.VALUE_3, FeatureType.NUMERIC));
    i3.setOutcomes("2");

    List<Instance> instances = new ArrayList<>();
    instances.add(i1);
    instances.add(i2);
    instances.add(i3);

    File outfile = new File("target/test/out.txt");
    // Create the parent directory only. Calling mkdirs() on the file path
    // itself would turn "out.txt" into a directory, and createNewFile() could
    // then no longer create the actual output file.
    outfile.getParentFile().mkdirs();
    outfile.createNewFile();
    outfile.deleteOnExit();

    WekaUtils.instanceListToArffFileMultiLabel(outfile, instances, false);
    System.out.println(FileUtils.readFileToString(outfile, "utf-8"));
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) File(java.io.File) Test(org.junit.Test)

Example 94 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In class LiblinearDataWriterTest, method dataWriterTest.

/**
 * Writes two single-label instances with {@code LibsvmDataFormatWriter} and
 * reads the produced file back as a Liblinear {@code Problem}.
 *
 * Both instances carry "feature1" and "feature2" (added in opposite order)
 * and have the outcomes "0" and "1". A feature-name file listing both names
 * is placed in the output directory before the writer is initialised. The
 * assertions check {@code l == 2}, {@code n == 4}, and the two entries of
 * {@code y} (0.0 and 1.0).
 */
@Test
public void dataWriterTest() throws Exception {
    List<Feature> firstFeatures = new ArrayList<>();
    firstFeatures.add(new Feature("feature1", 1.0, FeatureType.NUMERIC));
    firstFeatures.add(new Feature("feature2", 0.0, FeatureType.NUMERIC));

    List<Feature> secondFeatures = new ArrayList<>();
    secondFeatures.add(new Feature("feature2", 0.5, FeatureType.NUMERIC));
    secondFeatures.add(new Feature("feature1", 0.5, FeatureType.NUMERIC));

    List<Instance> instances = new ArrayList<>();
    instances.add(new Instance(firstFeatures, "0"));
    instances.add(new Instance(secondFeatures, "1"));

    File outputDirectory = folder.newFolder();

    // One feature name per line, each line terminated by '\n'.
    String featureNames = "feature1\n" + "feature2\n";
    File featureNameFile = new File(outputDirectory, Constants.FILENAME_FEATURES);
    FileUtils.writeStringToFile(featureNameFile, featureNames, "utf-8");

    File classifierInput = new File(outputDirectory,
            Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT);

    LibsvmDataFormatWriter dataWriter = new LibsvmDataFormatWriter();
    dataWriter.init(outputDirectory, false, Constants.LM_SINGLE_LABEL, false,
            new String[] { "0", "1" });
    dataWriter.writeClassifierFormat(instances);

    Problem loaded = Problem.readFromFile(classifierInput, 1.0);
    assertEquals(2, loaded.l);
    assertEquals(4, loaded.n);
    assertEquals(0.0, loaded.y[0], 0.00001);
    assertEquals(1.0, loaded.y[1], 0.00001);
}
Also used : LibsvmDataFormatWriter(org.dkpro.tc.io.libsvm.LibsvmDataFormatWriter) Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Problem(de.bwaldvogel.liblinear.Problem) Feature(org.dkpro.tc.api.features.Feature) File(java.io.File) Test(org.junit.Test)

Aggregations

Feature (org.dkpro.tc.api.features.Feature): 94; Test (org.junit.Test): 48; Instance (org.dkpro.tc.api.features.Instance): 30; ArrayList (java.util.ArrayList): 29; HashSet (java.util.HashSet): 21; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 17; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 16; TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 16; JCas (org.apache.uima.jcas.JCas): 15; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 13; File (java.io.File): 8; Attribute (weka.core.Attribute): 8; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 7; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 6; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 5; Chunk (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk): 4; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4; Instances (weka.core.Instances): 4; IOException (java.io.IOException): 3