use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class WekaUtilTest method tcInstanceToWekaInstanceTest.
/**
 * Converts two TC instances with differing feature sets against one shared Weka header
 * and verifies that both converted instances report equal headers and five attributes.
 *
 * The header deliberately differs from the instances: "feature5" is declared in the header
 * but set on neither instance, and "feature3_{{" is set on both instances but absent from
 * the header.
 */
@Test
public void tcInstanceToWekaInstanceTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    // Shared header: three numeric attributes, one nominal attribute and the nominal outcome.
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("feature5"));
    attributes.add(new Attribute("feature2"));
    attributes.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    attributes.add(new Attribute("feature1"));
    attributes.add(new Attribute("outcome", outcomeValues));
    Instances trainingData = new Instances("test", attributes, 0);

    // First instance: carries feature1 and feature2, but no feature4.
    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    // Second instance: carries feature1 and feature4, but no feature2.
    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    weka.core.Instance wekaInstance1 =
            WekaUtils.tcInstanceToWekaInstance(first, trainingData, outcomeValues, false);
    weka.core.Instance wekaInstance2 =
            WekaUtils.tcInstanceToWekaInstance(second, trainingData, outcomeValues, false);

    assertEquals(true, wekaInstance1.equalHeaders(wekaInstance2));
    // Five attributes were declared in the header above.
    assertEquals(5, wekaInstance1.numAttributes());

    // Attach each converted instance to its own dataset before dumping the datasets.
    wekaInstance1.dataset().add(wekaInstance1);
    wekaInstance2.dataset().add(wekaInstance2);

    System.out.println(wekaInstance1.dataset() + "\n");
    System.out.println(wekaInstance2.dataset() + "\n");
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class WekaUtilTest method tcInstanceToWekaInstanceFailTest.
/**
 * Expects the conversion to be rejected with an {@link IllegalArgumentException}.
 *
 * The instance sets "feature4" to "val_1", while the header declares "feature4" as a nominal
 * attribute whose only values are "val_4" and "val_2".
 * NOTE(review): presumably that undeclared nominal value is what triggers the exception;
 * confirm against WekaUtils.
 */
@Test(expected = IllegalArgumentException.class)
public void tcInstanceToWekaInstanceFailTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    // Header with one numeric attribute, one nominal attribute and the nominal outcome.
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("feature2"));
    attributes.add(new Attribute("feature4", Arrays.asList("val_4", "val_2")));
    attributes.add(new Attribute("outcome", outcomeValues));
    Instances trainingData = new Instances("test", attributes, 0);

    Instance instance = new Instance();
    instance.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    instance.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    instance.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    // The result is irrelevant; the call itself is expected to throw.
    WekaUtils.tcInstanceToWekaInstance(instance, trainingData, outcomeValues, false);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class ExtractFeaturesConnector method enforceMatchingFeatures.
/**
 * Restricts each instance to the features whose names are listed in {@code featureMeta}.
 *
 * Outside of testing mode the input list is returned untouched. In testing mode every
 * instance is modified in place (its feature list is replaced) and the same instance
 * objects are returned in a new list, in their original order.
 *
 * @param instances the instances to filter
 * @return {@code instances} itself when not testing, otherwise a new list holding the
 *         same, now filtered, instances
 */
private List<Instance> enforceMatchingFeatures(List<Instance> instances) {
    if (isTesting) {
        List<Instance> filtered = new ArrayList<>();
        for (Instance instance : instances) {
            List<Feature> kept = new ArrayList<>();
            for (Feature feature : instance.getFeatures()) {
                // Keep only features whose name is listed in the feature meta data.
                if (featureMeta.getFeatureNames().contains(feature.getName())) {
                    kept.add(feature);
                }
            }
            instance.setFeatures(kept);
            filtered.add(instance);
        }
        return filtered;
    }
    return instances;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class InstanceExtractor method getSparse.
/**
 * Runs the given feature extractor on the target and returns only those features that do
 * not report a default value.
 *
 * @param jcas    the CAS handed to the extractor
 * @param unit    the classification target handed to the extractor
 * @param featExt the extractor resource; it is cast to {@link FeatureExtractor}
 * @return a new set containing the extracted features for which
 *         {@code isDefaultValue()} is false
 * @throws TextClassificationException declared by this method; presumably propagated from
 *         the extractor call
 */
private Set<Feature> getSparse(JCas jcas, TextClassificationTarget unit, FeatureExtractorResource_ImplBase featExt) throws TextClassificationException {
    FeatureExtractor extractor = (FeatureExtractor) featExt;
    Set<Feature> extracted = extractor.extract(jcas, unit);

    Set<Feature> nonDefault = new HashSet<>();
    for (Feature candidate : extracted) {
        if (candidate.isDefaultValue()) {
            // Default-valued features are dropped from the result.
            continue;
        }
        nonDefault.add(candidate);
    }
    return nonDefault;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class FeatureMetaData method collectMetaData.
/**
 * Collects feature meta data from the first instance of the given list.
 *
 * The set of feature names is reset and refilled with the names found on that first
 * instance. A feature's type is recorded in {@code featDesc} only if no entry exists for
 * its name yet. For features of type {@code NOMINAL}, the class name of the feature value
 * is stored in {@code enumFeatureName}. Afterwards {@code didCollect} is set.
 *
 * Only {@code instances.get(0)} is inspected; an empty list therefore fails with the
 * exception thrown by {@link List#get(int)}.
 *
 * @param instances the instances to read from; only the first element is used
 */
public void collectMetaData(List<Instance> instances) {
    featureNames = new TreeSet<>();

    Instance reference = instances.get(0);
    for (Feature feature : reference.getFeatures()) {
        String name = feature.getName();
        featureNames.add(name);

        // An already recorded type description is never overwritten.
        if (!featDesc.containsKey(name)) {
            featDesc.put(name, feature.getType());
        }

        if (FeatureType.NOMINAL == feature.getType()) {
            // Store the class name of the nominal value under the feature's name.
            enumFeatureName.put(name, feature.getValue().getClass().getName());
        }
    }

    didCollect = true;
}
Aggregations