use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class InstanceTest method instanceAddSingleFeatureTest.
/**
 * Verifies that adding a single feature to an instance that was created with two
 * features results in three features being reported by {@code getFeatures()}.
 */
@Test
public void instanceAddSingleFeatureTest() throws Exception {
    // Build the instance from two initial string features.
    List<Feature> initialFeatures = new ArrayList<>();
    initialFeatures.add(new Feature("feature1", "value1", FeatureType.STRING));
    initialFeatures.add(new Feature("feature2", "value1", FeatureType.STRING));
    Instance underTest = new Instance(initialFeatures, "outcome");

    // Add one more feature after construction.
    Feature extra = new Feature("feature3", "value1", FeatureType.STRING);
    underTest.addFeature(extra);

    assertEquals(3, underTest.getFeatures().size());
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class InstanceTest method instanceAddFeatureListTest.
/**
 * Verifies that adding a list of two features to an instance that was created with two
 * features results in four features being reported by {@code getFeatures()}.
 */
@Test
public void instanceAddFeatureListTest() throws Exception {
    // Build the instance from two initial string features.
    List<Feature> initialFeatures = new ArrayList<>();
    initialFeatures.add(new Feature("feature1", "value1", FeatureType.STRING));
    initialFeatures.add(new Feature("feature2", "value1", FeatureType.STRING));
    Instance underTest = new Instance(initialFeatures, "outcome");

    // Add two further features in a single call.
    List<Feature> additional = new ArrayList<>();
    additional.add(new Feature("feature3", "value3", FeatureType.STRING));
    additional.add(new Feature("feature4", "value4", FeatureType.STRING));
    underTest.addFeatures(additional);

    assertEquals(4, underTest.getFeatures().size());
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class InstanceTest method instanceInitializationWithArrayOfOutcomes.
/**
 * Verifies that an instance constructed with two features and two outcome values
 * reports two features and two outcomes.
 */
@Test
public void instanceInitializationWithArrayOfOutcomes() throws Exception {
    List<Feature> initialFeatures = new ArrayList<>();
    initialFeatures.add(new Feature("feature1", "value1", FeatureType.STRING));
    initialFeatures.add(new Feature("feature2", "value1", FeatureType.STRING));

    // Two outcome values are handed to the constructor at once.
    Instance underTest = new Instance(initialFeatures, "outcome", "outcome2");

    assertEquals(2, underTest.getFeatures().size());
    assertEquals(2, underTest.getOutcomes().size());
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class FilterLuceneCharacterNgramStartingWithLetter method applyFilter.
/**
 * Rewrites the given feature file in place, dropping every feature whose name starts
 * with {@code "charngram"} from each serialized instance.
 * <p>
 * Every line of the file is de-serialized as a JSON array of {@code Instance} objects,
 * filtered, and re-serialized as one output line. The filtered content is first written
 * to a temporary file and then copied over the input file.
 *
 * @param inputFeatureFile
 *            the UTF-8 encoded feature file to filter; it is overwritten with the result
 * @throws Exception
 *             if reading, (de-)serializing or writing the data fails
 */
@Override
public void applyFilter(File inputFeatureFile) throws Exception {
    Gson gson = new Gson();

    // NOTE(review): the whole file is read into memory here; for very large feature
    // files a line-by-line stream would be preferable.
    List<String> inputLines = FileUtils.readLines(inputFeatureFile, "utf-8");
    List<String> outputLines = new ArrayList<>(inputLines.size());

    for (String line : inputLines) {
        // de-serialize one line into its instances
        Instance[] instances = gson.fromJson(line, Instance[].class);

        for (Instance inst : instances) {
            // keep only the features that are not character n-gram features;
            // a single pass avoids the quadratic collect-then-remove pattern
            List<Feature> kept = new ArrayList<>();
            for (Feature f : inst.getFeatures()) {
                if (!f.getName().startsWith("charngram")) {
                    kept.add(f);
                }
            }
            // update the instance in place
            inst.setFeatures(kept);
        }

        // re-serialize the (now filtered) instances of this line
        outputLines.add(gson.toJson(instances, Instance[].class));
    }

    // Write the new content to a temporary location first
    File tmp = File.createTempFile("tmpFeatureFile", "tmp");
    try {
        FileUtils.writeLines(tmp, "utf-8", outputLines);
        // overwrite the input file with the filtered file
        FileUtils.copyFile(tmp, inputFeatureFile);
    }
    finally {
        // best-effort cleanup, also when writing or copying failed
        tmp.delete();
    }
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class TokenLengthRatio method extract.
/**
 * Produces a single numeric feature named {@code FEATURE_NAME} whose value is the result
 * of {@code getRatio} applied to the length of the target's covered text and the value
 * returned by {@code getMax()}.
 *
 * @param jcas
 *            the CAS being processed (not read by this method)
 * @param aTarget
 *            the classification target whose covered text length is measured
 * @return a set containing the one ratio feature
 * @throws TextClassificationException
 *             declared by the extractor contract
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget) throws TextClassificationException {
    // presumably the maximum token length to normalize against - confirm in getMax()
    long longest = getMax();
    int targetLength = aTarget.getCoveredText().length();
    double lengthRatio = getRatio(targetLength, longest);

    Feature ratioFeature = new Feature(FEATURE_NAME, lengthRatio, FeatureType.NUMERIC);
    return ratioFeature.asSet();
}
Aggregations