Use of org.tribuo.data.columnar.processors.field.IdentityProcessor in the project tribuo by Oracle.
From the class JsonDataSourceTest, method buildRowProcessor.
/**
 * Assembles the {@link RowProcessor} fixture for this test class.
 *
 * <p>The processor is wired with:
 * <ul>
 *   <li>field processors for "height" (double), "description" (text, tokenised with a
 *       US-locale {@link BreakIteratorTokenizer} through a {@link BasicPipeline} built with 2)
 *       and "transport" (identity);</li>
 *   <li>one regex mapping, "extra.*", handled by a {@link DoubleFieldProcessor};</li>
 *   <li>a {@link FieldResponseProcessor} on "disposition" with "UNK" as its second argument;</li>
 *   <li>metadata extractors for "id" and "timestamp";</li>
 *   <li>an empty set as the final constructor argument.</li>
 * </ul>
 *
 * @return the configured row processor.
 */
private static RowProcessor<MockOutput> buildRowProcessor() {
    // Metadata extractors: an IntExtractor on "id" and a DateExtractor on "timestamp".
    List<FieldExtractor<?>> extractors = new ArrayList<>();
    extractors.add(new IntExtractor("id"));
    extractors.add(new DateExtractor("timestamp", "timestamp", "dd/MM/yyyy HH:mm"));

    // The response is read from the "disposition" field.
    ResponseProcessor<MockOutput> response =
            new FieldResponseProcessor<>("disposition", "UNK", new MockOutputFactory());

    // Processors bound to exact field names.
    Map<String, FieldProcessor> byFieldName = new HashMap<>();
    byFieldName.put("height", new DoubleFieldProcessor("height"));
    BasicPipeline descriptionPipeline = new BasicPipeline(new BreakIteratorTokenizer(Locale.US), 2);
    byFieldName.put("description", new TextFieldProcessor("description", descriptionPipeline));
    byFieldName.put("transport", new IdentityProcessor("transport"));

    // Processors bound to field-name patterns.
    Map<String, FieldProcessor> byFieldPattern = new HashMap<>();
    byFieldPattern.put("extra.*", new DoubleFieldProcessor("regex"));

    // null is passed in the second constructor position, as in the original fixture
    // (presumably the weight extractor slot - confirm against the RowProcessor constructor).
    return new RowProcessor<>(extractors, null, response, byFieldName, byFieldPattern, Collections.emptySet());
}
Use of org.tribuo.data.columnar.processors.field.IdentityProcessor in the project tribuo by Oracle.
From the class LIMEColumnarTest, method generateBinarisedDataset.
/**
 * Loads the columnar test CSV with fields "A" and "D" handled by plain
 * {@link IdentityProcessor}s, "B" and "C" by {@link DoubleFieldProcessor}s and
 * "TextField" by a {@link TextFieldProcessor}.
 *
 * @return a pair holding the row processor and the dataset built from the CSV source.
 * @throws URISyntaxException if the test resource URL cannot be converted to a URI.
 */
private Pair<RowProcessor<Label>, Dataset<Label>> generateBinarisedDataset() throws URISyntaxException {
    // Per-field processors, keyed by column name.
    Map<String, FieldProcessor> processors = new HashMap<>();
    processors.put("A", new IdentityProcessor("A"));
    processors.put("B", new DoubleFieldProcessor("B"));
    processors.put("C", new DoubleFieldProcessor("C"));
    processors.put("D", new IdentityProcessor("D"));
    BasicPipeline textPipeline = new BasicPipeline(tokenizer, 2);
    processors.put("TextField", new TextFieldProcessor("TextField", textPipeline));

    // The label is read from the "Response" column; "N" is the second constructor argument.
    ResponseProcessor<Label> labelProcessor = new FieldResponseProcessor<>("Response", "N", new LabelFactory());
    RowProcessor<Label> rowProcessor = new RowProcessor<>(labelProcessor, processors);

    // The CSV is a classpath resource; true is passed as the final CSVDataSource argument.
    CSVDataSource<Label> csvSource = new CSVDataSource<>(
            LIMEColumnarTest.class.getResource("/org/tribuo/classification/explanations/lime/test-columnar.csv").toURI(),
            rowProcessor,
            true);
    Dataset<Label> loaded = new MutableDataset<>(csvSource);
    return new Pair<>(rowProcessor, loaded);
}
Use of org.tribuo.data.columnar.processors.field.IdentityProcessor in the project tribuo by Oracle.
From the class LIMEColumnarTest, method generateCategoricalDataset.
/**
 * Loads the columnar test CSV with fields "A" and "D" handled by
 * {@link IdentityProcessor} subclasses whose {@code getFeatureType()} reports
 * {@code GeneratedFeatureType.CATEGORICAL}; "B" and "C" use
 * {@link DoubleFieldProcessor}s and "TextField" uses a {@link TextFieldProcessor}.
 *
 * @return a pair holding the row processor and the dataset built from the CSV source.
 * @throws URISyntaxException if the test resource URL cannot be converted to a URI.
 */
private Pair<RowProcessor<Label>, Dataset<Label>> generateCategoricalDataset() throws URISyntaxException {
    // The two anonymous subclasses are kept inline and in their original order so the
    // compiler-generated class names stay the same.
    FieldProcessor categoricalA = new IdentityProcessor("A") {
        @Override
        public GeneratedFeatureType getFeatureType() {
            return GeneratedFeatureType.CATEGORICAL;
        }
    };
    FieldProcessor categoricalD = new IdentityProcessor("D") {
        @Override
        public GeneratedFeatureType getFeatureType() {
            return GeneratedFeatureType.CATEGORICAL;
        }
    };

    // Per-field processors, keyed by column name.
    Map<String, FieldProcessor> processors = new HashMap<>();
    processors.put("A", categoricalA);
    processors.put("B", new DoubleFieldProcessor("B"));
    processors.put("C", new DoubleFieldProcessor("C"));
    processors.put("D", categoricalD);
    processors.put("TextField", new TextFieldProcessor("TextField", new BasicPipeline(tokenizer, 2)));

    // The label is read from the "Response" column; "N" is the second constructor argument.
    ResponseProcessor<Label> labelProcessor = new FieldResponseProcessor<>("Response", "N", new LabelFactory());
    RowProcessor<Label> rowProcessor = new RowProcessor<>(labelProcessor, processors);

    // The CSV is a classpath resource; true is passed as the final CSVDataSource argument.
    CSVDataSource<Label> csvSource = new CSVDataSource<>(
            LIMEColumnarTest.class.getResource("/org/tribuo/classification/explanations/lime/test-columnar.csv").toURI(),
            rowProcessor,
            true);
    Dataset<Label> loaded = new MutableDataset<>(csvSource);
    return new Pair<>(rowProcessor, loaded);
}
Use of org.tribuo.data.columnar.processors.field.IdentityProcessor in the project tribuo by Oracle.
From the class RowProcessorTest, method testInvalidRegexMapping.
/**
 * Verifies that each of three invalid regex-mapping configurations is rejected with an
 * {@link IllegalArgumentException}, thrown either by the {@link RowProcessor} constructor
 * or by {@code expandRegexMapping}.
 */
@Test
public void testInvalidRegexMapping() {
    List<String> fieldNames = Arrays.asList("Armadillos", "Armadas", "Archery", "Battleship", "Battles", "Carrots", "Label");
    Map<String, FieldProcessor> fixed = new HashMap<>();
    fixed.put("Battles", new IdentityProcessor("Battles"));

    // NOTE(review): the reasons below are inferred from the pattern names - confirm
    // against RowProcessor.expandRegexMapping.
    // Presumably invalid because "Monkeys" matches none of the supplied field names.
    assertRegexMappingRejected(fieldNames, fixed, "Arma*", "Monkeys");
    // Presumably invalid because the pattern collides with the fixed "Battles" processor.
    assertRegexMappingRejected(fieldNames, fixed, "Battle*");
    // Presumably invalid because both patterns can claim the same field.
    assertRegexMappingRejected(fieldNames, fixed, "Arm*", "Armadil*");
}

/**
 * Builds a {@link RowProcessor} whose regex mapping contains one {@link IdentityProcessor}
 * per supplied pattern, expands the mapping against {@code fieldNames} and asserts that an
 * {@link IllegalArgumentException} is thrown.
 *
 * <p>Using {@code assertThrows} means an unexpected exception type is reported together
 * with its stack trace rather than being replaced by a bare failure message.
 *
 * @param fieldNames the field names the regex mapping is expanded against.
 * @param fixed the fixed (exact-name) field processors.
 * @param patterns the regex patterns to register; each is also used as the processor's field name.
 */
private void assertRegexMappingRejected(List<String> fieldNames, Map<String, FieldProcessor> fixed, String... patterns) {
    // A fresh map per case, so no state leaks between the scenarios.
    Map<String, FieldProcessor> regex = new HashMap<>();
    for (String pattern : patterns) {
        regex.put(pattern, new IdentityProcessor(pattern));
    }
    assertThrows(IllegalArgumentException.class, () -> {
        RowProcessor<MockOutput> rowProcessor = new RowProcessor<>(Collections.emptyList(), null, new MockResponseProcessor("Label"), fixed, regex, new HashSet<>());
        rowProcessor.expandRegexMapping(fieldNames);
    });
}
Use of org.tribuo.data.columnar.processors.field.IdentityProcessor in the project tribuo by Oracle.
From the class RowProcessorTest, method metadataExtractorTest.
/**
 * Exercises metadata and weight extraction in {@link RowProcessor}.
 *
 * <p>A single row is processed with two field processors, four metadata extractors and a
 * weight extractor. The test then checks the generated features, the example weight, the
 * extracted metadata values and the reported metadata types. Finally it checks that
 * constructing a processor with two extractors that share the metadata name "ID" throws a
 * {@link PropertyException}.
 */
@Test
public void metadataExtractorTest() {
    // Field processors: identity on "Battles", double on "Armadas".
    Map<String, FieldProcessor> fixed = new HashMap<>();
    fixed.put("Battles", new IdentityProcessor("Battles"));
    fixed.put("Armadas", new DoubleFieldProcessor("Armadas"));

    // Metadata extractors; the second argument is the name the value is stored under.
    List<FieldExtractor<?>> extractors = new ArrayList<>();
    extractors.add(new IdentityExtractor("Armadillos", Example.NAME));
    extractors.add(new IntExtractor("Armadillos", "ID"));
    extractors.add(new DateExtractor("Carrots", "Date", "uuuuMMdd"));
    extractors.add(new OffsetDateTimeExtractor("Carrot-time", "OffsetDateTime", "dd/MM/yyyy HH:mmx"));

    FloatExtractor weightExtractor = new FloatExtractor("Mass");
    MockResponseProcessor response = new MockResponseProcessor("Label");

    // The input row, keyed by field name.
    Map<String, String> inputRow = new HashMap<>();
    inputRow.put("Armadillos", "1");
    inputRow.put("Armadas", "2");
    inputRow.put("Archery", "3");
    inputRow.put("Battleship", "4");
    inputRow.put("Battles", "5");
    inputRow.put("Carrots", "20010506");
    inputRow.put("Carrot-time", "14/10/2020 16:07+01");
    inputRow.put("Mass", "9000");
    inputRow.put("Label", "Sheep");

    RowProcessor<MockOutput> rowProcessor = new RowProcessor<>(extractors, weightExtractor, response, fixed, Collections.emptySet());
    Example<MockOutput> generated = rowProcessor.generateExample(inputRow, true).get();

    // Features, output and weight: only the two processed fields produce features.
    assertEquals(2, generated.size());
    assertEquals("Sheep", generated.getOutput().label);
    Iterator<Feature> features = generated.iterator();
    Feature armadas = features.next();
    assertEquals("Armadas@value", armadas.getName());
    assertEquals(2.0, armadas.getValue());
    Feature battles = features.next();
    assertEquals("Battles@5", battles.getName());
    assertEquals(IdentityProcessor.FEATURE_VALUE, battles.getValue());
    assertEquals(9000f, generated.getWeight());

    // Metadata values.
    Map<String, Object> extracted = generated.getMetadata();
    assertEquals(4, extracted.size());
    assertEquals("1", extracted.get(Example.NAME));
    assertEquals(1, extracted.get("ID"));
    assertEquals(LocalDate.of(2001, 5, 6), extracted.get("Date"));
    OffsetDateTime expectedTimestamp = OffsetDateTime.of(LocalDate.of(2020, 10, 14), LocalTime.of(16, 7), ZoneOffset.ofHours(1));
    assertEquals(expectedTimestamp, extracted.get("OffsetDateTime"));

    // Metadata types reported by the processor.
    Map<String, Class<?>> types = rowProcessor.getMetadataTypes();
    assertEquals(4, types.size());
    assertEquals(String.class, types.get(Example.NAME));
    assertEquals(Integer.class, types.get("ID"));
    assertEquals(LocalDate.class, types.get("Date"));
    assertEquals(OffsetDateTime.class, types.get("OffsetDateTime"));

    // Two extractors sharing the metadata name "ID" must make construction fail with a PropertyException.
    List<FieldExtractor<?>> clashingExtractors = new ArrayList<>();
    clashingExtractors.add(new IdentityExtractor("Armadillos", Example.NAME));
    clashingExtractors.add(new IntExtractor("Armadillos", "ID"));
    clashingExtractors.add(new DateExtractor("Carrots", "ID", "uuuuMMdd"));
    assertThrows(PropertyException.class, () -> new RowProcessor<>(clashingExtractors, weightExtractor, response, fixed, Collections.emptySet()));
}
Aggregations