use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.
the class LibSVMDataSourceTest method testLibSVMSaving.
/**
 * Writes a loaded LibSVM data source back out through {@code writeLibSVMFormat}, reloads it from
 * the temporary file and checks that both sources compare equal. It then reloads the same file
 * through a path containing a "." element and checks that the "url" and "path" entries of the
 * resulting provenance refer to the same location.
 *
 * @throws IOException if the temporary file cannot be created or written.
 */
@Test
public void testLibSVMSaving() throws IOException {
    MockOutputFactory factory = new MockOutputFactory();
    URL dataFile = LibSVMDataSourceTest.class.getResource("/org/tribuo/datasource/test-1.libsvm");
    LibSVMDataSource<MockOutput> source = new LibSVMDataSource<>(dataFile, factory);

    // createTempFile appends the suffix verbatim, so it must carry its own leading dot
    // for the file to end in a real ".libsvm" extension.
    File temp = File.createTempFile("tribuo-lib-svm-test", ".libsvm");
    temp.deleteOnExit();

    MutableDataset<MockOutput> dataset = new MutableDataset<>(source);
    try (PrintStream stream = new PrintStream(temp, StandardCharsets.UTF_8.name())) {
        // The final argument turns each output into the integer written to the file.
        LibSVMDataSource.writeLibSVMFormat(dataset, stream, false, (MockOutput a) -> Integer.parseInt(a.label));
    }

    LibSVMDataSource<MockOutput> loadedSource = new LibSVMDataSource<>(temp.toPath(), factory);
    assertTrue(compareDataSources(source, loadedSource), "Saved data source was not the same as the loaded one.");

    // Now we check provenance path normalization on the saved file.
    // First generate a path with a relative element in it (i.e., ".").
    Path newPath = temp.toPath().resolveSibling(Paths.get(".", temp.getName()));
    // Load the data source back in using the relativised path.
    LibSVMDataSource<MockOutput> newSource = new LibSVMDataSource<>(newPath, factory);
    LibSVMDataSource.LibSVMDataSourceProvenance sourceProv = (LibSVMDataSource.LibSVMDataSourceProvenance) newSource.getProvenance();

    // Extract the two provenance fields. The unchecked casts are tolerated because this is
    // test code and the provenance entries are expected to exist.
    @SuppressWarnings("unchecked") URL sourceURL = ((PrimitiveProvenance<URL>) sourceProv.getConfiguredParameters().get("url")).getValue();
    @SuppressWarnings("unchecked") File sourceFile = ((PrimitiveProvenance<File>) sourceProv.getConfiguredParameters().get("path")).getValue();

    // Assert they match.
    assertEquals(sourceFile.toPath().toUri().toURL(), sourceURL);
}
use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.
the class LibSVMDataSourceTest method testLibSVMLoading.
/**
 * Loads the {@code test-1.libsvm} classpath resource into a mutable dataset and checks the
 * sizes reported by its feature map and its output info.
 *
 * @throws IOException declared by the test signature for the data-source loading calls.
 */
@Test
public void testLibSVMLoading() throws IOException {
    URL resource = LibSVMDataSourceTest.class.getResource("/org/tribuo/datasource/test-1.libsvm");
    LibSVMDataSource<MockOutput> libsvm = new LibSVMDataSource<>(resource, new MockOutputFactory());
    MutableDataset<MockOutput> loaded = new MutableDataset<>(libsvm);

    // Seven entries are expected in the feature map.
    FeatureMap features = loaded.getFeatureMap();
    assertEquals(7, features.size());

    // Two entries are expected in the output info.
    OutputInfo<MockOutput> outputs = loaded.getOutputInfo();
    assertEquals(2, outputs.size());
}
use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.
the class SequenceDatasetTest method testDense.
/**
 * Drives a mutable sequence dataset through a series of additions, a {@code clear()} and two
 * {@code densify()} calls, asserting the value of {@code isDense()} after every step.
 */
@Test
public void testDense() {
    MockOutput label = new MockOutput("test");
    MutableSequenceDataset<MockOutput> sequences =
            new MutableSequenceDataset<>(new MockDataSourceProvenance(), new MockOutputFactory());

    // A dataset with nothing in it is reported as dense.
    assertTrue(sequences.isDense());

    // Stage one: every step carries exactly the features a, b and c.
    ArrayExample<MockOutput> abcOnes = new ArrayExample<>(label, new String[] { "a", "b", "c" }, new double[] { 1, 1, 1 });
    ArrayExample<MockOutput> abcTwos = new ArrayExample<>(label, new String[] { "a", "b", "c" }, new double[] { 2, 2, 2 });
    ArrayExample<MockOutput> abcThrees = new ArrayExample<>(label, new String[] { "a", "b", "c" }, new double[] { 3, 3, 3 });
    SequenceExample<MockOutput> threeFeatureSequence = new SequenceExample<>(Arrays.asList(abcOnes, abcTwos, abcThrees));
    sequences.add(threeFeatureSequence);
    assertTrue(sequences.isDense());

    // Stage two: a sequence over a, b, c and d. It is complete on its own, but the extra
    // feature "d" is missing from the sequence added before it.
    ArrayExample<MockOutput> abcdFirst = new ArrayExample<>(label, new String[] { "a", "b", "c", "d" }, new double[] { 1, 1, 1, 1 });
    ArrayExample<MockOutput> abcdSecond = new ArrayExample<>(label, new String[] { "a", "b", "c", "d" }, new double[] { 1, 1, 1, 1 });
    ArrayExample<MockOutput> abcdThird = new ArrayExample<>(label, new String[] { "a", "b", "c", "d" }, new double[] { 1, 1, 1, 1 });
    SequenceExample<MockOutput> fourFeatureSequence = new SequenceExample<>(Arrays.asList(abcdFirst, abcdSecond, abcdThird));
    sequences.add(fourFeatureSequence);
    assertFalse(sequences.isDense());

    // Start again from an empty dataset.
    sequences.clear();

    // Stage three: steps whose feature sets differ from one another.
    ArrayExample<MockOutput> onlyA = new ArrayExample<>(label, new String[] { "a" }, new double[] { 1 });
    ArrayExample<MockOutput> aAndB = new ArrayExample<>(label, new String[] { "a", "b" }, new double[] { 1, 1 });
    ArrayExample<MockOutput> aToC = new ArrayExample<>(label, new String[] { "a", "b", "c" }, new double[] { 1, 1, 1 });
    ArrayExample<MockOutput> bAndC = new ArrayExample<>(label, new String[] { "b", "c" }, new double[] { 1, 1 });
    SequenceExample<MockOutput> raggedSequence = new SequenceExample<>(Arrays.asList(onlyA, aAndB, aToC, bAndC));
    sequences.add(raggedSequence);
    assertFalse(sequences.isDense());

    // Densifying is expected to restore density.
    sequences.densify();
    assertTrue(sequences.isDense());

    // The a/b/c sequence uses the same feature space, so the dataset stays dense.
    sequences.add(threeFeatureSequence);
    assertTrue(sequences.isDense());

    // Feature "d" arrives again and the dataset is sparse once more ...
    sequences.add(fourFeatureSequence);
    assertFalse(sequences.isDense());

    // ... until it is densified a second time.
    sequences.densify();
    assertTrue(sequences.isDense());
}
use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.
the class SequenceDatasetTest method testMinimumCardinality.
/**
 * Builds three sequences (labelled green, blue and orange), checks the per-feature counts of the
 * full dataset, then wraps it in a {@code MinimumCardinalitySequenceDataset} with a threshold
 * of 3 and checks which features and how many sequences remain.
 */
@Test
public void testMinimumCardinality() {
    MutableSequenceDataset<MockOutput> dataset = new MutableSequenceDataset<>(new MockDataSourceProvenance(), new MockOutputFactory());

    // The green and blue sequences have the same shape: four steps, each adding one more
    // feature than the step before (f1, then f1-f2, up to f1-f4).
    for (String colour : new String[] { "green", "blue" }) {
        ListExample<MockOutput> oneFeature = createExample(colour, "f1");
        ListExample<MockOutput> twoFeatures = createExample(colour, "f1", "f2");
        ListExample<MockOutput> threeFeatures = createExample(colour, "f1", "f2", "f3");
        ListExample<MockOutput> fourFeatures = createExample(colour, "f1", "f2", "f3", "f4");
        SequenceExample<MockOutput> growing = new SequenceExample<>(Arrays.asList(oneFeature, twoFeatures, threeFeatures, fourFeatures));
        dataset.add(growing);
    }

    // The orange sequence is a single step with features that appear nowhere else.
    ListExample<MockOutput> orangeStep = createExample("orange", "f5", "f6", "f7");
    SequenceExample<MockOutput> orangeSequence = new SequenceExample<>(Arrays.asList(orangeStep));
    dataset.add(orangeSequence);

    // Counts before any pruning.
    FeatureMap fullCounts = dataset.getFeatureIDMap();
    assertEquals(8, fullCounts.get("f1").getCount());
    assertEquals(6, fullCounts.get("f2").getCount());
    assertEquals(4, fullCounts.get("f3").getCount());
    assertEquals(2, fullCounts.get("f4").getCount());
    assertEquals(1, fullCounts.get("f5").getCount());
    assertEquals(1, fullCounts.get("f6").getCount());
    assertEquals(1, fullCounts.get("f7").getCount());
    assertEquals(3, dataset.size());

    MinimumCardinalitySequenceDataset<MockOutput> pruned = new MinimumCardinalitySequenceDataset<>(dataset, 3);
    assertEquals(3, pruned.getMinCardinality());

    // f1-f3 keep their counts; f4-f7 were counted fewer than three times and are gone.
    FeatureMap prunedCounts = pruned.getFeatureIDMap();
    assertEquals(8, prunedCounts.get("f1").getCount());
    assertEquals(6, prunedCounts.get("f2").getCount());
    assertEquals(4, prunedCounts.get("f3").getCount());
    assertNull(prunedCounts.get("f4"));
    assertNull(prunedCounts.get("f5"));
    assertNull(prunedCounts.get("f6"));
    assertNull(prunedCounts.get("f7"));

    // Only two of the three sequences are expected to remain.
    assertEquals(2, pruned.size());
}
use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.
the class SequenceDatasetTest method testBasic.
/**
 * Adds two two-step sequences to a mutable sequence dataset, checks the per-feature counts, and
 * then checks the feature maps (and one surviving example) of
 * {@code MinimumCardinalitySequenceDataset} views built with thresholds 2, 3 and 5.
 */
@Test
public void testBasic() {
    MutableSequenceDataset<MockOutput> dataset = new MutableSequenceDataset<>(new MockDataSourceProvenance(), new MockOutputFactory());

    // First sequence: two steps with identical features.
    ListExample<MockOutput> firstSeqStepOne = new ListExample<>(new MockOutput("green"));
    firstSeqStepOne.add(new Feature("f1", 1.0));
    firstSeqStepOne.add(new Feature("f2", 0.0));
    firstSeqStepOne.add(new Feature("f3", 1.0));
    Example<MockOutput> firstSeqStepTwo = new ListExample<>(new MockOutput("green"));
    firstSeqStepTwo.add(new Feature("f1", 1.0));
    firstSeqStepTwo.add(new Feature("f2", 0.0));
    firstSeqStepTwo.add(new Feature("f3", 1.0));
    SequenceExample<MockOutput> firstSequence = new SequenceExample<>(Arrays.asList(firstSeqStepOne, firstSeqStepTwo));
    dataset.add(firstSequence);

    // Second sequence: introduces f4 and f5.
    ListExample<MockOutput> secondSeqStepOne = new ListExample<>(new MockOutput("blue"));
    secondSeqStepOne.add(new Feature("f1", 1.0));
    secondSeqStepOne.add(new Feature("f2", 0.0));
    secondSeqStepOne.add(new Feature("f4", 1.0));
    Example<MockOutput> secondSeqStepTwo = new ListExample<>(new MockOutput("green"));
    secondSeqStepTwo.add(new Feature("f1", 1.0));
    secondSeqStepTwo.add(new Feature("f4", 0.0));
    secondSeqStepTwo.add(new Feature("f5", 1.0));
    SequenceExample<MockOutput> secondSequence = new SequenceExample<>(Arrays.asList(secondSeqStepOne, secondSeqStepTwo));
    dataset.add(secondSequence);

    FeatureMap counts = dataset.getFeatureIDMap();
    assertEquals(4, counts.get("f1").getCount());
    assertEquals(0, counts.get("f2").getCount());
    assertEquals(2, counts.get("f3").getCount());
    // f4 was added twice but only once with a non-zero value; zeros do not count.
    assertEquals(1, counts.get("f4").getCount());
    assertEquals(1, counts.get("f5").getCount());

    // Threshold 2: only f1 and f3 are frequent enough to stay.
    SequenceDataset<MockOutput> atLeastTwo = new MinimumCardinalitySequenceDataset<>(dataset, 2);
    FeatureMap countsAtTwo = atLeastTwo.getFeatureIDMap();
    assertEquals(4, countsAtTwo.get("f1").getCount());
    assertNull(countsAtTwo.get("f2"));
    assertEquals(2, countsAtTwo.get("f3").getCount());
    assertNull(countsAtTwo.get("f4"));
    assertNull(countsAtTwo.get("f5"));

    // The second step of the second sequence keeps f1 but no longer has f5.
    Example<MockOutput> prunedStep = atLeastTwo.getExample(1).get(1);
    Feature keptFeature = prunedStep.lookup("f1");
    assertEquals(1.0, keptFeature.getValue(), 1e-5);
    assertNull(prunedStep.lookup("f5"));

    // Threshold 3: f1 is the only feature left.
    SequenceDataset<MockOutput> atLeastThree = new MinimumCardinalitySequenceDataset<>(dataset, 3);
    FeatureMap countsAtThree = atLeastThree.getFeatureIDMap();
    assertEquals(4, countsAtThree.get("f1").getCount());
    assertNull(countsAtThree.get("f2"));
    assertNull(countsAtThree.get("f3"));
    assertNull(countsAtThree.get("f4"));
    assertNull(countsAtThree.get("f5"));

    // Threshold 5: no feature qualifies, so no examples make it through the pruning.
    SequenceDataset<MockOutput> atLeastFive = new MinimumCardinalitySequenceDataset<>(dataset, 5);
    assertEquals(0, atLeastFive.size());
    FeatureMap countsAtFive = atLeastFive.getFeatureIDMap();
    assertNull(countsAtFive.get("f1"));
    assertNull(countsAtFive.get("f2"));
    assertNull(countsAtFive.get("f3"));
    assertNull(countsAtFive.get("f4"));
    assertNull(countsAtFive.get("f5"));
}
Aggregations