Search in sources :

Example 1 with MockOutputFactory

use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.

the class LibSVMDataSourceTest method testLibSVMSaving.

/**
 * Writes the bundled LibSVM test data out with {@code writeLibSVMFormat}, reloads it and
 * checks it against the original source, then verifies that the "url" and "path" provenance
 * entries agree when the file is loaded through a path containing a "." element.
 *
 * @throws IOException if the temporary file cannot be created, written or read back.
 */
@Test
public void testLibSVMSaving() throws IOException {
    MockOutputFactory outputFactory = new MockOutputFactory();
    URL resource = LibSVMDataSourceTest.class.getResource("/org/tribuo/datasource/test-1.libsvm");
    LibSVMDataSource<MockOutput> original = new LibSVMDataSource<>(resource, outputFactory);

    // Scratch file for the round trip, removed when the JVM exits
    File scratch = File.createTempFile("tribuo-lib-svm-test", "libsvm");
    scratch.deleteOnExit();

    MutableDataset<MockOutput> data = new MutableDataset<>(original);
    try (PrintStream out = new PrintStream(scratch, StandardCharsets.UTF_8.name())) {
        // The mock labels are written as integers parsed from the label string
        LibSVMDataSource.writeLibSVMFormat(data, out, false, (MockOutput output) -> Integer.parseInt(output.label));
    }

    LibSVMDataSource<MockOutput> reloaded = new LibSVMDataSource<>(scratch.toPath(), outputFactory);
    assertTrue(compareDataSources(original, reloaded), "Saved data source was not the same as the loaded one.");

    // Provenance path normalization check on the saved file:
    // build a sibling path that contains a relative element ("./<file name>")
    Path dotPrefixedName = Paths.get(".", scratch.getName());
    Path unnormalizedPath = scratch.toPath().resolveSibling(dotPrefixedName);

    // Load the data source again through the un-normalized path and grab its provenance
    LibSVMDataSource<MockOutput> viaRelativePath = new LibSVMDataSource<>(unnormalizedPath, outputFactory);
    LibSVMDataSource.LibSVMDataSourceProvenance provenance = (LibSVMDataSource.LibSVMDataSourceProvenance) viaRelativePath.getProvenance();

    // Pull out the two configured location fields; the unchecked casts are tolerated
    // because this is test code reading the "url" and "path" entries directly
    @SuppressWarnings("unchecked") URL recordedUrl = ((PrimitiveProvenance<URL>) provenance.getConfiguredParameters().get("url")).getValue();
    @SuppressWarnings("unchecked") File recordedFile = ((PrimitiveProvenance<File>) provenance.getConfiguredParameters().get("path")).getValue();

    // The recorded file, converted to a URL, must equal the recorded URL
    assertEquals(recordedFile.toPath().toUri().toURL(), recordedUrl);
}
Also used : Path(java.nio.file.Path) PrintStream(java.io.PrintStream) MockOutput(org.tribuo.test.MockOutput) MockOutputFactory(org.tribuo.test.MockOutputFactory) URL(java.net.URL) PrimitiveProvenance(com.oracle.labs.mlrg.olcut.provenance.PrimitiveProvenance) File(java.io.File) MutableDataset(org.tribuo.MutableDataset) Test(org.junit.jupiter.api.Test)

Example 2 with MockOutputFactory

use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.

the class LibSVMDataSourceTest method testLibSVMLoading.

/**
 * Loads the bundled LibSVM test data into a dataset and checks the sizes reported by its
 * feature map and output info.
 *
 * @throws IOException declared for the data source constructor.
 */
@Test
public void testLibSVMLoading() throws IOException {
    MockOutputFactory outputFactory = new MockOutputFactory();
    URL resource = LibSVMDataSourceTest.class.getResource("/org/tribuo/datasource/test-1.libsvm");
    LibSVMDataSource<MockOutput> libsvmSource = new LibSVMDataSource<>(resource, outputFactory);
    MutableDataset<MockOutput> loaded = new MutableDataset<>(libsvmSource);

    // Expected feature map size for the test file
    assertEquals(7, loaded.getFeatureMap().size());
    // Expected output info size for the test file
    assertEquals(2, loaded.getOutputInfo().size());
}
Also used : FeatureMap(org.tribuo.FeatureMap) MockOutput(org.tribuo.test.MockOutput) MockOutputFactory(org.tribuo.test.MockOutputFactory) MutableDataset(org.tribuo.MutableDataset) URL(java.net.URL) Test(org.junit.jupiter.api.Test)

Example 3 with MockOutputFactory

use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.

the class SequenceDatasetTest method testDense.

/**
 * Exercises {@code isDense()} and {@code densify()} on a {@code MutableSequenceDataset}
 * as sequence examples with matching and differing feature sets are added.
 */
@Test
public void testDense() {
    MockOutputFactory mockFactory = new MockOutputFactory();
    MockDataSourceProvenance mockProvenance = new MockDataSourceProvenance();
    // A single output instance is shared by every example built in this test
    MockOutput mockOutput = new MockOutput("test");
    MutableSequenceDataset<MockOutput> dataset = new MutableSequenceDataset<>(mockProvenance, mockFactory);
    // Empty datasets are dense
    assertTrue(dataset.isDense());
    // Three examples which all carry the features a, b and c
    ArrayExample<MockOutput> first = new ArrayExample<>(mockOutput, new String[] { "a", "b", "c" }, new double[] { 1, 1, 1 });
    ArrayExample<MockOutput> second = new ArrayExample<>(mockOutput, new String[] { "a", "b", "c" }, new double[] { 2, 2, 2 });
    ArrayExample<MockOutput> third = new ArrayExample<>(mockOutput, new String[] { "a", "b", "c" }, new double[] { 3, 3, 3 });
    SequenceExample<MockOutput> denseExample = new SequenceExample<>(Arrays.asList(first, second, third));
    dataset.add(denseExample);
    // This example is dense
    assertTrue(dataset.isDense());
    // Rebuild the three examples with an extra feature, d
    first = new ArrayExample<>(mockOutput, new String[] { "a", "b", "c", "d" }, new double[] { 1, 1, 1, 1 });
    second = new ArrayExample<>(mockOutput, new String[] { "a", "b", "c", "d" }, new double[] { 1, 1, 1, 1 });
    third = new ArrayExample<>(mockOutput, new String[] { "a", "b", "c", "d" }, new double[] { 1, 1, 1, 1 });
    SequenceExample<MockOutput> newDenseExample = new SequenceExample<>(Arrays.asList(first, second, third));
    dataset.add(newDenseExample);
    // This example is dense, but it makes the previous one not dense as it adds a new feature
    assertFalse(dataset.isDense());
    // flush out the previous test
    dataset.clear();
    // Four examples whose feature sets differ from one another (a / a,b / a,b,c / b,c)
    first = new ArrayExample<>(mockOutput, new String[] { "a" }, new double[] { 1 });
    second = new ArrayExample<>(mockOutput, new String[] { "a", "b" }, new double[] { 1, 1 });
    third = new ArrayExample<>(mockOutput, new String[] { "a", "b", "c" }, new double[] { 1, 1, 1 });
    ArrayExample<MockOutput> fourth = new ArrayExample<>(mockOutput, new String[] { "b", "c" }, new double[] { 1, 1 });
    SequenceExample<MockOutput> sparseExample = new SequenceExample<>(Arrays.asList(first, second, third, fourth));
    dataset.add(sparseExample);
    // This example is sparse
    assertFalse(dataset.isDense());
    dataset.densify();
    // After densification it should be dense
    assertTrue(dataset.isDense());
    // Re-add the a, b, c sequence built at the start of the test
    dataset.add(denseExample);
    // should still be dense as they share a feature space
    assertTrue(dataset.isDense());
    // Re-add the a, b, c, d sequence, which brings in feature d again
    dataset.add(newDenseExample);
    // now it's sparse again
    assertFalse(dataset.isDense());
    // A second densify call is expected to restore density
    dataset.densify();
    assertTrue(dataset.isDense());
}
Also used : ArrayExample(org.tribuo.impl.ArrayExample) MockOutput(org.tribuo.test.MockOutput) MockOutputFactory(org.tribuo.test.MockOutputFactory) MockDataSourceProvenance(org.tribuo.test.MockDataSourceProvenance) Test(org.junit.jupiter.api.Test)

Example 4 with MockOutputFactory

use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.

the class SequenceDatasetTest method testMinimumCardinality.

/**
 * Builds a sequence dataset whose features occur with different counts, then checks that a
 * {@code MinimumCardinalitySequenceDataset} with a threshold of 3 reports only f1, f2 and f3
 * and contains two sequences.
 */
@Test
public void testMinimumCardinality() {
    MutableSequenceDataset<MockOutput> sequences = new MutableSequenceDataset<>(new MockDataSourceProvenance(), new MockOutputFactory());

    // Two four-step sequences ("green" then "blue"), each step adding one more feature
    for (String label : new String[] { "green", "blue" }) {
        SequenceExample<MockOutput> growing = new SequenceExample<>(Arrays.asList(
                createExample(label, "f1"),
                createExample(label, "f1", "f2"),
                createExample(label, "f1", "f2", "f3"),
                createExample(label, "f1", "f2", "f3", "f4")));
        sequences.add(growing);
    }

    // One single-step sequence using features that appear nowhere else
    SequenceExample<MockOutput> rareFeatures = new SequenceExample<>(Arrays.asList(createExample("orange", "f5", "f6", "f7")));
    sequences.add(rareFeatures);

    // Counts in the unpruned dataset
    FeatureMap unprunedFeatures = sequences.getFeatureIDMap();
    assertEquals(8, unprunedFeatures.get("f1").getCount());
    assertEquals(6, unprunedFeatures.get("f2").getCount());
    assertEquals(4, unprunedFeatures.get("f3").getCount());
    assertEquals(2, unprunedFeatures.get("f4").getCount());
    assertEquals(1, unprunedFeatures.get("f5").getCount());
    assertEquals(1, unprunedFeatures.get("f6").getCount());
    assertEquals(1, unprunedFeatures.get("f7").getCount());
    assertEquals(3, sequences.size());

    // Prune with a minimum cardinality of 3
    MinimumCardinalitySequenceDataset<MockOutput> pruned = new MinimumCardinalitySequenceDataset<>(sequences, 3);
    assertEquals(3, pruned.getMinCardinality());

    // f1-f3 keep their counts, f4-f7 are no longer present
    FeatureMap prunedFeatures = pruned.getFeatureIDMap();
    assertEquals(8, prunedFeatures.get("f1").getCount());
    assertEquals(6, prunedFeatures.get("f2").getCount());
    assertEquals(4, prunedFeatures.get("f3").getCount());
    assertNull(prunedFeatures.get("f4"));
    assertNull(prunedFeatures.get("f5"));
    assertNull(prunedFeatures.get("f6"));
    assertNull(prunedFeatures.get("f7"));

    // Only two of the three sequences remain after pruning
    assertEquals(2, pruned.size());
}
Also used : FeatureMap(org.tribuo.FeatureMap) MockOutput(org.tribuo.test.MockOutput) MockOutputFactory(org.tribuo.test.MockOutputFactory) MockDataSourceProvenance(org.tribuo.test.MockDataSourceProvenance) Test(org.junit.jupiter.api.Test)

Example 5 with MockOutputFactory

use of org.tribuo.test.MockOutputFactory in project tribuo by oracle.

the class SequenceDatasetTest method testBasic.

/**
 * Checks the feature counts of a small {@code MutableSequenceDataset} and how they change
 * when it is wrapped in {@code MinimumCardinalitySequenceDataset} with thresholds 2, 3 and 5.
 */
@Test
public void testBasic() {
    MutableSequenceDataset<MockOutput> dataset = new MutableSequenceDataset<>(new MockDataSourceProvenance(), new MockOutputFactory());
    // First sequence: two "green" examples, each with f1=1.0, f2=0.0, f3=1.0
    ListExample<MockOutput> ex1 = new ListExample<>(new MockOutput("green"));
    ex1.add(new Feature("f1", 1.0));
    ex1.add(new Feature("f2", 0.0));
    ex1.add(new Feature("f3", 1.0));
    Example<MockOutput> ex2 = new ListExample<>(new MockOutput("green"));
    ex2.add(new Feature("f1", 1.0));
    ex2.add(new Feature("f2", 0.0));
    ex2.add(new Feature("f3", 1.0));
    SequenceExample<MockOutput> seqEx = new SequenceExample<>(Arrays.asList(ex1, ex2));
    dataset.add(seqEx);
    // Second sequence: a "blue" example (f1=1.0, f2=0.0, f4=1.0) followed by
    // a "green" example (f1=1.0, f4=0.0, f5=1.0)
    ex1 = new ListExample<>(new MockOutput("blue"));
    ex1.add(new Feature("f1", 1.0));
    ex1.add(new Feature("f2", 0.0));
    ex1.add(new Feature("f4", 1.0));
    ex2 = new ListExample<>(new MockOutput("green"));
    ex2.add(new Feature("f1", 1.0));
    ex2.add(new Feature("f4", 0.0));
    ex2.add(new Feature("f5", 1.0));
    seqEx = new SequenceExample<>(Arrays.asList(ex1, ex2));
    dataset.add(seqEx);
    FeatureMap infoMap = dataset.getFeatureIDMap();
    assertEquals(4, infoMap.get("f1").getCount());
    // f2 was only ever added with the value 0.0, and the expected count is zero
    assertEquals(0, infoMap.get("f2").getCount());
    assertEquals(2, infoMap.get("f3").getCount());
    // One as non-sparse zeros are ignored
    assertEquals(1, infoMap.get("f4").getCount());
    assertEquals(1, infoMap.get("f5").getCount());
    // Threshold 2: only f1 (count 4) and f3 (count 2) are expected to remain
    SequenceDataset<MockOutput> prunedDataset = new MinimumCardinalitySequenceDataset<>(dataset, 2);
    infoMap = prunedDataset.getFeatureIDMap();
    assertEquals(4, infoMap.get("f1").getCount());
    assertNull(infoMap.get("f2"));
    assertEquals(2, infoMap.get("f3").getCount());
    assertNull(infoMap.get("f4"));
    assertNull(infoMap.get("f5"));
    // Inspect the second example of the second sequence in the pruned dataset:
    // it should still have f1 but no longer have f5
    seqEx = prunedDataset.getExample(1);
    ex2 = seqEx.get(1);
    Feature f1 = ex2.lookup("f1");
    assertEquals(1.0, f1.getValue(), 1e-5);
    assertNull(ex2.lookup("f5"));
    // Threshold 3: only f1 is expected to remain
    prunedDataset = new MinimumCardinalitySequenceDataset<>(dataset, 3);
    infoMap = prunedDataset.getFeatureIDMap();
    assertEquals(4, infoMap.get("f1").getCount());
    assertNull(infoMap.get("f2"));
    assertNull(infoMap.get("f3"));
    assertNull(infoMap.get("f4"));
    assertNull(infoMap.get("f5"));
    // no examples make it through the pruning
    prunedDataset = new MinimumCardinalitySequenceDataset<>(dataset, 5);
    assertEquals(0, prunedDataset.size());
    infoMap = prunedDataset.getFeatureIDMap();
    assertNull(infoMap.get("f1"));
    assertNull(infoMap.get("f2"));
    assertNull(infoMap.get("f3"));
    assertNull(infoMap.get("f4"));
    assertNull(infoMap.get("f5"));
}
Also used : MockOutput(org.tribuo.test.MockOutput) ListExample(org.tribuo.impl.ListExample) MockOutputFactory(org.tribuo.test.MockOutputFactory) MockDataSourceProvenance(org.tribuo.test.MockDataSourceProvenance) Feature(org.tribuo.Feature) FeatureMap(org.tribuo.FeatureMap) Test(org.junit.jupiter.api.Test)

Aggregations

MockOutput (org.tribuo.test.MockOutput): 36; MockOutputFactory (org.tribuo.test.MockOutputFactory): 36; Test (org.junit.jupiter.api.Test): 28; MockDataSourceProvenance (org.tribuo.test.MockDataSourceProvenance): 15; MutableDataset (org.tribuo.MutableDataset): 12; URL (java.net.URL): 9; ArrayExample (org.tribuo.impl.ArrayExample): 8; ArrayList (java.util.ArrayList): 7; ListExample (org.tribuo.impl.ListExample): 5; Path (java.nio.file.Path): 4; Example (org.tribuo.Example): 4; FeatureMap (org.tribuo.FeatureMap): 4; Feature (org.tribuo.Feature): 3; SimpleDataSourceProvenance (org.tribuo.provenance.SimpleDataSourceProvenance): 3; HashMap (java.util.HashMap): 2; LinkedHashSet (java.util.LinkedHashSet): 2; Random (java.util.Random): 2; ConfigurableDataSource (org.tribuo.ConfigurableDataSource): 2; FieldProcessor (org.tribuo.data.columnar.FieldProcessor): 2; IdentityProcessor (org.tribuo.data.columnar.processors.field.IdentityProcessor): 2