Search in sources :

Example 1 with FoldDimensionBundle

use of org.dkpro.lab.task.impl.FoldDimensionBundle in project dkpro-lab by dkpro.

the class FoldDimensionBundleTest method testComparator.

/**
 * Verifies comparator-driven grouping: file names that share a parent folder are expected to end
 * up in the same fold.
 * <p>
 * The base data holds ten file names spread over the folders {@code aa}, {@code bb} and
 * {@code cc}. With three folds requested, each validation set is expected to contain exactly the
 * files of one folder, and the matching training set the files of the other two.
 */
@Test
public void testComparator() {
    // Expected rendering, one line per fold: "<index> - <validation set> <training set>".
    String expected = "0 - [aa/1.txt, aa/2.txt, aa/3.txt] [bb/4.txt, bb/5.txt, bb/6.txt, cc/7.txt, cc/8.txt, cc/9.txt, cc/10.txt]\n"
            + "1 - [bb/4.txt, bb/5.txt, bb/6.txt] [aa/1.txt, aa/2.txt, aa/3.txt, cc/7.txt, cc/8.txt, cc/9.txt, cc/10.txt]\n"
            + "2 - [cc/7.txt, cc/8.txt, cc/9.txt, cc/10.txt] [aa/1.txt, aa/2.txt, aa/3.txt, bb/4.txt, bb/5.txt, bb/6.txt]\n";

    Dimension<String> baseData = Dimension.create("base", "aa/1.txt", "aa/2.txt", "aa/3.txt", "bb/4.txt", "bb/5.txt", "bb/6.txt", "cc/7.txt", "cc/8.txt", "cc/9.txt", "cc/10.txt");

    // Reports 0 when both names have the same parent folder name and 1 in every other case.
    Comparator<String> sameFolder = new Comparator<String>() {

        @Override
        public int compare(String left, String right) {
            File leftFile = new File(left);
            File rightFile = new File(right);
            String leftFolder = leftFile.getParentFile().getName();
            String rightFolder = rightFile.getParentFile().getName();
            return leftFolder.equals(rightFolder) ? 0 : 1;
        }
    };

    FoldDimensionBundle<String> bundle = new FoldDimensionBundle<String>("fold", baseData, 3, sameFolder);
    ParameterSpace pSpace = new ParameterSpace(bundle);

    // Render every configuration produced by the parameter space and count them.
    StringBuilder rendered = new StringBuilder();
    int steps = 0;
    for (Map<String, Object> config : pSpace) {
        Object validation = config.get("fold_validation");
        Object training = config.get("fold_training");
        rendered.append(String.format("%d - %s %s\n", steps, validation, training));
        steps++;
    }

    assertEquals(3, steps);
    assertEquals(3, pSpace.getStepCount());
    assertEquals(expected, rendered.toString());
}
Also used : FoldDimensionBundle(org.dkpro.lab.task.impl.FoldDimensionBundle) ParameterSpace(org.dkpro.lab.task.ParameterSpace) File(java.io.File) Comparator(java.util.Comparator) Test(org.junit.Test)

Example 2 with FoldDimensionBundle

use of org.dkpro.lab.task.impl.FoldDimensionBundle in project dkpro-lab by dkpro.

the class FoldDimensionBundleTest method testFoldDistribution.

/**
 * Verifies that, when a grouping comparator is supplied, the instances are still spread evenly
 * over the folds.
 * <p>
 * The base data holds ten file names in seven folders ({@code aa}, {@code dd} and {@code gg}
 * have two files each, the rest one). With three folds requested, the expected validation sets
 * have sizes 3, 3 and 4, and files sharing a folder stay together.
 */
@Test
public void testFoldDistribution() {
    // Expected rendering, one line per fold: "<index> - <validation set> <training set>".
    String expected = "0 - [aa/1.txt, aa/2.txt, ff/8.txt] [bb/3.txt, dd/5.txt, dd/6.txt, cc/4.txt, ee/7.txt, gg/9.txt, gg/10.txt]\n"
            + "1 - [bb/3.txt, dd/5.txt, dd/6.txt] [aa/1.txt, aa/2.txt, ff/8.txt, cc/4.txt, ee/7.txt, gg/9.txt, gg/10.txt]\n"
            + "2 - [cc/4.txt, ee/7.txt, gg/9.txt, gg/10.txt] [aa/1.txt, aa/2.txt, ff/8.txt, bb/3.txt, dd/5.txt, dd/6.txt]\n";

    Dimension<String> baseData = Dimension.create("base", "aa/1.txt", "aa/2.txt", "bb/3.txt", "cc/4.txt", "dd/5.txt", "dd/6.txt", "ee/7.txt", "ff/8.txt", "gg/9.txt", "gg/10.txt");

    // Reports 0 when both names have the same parent folder name and 1 in every other case.
    Comparator<String> sameFolder = new Comparator<String>() {

        @Override
        public int compare(String left, String right) {
            File leftFile = new File(left);
            File rightFile = new File(right);
            String leftFolder = leftFile.getParentFile().getName();
            String rightFolder = rightFile.getParentFile().getName();
            return leftFolder.equals(rightFolder) ? 0 : 1;
        }
    };

    FoldDimensionBundle<String> bundle = new FoldDimensionBundle<String>("fold", baseData, 3, sameFolder);
    ParameterSpace pSpace = new ParameterSpace(bundle);

    // Render every configuration produced by the parameter space and count them.
    StringBuilder rendered = new StringBuilder();
    int steps = 0;
    for (Map<String, Object> config : pSpace) {
        Object validation = config.get("fold_validation");
        Object training = config.get("fold_training");
        rendered.append(String.format("%d - %s %s\n", steps, validation, training));
        steps++;
    }

    assertEquals(3, steps);
    assertEquals(3, pSpace.getStepCount());
    assertEquals(expected, rendered.toString());
}
Also used : FoldDimensionBundle(org.dkpro.lab.task.impl.FoldDimensionBundle) ParameterSpace(org.dkpro.lab.task.ParameterSpace) File(java.io.File) Comparator(java.util.Comparator) Test(org.junit.Test)

Example 3 with FoldDimensionBundle

use of org.dkpro.lab.task.impl.FoldDimensionBundle in project dkpro-lab by dkpro.

the class FoldDimensionBundleTest method testFoldInjection.

/**
 * Runs a batch task over a 3-fold parameter space and checks the fold collections that the task
 * sees in its {@code @Discriminator} fields on each execution.
 * <p>
 * The storage root is reset to a fresh directory under {@code target/repository} named after the
 * test class and method before the batch is run.
 *
 * @throws Exception if preparing the repository directory or running the batch task fails
 */
@Test
public void testFoldInjection() throws Exception {
    // Start from an empty, test-specific repository directory.
    String repoPath = "target/repository/" + getClass().getSimpleName() + "/" + name.getMethodName();
    File repo = new File(repoPath);
    FileUtils.deleteDirectory(repo);
    repo.mkdirs();
    FileSystemStorageService storage = (FileSystemStorageService) Lab.getInstance().getStorageService();
    storage.setStorageRoot(repo);

    // Expected rendering, one line per execution: "<index> - <validation set> <training set>".
    String expected = "0 - [1, 4, 7, 10] [2, 5, 8, 3, 6, 9]\n"
            + "1 - [2, 5, 8] [1, 4, 7, 10, 3, 6, 9]\n"
            + "2 - [3, 6, 9] [1, 4, 7, 10, 2, 5, 8]\n";

    Dimension<String> baseData = Dimension.create("base", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10");
    FoldDimensionBundle<String> bundle = new FoldDimensionBundle<String>("fold", baseData, 3);
    ParameterSpace pSpace = new ParameterSpace(bundle);

    final StringBuilder rendered = new StringBuilder();
    Task recordingTask = new ExecutableTaskBase() {

        // Number of executions recorded so far; used as the line index.
        int runIndex = 0;

        @Discriminator
        Collection<String> fold_validation;

        @Discriminator
        Collection<String> fold_training;

        @Override
        public void execute(TaskContext aContext) throws Exception {
            System.out.printf("%d training  : %s\n", runIndex, fold_training);
            System.out.printf("%d validation: %s\n", runIndex, fold_validation);
            String line = String.format("%d - %s %s\n", runIndex, fold_validation, fold_training);
            rendered.append(line);
            runIndex++;
        }
    };

    DefaultBatchTask batch = new DefaultBatchTask();
    batch.setParameterSpace(pSpace);
    batch.addTask(recordingTask);
    Lab.getInstance().run(batch);

    assertEquals(3, pSpace.getStepCount());
    assertEquals(expected, rendered.toString());
}
Also used : Task(org.dkpro.lab.task.Task) DefaultBatchTask(org.dkpro.lab.task.impl.DefaultBatchTask) FoldDimensionBundle(org.dkpro.lab.task.impl.FoldDimensionBundle) TaskContext(org.dkpro.lab.engine.TaskContext) ExecutableTaskBase(org.dkpro.lab.task.impl.ExecutableTaskBase) ParameterSpace(org.dkpro.lab.task.ParameterSpace) Collection(java.util.Collection) File(java.io.File) FileSystemStorageService(org.dkpro.lab.storage.filesystem.FileSystemStorageService) Test(org.junit.Test)

Example 4 with FoldDimensionBundle

use of org.dkpro.lab.task.impl.FoldDimensionBundle in project dkpro-lab by dkpro.

the class FoldDimensionBundleTest method testFileFold.

/**
 * Checks a 3-fold split of the ten values {@code "1"} to {@code "10"} without a comparator.
 * <p>
 * The expected validation sets are {@code [1, 4, 7, 10]}, {@code [2, 5, 8]} and
 * {@code [3, 6, 9]}; each training set is expected to hold the remaining values.
 */
@Test
public void testFileFold() {
    // Expected rendering, one line per fold: "<index> - <validation set> <training set>".
    String expected = "0 - [1, 4, 7, 10] [2, 5, 8, 3, 6, 9]\n"
            + "1 - [2, 5, 8] [1, 4, 7, 10, 3, 6, 9]\n"
            + "2 - [3, 6, 9] [1, 4, 7, 10, 2, 5, 8]\n";

    Dimension<String> baseData = Dimension.create("base", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10");
    FoldDimensionBundle<String> bundle = new FoldDimensionBundle<String>("fold", baseData, 3);
    ParameterSpace pSpace = new ParameterSpace(bundle);

    // Render every configuration produced by the parameter space and count them.
    StringBuilder rendered = new StringBuilder();
    int steps = 0;
    for (Map<String, Object> config : pSpace) {
        Object validation = config.get("fold_validation");
        Object training = config.get("fold_training");
        rendered.append(String.format("%d - %s %s\n", steps, validation, training));
        steps++;
    }

    assertEquals(3, steps);
    assertEquals(3, pSpace.getStepCount());
    assertEquals(expected, rendered.toString());
}
Also used : FoldDimensionBundle(org.dkpro.lab.task.impl.FoldDimensionBundle) ParameterSpace(org.dkpro.lab.task.ParameterSpace) Test(org.junit.Test)

Example 5 with FoldDimensionBundle

use of org.dkpro.lab.task.impl.FoldDimensionBundle in project dkpro-lab by dkpro.

the class FoldDimensionBundleTest method testSimpleFold.

/**
 * Checks the plain (comparator-free) 3-fold split of the ten values {@code "1"} to {@code "10"}.
 * <p>
 * The expected validation sets are {@code [1, 4, 7, 10]}, {@code [2, 5, 8]} and
 * {@code [3, 6, 9]}; each training set is expected to hold the remaining values.
 */
@Test
public void testSimpleFold() {
    // Expected rendering, one line per fold: "<index> - <validation set> <training set>".
    String expected = "0 - [1, 4, 7, 10] [2, 5, 8, 3, 6, 9]\n"
            + "1 - [2, 5, 8] [1, 4, 7, 10, 3, 6, 9]\n"
            + "2 - [3, 6, 9] [1, 4, 7, 10, 2, 5, 8]\n";

    Dimension<String> baseData = Dimension.create("base", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10");
    FoldDimensionBundle<String> bundle = new FoldDimensionBundle<String>("fold", baseData, 3);
    ParameterSpace pSpace = new ParameterSpace(bundle);

    // Render every configuration produced by the parameter space and count them.
    StringBuilder rendered = new StringBuilder();
    int steps = 0;
    for (Map<String, Object> config : pSpace) {
        Object validation = config.get("fold_validation");
        Object training = config.get("fold_training");
        rendered.append(String.format("%d - %s %s\n", steps, validation, training));
        steps++;
    }

    assertEquals(3, steps);
    assertEquals(3, pSpace.getStepCount());
    assertEquals(expected, rendered.toString());
}
Also used : FoldDimensionBundle(org.dkpro.lab.task.impl.FoldDimensionBundle) ParameterSpace(org.dkpro.lab.task.ParameterSpace) Test(org.junit.Test)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace): 5, FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle): 5, Test (org.junit.Test): 5, File (java.io.File): 3, Comparator (java.util.Comparator): 2, Collection (java.util.Collection): 1, TaskContext (org.dkpro.lab.engine.TaskContext): 1, FileSystemStorageService (org.dkpro.lab.storage.filesystem.FileSystemStorageService): 1, Task (org.dkpro.lab.task.Task): 1, DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask): 1, ExecutableTaskBase (org.dkpro.lab.task.impl.ExecutableTaskBase): 1