Search in sources :

Example 81 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class WekaPairRegressionDemo method getParameterSpace.

/**
 * Assembles the parameter space used by this pair-regression demo.
 *
 * <p>The space combines a bundle holding the training and test reader descriptions (both based
 * on {@code STSReader}), the pair feature mode, the regression learning mode, a feature set with
 * a single {@code DiffNrOfTokensPairFeatureExtractor}, and a "config" bundle that selects
 * {@code WekaAdapter} with {@code SMOreg} as the learner.
 *
 * @return the fully populated parameter space
 * @throws ResourceInitializationException if a reader description cannot be created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // reader descriptions for the training and the test data
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            STSReader.class,
            STSReader.PARAM_INPUT_FILE, inputFileTrain,
            STSReader.PARAM_GOLD_FILE, goldFileTrain);
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            STSReader.class,
            STSReader.PARAM_INPUT_FILE, inputFileTest,
            STSReader.PARAM_GOLD_FILE, goldFileTest);

    Map<String, Object> readers = new HashMap<>();
    readers.put(DIM_READER_TRAIN, trainReader);
    readers.put(DIM_READER_TEST, testReader);

    // a single feature set consisting of one pair feature extractor
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(DiffNrOfTokensPairFeatureExtractor.class));
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(DIM_FEATURE_SET, featureSet);

    // learner configuration: adapter + classifier name, data writer and sparse-feature flag
    Map<String, Object> learnerSettings = new HashMap<>();
    learnerSettings.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new WekaAdapter(), SMOreg.class.getName() });
    learnerSettings.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    learnerSettings.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> learnerDimension =
            Dimension.createBundle("config", learnerSettings);

    return new ParameterSpace(
            Dimension.createBundle(Constants.DIM_READER_TRAIN, readers),
            Dimension.create(Constants.DIM_FEATURE_MODE, Constants.FM_PAIR),
            Dimension.create(Constants.DIM_LEARNING_MODE, Constants.LM_REGRESSION),
            featureSetDimension,
            learnerDimension);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) SMOreg(weka.classifiers.functions.SMOreg) HashMap(java.util.HashMap) Map(java.util.Map) WekaAdapter(org.dkpro.tc.ml.weka.WekaAdapter)

Example 82 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class XgboostDocumentPlain method main.

/**
 * Entry point of the demo: points the DKPro home at {@code target/}, builds the parameter
 * space and runs a train/test experiment with it.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if building the parameter space or running the experiment fails
 */
public static void main(String[] args) throws Exception {
    DemoUtils.setDkproHome("target/");

    // the parameter space is built before the experiment object is created
    final ParameterSpace parameterSpace = getParameterSpace();
    new XgboostDocumentPlain().runTrainTest(parameterSpace);
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace)

Example 83 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class XgboostDocumentPlain method getParameterSpace.

/**
 * Assembles the parameter space used by this Xgboost document-classification demo.
 *
 * <p>The space combines a "readers" bundle with the training and test reader descriptions
 * (both {@code FolderwiseDataReader} instances matching {@code "*}{@code /*.txt"}), the
 * single-label learning mode, the document feature mode, a feature set named
 * "DummyFeatureSet", and a "config" bundle that selects {@code XgboostAdapter}.
 *
 * @return the fully populated parameter space
 * @throws ResourceInitializationException if a reader description cannot be created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader descriptions for training and test data. Train/test uses both,
    // while cross-validation only uses the training part.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");

    Map<String, Object> readers = new HashMap<>();
    readers.put(DIM_READER_TRAIN, trainReader);
    readers.put(DIM_READER_TEST, testReader);

    // feature set: token ratio per document plus word n-grams (n = 1..3, top 20)
    TcFeatureSet featureSet = new TcFeatureSet(
            "DummyFeatureSet",
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(
                    WordNGram.class,
                    WordNGram.PARAM_NGRAM_USE_TOP_K, 20,
                    WordNGram.PARAM_NGRAM_MIN_N, 1,
                    WordNGram.PARAM_NGRAM_MAX_N, 3));
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(DIM_FEATURE_SET, featureSet);

    // learner configuration: adapter + objective, data writer and sparse-feature flag
    Map<String, Object> learnerSettings = new HashMap<>();
    learnerSettings.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new XgboostAdapter(), "objective=multi:softmax" });
    learnerSettings.put(DIM_DATA_WRITER, new XgboostAdapter().getDataWriterClass().getName());
    learnerSettings.put(DIM_FEATURE_USE_SPARSE, new XgboostAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> learnerDimension =
            Dimension.createBundle("config", learnerSettings);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            featureSetDimension,
            learnerDimension);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) XgboostAdapter(org.dkpro.tc.ml.xgboost.XgboostAdapter) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet) HashMap(java.util.HashMap) Map(java.util.Map)

Example 84 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class DyNetSequenceTest method runTest.

/**
 * Runs the DyNet sequence train/test experiment and verifies the produced id2outcome file.
 *
 * <p>The test is skipped (with a note on {@code System.err}) when {@code getEnvironment()}
 * throws, i.e. when no usable Python installation could be located. Otherwise exactly one
 * outcome file with 51 lines is expected: the header, the label mapping, and 48 prediction
 * lines whose gold label is fixed while the predicted label may be any number.
 *
 * @throws Exception if the experiment or reading the outcome file fails
 */
@Test
public void runTest() throws Exception {
    DemoUtils.setDkproHome(DynetSeq2SeqTrainTest.class.getSimpleName());

    String python3;
    try {
        python3 = getEnvironment();
    } catch (Exception e) {
        // Deliberate best-effort skip: without the Python environment the experiment cannot
        // run. The cause is printed so that a skipped run can be diagnosed.
        System.err.println("Failed to locate Python with DyNet - will skip this test case: " + e);
        return;
    }

    ParameterSpace ps = DynetSeq2SeqTrainTest.getParameterSpace(python3);
    DynetSeq2SeqTrainTest.runTrainTest(ps);

    assertEquals(1, ContextMemoryReport.id2outcomeFiles.size());
    List<String> lines = FileUtils.readLines(ContextMemoryReport.id2outcomeFiles.get(0), "utf-8");
    assertEquals(51, lines.size());

    // header and label mapping must match exactly
    assertEquals("#ID=PREDICTION;GOLDSTANDARD;THRESHOLD", lines.get(0));
    assertEquals("#labels 0=AP 1=AT 2=BER 3=CC 4=CS 5=DOD 6=DTS 7=HV 8=IN 9=JJ 10=NN 11=NNS 12=NP 13=NPg 14=PPO 15=PPS 16=RB 17=TO 18=VB 19=VBD 20=VBG 21=VBN 22=pct", lines.get(1));

    // Expected patterns for the prediction lines, in file order. The line at index 2 is
    // intentionally not asserted, so the first pattern belongs to the line at index 3.
    final String[] expectedPredictions = {
        "000000_000000_000000_The=[0-9]+;1;-1",
        "000000_000000_000001_jury=[0-9]+;10;-1",
        "000000_000000_000002_said=[0-9]+;19;-1",
        "000000_000000_000003_it=[0-9]+;15;-1",
        "000000_000000_000004_did=[0-9]+;5;-1",
        "000000_000000_000005_find=[0-9]+;18;-1",
        "000000_000000_000006_that=[0-9]+;4;-1",
        "000000_000000_000007_many=[0-9]+;0;-1",
        "000000_000000_000008_of=[0-9]+;8;-1",
        "000000_000000_000009_Georgia's=[0-9]+;13;-1",
        "000000_000000_000010_registration=[0-9]+;10;-1",
        "000000_000000_000011_and=[0-9]+;3;-1",
        "000000_000000_000012_election=[0-9]+;10;-1",
        "000000_000000_000013_laws=[0-9]+;11;-1",
        "000000_000000_000014_``=[0-9]+;22;-1",
        "000000_000000_000015_are=[0-9]+;2;-1",
        "000000_000000_000016_outmoded=[0-9]+;9;-1",
        "000000_000000_000017_or=[0-9]+;3;-1",
        "000000_000000_000018_inadequate=[0-9]+;9;-1",
        "000000_000000_000019_and=[0-9]+;3;-1",
        "000000_000000_000020_often=[0-9]+;16;-1",
        "000000_000000_000021_ambiguous=[0-9]+;9;-1",
        "000000_000000_000022_''=[0-9]+;22;-1",
        "000000_000000_000023_.=[0-9]+;22;-1",
        "000000_000001_000000_It=[0-9]+;15;-1",
        "000000_000001_000001_recommended=[0-9]+;19;-1",
        "000000_000001_000002_that=[0-9]+;4;-1",
        "000000_000001_000003_Fulton=[0-9]+;12;-1",
        "000000_000001_000004_legislators=[0-9]+;11;-1",
        "000000_000001_000005_act=[0-9]+;18;-1",
        "000000_000001_000006_``=[0-9]+;22;-1",
        "000000_000001_000007_to=[0-9]+;17;-1",
        "000000_000001_000008_have=[0-9]+;7;-1",
        "000000_000001_000009_these=[0-9]+;6;-1",
        "000000_000001_000010_laws=[0-9]+;11;-1",
        "000000_000001_000011_studied=[0-9]+;21;-1",
        "000000_000001_000012_and=[0-9]+;3;-1",
        "000000_000001_000013_revised=[0-9]+;21;-1",
        "000000_000001_000014_to=[0-9]+;8;-1",
        "000000_000001_000015_the=[0-9]+;1;-1",
        "000000_000001_000016_end=[0-9]+;10;-1",
        "000000_000001_000017_of=[0-9]+;8;-1",
        "000000_000001_000018_modernizing=[0-9]+;20;-1",
        "000000_000001_000019_and=[0-9]+;3;-1",
        "000000_000001_000020_improving=[0-9]+;20;-1",
        "000000_000001_000021_them=[0-9]+;14;-1",
        "000000_000001_000022_''=[0-9]+;22;-1",
        "000000_000001_000023_.=[0-9]+;22;-1",
    };

    final int firstPredictionLine = 3;
    for (int i = 0; i < expectedPredictions.length; i++) {
        int lineIndex = firstPredictionLine + i;
        String actual = lines.get(lineIndex);
        // the message identifies the offending line, which a bare assertTrue would not
        assertTrue("Line " + lineIndex + " [" + actual + "] does not match [" + expectedPredictions[i] + "]",
                actual.matches(expectedPredictions[i]));
    }
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) Test(org.junit.Test)

Example 85 with ParameterSpace

use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.

the class KerasDocumentTest method runTest.

/**
 * Runs the Keras document train/test experiment and verifies the produced id2outcome file.
 *
 * <p>The test is skipped (with a note on {@code System.err}) when {@code getEnvironment()}
 * throws, i.e. when no usable Python installation could be located. Otherwise exactly one
 * outcome file with 11 lines is expected: the header, the label mapping, and 8 prediction
 * lines whose gold label is fixed while the predicted label may be any number.
 *
 * @throws Exception if the experiment or reading the outcome file fails
 */
@Test
public void runTest() throws Exception {
    DemoUtils.setDkproHome(KerasDocumentTrainTest.class.getSimpleName());

    String python3;
    try {
        python3 = getEnvironment();
    } catch (Exception e) {
        // Deliberate best-effort skip: without the Python environment the experiment cannot
        // run. The cause is printed so that a skipped run can be diagnosed.
        System.err.println("Failed to locate Python with Keras - will skip this test case: " + e);
        return;
    }

    ParameterSpace ps = KerasDocumentTrainTest.getParameterSpace(python3);
    KerasDocumentTrainTest.runTrainTest(ps);

    assertEquals(1, ContextMemoryReport.id2outcomeFiles.size());
    List<String> lines = FileUtils.readLines(ContextMemoryReport.id2outcomeFiles.get(0), "utf-8");
    assertEquals(11, lines.size());

    // header and label mapping must match exactly
    assertEquals("#ID=PREDICTION;GOLDSTANDARD;THRESHOLD", lines.get(0));
    assertEquals("#labels 0=alt.atheism 1=comp.graphics", lines.get(1));

    // Expected patterns for the prediction lines, in file order. The line at index 2 is
    // intentionally not asserted, so the first pattern belongs to the line at index 3.
    final String[] expectedPredictions = {
        ".*alt.atheism/53068.txt=[0-9]+;0;-1",
        ".*alt.atheism/53257.txt=[0-9]+;0;-1",
        ".*alt.atheism/53260.txt=[0-9]+;0;-1",
        ".*alt.atheism/53261.txt=[0-9]+;0;-1",
        ".*comp.graphics/38758.txt=[0-9]+;1;-1",
        ".*comp.graphics/38761.txt=[0-9]+;1;-1",
        ".*comp.graphics/38762.txt=[0-9]+;1;-1",
        ".*comp.graphics/38763.txt=[0-9]+;1;-1",
    };

    final int firstPredictionLine = 3;
    for (int i = 0; i < expectedPredictions.length; i++) {
        int lineIndex = firstPredictionLine + i;
        String actual = lines.get(lineIndex);
        // the message identifies the offending line, which a bare assertTrue would not
        assertTrue("Line " + lineIndex + " [" + actual + "] does not match [" + expectedPredictions[i] + "]",
                actual.matches(expectedPredictions[i]));
    }
}
Also used : ParameterSpace(org.dkpro.lab.task.ParameterSpace) Test(org.junit.Test)

Aggregations

ParameterSpace (org.dkpro.lab.task.ParameterSpace)130 HashMap (java.util.HashMap)60 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)51 Map (java.util.Map)45 Test (org.junit.Test)44 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)42 File (java.io.File)26 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)21 DefaultBatchTask (org.dkpro.lab.task.impl.DefaultBatchTask)12 ArrayList (java.util.ArrayList)10 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 TaskContext (org.dkpro.lab.engine.TaskContext)7 CrfSuiteAdapter (org.dkpro.tc.ml.crfsuite.CrfSuiteAdapter)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 List (java.util.List)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 FoldDimensionBundle (org.dkpro.lab.task.impl.FoldDimensionBundle)5 SMO (weka.classifiers.functions.SMO)5 Task (org.dkpro.lab.task.Task)4