use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class WekaPairRegressionDemo method getParameterSpace.
/**
 * Assembles the parameter space used by the Weka pair-regression demo.
 *
 * <p>The space bundles a train and a test {@code STSReader} description and combines them
 * with the pair feature mode, the regression learning mode, a feature set holding
 * {@code DiffNrOfTokensPairFeatureExtractor}, and a Weka configuration bundle that names
 * {@code SMOreg} as the learner.
 *
 * @return the fully populated parameter space
 * @throws ResourceInitializationException propagated from the reader / feature set-up calls
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Both readers use STSReader; they differ only in the input and gold files they point at.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            STSReader.class,
            STSReader.PARAM_INPUT_FILE, inputFileTrain,
            STSReader.PARAM_GOLD_FILE, goldFileTrain);
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            STSReader.class,
            STSReader.PARAM_INPUT_FILE, inputFileTest,
            STSReader.PARAM_GOLD_FILE, goldFileTest);

    Map<String, Object> readerBundle = new HashMap<>();
    readerBundle.put(DIM_READER_TRAIN, trainReader);
    readerBundle.put(DIM_READER_TEST, testReader);

    // A single feature set with one pair feature extractor.
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(
            DIM_FEATURE_SET,
            new TcFeatureSet(TcFeatureFactory.create(DiffNrOfTokensPairFeatureExtractor.class)));

    // Machine-learning adapter settings: classifier arguments, data writer, sparse-feature flag.
    Map<String, Object> wekaSettings = new HashMap<>();
    wekaSettings.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new WekaAdapter(), SMOreg.class.getName() });
    wekaSettings.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    wekaSettings.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> wekaDimension = Dimension.createBundle("config", wekaSettings);

    return new ParameterSpace(
            Dimension.createBundle(Constants.DIM_READER_TRAIN, readerBundle),
            Dimension.create(Constants.DIM_FEATURE_MODE, Constants.FM_PAIR),
            Dimension.create(Constants.DIM_LEARNING_MODE, Constants.LM_REGRESSION),
            featureSetDimension,
            wekaDimension);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class XgboostDocumentPlain method main.
/**
 * Command-line entry point: points DKPro home at {@code target/}, builds the parameter
 * space and runs the train/test experiment on it.
 *
 * @param args command-line arguments (not read)
 * @throws Exception propagated from the set-up or the experiment run
 */
public static void main(String[] args) throws Exception {
    DemoUtils.setDkproHome("target/");

    // Build the parameter space before instantiating the experiment, as the original did.
    final ParameterSpace space = getParameterSpace();
    new XgboostDocumentPlain().runTrainTest(space);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class XgboostDocumentPlain method getParameterSpace.
/**
 * Assembles the parameter space for the Xgboost document-classification demo.
 *
 * <p>The space bundles a train and a test {@code FolderwiseDataReader} description and
 * combines them with the single-label learning mode, the document feature mode, a feature
 * set named {@code "DummyFeatureSet"} ({@code TokenRatioPerDocument} plus {@code WordNGram}),
 * and an Xgboost configuration bundle.
 *
 * @return the fully populated parameter space
 * @throws ResourceInitializationException propagated from the reader / feature set-up calls
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader dimension for training and test data: train/test runs use both readers,
    // whereas cross-validation only uses the training one.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");

    Map<String, Object> readerBundle = new HashMap<>();
    readerBundle.put(DIM_READER_TRAIN, trainReader);
    readerBundle.put(DIM_READER_TEST, testReader);

    // Feature set: token ratio per document plus word n-grams (n = 1..3, top-k = 20).
    Dimension<TcFeatureSet> featureSetDimension = Dimension.create(
            DIM_FEATURE_SET,
            new TcFeatureSet(
                    "DummyFeatureSet",
                    TcFeatureFactory.create(TokenRatioPerDocument.class),
                    TcFeatureFactory.create(
                            WordNGram.class,
                            WordNGram.PARAM_NGRAM_USE_TOP_K, 20,
                            WordNGram.PARAM_NGRAM_MIN_N, 1,
                            WordNGram.PARAM_NGRAM_MAX_N, 3)));

    // Machine-learning adapter settings: classifier arguments, data writer, sparse-feature flag.
    Map<String, Object> xgboostSettings = new HashMap<>();
    xgboostSettings.put(DIM_CLASSIFICATION_ARGS,
            new Object[] { new XgboostAdapter(), "objective=multi:softmax" });
    xgboostSettings.put(DIM_DATA_WRITER, new XgboostAdapter().getDataWriterClass().getName());
    xgboostSettings.put(DIM_FEATURE_USE_SPARSE, new XgboostAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> xgboostDimension =
            Dimension.createBundle("config", xgboostSettings);

    return new ParameterSpace(
            Dimension.createBundle("readers", readerBundle),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            featureSetDimension,
            xgboostDimension);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class DyNetSequenceTest method runTest.
/**
 * Runs the DyNet sequence train/test experiment and verifies the single outcome file it
 * reports through {@code ContextMemoryReport}.
 *
 * <p>If {@code getEnvironment()} throws, a notice is written to stderr and the method
 * returns without running the experiment or asserting anything.
 *
 * @throws Exception propagated from the experiment run or from reading the outcome file
 */
@Test
public void runTest() throws Exception {
    DemoUtils.setDkproHome(DynetSeq2SeqTrainTest.class.getSimpleName());

    String python3;
    try {
        python3 = getEnvironment();
    } catch (Exception e) {
        // Deliberate best-effort skip: without a usable Python environment there is nothing
        // to run. The notice names DyNet (it previously said "Keras", copied from the Keras test).
        System.err.println("Failed to locate Python with DyNet - will skip this test case");
        return;
    }

    ParameterSpace ps = DynetSeq2SeqTrainTest.getParameterSpace(python3);
    DynetSeq2SeqTrainTest.runTrainTest(ps);

    assertEquals(1, ContextMemoryReport.id2outcomeFiles.size());
    List<String> lines = FileUtils.readLines(ContextMemoryReport.id2outcomeFiles.get(0), "utf-8");
    assertEquals(51, lines.size());

    // Header lines.
    assertEquals("#ID=PREDICTION;GOLDSTANDARD;THRESHOLD", lines.get(0));
    assertEquals("#labels 0=AP 1=AT 2=BER 3=CC 4=CS 5=DOD 6=DTS 7=HV 8=IN 9=JJ 10=NN 11=NNS 12=NP 13=NPg 14=PPO 15=PPS 16=RB 17=TO 18=VB 19=VBD 20=VBG 21=VBN 22=pct", lines.get(1));

    // Line 2 is not compared by this test; the per-token rows start at line 3.
    // Each pattern accepts any numeric prediction but pins the token id, gold label and threshold.
    final int firstTokenRow = 3;
    final String[] expectedRows = {
        "000000_000000_000000_The=[0-9]+;1;-1",
        "000000_000000_000001_jury=[0-9]+;10;-1",
        "000000_000000_000002_said=[0-9]+;19;-1",
        "000000_000000_000003_it=[0-9]+;15;-1",
        "000000_000000_000004_did=[0-9]+;5;-1",
        "000000_000000_000005_find=[0-9]+;18;-1",
        "000000_000000_000006_that=[0-9]+;4;-1",
        "000000_000000_000007_many=[0-9]+;0;-1",
        "000000_000000_000008_of=[0-9]+;8;-1",
        "000000_000000_000009_Georgia's=[0-9]+;13;-1",
        "000000_000000_000010_registration=[0-9]+;10;-1",
        "000000_000000_000011_and=[0-9]+;3;-1",
        "000000_000000_000012_election=[0-9]+;10;-1",
        "000000_000000_000013_laws=[0-9]+;11;-1",
        "000000_000000_000014_``=[0-9]+;22;-1",
        "000000_000000_000015_are=[0-9]+;2;-1",
        "000000_000000_000016_outmoded=[0-9]+;9;-1",
        "000000_000000_000017_or=[0-9]+;3;-1",
        "000000_000000_000018_inadequate=[0-9]+;9;-1",
        "000000_000000_000019_and=[0-9]+;3;-1",
        "000000_000000_000020_often=[0-9]+;16;-1",
        "000000_000000_000021_ambiguous=[0-9]+;9;-1",
        "000000_000000_000022_''=[0-9]+;22;-1",
        "000000_000000_000023_.=[0-9]+;22;-1",
        "000000_000001_000000_It=[0-9]+;15;-1",
        "000000_000001_000001_recommended=[0-9]+;19;-1",
        "000000_000001_000002_that=[0-9]+;4;-1",
        "000000_000001_000003_Fulton=[0-9]+;12;-1",
        "000000_000001_000004_legislators=[0-9]+;11;-1",
        "000000_000001_000005_act=[0-9]+;18;-1",
        "000000_000001_000006_``=[0-9]+;22;-1",
        "000000_000001_000007_to=[0-9]+;17;-1",
        "000000_000001_000008_have=[0-9]+;7;-1",
        "000000_000001_000009_these=[0-9]+;6;-1",
        "000000_000001_000010_laws=[0-9]+;11;-1",
        "000000_000001_000011_studied=[0-9]+;21;-1",
        "000000_000001_000012_and=[0-9]+;3;-1",
        "000000_000001_000013_revised=[0-9]+;21;-1",
        "000000_000001_000014_to=[0-9]+;8;-1",
        "000000_000001_000015_the=[0-9]+;1;-1",
        "000000_000001_000016_end=[0-9]+;10;-1",
        "000000_000001_000017_of=[0-9]+;8;-1",
        "000000_000001_000018_modernizing=[0-9]+;20;-1",
        "000000_000001_000019_and=[0-9]+;3;-1",
        "000000_000001_000020_improving=[0-9]+;20;-1",
        "000000_000001_000021_them=[0-9]+;14;-1",
        "000000_000001_000022_''=[0-9]+;22;-1",
        "000000_000001_000023_.=[0-9]+;22;-1",
    };
    for (int i = 0; i < expectedRows.length; i++) {
        assertTrue(lines.get(firstTokenRow + i).matches(expectedRows[i]));
    }
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class KerasDocumentTest method runTest.
/**
 * Runs the Keras document train/test experiment and verifies the single outcome file it
 * reports through {@code ContextMemoryReport}.
 *
 * <p>If {@code getEnvironment()} throws, a notice is written to stderr and the method
 * returns without running the experiment or asserting anything.
 *
 * @throws Exception propagated from the experiment run or from reading the outcome file
 */
@Test
public void runTest() throws Exception {
    DemoUtils.setDkproHome(KerasDocumentTrainTest.class.getSimpleName());

    String python3;
    try {
        python3 = getEnvironment();
    } catch (Exception e) {
        // Deliberate best-effort skip when no usable Python environment is found.
        System.err.println("Failed to locate Python with Keras - will skip this test case");
        return;
    }

    ParameterSpace space = KerasDocumentTrainTest.getParameterSpace(python3);
    KerasDocumentTrainTest.runTrainTest(space);

    assertEquals(1, ContextMemoryReport.id2outcomeFiles.size());
    List<String> outcome = FileUtils.readLines(ContextMemoryReport.id2outcomeFiles.get(0), "utf-8");
    assertEquals(11, outcome.size());

    // Header lines.
    assertEquals("#ID=PREDICTION;GOLDSTANDARD;THRESHOLD", outcome.get(0));
    assertEquals("#labels 0=alt.atheism 1=comp.graphics", outcome.get(1));

    // Line 2 is not compared by this test; the per-document rows start at line 3.
    // Each pattern accepts any numeric prediction but pins the document, gold label and threshold.
    final int firstDocumentRow = 3;
    final String[] expectedRows = {
        ".*alt.atheism/53068.txt=[0-9]+;0;-1",
        ".*alt.atheism/53257.txt=[0-9]+;0;-1",
        ".*alt.atheism/53260.txt=[0-9]+;0;-1",
        ".*alt.atheism/53261.txt=[0-9]+;0;-1",
        ".*comp.graphics/38758.txt=[0-9]+;1;-1",
        ".*comp.graphics/38761.txt=[0-9]+;1;-1",
        ".*comp.graphics/38762.txt=[0-9]+;1;-1",
        ".*comp.graphics/38763.txt=[0-9]+;1;-1",
    };
    for (int i = 0; i < expectedRows.length; i++) {
        assertTrue(outcome.get(firstDocumentRow + i).matches(expectedRows[i]));
    }
}
Aggregations