use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class SvmHmmBrownPosDemo method getParameterSpace.
/**
 * Assembles the parameter space for this SVM-HMM demo experiment.
 *
 * <p>The space consists of five dimensions: the reader bundle obtained from
 * {@code getDimReaders()}, single-label learning mode, sequence feature mode,
 * one feature set, and a "config" bundle carrying the {@code SvmHmmAdapter}
 * classification arguments, data writer class name and sparse-feature flag.
 *
 * @return the parameter space describing the experiment
 * @throws ResourceInitializationException declared by the signature; propagated
 *         from the reader/feature set-up calls made here
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader configuration (training and test data) comes from a sibling helper.
    Map<String, Object> readerConfig = getDimReaders();

    // Single feature set: token ratio plus character n-grams
    // (USE_TOP_K = 20, MIN_N = 2, MAX_N = 3).
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(CharacterNGram.class,
                    CharacterNGram.PARAM_NGRAM_USE_TOP_K, 20,
                    CharacterNGram.PARAM_NGRAM_MIN_N, 2,
                    CharacterNGram.PARAM_NGRAM_MAX_N, 3));
    Dimension<TcFeatureSet> featureSetDim = Dimension.create(Constants.DIM_FEATURE_SET, featureSet);

    // Learner settings: the adapter instance followed by its command-line style switches.
    Map<String, Object> learnerSettings = new HashMap<>();
    Object[] classifierArgs = { new SvmHmmAdapter(), "-c", "5.0", "-t", "1", "-m", "0" };
    learnerSettings.put(DIM_CLASSIFICATION_ARGS, classifierArgs);
    learnerSettings.put(DIM_DATA_WRITER, new SvmHmmAdapter().getDataWriterClass().getName());
    learnerSettings.put(DIM_FEATURE_USE_SPARSE, new SvmHmmAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> learnerDim = Dimension.createBundle("config", learnerSettings);

    Dimension<Map<String, Object>> readerDim = Dimension.createBundle("readers", readerConfig);
    return new ParameterSpace(
            readerDim,
            Dimension.create(Constants.DIM_LEARNING_MODE, Constants.LM_SINGLE_LABEL),
            Dimension.create(Constants.DIM_FEATURE_MODE, Constants.FM_SEQUENCE),
            featureSetDim,
            learnerDim);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class WekaAblationDemo method getParameterSpace.
/**
 * Assembles the parameter space for the Weka ablation demo.
 *
 * <p>Registers a training and a test reader (both {@code FolderwiseDataReader}
 * over the pattern {@code "*&#47;*.txt"}), a "config" bundle for
 * {@code WekaAdapter} with {@code NaiveBayes}, and the feature-set dimension
 * produced by {@code ExperimentUtil.getAblationTestFeatures} for three features.
 * Learning mode is single-label and feature mode is document.
 *
 * @return the parameter space describing the experiment
 * @throws ResourceInitializationException declared by the signature; propagated
 *         from the reader/feature set-up calls made here
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader dimension: train/test runs use both readers, cross-validation
    // only uses the training reader.
    Map<String, Object> readerConfig = new HashMap<String, Object>();
    readerConfig.put(DIM_READER_TRAIN,
            CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class,
                    FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
                    FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
                    FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt"));
    readerConfig.put(DIM_READER_TEST,
            CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class,
                    FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest,
                    FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
                    FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt"));

    // Learner settings: adapter instance plus the Weka classifier class name.
    Map<String, Object> learnerSettings = new HashMap<>();
    Object[] classifierArgs = { new WekaAdapter(), NaiveBayes.class.getName() };
    learnerSettings.put(DIM_CLASSIFICATION_ARGS, classifierArgs);
    learnerSettings.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    learnerSettings.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> learnerDim = Dimension.createBundle("config", learnerSettings);

    // Feature-set dimension is derived from the three features by the ablation helper.
    Dimension<TcFeatureSet> ablationFeatureSets = ExperimentUtil.getAblationTestFeatures(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(EmoticonRatio.class),
            TcFeatureFactory.create(NumberOfHashTags.class));

    Dimension<Map<String, Object>> readerDim = Dimension.createBundle("readers", readerConfig);
    return new ParameterSpace(
            readerDim,
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            ablationFeatureSets,
            learnerDim);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class XgboostRegression method getParameterSpace.
/**
 * Assembles the parameter space for the Xgboost regression demo.
 *
 * <p>Registers a training and a test {@code LinewiseTextOutcomeReader}
 * (outcome index 0, text index 1, language "en"), one feature set with
 * sentence and token ratio features, and a "config" bundle for
 * {@code XgboostAdapter}. Learning mode is regression and feature mode is
 * document.
 *
 * @return the parameter space describing the experiment
 * @throws ResourceInitializationException declared by the signature; propagated
 *         from the reader/feature set-up calls made here
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader dimension: train/test runs use both readers, cross-validation only
    // uses the training reader. The reader also sets the labels/outcome on all
    // documents/instances it creates.
    Map<String, Object> readerConfig = new HashMap<String, Object>();
    readerConfig.put(DIM_READER_TRAIN,
            CollectionReaderFactory.createReaderDescription(LinewiseTextOutcomeReader.class,
                    LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
                    LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
                    LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION,
                    "src/main/resources/data/essays/train/essay_train.txt",
                    LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en"));
    readerConfig.put(DIM_READER_TEST,
            CollectionReaderFactory.createReaderDescription(LinewiseTextOutcomeReader.class,
                    LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0,
                    LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1,
                    LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION,
                    "src/main/resources/data/essays/test/essay_test.txt",
                    LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en"));

    // Single feature set: sentence ratio and token ratio per document.
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(SentenceRatioPerDocument.class),
            TcFeatureFactory.create(TokenRatioPerDocument.class));
    Dimension<TcFeatureSet> featureSetDim = Dimension.create(DIM_FEATURE_SET, featureSet);

    // Learner settings: adapter instance followed by the arguments handed to it.
    Map<String, Object> learnerSettings = new HashMap<>();
    Object[] classifierArgs = { new XgboostAdapter(), "booster=gbtree", "reg:linear" };
    learnerSettings.put(DIM_CLASSIFICATION_ARGS, classifierArgs);
    learnerSettings.put(DIM_DATA_WRITER, new XgboostAdapter().getDataWriterClass().getName());
    learnerSettings.put(DIM_FEATURE_USE_SPARSE, new XgboostAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> learnerDim = Dimension.createBundle("config", learnerSettings);

    Dimension<Map<String, Object>> readerDim = Dimension.createBundle("readers", readerConfig);
    return new ParameterSpace(
            readerDim,
            Dimension.create(DIM_LEARNING_MODE, LM_REGRESSION),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            featureSetDim,
            learnerDim);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class KerasCrossValidation method runTest.
/**
 * Runs the Keras document cross-validation demo and checks that three
 * id2outcome files were recorded and exist on disk.
 *
 * <p>If {@code getEnvironment()} throws, a message is written to
 * {@code System.err} and the method returns without running the experiment
 * or any assertion.
 *
 * @throws Exception if the experiment itself fails
 */
@Test
public void runTest() throws Exception {
    DemoUtils.setDkproHome(KerasDocumentCrossValidation.class.getSimpleName());

    // Resolve the Python interpreter up front; without it the experiment is skipped.
    String pythonPath;
    try {
        pythonPath = getEnvironment();
    } catch (Exception e) {
        System.err.println("Failed to locate Python with Keras - will skip this test case");
        return;
    }

    ParameterSpace space = KerasDocumentCrossValidation.getParameterSpace(pythonPath);
    KerasDocumentCrossValidation.runCrossValidation(space);

    // The combined file in the CV folder + 2 single files in the per-fold folder
    assertEquals(3, ContextMemoryReport.id2outcomeFiles.size());
    for (int i = 0; i < 3; i++) {
        assertTrue(ContextMemoryReport.id2outcomeFiles.get(i).exists());
    }
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class KerasRegressionCrossValidation method runTest.
/**
 * Runs the Keras regression cross-validation demo and checks the Spearman
 * correlation computed from the first combined cross-validation id file.
 *
 * <p>If {@code getEnvironment()} throws, a message is written to
 * {@code System.err} and the method returns without running the experiment
 * or any assertion.
 *
 * @throws Exception if the experiment or the result conversion fails
 */
@Test
public void runTest() throws Exception {
    // NOTE(review): DKPRO_HOME is named after KerasRegressionWassa while the
    // experiment executed below is KerasRegression - confirm this is intended.
    DemoUtils.setDkproHome(KerasRegressionWassa.class.getSimpleName());

    // Resolve the Python interpreter up front; without it the experiment is skipped.
    String pythonPath;
    try {
        pythonPath = getEnvironment();
    } catch (Exception e) {
        System.err.println("Failed to locate Python with Keras - will skip this test case");
        return;
    }

    ParameterSpace space = KerasRegression.getParameterSpace(pythonPath);
    KerasRegression.runCrossValidation(space);

    EvaluationData<Double> outcomes = Tc2LtlabEvalConverter.convertRegressionModeId2Outcome(
            ContextMemoryReport.crossValidationCombinedIdFiles.get(0));
    SpearmanCorrelation spearman = new SpearmanCorrelation(outcomes);

    // NOTE(review): the test expects a negative correlation - verify that this
    // threshold is deliberate for the demo data.
    assertTrue(spearman.getResult() < 0.0);
}
Aggregations