use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class WekaTwentyNewsgroupsInstanceWeightingDemo method main.
/**
 * Entry point of the instance-weighting demo: runs a train/test experiment and then a
 * cross-validation experiment on the parameter space returned by {@code getParameterSpace()}.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if setting up or running either experiment fails
 */
public static void main(String[] args) throws Exception {
    /*
     * Demo-only convenience: makes sure the required DKPRO_HOME environment variable is set,
     * so the demo also runs for people who have not gone through the setup instructions yet.
     * Do NOT rely on this in real experiments - set DKPRO_HOME as described in the
     * documentation instead.
     */
    final String demoName = WekaTwentyNewsgroupsInstanceWeightingDemo.class.getSimpleName();
    DemoUtils.setDkproHome(demoName);

    final ParameterSpace parameterSpace = getParameterSpace();
    final WekaTwentyNewsgroupsInstanceWeightingDemo demo = new WekaTwentyNewsgroupsInstanceWeightingDemo();

    // Both runs use the very same parameter space: train/test first, cross-validation second.
    demo.runTrainTest(parameterSpace);
    demo.runCrossValidation(parameterSpace);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class MekaComplexConfigurationMultiDemo method getParameterSpace.
/**
 * Builds the parameter space of the multi-label Meka demo.
 * <p>
 * The space bundles a train and a test reader over the Reuters corpus, three alternative Meka
 * classifier configurations, one feature set, and a feature-selection bundle.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException if a reader description cannot be created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Train and test readers differ only in their source location.
    Map<String, Object> dimReaders = new HashMap<>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(
            ReutersCorpusReader.class,
            ReutersCorpusReader.PARAM_SOURCE_LOCATION, FILEPATH_TRAIN,
            ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, FILEPATH_GOLD_LABELS,
            ReutersCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            ReutersCorpusReader.PARAM_PATTERNS, ReutersCorpusReader.INCLUDE_PREFIX + "*.txt");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(
            ReutersCorpusReader.class,
            ReutersCorpusReader.PARAM_SOURCE_LOCATION, FILEPATH_TEST,
            ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, FILEPATH_GOLD_LABELS,
            ReutersCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            ReutersCorpusReader.PARAM_PATTERNS, ReutersCorpusReader.INCLUDE_PREFIX + "*.txt");
    dimReaders.put(DIM_READER_TEST, readerTest);

    // Config 1: BR with a NaiveBayes base classifier ("-W")
    Map<String, Object> config1 = createMekaConfig(BR.class.getName(), "-W", NaiveBayes.class.getName());
    // Config 2: CCq with "-P 0.9"
    Map<String, Object> config2 = createMekaConfig(CCq.class.getName(), "-P", "0.9");
    // Config 3: PSUpdateable with "-B 900 -S 9"
    Map<String, Object> config3 = createMekaConfig(PSUpdateable.class.getName(), "-B", "900", "-S", "9");
    Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config1, config2, config3);

    // A single feature set made of two extractors: the token ratio per document and
    // word n-grams (top 600, n from 1 to 3).
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET,
            new TcFeatureSet(
                    TcFeatureFactory.create(TokenRatioPerDocument.class),
                    TcFeatureFactory.create(WordNGram.class,
                            WordNGram.PARAM_NGRAM_USE_TOP_K, 600,
                            WordNGram.PARAM_NGRAM_MIN_N, 1,
                            WordNGram.PARAM_NGRAM_MAX_N, 3)));

    // Multi-label feature selection (Mulan specific options); DIM_NUM_LABELS_TO_KEEP is 10.
    Map<String, Object> dimFeatureSelection = new HashMap<>();
    dimFeatureSelection.put(DIM_LABEL_TRANSFORMATION_METHOD, "BinaryRelevanceAttributeEvaluator");
    dimFeatureSelection.put(DIM_ATTRIBUTE_EVALUATOR_ARGS, asList(InfoGainAttributeEval.class.getName()));
    dimFeatureSelection.put(DIM_NUM_LABELS_TO_KEEP, 10);
    dimFeatureSelection.put(DIM_APPLY_FEATURE_SELECTION, true);

    return new ParameterSpace(
            Dimension.createBundle("readers", dimReaders),
            Dimension.create(DIM_LEARNING_MODE, LM_MULTI_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            Dimension.create(DIM_BIPARTITION_THRESHOLD, BIPARTITION_THRESHOLD),
            dimFeatureSets,
            mlas,
            Dimension.createBundle("featureSelection", dimFeatureSelection));
}

/**
 * Creates one classifier configuration bundle for the Meka adapter.
 * <p>
 * The classification arguments consist of a fresh {@code MekaAdapter} followed by the given
 * classifier arguments; the data writer and the sparse-feature flag are read from that same
 * adapter instance.
 *
 * @param classifierArgs classifier class name followed by its command-line style options
 * @return a mutable map holding the classification args, data writer and sparse-feature flag
 */
private static Map<String, Object> createMekaConfig(String... classifierArgs) {
    MekaAdapter adapter = new MekaAdapter();

    // The adapter always comes first, followed by the classifier arguments in the given order.
    Object[] classificationArgs = new Object[classifierArgs.length + 1];
    classificationArgs[0] = adapter;
    System.arraycopy(classifierArgs, 0, classificationArgs, 1, classifierArgs.length);

    Map<String, Object> config = new HashMap<>();
    config.put(DIM_CLASSIFICATION_ARGS, classificationArgs);
    config.put(DIM_DATA_WRITER, adapter.getDataWriterClass().getName());
    config.put(DIM_FEATURE_USE_SPARSE, adapter.useSparseFeatures());
    return config;
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class MekaSaveAndApplyModelMultilabelDemo method main.
/**
 * Entry point of the multi-label save-and-apply demo: trains and stores a model for the
 * parameter space returned by {@code getParameterSpace()}, then applies the stored model to
 * a short example text.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if storing or applying the model fails
 */
public static void main(String[] args) throws Exception {
    /*
     * Demo-only convenience: makes sure the required DKPRO_HOME environment variable is set,
     * so the demo also runs for people who have not gone through the setup instructions yet.
     * Do NOT rely on this in real experiments - set DKPRO_HOME as described in the
     * documentation instead.
     */
    final String demoName = MekaSaveAndApplyModelMultilabelDemo.class.getSimpleName();
    DemoUtils.setDkproHome(demoName);

    final ParameterSpace parameterSpace = getParameterSpace();
    final MekaSaveAndApplyModelMultilabelDemo demo = new MekaSaveAndApplyModelMultilabelDemo();

    // First persist the model, then classify a sample text with it.
    demo.runSaveModel(parameterSpace);
    demo.applyStoredModel("An example sentence. And another one.");
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class DynetSeq2SeqTrainTest method getParameterSpace.
/**
 * Builds the parameter space for the Dynet sequence-to-sequence train/test setup.
 *
 * @param python3 value registered under {@code DeepLearningConstants.DIM_PYTHON_INSTALLATION}
 * @return the parameter space with train/test readers and the deep-learning dimensions
 * @throws ResourceInitializationException if a reader description cannot be created
 */
public static ParameterSpace getParameterSpace(String python3) throws ResourceInitializationException {
    // Reader for the training corpus (English TEI files, all *.xml)
    final CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            TeiReader.PARAM_PATTERNS, "*.xml");

    // Reader for the test corpus.
    // Careful - the testing file needs at least 2 sequences, otherwise things will crash.
    final CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTest,
            TeiReader.PARAM_PATTERNS, "*.xml");

    final Map<String, Object> readers = new HashMap<>();
    readers.put(DIM_READER_TRAIN, trainReader);
    readers.put(DIM_READER_TEST, testReader);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_FEATURE_MODE, Constants.FM_SEQUENCE),
            Dimension.create(DIM_LEARNING_MODE, Constants.LM_SINGLE_LABEL),
            Dimension.create(DeepLearningConstants.DIM_PYTHON_INSTALLATION, python3),
            Dimension.create(DeepLearningConstants.DIM_PRETRAINED_EMBEDDINGS,
                    "src/test/resources/wordvector/glove.6B.50d_250.txt"),
            Dimension.create(DeepLearningConstants.DIM_RAM_WORKING_MEMORY, "4096"),
            Dimension.create(DeepLearningConstants.DIM_VECTORIZE_TO_INTEGER, false),
            Dimension.create(DeepLearningConstants.DIM_USER_CODE,
                    "src/main/resources/dynetCode/dynetPoStagger.py"));
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class KerasSeq2SeqCrossValidation method getParameterSpace.
/**
 * Builds the parameter space for the Keras sequence-to-sequence cross-validation setup,
 * using {@code /usr/local/bin/python3} as the Python installation.
 *
 * @return the parameter space with the training reader and the deep-learning dimensions
 * @throws ResourceInitializationException if the reader description cannot be created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    return getParameterSpace("/usr/local/bin/python3");
}

/**
 * Builds the parameter space for the Keras sequence-to-sequence cross-validation setup with
 * a caller-supplied Python installation.
 *
 * @param python3 value registered under {@code DeepLearningConstants.DIM_PYTHON_INSTALLATION}
 * @return the parameter space with the training reader and the deep-learning dimensions
 * @throws ResourceInitializationException if the reader description cannot be created
 */
public static ParameterSpace getParameterSpace(String python3) throws ResourceInitializationException {
    // Only a training reader is registered here; no separate test reader is configured.
    Map<String, Object> dimReaders = new HashMap<>();
    CollectionReaderDescription train = CollectionReaderFactory.createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            TeiReader.PARAM_PATTERNS, asList(INCLUDE_PREFIX + "a01.xml"));
    dimReaders.put(DIM_READER_TRAIN, train);

    return new ParameterSpace(
            Dimension.createBundle("readers", dimReaders),
            Dimension.create(DIM_FEATURE_MODE, Constants.FM_SEQUENCE),
            Dimension.create(DIM_LEARNING_MODE, Constants.LM_SINGLE_LABEL),
            Dimension.create(DeepLearningConstants.DIM_PYTHON_INSTALLATION, python3),
            Dimension.create(DeepLearningConstants.DIM_MAXIMUM_LENGTH, 75),
            Dimension.create(DeepLearningConstants.DIM_VECTORIZE_TO_INTEGER, true),
            Dimension.create(DeepLearningConstants.DIM_USER_CODE,
                    "src/main/resources/kerasCode/seq/posTaggingLstm.py"));
}
Aggregations