use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class WekaTwentyNewsgroupsInstanceWeightingDemo method getParameterSpace.
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
// configure training and test data reader dimension
// train/test will use both, while cross-validation will only use the train part
Map<String, Object> dimReaders = new HashMap<String, Object>();
CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(WeightedTwentyNewsgroupsCorpusReader.class, WeightedTwentyNewsgroupsCorpusReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, WeightedTwentyNewsgroupsCorpusReader.PARAM_WEIGHT_FILE_LOCATION, corpusFilePathTrain + weightsFile, WeightedTwentyNewsgroupsCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE, WeightedTwentyNewsgroupsCorpusReader.PARAM_PATTERNS, Arrays.asList(WeightedTwentyNewsgroupsCorpusReader.INCLUDE_PREFIX + "*/*.txt"));
dimReaders.put(DIM_READER_TRAIN, readerTrain);
CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(WeightedTwentyNewsgroupsCorpusReader.class, WeightedTwentyNewsgroupsCorpusReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, WeightedTwentyNewsgroupsCorpusReader.PARAM_WEIGHT_FILE_LOCATION, corpusFilePathTest + weightsFile, WeightedTwentyNewsgroupsCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE, WeightedTwentyNewsgroupsCorpusReader.PARAM_PATTERNS, WeightedTwentyNewsgroupsCorpusReader.INCLUDE_PREFIX + "*/*.txt");
dimReaders.put(DIM_READER_TEST, readerTest);
Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 2, WordNGram.PARAM_NGRAM_MAX_N, 3)));
Map<String, Object> config = new HashMap<>();
config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), SMO.class.getName() });
config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas, Dimension.create(DIM_APPLY_INSTANCE_WEIGHTING, true));
return pSpace;
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class MekaComplexConfigurationMultiDemo method main.
/*
* Starts the experiment.
*/
public static void main(String[] args) throws Exception {
// This is used to ensure that the required DKPRO_HOME environment variable is set.
// Ensures that people can run the experiments even if they haven't read the setup
// instructions first :)
// Don't use this in real experiments! Read the documentation and set DKPRO_HOME as
// explained there.
DemoUtils.setDkproHome(MekaComplexConfigurationMultiDemo.class.getSimpleName());
ParameterSpace pSpace = getParameterSpace();
MekaComplexConfigurationMultiDemo experiment = new MekaComplexConfigurationMultiDemo();
experiment.runTrainTest(pSpace);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class PairModeWekaDemo method getParameterSpace.
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
// configure training and test data reader dimension
// train/test will use both, while cross-validation will only use the train part
Map<String, Object> dimReaders = new HashMap<String, Object>();
CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(PairTwentyNewsgroupsReader.class, PairTwentyNewsgroupsReader.PARAM_LISTFILE, listFilePathTrain, PairTwentyNewsgroupsReader.PARAM_LANGUAGE_CODE, languageCode);
dimReaders.put(DIM_READER_TRAIN, readerTrain);
CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(PairTwentyNewsgroupsReader.class, PairTwentyNewsgroupsReader.PARAM_LISTFILE, listFilePathTest, PairTwentyNewsgroupsReader.PARAM_LANGUAGE_CODE, languageCode);
dimReaders.put(DIM_READER_TEST, readerTest);
Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(DiffNrOfTokensPairFeatureExtractor.class)));
Map<String, Object> config = new HashMap<>();
config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), NaiveBayes.class.getName() });
config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_PAIR), dimFeatureSets, mlas);
return pSpace;
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class PairModeWekaDemo method main.
public static void main(String[] args) throws Exception {
// This is used to ensure that the required DKPRO_HOME environment variable is set.
// Ensures that people can run the experiments even if they haven't read the setup
// instructions first :)
// Don't use this in real experiments! Read the documentation and set DKPRO_HOME as
// explained there.
DemoUtils.setDkproHome(PairModeWekaDemo.class.getSimpleName());
ParameterSpace pSpace = getParameterSpace();
PairModeWekaDemo experiment = new PairModeWekaDemo();
experiment.runTrainTest(pSpace);
}
use of org.dkpro.lab.task.ParameterSpace in project dkpro-tc by dkpro.
the class MekaSaveAndApplyModelMultilabelDemo method getParameterSpace.
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
// configure training and test data reader dimension
// train/test will use both, while cross-validation will only use the
// train part
Map<String, Object> dimReaders = new HashMap<String, Object>();
CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(ReutersCorpusReader.class, ReutersCorpusReader.PARAM_SOURCE_LOCATION, FILEPATH_TRAIN, ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, FILEPATH_GOLD_LABELS, ReutersCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE, ReutersCorpusReader.PARAM_PATTERNS, ReutersCorpusReader.INCLUDE_PREFIX + "*.txt");
dimReaders.put(DIM_READER_TRAIN, readerTrain);
CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(ReutersCorpusReader.class, ReutersCorpusReader.PARAM_SOURCE_LOCATION, FILEPATH_TEST, ReutersCorpusReader.PARAM_GOLD_LABEL_FILE, FILEPATH_GOLD_LABELS, ReutersCorpusReader.PARAM_LANGUAGE, LANGUAGE_CODE, ReutersCorpusReader.PARAM_PATTERNS, ReutersCorpusReader.INCLUDE_PREFIX + "*.txt");
dimReaders.put(DIM_READER_TEST, readerTest);
Map<String, Object> config = new HashMap<>();
config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new MekaAdapter(), MULAN.class.getName(), "-S", "RAkEL2", "-W", RandomForest.class.getName() });
config.put(DIM_DATA_WRITER, new MekaAdapter().getDataWriterClass().getName());
config.put(DIM_FEATURE_USE_SPARSE, new MekaAdapter().useSparseFeatures());
Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 100, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_MULTI_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), Dimension.create(DIM_BIPARTITION_THRESHOLD, BIPARTITION_THRESHOLD), dimFeatureSets, mlas);
return pSpace;
}
Aggregations