Use of weka.classifiers.trees.J48 in the dkpro-tc project by dkpro.
Class WekaComplexConfigurationSingleDemo, method getParameterSpace.
/**
 * Assembles the parameter space used by this demo.
 *
 * <p>The space bundles the train/test readers, fixes the learning mode to
 * {@code LM_SINGLE_LABEL} and the feature mode to {@code FM_DOCUMENT}, offers two
 * alternative feature sets, bundles three alternative Weka classifier
 * configurations (SMO, RandomForest, and Bagging wrapping J48), and enables
 * feature selection.
 *
 * @return the assembled parameter space
 * @throws ResourceInitializationException declared by this method; presumably raised
 *         when a reader description cannot be built
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Readers: both scan "*/*.txt" below their source location with the same language code.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, CORPUS_FILEPATH_TRAIN,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, COPRUS_FILEPATH_TEST,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");

    Map<String, Object> readers = new HashMap<>();
    readers.put(DIM_READER_TRAIN, trainReader);
    readers.put(DIM_READER_TEST, testReader);

    // Classifier configuration 1: SMO with a PolyKernel.
    Object[] smoArgs = {
            new WekaAdapter(), SMO.class.getName(),
            "-C", "1.0",
            "-K", PolyKernel.class.getName() + " " + "-C -1 -E 2" };
    Map<String, Object> smoConfig = new HashMap<>();
    smoConfig.put(DIM_CLASSIFICATION_ARGS, smoArgs);
    smoConfig.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    smoConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());

    // Classifier configuration 2: RandomForest with "-I 5".
    Object[] randomForestArgs = {
            new WekaAdapter(), RandomForest.class.getName(),
            "-I", "5" };
    Map<String, Object> randomForestConfig = new HashMap<>();
    randomForestConfig.put(DIM_CLASSIFICATION_ARGS, randomForestArgs);
    randomForestConfig.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    randomForestConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());

    // Classifier configuration 3: Bagging with J48 as the wrapped classifier; the
    // arguments after "--" follow the J48 class name in the same option list.
    Object[] baggingArgs = {
            new WekaAdapter(), Bagging.class.getName(),
            "-I", "2",
            "-W", J48.class.getName(),
            "--", "-C", "0.5", "-M", "2" };
    Map<String, Object> baggingConfig = new HashMap<>();
    baggingConfig.put(DIM_CLASSIFICATION_ARGS, baggingArgs);
    baggingConfig.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    baggingConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());

    Dimension<Map<String, Object>> classifierConfigs =
            Dimension.createBundle("config", smoConfig, randomForestConfig, baggingConfig);

    // Two alternative feature sets: TokenRatioPerDocument together with WordNGram,
    // and WordNGram on its own. Both WordNGram extractors use top-k 50 and n-grams of 1..3.
    TcFeatureSet ratioAndNGrams = new TcFeatureSet(
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(WordNGram.class,
                    WordNGram.PARAM_NGRAM_USE_TOP_K, 50,
                    WordNGram.PARAM_NGRAM_MIN_N, 1,
                    WordNGram.PARAM_NGRAM_MAX_N, 3));
    TcFeatureSet nGramsOnly = new TcFeatureSet(
            TcFeatureFactory.create(WordNGram.class,
                    WordNGram.PARAM_NGRAM_USE_TOP_K, 50,
                    WordNGram.PARAM_NGRAM_MIN_N, 1,
                    WordNGram.PARAM_NGRAM_MAX_N, 3));
    Dimension<TcFeatureSet> featureSets =
            Dimension.create(DIM_FEATURE_SET, ratioAndNGrams, nGramsOnly);

    // Single-label feature selection (Weka-specific options): InfoGain evaluation with a
    // Ranker searcher given "-N 10", intended to reduce the feature set to 10.
    String[] searcherArgs = { Ranker.class.getName(), "-N", "10" };
    String[] evaluatorArgs = { InfoGainAttributeEval.class.getName() };
    Map<String, Object> featureSelection = new HashMap<>();
    featureSelection.put(DIM_FEATURE_SEARCHER_ARGS, asList(searcherArgs));
    featureSelection.put(DIM_ATTRIBUTE_EVALUATOR_ARGS, asList(evaluatorArgs));
    featureSelection.put(DIM_APPLY_FEATURE_SELECTION, true);

    return new ParameterSpace(
            Dimension.createBundle("readers", readers),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT),
            featureSets,
            classifierConfigs,
            Dimension.createBundle("featureSelection", featureSelection));
}
Use of weka.classifiers.trees.J48 in the dkpro-tc project by dkpro.
Class WekaResultsTest, method testWekaResultsMultiLabel.
/**
 * Builds a {@code BR} classifier configured with J48 (passed via {@code -W}) on the
 * multi-label training fixture, evaluates it against the test fixture, and checks the
 * resulting {@code L} value and the "Exact match" statistic.
 *
 * @throws Exception if any of the Weka/Meka calls fail
 */
@Test
public void testWekaResultsMultiLabel() throws Exception {
    BR classifier = new BR();
    String[] options = { "-W", J48.class.getName() };
    classifier.setOptions(options);

    // Align the test outcomes with the training data first, then drop the instance id
    // from both data sets before training and evaluation.
    Instances alignedTest =
            WekaUtils.makeOutcomeClassesCompatible(multiLabelTrainData, multiLabelTestData, true);
    Instances train = WekaUtils.removeInstanceId(multiLabelTrainData, true);
    Instances test = WekaUtils.removeInstanceId(alignedTest, true);

    classifier.buildClassifier(train);
    Result result = WekaUtils.getEvaluationMultilabel(classifier, train, test, "0.2");

    assertEquals(16.0, result.L, 0.01);

    Double exactMatch = (Double) Result.getStats(result, "1").get("Exact match");
    assertEquals(0.0, exactMatch, 0.01);
}
Aggregations