use of org.dkpro.tc.api.features.TcFeatureSet in project dkpro-tc by dkpro.
the class CRFSuiteNERSequenceDemo method getParameterSpace.
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(SequenceOutcomeReader.class, SequenceOutcomeReader.PARAM_LANGUAGE, "de", SequenceOutcomeReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, SequenceOutcomeReader.PARAM_TOKEN_INDEX, 1, SequenceOutcomeReader.PARAM_OUTCOME_INDEX, 2, SequenceOutcomeReader.PARAM_SKIP_LINES_START_WITH_STRING, "#", SequenceOutcomeReader.PARAM_PATTERNS, INCLUDE_PREFIX + "*.txt");
CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(SequenceOutcomeReader.class, SequenceOutcomeReader.PARAM_LANGUAGE, "de", SequenceOutcomeReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, SequenceOutcomeReader.PARAM_TOKEN_INDEX, 1, SequenceOutcomeReader.PARAM_OUTCOME_INDEX, 2, SequenceOutcomeReader.PARAM_SKIP_LINES_START_WITH_STRING, "#", SequenceOutcomeReader.PARAM_PATTERNS, INCLUDE_PREFIX + "*.txt");
Map<String, Object> dimReaders = new HashMap<String, Object>();
dimReaders.put(DIM_READER_TRAIN, readerTrain);
dimReaders.put(DIM_READER_TEST, readerTest);
Map<String, Object> config = new HashMap<>();
config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new CrfSuiteAdapter(), CrfSuiteAdapter.ALGORITHM_LBFGS, "-p", "max_iterations=5" });
config.put(DIM_DATA_WRITER, new CrfSuiteAdapter().getDataWriterClass().getName());
config.put(DIM_FEATURE_USE_SPARSE, new CrfSuiteAdapter().useSparseFeatures());
Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(InitialCharacterUpperCase.class)));
ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, Constants.LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, Constants.FM_SEQUENCE), dimFeatureSets, mlas);
return pSpace;
}
use of org.dkpro.tc.api.features.TcFeatureSet in project dkpro-tc by dkpro.
the class LiblinearDocumentPlain method getParameterSpace.
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
Map<String, Object> dimReaders = new HashMap<String, Object>();
CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
dimReaders.put(DIM_READER_TRAIN, readerTrain);
CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
dimReaders.put(DIM_READER_TEST, readerTest);
Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet("DummyFeatureSet", TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
Map<String, Object> config = new HashMap<>();
config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LiblinearAdapter(), "-s", "4", "-c", "100" });
config.put(DIM_DATA_WRITER, new LiblinearAdapter().getDataWriterClass().getName());
config.put(DIM_FEATURE_USE_SPARSE, new LiblinearAdapter().useSparseFeatures());
Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas);
return pSpace;
}
use of org.dkpro.tc.api.features.TcFeatureSet in project dkpro-tc by dkpro.
the class LibsvmDocumentPlain method getParameterSpace.
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
Map<String, Object> dimReaders = new HashMap<String, Object>();
CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
dimReaders.put(DIM_READER_TRAIN, readerTrain);
CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
dimReaders.put(DIM_READER_TEST, readerTest);
Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet("DummyFeatureSet", TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class, WordNGram.PARAM_NGRAM_USE_TOP_K, 50, WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 3)));
Map<String, Object> config = new HashMap<>();
config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter(), "-s", "1", "-c", "1000", "-t", "3" });
config.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
config.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, mlas);
return pSpace;
}
use of org.dkpro.tc.api.features.TcFeatureSet in project dkpro-tc by dkpro.
the class LibsvmBrownUnitPosDemo method getParameterSpace.
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
// configure training and test data reader dimension
Map<String, Object> dimReaders = new HashMap<String, Object>();
CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(TeiReader.class, TeiReader.PARAM_LANGUAGE, "en", TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, TeiReader.PARAM_PATTERNS, new String[] { INCLUDE_PREFIX + "*.xml", INCLUDE_PREFIX + "*.xml.gz" });
dimReaders.put(DIM_READER_TRAIN, readerTrain);
CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(TeiReader.class, TeiReader.PARAM_LANGUAGE, "en", TeiReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, TeiReader.PARAM_PATTERNS, new String[] { INCLUDE_PREFIX + "*.xml", INCLUDE_PREFIX + "*.xml.gz" });
dimReaders.put(DIM_READER_TEST, readerTest);
Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(Constants.DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(TokenRatioPerDocument.class), TcFeatureFactory.create(CharacterNGram.class, CharacterNGram.PARAM_NGRAM_LOWER_CASE, false, CharacterNGram.PARAM_NGRAM_USE_TOP_K, 50)));
Map<String, Object> config = new HashMap<>();
config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter(), "-c", "10" });
config.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
config.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config);
ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL), Dimension.create(DIM_FEATURE_MODE, FM_UNIT), dimFeatureSets, mlas);
return pSpace;
}
use of org.dkpro.tc.api.features.TcFeatureSet in project dkpro-tc by dkpro.
the class MultiSvmUsingWekaLibsvmLiblinear method getParameterSpace.
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
// configure training and test data reader dimension
// train/test will use both, while cross-validation will only use the
// train part
Map<String, Object> dimReaders = new HashMap<String, Object>();
CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
dimReaders.put(DIM_READER_TRAIN, readerTrain);
//
CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(FolderwiseDataReader.class, FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest, FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE, FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
dimReaders.put(DIM_READER_TEST, readerTest);
Map<String, Object> config = new HashMap<>();
config.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), SMO.class.getName(), "-C", "1.0", "-K", PolyKernel.class.getName() + " " + "-C -1 -E 2" });
config.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
config.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
Map<String, Object> config2 = new HashMap<>();
config2.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LiblinearAdapter(), "-s", "4", "-c", "100" });
config2.put(DIM_DATA_WRITER, new LiblinearAdapter().getDataWriterClass().getName());
config2.put(DIM_FEATURE_USE_SPARSE, new LiblinearAdapter().useSparseFeatures());
Map<String, Object> config3 = new HashMap<>();
config3.put(DIM_CLASSIFICATION_ARGS, new Object[] { new LibsvmAdapter(), "-s", "1", "-c", "1000", "-t", "3" });
config3.put(DIM_DATA_WRITER, new LibsvmAdapter().getDataWriterClass().getName());
config3.put(DIM_FEATURE_USE_SPARSE, new LibsvmAdapter().useSparseFeatures());
Dimension<Map<String, Object>> mlas = Dimension.createBundle("config", config, config2, config3);
Dimension<String> dimLearningMode = Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL);
Dimension<String> dimFeatureMode = Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT);
Dimension<TcFeatureSet> dimFeatureSet = Dimension.create(DIM_FEATURE_SET, getFeatureSet());
ParameterSpace ps = new ParameterSpace(dimLearningMode, dimFeatureMode, dimFeatureMode, dimFeatureSet, mlas, Dimension.createBundle(DIM_READERS, dimReaders));
return ps;
}
Aggregations