Search in sources :

Example 71 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project dkpro-tc by dkpro.

the class XgboostSaveAndLoadModelDocumentRegression method regressionGetParameterSpace.

private ParameterSpace regressionGetParameterSpace() throws Exception {
    Map<String, Object> dimReaders = new HashMap<String, Object>();
    CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(LinewiseTextOutcomeReader.class, LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0, LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1, LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en", LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, "src/main/resources/data/essays/train/essay_train.txt", LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    dimReaders.put(DIM_READER_TRAIN, readerTrain);
    @SuppressWarnings("unchecked") Dimension<List<Object>> dimClassificationArgs = Dimension.create(DIM_CLASSIFICATION_ARGS, Arrays.asList(new Object[] { new XgboostAdapter(), "booster=gblinear", "reg:logistic" }));
    Dimension<TcFeatureSet> dimFeatureSets = Dimension.create(DIM_FEATURE_SET, new TcFeatureSet(TcFeatureFactory.create(SentenceRatioPerDocument.class), TcFeatureFactory.create(WordNGram.class), TcFeatureFactory.create(TokenRatioPerDocument.class)));
    ParameterSpace pSpace = new ParameterSpace(Dimension.createBundle("readers", dimReaders), Dimension.create(DIM_LEARNING_MODE, LM_REGRESSION), Dimension.create(DIM_FEATURE_MODE, FM_DOCUMENT), dimFeatureSets, dimClassificationArgs);
    return pSpace;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) HashMap(java.util.HashMap) ParameterSpace(org.dkpro.lab.task.ParameterSpace) XgboostAdapter(org.dkpro.tc.ml.xgboost.XgboostAdapter) ArrayList(java.util.ArrayList) List(java.util.List) TcFeatureSet(org.dkpro.tc.api.features.TcFeatureSet)

Example 72 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project dkpro-tc by dkpro.

the class LuceneMetaCollectionBasedFeatureTestBase method runFeatureExtractor.

protected void runFeatureExtractor(File luceneFolder, AnalysisEngineDescription featureExtractor) throws Exception {
    CollectionReaderDescription reader = getFeatureReader();
    AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    SimplePipeline.runPipeline(reader, segmenter, featureExtractor);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription)

Example 73 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project dkpro-tc by dkpro.

the class NgramUnitTest method runFeatureExtractor.

private File runFeatureExtractor(File luceneFolder) throws Exception {
    File outputPath = folder.newFolder();
    Object[] parameters = new Object[] { WordNGram.PARAM_UNIQUE_EXTRACTOR_NAME, EXTRACTOR_NAME, WordNGram.PARAM_NGRAM_USE_TOP_K, "1", WordNGram.PARAM_SOURCE_LOCATION, luceneFolder.toString(), WordNGramMC.PARAM_TARGET_LOCATION, luceneFolder.toString(), WordNGram.PARAM_NGRAM_MIN_N, "1", WordNGram.PARAM_NGRAM_MAX_N, "1" };
    ExternalResourceDescription featureExtractor = ExternalResourceFactory.createExternalResourceDescription(WordNGram.class, parameters);
    List<ExternalResourceDescription> fes = new ArrayList<>();
    fes.add(featureExtractor);
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(TestReaderSingleLabelDocumentReader.class, TestReaderSingleLabelDocumentReader.PARAM_LANGUAGE, "en", TestReaderSingleLabelDocumentReader.PARAM_SOURCE_LOCATION, "src/test/resources/ngrams/text3.txt", TestReaderSingleLabelDocumentReader.PARAM_SUPPRESS_DOCUMENT_ANNOTATION, true);
    AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription unitAnno = AnalysisEngineFactory.createEngineDescription(EachTokenAsUnitAnnotator.class);
    AnalysisEngineDescription featExtractorConnector = TaskUtils.getFeatureExtractorConnector(outputPath.getAbsolutePath(), JsonDataWriter.class.getName(), Constants.LM_SINGLE_LABEL, Constants.FM_UNIT, false, false, false, false, Collections.emptyList(), fes, new String[] {});
    SimplePipeline.runPipeline(reader, segmenter, unitAnno, featExtractorConnector);
    return outputPath;
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) JsonDataWriter(org.dkpro.tc.core.io.JsonDataWriter) ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File) ExternalResourceDescription(org.apache.uima.resource.ExternalResourceDescription)

Example 74 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project dkpro-tc by dkpro.

the class NgramUnitTest method runMetaCollection.

private void runMetaCollection(File luceneFolder) throws Exception {
    Object[] parameters = new Object[] { WordNGram.PARAM_UNIQUE_EXTRACTOR_NAME, EXTRACTOR_NAME, WordNGram.PARAM_NGRAM_USE_TOP_K, 1, WordNGram.PARAM_SOURCE_LOCATION, luceneFolder.toString(), WordNGramMC.PARAM_TARGET_LOCATION, luceneFolder.toString(), WordNGram.PARAM_NGRAM_MIN_N, 1, WordNGram.PARAM_NGRAM_MAX_N, 1 };
    List<Object> parameterList = new ArrayList<Object>(Arrays.asList(parameters));
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(TestReaderSingleLabelDocumentReader.class, TestReaderSingleLabelDocumentReader.PARAM_LANGUAGE, "en", TestReaderSingleLabelDocumentReader.PARAM_SOURCE_LOCATION, "src/test/resources/ngrams/text3.txt");
    AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription metaCollector = AnalysisEngineFactory.createEngineDescription(WordNGramMC.class, parameterList.toArray());
    // run meta collector
    SimplePipeline.runPipeline(reader, segmenter, metaCollector);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) ArrayList(java.util.ArrayList) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription)

Example 75 with CollectionReaderDescription

use of org.apache.uima.collection.CollectionReaderDescription in project dkpro-tc by dkpro.

the class PosNGramTest method runFeatureExtractor.

protected void runFeatureExtractor(File luceneFolder, AnalysisEngineDescription featureExtractor) throws Exception {
    CollectionReaderDescription reader = getFeatureReader();
    AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription posTagger = AnalysisEngineFactory.createEngineDescription(OpenNlpPosTagger.class, OpenNlpPosTagger.PARAM_LANGUAGE, "en");
    SimplePipeline.runPipeline(reader, segmenter, posTagger, featureExtractor);
}
Also used : CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription)

Aggregations

CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)78 HashMap (java.util.HashMap)53 ParameterSpace (org.dkpro.lab.task.ParameterSpace)51 TcFeatureSet (org.dkpro.tc.api.features.TcFeatureSet)40 Map (java.util.Map)35 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)25 File (java.io.File)19 WekaAdapter (org.dkpro.tc.ml.weka.WekaAdapter)17 Test (org.junit.Test)14 ArrayList (java.util.ArrayList)13 LiblinearAdapter (org.dkpro.tc.ml.liblinear.LiblinearAdapter)9 NaiveBayes (weka.classifiers.bayes.NaiveBayes)9 ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription)7 LibsvmAdapter (org.dkpro.tc.ml.libsvm.LibsvmAdapter)7 Gson (com.google.gson.Gson)6 Instance (org.dkpro.tc.api.features.Instance)6 JsonDataWriter (org.dkpro.tc.core.io.JsonDataWriter)6 XgboostAdapter (org.dkpro.tc.ml.xgboost.XgboostAdapter)6 JCasIterable (org.apache.uima.fit.pipeline.JCasIterable)5 JCas (org.apache.uima.jcas.JCas)5