use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class KeywordNGramTest method initialize.
/**
 * Runs the keyword n-gram meta collector and then the feature extractor over the test document,
 * and returns the instances that the JSON data writer produced.
 *
 * <p>Two fresh folders are obtained from {@code folder} (presumably a JUnit temporary-folder
 * rule -- confirm against the enclosing class): one is used as both source and target location
 * of the extractor/meta collector, the other receives the data-writer output.
 *
 * @param includeComma
 *            value passed as {@code KeywordNGram.PARAM_KEYWORD_NGRAM_INCLUDE_COMMAS}
 * @param markSentenceLocation
 *            value passed as {@code KeywordNGram.PARAM_KEYWORD_NGRAM_MARK_SENTENCE_LOCATION}
 * @return the instances parsed from the writer output, one per line of the JSON file; the
 *         method asserts that there is exactly one
 * @throws Exception
 *             if creating the folders, building or running the pipelines, or reading the
 *             output file fails
 */
private List<Instance> initialize(boolean includeComma, boolean markSentenceLocation) throws Exception {
    File indexDir = folder.newFolder();
    File writerDir = folder.newFolder();

    // Flat name/value configuration shared by the meta collector and the feature extractor.
    // The same folder serves as both the source and the target location.
    Object[] parameters = {
        KeywordNGram.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
        KeywordNGram.PARAM_NGRAM_KEYWORDS_FILE, "src/test/resources/data/keywordlist.txt",
        KeywordNGram.PARAM_SOURCE_LOCATION, indexDir,
        KeywordNGramMC.PARAM_TARGET_LOCATION, indexDir,
        KeywordNGram.PARAM_KEYWORD_NGRAM_MARK_SENTENCE_LOCATION, markSentenceLocation,
        KeywordNGram.PARAM_KEYWORD_NGRAM_INCLUDE_COMMAS, includeComma
    };

    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabelDocumentReader.class,
            TestReaderSingleLabelDocumentReader.PARAM_SOURCE_LOCATION,
            "src/test/resources/ngrams/trees.txt");
    AnalysisEngineDescription segmenter =
            AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription metaCollector =
            AnalysisEngineFactory.createEngineDescription(KeywordNGramMC.class, parameters);

    ExternalResourceDescription extractorResource =
            ExternalResourceFactory.createExternalResourceDescription(
                    KeywordNGram.class, toString(parameters));
    List<ExternalResourceDescription> extractors =
            new ArrayList<>(Collections.singletonList(extractorResource));

    String writerClassName = JsonDataWriter.class.getName();
    AnalysisEngineDescription featExtractorConnector = TaskUtils.getFeatureExtractorConnector(
            writerDir.getAbsolutePath(), writerClassName, Constants.LM_SINGLE_LABEL,
            Constants.FM_DOCUMENT, false, false, false, false, Collections.emptyList(),
            extractors, new String[0]);

    // First pass: meta collection.
    SimplePipeline.runPipeline(reader, segmenter, metaCollector);
    // Second pass: feature extraction, which writes the JSON output.
    SimplePipeline.runPipeline(reader, segmenter, featExtractorConnector);

    Gson gson = new Gson();
    File jsonFile = new File(writerDir, JsonDataWriter.JSON_FILE_NAME);
    List<String> jsonLines = FileUtils.readLines(jsonFile, "utf-8");

    // Each line of the output file is deserialized into one Instance.
    List<Instance> instances = new ArrayList<>();
    for (String jsonLine : jsonLines) {
        Instance parsed = gson.fromJson(jsonLine, Instance.class);
        instances.add(parsed);
    }

    assertEquals(1, instances.size());
    return instances;
}
use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class KeywordPPipelineTest method getFeatureExtractorCollector.
/**
 * Builds the feature-extractor connector for {@code LuceneKeywordPFE} and stores it in
 * {@code featExtractorConnector}.
 *
 * <p>The connector is configured with {@code JsonDataWriter} as data writer,
 * {@code Constants.LM_SINGLE_LABEL} and {@code Constants.FM_PAIR}, and writes below
 * {@code outputPath}.
 *
 * @param parameterList
 *            extractor configuration; converted to an array and passed through the
 *            {@code toString(Object[])} helper before creating the resource description
 * @throws ResourceInitializationException
 *             declared for the connector construction -- exact failure conditions are not
 *             visible here
 */
@Override
protected void getFeatureExtractorCollector(List<Object> parameterList) throws ResourceInitializationException {
    Object[] rawParameters = parameterList.toArray();
    ExternalResourceDescription keywordPairExtractor =
            ExternalResourceFactory.createExternalResourceDescription(
                    LuceneKeywordPFE.class, toString(rawParameters));

    // A mutable single-element list, as the connector factory receives in the original code.
    List<ExternalResourceDescription> extractors =
            new ArrayList<>(Collections.singletonList(keywordPairExtractor));

    String targetDir = outputPath.getAbsolutePath();
    String writerClassName = JsonDataWriter.class.getName();
    featExtractorConnector = TaskUtils.getFeatureExtractorConnector(
            targetDir, writerClassName, Constants.LM_SINGLE_LABEL, Constants.FM_PAIR,
            false, false, false, false, Collections.emptyList(), extractors, new String[0]);
}
use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class NGramPPipelineTest method getFeatureExtractorCollector.
/**
 * Builds the feature-extractor connector for {@code LuceneNGramPFE} and stores it in
 * {@code featExtractorConnector}.
 *
 * <p>The connector is configured with {@code JsonDataWriter} as data writer,
 * {@code Constants.LM_SINGLE_LABEL} and {@code Constants.FM_PAIR}, and writes below
 * {@code outputPath}.
 *
 * @param parameterList
 *            extractor configuration; converted to an array and passed through the
 *            {@code toString(Object[])} helper before creating the resource description
 * @throws ResourceInitializationException
 *             declared for the connector construction -- exact failure conditions are not
 *             visible here
 */
@Override
protected void getFeatureExtractorCollector(List<Object> parameterList) throws ResourceInitializationException {
    Object[] rawParameters = parameterList.toArray();
    ExternalResourceDescription ngramPairExtractor =
            ExternalResourceFactory.createExternalResourceDescription(
                    LuceneNGramPFE.class, toString(rawParameters));

    // A mutable single-element list, as the connector factory receives in the original code.
    List<ExternalResourceDescription> extractors =
            new ArrayList<>(Collections.singletonList(ngramPairExtractor));

    String targetDir = outputPath.getAbsolutePath();
    String writerClassName = JsonDataWriter.class.getName();
    featExtractorConnector = TaskUtils.getFeatureExtractorConnector(
            targetDir, writerClassName, Constants.LM_SINGLE_LABEL, Constants.FM_PAIR,
            false, false, false, false, Collections.emptyList(), extractors, new String[0]);
}
use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class KeywordCPPipelineTest method getFeatureExtractorCollector.
/**
 * Builds the feature-extractor connector for {@code LuceneKeywordCPFE} and stores it in
 * {@code featExtractorConnector}.
 *
 * <p>The connector is configured with {@code JsonDataWriter} as data writer,
 * {@code Constants.LM_SINGLE_LABEL} and {@code Constants.FM_PAIR}, and writes below
 * {@code outputPath}.
 *
 * @param parameterList
 *            extractor configuration; converted to an array and passed through the
 *            {@code toString(Object[])} helper before creating the resource description
 * @throws ResourceInitializationException
 *             declared for the connector construction -- exact failure conditions are not
 *             visible here
 */
@Override
protected void getFeatureExtractorCollector(List<Object> parameterList) throws ResourceInitializationException {
    Object[] rawParameters = parameterList.toArray();
    ExternalResourceDescription keywordCombinedExtractor =
            ExternalResourceFactory.createExternalResourceDescription(
                    LuceneKeywordCPFE.class, toString(rawParameters));

    // A mutable single-element list, as the connector factory receives in the original code.
    List<ExternalResourceDescription> extractors =
            new ArrayList<>(Collections.singletonList(keywordCombinedExtractor));

    String targetDir = outputPath.getAbsolutePath();
    String writerClassName = JsonDataWriter.class.getName();
    featExtractorConnector = TaskUtils.getFeatureExtractorConnector(
            targetDir, writerClassName, Constants.LM_SINGLE_LABEL, Constants.FM_PAIR,
            false, false, false, false, Collections.emptyList(), extractors, new String[0]);
}
use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class NGramCPPipelineTest method getFeatureExtractorCollector.
/**
 * Builds the feature-extractor connector for {@code LuceneNGramCPFE} and stores it in
 * {@code featExtractorConnector}.
 *
 * <p>The connector is configured with {@code JsonDataWriter} as data writer,
 * {@code Constants.LM_SINGLE_LABEL} and {@code Constants.FM_PAIR}, and writes below
 * {@code outputPath}.
 *
 * @param parameterList
 *            extractor configuration; converted to an array and passed through the
 *            {@code toString(Object[])} helper before creating the resource description
 * @throws ResourceInitializationException
 *             declared for the connector construction -- exact failure conditions are not
 *             visible here
 */
@Override
protected void getFeatureExtractorCollector(List<Object> parameterList) throws ResourceInitializationException {
    Object[] rawParameters = parameterList.toArray();
    ExternalResourceDescription ngramCombinedExtractor =
            ExternalResourceFactory.createExternalResourceDescription(
                    LuceneNGramCPFE.class, toString(rawParameters));

    // A mutable single-element list, as the connector factory receives in the original code.
    List<ExternalResourceDescription> extractors =
            new ArrayList<>(Collections.singletonList(ngramCombinedExtractor));

    String targetDir = outputPath.getAbsolutePath();
    String writerClassName = JsonDataWriter.class.getName();
    featExtractorConnector = TaskUtils.getFeatureExtractorConnector(
            targetDir, writerClassName, Constants.LM_SINGLE_LABEL, Constants.FM_PAIR,
            false, false, false, false, Collections.emptyList(), extractors, new String[0]);
}
Aggregations