use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class WekaExternalResourceDemo method getParameterSpace.
/**
 * Assembles the {@link ParameterSpace} for this demo.
 *
 * <p>The space bundles a train and a test reader (both
 * {@code PairTwentyNewsgroupsReader}), a single feature set whose
 * {@code SimilarityPairFeatureExtractor} is backed by an external
 * {@code CosineSimilarityResource}, and a Weka {@code SMO} classifier
 * configuration, in single-label learning mode and pair feature mode.
 *
 * @return the configured parameter space
 * @throws ResourceInitializationException
 *             if a reader or resource description cannot be created
 */
public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
    // Reader dimension. Train/test setups use both readers, while
    // cross-validation only uses the train part.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            PairTwentyNewsgroupsReader.class,
            PairTwentyNewsgroupsReader.PARAM_LISTFILE, listFilePathTrain,
            PairTwentyNewsgroupsReader.PARAM_LANGUAGE_CODE, languageCode);
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            PairTwentyNewsgroupsReader.class,
            PairTwentyNewsgroupsReader.PARAM_LISTFILE, listFilePathTest,
            PairTwentyNewsgroupsReader.PARAM_LANGUAGE_CODE, languageCode);

    Map<String, Object> readers = new HashMap<>();
    readers.put(DIM_READER_TRAIN, trainReader);
    readers.put(DIM_READER_TEST, testReader);

    // The external resource: a cosine similarity measure with L2 normalization,
    // handed to the pair feature extractor as its text similarity resource.
    ExternalResourceDescription similarityResource = ExternalResourceFactory
            .createExternalResourceDescription(
                    CosineSimilarityResource.class,
                    CosineSimilarityResource.PARAM_NORMALIZATION,
                    NormalizationMode.L2.toString());
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(
                    SimilarityPairFeatureExtractor.class,
                    SimilarityPairFeatureExtractor.PARAM_TEXT_SIMILARITY_RESOURCE,
                    similarityResource));
    Dimension<TcFeatureSet> featureSets = Dimension.create(DIM_FEATURE_SET, featureSet);

    // Machine learning adapter configuration: Weka adapter running SMO.
    Map<String, Object> wekaConfig = new HashMap<>();
    wekaConfig.put(DIM_CLASSIFICATION_ARGS, new Object[] { new WekaAdapter(), SMO.class.getName() });
    wekaConfig.put(DIM_DATA_WRITER, new WekaAdapter().getDataWriterClass().getName());
    wekaConfig.put(DIM_FEATURE_USE_SPARSE, new WekaAdapter().useSparseFeatures());
    Dimension<Map<String, Object>> classifierBundle = Dimension.createBundle("config", wekaConfig);

    return new ParameterSpace(
            Dimension.createBundle(DIM_READERS, readers),
            Dimension.create(DIM_LEARNING_MODE, LM_SINGLE_LABEL),
            Dimension.create(DIM_FEATURE_MODE, FM_PAIR),
            featureSets,
            classifierBundle);
}
use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class ExtractFeaturesConnectorTest method extractFeaturesConnectorRegressionTest.
/**
 * Runs the feature extractor connector in regression / document mode over the
 * test documents and checks the instances written by the JSON data writer:
 * two instances, one unique outcome, and "0.45" as the first outcome.
 */
@Test
public void extractFeaturesConnectorRegressionTest() throws Exception {
    File outputPath = folder.newFolder();

    // we do not need parameters here, but in case we do :)
    Object[] extractorParams = new Object[] {
            NoopFeatureExtractor.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            UnitContextMetaCollector.PARAM_CONTEXT_FOLDER, Constants.ID_CONTEXT_KEY };
    List<ExternalResourceDescription> extractors = new ArrayList<>();
    extractors.add(ExternalResourceFactory.createExternalResourceDescription(
            NoopFeatureExtractor.class, extractorParams));

    // Pipeline components: reader -> segmenter -> document-mode annotator -> connector.
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderRegression.class,
            TestReaderRegression.PARAM_SOURCE_LOCATION, "src/test/resources/data/*.txt");
    AnalysisEngineDescription segmenter = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription documentAnnotator = AnalysisEngineFactory.createEngineDescription(
            DocumentModeAnnotator.class,
            DocumentModeAnnotator.PARAM_FEATURE_MODE, Constants.FM_DOCUMENT);
    AnalysisEngineDescription connector = TaskUtils.getFeatureExtractorConnector(
            outputPath.getAbsolutePath(),
            JsonDataWriter.class.getName(),
            Constants.LM_REGRESSION,
            Constants.FM_DOCUMENT,
            false, false, false, false,
            Collections.emptyList(),
            extractors,
            new String[] {});

    SimplePipeline.runPipeline(reader, segmenter, documentAnnotator, connector);

    // Each line of the written file is deserialized into one Instance.
    File jsonFile = new File(outputPath, JsonDataWriter.JSON_FILE_NAME);
    Gson gson = new Gson();
    List<Instance> instances = new ArrayList<>();
    FileUtils.readLines(jsonFile, "utf-8")
            .forEach(line -> instances.add(gson.fromJson(line, Instance.class)));

    assertEquals(2, instances.size());
    assertEquals(1, getUniqueOutcomes(instances));
    assertEquals("0.45", instances.get(0).getOutcome());

    // Dump the raw writer output to stdout.
    System.out.println(FileUtils.readFileToString(jsonFile, "utf-8"));
}
use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class LuceneMetaCollectionBasedFeatureTestBase method makeResource.
/**
 * Wraps a single external resource description in a new mutable list.
 *
 * @param class1
 *            the resource implementation class to describe
 * @param parameters
 *            the configuration values passed on to
 *            {@code ExternalResourceFactory.createExternalResourceDescription}
 * @return a new {@link ArrayList} containing exactly the created description
 */
protected List<ExternalResourceDescription> makeResource(Class<? extends Resource_ImplBase> class1, Object[] parameters) {
    List<ExternalResourceDescription> resources = new ArrayList<>();
    resources.add(ExternalResourceFactory.createExternalResourceDescription(class1, parameters));
    return resources;
}
use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class NgramUnitTest method runFeatureExtractor.
/**
 * Runs a {@code WordNGram} feature extraction pipeline in single-label / unit
 * mode over {@code src/test/resources/ngrams/text3.txt}, with every token
 * annotated as a unit.
 *
 * @param luceneFolder
 *            folder used as both the source and the target location of the
 *            n-gram extractor configuration
 * @return the newly created temporary folder passed to the feature extractor
 *         connector as its output path
 * @throws Exception
 *             if the folder cannot be created or the pipeline fails
 */
private File runFeatureExtractor(File luceneFolder) throws Exception {
    File outputPath = folder.newFolder();

    // Unigrams only (min n = max n = 1), top-k set to 1.
    Object[] ngramParams = new Object[] {
            WordNGram.PARAM_UNIQUE_EXTRACTOR_NAME, EXTRACTOR_NAME,
            WordNGram.PARAM_NGRAM_USE_TOP_K, "1",
            WordNGram.PARAM_SOURCE_LOCATION, luceneFolder.toString(),
            WordNGramMC.PARAM_TARGET_LOCATION, luceneFolder.toString(),
            WordNGram.PARAM_NGRAM_MIN_N, "1",
            WordNGram.PARAM_NGRAM_MAX_N, "1" };
    List<ExternalResourceDescription> extractors = new ArrayList<>();
    extractors.add(ExternalResourceFactory.createExternalResourceDescription(WordNGram.class, ngramParams));

    // Pipeline components: reader -> segmenter -> token-as-unit annotator -> connector.
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabelDocumentReader.class,
            TestReaderSingleLabelDocumentReader.PARAM_LANGUAGE, "en",
            TestReaderSingleLabelDocumentReader.PARAM_SOURCE_LOCATION, "src/test/resources/ngrams/text3.txt",
            TestReaderSingleLabelDocumentReader.PARAM_SUPPRESS_DOCUMENT_ANNOTATION, true);
    AnalysisEngineDescription segmenter = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription unitAnnotator = AnalysisEngineFactory
            .createEngineDescription(EachTokenAsUnitAnnotator.class);
    AnalysisEngineDescription connector = TaskUtils.getFeatureExtractorConnector(
            outputPath.getAbsolutePath(),
            JsonDataWriter.class.getName(),
            Constants.LM_SINGLE_LABEL,
            Constants.FM_UNIT,
            false, false, false, false,
            Collections.emptyList(),
            extractors,
            new String[] {});

    SimplePipeline.runPipeline(reader, segmenter, unitAnnotator, connector);
    return outputPath;
}
use of org.apache.uima.resource.ExternalResourceDescription in project dkpro-tc by dkpro.
the class TokenLenTest method prepareFeatureExtractor.
/**
 * Builds the feature extractor connector for a single extractor, configured
 * for single-label learning in unit feature mode with the JSON data writer.
 *
 * @param outputPath
 *            folder whose absolute path is handed to the connector
 * @param class1
 *            the feature extractor resource class
 * @param parameters
 *            configuration values for the feature extractor resource
 * @return the analysis engine description of the connector
 * @throws ResourceInitializationException
 *             if the connector description cannot be created
 */
@Override
protected AnalysisEngineDescription prepareFeatureExtractor(File outputPath, Class<? extends Resource_ImplBase> class1, Object[] parameters) throws ResourceInitializationException {
    List<ExternalResourceDescription> extractors = makeResource(class1, parameters);
    return TaskUtils.getFeatureExtractorConnector(
            outputPath.getAbsolutePath(),
            JsonDataWriter.class.getName(),
            Constants.LM_SINGLE_LABEL,
            Constants.FM_UNIT,
            false, false, false, false,
            Collections.emptyList(),
            extractors,
            new String[] {});
}
Aggregations