Use of org.talend.dataprep.quality.AnalyzerService in project data-prep by Talend.
From the class Analyzers, method analyzerService.
/**
 * Creates the {@link AnalyzerService} bean.
 * <p>
 * Before the service is instantiated, this method:
 * <ul>
 * <li>registers {@code dataqualityIndexesLocation} as the local registry path of the
 * {@code CategoryRegistryManager};</li>
 * <li>selects the {@code ClassPathDirectory} provider from {@code luceneIndexStrategy}
 * (case-insensitive): {@code "basic"} installs a {@code BasicProvider}, {@code "singleton"} installs a
 * {@code SingletonProvider}. Any other value (including {@code null}) is reported with a warning and
 * falls back to the {@code SingletonProvider}.</li>
 * </ul>
 *
 * @return a new analyzer service backed by a {@code StandardDictionarySnapshotProvider}.
 */
@Bean
public AnalyzerService analyzerService() {
    LOGGER.info("Data Quality strategy is {} and located in {}", luceneIndexStrategy, dataqualityIndexesLocation);
    LOGGER.info("DataQuality indexes location : '{}'", this.dataqualityIndexesLocation);
    CategoryRegistryManager.setLocalRegistryPath(this.dataqualityIndexesLocation);

    // Configure DQ index creation strategy (one copy per use or one copy shared by all calls).
    LOGGER.info("Analyzer service lucene index strategy set to '{}'", luceneIndexStrategy);
    if ("basic".equalsIgnoreCase(luceneIndexStrategy)) {
        ClassPathDirectory.setProvider(new ClassPathDirectory.BasicProvider());
    } else {
        // "singleton" is both the explicit choice and the default: only warn when the value is unknown.
        if (!"singleton".equalsIgnoreCase(luceneIndexStrategy)) {
            LOGGER.warn("Not a supported strategy for lucene indexes: '{}'", luceneIndexStrategy);
        }
        ClassPathDirectory.setProvider(new ClassPathDirectory.SingletonProvider());
    }

    // The indexes location has already been logged above; it is not logged a second time here.
    return new AnalyzerService(new StandardDictionarySnapshotProvider());
}
Use of org.talend.dataprep.quality.AnalyzerService in project data-prep by Talend.
From the class ReplaceCellValueTest, method should_tag_invalid_value.
/**
 * Replaces the value "True" with "NotABoolean" in a column whose type is forced to boolean, then checks
 * that the cell holds the new value and that the row's invalid flag references column "0000".
 */
@Test
public void should_tag_invalid_value() {
    // given: one row (TDP id 1) whose first column is explicitly typed as boolean
    final DataSetRow testedRow = getRow("True");
    testedRow.setTdpId(1L);

    final ColumnMetadata firstColumn = testedRow.getRowMetadata().getColumns().get(0);
    firstColumn.setType(Type.BOOLEAN.getName());
    firstColumn.setTypeForced(true);

    final Map<String, String> replaceParameters = getParameters(1L, "True", "NotABoolean");

    // when: the action runs against that single row
    final AnalyzerService service = new AnalyzerService();
    ActionTestWorkbench.test(Collections.singleton(testedRow), service, actionRegistry,
            factory.create(action, replaceParameters));

    // then: the value is replaced and the column is listed in the invalid marker
    assertThat(testedRow.get("0000"), is("NotABoolean"));
    assertThat(testedRow.getInternalValues().get(FlagNames.TDP_INVALID), is(",0000"));
}
Use of org.talend.dataprep.quality.AnalyzerService in project data-prep by Talend.
From the class PipelineTransformer, method buildExecutable.
/**
 * Builds a {@link Pipeline} from the data set metadata and the given configuration, and wraps it in an
 * {@link ExecutableTransformer}.
 *
 * @param input the data set to transform; its row metadata seeds the pipeline and it is the argument
 * passed to {@code pipeline.execute(...)}.
 * @param configuration supplies the output format, actions, filters, limit, monitor, preparation and
 * the identifiers used to generate the metadata cache key.
 * @return an executable whose {@code execute()} runs the pipeline (then visits it with an
 * {@code UpdatedStepVisitor} when a preparation is configured) and whose {@code signal()} forwards to
 * the pipeline.
 */
@Override
public ExecutableTransformer buildExecutable(DataSet input, Configuration configuration) {
    final RowMetadata initialMetadata = input.getMetadata().getRowMetadata();

    // row metadata handed to the writer node as a fallback
    final RowMetadata fallbackMetadata = transformationRowMetadataUtils.getMatchingEmptyRowMetadata(initialMetadata);

    final TransformerWriter outputWriter = writerRegistrationService.getWriter( //
            configuration.formatId(), //
            configuration.output(), //
            configuration.getArguments());
    final ConfiguredCacheWriter cacheWriter = new ConfiguredCacheWriter(contentCache, DEFAULT);
    final TransformationMetadataCacheKey cacheKey = cacheKeyGenerator.generateMetadataKey( //
            configuration.getPreparationId(), //
            configuration.stepId(), //
            configuration.getSourceType());
    final PreparationMessage preparation = configuration.getPreparation();

    // given a step, looks up the row metadata of its parent step (null when the step has no parent)
    final Function<Step, RowMetadata> parentMetadataLookup = step -> Optional
            .ofNullable(step.getParent())
            .map(id -> preparationUpdater.get(id))
            .orElse(null);

    final Pipeline transformation = Pipeline.Builder.builder()
            .withAnalyzerService(analyzerService)
            .withActionRegistry(actionRegistry)
            .withPreparation(preparation)
            .withActions(actionParser.parse(configuration.getActions()))
            .withInitialMetadata(initialMetadata, configuration.volume() == SMALL)
            .withMonitor(configuration.getMonitor())
            .withFilter(configuration.getFilter())
            .withLimit(configuration.getLimit())
            .withFilterOut(configuration.getOutFilter())
            .withOutput(() -> new WriterNode(outputWriter, cacheWriter, cacheKey, fallbackMetadata))
            .withStatisticsAdapter(adapter)
            .withStepMetadataSupplier(parentMetadataLookup)
            .withGlobalStatistics(configuration.isGlobalStatistics())
            .allowMetadataChange(configuration.isAllowMetadataChange())
            .build();

    // expose the pipeline through the ExecutableTransformer contract
    return new ExecutableTransformer() {

        @Override
        public void execute() {
            try {
                LOGGER.debug("Before transformation: {}", transformation);
                transformation.execute(input);
            } finally {
                // logged whether or not the execution succeeded
                LOGGER.debug("After transformation: {}", transformation);
            }
            if (preparation == null) {
                // no preparation configured: nothing to visit
                return;
            }
            final UpdatedStepVisitor stepVisitor = new UpdatedStepVisitor(preparationUpdater);
            transformation.accept(stepVisitor);
        }

        @Override
        public void signal(Signal signal) {
            transformation.signal(signal);
        }
    };
}
Aggregations