Use of org.talend.dataprep.transformation.pipeline.Pipeline in the Talend data-prep project — from the class ActionTestWorkbench, method test.
public static void test(Collection<DataSetRow> input, AnalyzerService analyzerService, ActionRegistry actionRegistry, RunnableAction... actions) {
    // Collect the varargs actions into a list for the pipeline builder.
    final List<RunnableAction> actionList = new ArrayList<>();
    Collections.addAll(actionList, actions);

    // Build a throw-away data set whose metadata mirrors the first input row's metadata.
    // NOTE(review): assumes input is non-empty — an empty collection would fail on next().
    final RowMetadata inputRowMetadata = input.iterator().next().getRowMetadata();
    final DataSetMetadata metadata = new DataSetMetadata();
    metadata.setRowMetadata(inputRowMetadata);
    final DataSet dataSet = new DataSet();
    dataSet.setMetadata(metadata);
    dataSet.setRecords(input.stream());

    // Run all actions through a full pipeline, capturing the output in a test node.
    final TestOutputNode outputNode = new TestOutputNode(input);
    final Pipeline pipeline = Pipeline.Builder.builder() //
            .withActionRegistry(actionRegistry) //
            .withInitialMetadata(inputRowMetadata, true) //
            .withActions(actionList) //
            .withAnalyzerService(analyzerService) //
            .withStatisticsAdapter(new StatisticsAdapter(40)) //
            .withOutput(() -> outputNode) //
            .build();
    pipeline.execute(dataSet);

    // Some tests rely on the metadata changes in the provided metadata so set back modified columns in row metadata
    // (although this should be avoided in tests).
    // TODO Make this method return the modified metadata iso. setting modified columns.
    inputRowMetadata.setColumns(outputNode.getMetadata().getColumns());
    for (final DataSetRow row : input) {
        row.setRowMetadata(inputRowMetadata);
    }
}
Use of org.talend.dataprep.transformation.pipeline.Pipeline in the Talend data-prep project — from the class PipelineDiffTransformer, method buildExecutable.
/**
 * Starts the transformation in preview mode.
 *
 * @param input the dataset content.
 * @param configuration The {@link Configuration configuration} for this transformation.
 */
@Override
public ExecutableTransformer buildExecutable(DataSet input, Configuration configuration) {
    Validate.notNull(input, "Input cannot be null.");
    final PreviewConfiguration previewConfiguration = (PreviewConfiguration) configuration;
    final RowMetadata rowMetadata = input.getMetadata().getRowMetadata();
    final TransformerWriter writer = //
            writerRegistrationService.getWriter(configuration.formatId(), configuration.output(), configuration.getArguments());

    // One pipeline per action set: the reference (current) actions and the preview actions.
    final Node diffWriterNode = new DiffWriterNode(writer);
    final Pipeline referencePipeline = buildPipeline(rowMetadata, previewConfiguration.getReferenceActions());
    final Pipeline previewPipeline = buildPipeline(rowMetadata, previewConfiguration.getPreviewActions());

    // Restrict the source to the wanted TDP id range when explicit indexes were supplied.
    final List<Long> indexes = previewConfiguration.getIndexes();
    final boolean hasIndexes = indexes != null && !indexes.isEmpty();
    final Long minIndex = hasIndexes ? indexes.stream().mapToLong(Long::longValue).min().getAsLong() : 0L;
    final Long maxIndex = hasIndexes ? indexes.stream().mapToLong(Long::longValue).max().getAsLong() : Long.MAX_VALUE;
    final Predicate<DataSetRow> filter = isWithinWantedIndexes(minIndex, maxIndex);

    // Dispatch each filtered row to both pipelines, then zip their outputs into the diff writer.
    final Node diffPipeline = NodeBuilder.filteredSource(filter) //
            .dispatchTo(referencePipeline, previewPipeline) //
            .zipTo(diffWriterNode) //
            .build();

    // Wrap the diff pipeline into an ExecutableTransformer.
    return new ExecutableTransformer() {

        @Override
        public void execute() {
            try {
                // Print pipeline before execution (for debug purposes).
                diffPipeline.logStatus(LOGGER, "Before execution: {}");
                input.getRecords().forEach(row -> diffPipeline.exec().receive(row, rowMetadata));
                diffPipeline.exec().signal(Signal.END_OF_STREAM);
            } finally {
                // Print pipeline after execution (for debug purposes).
                diffPipeline.logStatus(LOGGER, "After execution: {}");
            }
        }

        @Override
        public void signal(Signal signal) {
            diffPipeline.exec().signal(signal);
        }
    };
}
Use of org.talend.dataprep.transformation.pipeline.Pipeline in the Talend data-prep project — from the class PipelineTransformer, method buildExecutable.
/**
 * Builds an executable transformation that runs the configured actions over the input
 * data set and writes the result (records and metadata) through the configured writer.
 *
 * @param input the dataset content.
 * @param configuration the transformation configuration (actions, filters, output...).
 * @return an {@link ExecutableTransformer} ready to be executed.
 */
@Override
public ExecutableTransformer buildExecutable(DataSet input, Configuration configuration) {
    final RowMetadata rowMetadata = input.getMetadata().getRowMetadata();
    // Prepare the fallback row metadata (an empty metadata matching the input columns).
    final RowMetadata fallBackRowMetadata = transformationRowMetadataUtils.getMatchingEmptyRowMetadata(rowMetadata);

    final TransformerWriter writer = //
            writerRegistrationService.getWriter(configuration.formatId(), configuration.output(), configuration.getArguments());
    final ConfiguredCacheWriter metadataWriter = new ConfiguredCacheWriter(contentCache, DEFAULT);
    final TransformationMetadataCacheKey metadataKey = //
            cacheKeyGenerator.generateMetadataKey(configuration.getPreparationId(), configuration.stepId(), configuration.getSourceType());
    final PreparationMessage preparation = configuration.getPreparation();

    // Given a step, resolves the row metadata attached to its previous/parent step (null when absent).
    final Function<Step, RowMetadata> previousStepRowMetadataSupplier = //
            step -> Optional.ofNullable(step.getParent()).map(preparationUpdater::get).orElse(null);

    final Pipeline pipeline = Pipeline.Builder.builder() //
            .withAnalyzerService(analyzerService) //
            .withActionRegistry(actionRegistry) //
            .withPreparation(preparation) //
            .withActions(actionParser.parse(configuration.getActions())) //
            .withInitialMetadata(rowMetadata, configuration.volume() == SMALL) //
            .withMonitor(configuration.getMonitor()) //
            .withFilter(configuration.getFilter()) //
            .withLimit(configuration.getLimit()) //
            .withFilterOut(configuration.getOutFilter()) //
            .withOutput(() -> new WriterNode(writer, metadataWriter, metadataKey, fallBackRowMetadata)) //
            .withStatisticsAdapter(adapter) //
            .withStepMetadataSupplier(previousStepRowMetadataSupplier) //
            .withGlobalStatistics(configuration.isGlobalStatistics()) //
            .allowMetadataChange(configuration.isAllowMetadataChange()) //
            .build();

    // Wrap this transformer into an executable transformer.
    return new ExecutableTransformer() {

        @Override
        public void execute() {
            try {
                LOGGER.debug("Before transformation: {}", pipeline);
                pipeline.execute(input);
            } finally {
                LOGGER.debug("After transformation: {}", pipeline);
            }
            // Once executed, push each step's updated metadata back to the preparation.
            if (preparation != null) {
                pipeline.accept(new UpdatedStepVisitor(preparationUpdater));
            }
        }

        @Override
        public void signal(Signal signal) {
            pipeline.signal(signal);
        }
    };
}
Aggregations