use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class ActionTestWorkbench method test.
public static void test(Collection<DataSetRow> input, AnalyzerService analyzerService, ActionRegistry actionRegistry,
        RunnableAction... actions) {
    final List<RunnableAction> allActions = new ArrayList<>();
    Collections.addAll(allActions, actions);

    final DataSet dataSet = new DataSet();
    final RowMetadata rowMetadata = input.iterator().next().getRowMetadata();
    final DataSetMetadata dataSetMetadata = new DataSetMetadata();
    dataSetMetadata.setRowMetadata(rowMetadata);
    dataSet.setMetadata(dataSetMetadata);
    dataSet.setRecords(input.stream());

    final TestOutputNode outputNode = new TestOutputNode(input);
    Pipeline pipeline = Pipeline.Builder.builder() //
            .withActionRegistry(actionRegistry) //
            .withInitialMetadata(rowMetadata, true) //
            .withActions(allActions) //
            .withAnalyzerService(analyzerService) //
            .withStatisticsAdapter(new StatisticsAdapter(40)) //
            .withOutput(() -> outputNode) //
            .build();
    pipeline.execute(dataSet);

    // Some tests rely on metadata changes made to the provided metadata, so copy the modified columns back
    // into the row metadata (although this should be avoided in tests).
    // TODO Make this method return the modified metadata instead of setting the modified columns.
    rowMetadata.setColumns(outputNode.getMetadata().getColumns());
    for (DataSetRow dataSetRow : input) {
        dataSetRow.setRowMetadata(rowMetadata);
    }
}
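For context, a caller typically builds a few rows sharing one RowMetadata and passes them in together with the actions under test. The snippet below is a minimal sketch of such a call; the column and row setup (ColumnMetadata.Builder, RowMetadata's list constructor, DataSetRow#set) follows the usual data-prep test style, and the fixtures analyzerService, actionRegistry and uppercaseAction are assumed to exist in the surrounding test class.

// Hypothetical caller sketch: run a single action against two rows sharing one RowMetadata.
// analyzerService, actionRegistry and uppercaseAction are assumed fixtures of the test class.
final RowMetadata rowMetadata = new RowMetadata(Collections.singletonList(
        ColumnMetadata.Builder.column().id(0).name("name").type(Type.STRING).build()));
final DataSetRow row1 = new DataSetRow(rowMetadata);
row1.set("0000", "john");
final DataSetRow row2 = new DataSetRow(rowMetadata);
row2.set("0000", "jane");

ActionTestWorkbench.test(Arrays.asList(row1, row2), analyzerService, actionRegistry, uppercaseAction);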
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class PipelineTest method testPipeline.
@Test
public void testPipeline() throws Exception {
    // given
    final Pipeline pipeline = new Pipeline(NodeBuilder.source().to(output).build());
    final RowMetadata rowMetadata = new RowMetadata();
    final DataSetRow row1 = new DataSetRow(rowMetadata);
    final DataSetRow row2 = new DataSetRow(rowMetadata);
    final List<DataSetRow> records = new ArrayList<>();
    records.add(row1);
    records.add(row2);

    final DataSet dataSet = new DataSet();
    final DataSetMetadata metadata = new DataSetMetadata();
    metadata.setRowMetadata(rowMetadata);
    dataSet.setMetadata(metadata);
    dataSet.setRecords(records.stream());

    // when
    pipeline.execute(dataSet);

    // then
    assertThat(output.getCount(), is(2));
    assertThat(output.getRow(), is(row2));
    assertThat(output.getMetadata(), is(rowMetadata));
    assertThat(output.getSignal(), is(END_OF_STREAM));
}
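The output field used above is a terminal node that records whatever reaches the end of the pipeline so the assertions can inspect it. The test class defines its own collector against the project's Node API; the sketch below only illustrates its general shape, with a simplified stand-in interface (SimpleReceiver) used instead of the real Node contract, so all names here are assumptions.

// Simplified stand-in for the pipeline's terminal-node contract; the real test implements the project's Node API.
interface SimpleReceiver {
    void receive(DataSetRow row, RowMetadata metadata);
    void signal(Signal signal);
}

// Hypothetical collector equivalent to the test's "output" field: it counts rows and keeps
// the last row, metadata and signal it received.
class CollectingOutput implements SimpleReceiver {

    private int count = 0;
    private DataSetRow row;
    private RowMetadata metadata;
    private Signal signal;

    @Override
    public void receive(DataSetRow row, RowMetadata metadata) {
        count++;
        this.row = row;
        this.metadata = metadata;
    }

    @Override
    public void signal(Signal signal) {
        this.signal = signal;
    }

    int getCount() { return count; }
    DataSetRow getRow() { return row; }
    RowMetadata getMetadata() { return metadata; }
    Signal getSignal() { return signal; }
}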
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class Pipeline method execute.
public void execute(DataSet dataSet) {
    final RowMetadata rowMetadata = dataSet.getMetadata().getRowMetadata().clone();
    try (Stream<DataSetRow> records = dataSet.getRecords()) {
        // Hold the lock on isFinished so that signal(STOP) waits for the whole pipeline to finish.
        synchronized (isFinished) {
            AtomicLong counter = new AtomicLong();
            // peek/allMatch is used so the stream stops as soon as isStopped becomes true:
            // allMatch short-circuits on the first false predicate result. With a plain
            // forEach(row -> if (isStopped) ...) only the per-row processing would be skipped,
            // but every row of the stream would still be consumed.
            // To be replaced once the Stream API offers a better fit (e.g. takeWhile in Java 9+).
            records.peek(row -> {
                node.exec().receive(row, rowMetadata);
                counter.addAndGet(1L);
            }).allMatch(row -> !isStopped.get());
            LOG.debug("{} rows sent in the pipeline", counter.get());
            node.exec().signal(Signal.END_OF_STREAM);
        }
    }
}
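The peek/allMatch idiom above is independent of data-prep and can be demonstrated with the JDK alone. The snippet below is a small, self-contained illustration of why allMatch stops pulling elements once the flag flips, whereas forEach would consume the whole stream; the names (StopStreamDemo, stopped, processed) are purely illustrative.

import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.IntStream;

public class StopStreamDemo {

    public static void main(String[] args) {
        AtomicBoolean stopped = new AtomicBoolean(false);
        AtomicLong processed = new AtomicLong();

        // allMatch short-circuits on the first false predicate result, so once "stopped"
        // flips to true, no further element is pulled from the stream.
        IntStream.range(0, 1_000_000)
                .peek(i -> {
                    processed.incrementAndGet();
                    if (i == 9) {
                        stopped.set(true); // e.g. an external cancellation request
                    }
                })
                .allMatch(i -> !stopped.get());

        // Prints 10: the stream stopped right after the flag was raised.
        System.out.println(processed.get() + " elements processed");

        // On Java 9+ the same intent reads more directly:
        // IntStream.range(0, 1_000_000).takeWhile(i -> !stopped.get()).forEach(...);
    }
}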
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class DataSetJSONTest method shouldDealWithNoRecords.
@Test
public void shouldDealWithNoRecords() throws Exception {
    // given
    final InputStream input = this.getClass().getResourceAsStream("no_records.json");

    // when
    DataSet dataSet = from(input);

    // then
    final List<DataSetRow> records = dataSet.getRecords().collect(Collectors.toList());
    assertTrue(records.isEmpty());
}
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class DataSetJSONTest method testRoundTrip.
@Test
public void testRoundTrip() throws Exception {
    DataSet dataSet = from(DataSetJSONTest.class.getResourceAsStream("test3.json"));
    final DataSetMetadata metadata = dataSet.getMetadata();
    assertNotNull(metadata);
    metadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ",");
    metadata.getContent().setFormatFamilyId(new CSVFormatFamily().getBeanId());

    StringWriter writer = new StringWriter();
    to(dataSet, writer);
    assertThat(writer.toString(), sameJSONAsFile(DataSetJSONTest.class.getResourceAsStream("test3.json")));
}
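The from and to helpers used by both JSON tests are not shown in these excerpts. A minimal sketch of what such helpers could look like with a plain Jackson ObjectMapper is given below; the actual DataSetJSONTest presumably routes through the mapper configured by the data-prep JSON module, so the field name and wiring here are assumptions.

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;

import com.fasterxml.jackson.databind.ObjectMapper;

// Hypothetical helpers, assuming an ObjectMapper that knows how to (de)serialize DataSet.
private static final ObjectMapper MAPPER = new ObjectMapper();

static DataSet from(InputStream input) throws IOException {
    return MAPPER.readerFor(DataSet.class).readValue(input);
}

static void to(DataSet dataSet, StringWriter writer) throws IOException {
    MAPPER.writerFor(DataSet.class).writeValue(writer, dataSet);
}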