use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class PreparationExportStrategy method performPreparation.
public void performPreparation(final ExportParameters parameters, final OutputStream outputStream) {
    final String stepId = parameters.getStepId();
    final String preparationId = parameters.getPreparationId();
    final String formatName = parameters.getExportType();
    final PreparationMessage preparation = getPreparation(preparationId, stepId);
    final String dataSetId = preparation.getDataSetId();
    final ExportFormat format = getFormat(parameters.getExportType());
    // Get the dataset content (in an auto-closeable block to make sure it is properly closed)
    boolean releasedIdentity = false;
    // Allow dataset and dataset metadata access regardless of share status
    securityProxy.asTechnicalUser();
    final DataSetGet dataSetGet = applicationContext.getBean(DataSetGet.class, dataSetId, false, true);
    final DataSetGetMetadata dataSetGetMetadata = applicationContext.getBean(DataSetGetMetadata.class, dataSetId);
    try (InputStream datasetContent = dataSetGet.execute()) {
        try (JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(datasetContent, UTF_8))) {
            // "head" is not allowed as a step id
            final String version = getCleanStepId(preparation, stepId);
            // Create dataset
            final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
            dataSet.setMetadata(dataSetGetMetadata.execute());
            // All good, can already release identity
            securityProxy.releaseIdentity();
            releasedIdentity = true;
            // Get the actions to apply (no preparation ==> dataset export ==> no actions)
            final String actions = getActions(preparationId, version);
            final TransformationCacheKey key = cacheKeyGenerator.generateContentKey( //
                    dataSetId, //
                    preparationId, //
                    version, //
                    formatName, //
                    parameters.getFrom(), //
                    parameters.getArguments(), //
                    parameters.getFilter());
            LOGGER.debug("Cache key: " + key.getKey());
            LOGGER.debug("Cache key details: " + key.toString());
            try (final TeeOutputStream tee = new TeeOutputStream(outputStream,
                    contentCache.put(key, ContentCache.TimeToLive.DEFAULT))) {
                final Configuration configuration = Configuration.builder() //
                        .args(parameters.getArguments()) //
                        .outFilter(rm -> filterService.build(parameters.getFilter(), rm)) //
                        .sourceType(parameters.getFrom()) //
                        .format(format.getName()) //
                        .actions(actions) //
                        .preparation(preparation) //
                        .stepId(version) //
                        .volume(Configuration.Volume.SMALL) //
                        .output(tee) //
                        .limit(limit) //
                        .build();
                factory.get(configuration).buildExecutable(dataSet, configuration).execute();
                tee.flush();
            } catch (Throwable e) { // NOSONAR
                contentCache.evict(key);
                throw e;
            }
        }
    } catch (TDPException e) {
        throw e;
    } catch (Exception e) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_TRANSFORM_DATASET, e);
    } finally {
        if (!releasedIdentity) {
            // Release identity in case of error
            securityProxy.releaseIdentity();
        }
    }
}
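The heart of this method is pairing the dataset content with its metadata before handing both to the transformer. A condensed sketch of just that flow, reusing the names from the snippet above and omitting the security proxy, caching, and error handling:

final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser); // dataset content
dataSet.setMetadata(dataSetGetMetadata.execute());                         // dataset metadata
final Configuration configuration = Configuration.builder()
        .format(format.getName())
        .actions(actions)
        .preparation(preparation)
        .output(outputStream)
        .build();
factory.get(configuration).buildExecutable(dataSet, configuration).execute();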
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class AggregationServiceTest method shouldNotAggregateBecauseNoGroupBy.
@Test(expected = TDPException.class)
public void shouldNotAggregateBecauseNoGroupBy() {
    final AggregationParameters params = new AggregationParameters();
    params.addOperation(new AggregationOperation("0001", Operator.AVERAGE));
    service.aggregate(params, new DataSet());
}
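For contrast, a call that should pass validation would also declare a group-by column. A minimal sketch, assuming AggregationParameters exposes a setGroupBy(...) setter (not shown in this snippet) and that dataSet carries actual records:

final AggregationParameters params = new AggregationParameters();
params.setGroupBy(Collections.singletonList("0000"));                    // assumed setter: group by the first column
params.addOperation(new AggregationOperation("0001", Operator.AVERAGE));
service.aggregate(params, dataSet);                                      // no TDPException expected once a group-by is present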
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class PipelineTest method testCancelledPipeline.
@Test
public void testCancelledPipeline() throws Exception {
    // given
    final Pipeline pipeline = new Pipeline(NodeBuilder.source().to(output).build());
    final RowMetadata rowMetadata = new RowMetadata();
    final DataSetRow row1 = new DataSetRow(rowMetadata);
    final DataSetRow row2 = new DataSetRow(rowMetadata);
    final List<DataSetRow> records = new ArrayList<>();
    records.add(row1);
    records.add(row2);
    final DataSet dataSet = new DataSet();
    final DataSetMetadata metadata = new DataSetMetadata();
    metadata.setRowMetadata(rowMetadata);
    dataSet.setMetadata(metadata);
    dataSet.setRecords(records.stream());
    // when
    pipeline.signal(Signal.STOP);
    pipeline.execute(dataSet);
    // then
    assertThat(output.getCount(), is(1));
    assertThat(output.getRow(), is(row1));
    assertThat(output.getMetadata(), is(rowMetadata));
    assertThat(output.getSignal(), is(END_OF_STREAM));
}
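Without the STOP signal, the same pipeline would be expected to emit both rows. A hedged sketch of that baseline, assuming output counts every emitted row as the assertions above suggest:

pipeline.execute(dataSet);                       // no Signal.STOP beforehand
assertThat(output.getCount(), is(2));            // both row1 and row2 flow through
assertThat(output.getSignal(), is(END_OF_STREAM));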
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class DataSetGetMetadata method configureLimitedDataset.
private void configureLimitedDataset(final String dataSetId) {
    execute(() -> new HttpGet(datasetServiceUrl + "/datasets/" + dataSetId + "/metadata"));
    on(HttpStatus.OK).then((req, res) -> {
        try {
            final DataSet dataSet = objectMapper.readerFor(DataSet.class).readValue(res.getEntity().getContent());
            return dataSet.getMetadata();
        } catch (IOException e) {
            throw new TDPException(CommonErrorCodes.UNEXPECTED_EXCEPTION, e);
        } finally {
            req.releaseConnection();
        }
    });
}
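This is the same command obtained and executed in PreparationExportStrategy above. Used on its own, the pattern looks like this (a sketch reusing names from the first snippet):

final DataSetGetMetadata dataSetGetMetadata = applicationContext.getBean(DataSetGetMetadata.class, dataSetId);
final DataSetMetadata metadata = dataSetGetMetadata.execute(); // issues GET /datasets/{id}/metadata and returns dataSet.getMetadata()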
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class TypeDetectionNode method signal.
@Override
public void signal(Signal signal) {
    final long start = System.currentTimeMillis();
    try {
        if (signal == Signal.END_OF_STREAM || signal == Signal.CANCEL || signal == Signal.STOP) {
            // End temporary output
            generator.writeEndArray();
            generator.writeEndObject();
            generator.flush();
            generator.close();
            // Send stored records to next steps
            final ObjectMapper mapper = new ObjectMapper();
            if (rowMetadata != null && resultAnalyzer != null) {
                // Adapt row metadata to infer type (adapter takes care of type-forced columns)
                resultAnalyzer.end();
                final List<ColumnMetadata> columns = rowMetadata.getColumns();
                adapter.adapt(columns, resultAnalyzer.getResult(), (Predicate<ColumnMetadata>) filter);
                resultAnalyzer.close();
            }
            // Continue process
            try (JsonParser parser = mapper.getFactory().createParser(
                    new InputStreamReader(new GZIPInputStream(new FileInputStream(reservoir)), UTF_8))) {
                final DataSet dataSet = mapper.reader(DataSet.class).readValue(parser);
                dataSet.getRecords().forEach(r -> {
                    r.setRowMetadata(rowMetadata);
                    link.exec().emit(r, rowMetadata);
                });
            }
        }
    } catch (Exception e) {
        LOGGER.warn("Unable to perform delayed analysis.", e);
    } finally {
        try {
            generator.close();
        } catch (IOException e) {
            LOGGER.error("Unable to close JSON generator (causing potential temp file delete issues).", e);
        }
        FilesHelper.deleteQuietly(reservoir);
        totalTime += System.currentTimeMillis() - start;
    }
    super.signal(signal);
}
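The deserialize-then-stream pattern used in the "Continue process" block above recurs across these usages. A minimal standalone sketch, where the input stream and the per-row handler are placeholders:

final ObjectMapper mapper = new ObjectMapper();
try (JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(in, UTF_8))) {
    final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
    dataSet.getRecords().forEach(row -> handle(row)); // handle(...) stands in for downstream processing
}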