use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class DataSetJSONTest method testColumnAtEnd.
@Test
public void testColumnAtEnd() throws Exception {
    DataSet dataSet = from(this.getClass().getResourceAsStream("test4.json"));
    // There are 4 columns, but Jackson doesn't take them into account if they appear at the end of the content.
    // This is not "expected" but known behavior; this test ensures that known behavior remains the same.
    assertThat(dataSet.getMetadata(), nullValue());
}
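For context, the from(...) helper used in this test is essentially a thin wrapper around Jackson deserialization of a DataSet. A minimal sketch, assuming a plain ObjectMapper is sufficient (the actual test utility may register project-specific modules):

import java.io.IOException;
import java.io.InputStream;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.talend.dataprep.api.dataset.DataSet;

public class DataSetJsonSupport {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    // Hypothetical helper mirroring the from(...) call in the test: read a DataSet from a JSON stream.
    public static DataSet from(InputStream input) throws IOException {
        return MAPPER.readerFor(DataSet.class).readValue(input);
    }
}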
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class TransformationService method getSemanticDomains.
/**
* Return the semantic domains for the given parameters.
*
* @param metadata the dataset metadata.
* @param columnId the column id to analyze.
* @param records the dataset records.
* @return the semantic domains for the given parameters.
* @throws IOException if the dataset records stream cannot be read or parsed.
*/
private List<SemanticDomain> getSemanticDomains(DataSetMetadata metadata, String columnId, InputStream records)
        throws IOException {
    // Copy the column metadata and set the "semantic domain forced" flag to false to make sure the statistics
    // adapter sets all available domains.
    final ColumnMetadata columnMetadata = column() //
            .copy(metadata.getRowMetadata().getById(columnId)) //
            .semanticDomainForce(false) //
            .build();
    final Analyzer<Analyzers.Result> analyzer = analyzerService.build(columnMetadata, SEMANTIC);
    analyzer.init();
    try (final JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(records, UTF_8))) {
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
        dataSet.getRecords().map(r -> r.get(columnId)).forEach(analyzer::analyze);
        analyzer.end();
    }
    final List<Analyzers.Result> analyzerResult = analyzer.getResult();
    statisticsAdapter.adapt(singletonList(columnMetadata), analyzerResult);
    return columnMetadata.getSemanticDomains();
}
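A hedged sketch of how such a helper might be invoked, assuming a DataSetGet-style command provides the records stream (the caller below is illustrative, not the actual call site in TransformationService):

// Hypothetical caller: fetch the dataset content and compute the semantic domains of one column.
final DataSetGet dataSetGet = context.getBean(DataSetGet.class, metadata.getId(), false, true);
try (InputStream records = dataSetGet.execute()) {
    final List<SemanticDomain> domains = getSemanticDomains(metadata, columnId, records);
    domains.forEach(domain -> LOG.debug("Semantic domain: {}", domain));
} catch (IOException e) {
    LOG.warn("Could not compute semantic domains for column {}", columnId, e);
}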
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class TransformationService method aggregate.
/**
* Compute the given aggregation.
*
* @param rawParams the raw aggregation parameters, received as the JSON request body.
*/
// @formatter:off
@RequestMapping(value = "/aggregate", method = POST, consumes = APPLICATION_JSON_VALUE)
@ApiOperation(value = "Compute the aggregation according to the request body rawParams", consumes = APPLICATION_JSON_VALUE)
@VolumeMetered
public AggregationResult aggregate(@ApiParam(value = "The aggregation rawParams in json") @RequestBody final String rawParams) {
// @formatter:on

    // Parse the aggregation parameters.
    final AggregationParameters parameters;
    try {
        parameters = mapper.readerFor(AggregationParameters.class).readValue(rawParams);
        LOG.debug("Aggregation requested {}", parameters);
    } catch (IOException e) {
        throw new TDPException(CommonErrorCodes.BAD_AGGREGATION_PARAMETERS, e);
    }

    InputStream contentToAggregate;
    // Get the content of the preparation (internal call with piped streams).
    if (StringUtils.isNotBlank(parameters.getPreparationId())) {
        try {
            PipedOutputStream temp = new PipedOutputStream();
            contentToAggregate = new PipedInputStream(temp);
            // Because of piped streams, processing must be asynchronous.
            Runnable r = () -> {
                try {
                    final ExportParameters exportParameters = new ExportParameters();
                    exportParameters.setPreparationId(parameters.getPreparationId());
                    exportParameters.setDatasetId(parameters.getDatasetId());
                    final String filter = parameters.getFilter();
                    if (filter != null) {
                        if (filter.isEmpty()) {
                            throw new TDPException(CommonErrorCodes.UNABLE_TO_AGGREGATE,
                                    new IllegalArgumentException("Source should not be empty"));
                        }
                        exportParameters.setFilter(mapper.readTree(filter));
                    }
                    exportParameters.setExportType(JSON);
                    exportParameters.setStepId(parameters.getStepId());
                    final StreamingResponseBody body = executeSampleExportStrategy(exportParameters);
                    body.writeTo(temp);
                } catch (IOException e) {
                    throw new TDPException(CommonErrorCodes.UNABLE_TO_AGGREGATE, e);
                }
            };
            executor.execute(r);
        } catch (IOException e) {
            throw new TDPException(CommonErrorCodes.UNABLE_TO_AGGREGATE, e);
        }
    } else {
        final DataSetGet dataSetGet = context.getBean(DataSetGet.class, parameters.getDatasetId(), false, true);
        contentToAggregate = dataSetGet.execute();
    }

    // Apply the aggregation.
    try (JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(contentToAggregate, UTF_8))) {
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
        return aggregationService.aggregate(parameters, dataSet);
    } catch (IOException e) {
        throw new TDPException(CommonErrorCodes.UNABLE_TO_PARSE_JSON, e);
    } finally {
        // Don't forget to release the connection.
        if (contentToAggregate != null) {
            try {
                contentToAggregate.close();
            } catch (IOException e) {
                LOG.warn("Could not close dataset input stream while aggregating", e);
            }
        }
    }
}
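The piped-stream pattern used above is worth isolating: the producer must write on a separate thread, otherwise a single thread would block as soon as the pipe's internal buffer fills. A minimal, self-contained sketch of the same idea using only JDK classes (independent of the data-prep code):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class PipedStreamSketch {

    public static void main(String[] args) throws IOException {
        final ExecutorService executor = Executors.newSingleThreadExecutor();
        final PipedOutputStream producerSide = new PipedOutputStream();
        final InputStream consumerSide = new PipedInputStream(producerSide);

        // Producer: writes on a separate thread, because the writer and reader of a pipe
        // must not share a thread (the write would block once the pipe buffer is full).
        executor.execute(() -> {
            try {
                producerSide.write("{\"records\":[]}".getBytes(StandardCharsets.UTF_8));
                producerSide.close();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });

        // Consumer: reads from the other end of the pipe, as aggregate() does with contentToAggregate.
        final ByteArrayOutputStream content = new ByteArrayOutputStream();
        final byte[] buffer = new byte[1024];
        int read;
        while ((read = consumerSide.read(buffer)) != -1) {
            content.write(buffer, 0, read);
        }
        consumerSide.close();
        System.out.println(content.toString(StandardCharsets.UTF_8.name()));
        executor.shutdown();
    }
}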
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class TransformationService method executeDiffOnDataset.
private void executeDiffOnDataset(final PreviewParameters previewParameters, final OutputStream output) {
    final DataSetGet dataSetGet = context.getBean(DataSetGet.class, previewParameters.getDataSetId(), false, true);
    boolean identityReleased = false;
    securityProxy.asTechnicalUser();
    // Because of dataset records streaming, the dataset content must be read within an auto-closeable block.
    try (final InputStream dataSetContent = dataSetGet.execute();
            final JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(dataSetContent, UTF_8))) {
        securityProxy.releaseIdentity();
        identityReleased = true;
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
        executePreview(previewParameters.getNewActions(), //
                previewParameters.getBaseActions(), //
                previewParameters.getTdpIds(), //
                dataSet, //
                output);
    } catch (IOException e) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_PERFORM_PREVIEW, e);
    } finally {
        // Make sure the technical identity is released.
        if (!identityReleased) {
            securityProxy.releaseIdentity();
        }
    }
}
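The identity handling above follows a guard pattern: elevate to a technical user, flag when the identity has been released on the happy path, and release it in finally only if that flag was never set. A simplified sketch of the same pattern, with a hypothetical SecurityProxy interface standing in for the real one:

// Hypothetical interface mirroring the two calls used above; the real SecurityProxy may differ.
interface SecurityProxy {
    void asTechnicalUser();
    void releaseIdentity();
}

class TechnicalIdentityGuardSketch {

    private final SecurityProxy securityProxy;

    TechnicalIdentityGuardSketch(SecurityProxy securityProxy) {
        this.securityProxy = securityProxy;
    }

    void doPrivilegedWork(Runnable work) {
        boolean identityReleased = false;
        securityProxy.asTechnicalUser(); // elevate before touching protected resources
        try {
            work.run();
            securityProxy.releaseIdentity(); // happy path: release as soon as the work is done
            identityReleased = true;
        } finally {
            // Guard: if the work threw before the identity was released, release it here.
            if (!identityReleased) {
                securityProxy.releaseIdentity();
            }
        }
    }
}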
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class DataSetExportStrategy method execute.
@Override
public StreamingResponseBody execute(ExportParameters parameters) {
    final String formatName = parameters.getExportType();
    final ExportFormat format = getFormat(formatName);
    ExportUtils.setExportHeaders(parameters.getExportName(),
            parameters.getArguments().get(ExportFormat.PREFIX + CSVFormat.ParametersCSV.ENCODING), format);
    return outputStream -> {
        // Get the dataset content (in an auto-closeable block to make sure it is properly closed).
        final String datasetId = parameters.getDatasetId();
        final DataSetGet dataSetGet = applicationContext.getBean(DataSetGet.class, datasetId, false, true);
        final DataSetGetMetadata dataSetGetMetadata = applicationContext.getBean(DataSetGetMetadata.class, datasetId);
        try (InputStream datasetContent = dataSetGet.execute()) {
            try (JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(datasetContent, UTF_8))) {
                // Create the dataset and attach its metadata.
                final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
                dataSet.setMetadata(dataSetGetMetadata.execute());
                // Get the actions to apply (no preparation ==> dataset export ==> no actions).
                Configuration configuration = Configuration.builder()
                        .args(parameters.getArguments())
                        .outFilter(rm -> filterService.build(parameters.getFilter(), rm))
                        .format(format.getName())
                        .volume(Configuration.Volume.SMALL)
                        .output(outputStream)
                        .limit(limit)
                        .build();
                factory.get(configuration).buildExecutable(dataSet, configuration).execute();
            }
        } catch (TDPException e) {
            throw e;
        } catch (Exception e) {
            throw new TDPException(TransformationErrorCodes.UNABLE_TO_TRANSFORM_DATASET, e);
        }
    };
}
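execute() returns a Spring StreamingResponseBody, so nothing is read from the dataset until a caller invokes writeTo. A hedged sketch of driving it outside of a controller, for example to dump the export to a file (the strategy wiring and parameter population are assumed, not shown in the source):

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;

// Assumes an already-wired DataSetExportStrategy instance and populated ExportParameters.
public class ExportToFileSketch {

    public static void dump(DataSetExportStrategy strategy, ExportParameters parameters, String path) throws IOException {
        final StreamingResponseBody body = strategy.execute(parameters);
        // The dataset content is only streamed once writeTo is called.
        try (OutputStream out = new FileOutputStream(path)) {
            body.writeTo(out);
        }
    }
}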