use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class DataSetServiceTest method shouldUpdateSeparatorWithoutHeader.
/**
 * Verifies that updating the CSV separator parameter of a data set (and removing its header
 * columns parameter) changes the number of columns reported for that data set.
 *
 * see https://jira.talendforge.org/browse/TDP-1066
 */
@Test
public void shouldUpdateSeparatorWithoutHeader() throws Exception {
    // given
    final String dataSetId = createCSVDataSet(this.getClass().getResourceAsStream("../tdp-1066_no_header.ssv"), "tdp-1066-2");
    final InputStream initialMetadataStream = when().get("/datasets/{id}/metadata", dataSetId).asInputStream();
    final DataSet initialDataSet = mapper.readerFor(DataSet.class).readValue(initialMetadataStream);
    final DataSetMetadata initialMetadata = initialDataSet.getMetadata();

    // then
    // ';' is guessed as separator ==> 2 columns
    final int guessedColumnCount = initialMetadata.getRowMetadata().getColumns().size();
    assertThat(guessedColumnCount, is(2));

    // when
    // switch the separator to a single space and drop the stored header columns
    final Map<String, String> formatParameters = initialMetadata.getContent().getParameters();
    formatParameters.put(CSVFormatFamily.SEPARATOR_PARAMETER, " ");
    formatParameters.remove(CSVFormatFamily.HEADER_COLUMNS_PARAMETER);
    final String updatedMetadataJson = mapper.writer().writeValueAsString(initialMetadata);
    final int updateStatus = given()
            .contentType(JSON)
            .body(updatedMetadataJson)
            .expect().statusCode(200).log().ifError()
            .when().put("/datasets/{id}", dataSetId)
            .getStatusCode();
    assertThat(updateStatus, is(200));
    assertQueueMessages(dataSetId);

    // then
    final InputStream updatedContentStream = given().when().get("/datasets/{id}/content?metadata=true", dataSetId).asInputStream();
    final DataSet updatedDataSet = mapper.readerFor(DataSet.class).readValue(updatedContentStream);
    final DataSetMetadata updatedMetadata = updatedDataSet.getMetadata();
    // with ' ' as separator ==> 10 columns
    final int updatedColumnCount = updatedMetadata.getRowMetadata().getColumns().size();
    assertThat(updatedColumnCount, is(10));
}
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class DataSetService method preview.
/**
 * Returns a preview of the data set content for the given id (at most the first 100 records).
 * <p>
 * Statuses set by this method:
 * <ul>
 * <li><code>NO_CONTENT</code> with an empty data set when no metadata is found for the id, or when the
 * selected sheet is not part of the schema parser result (including a parser result without any sheet);</li>
 * <li><code>MOVED_PERMANENTLY</code> with a <code>Location</code> header set to
 * <code>/datasets/{id}/content</code> and an empty data set when the data set is not a draft.</li>
 * </ul>
 * NOTE(review): earlier documentation stated that the service might return
 * {@link org.apache.http.HttpStatus#SC_ACCEPTED} while analysis is not completed; nothing in this method
 * sets that status -- confirm whether it is handled elsewhere.
 *
 * @param metadata If <code>true</code>, includes data set metadata information.
 * @param sheetName the sheet name to preview. When empty, the sheet name already set on the data set metadata
 * is used and, failing that, the name of the first sheet of the schema parser result.
 * @param dataSetId A data set id.
 * @return the previewed data set, or an empty data set in the cases listed above.
 */
@RequestMapping(value = "/datasets/{id}/preview", method = RequestMethod.GET)
@ApiOperation(value = "Get a data preview set by id", notes = "Get a data set preview content based on provided id. Not valid or non existing data set id returns empty content. Data set not in drat status will return a redirect 301")
@Timed
@ResponseBody
public DataSet preview(@RequestParam(defaultValue = "true") @ApiParam(name = "metadata", value = "Include metadata information in the response") boolean metadata, @RequestParam(defaultValue = "") @ApiParam(name = "sheetName", value = "Sheet name to preview") String sheetName, @PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the requested data set") String dataSetId) {
    DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
    if (dataSetMetadata == null) {
        HttpResponseContext.status(HttpStatus.NO_CONTENT);
        // No data set, returns empty content.
        return DataSet.empty();
    }
    if (!dataSetMetadata.isDraft()) {
        // Moved to get data set content operation
        HttpResponseContext.status(HttpStatus.MOVED_PERMANENTLY);
        HttpResponseContext.header("Location", "/datasets/" + dataSetId + "/content");
        // dataset not anymore a draft so preview doesn't make sense.
        return DataSet.empty();
    }
    if (StringUtils.isNotEmpty(sheetName)) {
        dataSetMetadata.setSheetName(sheetName);
    }
    // take care of previous data without schema parser result
    if (dataSetMetadata.getSchemaParserResult() != null) {
        final List<Schema.SheetContent> sheetContents = dataSetMetadata.getSchemaParserResult().getSheetContents();
        // sheet not yet set correctly so use the first one (when there is one: with no sheet at all the
        // lookup below finds nothing and the "no sheet found" response is returned instead of failing
        // on get(0))
        if (StringUtils.isEmpty(dataSetMetadata.getSheetName()) && !sheetContents.isEmpty()) {
            String firstSheetName = sheetContents.get(0).getName();
            LOG.debug("preview for dataSetMetadata: {} with sheetName: {}", dataSetId, firstSheetName);
            dataSetMetadata.setSheetName(firstSheetName);
        }
        String theSheetName = dataSetMetadata.getSheetName();
        Optional<Schema.SheetContent> sheetContentFound = sheetContents.stream() //
                .filter(sheetContent -> theSheetName.equals(sheetContent.getName())) //
                .findFirst();
        if (!sheetContentFound.isPresent()) {
            HttpResponseContext.status(HttpStatus.NO_CONTENT);
            // No sheet found, returns empty content.
            return DataSet.empty();
        }
        // expose the columns of the selected sheet as the row metadata of the preview
        List<ColumnMetadata> columnMetadatas = sheetContentFound.get().getColumnMetadatas();
        if (dataSetMetadata.getRowMetadata() == null) {
            dataSetMetadata.setRowMetadata(new RowMetadata(emptyList()));
        }
        dataSetMetadata.getRowMetadata().setColumns(columnMetadatas);
    } else {
        // pass the id so the "{}" placeholder is actually filled in
        LOG.warn("dataset#{} has draft status but any SchemaParserResult", dataSetId);
    }
    // Build the result
    DataSet dataSet = new DataSet();
    if (metadata) {
        dataSet.setMetadata(conversionService.convert(dataSetMetadata, UserDataSetMetadata.class));
    }
    // preview is limited to the first 100 records
    dataSet.setRecords(contentStore.stream(dataSetMetadata).limit(100));
    return dataSet;
}
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class TransformationServiceBaseTest method getMetadata.
/**
 * Reads the metadata of a data set through the <code>/datasets/{id}/metadata</code> endpoint.
 * The call expects a 200 status and logs the response on error.
 *
 * @param dataSetId the id of the data set to look up.
 * @return the metadata held by the data set deserialized from the response.
 * @throws IOException if the response body cannot be read as a {@link DataSet}.
 */
protected DataSetMetadata getMetadata(final String dataSetId) throws IOException {
    final Response metadataResponse = given()
            .contentType(ContentType.JSON)
            .expect().statusCode(200).log().ifError()
            .when().get("/datasets/{id}/metadata", dataSetId);
    final DataSet parsedDataSet = mapper.readerFor(DataSet.class).readValue(metadataResponse.asInputStream());
    // fail fast when the body could not be mapped to a data set
    assertNotNull(parsedDataSet);
    return parsedDataSet.getMetadata();
}
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class CachedExportStrategyTest method shouldNotAcceptNullPreparationId.
/**
 * Export parameters that carry content but a <code>null</code> preparation id must not be accepted
 * by the cached export strategy.
 */
@Test
public void shouldNotAcceptNullPreparationId() throws Exception {
    // Given
    final ExportParameters exportParameters = new ExportParameters();
    exportParameters.setContent(new DataSet());
    exportParameters.setPreparationId(null);

    // When
    final boolean accepted = cachedExportStrategy.accept(exportParameters);

    // Then
    assertFalse(accepted);
}
use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
the class XlsWriterTest method createSchemaParser.
/**
 * Utility function: exports the JSON data set stored in the given classpath resource with the XLSX
 * format into a temporary file, then wraps that file in a schema parser request.
 *
 * @param inputFileName name of the resource (resolved against {@link XlsWriterTest}) holding the JSON data set.
 * @return a request reading the exported file, paired with a metadata whose id is "123".
 * @throws Exception if the temporary file cannot be handled or the export fails.
 */
public SchemaParser.Request createSchemaParser(String inputFileName) throws Exception {
    final Path exportedFile = Files.createTempFile("datarep-foo", "xlsx");
    // the file is removed right away; opening the output stream below creates it again
    Files.deleteIfExists(exportedFile);
    try (final OutputStream exportOutput = Files.newOutputStream(exportedFile)) {
        final Configuration exportConfiguration = Configuration.builder()
                .format(XlsFormat.XLSX)
                .output(exportOutput)
                .actions("")
                .build();
        final Transformer xlsExporter = factory.get(exportConfiguration);
        final InputStream jsonInput = XlsWriterTest.class.getResourceAsStream(inputFileName);
        try (JsonParser jsonParser = mapper.getFactory().createParser(jsonInput)) {
            final DataSet sourceDataSet = mapper.readerFor(DataSet.class).readValue(jsonParser);
            xlsExporter.buildExecutable(sourceDataSet, exportConfiguration).execute();
        }
    }
    final DataSetMetadata requestMetadata = metadataBuilder.metadata().id("123").build();
    final InputStream exportedContent = Files.newInputStream(exportedFile);
    return new SchemaParser.Request(exportedContent, requestMetadata);
}
Aggregations