Usage of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend: class DataSetJSONTest, method testRead1.
@Test
public void testRead1() throws Exception {
    // Deserializes the test1.json fixture and verifies every metadata field round-trips.
    DataSet dataSet = from(this.getClass().getResourceAsStream("test1.json"));
    assertNotNull(dataSet);
    final DataSetMetadata metadata = dataSet.getMetadata();
    assertEquals("410d2196-8f90-478f-a817-7e8b6694ac91", metadata.getId());
    assertEquals("test", metadata.getName());
    assertEquals("anonymousUser", metadata.getAuthor());
    assertEquals(2, metadata.getContent().getNbRecords());
    assertEquals(1, metadata.getContent().getNbLinesInHeader());
    assertEquals(0, metadata.getContent().getNbLinesInFooter());
    // Use java.time instead of SimpleDateFormat/Date: thread-safe and explicit about the UTC zone.
    // Expected creation date: 02-17-2015 09:02 UTC, compared as epoch milliseconds
    // (equivalent to the former Date.equals comparison, which also compares millis).
    final long expectedMillis = java.time.LocalDateTime.of(2015, 2, 17, 9, 2) //
            .toInstant(java.time.ZoneOffset.UTC).toEpochMilli();
    assertEquals(expectedMillis, metadata.getCreationDate());
    List<ColumnMetadata> columns = dataSet.getMetadata().getRowMetadata().getColumns();
    assertEquals(6, columns.size());
    // First column: id/name/type plus quality counters from the fixture.
    ColumnMetadata firstColumn = columns.get(0);
    assertEquals("0001", firstColumn.getId());
    assertEquals("id", firstColumn.getName());
    assertEquals("integer", firstColumn.getType());
    assertEquals(20, firstColumn.getQuality().getEmpty());
    assertEquals(26, firstColumn.getQuality().getInvalid());
    assertEquals(54, firstColumn.getQuality().getValid());
    // Last column: note the id is "0007" (ids are not contiguous in the fixture).
    ColumnMetadata lastColumn = columns.get(5);
    assertEquals("0007", lastColumn.getId());
    assertEquals("string", lastColumn.getType());
    assertEquals(8, lastColumn.getQuality().getEmpty());
    assertEquals(25, lastColumn.getQuality().getInvalid());
    assertEquals(67, lastColumn.getQuality().getValid());
}
Usage of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend: class DataSetJSONTest, method testWrite1.
@Test
public void testWrite1() throws Exception {
    // Build a metadata with a single string column and compare its JSON
    // serialization against the expected test2.json reference file.
    final ColumnMetadata.Builder columnBuilder = ColumnMetadata.Builder
            .column()
            .id(5)
            .name("column1")
            .type(Type.STRING)
            .empty(0)
            .invalid(10)
            .valid(50);
    DataSetMetadata metadata = metadataBuilder.metadata()
            .id("1234")
            .name("name")
            .author("author")
            .created(0)
            .row(columnBuilder)
            .build();
    // Describe the content as a comma-separated CSV.
    final DataSetContent content = metadata.getContent();
    content.addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ",");
    content.setFormatFamilyId(new CSVFormatFamily().getBeanId());
    content.setMediaType("text/csv");
    // Mark both analysis steps as done and store the data set locally.
    metadata.getLifecycle().qualityAnalyzed(true);
    metadata.getLifecycle().schemaAnalyzed(true);
    metadata.setLocation(new LocalStoreLocation());
    final DataSet dataSet = new DataSet();
    dataSet.setMetadata(metadata);
    final StringWriter serialized = new StringWriter();
    to(dataSet, serialized);
    assertThat(serialized.toString(), sameJSONAsFile(DataSetJSONTest.class.getResourceAsStream("test2.json")));
}
Usage of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend: class DataSetJSONTest, method should_iterate_row_with_metadata.
@Test
public void should_iterate_row_with_metadata() throws IOException {
    // given
    // Column ids expected on every row of the dataSetRowMetadata.json fixture.
    String[] expectedColumnIds = new String[] { "0001", "0002", "0003", "0004", "0005", "0006", "0007", "0008", "0009" };
    final InputStream input = this.getClass().getResourceAsStream("dataSetRowMetadata.json");
    try (JsonParser parser = mapper.getFactory().createParser(input)) {
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
        List<ColumnMetadata> lastSeenColumns = new ArrayList<>();
        int rowCount = 0;
        // Each row must carry a non-empty row metadata; remember the last one seen.
        for (final Iterator<DataSetRow> rows = dataSet.getRecords().iterator(); rows.hasNext();) {
            final DataSetRow row = rows.next();
            lastSeenColumns = row.getRowMetadata().getColumns();
            assertThat(lastSeenColumns, not(empty()));
            rowCount++;
        }
        // then
        assertEquals(10, rowCount);
        int index = 0;
        for (final ColumnMetadata column : lastSeenColumns) {
            assertEquals(expectedColumnIds[index++], column.getId());
        }
    } catch (Exception e) {
        throw new TDPException(CommonErrorCodes.UNABLE_TO_PARSE_JSON, e);
    }
}
Usage of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend: class DataSetLookupRowMatcher, method init.
/**
 * Open the connection to get the dataset content and init the row iterator.
 * <p>
 * The Jackson parser is intentionally left open: {@code lookupIterator} streams
 * records lazily from {@code input}, which is expected to be closed elsewhere
 * once the lookup is finished. On parse failure, the opened stream is closed
 * here so the connection does not leak.
 */
@PostConstruct
private void init() {
    final DataSetGet dataSetGet = context.getBean(DataSetGet.class, datasetId, true, true);
    LOGGER.debug("opening {}", datasetId);
    this.input = dataSetGet.execute();
    try {
        JsonParser jsonParser = mapper.getFactory().createParser(new InputStreamReader(input, UTF_8));
        DataSet lookup = mapper.readerFor(DataSet.class).readValue(jsonParser);
        this.lookupIterator = lookup.getRecords().iterator();
        this.emptyRow = getEmptyRow(lookup.getMetadata().getRowMetadata().getColumns());
    } catch (IOException e) {
        // Best-effort close of the dataset content: without this, a parse
        // failure leaks the underlying connection opened by dataSetGet.execute().
        try {
            input.close();
        } catch (IOException closeError) {
            LOGGER.debug("unable to close dataset {} content after parse failure", datasetId, closeError);
        }
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_READ_LOOKUP_DATASET, e);
    }
}
Usage of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend: class DataSetService, method get.
/**
 * Returns the <b>full</b> data set content for given id.
 *
 * @param metadata If <code>true</code>, includes data set metadata information.
 * @param includeInternalContent If <code>true</code>, internal (technical) row properties are kept in the returned records.
 * @param filter Optional TQL filter applied to the returned records (URL-encoded; empty means no filtering).
 * @param dataSetId A data set id.
 * @return The full data set.
 */
@RequestMapping(value = "/datasets/{id}/content", method = RequestMethod.GET)
@ApiOperation(value = "Get a data set by id", notes = "Get a data set content based on provided id. Id should be a UUID returned by the list operation. Not valid or non existing data set id returns empty content.")
@Timed
@ResponseBody
public Callable<DataSet> get(@RequestParam(defaultValue = "true") @ApiParam(name = "metadata", value = "Include metadata information in the response") boolean metadata, @RequestParam(defaultValue = "false") @ApiParam(name = "includeInternalContent", value = "Include internal content in the response") boolean includeInternalContent, @ApiParam(value = "Filter for retrieved content.") @RequestParam(value = "filter", defaultValue = "") String filter, @PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the requested data set") String dataSetId) {
    return () -> {
        final Marker marker = Markers.dataset(dataSetId);
        LOG.debug(marker, "Get data set #{}", dataSetId);
        try {
            DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
            assertDataSetMetadata(dataSetMetadata, dataSetId);
            // Build the result
            DataSet dataSet = new DataSet();
            if (metadata) {
                dataSet.setMetadata(conversionService.convert(dataSetMetadata, UserDataSetMetadata.class));
            }
            // Disable line limit (-1 = stream the whole content)
            Stream<DataSetRow> stream = contentStore.stream(dataSetMetadata, -1);
            if (!includeInternalContent) {
                LOG.debug("Skip internal content when serving data set #{} content.", dataSetId);
                stream = stream.map(r -> {
                    // Copy the row values, then strip technical properties from the copy
                    // (simpler and equivalent to iterating the original and removing from the copy).
                    final Map<String, Object> filteredValues = new HashMap<>(r.values());
                    filteredValues.keySet().removeIf(k -> k != null && k.startsWith(FlagNames.INTERNAL_PROPERTY_PREFIX));
                    // Include TDP_ID anyway
                    filteredValues.put(FlagNames.TDP_ID, r.getTdpId());
                    return new DataSetRow(r.getRowMetadata(), filteredValues);
                });
            }
            // Filter content
            stream = stream.filter(filterService.build(URLDecoder.decode(filter, "UTF-8"), dataSetMetadata.getRowMetadata()));
            dataSet.setRecords(stream);
            return dataSet;
        } finally {
            LOG.debug(marker, "Get done.");
        }
    };
}
Aggregations