Search in sources :

Example 26 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

the class DataSetJSONTest method testRead1.

/**
 * Loads a data set from the {@code test1.json} resource and checks the data set level information, the creation
 * date (compared in UTC) and the id, type and quality figures of the first and last columns.
 */
@Test
public void testRead1() throws Exception {
    // given: the creation date expected from the resource, expressed in UTC
    final SimpleDateFormat utcFormat = new SimpleDateFormat("MM-dd-yyyy HH:mm");
    utcFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    final Date expectedCreation = utcFormat.parse("02-17-2015 09:02");

    // when
    final DataSet loaded = from(this.getClass().getResourceAsStream("test1.json"));

    // then: data set level information
    assertNotNull(loaded);
    final DataSetMetadata loadedMetadata = loaded.getMetadata();
    assertEquals("410d2196-8f90-478f-a817-7e8b6694ac91", loadedMetadata.getId());
    assertEquals("test", loadedMetadata.getName());
    assertEquals("anonymousUser", loadedMetadata.getAuthor());
    assertEquals(2, loadedMetadata.getContent().getNbRecords());
    assertEquals(1, loadedMetadata.getContent().getNbLinesInHeader());
    assertEquals(0, loadedMetadata.getContent().getNbLinesInFooter());
    assertEquals(expectedCreation, new Date(loadedMetadata.getCreationDate()));

    // then: column level information
    final List<ColumnMetadata> columnList = loaded.getMetadata().getRowMetadata().getColumns();
    assertEquals(6, columnList.size());

    final ColumnMetadata first = columnList.get(0);
    assertEquals("0001", first.getId());
    assertEquals("id", first.getName());
    assertEquals("integer", first.getType());
    assertEquals(20, first.getQuality().getEmpty());
    assertEquals(26, first.getQuality().getInvalid());
    assertEquals(54, first.getQuality().getValid());

    final ColumnMetadata last = columnList.get(5);
    assertEquals("0007", last.getId());
    assertEquals("string", last.getType());
    assertEquals(8, last.getQuality().getEmpty());
    assertEquals(25, last.getQuality().getInvalid());
    assertEquals(67, last.getQuality().getValid());
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSet(org.talend.dataprep.api.dataset.DataSet) SimpleDateFormat(java.text.SimpleDateFormat) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) ServiceBaseTest(org.talend.ServiceBaseTest) Test(org.junit.Test)

Example 27 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

the class DataSetJSONTest method testWrite1.

/**
 * Builds a data set metadata with a single string column, writes the wrapping data set with {@code to(...)} and
 * compares the produced text with the {@code test2.json} resource.
 */
@Test
public void testWrite1() throws Exception {
    // given: one string column carrying quality figures
    final ColumnMetadata.Builder singleColumn = ColumnMetadata.Builder.column()
            .id(5)
            .name("column1")
            .type(Type.STRING)
            .empty(0)
            .invalid(10)
            .valid(50);
    final DataSetMetadata builtMetadata = metadataBuilder.metadata()
            .id("1234")
            .name("name")
            .author("author")
            .created(0)
            .row(singleColumn)
            .build();

    // given: CSV content settings
    final DataSetContent csvContent = builtMetadata.getContent();
    csvContent.addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ",");
    csvContent.setFormatFamilyId(new CSVFormatFamily().getBeanId());
    csvContent.setMediaType("text/csv");

    // given: lifecycle flags and storage location
    builtMetadata.getLifecycle().qualityAnalyzed(true);
    builtMetadata.getLifecycle().schemaAnalyzed(true);
    final LocalStoreLocation storeLocation = new LocalStoreLocation();
    builtMetadata.setLocation(storeLocation);

    // when
    final StringWriter output = new StringWriter();
    final DataSet toWrite = new DataSet();
    toWrite.setMetadata(builtMetadata);
    to(toWrite, output);

    // then
    assertThat(output.toString(), sameJSONAsFile(DataSetJSONTest.class.getResourceAsStream("test2.json")));
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSet(org.talend.dataprep.api.dataset.DataSet) DataSetContent(org.talend.dataprep.api.dataset.DataSetContent) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) CSVFormatFamily(org.talend.dataprep.schema.csv.CSVFormatFamily) LocalStoreLocation(org.talend.dataprep.api.dataset.location.LocalStoreLocation) ServiceBaseTest(org.talend.ServiceBaseTest) Test(org.junit.Test)

Example 28 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

the class DataSetJSONTest method should_iterate_row_with_metadata.

/**
 * Parses the {@code dataSetRowMetadata.json} resource into a data set, walks every record checking that each row
 * exposes a non-empty column list, then expects 10 records and checks the column ids of the last row visited.
 * Any {@link Exception} raised while parsing or iterating is rethrown as a {@code TDPException}.
 */
@Test
public void should_iterate_row_with_metadata() throws IOException {
    // given
    final String[] expectedIds = { "0001", "0002", "0003", "0004", "0005", "0006", "0007", "0008", "0009" };
    final InputStream json = this.getClass().getResourceAsStream("dataSetRowMetadata.json");
    try (JsonParser jsonParser = mapper.getFactory().createParser(json)) {
        final DataSet parsed = mapper.readerFor(DataSet.class).readValue(jsonParser);

        // when: keep the columns of the most recently visited row
        List<ColumnMetadata> lastSeenColumns = new ArrayList<>();
        int visitedRows = 0;
        for (final Iterator<DataSetRow> rows = parsed.getRecords().iterator(); rows.hasNext();) {
            final DataSetRow row = rows.next();
            lastSeenColumns = row.getRowMetadata().getColumns();
            assertThat(lastSeenColumns, not(empty()));
            visitedRows++;
        }

        // then
        assertEquals(10, visitedRows);
        int position = 0;
        for (final ColumnMetadata column : lastSeenColumns) {
            assertEquals(expectedIds[position], column.getId());
            position++;
        }
    } catch (Exception e) {
        throw new TDPException(CommonErrorCodes.UNABLE_TO_PARSE_JSON, e);
    }
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSet(org.talend.dataprep.api.dataset.DataSet) TDPException(org.talend.dataprep.exception.TDPException) TalendRuntimeException(org.talend.daikon.exception.TalendRuntimeException) TDPException(org.talend.dataprep.exception.TDPException) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) JsonParser(com.fasterxml.jackson.core.JsonParser) ServiceBaseTest(org.talend.ServiceBaseTest) Test(org.junit.Test)

Example 29 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

the class DataSetLookupRowMatcher method init.

/**
 * Runs the {@code DataSetGet} command for {@code datasetId}, parses its output as a {@code DataSet}, and keeps the
 * record iterator together with an empty row built from the data set columns.
 *
 * @throws TDPException with {@code UNABLE_TO_READ_LOOKUP_DATASET} when the content cannot be read.
 */
@PostConstruct
private void init() {
    final DataSetGet command = context.getBean(DataSetGet.class, datasetId, true, true);
    LOGGER.debug("opening {}", datasetId);
    this.input = command.execute();
    try {
        final InputStreamReader reader = new InputStreamReader(input, UTF_8);
        // NOTE(review): the parser is not closed here; presumably the record iterator keeps reading from it
        // after this method returns -- confirm before adding a try-with-resources.
        final JsonParser parser = mapper.getFactory().createParser(reader);
        final DataSet lookupDataSet = mapper.readerFor(DataSet.class).readValue(parser);
        this.lookupIterator = lookupDataSet.getRecords().iterator();
        this.emptyRow = getEmptyRow(lookupDataSet.getMetadata().getRowMetadata().getColumns());
    } catch (IOException e) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_READ_LOOKUP_DATASET, e);
    }
}
Also used : TDPException(org.talend.dataprep.exception.TDPException) DataSetGet(org.talend.dataprep.command.dataset.DataSetGet) InputStreamReader(java.io.InputStreamReader) DataSet(org.talend.dataprep.api.dataset.DataSet) IOException(java.io.IOException) JsonParser(com.fasterxml.jackson.core.JsonParser) PostConstruct(javax.annotation.PostConstruct)

Example 30 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

the class DataSetService method get.

/**
 * Returns the <b>full</b> data set content for given id. The work is wrapped in a {@link Callable}, so nothing is
 * read until the returned callable is invoked.
 *
 * @param metadata If <code>true</code>, includes data set metadata information.
 * @param includeInternalContent If <code>false</code>, values whose key starts with
 * {@code FlagNames.INTERNAL_PROPERTY_PREFIX} are removed from each row (the {@code FlagNames.TDP_ID} value is
 * always put back).
 * @param filter A filter for the retrieved content, URL-decoded as UTF-8 before being handed to the filter service.
 * @param dataSetId A data set id.
 * @return The full data set.
 */
@RequestMapping(value = "/datasets/{id}/content", method = RequestMethod.GET)
@ApiOperation(value = "Get a data set by id", notes = "Get a data set content based on provided id. Id should be a UUID returned by the list operation. Not valid or non existing data set id returns empty content.")
@Timed
@ResponseBody
public Callable<DataSet> get(@RequestParam(defaultValue = "true") @ApiParam(name = "metadata", value = "Include metadata information in the response") boolean metadata, @RequestParam(defaultValue = "false") @ApiParam(name = "includeInternalContent", value = "Include internal content in the response") boolean includeInternalContent, @ApiParam(value = "Filter for retrieved content.") @RequestParam(value = "filter", defaultValue = "") String filter, @PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the requested data set") String dataSetId) {
    return () -> {
        final Marker marker = Markers.dataset(dataSetId);
        LOG.debug(marker, "Get data set #{}", dataSetId);
        try {
            final DataSetMetadata storedMetadata = dataSetMetadataRepository.get(dataSetId);
            assertDataSetMetadata(storedMetadata, dataSetId);

            // Build the result
            final DataSet result = new DataSet();
            if (metadata) {
                result.setMetadata(conversionService.convert(storedMetadata, UserDataSetMetadata.class));
            }

            // Disable line limit
            Stream<DataSetRow> rows = contentStore.stream(storedMetadata, -1);
            if (!includeInternalContent) {
                LOG.debug("Skip internal content when serving data set #{} content.", dataSetId);
                rows = rows.map(row -> {
                    // Work on a copy so the values held by the source row are left untouched.
                    final Map<String, Object> visibleValues = new HashMap<>(row.values());
                    // Removes technical properties from returned values.
                    visibleValues.keySet().removeIf(key -> key != null && key.startsWith(FlagNames.INTERNAL_PROPERTY_PREFIX));
                    // Include TDP_ID anyway
                    visibleValues.put(FlagNames.TDP_ID, row.getTdpId());
                    return new DataSetRow(row.getRowMetadata(), visibleValues);
                });
            }

            // Filter content
            final String decodedFilter = URLDecoder.decode(filter, "UTF-8");
            rows = rows.filter(filterService.build(decodedFilter, storedMetadata.getRowMetadata()));
            result.setRecords(rows);
            return result;
        } finally {
            LOG.debug(marker, "Get done.");
        }
    };
}
Also used : DataSet(org.talend.dataprep.api.dataset.DataSet) HashMap(java.util.HashMap) PipedInputStream(java.io.PipedInputStream) Stream(java.util.stream.Stream) PipedOutputStream(java.io.PipedOutputStream) NullOutputStream(org.apache.commons.io.output.NullOutputStream) StrictlyBoundedInputStream(org.talend.dataprep.dataset.store.content.StrictlyBoundedInputStream) OutputStream(java.io.OutputStream) InputStream(java.io.InputStream) Marker(org.slf4j.Marker) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) Timed(org.talend.dataprep.metrics.Timed) ApiOperation(io.swagger.annotations.ApiOperation) RequestMapping(org.springframework.web.bind.annotation.RequestMapping) ResponseBody(org.springframework.web.bind.annotation.ResponseBody)

Aggregations

DataSet (org.talend.dataprep.api.dataset.DataSet): 39, DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata): 18, Test (org.junit.Test): 16, TDPException (org.talend.dataprep.exception.TDPException): 15, JsonParser (com.fasterxml.jackson.core.JsonParser): 13, InputStream (java.io.InputStream): 13, RowMetadata (org.talend.dataprep.api.dataset.RowMetadata): 11, DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow): 10, OutputStream (java.io.OutputStream): 8, Logger (org.slf4j.Logger): 8, DataSetGet (org.talend.dataprep.command.dataset.DataSetGet): 8, Configuration (org.talend.dataprep.transformation.api.transformer.configuration.Configuration): 8, ApiOperation (io.swagger.annotations.ApiOperation): 7, IOException (java.io.IOException): 7, ArrayList (java.util.ArrayList): 7, LoggerFactory (org.slf4j.LoggerFactory): 7, Autowired (org.springframework.beans.factory.annotation.Autowired): 7, ServiceBaseTest (org.talend.ServiceBaseTest): 7, ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata): 7, ContentCache (org.talend.dataprep.cache.ContentCache): 7