use of org.talend.dataprep.api.dataset.DataSetContent in project data-prep by Talend.
the class DataSetContentStore method get.
/**
 * Provides the content of the given {@link DataSetMetadata data set} as a <b>JSON</b> stream, regardless of the
 * format the content was originally stored in. Implementations are responsible for:
 * <ul>
 * <li>Converting the data content to JSON.</li>
 * <li>Throwing an exception when the data set is not ready to be read (content type missing).</li>
 * </ul>
 * Implementations are also encouraged to avoid blocking code.
 *
 * @param dataSetMetadata The {@link DataSetMetadata data set} whose content is read.
 * @param limit The limit handed to the content supplier (use -1 for "no limit"). The same value is given to both
 * the raw content supplier and the JSON serializer.
 * @return A valid <b>JSON</b> stream: a JSON array in which each element holds a single data set row. A data set
 * row is not guaranteed to fit on one line of the stream; it may span several lines.
 */
protected InputStream get(DataSetMetadata dataSetMetadata, long limit) {
    // The serializer is selected from the format family recorded in the data set content.
    final String formatFamilyId = dataSetMetadata.getContent().getFormatFamilyId();
    final Serializer jsonSerializer = factory.getFormatFamily(formatFamilyId).getSerializer();
    // Raw content and serialization share the same limit.
    return jsonSerializer.serialize(getAsRaw(dataSetMetadata, limit), dataSetMetadata, limit);
}
use of org.talend.dataprep.api.dataset.DataSetContent in project data-prep by Talend.
the class PreparationExportStrategyTest method setUp.
/**
 * Prepares the mocks shared by the tests of this class: format registration, data set commands, preparation
 * actions, cache key generation, transformer factory and content cache.
 *
 * @throws Exception if the empty data set used as stubbed content cannot be serialized.
 */
@Before
public void setUp() throws Exception {
    // Given
    mapper.registerModule(new Jdk8Module());
    // NOTE(review): the strategy receives a fresh ObjectMapper, not the 'mapper' configured just above - confirm
    // this is intentional.
    strategy.setMapper(new ObjectMapper());
    when(formatRegistrationService.getByName(eq("JSON"))).thenReturn(new JsonFormat());

    // Data set metadata command
    final DataSetGetMetadata dataSetGetMetadata = mock(DataSetGetMetadata.class);
    when(applicationContext.getBean(eq(DataSetGetMetadata.class), anyVararg())).thenReturn(dataSetGetMetadata);

    // Data set content command: returns an empty data set ("ds-1234") serialized with the test mapper
    DataSetGet dataSetGet = mock(DataSetGet.class);
    final StringWriter dataSetAsString = new StringWriter();
    DataSet dataSet = new DataSet();
    final DataSetMetadata dataSetMetadata = new DataSetMetadata("ds-1234", "", "", 0L, 0L, new RowMetadata(), "");
    final DataSetContent content = new DataSetContent();
    dataSetMetadata.setContent(content);
    dataSet.setMetadata(dataSetMetadata);
    dataSet.setRecords(Stream.empty());
    mapper.writerFor(DataSet.class).writeValue(dataSetAsString, dataSet);
    // Encode explicitly as UTF-8 so the stubbed stream does not depend on the platform default charset.
    when(dataSetGet.execute()).thenReturn(
            new ByteArrayInputStream(dataSetAsString.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    when(applicationContext.getBean(eq(DataSetGet.class), anyVararg())).thenReturn(dataSetGet);

    // Preparation actions command: returns an empty JSON object for preparation "prep-1234"
    final PreparationGetActions preparationGetActions = mock(PreparationGetActions.class);
    when(preparationGetActions.execute())
            .thenReturn(new ByteArrayInputStream("{}".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    when(applicationContext.getBean(eq(PreparationGetActions.class), eq("prep-1234"), anyString())).thenReturn(preparationGetActions);

    // Cache key generation always yields the same key ("cache-1234")
    final TransformationCacheKey cacheKey = mock(TransformationCacheKey.class);
    when(cacheKey.getKey()).thenReturn("cache-1234");
    when(cacheKeyGenerator.generateContentKey(anyString(), anyString(), anyString(), anyString(), any(), any(), anyString())).thenReturn(cacheKey);

    // Transformer: reset before stubbing so earlier stubbing/interactions on the shared mock are discarded
    final ExecutableTransformer executableTransformer = mock(ExecutableTransformer.class);
    reset(transformer);
    when(transformer.buildExecutable(any(), any())).thenReturn(executableTransformer);
    when(factory.get(any())).thenReturn(transformer);

    // Content cache: anything written to the cache is discarded
    when(contentCache.put(any(), any())).thenReturn(new NullOutputStream());
}
use of org.talend.dataprep.api.dataset.DataSetContent in project data-prep by Talend.
the class ContentAnalysis method updateHeaderAndFooter.
/**
 * Sets the number of header and footer lines on the content of the given dataset metadata.
 * <p>
 * The header size is read from the {@link CSVFormatFamily#HEADER_NB_LINES_PARAMETER} content parameter; when that
 * value cannot be parsed as an integer, a single header line is assumed. The footer size is always set to 0.
 *
 * @param metadata the dataset metadata to update.
 */
private void updateHeaderAndFooter(DataSetMetadata metadata) {
    final DataSetContent target = metadata.getContent();
    final String declaredHeaderSize = metadata.getContent().getParameters().get(CSVFormatFamily.HEADER_NB_LINES_PARAMETER);

    int headerSize;
    try {
        headerSize = Integer.parseInt(declaredHeaderSize);
    } catch (NumberFormatException e) {
        // Integer.parseInt also raises NumberFormatException for a null value, so an absent parameter ends up here.
        LOG.info("No header information for {}, let's use the first line as header.", metadata.getId());
        headerSize = 1;
    }

    target.setNbLinesInHeader(headerSize);
    target.setNbLinesInFooter(0);
}
use of org.talend.dataprep.api.dataset.DataSetContent in project data-prep by Talend.
the class FormatAnalysis method internalUpdateMetadata.
/**
 * Applies the given format to the dataset metadata and saves the result in the repository.
 * <p>
 * The format family bean id and the media type are written to the dataset content, the encoding is written to the
 * metadata itself, then column name information is parsed before the metadata is saved.
 *
 * @param metadata the dataset metadata to update.
 * @param format the format whose family and encoding are applied to the dataset metadata.
 */
private void internalUpdateMetadata(DataSetMetadata metadata, Format format) {
    final FormatFamily family = format.getFormatFamily();
    final DataSetContent content = metadata.getContent();
    // The media type is resolved by the dataset location for the format family.
    final String resolvedMediaType = metadata.getLocation().toMediaType(format.getFormatFamily());

    // Content-level information
    content.setFormatFamilyId(family.getBeanId());
    content.setMediaType(resolvedMediaType);

    // Metadata-level information
    metadata.setEncoding(format.getEncoding());
    parseColumnNameInformation(metadata.getId(), metadata, format);

    repository.save(metadata);
}
use of org.talend.dataprep.api.dataset.DataSetContent in project data-prep by Talend.
the class DataSetJSONTest method testWrite1.
/**
 * Serializes a data set holding only metadata (one string column, CSV content settings, local store location) and
 * compares the produced JSON with the expected resource file.
 */
@Test
public void testWrite1() throws Exception {
    // Given: a single string column with empty/invalid/valid set to 0/10/50
    final ColumnMetadata.Builder column = ColumnMetadata.Builder.column()
            .id(5)
            .name("column1")
            .type(Type.STRING)
            .empty(0)
            .invalid(10)
            .valid(50);
    final DataSetMetadata dataSetMetadata = metadataBuilder.metadata()
            .id("1234")
            .name("name")
            .author("author")
            .created(0)
            .row(column)
            .build();

    // CSV content settings
    final DataSetContent csvContent = dataSetMetadata.getContent();
    csvContent.addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ",");
    csvContent.setFormatFamilyId(new CSVFormatFamily().getBeanId());
    csvContent.setMediaType("text/csv");

    // Lifecycle flags and location
    dataSetMetadata.getLifecycle().qualityAnalyzed(true);
    dataSetMetadata.getLifecycle().schemaAnalyzed(true);
    dataSetMetadata.setLocation(new LocalStoreLocation());

    // When: the data set is written out
    final StringWriter output = new StringWriter();
    final DataSet written = new DataSet();
    written.setMetadata(dataSetMetadata);
    to(written, output);

    // Then
    // NOTE(review): the expected resource is "test2.json" although the test is named testWrite1 - confirm.
    assertThat(output.toString(), sameJSONAsFile(DataSetJSONTest.class.getResourceAsStream("test2.json")));
}
Aggregations