use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class OptimizedExportStrategy method performOptimizedTransform.
/**
 * Exports a preparation starting from previously cached content instead of the raw dataset.
 * <p>
 * The content stored in {@code contentCache} under the key provided by {@link OptimizedPreparationInput} is read
 * as a {@link DataSet}, the matching steps of the preparation are applied to it, and the result is written to both
 * {@code outputStream} and a new {@code contentCache} entry. That new entry is evicted if the transformation fails.
 *
 * @param parameters the export parameters (preparation id, export type, source, arguments and filter are read).
 * @param outputStream where the transformed content is written.
 * @throws IllegalStateException if no {@link OptimizedPreparationInput} can be computed from the parameters.
 * @throws TDPException as is when raised by the transformation, or with
 * {@link TransformationErrorCodes#UNABLE_TO_TRANSFORM_DATASET} wrapping any other exception.
 * @throws IOException declared by the signature (NOTE(review): failures inside the transformation are wrapped in
 * a {@link TDPException}, confirm whether {@code OptimizedPreparationInput#invoke()} can still raise it).
 */
private void performOptimizedTransform(ExportParameters parameters, OutputStream outputStream) throws IOException {
    // Initial check
    final OptimizedPreparationInput optimizedPreparationInput = new OptimizedPreparationInput(parameters).invoke();
    if (optimizedPreparationInput == null) {
        throw new IllegalStateException("Unable to use this strategy (call accept() before calling this).");
    }
    final String preparationId = parameters.getPreparationId();
    final String dataSetId = optimizedPreparationInput.getDataSetId();
    final TransformationCacheKey transformationCacheKey = optimizedPreparationInput.getTransformationCacheKey();
    final DataSetMetadata metadata = optimizedPreparationInput.getMetadata();
    final String previousVersion = optimizedPreparationInput.getPreviousVersion();
    final String version = optimizedPreparationInput.getVersion();
    final ExportFormat format = getFormat(parameters.getExportType());

    // Get content from previous step
    try (JsonParser parser = mapper.getFactory()
            .createParser(new InputStreamReader(contentCache.get(transformationCacheKey), UTF_8))) {
        // Create dataset
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
        dataSet.setMetadata(metadata);

        // get the actions to apply (no preparation ==> dataset export ==> no actions)
        final String actions = getActions(preparationId, previousVersion, version);
        final PreparationMessage preparation = getPreparation(preparationId);
        preparation.setSteps(getMatchingSteps(preparation.getSteps(), previousVersion, version));

        LOGGER.debug("Running optimized strategy for preparation {} @ step #{}", preparationId, version);

        // create tee to broadcast to cache + service output
        final TransformationCacheKey key = cacheKeyGenerator.generateContentKey(
                dataSetId,
                preparationId,
                version,
                parameters.getExportType(),
                parameters.getFrom(),
                parameters.getArguments(),
                parameters.getFilter());
        // Parameterized messages: the key is only rendered to a string when debug logging is enabled.
        LOGGER.debug("Cache key: {}", key.getKey());
        LOGGER.debug("Cache key details: {}", key);

        try (final TeeOutputStream tee = new TeeOutputStream(outputStream,
                contentCache.put(key, ContentCache.TimeToLive.DEFAULT))) {
            final Configuration configuration = Configuration.builder()
                    .args(parameters.getArguments())
                    .outFilter(rm -> filterService.build(parameters.getFilter(), rm))
                    .sourceType(parameters.getFrom())
                    .format(format.getName())
                    .actions(actions)
                    .preparation(preparation)
                    .stepId(version)
                    .volume(Configuration.Volume.SMALL)
                    .output(tee)
                    .limit(limit)
                    .build();
            factory.get(configuration).buildExecutable(dataSet, configuration).execute();
            tee.flush();
        } catch (Throwable e) { // NOSONAR
            // Whatever went wrong, do not leave a partially written entry in the cache.
            contentCache.evict(key);
            throw e;
        }
    } catch (TDPException e) {
        // Already a functional exception: propagate untouched.
        throw e;
    } catch (Exception e) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_TRANSFORM_DATASET, e);
    }
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class PreparationExportStrategyTest method setUp.
/**
 * Wires the mocks used by the tests of this class: format lookup, dataset metadata/content commands,
 * preparation actions command, cache key generation, transformer factory and content cache.
 * <p>
 * The dataset returned by the mocked {@code DataSetGet} is an empty dataset ("ds-1234") serialized as JSON, and
 * the mocked {@code PreparationGetActions} for "prep-1234" returns an empty JSON object.
 */
@Before
public void setUp() throws Exception {
    // Charset is pinned so the JSON bytes handed to the mocks do not depend on the platform default encoding.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

    // Given
    mapper.registerModule(new Jdk8Module());
    strategy.setMapper(new ObjectMapper());
    when(formatRegistrationService.getByName(eq("JSON"))).thenReturn(new JsonFormat());

    final DataSetGetMetadata dataSetGetMetadata = mock(DataSetGetMetadata.class);
    when(applicationContext.getBean(eq(DataSetGetMetadata.class), anyVararg())).thenReturn(dataSetGetMetadata);

    // Empty dataset served (as JSON) by the mocked dataset content command
    DataSetGet dataSetGet = mock(DataSetGet.class);
    final StringWriter dataSetAsString = new StringWriter();
    DataSet dataSet = new DataSet();
    final DataSetMetadata dataSetMetadata = new DataSetMetadata("ds-1234", "", "", 0L, 0L, new RowMetadata(), "");
    final DataSetContent content = new DataSetContent();
    dataSetMetadata.setContent(content);
    dataSet.setMetadata(dataSetMetadata);
    dataSet.setRecords(Stream.empty());
    mapper.writerFor(DataSet.class).writeValue(dataSetAsString, dataSet);
    when(dataSetGet.execute()).thenReturn(new ByteArrayInputStream(dataSetAsString.toString().getBytes(utf8)));
    when(applicationContext.getBean(eq(DataSetGet.class), anyVararg())).thenReturn(dataSetGet);

    // Preparation actions: empty JSON object
    final PreparationGetActions preparationGetActions = mock(PreparationGetActions.class);
    when(preparationGetActions.execute()).thenReturn(new ByteArrayInputStream("{}".getBytes(utf8)));
    when(applicationContext.getBean(eq(PreparationGetActions.class), eq("prep-1234"), anyString()))
            .thenReturn(preparationGetActions);

    // Cache key generation always yields the same mocked key
    final TransformationCacheKey cacheKey = mock(TransformationCacheKey.class);
    when(cacheKey.getKey()).thenReturn("cache-1234");
    when(cacheKeyGenerator.generateContentKey(anyString(), anyString(), anyString(), anyString(), any(), any(),
            anyString())).thenReturn(cacheKey);

    // Transformer factory: reset first so stubbing/interactions from a previous test do not leak into this one
    final ExecutableTransformer executableTransformer = mock(ExecutableTransformer.class);
    reset(transformer);
    when(transformer.buildExecutable(any(), any())).thenReturn(executableTransformer);
    when(factory.get(any())).thenReturn(transformer);

    // Cache writes are discarded
    when(contentCache.put(any(), any())).thenReturn(new NullOutputStream());
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class RenameDataSetsWithFolderPath method run.
/**
 * Renames every dataset that is located in a folder other than "/" by appending {@code " - "} and the folder
 * path (with leading/trailing "/" stripped) to its current name, then saves it. Datasets for which no folder is
 * found, or whose folder path is "/", are left untouched.
 *
 * @see UpgradeTask#run()
 */
@Override
public void run() {
    // The stream comes from the repository and may be backed by an underlying resource: close it when done.
    try (Stream<DataSetMetadata> datasets = dataSetMetadataRepository.list()) {
        datasets.forEach(dataset -> {
            final Folder folder = folderRepository.locateEntry(dataset.getId(), FolderContentType.DATASET);
            // skip home folder
            if (folder != null && !StringUtils.equals("/", folder.getPath())) {
                String newName = dataset.getName() + " - " + StringUtils.strip(folder.getPath(), "/");
                dataset.setName(newName);
                dataSetMetadataRepository.save(dataset);
                LOG.debug("dataset #{} renamed to {}", dataset.getId(), newName);
            } else {
                LOG.debug("dataset #{} not renamed since it's in the user home folder", dataset.getId());
            }
        });
    }
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class FileSystemDataSetMetadataRepository method get.
/**
 * Reads the metadata of the given dataset from its gzipped file, under the read lock associated with the id.
 *
 * @param id the dataset id.
 * @return the deserialized metadata, or {@code null} when the file is hidden (its name starts with "."), does not
 * exist, or cannot be read.
 * @see DataSetMetadataRepository#get(String)
 */
@Override
public DataSetMetadata get(String id) {
    final File metadataFile = getFile(id);
    final String fileName = metadataFile.getName();

    if (fileName.startsWith(".")) {
        LOG.info("Ignore hidden file {}", fileName);
        return null;
    }
    if (!metadataFile.exists()) {
        LOG.info("dataset #{} not found in file system", id);
        return null;
    }

    final ReentrantReadWriteLock.ReadLock readLock = locks.getLock(id).readLock();
    readLock.lock();
    try (FileInputStream rawInput = new FileInputStream(metadataFile);
            GZIPInputStream gzipInput = new GZIPInputStream(rawInput)) {
        return mapper.readerFor(DataSetMetadata.class).readValue(gzipInput);
    } catch (IOException e) {
        // Read failures are logged and reported to the caller as "no metadata".
        LOG.error("unable to load dataset {}", id, e);
        return null;
    } finally {
        readLock.unlock();
    }
}
use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
the class InMemoryDataSetMetadataRepository method get.
/**
 * Looks up the metadata stored for the given id and returns a copy of it built through {@code metadataBuilder},
 * after {@code resetTransientValues} has been applied to the stored instance.
 *
 * @param id the dataset id.
 * @return a copy of the stored metadata, or {@code null} when nothing is stored for this id.
 * @see DataSetMetadataRepository#get(String)
 */
@Override
public DataSetMetadata get(String id) {
    final DataSetMetadata stored = store.get(id);
    if (stored != null) {
        resetTransientValues(stored);
        return metadataBuilder.metadata().copy(stored).build();
    }
    LOG.info("data set metadata #{} not found in the system", id);
    return null;
}
Aggregations