Search in sources :

Example 86 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class DataSetService method preview.

/**
 * Returns a preview of the data set content for the given id (first 100 rows).
 * <p>
 * The response status is set through {@link HttpResponseContext}:
 * <ul>
 * <li>{@link HttpStatus#NO_CONTENT} with an empty data set when no metadata exists for the id, when the schema
 * parser result holds no sheet at all, or when the requested sheet cannot be found;</li>
 * <li>{@link HttpStatus#MOVED_PERMANENTLY} with a <code>Location</code> header pointing to
 * <code>/datasets/{id}/content</code> when the data set is not a draft.</li>
 * </ul>
 *
 * @param metadata If <code>true</code>, includes data set metadata information.
 * @param sheetName the sheet name to preview; when empty, the sheet name already stored in the metadata is used,
 * falling back to the first sheet of the schema parser result.
 * @param dataSetId A data set id.
 * @return the preview (at most 100 records), or an empty data set in the cases listed above.
 */
@RequestMapping(value = "/datasets/{id}/preview", method = RequestMethod.GET)
@ApiOperation(value = "Get a data preview set by id", notes = "Get a data set preview content based on provided id. Not valid or non existing data set id returns empty content. Data set not in drat status will return a redirect 301")
@Timed
@ResponseBody
public DataSet preview(@RequestParam(defaultValue = "true") @ApiParam(name = "metadata", value = "Include metadata information in the response") boolean metadata, @RequestParam(defaultValue = "") @ApiParam(name = "sheetName", value = "Sheet name to preview") String sheetName, @PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the requested data set") String dataSetId) {
    DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
    if (dataSetMetadata == null) {
        HttpResponseContext.status(HttpStatus.NO_CONTENT);
        // No data set, returns empty content.
        return DataSet.empty();
    }
    if (!dataSetMetadata.isDraft()) {
        // Moved to get data set content operation
        HttpResponseContext.status(HttpStatus.MOVED_PERMANENTLY);
        HttpResponseContext.header("Location", "/datasets/" + dataSetId + "/content");
        // dataset not anymore a draft so preview doesn't make sense.
        return DataSet.empty();
    }
    if (StringUtils.isNotEmpty(sheetName)) {
        dataSetMetadata.setSheetName(sheetName);
    }
    // take care of previous data without schema parser result
    if (dataSetMetadata.getSchemaParserResult() != null) {
        final List<Schema.SheetContent> sheetContents = dataSetMetadata.getSchemaParserResult().getSheetContents();
        // sheet not yet set correctly so use the first one
        if (StringUtils.isEmpty(dataSetMetadata.getSheetName())) {
            if (sheetContents.isEmpty()) {
                // Nothing to fall back on: answer like the "sheet not found" case instead of failing on get(0).
                HttpResponseContext.status(HttpStatus.NO_CONTENT);
                return DataSet.empty();
            }
            String firstSheetName = sheetContents.get(0).getName();
            LOG.debug("preview for dataSetMetadata: {} with sheetName: {}", dataSetId, firstSheetName);
            dataSetMetadata.setSheetName(firstSheetName);
        }
        String theSheetName = dataSetMetadata.getSheetName();
        Optional<Schema.SheetContent> sheetContentFound = sheetContents.stream().filter(sheetContent -> theSheetName.equals(sheetContent.getName())).findFirst();
        if (!sheetContentFound.isPresent()) {
            HttpResponseContext.status(HttpStatus.NO_CONTENT);
            // No sheet found, returns empty content.
            return DataSet.empty();
        }
        List<ColumnMetadata> columnMetadatas = sheetContentFound.get().getColumnMetadatas();
        if (dataSetMetadata.getRowMetadata() == null) {
            dataSetMetadata.setRowMetadata(new RowMetadata(emptyList()));
        }
        // The previewed columns are those of the selected sheet.
        dataSetMetadata.getRowMetadata().setColumns(columnMetadatas);
    } else {
        // Pass the id so the "{}" placeholder is actually filled in.
        LOG.warn("dataset#{} has draft status but any SchemaParserResult", dataSetId);
    }
    // Build the result
    DataSet dataSet = new DataSet();
    if (metadata) {
        dataSet.setMetadata(conversionService.convert(dataSetMetadata, UserDataSetMetadata.class));
    }
    // Preview is capped to the first 100 records of the content store stream.
    dataSet.setRecords(contentStore.stream(dataSetMetadata).limit(100));
    return dataSet;
}
Also used : VolumeMetered(org.talend.dataprep.metrics.VolumeMetered) RequestParam(org.springframework.web.bind.annotation.RequestParam) ImportBuilder(org.talend.dataprep.api.dataset.Import.ImportBuilder) FormatFamilyFactory(org.talend.dataprep.schema.FormatFamilyFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) ApiParam(io.swagger.annotations.ApiParam) StringUtils(org.apache.commons.lang3.StringUtils) TEXT_PLAIN_VALUE(org.springframework.http.MediaType.TEXT_PLAIN_VALUE) SortAndOrderHelper.getDataSetMetadataComparator(org.talend.dataprep.util.SortAndOrderHelper.getDataSetMetadataComparator) Collections.singletonList(java.util.Collections.singletonList) SemanticDomain(org.talend.dataprep.api.dataset.statistics.SemanticDomain) BeanConversionService(org.talend.dataprep.conversions.BeanConversionService) PipedInputStream(java.io.PipedInputStream) DistributedLock(org.talend.dataprep.lock.DistributedLock) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) DataprepBundle.message(org.talend.dataprep.i18n.DataprepBundle.message) UserData(org.talend.dataprep.api.user.UserData) TaskExecutor(org.springframework.core.task.TaskExecutor) MAX_STORAGE_MAY_BE_EXCEEDED(org.talend.dataprep.exception.error.DataSetErrorCodes.MAX_STORAGE_MAY_BE_EXCEEDED) DataSet(org.talend.dataprep.api.dataset.DataSet) LocalStoreLocation(org.talend.dataprep.api.dataset.location.LocalStoreLocation) FormatFamily(org.talend.dataprep.schema.FormatFamily) Resource(javax.annotation.Resource) Set(java.util.Set) DatasetUpdatedEvent(org.talend.dataprep.dataset.event.DatasetUpdatedEvent) RestController(org.springframework.web.bind.annotation.RestController) QuotaService(org.talend.dataprep.dataset.store.QuotaService) Stream(java.util.stream.Stream) StreamSupport.stream(java.util.stream.StreamSupport.stream) FlagNames(org.talend.dataprep.api.dataset.row.FlagNames) UNEXPECTED_CONTENT(org.talend.dataprep.exception.error.CommonErrorCodes.UNEXPECTED_CONTENT) 
Analyzers(org.talend.dataquality.common.inference.Analyzers) DataSetLocatorService(org.talend.dataprep.api.dataset.location.locator.DataSetLocatorService) Callable(java.util.concurrent.Callable) Schema(org.talend.dataprep.schema.Schema) ArrayList(java.util.ArrayList) Value(org.springframework.beans.factory.annotation.Value) RequestBody(org.springframework.web.bind.annotation.RequestBody) DataSetLocationService(org.talend.dataprep.api.dataset.location.DataSetLocationService) AnalyzerService(org.talend.dataprep.quality.AnalyzerService) UserDataRepository(org.talend.dataprep.user.store.UserDataRepository) Markers(org.talend.dataprep.log.Markers) Api(io.swagger.annotations.Api) DraftValidator(org.talend.dataprep.schema.DraftValidator) HttpResponseContext(org.talend.dataprep.http.HttpResponseContext) Sort(org.talend.dataprep.util.SortAndOrderHelper.Sort) IOException(java.io.IOException) PipedOutputStream(java.io.PipedOutputStream) FormatAnalysis(org.talend.dataprep.dataset.service.analysis.synchronous.FormatAnalysis) ContentAnalysis(org.talend.dataprep.dataset.service.analysis.synchronous.ContentAnalysis) SchemaAnalysis(org.talend.dataprep.dataset.service.analysis.synchronous.SchemaAnalysis) HttpStatus(org.springframework.http.HttpStatus) FilterService(org.talend.dataprep.api.filter.FilterService) Marker(org.slf4j.Marker) NullOutputStream(org.apache.commons.io.output.NullOutputStream) StatisticsAdapter(org.talend.dataprep.dataset.StatisticsAdapter) Timed(org.talend.dataprep.metrics.Timed) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) PathVariable(org.springframework.web.bind.annotation.PathVariable) DataSetMetadataBuilder(org.talend.dataprep.dataset.DataSetMetadataBuilder) URLDecoder(java.net.URLDecoder) DataSetErrorCodes(org.talend.dataprep.exception.error.DataSetErrorCodes) PUT(org.springframework.web.bind.annotation.RequestMethod.PUT) LoggerFactory(org.slf4j.LoggerFactory) SEMANTIC(org.talend.dataprep.quality.AnalyzerService.Analysis.SEMANTIC) 
ApiOperation(io.swagger.annotations.ApiOperation) UNABLE_TO_CREATE_OR_UPDATE_DATASET(org.talend.dataprep.exception.error.DataSetErrorCodes.UNABLE_TO_CREATE_OR_UPDATE_DATASET) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) StrictlyBoundedInputStream(org.talend.dataprep.dataset.store.content.StrictlyBoundedInputStream) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) UNSUPPORTED_CONTENT(org.talend.dataprep.exception.error.DataSetErrorCodes.UNSUPPORTED_CONTENT) TimeToLive(org.talend.dataprep.cache.ContentCache.TimeToLive) Order(org.talend.dataprep.util.SortAndOrderHelper.Order) Collections.emptyList(java.util.Collections.emptyList) PublicAPI(org.talend.dataprep.security.PublicAPI) RequestMethod(org.springframework.web.bind.annotation.RequestMethod) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) ContentCache(org.talend.dataprep.cache.ContentCache) INVALID_DATASET_NAME(org.talend.dataprep.exception.error.DataSetErrorCodes.INVALID_DATASET_NAME) List(java.util.List) Optional(java.util.Optional) Analyzer(org.talend.dataquality.common.inference.Analyzer) RequestHeader(org.springframework.web.bind.annotation.RequestHeader) Pattern(java.util.regex.Pattern) Security(org.talend.dataprep.security.Security) Spliterator(java.util.Spliterator) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) ComponentProperties(org.talend.dataprep.parameters.jsonschema.ComponentProperties) TDPException(org.talend.dataprep.exception.TDPException) JsonErrorCodeDescription(org.talend.dataprep.exception.json.JsonErrorCodeDescription) RequestMapping(org.springframework.web.bind.annotation.RequestMapping) UNABLE_CREATE_DATASET(org.talend.dataprep.exception.error.DataSetErrorCodes.UNABLE_CREATE_DATASET) HashMap(java.util.HashMap) GET(org.springframework.web.bind.annotation.RequestMethod.GET) Import(org.talend.dataprep.api.dataset.Import) ExceptionContext.build(org.talend.daikon.exception.ExceptionContext.build) 
ExceptionContext(org.talend.daikon.exception.ExceptionContext) Charset(java.nio.charset.Charset) UpdateColumnParameters(org.talend.dataprep.dataset.service.api.UpdateColumnParameters) VersionService(org.talend.dataprep.api.service.info.VersionService) POST(org.springframework.web.bind.annotation.RequestMethod.POST) OutputStream(java.io.OutputStream) DataSetLocation(org.talend.dataprep.api.dataset.DataSetLocation) Logger(org.slf4j.Logger) LocaleContextHolder.getLocale(org.springframework.context.i18n.LocaleContextHolder.getLocale) UpdateDataSetCacheKey(org.talend.dataprep.dataset.service.cache.UpdateDataSetCacheKey) IOUtils(org.apache.commons.compress.utils.IOUtils) APPLICATION_JSON_VALUE(org.springframework.http.MediaType.APPLICATION_JSON_VALUE) ResponseBody(org.springframework.web.bind.annotation.ResponseBody) Certification(org.talend.dataprep.api.dataset.DataSetGovernance.Certification) EncodingSupport(org.talend.dataprep.configuration.EncodingSupport) Comparator(java.util.Comparator) InputStream(java.io.InputStream) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) DataSet(org.talend.dataprep.api.dataset.DataSet) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Timed(org.talend.dataprep.metrics.Timed) ApiOperation(io.swagger.annotations.ApiOperation) RequestMapping(org.springframework.web.bind.annotation.RequestMapping) ResponseBody(org.springframework.web.bind.annotation.ResponseBody)

Example 87 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class ToPEPersistentIdentifiable method run.

/**
 * Runs the upgrade in three passes:
 * <ol>
 * <li>every {@code Step} listed by the file system repository is removed from it, converted with
 * {@code turnToPersistentStep} and added to {@code preparationRepository};</li>
 * <li>every {@code Preparation} goes through the same remove / convert / add sequence using
 * {@code turnToPersistentPreparation};</li>
 * <li>every {@code PersistentPreparation} gets its list of step ids (computed from its head id) and a row
 * metadata (taken from its data set metadata when found, a new empty one otherwise), then is added back to
 * {@code preparationRepository}.</li>
 * </ol>
 */
@Override
public void run() {
    // Pass 1: steps.
    LOGGER.debug("starting upgrade from {} to {}.", Step.class, PersistentStep.class);
    final AtomicLong processedSteps = new AtomicLong(0L);
    fileSystemPreparationRepository.list(Step.class).forEach(step -> {
        fileSystemPreparationRepository.remove(step);
        final PersistentStep converted = turnToPersistentStep(step);
        preparationRepository.add(converted);
        LOGGER.debug("step {} updated to {}", step, converted);
        processedSteps.incrementAndGet();
    });
    LOGGER.info("Upgrade from {} to {} done, {} steps processed.", Step.class, PersistentStep.class, processedSteps.get());

    // Pass 2: preparations.
    LOGGER.debug("starting upgrade from {} to {}.", Preparation.class, PersistentPreparation.class);
    fileSystemPreparationRepository.list(Preparation.class).forEach(preparation -> {
        fileSystemPreparationRepository.remove(preparation);
        preparationRepository.add(turnToPersistentPreparation(preparation));
    });
    LOGGER.info("Upgrade from {} to {} done.", Preparation.class, PersistentPreparation.class);

    // Pass 3: attach step ids and row metadata to each persisted preparation.
    LOGGER.info("Migration of step ids in preparation...");
    preparationRepository.list(PersistentPreparation.class).forEach(persisted -> {
        LOGGER.info("Migration of preparation #{}", persisted.getId());
        final List<String> ids = preparationUtils.listStepsIds(persisted.getHeadId(), preparationRepository);
        persisted.setSteps(ids);
        final DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(persisted.getDataSetId());
        if (dataSetMetadata == null) {
            // No data set metadata available: fall back to an empty row metadata.
            LOGGER.info("Metadata {} not found for preparation {}.", persisted.getDataSetId(), persisted.getId());
            persisted.setRowMetadata(new RowMetadata());
        } else {
            LOGGER.info("Set metadata {} in preparation {}.", persisted.getDataSetId(), persisted.getId());
            persisted.setRowMetadata(dataSetMetadata.getRowMetadata());
        }
        preparationRepository.add(persisted);
        LOGGER.info("Migration of preparation #{} done ({} steps)", persisted.getId(), ids.size());
    });
    LOGGER.info("Migration of step ids in preparation done.");
}
Also used : PersistentStep(org.talend.dataprep.preparation.store.PersistentStep) AtomicLong(java.util.concurrent.atomic.AtomicLong) Preparation(org.talend.dataprep.api.preparation.Preparation) PersistentPreparation(org.talend.dataprep.preparation.store.PersistentPreparation) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) PersistentStep(org.talend.dataprep.preparation.store.PersistentStep) Step(org.talend.dataprep.api.preparation.Step) PersistentPreparation(org.talend.dataprep.preparation.store.PersistentPreparation) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata)

Example 88 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class FillWithIntegerIfEmptyTest method test_apply_inplace.

/**
 * Applies the action configured by <code>fillEmptyIntegerAction.json</code> to a row whose integer-typed
 * column "0001" is empty, and expects "0001" to hold "25" while "0000" keeps its value.
 */
@Test
public void test_apply_inplace() throws Exception {
    // given: three columns, the second one empty and typed as integer
    final Map<String, String> cells = new HashMap<>();
    cells.put("0000", "David Bowie");
    cells.put("0001", "");
    cells.put("0002", "Something");
    final DataSetRow row = new DataSetRow(cells);
    final RowMetadata columns = row.getRowMetadata();
    columns.getById("0001").setType(Type.INTEGER.getName());
    // NOTE(review): the "25" asserted below presumably comes from this JSON resource - confirm.
    final Map<String, String> parameters =
            ActionMetadataTestUtils.parseParameters(getClass().getResourceAsStream("fillEmptyIntegerAction.json"));

    // when
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, parameters));

    // then
    Assert.assertEquals("25", row.get("0001"));
    Assert.assertEquals("David Bowie", row.get("0000"));
}
Also used : HashMap(java.util.HashMap) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) Test(org.junit.Test) AbstractMetadataBaseTest(org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest)

Example 89 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class FillWithIntegerIfEmptyTest method should_fill_empty_integer_other_column.

/**
 * Runs the action in "other column" mode with column "0002" selected, and expects the empty integer column
 * "0001" to receive the value of "0002" ("10") while "0000" keeps its value.
 */
@Test
public void should_fill_empty_integer_other_column() throws Exception {
    // given: "0001" is empty, "0002" holds the value to copy; both are typed as integer
    final Map<String, String> cells = new HashMap<>();
    cells.put("0000", "David Bowie");
    cells.put("0001", "");
    cells.put("0002", "10");
    final DataSetRow row = new DataSetRow(cells);
    final RowMetadata columns = row.getRowMetadata();
    columns.getById("0001").setType(Type.INTEGER.getName());
    columns.getById("0002").setType(Type.INTEGER.getName());
    final Map<String, String> parameters =
            ActionMetadataTestUtils.parseParameters(getClass().getResourceAsStream("fillEmptyIntegerAction.json"));

    // when: the parsed parameters are overridden to take the fill value from column "0002"
    parameters.put(FillIfEmpty.MODE_PARAMETER, FillIfEmpty.OTHER_COLUMN_MODE);
    parameters.put(FillIfEmpty.SELECTED_COLUMN_PARAMETER, "0002");
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, parameters));

    // then
    Assert.assertEquals("10", row.get("0001"));
    Assert.assertEquals("David Bowie", row.get("0000"));
}
Also used : HashMap(java.util.HashMap) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) Test(org.junit.Test) AbstractMetadataBaseTest(org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest)

Example 90 with RowMetadata

use of org.talend.dataprep.api.dataset.RowMetadata in project data-prep by Talend.

the class FillWithIntegerTest method should_not_fill_empty_integer.

/**
 * Applies the action configured by <code>fillEmptyIntegerAction.json</code> to a row whose integer-typed
 * column "0001" already holds "not empty", and expects "0001" to hold "25" afterwards while "0000" keeps
 * its value.
 */
@Test
public void should_not_fill_empty_integer() throws Exception {
    // given: the integer-typed column "0001" is NOT empty
    final Map<String, String> cells = new HashMap<>();
    cells.put("0000", "David Bowie");
    cells.put("0001", "not empty");
    cells.put("0002", "Something");
    final DataSetRow row = new DataSetRow(cells);
    final RowMetadata columns = row.getRowMetadata();
    columns.getById("0001").setType(Type.INTEGER.getName());
    final Map<String, String> parameters =
            ActionMetadataTestUtils.parseParameters(getClass().getResourceAsStream("fillEmptyIntegerAction.json"));

    // when
    ActionTestWorkbench.test(row, actionRegistry, factory.create(action, parameters));

    // then
    // NOTE(review): the value is expected to be overwritten even though the cell was not empty; this reads
    // oddly against the test name - confirm it matches the action under test.
    Assert.assertEquals("25", row.get("0001"));
    Assert.assertEquals("David Bowie", row.get("0000"));
}
Also used : HashMap(java.util.HashMap) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) Test(org.junit.Test) AbstractMetadataBaseTest(org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest)

Aggregations

RowMetadata (org.talend.dataprep.api.dataset.RowMetadata)199 Test (org.junit.Test)130 DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow)112 ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata)87 AbstractMetadataBaseTest (org.talend.dataprep.transformation.actions.AbstractMetadataBaseTest)68 HashMap (java.util.HashMap)48 ActionContext (org.talend.dataprep.transformation.api.action.context.ActionContext)21 DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)20 RunnableAction (org.talend.dataprep.transformation.actions.common.RunnableAction)19 TransformationContext (org.talend.dataprep.transformation.api.action.context.TransformationContext)18 ArrayList (java.util.ArrayList)16 DataSet (org.talend.dataprep.api.dataset.DataSet)11 List (java.util.List)9 Statistics (org.talend.dataprep.api.dataset.statistics.Statistics)9 Before (org.junit.Before)7 SemanticDomain (org.talend.dataprep.api.dataset.statistics.SemanticDomain)6 Preparation (org.talend.dataprep.api.preparation.Preparation)6 Response (com.jayway.restassured.response.Response)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 Logger (org.slf4j.Logger)5