Search in sources :

Example 71 with TDPException

use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.

the class FormatAnalysis method parseColumnNameInformation.

/**
 * Runs the schema guesser of the given format on the dataset's raw content and records the result on the
 * metadata: sheet name and draft flag always, plus either the whole parser result (draft schema, which is
 * then saved to the repository) or the columns of the first detected sheet (non-draft schema).
 *
 * @param dataSetId the dataset id, used to tag the log messages
 * @param metadata the dataset metadata to parse and update
 * @param format the format whose family provides the schema guesser
 * @throws TDPException with UNABLE_TO_READ_DATASET_CONTENT if reading fails or no sheet content is detected
 */
private void parseColumnNameInformation(String dataSetId, DataSetMetadata metadata, Format format) {
    final Marker marker = Markers.dataset(dataSetId);
    LOG.debug(marker, "Parsing column information...");
    // NOTE(review): the second argument of getAsRaw presumably bounds how much content is read -- confirm.
    try (InputStream rawContent = store.getAsRaw(metadata, 10)) {
        final Schema guessed = format.getFormatFamily().getSchemaGuesser()
                .parse(new SchemaParser.Request(rawContent, metadata));
        metadata.setSheetName(guessed.getSheetName());
        metadata.setDraft(guessed.draft());
        if (!guessed.draft()) {
            // A final schema without any sheet content is reported as an unreadable dataset.
            if (guessed.getSheetContents().isEmpty()) {
                throw new IOException("Parser could not detect file format for " + metadata.getId());
            }
            metadata.getRowMetadata().setColumns(guessed.getSheetContents().get(0).getColumnMetadatas());
        } else {
            // Draft schema: keep the whole parser result on the metadata, persist it and stop here.
            metadata.setSchemaParserResult(guessed);
            repository.save(metadata);
            LOG.info(Markers.dataset(dataSetId), "format analysed");
            return;
        }
    } catch (IOException e) {
        throw new TDPException(DataSetErrorCodes.UNABLE_TO_READ_DATASET_CONTENT, e);
    }
    LOG.debug(marker, "Parsed column information.");
}
Also used : TDPException(org.talend.dataprep.exception.TDPException) SequenceInputStream(java.io.SequenceInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) Marker(org.slf4j.Marker) IOException(java.io.IOException)

Example 72 with TDPException

use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.

the class FormatAnalysis method verifyFormat.

/**
 * Checks the detected format for validity and throws if it cannot be handled.
 *
 * @param detectedFormat the detected format of the dataset, may be {@code null}
 * @throws TDPException with UNSUPPORTED_CONTENT if the format is {@code null} or its family is an
 * {@code UnsupportedFormatFamily}; with UNSUPPORTED_ENCODING if the encoding name cannot be resolved to a
 * charset or the resolved charset is not one of the supported charsets
 */
private void verifyFormat(Format detectedFormat) {
    if (detectedFormat == null || UnsupportedFormatFamily.class.isAssignableFrom(detectedFormat.getFormatFamily().getClass())) {
        // Throw exception to indicate unsupported content
        throw new TDPException(DataSetErrorCodes.UNSUPPORTED_CONTENT);
    }
    final Charset detectedCharset;
    try {
        detectedCharset = Charset.forName(detectedFormat.getEncoding());
    } catch (IllegalArgumentException e) {
        // Charset.forName signals a null, malformed or unknown charset name with IllegalArgumentException
        // (or one of its subclasses). Report it as an unsupported encoding instead of letting the raw
        // runtime exception escape, and keep the cause for diagnosis.
        throw new TDPException(DataSetErrorCodes.UNSUPPORTED_ENCODING, e);
    }
    if (!EncodingSupport.getSupportedCharsets().contains(detectedCharset)) {
        throw new TDPException(DataSetErrorCodes.UNSUPPORTED_ENCODING);
    }
}
Also used : TDPException(org.talend.dataprep.exception.TDPException) Charset(java.nio.charset.Charset)

Example 73 with TDPException

use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.

the class CSVSchemaParser method guessSeparator.

/**
 * Reads the CSV content line by line, hands every line to {@code processLine} and finally lets
 * {@code chooseSeparator} pick the separator. When a separator is forced, it is the only candidate.
 *
 * @param is the input stream to read the CSV from
 * @param encoding the encoding to use for the reading
 * @param forcedSeparator the separator to use, if forced
 * @return the chosen CSV separator, or null if an unexpected (non I/O) error occurred while guessing
 * @throws TDPException with UNABLE_TO_READ_CONTENT if an I/O error occurs
 */
private Separator guessSeparator(InputStream is, String encoding, Optional<Character> forcedSeparator) {
    try (CSVStreamReader reader = new CSVStreamReader(is, encoding, SIZE_LIMIT, LINE_LIMIT)) {
        final Map<Character, Separator> separatorsByChar = new HashMap<>();
        final List<String> sample = new ArrayList<>();
        final List<Character> candidates = forcedSeparator.isPresent()
                ? Collections.singletonList(forcedSeparator.get())
                : DEFAULT_VALID_SEPARATORS;
        for (String current = reader.readLine(); current != null; current = reader.readLine()) {
            // Keep only the first non-empty lines as a small sample for the final choice.
            if (sample.size() < SMALL_SAMPLE_LIMIT && !current.isEmpty()) {
                sample.add(current);
            }
            processLine(current, separatorsByChar, candidates, reader.getLineCount());
        }
        return chooseSeparator(new ArrayList<>(separatorsByChar.values()), reader.getLineCount(), sample, forcedSeparator);
    } catch (IOException ioFailure) {
        throw new TDPException(CommonErrorCodes.UNABLE_TO_READ_CONTENT, ioFailure);
    } catch (Exception unexpected) {
        // Best effort: any other failure is only logged at debug level and reported as "no separator".
        LOGGER.debug("Unable to read content from content using encoding '{}'.", encoding, unexpected);
        return null;
    }
}
Also used : TDPException(org.talend.dataprep.exception.TDPException)

Example 74 with TDPException

use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.

the class HtmlSchemaParser method parse.

/**
 * Parses the request content with an {@code HtmlParser} and builds a non-draft schema holding a single
 * sheet. Each header value collected by the content handler becomes a {@code STRING} column whose id is
 * its position in the header list.
 *
 * @see SchemaParser#parse(Request)
 */
@Override
public Schema parse(Request request) {
    try {
        final SimpleHeadersContentHandler headersHandler = new SimpleHeadersContentHandler();
        final InputStream html = request.getContent();
        new HtmlParser().parse(html, headersHandler, new Metadata(), new ParseContext());

        final List<ColumnMetadata> columnList = new ArrayList<>(headersHandler.getHeaderValues().size());
        for (String header : headersHandler.getHeaderValues()) {
            // ATM not doing any complicated type calculation: every column is a string.
            final ColumnMetadata column = ColumnMetadata.Builder.column()
                    .type(Type.STRING)
                    .name(header)
                    .id(columnList.size())
                    .build();
            columnList.add(column);
        }

        final Schema.SheetContent onlySheet = new Schema.SheetContent();
        onlySheet.setColumnMetadatas(columnList);
        return Schema.Builder.parserResult()
                .sheetContents(Collections.singletonList(onlySheet))
                .draft(false)
                .build();
    } catch (Exception failure) {
        LOGGER.debug("Exception during parsing html request :" + failure.getMessage(), failure);
        throw new TDPException(CommonErrorCodes.UNEXPECTED_EXCEPTION, failure);
    }
}
Also used : ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) InputStream(java.io.InputStream) Schema(org.talend.dataprep.schema.Schema) Metadata(org.apache.tika.metadata.Metadata) TDPException(org.talend.dataprep.exception.TDPException) HtmlParser(org.apache.tika.parser.html.HtmlParser) ParseContext(org.apache.tika.parser.ParseContext)

Example 75 with TDPException

use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.

the class AESEncryption method decrypt.

/**
 * Base64-decodes the specified string, decrypts the resulting bytes with a cipher initialised in decrypt
 * mode, and returns the plain text decoded with the class encoding.
 *
 * @param src the Base64 representation of the encrypted value
 * @return the decrypted value of the specified {@link String}
 * @throws GeneralSecurityException In case of security-related issues.
 * @throws TDPException with UNEXPECTED_EXCEPTION if the configured encoding is not supported
 */
public static String decrypt(final String src) throws GeneralSecurityException {
    final Cipher cipher = Cipher.getInstance(ALGO);
    cipher.init(Cipher.DECRYPT_MODE, secretKey);
    final byte[] plainBytes = cipher.doFinal(Base64.getDecoder().decode(src));
    final String plainText;
    try {
        plainText = new String(plainBytes, ENCODING);
    } catch (UnsupportedEncodingException e) {
        throw new TDPException(CommonErrorCodes.UNEXPECTED_EXCEPTION, e);
    }
    return plainText;
}
Also used : TDPException(org.talend.dataprep.exception.TDPException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Cipher(javax.crypto.Cipher)

Aggregations

TDPException (org.talend.dataprep.exception.TDPException)123 IOException (java.io.IOException)43 InputStream (java.io.InputStream)25 DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)21 Test (org.junit.Test)17 ApiOperation (io.swagger.annotations.ApiOperation)16 Timed (org.talend.dataprep.metrics.Timed)14 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)13 DataSet (org.talend.dataprep.api.dataset.DataSet)13 ServiceBaseTest (org.talend.ServiceBaseTest)11 StringEntity (org.apache.http.entity.StringEntity)10 JsonParser (com.fasterxml.jackson.core.JsonParser)9 URISyntaxException (java.net.URISyntaxException)9 HttpPost (org.apache.http.client.methods.HttpPost)9 Autowired (org.springframework.beans.factory.annotation.Autowired)9 ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata)9 List (java.util.List)8 URIBuilder (org.apache.http.client.utils.URIBuilder)8 Marker (org.slf4j.Marker)8 ErrorCode (org.talend.daikon.exception.error.ErrorCode)8