use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class FormatAnalysis method parseColumnNameInformation.
/**
 * Guesses the schema of a dataset from its raw content and records the outcome on the metadata.
 * <p>
 * The sheet name and draft flag reported by the schema guesser are always copied onto the metadata. When the
 * schema is a draft, the parser result is attached to the metadata, the metadata is saved to the repository and
 * the method returns immediately. Otherwise the columns of the first sheet become the columns of the row metadata
 * (the metadata is not saved by this method in that case).
 *
 * @param dataSetId the dataset id, used to build the logging marker
 * @param metadata the dataset metadata to parse and update
 * @param format the format whose family supplies the schema guesser
 * @throws TDPException with {@code UNABLE_TO_READ_DATASET_CONTENT} if reading the content fails or if a
 * non-draft schema contains no sheet at all
 */
private void parseColumnNameInformation(String dataSetId, DataSetMetadata metadata, Format format) {
    final Marker marker = Markers.dataset(dataSetId);
    LOG.debug(marker, "Parsing column information...");
    try (InputStream rawContent = store.getAsRaw(metadata, 10)) {
        final Schema guessedSchema = format.getFormatFamily() //
                .getSchemaGuesser() //
                .parse(new SchemaParser.Request(rawContent, metadata));
        metadata.setSheetName(guessedSchema.getSheetName());
        metadata.setDraft(guessedSchema.draft());
        if (!guessedSchema.draft()) {
            // A definitive schema must describe at least one sheet; the IOException is wrapped just below.
            if (guessedSchema.getSheetContents().isEmpty()) {
                throw new IOException("Parser could not detect file format for " + metadata.getId());
            }
            metadata.getRowMetadata().setColumns(guessedSchema.getSheetContents().get(0).getColumnMetadatas());
        } else {
            // Draft schema: keep the whole parser result on the metadata and persist it, then stop here.
            metadata.setSchemaParserResult(guessedSchema);
            repository.save(metadata);
            LOG.info(Markers.dataset(dataSetId), "format analysed");
            return;
        }
    } catch (IOException ioe) {
        throw new TDPException(DataSetErrorCodes.UNABLE_TO_READ_DATASET_CONTENT, ioe);
    }
    LOG.debug(marker, "Parsed column information.");
}
use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class FormatAnalysis method verifyFormat.
/**
 * Checks the detected format for validity and throws if it cannot be handled.
 * <p>
 * The format is rejected when it is {@code null}, when its format family is an {@link UnsupportedFormatFamily},
 * or when its encoding is not one of {@link EncodingSupport#getSupportedCharsets()}. An encoding name that the
 * JVM cannot resolve (null, syntactically illegal or unknown charset) is reported as an unsupported encoding
 * as well, instead of leaking the {@link IllegalArgumentException} raised by {@link Charset#forName(String)}.
 *
 * @param detectedFormat the detected format of the dataset, may be {@code null}
 * @throws TDPException with {@code UNSUPPORTED_CONTENT} if the format is null or of an unsupported family,
 * or with {@code UNSUPPORTED_ENCODING} if the encoding is unknown or not supported
 */
private void verifyFormat(Format detectedFormat) {
    if (detectedFormat == null || UnsupportedFormatFamily.class.isAssignableFrom(detectedFormat.getFormatFamily().getClass())) {
        // Throw exception to indicate unsupported content
        throw new TDPException(DataSetErrorCodes.UNSUPPORTED_CONTENT);
    }

    final Charset detectedCharset;
    try {
        detectedCharset = Charset.forName(detectedFormat.getEncoding());
    } catch (IllegalArgumentException e) {
        // Covers IllegalCharsetNameException, UnsupportedCharsetException and a null charset name.
        throw new TDPException(DataSetErrorCodes.UNSUPPORTED_ENCODING, e);
    }

    final Set<Charset> supportedEncodings = EncodingSupport.getSupportedCharsets();
    if (!supportedEncodings.contains(detectedCharset)) {
        throw new TDPException(DataSetErrorCodes.UNSUPPORTED_ENCODING);
    }
}
use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class CSVSchemaParser method guessSeparator.
/**
 * Determines the separator of a CSV input stream, either by guessing it or by honouring a forced one.
 * <p>
 * Every line read is handed to {@code processLine} together with the candidate separators (the forced separator
 * alone when present, {@code DEFAULT_VALID_SEPARATORS} otherwise). Non-empty lines are additionally kept as
 * samples until {@code SMALL_SAMPLE_LIMIT} of them have been collected. The final decision is delegated to
 * {@code chooseSeparator}.
 *
 * @param is the input stream to read the CSV from
 * @param encoding the encoding to use for the reading
 * @param forcedSeparator the separator to impose, if any
 * @return the separator picked by {@code chooseSeparator}, or {@code null} when a non-I/O exception occurred
 * while reading
 * @throws TDPException with {@code UNABLE_TO_READ_CONTENT} if an {@link IOException} occurs
 */
private Separator guessSeparator(InputStream is, String encoding, Optional<Character> forcedSeparator) {
    try (CSVStreamReader reader = new CSVStreamReader(is, encoding, SIZE_LIMIT, LINE_LIMIT)) {
        final Map<Character, Separator> separatorsByChar = new HashMap<>();
        final List<String> samples = new ArrayList<>();
        final List<Character> candidates = forcedSeparator //
                .map(Collections::singletonList) //
                .orElse(DEFAULT_VALID_SEPARATORS);

        for (String current = reader.readLine(); current != null; current = reader.readLine()) {
            final boolean worthSampling = !current.isEmpty() && samples.size() < SMALL_SAMPLE_LIMIT;
            if (worthSampling) {
                samples.add(current);
            }
            // Empty lines are not sampled but are still passed on for processing.
            processLine(current, separatorsByChar, candidates, reader.getLineCount());
        }

        final List<Separator> foundSeparators = new ArrayList<>(separatorsByChar.values());
        return chooseSeparator(foundSeparators, reader.getLineCount(), samples, forcedSeparator);
    } catch (IOException ioe) {
        throw new TDPException(CommonErrorCodes.UNABLE_TO_READ_CONTENT, ioe);
    } catch (Exception other) {
        // Anything that is not an I/O failure is logged at debug level and reported as "no separator".
        LOGGER.debug("Unable to read content from content using encoding '{}'.", encoding, other);
        return null;
    }
}
use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class HtmlSchemaParser method parse.
/**
 * Builds a schema from the header values found in the HTML content of the request.
 * <p>
 * Each header value collected by the content handler becomes one {@code STRING} column whose id is its position
 * in the resulting list. The returned schema holds a single sheet and is never a draft.
 *
 * @param request the parsing request carrying the content to read
 * @return a non-draft schema with exactly one sheet describing the detected columns
 * @throws TDPException with {@code UNEXPECTED_EXCEPTION} if anything goes wrong while parsing
 * @see SchemaParser#parse(Request)
 */
@Override
public Schema parse(Request request) {
    try {
        final SimpleHeadersContentHandler headerCollector = new SimpleHeadersContentHandler();
        final InputStream htmlContent = request.getContent();
        final HtmlParser parser = new HtmlParser();
        parser.parse(htmlContent, headerCollector, new Metadata(), new ParseContext());

        final List<ColumnMetadata> detectedColumns = new ArrayList<>(headerCollector.getHeaderValues().size());
        for (String header : headerCollector.getHeaderValues()) {
            // ATM not doing any complicated type calculation: every column is a STRING.
            final ColumnMetadata column = ColumnMetadata.Builder.column() //
                    .type(Type.STRING) //
                    .name(header) //
                    .id(detectedColumns.size()) //
                    .build();
            detectedColumns.add(column);
        }

        final Schema.SheetContent singleSheet = new Schema.SheetContent();
        singleSheet.setColumnMetadatas(detectedColumns);

        return Schema.Builder.parserResult() //
                .sheetContents(Collections.singletonList(singleSheet)) //
                .draft(false) //
                .build();
    } catch (Exception failure) {
        LOGGER.debug("Exception during parsing html request :" + failure.getMessage(), failure);
        throw new TDPException(CommonErrorCodes.UNEXPECTED_EXCEPTION, failure);
    }
}
use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class AESEncryption method decrypt.
/**
 * Decrypts a Base64-encoded, encrypted string and returns the original value.
 * <p>
 * The input is Base64-decoded, run through a cipher obtained for {@code ALGO} and initialised in decrypt mode
 * with {@code secretKey}, and the resulting bytes are converted to text using {@code ENCODING}.
 *
 * @param src the encrypted, Base64-encoded {@link String}
 * @return the decrypted value of the specified {@link String}
 * @throws GeneralSecurityException in case of security-related issues (cipher creation, initialisation or
 * decryption failure)
 * @throws TDPException with {@code UNEXPECTED_EXCEPTION} if {@code ENCODING} is not supported
 */
public static String decrypt(final String src) throws GeneralSecurityException {
    final Cipher cipher = Cipher.getInstance(ALGO);
    cipher.init(Cipher.DECRYPT_MODE, secretKey);
    final byte[] plainBytes = cipher.doFinal(Base64.getDecoder().decode(src));
    final String plainText;
    try {
        plainText = new String(plainBytes, ENCODING);
    } catch (UnsupportedEncodingException unsupported) {
        throw new TDPException(CommonErrorCodes.UNEXPECTED_EXCEPTION, unsupported);
    }
    return plainText;
}
Aggregations