use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class CSVSchemaParser method parse.
/**
 * Builds the schema of a CSV data set: the format parameters returned by {@code guess(...)} are stored on the data
 * set content, the header is read back from those parameters, and every header entry becomes a column of type
 * {@code Type.STRING} in a single sheet named {@code META_KEY}.
 *
 * @param request container with information needed to parse the raw data.
 * @return a non-draft schema holding one sheet (named {@code META_KEY}) with one String column per header entry.
 * @throws TDPException with {@code DataSetErrorCodes.UNABLE_TO_READ_DATASET_CONTENT} when no header could be
 * retrieved, or with {@code CommonErrorCodes.UNABLE_TO_READ_CONTENT} when any other failure occurs.
 */
@Override
public Schema parse(Request request) {
    // Keep a direct reference to the only sheet instead of searching the list again for every column.
    final Schema.SheetContent metaSheet = new Schema.SheetContent(META_KEY, new ArrayList<>());
    final List<Schema.SheetContent> sheetContents = new ArrayList<>();
    sheetContents.add(metaSheet);
    try {
        final DataSetMetadata metadata = request.getMetadata();
        final Map<String, String> parameters = guess(request, metadata.getEncoding());
        metadata.getContent().setParameters(parameters);
        final List<String> header = csvFormatUtils.retrieveHeader(parameters);
        if (header == null || header.isEmpty()) {
            throw new TDPException(DataSetErrorCodes.UNABLE_TO_READ_DATASET_CONTENT);
        }
        LOGGER.debug("Columns found: {}", header);
        // By default, consider all columns as Strings (to be refined by deeper analysis).
        LOGGER.debug("Setting default type for columns...");
        int i = 0;
        for (String column : header) {
            metaSheet.getColumnMetadatas().add(column().id(i++).name(column).type(Type.STRING).build());
        }
    } catch (TDPException e) {
        // Already a functional error with its own code: do not hide it behind the generic one below.
        throw e;
    } catch (Exception e) {
        throw new TDPException(CommonErrorCodes.UNABLE_TO_READ_CONTENT, e);
    }
    return Schema.Builder.parserResult() //
            .sheetContents(sheetContents) //
            .draft(false) //
            .build();
}
use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class CSVSerializer method serialize.
/**
 * Converts CSV content to a JSON array that is produced asynchronously and exposed through a piped stream.
 * <p>
 * The conversion runs on {@code executor}; the caller reads the result from the returned stream and may stop
 * reading at any time. The write end of the pipe is always closed when the task finishes, whether it succeeded
 * or not, so the returned stream always reaches end-of-stream.
 *
 * @param rawContent the CSV content to read, decoded with the encoding found in {@code metadata}.
 * @param metadata the data set metadata providing encoding, CSV parameters and the number of header lines.
 * @param limit passed as is to {@code writeLineContent}.
 * @return the read end of the pipe the JSON output is written to.
 * @throws TDPException with {@code CommonErrorCodes.UNABLE_TO_SERIALIZE_TO_JSON} if the pipe cannot be set up.
 */
@Override
public InputStream serialize(InputStream rawContent, DataSetMetadata metadata, long limit) {
    try {
        PipedInputStream pipe = new PipedInputStream();
        PipedOutputStream jsonOutput = new PipedOutputStream(pipe);
        // Serialize asynchronously for better performance (especially if caller doesn't consume all, see sampling).
        Runnable r = () -> {
            try {
                // Parameters are read inside the guarded region: a missing or empty separator must not prevent
                // the pipe from being closed below, otherwise the consumer would wait on it forever.
                final Map<String, String> parameters = metadata.getContent().getParameters();
                final String separator = parameters.get(CSVFormatFamily.SEPARATOR_PARAMETER);
                final char actualSeparator = separator.charAt(0);
                final char textEnclosureChar = getFromParameters(parameters, TEXT_ENCLOSURE_CHAR, defaultTextEnclosure);
                final char escapeChar = getFromParameters(parameters, CSVFormatFamily.ESCAPE_CHAR, defaultEscapeChar);
                try (InputStreamReader input = new InputStreamReader(rawContent, metadata.getEncoding());
                        CSVReader reader = new CSVReader(input, actualSeparator, textEnclosureChar, escapeChar)) {
                    JsonGenerator generator = new JsonFactory().createGenerator(jsonOutput);
                    int i = 0;
                    while (i++ < metadata.getContent().getNbLinesInHeader()) {
                        // Skip all header lines
                        reader.readNext();
                    }
                    generator.writeStartArray();
                    writeLineContent(reader, metadata, generator, separator, limit);
                    generator.writeEndArray();
                    generator.flush();
                }
            } catch (Exception e) {
                // Consumer may very well interrupt consumption of stream (in case of limit(n) use for sampling).
                // This is not an issue as consumer is allowed to partially consume results, it's up to the
                // consumer to ensure data it consumed is consistent.
                LOGGER.debug("Unable to continue serialization for {}. Skipping remaining content.", metadata.getId(), e);
            } finally {
                try {
                    jsonOutput.close();
                } catch (IOException e) {
                    LOGGER.error("Unable to close output", e);
                }
            }
        };
        executor.execute(r);
        return pipe;
    } catch (IOException e) {
        throw new TDPException(CommonErrorCodes.UNABLE_TO_SERIALIZE_TO_JSON, e);
    }
}
use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class HtmlSerializer method serialize.
/**
 * Hands the conversion of the raw content over to {@code deserialize(...)}, run on {@code executor}, and returns
 * the read end of the pipe that task writes to.
 *
 * @param rawContent the content to convert, passed as is to {@code deserialize}.
 * @param metadata the data set metadata, passed as is to {@code deserialize}.
 * @param limit passed as is to {@code deserialize}.
 * @return the read end of the pipe connected to the output given to {@code deserialize}.
 * @throws TDPException with {@code CommonErrorCodes.UNABLE_TO_SERIALIZE_TO_JSON} if the pipe cannot be connected.
 */
@Override
public InputStream serialize(InputStream rawContent, DataSetMetadata metadata, long limit) {
    final PipedInputStream readSide = new PipedInputStream();
    final PipedOutputStream writeSide;
    try {
        writeSide = new PipedOutputStream(readSide);
    } catch (IOException e) {
        throw new TDPException(CommonErrorCodes.UNABLE_TO_SERIALIZE_TO_JSON, e);
    }
    executor.execute(() -> deserialize(rawContent, metadata, writeSide, limit));
    return readSide;
}
use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class FileSystemPreparationRepository method add.
/**
 * Writes the given object, serialized with {@code mapper} and GZIP-compressed, to the file returned by
 * {@code getIdentifiableFile(object)}. A {@code null} object is ignored with a warning.
 *
 * @param object the object to save.
 * @throws TDPException with {@code CommonErrorCodes.UNABLE_TO_SAVE_PREPARATION} if the file cannot be written.
 * @see PreparationRepository#add(Identifiable)
 */
@Override
public void add(Identifiable object) {
    // defensive programming
    if (object == null) {
        LOG.warn("cannot save null...");
        return;
    }
    final File outputFile = getIdentifiableFile(object);
    try {
        FileUtils.touch(outputFile);
    } catch (IOException e) {
        // Best effort only: the write below is still attempted and reports its own failure.
        LOG.error("Unable to prepare file for {}.", object, e);
    }
    // Two separate resources: the file stream is closed even if the GZIP stream cannot be created
    // (its constructor writes the GZIP header and may throw).
    try (FileOutputStream fileOutput = new FileOutputStream(outputFile);
            GZIPOutputStream output = new GZIPOutputStream(fileOutput)) {
        mapper.writer().writeValue(output, object);
    } catch (IOException e) {
        LOG.error("Error saving {}", object, e);
        throw new TDPException(CommonErrorCodes.UNABLE_TO_SAVE_PREPARATION, e, ExceptionContext.build().put("id", object.id()));
    }
    LOG.debug("{} #{} saved", object.getClass().getSimpleName(), object.id());
}
use of org.talend.dataprep.exception.TDPException in project data-prep by Talend.
the class SearchDataSets method onExecute.
/**
 * Creates the GET request on {@code <datasetServiceUrl>/datasets/search} carrying the {@code name} and
 * {@code strict} query parameters.
 *
 * @param name value of the {@code name} query parameter.
 * @param strict value of the {@code strict} query parameter, sent as {@code "true"} or {@code "false"}.
 * @return the GET request to execute.
 * @throws TDPException with {@code UNEXPECTED_EXCEPTION} if the resulting URI is not valid.
 */
private HttpRequestBase onExecute(final String name, final boolean strict) {
    try {
        return new HttpGet(new URIBuilder(datasetServiceUrl + "/datasets/search") //
                .addParameter("name", name) //
                .addParameter("strict", String.valueOf(strict)) //
                .build());
    } catch (URISyntaxException e) {
        throw new TDPException(UNEXPECTED_EXCEPTION, e);
    }
}
Aggregations