Use of org.kitesdk.data.DatasetIOException in the Apache NiFi project.
Class ConvertAvroSchema, method onTrigger.
/**
 * Converts the Avro records of one incoming FlowFile from the configured input schema to the
 * configured output schema.
 *
 * <p>Records that convert cleanly are appended to the FlowFile that is routed to
 * {@code SUCCESS}. Records that raise an {@code AvroConversionException} are collected and
 * written, using the input schema, into a clone of the incoming FlowFile; that clone is routed
 * to {@code FAILURE} with an {@code errors} attribute holding the failure summary. When nothing
 * was written and nothing failed, the clone is routed to {@code FAILURE} with {@code errors}
 * set to "No incoming records".</p>
 *
 * <p>If either schema cannot be resolved, or a {@code ProcessException} / {@code DatasetException}
 * is raised while processing, the incoming FlowFile is routed to {@code FAILURE}.</p>
 *
 * @param context supplies the schema, locale, compression and dynamic field-mapping properties
 * @param session session used to fetch, clone, rewrite and route the FlowFiles
 * @throws ProcessException as declared by the overridden method
 */
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile incomingAvro = session.get();
    if (incomingAvro == null) {
        return;
    }

    String inputSchemaProperty = context.getProperty(INPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
    final Schema inputSchema;
    try {
        inputSchema = getSchema(inputSchemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        // Keep the cause so the reason for the failed lookup is not lost.
        getLogger().error("Cannot find schema: " + inputSchemaProperty, e);
        session.transfer(incomingAvro, FAILURE);
        return;
    }

    String outputSchemaProperty = context.getProperty(OUTPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
    final Schema outputSchema;
    try {
        outputSchema = getSchema(outputSchemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        getLogger().error("Cannot find schema: " + outputSchemaProperty, e);
        session.transfer(incomingAvro, FAILURE);
        return;
    }

    // Every dynamic property is treated as one field-mapping entry (property name -> value).
    final Map<String, String> fieldMapping = new HashMap<>();
    for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
        if (entry.getKey().isDynamic()) {
            fieldMapping.put(entry.getKey().getName(), entry.getValue());
        }
    }

    // Set locale
    final String localeProperty = context.getProperty(LOCALE).getValue();
    final Locale locale = localeProperty.equals(DEFAULT_LOCALE_VALUE) ? Locale.getDefault() : LocaleUtils.toLocale(localeProperty);
    final AvroRecordConverter converter = new AvroRecordConverter(inputSchema, outputSchema, fieldMapping, locale);

    final String compressionType = context.getProperty(COMPRESSION_TYPE).getValue();
    final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
    writer.setCodec(getCodecFactory(compressionType));
    final DataFileWriter<Record> failureWriter = new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
    failureWriter.setCodec(getCodecFactory(compressionType));

    // Latest version of the clone that carries the rejected records. It is non-null only while
    // this method still owes the session a transfer or a remove for it, so the catch blocks can
    // release it instead of leaving an unaccounted FlowFile behind.
    FlowFile badOutput = null;
    try {
        final AtomicLong written = new AtomicLong(0L);
        final FailureTracker failures = new FailureTracker();
        final List<Record> badRecords = Lists.newLinkedList();

        badOutput = session.clone(incomingAvro);

        FlowFile outgoingAvro = session.write(incomingAvro, new StreamCallback() {
            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (DataFileStream<Record> stream = new DataFileStream<Record>(in, new GenericDatumReader<Record>(converter.getInputSchema()))) {
                    try (DataFileWriter<Record> w = writer.create(outputSchema, out)) {
                        for (Record record : stream) {
                            try {
                                Record converted = converter.convert(record);
                                w.append(converted);
                                written.incrementAndGet();
                            } catch (AvroConversionException e) {
                                // A single bad record must not abort the whole file: remember
                                // it and carry on with the next one.
                                failures.add(e);
                                getLogger().error("Error converting data: " + e.getMessage());
                                badRecords.add(record);
                            }
                        }
                    }
                }
            }
        });

        // Replace the clone's content with only the records that failed to convert.
        badOutput = session.write(badOutput, new StreamCallback() {
            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (DataFileWriter<Record> w = failureWriter.create(inputSchema, out)) {
                    for (Record record : badRecords) {
                        w.append(record);
                    }
                }
            }
        });

        long errors = failures.count();
        // update only if file transfer is successful
        session.adjustCounter("Converted records", written.get(), false);
        // update only if file transfer is successful
        session.adjustCounter("Conversion errors", errors, false);

        if (written.get() > 0L) {
            session.transfer(outgoingAvro, SUCCESS);
        } else {
            session.remove(outgoingAvro);
        }

        // Decide the fate of the failure clone exactly once. Previously the "no incoming
        // records" case transferred the clone to FAILURE and then also removed it.
        if (errors > 0L) {
            getLogger().warn("Failed to convert {}/{} records between Avro Schemas", new Object[] { errors, errors + written.get() });
            badOutput = session.putAttribute(badOutput, "errors", failures.summary());
            session.transfer(badOutput, FAILURE);
        } else if (written.get() == 0L) {
            badOutput = session.putAttribute(badOutput, "errors", "No incoming records");
            session.transfer(badOutput, FAILURE);
        } else {
            session.remove(badOutput);
        }
        badOutput = null;
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed reading or writing", e);
        session.transfer(incomingAvro, FAILURE);
        if (badOutput != null) {
            session.remove(badOutput);
        }
    } catch (DatasetException e) {
        getLogger().error("Failed to read FlowFile", e);
        session.transfer(incomingAvro, FAILURE);
        if (badOutput != null) {
            session.remove(badOutput);
        }
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            getLogger().warn("Unable to close writer ressource", e);
        }
        try {
            failureWriter.close();
        } catch (IOException e) {
            getLogger().warn("Unable to close writer ressource", e);
        }
    }
}
Use of org.kitesdk.data.DatasetIOException in the Apache NiFi project.
Class ConvertCSVToAvro, method onTrigger.
/**
 * Converts one incoming CSV FlowFile into an Avro data file using the configured schema.
 *
 * <p>Routing, as implemented below:</p>
 * <ul>
 *   <li>at least one record written: the Avro output goes to {@code SUCCESS}; if some records
 *       also failed, a clone of the original CSV goes to {@code INCOMPATIBLE} with an
 *       {@code errors} attribute holding the failure summary;</li>
 *   <li>no record written: the Avro output is removed and the clone goes to {@code FAILURE}
 *       with {@code errors} set to the failure summary or to "No incoming records";</li>
 *   <li>schema not found, or a {@code ProcessException} / {@code DatasetException} while
 *       processing: the incoming FlowFile goes to {@code FAILURE}.</li>
 * </ul>
 *
 * @param context supplies the CSV format, schema and compression properties
 * @param session session used to fetch, clone, rewrite and route the FlowFiles
 * @throws ProcessException as declared by the overridden method
 */
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile incomingCSV = session.get();
    if (incomingCSV == null) {
        return;
    }

    final CSVProperties props = new CSVProperties.Builder()
            .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(incomingCSV).getValue())
            .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(incomingCSV).getValue())
            .quote(context.getProperty(QUOTE).evaluateAttributeExpressions(incomingCSV).getValue())
            .escape(context.getProperty(ESCAPE).evaluateAttributeExpressions(incomingCSV).getValue())
            .hasHeader(context.getProperty(HAS_HEADER).evaluateAttributeExpressions(incomingCSV).asBoolean())
            .linesToSkip(context.getProperty(LINES_TO_SKIP).evaluateAttributeExpressions(incomingCSV).asInteger())
            .build();

    String schemaProperty = context.getProperty(SCHEMA).evaluateAttributeExpressions(incomingCSV).getValue();
    final Schema schema;
    try {
        schema = getSchema(schemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        // Keep the cause so the reason for the failed lookup is not lost.
        getLogger().error("Cannot find schema: " + schemaProperty, e);
        session.transfer(incomingCSV, FAILURE);
        return;
    }

    try (final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(schema, Record.class))) {
        writer.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));

        // Clone of the original CSV, kept for the INCOMPATIBLE/FAILURE routes. It is non-null
        // only while this method still owes the session a transfer or a remove for it, so the
        // catch blocks can release it instead of leaving an unaccounted FlowFile behind.
        FlowFile badRecords = null;
        try {
            final AtomicLong written = new AtomicLong(0L);
            final FailureTracker failures = new FailureTracker();

            badRecords = session.clone(incomingCSV);

            FlowFile outgoingAvro = session.write(incomingCSV, new StreamCallback() {
                @Override
                public void process(InputStream in, OutputStream out) throws IOException {
                    try (CSVFileReader<Record> reader = new CSVFileReader<>(in, props, schema, Record.class)) {
                        reader.initialize();
                        try (DataFileWriter<Record> w = writer.create(schema, out)) {
                            while (reader.hasNext()) {
                                try {
                                    Record record = reader.next();
                                    w.append(record);
                                    written.incrementAndGet();
                                } catch (DatasetRecordException e) {
                                    // One bad row is recorded and skipped; conversion continues.
                                    failures.add(e);
                                }
                            }
                        }
                    }
                }
            });

            long errors = failures.count();
            session.adjustCounter("Converted records", written.get(), false);
            session.adjustCounter("Conversion errors", errors, false);

            if (written.get() > 0L) {
                session.transfer(outgoingAvro, SUCCESS);
                if (errors > 0L) {
                    getLogger().warn("Failed to convert {}/{} records from CSV to Avro", new Object[] { errors, errors + written.get() });
                    badRecords = session.putAttribute(badRecords, "errors", failures.summary());
                    session.transfer(badRecords, INCOMPATIBLE);
                } else {
                    session.remove(badRecords);
                }
            } else {
                session.remove(outgoingAvro);
                if (errors > 0L) {
                    getLogger().warn("Failed to convert {}/{} records from CSV to Avro", new Object[] { errors, errors });
                    badRecords = session.putAttribute(badRecords, "errors", failures.summary());
                } else {
                    badRecords = session.putAttribute(badRecords, "errors", "No incoming records");
                }
                session.transfer(badRecords, FAILURE);
            }
            badRecords = null;
        } catch (ProcessException | DatasetIOException e) {
            getLogger().error("Failed reading or writing", e);
            session.transfer(incomingCSV, FAILURE);
            if (badRecords != null) {
                session.remove(badRecords);
            }
        } catch (DatasetException e) {
            getLogger().error("Failed to read FlowFile", e);
            session.transfer(incomingCSV, FAILURE);
            if (badRecords != null) {
                session.remove(badRecords);
            }
        }
    } catch (final IOException ioe) {
        throw new RuntimeException("Unable to close Avro Writer", ioe);
    }
}
Use of org.kitesdk.data.DatasetIOException in the Apache NiFi project.
Class ConvertJSONToAvro, method onTrigger.
/**
 * Converts one incoming JSON FlowFile into an Avro data file using the configured schema.
 *
 * <p>Routing, as implemented below:</p>
 * <ul>
 *   <li>at least one record written: the Avro output goes to {@code SUCCESS}; if some records
 *       also failed, a clone of the original JSON goes to {@code INCOMPATIBLE} with an
 *       {@code errors} attribute holding the failure summary;</li>
 *   <li>no record written: the Avro output is removed and the clone goes to {@code FAILURE}
 *       with {@code errors} set to the failure summary or to "No incoming records";</li>
 *   <li>schema not found, or a {@code ProcessException} / {@code DatasetException} while
 *       processing: the incoming FlowFile goes to {@code FAILURE}.</li>
 * </ul>
 *
 * @param context supplies the schema and compression properties
 * @param session session used to fetch, clone, rewrite and route the FlowFiles
 * @throws ProcessException as declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile incomingJSON = session.get();
    if (incomingJSON == null) {
        return;
    }

    String schemaProperty = context.getProperty(SCHEMA).evaluateAttributeExpressions(incomingJSON).getValue();
    final Schema schema;
    try {
        schema = getSchema(schemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        // Keep the cause so the reason for the failed lookup is not lost.
        getLogger().error("Cannot find schema: " + schemaProperty, e);
        session.transfer(incomingJSON, FAILURE);
        return;
    }

    final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(schema, Record.class));
    writer.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));

    // Clone of the original JSON, kept for the INCOMPATIBLE/FAILURE routes. It is non-null only
    // while this method still owes the session a transfer or a remove for it, so the catch
    // blocks can release it instead of leaving an unaccounted FlowFile behind.
    FlowFile badRecords = null;
    try {
        final AtomicLong written = new AtomicLong(0L);
        final FailureTracker failures = new FailureTracker();

        badRecords = session.clone(incomingJSON);

        FlowFile outgoingAvro = session.write(incomingJSON, new StreamCallback() {
            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (JSONFileReader<Record> reader = new JSONFileReader<>(in, schema, Record.class)) {
                    reader.initialize();
                    try (DataFileWriter<Record> w = writer.create(schema, out)) {
                        while (reader.hasNext()) {
                            try {
                                Record record = reader.next();
                                w.append(record);
                                written.incrementAndGet();
                            } catch (final DatasetRecordException e) {
                                // One bad record is recorded and skipped; conversion continues.
                                failures.add(e);
                            }
                        }
                    }
                }
            }
        });

        long errors = failures.count();
        session.adjustCounter("Converted records", written.get(), false);
        session.adjustCounter("Conversion errors", errors, false);

        if (written.get() > 0L) {
            session.transfer(outgoingAvro, SUCCESS);
            if (errors > 0L) {
                getLogger().warn("Failed to convert {}/{} records from JSON to Avro", new Object[] { errors, errors + written.get() });
                badRecords = session.putAttribute(badRecords, "errors", failures.summary());
                session.transfer(badRecords, INCOMPATIBLE);
            } else {
                session.remove(badRecords);
            }
        } else {
            session.remove(outgoingAvro);
            if (errors > 0L) {
                getLogger().warn("Failed to convert {}/{} records from JSON to Avro", new Object[] { errors, errors });
                badRecords = session.putAttribute(badRecords, "errors", failures.summary());
            } else {
                badRecords = session.putAttribute(badRecords, "errors", "No incoming records");
            }
            session.transfer(badRecords, FAILURE);
        }
        badRecords = null;
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed reading or writing", e);
        session.transfer(incomingJSON, FAILURE);
        if (badRecords != null) {
            session.remove(badRecords);
        }
    } catch (DatasetException e) {
        getLogger().error("Failed to read FlowFile", e);
        session.transfer(incomingJSON, FAILURE);
        if (badRecords != null) {
            session.remove(badRecords);
        }
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            getLogger().warn("Unable to close writer ressource", e);
        }
    }
}
Use of org.kitesdk.data.DatasetIOException in the Apache NiFi project.
Class StoreInKiteDataset, method onTrigger.
/**
 * Stores the Avro records of one incoming FlowFile in the target Kite view.
 *
 * <p>The FlowFile's embedded schema is checked for read-compatibility with the dataset schema
 * before any record is written. On success a provenance SEND event is reported and the FlowFile
 * is routed to {@code SUCCESS}; a {@code ValidationException} routes it to
 * {@code INCOMPATIBLE}; a {@code ProcessException} or {@code DatasetIOException} routes it to
 * {@code FAILURE}.</p>
 *
 * @param context passed to {@code load} to resolve the target view
 * @param session session used to fetch, read and route the FlowFile
 * @throws ProcessException as declared by the overridden method
 */
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final View<Record> destination = load(context, flowFile);
    final Schema datasetSchema = destination.getDataset().getDescriptor().getSchema();

    try {
        StopWatch stopWatch = new StopWatch(true);

        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                try (DataFileStream<Record> avroStream = new DataFileStream<>(in, AvroUtil.newDatumReader(datasetSchema, Record.class))) {
                    // Refuse files whose writer schema the dataset schema cannot read.
                    IncompatibleSchemaException.check(
                            SchemaValidationUtil.canRead(avroStream.getSchema(), datasetSchema),
                            "Incompatible file schema %s, expected %s",
                            avroStream.getSchema(), datasetSchema);

                    long storedCount = 0L;
                    try (DatasetWriter<Record> datasetWriter = destination.newWriter()) {
                        for (Record next : avroStream) {
                            datasetWriter.write(next);
                            storedCount++;
                        }
                    } finally {
                        // Report however many records were written, even if the loop failed.
                        session.adjustCounter("Stored records", storedCount, true);
                    }
                }
            }
        });

        stopWatch.stop();

        final String destinationUri = destination.getUri().toString();
        final long elapsedMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        session.getProvenanceReporter().send(flowFile, destinationUri, elapsedMillis, true);
        session.transfer(flowFile, SUCCESS);
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed to read FlowFile", e);
        session.transfer(flowFile, FAILURE);
    } catch (ValidationException e) {
        // Message at error level; the stack trace is only emitted at debug level.
        getLogger().error(e.getMessage());
        getLogger().debug("Incompatible schema error", e);
        session.transfer(flowFile, INCOMPATIBLE);
    }
}
Aggregations