Search in sources :

Example 1 with AvroConversionException

use of org.apache.nifi.processors.kite.AvroRecordConverter.AvroConversionException in project nifi by apache.

the class ConvertAvroSchema method onTrigger.

/**
 * Converts the Avro records of one incoming FlowFile from the configured input schema to the
 * configured output schema.
 *
 * <p>Records that convert cleanly are written to a FlowFile routed to {@code SUCCESS}. Records that
 * raise an {@link AvroConversionException} are written, unconverted, to a clone of the incoming
 * FlowFile that is routed to {@code FAILURE} with an {@code errors} attribute summarising the
 * problems. A FlowFile that yields no records at all is routed to {@code FAILURE} with
 * {@code errors = "No incoming records"}.
 *
 * @param context supplies the schema, locale, compression and dynamic field-mapping properties
 * @param session the session the incoming FlowFile is pulled from and results are routed through
 * @throws ProcessException declared by the overridden method; read/write failures raised inside the
 *         main try block are caught and result in the FlowFile being routed to {@code FAILURE}
 */
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile incomingAvro = session.get();
    if (incomingAvro == null) {
        return;
    }
    String inputSchemaProperty = context.getProperty(INPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
    final Schema inputSchema;
    try {
        inputSchema = getSchema(inputSchemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        getLogger().error("Cannot find schema: " + inputSchemaProperty);
        session.transfer(incomingAvro, FAILURE);
        return;
    }
    String outputSchemaProperty = context.getProperty(OUTPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
    final Schema outputSchema;
    try {
        outputSchema = getSchema(outputSchemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        getLogger().error("Cannot find schema: " + outputSchemaProperty);
        session.transfer(incomingAvro, FAILURE);
        return;
    }
    // Dynamic (user-added) properties are handed to the converter as the field mapping.
    final Map<String, String> fieldMapping = new HashMap<>();
    for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
        if (entry.getKey().isDynamic()) {
            fieldMapping.put(entry.getKey().getName(), entry.getValue());
        }
    }
    // Set locale
    final String localeProperty = context.getProperty(LOCALE).getValue();
    final Locale locale = localeProperty.equals(DEFAULT_LOCALE_VALUE) ? Locale.getDefault() : LocaleUtils.toLocale(localeProperty);
    final AvroRecordConverter converter = new AvroRecordConverter(inputSchema, outputSchema, fieldMapping, locale);
    final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
    writer.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
    // The failure writer only ever receives unconverted input records, so it is built with the
    // input schema (the same schema it is later opened with).
    final DataFileWriter<Record> failureWriter = new DataFileWriter<>(AvroUtil.newDatumWriter(inputSchema, Record.class));
    failureWriter.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
    // Newest version of the cloned FlowFile while it still needs a disposition (transfer or
    // remove); reset to null once it has one so the catch blocks do not touch it again.
    FlowFile failureFlowFile = null;
    try {
        final AtomicLong written = new AtomicLong(0L);
        final FailureTracker failures = new FailureTracker();
        final List<Record> badRecords = Lists.newLinkedList();
        failureFlowFile = session.clone(incomingAvro);
        FlowFile outgoingAvro = session.write(incomingAvro, new StreamCallback() {

            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (DataFileStream<Record> stream = new DataFileStream<Record>(in, new GenericDatumReader<Record>(converter.getInputSchema()))) {
                    try (DataFileWriter<Record> w = writer.create(outputSchema, out)) {
                        for (Record record : stream) {
                            try {
                                Record converted = converter.convert(record);
                                w.append(converted);
                                written.incrementAndGet();
                            } catch (AvroConversionException e) {
                                // Keep going: a bad record is set aside and reported, not fatal.
                                failures.add(e);
                                getLogger().error("Error converting data: " + e.getMessage());
                                badRecords.add(record);
                            }
                        }
                    }
                }
            }
        });
        // Overwrite the clone's content with just the records that failed conversion.
        failureFlowFile = session.write(failureFlowFile, new StreamCallback() {

            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (DataFileWriter<Record> w = failureWriter.create(inputSchema, out)) {
                    for (Record record : badRecords) {
                        w.append(record);
                    }
                }
            }
        });
        final long errors = failures.count();
        final long converted = written.get();
        // Counters are adjusted with immediate == false.
        session.adjustCounter("Converted records", converted, false);
        session.adjustCounter("Conversion errors", errors, false);

        if (converted > 0L) {
            session.transfer(outgoingAvro, SUCCESS);
        } else {
            session.remove(outgoingAvro);
        }

        // Give the failure FlowFile exactly one disposition: transfer it when there is something
        // to report (conversion errors, or an input with no records at all), otherwise drop it.
        if (errors > 0L) {
            getLogger().warn("Failed to convert {}/{} records between Avro Schemas", new Object[] { errors, errors + converted });
            failureFlowFile = session.putAttribute(failureFlowFile, "errors", failures.summary());
            session.transfer(failureFlowFile, FAILURE);
        } else if (converted == 0L) {
            failureFlowFile = session.putAttribute(failureFlowFile, "errors", "No incoming records");
            session.transfer(failureFlowFile, FAILURE);
        } else {
            session.remove(failureFlowFile);
        }
        failureFlowFile = null;
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed reading or writing", e);
        // Drop the clone so it is not left in the session without a disposition.
        if (failureFlowFile != null) {
            session.remove(failureFlowFile);
        }
        session.transfer(incomingAvro, FAILURE);
    } catch (DatasetException e) {
        getLogger().error("Failed to read FlowFile", e);
        // Drop the clone so it is not left in the session without a disposition.
        if (failureFlowFile != null) {
            session.remove(failureFlowFile);
        }
        session.transfer(incomingAvro, FAILURE);
    } finally {
        // Both writers are also closed by the try-with-resources blocks inside the callbacks when
        // those run; closing here covers the paths where a callback never opened them.
        try {
            writer.close();
        } catch (IOException e) {
            getLogger().warn("Unable to close writer ressource", e);
        }
        try {
            failureWriter.close();
        } catch (IOException e) {
            getLogger().warn("Unable to close writer ressource", e);
        }
    }
}
Also used : Locale(java.util.Locale) HashMap(java.util.HashMap) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) OutputStream(java.io.OutputStream) DatasetException(org.kitesdk.data.DatasetException) Record(org.apache.avro.generic.GenericData.Record) FlowFile(org.apache.nifi.flowfile.FlowFile) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) InputStream(java.io.InputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) DatasetIOException(org.kitesdk.data.DatasetIOException) IOException(java.io.IOException) DataFileStream(org.apache.avro.file.DataFileStream) StreamCallback(org.apache.nifi.processor.io.StreamCallback) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProcessException(org.apache.nifi.processor.exception.ProcessException) AvroConversionException(org.apache.nifi.processors.kite.AvroRecordConverter.AvroConversionException) SchemaNotFoundException(org.kitesdk.data.SchemaNotFoundException) DatasetIOException(org.kitesdk.data.DatasetIOException) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 HashMap (java.util.HashMap)1 Locale (java.util.Locale)1 Map (java.util.Map)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 Schema (org.apache.avro.Schema)1 DataFileStream (org.apache.avro.file.DataFileStream)1 DataFileWriter (org.apache.avro.file.DataFileWriter)1 Record (org.apache.avro.generic.GenericData.Record)1 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)1 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ProcessException (org.apache.nifi.processor.exception.ProcessException)1 StreamCallback (org.apache.nifi.processor.io.StreamCallback)1 AvroConversionException (org.apache.nifi.processors.kite.AvroRecordConverter.AvroConversionException)1 DatasetException (org.kitesdk.data.DatasetException)1 DatasetIOException (org.kitesdk.data.DatasetIOException)1 SchemaNotFoundException (org.kitesdk.data.SchemaNotFoundException)1