Search in sources :

Example 1 with JSONFileReader

Use of org.kitesdk.data.spi.filesystem.JSONFileReader in the Apache NiFi project.

In the class ConvertJSONToAvro, method onTrigger.

/**
 * Converts the JSON content of one incoming FlowFile into Avro records.
 *
 * <p>The schema is resolved from the {@code SCHEMA} property (evaluated against the
 * incoming FlowFile's attributes). Routing:
 * <ul>
 *   <li>schema not found: the incoming FlowFile goes to {@code FAILURE};</li>
 *   <li>at least one record written: the Avro output goes to {@code SUCCESS}; if some
 *       records failed, a clone of the input carrying an {@code errors} attribute goes
 *       to {@code INCOMPATIBLE};</li>
 *   <li>no record written: the Avro output is removed and the clone, carrying an
 *       {@code errors} attribute, goes to {@code FAILURE};</li>
 *   <li>a read/write exception: the clone is removed and the incoming FlowFile goes to
 *       {@code FAILURE}.</li>
 * </ul>
 *
 * @param context provides the processor's property values
 * @param session the session used to read, write, clone and route FlowFiles
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile incomingJSON = session.get();
    if (incomingJSON == null) {
        return;
    }
    String schemaProperty = context.getProperty(SCHEMA).evaluateAttributeExpressions(incomingJSON).getValue();
    final Schema schema;
    try {
        schema = getSchema(schemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        getLogger().error("Cannot find schema: " + schemaProperty);
        session.transfer(incomingJSON, FAILURE);
        return;
    }
    final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(schema, Record.class));
    writer.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
    // Declared outside the try so the failure handlers below can discard the clone;
    // stays null if the failure happens before the clone is created.
    FlowFile badRecords = null;
    try {
        final AtomicLong written = new AtomicLong(0L);
        final FailureTracker failures = new FailureTracker();
        // Clone of the original JSON, used to report records that could not be converted.
        badRecords = session.clone(incomingJSON);
        FlowFile outgoingAvro = session.write(incomingJSON, new StreamCallback() {

            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (JSONFileReader<Record> reader = new JSONFileReader<>(in, schema, Record.class)) {
                    reader.initialize();
                    try (DataFileWriter<Record> w = writer.create(schema, out)) {
                        while (reader.hasNext()) {
                            try {
                                Record record = reader.next();
                                w.append(record);
                                written.incrementAndGet();
                            } catch (final DatasetRecordException e) {
                                // A single bad record is tracked and skipped, not fatal.
                                failures.add(e);
                            }
                        }
                    }
                }
            }
        });
        long errors = failures.count();
        session.adjustCounter("Converted records", written.get(), false);
        session.adjustCounter("Conversion errors", errors, false);
        if (written.get() > 0L) {
            session.transfer(outgoingAvro, SUCCESS);
            if (errors > 0L) {
                getLogger().warn("Failed to convert {}/{} records from JSON to Avro", new Object[] { errors, errors + written.get() });
                badRecords = session.putAttribute(badRecords, "errors", failures.summary());
                session.transfer(badRecords, INCOMPATIBLE);
            } else {
                session.remove(badRecords);
            }
        } else {
            // Nothing was converted: drop the empty Avro output and fail the clone instead.
            session.remove(outgoingAvro);
            if (errors > 0L) {
                getLogger().warn("Failed to convert {}/{} records from JSON to Avro", new Object[] { errors, errors });
                badRecords = session.putAttribute(badRecords, "errors", failures.summary());
            } else {
                badRecords = session.putAttribute(badRecords, "errors", "No incoming records");
            }
            session.transfer(badRecords, FAILURE);
        }
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed reading or writing", e);
        // The clone must not be left in the session without being transferred or removed.
        if (badRecords != null) {
            session.remove(badRecords);
        }
        session.transfer(incomingJSON, FAILURE);
    } catch (DatasetException e) {
        getLogger().error("Failed to read FlowFile", e);
        // The clone must not be left in the session without being transferred or removed.
        if (badRecords != null) {
            session.remove(badRecords);
        }
        session.transfer(incomingJSON, FAILURE);
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            getLogger().warn("Unable to close writer ressource", e);
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) OutputStream(java.io.OutputStream) DatasetIOException(org.kitesdk.data.DatasetIOException) IOException(java.io.IOException) StreamCallback(org.apache.nifi.processor.io.StreamCallback) DatasetRecordException(org.kitesdk.data.DatasetRecordException) DatasetException(org.kitesdk.data.DatasetException) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProcessException(org.apache.nifi.processor.exception.ProcessException) Record(org.apache.avro.generic.GenericData.Record) SchemaNotFoundException(org.kitesdk.data.SchemaNotFoundException) JSONFileReader(org.kitesdk.data.spi.filesystem.JSONFileReader) DatasetIOException(org.kitesdk.data.DatasetIOException)

Aggregations

IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 Schema (org.apache.avro.Schema)1 DataFileWriter (org.apache.avro.file.DataFileWriter)1 Record (org.apache.avro.generic.GenericData.Record)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ProcessException (org.apache.nifi.processor.exception.ProcessException)1 StreamCallback (org.apache.nifi.processor.io.StreamCallback)1 DatasetException (org.kitesdk.data.DatasetException)1 DatasetIOException (org.kitesdk.data.DatasetIOException)1 DatasetRecordException (org.kitesdk.data.DatasetRecordException)1 SchemaNotFoundException (org.kitesdk.data.SchemaNotFoundException)1 JSONFileReader (org.kitesdk.data.spi.filesystem.JSONFileReader)1