Search in sources:

Example 1 with DatasetRecordException

Use of org.kitesdk.data.DatasetRecordException in the Apache NiFi project.

The onTrigger method of the ConvertCSVToAvro class.

/**
 * Converts the content of an incoming CSV FlowFile to Avro using the configured schema.
 *
 * <p>Record-level conversion failures are tracked rather than aborting the whole file:
 * successfully converted records go to SUCCESS, and the original content is routed to
 * INCOMPATIBLE (with an "errors" attribute) when some records failed, or to FAILURE when
 * nothing could be converted. Session-level failures route the input to FAILURE.
 *
 * @param context provides processor properties (with expression-language support)
 * @param session provides the incoming FlowFile and transfer/accounting operations
 * @throws ProcessException if the framework-level processing contract is violated
 */
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile incomingCSV = session.get();
    if (incomingCSV == null) {
        return;
    }
    // CSV parsing options; each property supports expression language against the FlowFile.
    CSVProperties props = new CSVProperties.Builder()
            .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(incomingCSV).getValue())
            .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(incomingCSV).getValue())
            .quote(context.getProperty(QUOTE).evaluateAttributeExpressions(incomingCSV).getValue())
            .escape(context.getProperty(ESCAPE).evaluateAttributeExpressions(incomingCSV).getValue())
            .hasHeader(context.getProperty(HAS_HEADER).evaluateAttributeExpressions(incomingCSV).asBoolean())
            .linesToSkip(context.getProperty(LINES_TO_SKIP).evaluateAttributeExpressions(incomingCSV).asInteger())
            .build();
    String schemaProperty = context.getProperty(SCHEMA).evaluateAttributeExpressions(incomingCSV).getValue();
    final Schema schema;
    try {
        schema = getSchema(schemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        // Pass the exception so the stack trace is preserved, not just the message.
        getLogger().error("Cannot find schema: " + schemaProperty, e);
        session.transfer(incomingCSV, FAILURE);
        return;
    }
    try (final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(schema, Record.class))) {
        writer.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
        // Declared outside the inner try so the catch blocks can account for the clone.
        // Every FlowFile created in a session must be transferred or removed before
        // commit, on every path, or the session fails.
        FlowFile badRecords = null;
        try {
            final AtomicLong written = new AtomicLong(0L);
            final FailureTracker failures = new FailureTracker();
            // Clone the input up front so it can be routed to INCOMPATIBLE/FAILURE
            // unchanged if some or all records fail to convert.
            badRecords = session.clone(incomingCSV);
            FlowFile outgoingAvro = session.write(incomingCSV, new StreamCallback() {

                @Override
                public void process(InputStream in, OutputStream out) throws IOException {
                    try (CSVFileReader<Record> reader = new CSVFileReader<>(in, props, schema, Record.class)) {
                        reader.initialize();
                        try (DataFileWriter<Record> w = writer.create(schema, out)) {
                            while (reader.hasNext()) {
                                try {
                                    Record record = reader.next();
                                    w.append(record);
                                    written.incrementAndGet();
                                } catch (DatasetRecordException e) {
                                    // Record-level failure: track it and keep converting.
                                    failures.add(e);
                                }
                            }
                        }
                    }
                }
            });
            long errors = failures.count();
            session.adjustCounter("Converted records", written.get(), false);
            session.adjustCounter("Conversion errors", errors, false);
            if (written.get() > 0L) {
                // At least one record converted: ship the Avro output.
                session.transfer(outgoingAvro, SUCCESS);
                if (errors > 0L) {
                    getLogger().warn("Failed to convert {}/{} records from CSV to Avro", new Object[] { errors, errors + written.get() });
                    badRecords = session.putAttribute(badRecords, "errors", failures.summary());
                    session.transfer(badRecords, INCOMPATIBLE);
                } else {
                    session.remove(badRecords);
                }
            } else {
                // Nothing converted: discard the (empty) Avro output and fail the input.
                session.remove(outgoingAvro);
                if (errors > 0L) {
                    getLogger().warn("Failed to convert {}/{} records from CSV to Avro", new Object[] { errors, errors });
                    badRecords = session.putAttribute(badRecords, "errors", failures.summary());
                } else {
                    badRecords = session.putAttribute(badRecords, "errors", "No incoming records");
                }
                session.transfer(badRecords, FAILURE);
            }
        } catch (ProcessException | DatasetIOException e) {
            getLogger().error("Failed reading or writing", e);
            if (badRecords != null) {
                // FIX: the clone must be removed here, otherwise the session
                // cannot commit (un-accounted FlowFile).
                session.remove(badRecords);
            }
            session.transfer(incomingCSV, FAILURE);
        } catch (DatasetException e) {
            getLogger().error("Failed to read FlowFile", e);
            if (badRecords != null) {
                session.remove(badRecords);
            }
            session.transfer(incomingCSV, FAILURE);
        }
    } catch (final IOException ioe) {
        throw new RuntimeException("Unable to close Avro Writer", ioe);
    }
}
Also used : Schema(org.apache.avro.Schema) OutputStream(java.io.OutputStream) DatasetRecordException(org.kitesdk.data.DatasetRecordException) DatasetException(org.kitesdk.data.DatasetException) Record(org.apache.avro.generic.GenericData.Record) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) DatasetIOException(org.kitesdk.data.DatasetIOException) IOException(java.io.IOException) StreamCallback(org.apache.nifi.processor.io.StreamCallback) CSVProperties(org.kitesdk.data.spi.filesystem.CSVProperties) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProcessException(org.apache.nifi.processor.exception.ProcessException) CSVFileReader(org.kitesdk.data.spi.filesystem.CSVFileReader) SchemaNotFoundException(org.kitesdk.data.SchemaNotFoundException) DatasetIOException(org.kitesdk.data.DatasetIOException)

Example 2 with DatasetRecordException

Use of org.kitesdk.data.DatasetRecordException in the Apache NiFi project.

The onTrigger method of the ConvertJSONToAvro class.

/**
 * Converts the content of an incoming JSON FlowFile to Avro using the configured schema.
 *
 * <p>Record-level conversion failures are tracked rather than aborting the whole file:
 * successfully converted records go to SUCCESS, and the original content is routed to
 * INCOMPATIBLE (with an "errors" attribute) when some records failed, or to FAILURE when
 * nothing could be converted. Session-level failures route the input to FAILURE.
 *
 * @param context provides processor properties (with expression-language support)
 * @param session provides the incoming FlowFile and transfer/accounting operations
 * @throws ProcessException if the framework-level processing contract is violated
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile incomingJSON = session.get();
    if (incomingJSON == null) {
        return;
    }
    String schemaProperty = context.getProperty(SCHEMA).evaluateAttributeExpressions(incomingJSON).getValue();
    final Schema schema;
    try {
        schema = getSchema(schemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
        // Pass the exception so the stack trace is preserved, not just the message.
        getLogger().error("Cannot find schema: " + schemaProperty, e);
        session.transfer(incomingJSON, FAILURE);
        return;
    }
    final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(schema, Record.class));
    writer.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
    // Declared outside the try so the catch blocks can account for the clone.
    // Every FlowFile created in a session must be transferred or removed before
    // commit, on every path, or the session fails.
    FlowFile badRecords = null;
    try {
        final AtomicLong written = new AtomicLong(0L);
        final FailureTracker failures = new FailureTracker();
        // Clone the input up front so it can be routed to INCOMPATIBLE/FAILURE
        // unchanged if some or all records fail to convert.
        badRecords = session.clone(incomingJSON);
        FlowFile outgoingAvro = session.write(incomingJSON, new StreamCallback() {

            @Override
            public void process(InputStream in, OutputStream out) throws IOException {
                try (JSONFileReader<Record> reader = new JSONFileReader<>(in, schema, Record.class)) {
                    reader.initialize();
                    try (DataFileWriter<Record> w = writer.create(schema, out)) {
                        while (reader.hasNext()) {
                            try {
                                Record record = reader.next();
                                w.append(record);
                                written.incrementAndGet();
                            } catch (final DatasetRecordException e) {
                                // Record-level failure: track it and keep converting.
                                failures.add(e);
                            }
                        }
                    }
                }
            }
        });
        long errors = failures.count();
        session.adjustCounter("Converted records", written.get(), false);
        session.adjustCounter("Conversion errors", errors, false);
        if (written.get() > 0L) {
            // At least one record converted: ship the Avro output.
            session.transfer(outgoingAvro, SUCCESS);
            if (errors > 0L) {
                getLogger().warn("Failed to convert {}/{} records from JSON to Avro", new Object[] { errors, errors + written.get() });
                badRecords = session.putAttribute(badRecords, "errors", failures.summary());
                session.transfer(badRecords, INCOMPATIBLE);
            } else {
                session.remove(badRecords);
            }
        } else {
            // Nothing converted: discard the (empty) Avro output and fail the input.
            session.remove(outgoingAvro);
            if (errors > 0L) {
                getLogger().warn("Failed to convert {}/{} records from JSON to Avro", new Object[] { errors, errors });
                badRecords = session.putAttribute(badRecords, "errors", failures.summary());
            } else {
                badRecords = session.putAttribute(badRecords, "errors", "No incoming records");
            }
            session.transfer(badRecords, FAILURE);
        }
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed reading or writing", e);
        if (badRecords != null) {
            // FIX: the clone must be removed here, otherwise the session
            // cannot commit (un-accounted FlowFile).
            session.remove(badRecords);
        }
        session.transfer(incomingJSON, FAILURE);
    } catch (DatasetException e) {
        getLogger().error("Failed to read FlowFile", e);
        if (badRecords != null) {
            session.remove(badRecords);
        }
        session.transfer(incomingJSON, FAILURE);
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            // Typo fixed: "ressource" -> "resource".
            getLogger().warn("Unable to close writer resource", e);
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) OutputStream(java.io.OutputStream) DatasetIOException(org.kitesdk.data.DatasetIOException) IOException(java.io.IOException) StreamCallback(org.apache.nifi.processor.io.StreamCallback) DatasetRecordException(org.kitesdk.data.DatasetRecordException) DatasetException(org.kitesdk.data.DatasetException) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProcessException(org.apache.nifi.processor.exception.ProcessException) Record(org.apache.avro.generic.GenericData.Record) SchemaNotFoundException(org.kitesdk.data.SchemaNotFoundException) JSONFileReader(org.kitesdk.data.spi.filesystem.JSONFileReader) DatasetIOException(org.kitesdk.data.DatasetIOException)

Aggregations

IOException (java.io.IOException)2 InputStream (java.io.InputStream)2 OutputStream (java.io.OutputStream)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)2 Schema (org.apache.avro.Schema)2 DataFileWriter (org.apache.avro.file.DataFileWriter)2 Record (org.apache.avro.generic.GenericData.Record)2 FlowFile (org.apache.nifi.flowfile.FlowFile)2 ProcessException (org.apache.nifi.processor.exception.ProcessException)2 StreamCallback (org.apache.nifi.processor.io.StreamCallback)2 DatasetException (org.kitesdk.data.DatasetException)2 DatasetIOException (org.kitesdk.data.DatasetIOException)2 DatasetRecordException (org.kitesdk.data.DatasetRecordException)2 SchemaNotFoundException (org.kitesdk.data.SchemaNotFoundException)2 CSVFileReader (org.kitesdk.data.spi.filesystem.CSVFileReader)1 CSVProperties (org.kitesdk.data.spi.filesystem.CSVProperties)1 JSONFileReader (org.kitesdk.data.spi.filesystem.JSONFileReader)1