Search in sources :

Example 1 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class ConfluentSchemaRegistry method retrieveSchemaById.

private RecordSchema retrieveSchemaById(final SchemaIdentifier schemaIdentifier) throws IOException, SchemaNotFoundException {
    final OptionalLong schemaId = schemaIdentifier.getIdentifier();
    if (!schemaId.isPresent()) {
        throw new org.apache.nifi.schema.access.SchemaNotFoundException("Cannot retrieve schema because Schema Id is not present");
    }
    final RecordSchema schema = client.getSchema((int) schemaId.getAsLong());
    return schema;
}
Also used : OptionalLong(java.util.OptionalLong) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Example 2 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class ConfluentSchemaRegistry method retrieveSchemaByName.

private RecordSchema retrieveSchemaByName(final SchemaIdentifier schemaIdentifier) throws IOException, SchemaNotFoundException {
    final Optional<String> schemaName = schemaIdentifier.getName();
    if (!schemaName.isPresent()) {
        throw new org.apache.nifi.schema.access.SchemaNotFoundException("Cannot retrieve schema because Schema Name is not present");
    }
    final RecordSchema schema = client.getSchema(schemaName.get());
    return schema;
}
Also used : SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Example 3 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class RestSchemaRegistryClient method createRecordSchema.

private RecordSchema createRecordSchema(final JsonNode schemaNode) throws SchemaNotFoundException {
    final String subject = schemaNode.get(SUBJECT_FIELD_NAME).asText();
    final int version = schemaNode.get(VERSION_FIELD_NAME).asInt();
    final int id = schemaNode.get(ID_FIELD_NAME).asInt();
    final String schemaText = schemaNode.get(SCHEMA_TEXT_FIELD_NAME).asText();
    try {
        final Schema avroSchema = new Schema.Parser().parse(schemaText);
        final SchemaIdentifier schemaId = SchemaIdentifier.builder().name(subject).id(Long.valueOf(id)).version(version).build();
        final RecordSchema recordSchema = AvroTypeUtil.createSchema(avroSchema, schemaText, schemaId);
        return recordSchema;
    } catch (final SchemaParseException spe) {
        throw new SchemaNotFoundException("Obtained Schema with id " + id + " and name " + subject + " from Confluent Schema Registry but the Schema Text that was returned is not a valid Avro Schema");
    }
}
Also used : SchemaParseException(org.apache.avro.SchemaParseException) Schema(org.apache.avro.Schema) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) SchemaIdentifier(org.apache.nifi.serialization.record.SchemaIdentifier) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Example 4 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class PutDruidRecord method processFlowFile.

/**
 * Parses the record(s), converts each to a Map, and sends via Tranquility to the Druid Indexing Service
 *
 * @param context The process context
 * @param session The process session
 */
@SuppressWarnings("unchecked")
private void processFlowFile(ProcessContext context, final ProcessSession session) {
    final ComponentLog log = getLogger();
    // Get handle on Druid Tranquility session
    DruidTranquilityService tranquilityController = context.getProperty(DRUID_TRANQUILITY_SERVICE).asControllerService(DruidTranquilityService.class);
    Tranquilizer<Map<String, Object>> tranquilizer = tranquilityController.getTranquilizer();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Create the outgoing flow files and output streams
    FlowFile droppedFlowFile = session.create(flowFile);
    final AtomicInteger droppedFlowFileCount = new AtomicInteger(0);
    FlowFile failedFlowFile = session.create(flowFile);
    final AtomicInteger failedFlowFileCount = new AtomicInteger(0);
    FlowFile successfulFlowFile = session.create(flowFile);
    final AtomicInteger successfulFlowFileCount = new AtomicInteger(0);
    final AtomicInteger recordWriteErrors = new AtomicInteger(0);
    int recordCount = 0;
    final OutputStream droppedOutputStream = session.write(droppedFlowFile);
    final RecordSetWriter droppedRecordWriter;
    final OutputStream failedOutputStream = session.write(failedFlowFile);
    final RecordSetWriter failedRecordWriter;
    final OutputStream successfulOutputStream = session.write(successfulFlowFile);
    final RecordSetWriter successfulRecordWriter;
    try (final InputStream in = session.read(flowFile)) {
        final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
        final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
        final Map<String, String> attributes = flowFile.getAttributes();
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger());
        final RecordSchema outSchema = writerFactory.getSchema(attributes, reader.getSchema());
        droppedRecordWriter = writerFactory.createWriter(log, outSchema, droppedOutputStream);
        droppedRecordWriter.beginRecordSet();
        failedRecordWriter = writerFactory.createWriter(log, outSchema, failedOutputStream);
        failedRecordWriter.beginRecordSet();
        successfulRecordWriter = writerFactory.createWriter(log, outSchema, successfulOutputStream);
        successfulRecordWriter.beginRecordSet();
        Record r;
        while ((r = reader.nextRecord()) != null) {
            final Record record = r;
            recordCount++;
            // Convert each Record to HashMap and send to Druid
            Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(r, RecordFieldType.RECORD.getRecordDataType(r.getSchema()));
            log.debug("Tranquilizer Status: {}", new Object[] { tranquilizer.status().toString() });
            // Send data element to Druid asynchronously
            Future<BoxedUnit> future = tranquilizer.send(contentMap);
            log.debug("Sent Payload to Druid: {}", new Object[] { contentMap });
            // Wait for Druid to call back with status
            future.addEventListener(new FutureEventListener<Object>() {

                @Override
                public void onFailure(Throwable cause) {
                    if (cause instanceof MessageDroppedException) {
                        // This happens when event timestamp targets a Druid Indexing task that has closed (Late Arriving Data)
                        log.debug("Record Dropped due to MessageDroppedException: {}, transferring record to dropped.", new Object[] { cause.getMessage() }, cause);
                        try {
                            synchronized (droppedRecordWriter) {
                                droppedRecordWriter.write(record);
                                droppedRecordWriter.flush();
                                droppedFlowFileCount.incrementAndGet();
                            }
                        } catch (final IOException ioe) {
                            log.error("Error transferring record to dropped, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
                            recordWriteErrors.incrementAndGet();
                        }
                    } else {
                        log.error("FlowFile Processing Failed due to: {}", new Object[] { cause.getMessage() }, cause);
                        try {
                            synchronized (failedRecordWriter) {
                                failedRecordWriter.write(record);
                                failedRecordWriter.flush();
                                failedFlowFileCount.incrementAndGet();
                            }
                        } catch (final IOException ioe) {
                            log.error("Error transferring record to failure, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
                            recordWriteErrors.incrementAndGet();
                        }
                    }
                }

                @Override
                public void onSuccess(Object value) {
                    log.debug(" FlowFile Processing Success: {}", new Object[] { value.toString() });
                    try {
                        synchronized (successfulRecordWriter) {
                            successfulRecordWriter.write(record);
                            successfulRecordWriter.flush();
                            successfulFlowFileCount.incrementAndGet();
                        }
                    } catch (final IOException ioe) {
                        log.error("Error transferring record to success, this may result in data loss. " + "However the record was successfully processed by Druid", new Object[] { ioe.getMessage() }, ioe);
                        recordWriteErrors.incrementAndGet();
                    }
                }
            });
        }
    } catch (IOException | SchemaNotFoundException | MalformedRecordException e) {
        log.error("FlowFile Processing Failed due to: {}", new Object[] { e.getMessage() }, e);
        // The FlowFile will be obtained and the error logged below, when calling publishResult.getFailedFlowFiles()
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, Integer.toString(recordCount));
        session.transfer(flowFile, REL_FAILURE);
        try {
            droppedOutputStream.close();
            session.remove(droppedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with dropped records.", ioe);
        }
        try {
            failedOutputStream.close();
            session.remove(failedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with failed records.", ioe);
        }
        try {
            successfulOutputStream.close();
            session.remove(successfulFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with successful records.", ioe);
        }
        session.commit();
        return;
    }
    if (recordCount == 0) {
        // Send original (empty) flow file to success, remove the rest
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, "0");
        session.transfer(flowFile, REL_SUCCESS);
        try {
            droppedOutputStream.close();
            session.remove(droppedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with dropped records.", ioe);
        }
        try {
            failedOutputStream.close();
            session.remove(failedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with failed records.", ioe);
        }
        try {
            successfulOutputStream.close();
            session.remove(successfulFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with successful records.", ioe);
        }
    } else {
        // Wait for all the records to finish processing
        while (recordCount != (droppedFlowFileCount.get() + failedFlowFileCount.get() + successfulFlowFileCount.get() + recordWriteErrors.get())) {
            Thread.yield();
        }
        try {
            droppedRecordWriter.finishRecordSet();
            droppedRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with dropped records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (droppedFlowFileCount.get() > 0) {
            droppedFlowFile = session.putAttribute(droppedFlowFile, RECORD_COUNT, Integer.toString(droppedFlowFileCount.get()));
            session.transfer(droppedFlowFile, REL_DROPPED);
        } else {
            session.remove(droppedFlowFile);
        }
        try {
            failedRecordWriter.finishRecordSet();
            failedRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with failed records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (failedFlowFileCount.get() > 0) {
            failedFlowFile = session.putAttribute(failedFlowFile, RECORD_COUNT, Integer.toString(failedFlowFileCount.get()));
            session.transfer(failedFlowFile, REL_FAILURE);
        } else {
            session.remove(failedFlowFile);
        }
        try {
            successfulRecordWriter.finishRecordSet();
            successfulRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with successful records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (successfulFlowFileCount.get() > 0) {
            successfulFlowFile = session.putAttribute(successfulFlowFile, RECORD_COUNT, Integer.toString(successfulFlowFileCount.get()));
            session.transfer(successfulFlowFile, REL_SUCCESS);
            session.getProvenanceReporter().send(successfulFlowFile, tranquilityController.getTransitUri());
        } else {
            session.remove(successfulFlowFile);
        }
        session.remove(flowFile);
    }
    session.commit();
}
Also used : MessageDroppedException(com.metamx.tranquility.tranquilizer.MessageDroppedException) OutputStream(java.io.OutputStream) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) DruidTranquilityService(org.apache.nifi.controller.api.druid.DruidTranquilityService) Record(org.apache.nifi.serialization.record.Record) BoxedUnit(scala.runtime.BoxedUnit) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) ProcessException(org.apache.nifi.processor.exception.ProcessException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) Map(java.util.Map)

Example 5 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class PutElasticsearchHttpRecord method writeValue.

@SuppressWarnings("unchecked")
private void writeValue(final JsonGenerator generator, final Object value, final String fieldName, final DataType dataType) throws IOException {
    if (value == null) {
        if (nullSuppression.equals(NEVER_SUPPRESS.getValue()) || ((nullSuppression.equals(SUPPRESS_MISSING.getValue())) && fieldName != null && !fieldName.equals(""))) {
            generator.writeNullField(fieldName);
        }
        return;
    }
    final DataType chosenDataType = dataType.getFieldType() == RecordFieldType.CHOICE ? DataTypeUtils.chooseDataType(value, (ChoiceDataType) dataType) : dataType;
    final Object coercedValue = DataTypeUtils.convertType(value, chosenDataType, fieldName);
    if (coercedValue == null) {
        generator.writeNull();
        return;
    }
    switch(chosenDataType.getFieldType()) {
        case DATE:
            {
                final String stringValue = DataTypeUtils.toString(coercedValue, () -> DataTypeUtils.getDateFormat(RecordFieldType.DATE.getDefaultFormat()));
                if (DataTypeUtils.isLongTypeCompatible(stringValue)) {
                    generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
                } else {
                    generator.writeString(stringValue);
                }
                break;
            }
        case TIME:
            {
                final String stringValue = DataTypeUtils.toString(coercedValue, () -> DataTypeUtils.getDateFormat(RecordFieldType.TIME.getDefaultFormat()));
                if (DataTypeUtils.isLongTypeCompatible(stringValue)) {
                    generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
                } else {
                    generator.writeString(stringValue);
                }
                break;
            }
        case TIMESTAMP:
            {
                final String stringValue = DataTypeUtils.toString(coercedValue, () -> DataTypeUtils.getDateFormat(RecordFieldType.TIMESTAMP.getDefaultFormat()));
                if (DataTypeUtils.isLongTypeCompatible(stringValue)) {
                    generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
                } else {
                    generator.writeString(stringValue);
                }
                break;
            }
        case DOUBLE:
            generator.writeNumber(DataTypeUtils.toDouble(coercedValue, fieldName));
            break;
        case FLOAT:
            generator.writeNumber(DataTypeUtils.toFloat(coercedValue, fieldName));
            break;
        case LONG:
            generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
            break;
        case INT:
        case BYTE:
        case SHORT:
            generator.writeNumber(DataTypeUtils.toInteger(coercedValue, fieldName));
            break;
        case CHAR:
        case STRING:
            generator.writeString(coercedValue.toString());
            break;
        case BIGINT:
            if (coercedValue instanceof Long) {
                generator.writeNumber((Long) coercedValue);
            } else {
                generator.writeNumber((BigInteger) coercedValue);
            }
            break;
        case BOOLEAN:
            final String stringValue = coercedValue.toString();
            if ("true".equalsIgnoreCase(stringValue)) {
                generator.writeBoolean(true);
            } else if ("false".equalsIgnoreCase(stringValue)) {
                generator.writeBoolean(false);
            } else {
                generator.writeString(stringValue);
            }
            break;
        case RECORD:
            {
                final Record record = (Record) coercedValue;
                final RecordDataType recordDataType = (RecordDataType) chosenDataType;
                final RecordSchema childSchema = recordDataType.getChildSchema();
                writeRecord(record, childSchema, generator);
                break;
            }
        case MAP:
            {
                final MapDataType mapDataType = (MapDataType) chosenDataType;
                final DataType valueDataType = mapDataType.getValueType();
                final Map<String, ?> map = (Map<String, ?>) coercedValue;
                generator.writeStartObject();
                for (final Map.Entry<String, ?> entry : map.entrySet()) {
                    final String mapKey = entry.getKey();
                    final Object mapValue = entry.getValue();
                    generator.writeFieldName(mapKey);
                    writeValue(generator, mapValue, fieldName + "." + mapKey, valueDataType);
                }
                generator.writeEndObject();
                break;
            }
        case ARRAY:
        default:
            if (coercedValue instanceof Object[]) {
                final Object[] values = (Object[]) coercedValue;
                final ArrayDataType arrayDataType = (ArrayDataType) dataType;
                final DataType elementType = arrayDataType.getElementType();
                writeArray(values, fieldName, generator, elementType);
            } else {
                generator.writeString(coercedValue.toString());
            }
            break;
    }
}
Also used : MapDataType(org.apache.nifi.serialization.record.type.MapDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) Record(org.apache.nifi.serialization.record.Record) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Map(java.util.Map)

Aggregations

RecordSchema (org.apache.nifi.serialization.record.RecordSchema)243 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)178 Test (org.junit.Test)168 Record (org.apache.nifi.serialization.record.Record)147 RecordField (org.apache.nifi.serialization.record.RecordField)138 ArrayList (java.util.ArrayList)107 MapRecord (org.apache.nifi.serialization.record.MapRecord)94 HashMap (java.util.HashMap)88 InputStream (java.io.InputStream)79 ByteArrayInputStream (java.io.ByteArrayInputStream)64 FileInputStream (java.io.FileInputStream)56 ComponentLog (org.apache.nifi.logging.ComponentLog)54 IOException (java.io.IOException)44 LinkedHashMap (java.util.LinkedHashMap)36 DataType (org.apache.nifi.serialization.record.DataType)36 File (java.io.File)31 Schema (org.apache.avro.Schema)29 SchemaIdentifier (org.apache.nifi.serialization.record.SchemaIdentifier)29 MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException)28 ByteArrayOutputStream (java.io.ByteArrayOutputStream)26