Example 6 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class PutElasticsearchHttpRecord method writeRecord.

private void writeRecord(final Record record, final RecordSchema writeSchema, final JsonGenerator generator) throws IOException {
    RecordSchema schema = record.getSchema();
    generator.writeStartObject();
    for (int i = 0; i < schema.getFieldCount(); i++) {
        final RecordField field = schema.getField(i);
        final String fieldName = field.getFieldName();
        final Object value = record.getValue(field);
        if (value == null) {
            if (nullSuppression.equals(NEVER_SUPPRESS.getValue()) || (nullSuppression.equals(SUPPRESS_MISSING.getValue()) && record.getRawFieldNames().contains(fieldName))) {
                generator.writeNullField(fieldName);
            }
            continue;
        }
        generator.writeFieldName(fieldName);
        final DataType dataType = schema.getDataType(fieldName).get();
        writeValue(generator, value, fieldName, dataType);
    }
    generator.writeEndObject();
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)
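
For context, here is a minimal, self-contained sketch of the same pattern: walking a Record's RecordSchema and writing JSON with Jackson's JsonGenerator. The class name and the string-only value handling are illustrative assumptions; the real method above delegates to writeValue, which dispatches on each field's DataType.

import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class RecordToJsonSketch {

    public static void main(final String[] args) throws IOException {
        // Build a two-field schema and a record that conforms to it.
        final RecordSchema schema = new SimpleRecordSchema(Arrays.asList(
                new RecordField("id", RecordFieldType.INT.getDataType()),
                new RecordField("name", RecordFieldType.STRING.getDataType())));
        final Map<String, Object> values = new HashMap<>();
        values.put("id", 1);
        values.put("name", "John Doe");
        final Record record = new MapRecord(schema, values);

        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (final JsonGenerator generator = new JsonFactory().createGenerator(out, JsonEncoding.UTF8)) {
            generator.writeStartObject();
            for (final RecordField field : record.getSchema().getFields()) {
                final Object value = record.getValue(field);
                if (value == null) {
                    generator.writeNullField(field.getFieldName());
                } else {
                    // Simplified: every value is written as a string here; the real
                    // writeValue dispatches on the field's DataType.
                    generator.writeStringField(field.getFieldName(), String.valueOf(value));
                }
            }
            generator.writeEndObject();
        }
        // Prints {"id":"1","name":"John Doe"}
        System.out.println(out.toString("UTF-8"));
    }
}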

Example 7 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class PublishKafkaRecord_1_0 method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(1, DataUnit.MB, 500));
    if (flowFiles.isEmpty()) {
        return;
    }
    final PublisherPool pool = getPublisherPool(context);
    if (pool == null) {
        context.yield();
        return;
    }
    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean useTransactions = context.getProperty(USE_TRANSACTIONS).asBoolean();
    final long startTime = System.nanoTime();
    try (final PublisherLease lease = pool.obtainPublisher()) {
        if (useTransactions) {
            lease.beginTransaction();
        }
        // Send each FlowFile to Kafka asynchronously.
        final Iterator<FlowFile> itr = flowFiles.iterator();
        while (itr.hasNext()) {
            final FlowFile flowFile = itr.next();
            if (!isScheduled()) {
                // If stopped, re-queue FlowFile instead of sending it
                if (useTransactions) {
                    session.rollback();
                    lease.rollback();
                    return;
                }
                session.transfer(flowFile);
                itr.remove();
                continue;
            }
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
            final String messageKeyField = context.getProperty(MESSAGE_KEY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
            try {
                session.read(flowFile, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream rawIn) throws IOException {
                        try (final InputStream in = new BufferedInputStream(rawIn)) {
                            final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger());
                            final RecordSet recordSet = reader.createRecordSet();
                            final RecordSchema schema = writerFactory.getSchema(flowFile.getAttributes(), recordSet.getSchema());
                            lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
                        } catch (final SchemaNotFoundException | MalformedRecordException e) {
                            throw new ProcessException(e);
                        }
                    }
                });
            } catch (final Exception e) {
                // The FlowFile will be obtained and the error logged below, when calling publishResult.getFailedFlowFiles()
                lease.fail(flowFile, e);
                continue;
            }
        }
        // Complete the send
        final PublishResult publishResult = lease.complete();
        if (publishResult.isFailure()) {
            getLogger().info("Failed to send FlowFile to kafka; transferring to failure");
            session.transfer(flowFiles, REL_FAILURE);
            return;
        }
        // Transfer any successful FlowFiles.
        final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
        for (FlowFile success : flowFiles) {
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();
            final int msgCount = publishResult.getSuccessfulMessageCount(success);
            success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
            session.adjustCounter("Messages Sent", msgCount, true);
            final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic);
            session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
            session.transfer(success, REL_SUCCESS);
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) IOException(java.io.IOException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) ProcessException(org.apache.nifi.processor.exception.ProcessException) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) ProcessException(org.apache.nifi.processor.exception.ProcessException) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) BufferedInputStream(java.io.BufferedInputStream) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) RecordSet(org.apache.nifi.serialization.record.RecordSet) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)
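
The key step in the callback above is the schema handoff: the reader reports the schema it read, and the RecordSetWriterFactory resolves the schema to write with from that plus the FlowFile attributes (for example a schema.name attribute). Below is a minimal sketch of that handoff in isolation; copyRecords is a hypothetical helper, not a method of PublishKafkaRecord_1_0, and it assumes it lives inside the processor (so getLogger() is available) with java.io.OutputStream and RecordSetWriter imported alongside the classes listed above.

private void copyRecords(final FlowFile flowFile, final InputStream in, final OutputStream out,
        final RecordReaderFactory readerFactory, final RecordSetWriterFactory writerFactory)
        throws IOException, MalformedRecordException, SchemaNotFoundException {
    try (final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
        final RecordSet recordSet = reader.createRecordSet();
        // Resolve the write schema from the FlowFile attributes and the schema that was read.
        final RecordSchema writeSchema = writerFactory.getSchema(flowFile.getAttributes(), recordSet.getSchema());
        try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) {
            writer.write(recordSet);
        }
    }
}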

Example 8 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class TestPublisherLease method testRecordsSentToRecordWriterAndThenToProducer.

@Test
public void testRecordsSentToRecordWriterAndThenToProducer() throws IOException, SchemaNotFoundException, MalformedRecordException {
    final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 10L, logger, true, null, StandardCharsets.UTF_8);
    final FlowFile flowFile = new MockFlowFile(1L);
    final byte[] exampleInput = "101, John Doe, 48\n102, Jane Doe, 47".getBytes(StandardCharsets.UTF_8);
    final MockRecordParser readerService = new MockRecordParser();
    readerService.addSchemaField("person_id", RecordFieldType.LONG);
    readerService.addSchemaField("name", RecordFieldType.STRING);
    readerService.addSchemaField("age", RecordFieldType.INT);
    final RecordReader reader = readerService.createRecordReader(Collections.emptyMap(), new ByteArrayInputStream(exampleInput), logger);
    final RecordSet recordSet = reader.createRecordSet();
    final RecordSchema schema = reader.getSchema();
    final String topic = "unit-test";
    final String keyField = "person_id";
    final RecordSetWriterFactory writerFactory = Mockito.mock(RecordSetWriterFactory.class);
    final RecordSetWriter writer = Mockito.mock(RecordSetWriter.class);
    Mockito.when(writer.write(Mockito.any(Record.class))).thenReturn(WriteResult.of(1, Collections.emptyMap()));
    Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any())).thenReturn(writer);
    lease.publish(flowFile, recordSet, writerFactory, schema, keyField, topic);
    verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any());
    verify(writer, times(2)).write(any(Record.class));
    verify(producer, times(2)).send(any(), any());
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) MockFlowFile(org.apache.nifi.util.MockFlowFile) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) ByteArrayInputStream(java.io.ByteArrayInputStream) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) Record(org.apache.nifi.serialization.record.Record) RecordSet(org.apache.nifi.serialization.record.RecordSet) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) MockRecordParser(org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser) Test(org.junit.Test)
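
The mock input above parses into two records, which is why the verifications expect two writer creations, two record writes, and two producer sends. For clarity, here is a hypothetical hand-built equivalent of that record set, assuming RecordSet.of from the record API (a ListRecordSet would work the same way) plus the usual SimpleRecordSchema, MapRecord, and java.util imports.

static RecordSet buildTestRecordSet() {
    final RecordSchema schema = new SimpleRecordSchema(Arrays.asList(
            new RecordField("person_id", RecordFieldType.LONG.getDataType()),
            new RecordField("name", RecordFieldType.STRING.getDataType()),
            new RecordField("age", RecordFieldType.INT.getDataType())));

    final Map<String, Object> first = new HashMap<>();
    first.put("person_id", 101L);
    first.put("name", "John Doe");
    first.put("age", 48);

    final Map<String, Object> second = new HashMap<>();
    second.put("person_id", 102L);
    second.put("name", "Jane Doe");
    second.put("age", 47);

    // Two records, matching the two CSV lines in exampleInput.
    return RecordSet.of(schema, new MapRecord(schema, first), new MapRecord(schema, second));
}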

Example 9 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class PutParquetTest method configure.

private void configure(final PutParquet putParquet, final int numUsers) throws InitializationException {
    testRunner = TestRunners.newTestRunner(putParquet);
    testRunner.setProperty(PutParquet.HADOOP_CONFIGURATION_RESOURCES, TEST_CONF_PATH);
    testRunner.setProperty(PutParquet.DIRECTORY, DIRECTORY);
    readerFactory = new MockRecordParser();
    final RecordSchema recordSchema = AvroTypeUtil.createSchema(schema);
    for (final RecordField recordField : recordSchema.getFields()) {
        readerFactory.addSchemaField(recordField.getFieldName(), recordField.getDataType().getFieldType());
    }
    for (int i = 0; i < numUsers; i++) {
        readerFactory.addRecord("name" + i, i, "blue" + i);
    }
    testRunner.addControllerService("mock-reader-factory", readerFactory);
    testRunner.enableControllerService(readerFactory);
    testRunner.setProperty(PutParquet.RECORD_READER, "mock-reader-factory");
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) MockRecordParser(org.apache.nifi.serialization.record.MockRecordParser)
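
The Avro schema referenced as schema is defined elsewhere in the test class. A minimal, hypothetical stand-in that yields a matching RecordSchema, with field names and types chosen to line up with the addRecord("name" + i, i, "blue" + i) calls above, could look like this:

import org.apache.avro.Schema;
import org.apache.nifi.avro.AvroTypeUtil;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordSchema;

public class AvroToRecordSchemaSketch {

    public static void main(final String[] args) {
        // Hypothetical Avro schema; the real test loads its schema from a resource.
        final Schema avroSchema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"user\",\"fields\":["
                        + "{\"name\":\"name\",\"type\":\"string\"},"
                        + "{\"name\":\"favorite_number\",\"type\":\"int\"},"
                        + "{\"name\":\"favorite_color\",\"type\":\"string\"}]}");

        // Convert the Avro schema into a NiFi RecordSchema, as the test does.
        final RecordSchema recordSchema = AvroTypeUtil.createSchema(avroSchema);
        for (final RecordField field : recordSchema.getFields()) {
            // Prints: name -> STRING, favorite_number -> INT, favorite_color -> STRING
            System.out.println(field.getFieldName() + " -> " + field.getDataType().getFieldType());
        }
    }
}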

Example 10 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class PutDatabaseRecord method executeDML.

private void executeDML(ProcessContext context, ProcessSession session, FlowFile flowFile, FunctionContext functionContext, RoutingResult result, Connection con, RecordReader recordParser, String statementType, DMLSettings settings) throws IllegalArgumentException, MalformedRecordException, IOException, SQLException {
    final RecordSchema recordSchema = recordParser.getSchema();
    final ComponentLog log = getLogger();
    final String catalog = context.getProperty(CATALOG_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String schemaName = context.getProperty(SCHEMA_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String updateKeys = context.getProperty(UPDATE_KEYS).evaluateAttributeExpressions(flowFile).getValue();
    final SchemaKey schemaKey = new PutDatabaseRecord.SchemaKey(catalog, schemaName, tableName);
    // Ensure the table name has been set, the generated SQL statements (and TableSchema cache) will need it
    if (StringUtils.isEmpty(tableName)) {
        throw new IllegalArgumentException(format("Cannot process %s because Table Name is null or empty", flowFile));
    }
    // Always get the primary keys if Update Keys is empty. Otherwise if we have an Insert statement first, the table will be
    // cached but the primary keys will not be retrieved, causing future UPDATE statements to not have primary keys available
    final boolean includePrimaryKeys = updateKeys == null;
    // get the database schema from the cache, if one exists. We do this in a synchronized block, rather than
    // using a ConcurrentMap because the Map that we are using is a LinkedHashMap with a capacity such that if
    // the Map grows beyond this capacity, old elements are evicted. We do this in order to avoid filling the
    // Java Heap if there are a lot of different SQL statements being generated that reference different tables.
    TableSchema tableSchema;
    synchronized (this) {
        tableSchema = schemaCache.get(schemaKey);
        if (tableSchema == null) {
            // No schema exists for this table yet. Query the database to determine the schema and put it into the cache.
            tableSchema = TableSchema.from(con, catalog, schemaName, tableName, settings.translateFieldNames, includePrimaryKeys);
            schemaCache.put(schemaKey, tableSchema);
        }
    }
    if (tableSchema == null) {
        throw new IllegalArgumentException("No table schema specified!");
    }
    // build the fully qualified table name
    final StringBuilder tableNameBuilder = new StringBuilder();
    if (catalog != null) {
        tableNameBuilder.append(catalog).append(".");
    }
    if (schemaName != null) {
        tableNameBuilder.append(schemaName).append(".");
    }
    tableNameBuilder.append(tableName);
    final String fqTableName = tableNameBuilder.toString();
    if (recordSchema == null) {
        throw new IllegalArgumentException("No record schema specified!");
    }
    final SqlAndIncludedColumns sqlHolder;
    if (INSERT_TYPE.equalsIgnoreCase(statementType)) {
        sqlHolder = generateInsert(recordSchema, fqTableName, tableSchema, settings);
    } else if (UPDATE_TYPE.equalsIgnoreCase(statementType)) {
        sqlHolder = generateUpdate(recordSchema, fqTableName, updateKeys, tableSchema, settings);
    } else if (DELETE_TYPE.equalsIgnoreCase(statementType)) {
        sqlHolder = generateDelete(recordSchema, fqTableName, tableSchema, settings);
    } else {
        throw new IllegalArgumentException(format("Statement Type %s is not valid, FlowFile %s", statementType, flowFile));
    }
    try (PreparedStatement ps = con.prepareStatement(sqlHolder.getSql())) {
        final int queryTimeout = functionContext.queryTimeout;
        try {
            // timeout in seconds
            ps.setQueryTimeout(queryTimeout);
        } catch (SQLException se) {
            // If the driver doesn't support query timeout, then assume it is "infinite". Allow a timeout of zero only
            if (queryTimeout > 0) {
                throw se;
            }
        }
        Record currentRecord;
        List<Integer> fieldIndexes = sqlHolder.getFieldIndexes();
        while ((currentRecord = recordParser.nextRecord()) != null) {
            Object[] values = currentRecord.getValues();
            if (values != null) {
                if (fieldIndexes != null) {
                    for (int i = 0; i < fieldIndexes.size(); i++) {
                        // If DELETE type, insert the object twice because of the null check (see generateDelete for details)
                        if (DELETE_TYPE.equalsIgnoreCase(statementType)) {
                            ps.setObject(i * 2 + 1, values[fieldIndexes.get(i)]);
                            ps.setObject(i * 2 + 2, values[fieldIndexes.get(i)]);
                        } else {
                            ps.setObject(i + 1, values[fieldIndexes.get(i)]);
                        }
                    }
                } else {
                    // If there's no index map, assume all values are included and set them in order
                    for (int i = 0; i < values.length; i++) {
                        // If DELETE type, insert the object twice because of the null check (see generateDelete for details)
                        if (DELETE_TYPE.equalsIgnoreCase(statementType)) {
                            ps.setObject(i * 2 + 1, values[i]);
                            ps.setObject(i * 2 + 2, values[i]);
                        } else {
                            ps.setObject(i + 1, values[i]);
                        }
                    }
                }
                ps.addBatch();
            }
        }
        log.debug("Executing query {}", new Object[] { sqlHolder });
        ps.executeBatch();
        result.routeTo(flowFile, REL_SUCCESS);
        session.getProvenanceReporter().send(flowFile, functionContext.jdbcUrl);
    }
}
Also used : SQLException(java.sql.SQLException) PreparedStatement(java.sql.PreparedStatement) ComponentLog(org.apache.nifi.logging.ComponentLog) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)
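
The doubled setObject calls in the DELETE branch exist because the generated WHERE clause performs a null-safe comparison, so each value is bound once for the equality check and once for the null check. The exact SQL comes from generateDelete (not shown here); the hypothetical JDBC method below illustrates the binding pattern under that assumption, assuming java.sql.Connection, PreparedStatement, and SQLException are imported.

static int deleteUser(final Connection con, final String name, final Integer age) throws SQLException {
    // Hypothetical table and columns; the WHERE-clause shape is an assumption based
    // on the null-check comment above, not the literal SQL that generateDelete emits.
    final String sql = "DELETE FROM users"
            + " WHERE (name = ? OR (name is null AND ? is null))"
            + " AND (age = ? OR (age is null AND ? is null))";
    try (final PreparedStatement ps = con.prepareStatement(sql)) {
        // Same value bound twice per column, mirroring the i * 2 + 1 / i * 2 + 2 indices above.
        ps.setObject(1, name);
        ps.setObject(2, name);
        ps.setObject(3, age);
        ps.setObject(4, age);
        return ps.executeUpdate();
    }
}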

Aggregations

RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 243 usages
SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema): 178 usages
Test (org.junit.Test): 168 usages
Record (org.apache.nifi.serialization.record.Record): 147 usages
RecordField (org.apache.nifi.serialization.record.RecordField): 138 usages
ArrayList (java.util.ArrayList): 107 usages
MapRecord (org.apache.nifi.serialization.record.MapRecord): 94 usages
HashMap (java.util.HashMap): 88 usages
InputStream (java.io.InputStream): 79 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 64 usages
FileInputStream (java.io.FileInputStream): 56 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 54 usages
IOException (java.io.IOException): 44 usages
LinkedHashMap (java.util.LinkedHashMap): 36 usages
DataType (org.apache.nifi.serialization.record.DataType): 36 usages
File (java.io.File): 31 usages
Schema (org.apache.avro.Schema): 29 usages
SchemaIdentifier (org.apache.nifi.serialization.record.SchemaIdentifier): 29 usages
MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 28 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 26 usages