Search in sources:

Example 51 with ProcessContext

use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

the class ConvertAvroToORC method onTrigger.

/**
 * Converts the content of a single incoming FlowFile from an Avro data file to ORC.
 *
 * <p>The FlowFile content is rewritten through {@code session.write}: each Avro record is read,
 * every field value is converted to its ORC object, and the resulting row is added to an
 * {@code OrcFlowFileWriter}. On success the FlowFile receives the record-count, Hive DDL,
 * MIME type and filename (extension replaced by {@code .orc}) attributes and is transferred to
 * {@code REL_SUCCESS}. If a {@code ProcessException} is thrown, the FlowFile is transferred to
 * {@code REL_FAILURE} instead.
 *
 * @param context supplies the stripe size, buffer size, compression type and Hive table name properties
 * @param session used to fetch, rewrite, annotate and transfer the FlowFile
 * @throws ProcessException declared by the overridden method; ProcessExceptions raised inside the
 *         try block are caught here and lead to a transfer to failure
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        // Nothing queued for this invocation.
        return;
    }
    try {
        long startTime = System.currentTimeMillis();
        final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
        final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
        final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());
        // Holders that let the write callback hand results back to the enclosing method.
        final AtomicReference<Schema> hiveAvroSchema = new AtomicReference<>(null);
        final AtomicInteger totalRecordCount = new AtomicInteger(0);
        final String fileName = flowFile.getAttribute(CoreAttributes.FILENAME.key());
        flowFile = session.write(flowFile, (rawIn, rawOut) -> {
            try (final InputStream in = new BufferedInputStream(rawIn);
                final OutputStream out = new BufferedOutputStream(rawOut);
                final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<>())) {
                // Create ORC schema from Avro schema
                Schema avroSchema = reader.getSchema();
                TypeInfo orcSchema = NiFiOrcUtils.getOrcField(avroSchema);
                // orcConfig is declared outside this method; create it on first use if it is still unset.
                // NOTE(review): this lazy initialization is unsynchronized — confirm whether concurrent
                // onTrigger invocations are possible for this processor.
                if (orcConfig == null) {
                    orcConfig = new Configuration();
                }
                OrcFlowFileWriter orcWriter = NiFiOrcUtils.createWriter(out, new Path(fileName), orcConfig, orcSchema, stripeSize, compressionType, bufferSize);
                try {
                    int recordCount = 0;
                    GenericRecord currRecord = null;
                    while (reader.hasNext()) {
                        // Pass the previous record back in so the reader may reuse it.
                        currRecord = reader.next(currRecord);
                        List<Schema.Field> fields = currRecord.getSchema().getFields();
                        if (fields != null) {
                            Object[] row = new Object[fields.size()];
                            for (int i = 0; i < fields.size(); i++) {
                                Schema.Field field = fields.get(i);
                                Schema fieldSchema = field.schema();
                                Object o = currRecord.get(field.name());
                                try {
                                    row[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), o);
                                } catch (ArrayIndexOutOfBoundsException aioobe) {
                                    // String.valueOf tolerates a null field value; calling o.toString() here would
                                    // replace the real failure with a NullPointerException.
                                    getLogger().error("Index out of bounds at record {} for column {}, type {}, and object {}", new Object[] { recordCount, i, fieldSchema.getType().getName(), String.valueOf(o) }, aioobe);
                                    throw new IOException(aioobe);
                                }
                            }
                            orcWriter.addRow(NiFiOrcUtils.createOrcStruct(orcSchema, row));
                            recordCount++;
                        }
                    }
                    // Publish the results only after every record has been written.
                    hiveAvroSchema.set(avroSchema);
                    totalRecordCount.set(recordCount);
                } finally {
                    // Always close the writer, even on failure (closing flushes to the flow file).
                    orcWriter.close();
                }
            }
        });
        // Use the configured table name when set; otherwise derive one from the Avro schema's full name.
        final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet() ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue() : NiFiOrcUtils.normalizeHiveTableName(hiveAvroSchema.get().getFullName());
        String hiveDDL = NiFiOrcUtils.generateHiveDDL(hiveAvroSchema.get(), hiveTableName);
        // Add attributes and transfer to success
        flowFile = session.putAttribute(flowFile, RECORD_COUNT_ATTRIBUTE, Integer.toString(totalRecordCount.get()));
        flowFile = session.putAttribute(flowFile, HIVE_DDL_ATTRIBUTE, hiveDDL);
        // Replace the existing extension (text after the last '.') with ".orc", or append it if there is none.
        final int extensionIndex = fileName.lastIndexOf(".");
        final String baseName = extensionIndex != -1 ? fileName.substring(0, extensionIndex) : fileName;
        final String newFilename = baseName + ".orc";
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), ORC_MIME_TYPE);
        flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), newFilename);
        session.transfer(flowFile, REL_SUCCESS);
        session.getProvenanceReporter().modifyContent(flowFile, "Converted " + totalRecordCount.get() + " records", System.currentTimeMillis() - startTime);
    } catch (final ProcessException pe) {
        getLogger().error("Failed to convert {} from Avro to ORC due to {}; transferring to failure", new Object[] { flowFile, pe });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : StandardValidators(org.apache.nifi.processor.util.StandardValidators) BufferedInputStream(java.io.BufferedInputStream) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) SideEffectFree(org.apache.nifi.annotation.behavior.SideEffectFree) AtomicReference(java.util.concurrent.atomic.AtomicReference) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) NiFiOrcUtils(org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils) BufferedOutputStream(java.io.BufferedOutputStream) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Relationship(org.apache.nifi.processor.Relationship) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) CompressionKind(org.apache.hadoop.hive.ql.io.orc.CompressionKind) HiveJdbcCommon(org.apache.nifi.util.hive.HiveJdbcCommon) OutputStream(java.io.OutputStream) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) FlowFile(org.apache.nifi.flowfile.FlowFile) ProcessContext(org.apache.nifi.processor.ProcessContext) DataFileStream(org.apache.avro.file.DataFileStream) Set(java.util.Set) OrcFlowFileWriter(org.apache.hadoop.hive.ql.io.orc.OrcFlowFileWriter) ProcessSession(org.apache.nifi.processor.ProcessSession) IOException(java.io.IOException) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) HiveUtils(org.apache.nifi.util.hive.HiveUtils) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) List(java.util.List) SupportsBatching(org.apache.nifi.annotation.behavior.SupportsBatching) AbstractProcessor(org.apache.nifi.processor.AbstractProcessor) 
Tags(org.apache.nifi.annotation.documentation.Tags) DataUnit(org.apache.nifi.processor.DataUnit) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) Collections(java.util.Collections) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) InputStream(java.io.InputStream) Configuration(org.apache.hadoop.conf.Configuration) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) BufferedInputStream(java.io.BufferedInputStream) OrcFlowFileWriter(org.apache.hadoop.hive.ql.io.orc.OrcFlowFileWriter) ArrayList(java.util.ArrayList) List(java.util.List) GenericRecord(org.apache.avro.generic.GenericRecord) BufferedOutputStream(java.io.BufferedOutputStream) Path(org.apache.hadoop.fs.Path) FlowFile(org.apache.nifi.flowfile.FlowFile) CompressionKind(org.apache.hadoop.hive.ql.io.orc.CompressionKind) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) DataFileStream(org.apache.avro.file.DataFileStream) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ProcessException(org.apache.nifi.processor.exception.ProcessException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)

Example 52 with ProcessContext

use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

the class ITListenGRPC method testSuccessfulRoundTrip.

/**
 * Sends one FlowFile request to a ListenGRPC server started on a random port and verifies
 * that the reply reports success and that exactly one FlowFile, carrying the expected
 * attributes, is transferred to the success relationship.
 */
@Test
public void testSuccessfulRoundTrip() throws UnrecoverableKeyException, CertificateException, NoSuchAlgorithmException, KeyStoreException, IOException {
    final int port = TestGRPCClient.randomPort();
    final ManagedChannel grpcChannel = TestGRPCClient.buildChannel(HOST, port);
    final FlowFileServiceGrpc.FlowFileServiceBlockingStub blockingStub = FlowFileServiceGrpc.newBlockingStub(grpcChannel);

    final ListenGRPC processor = new ListenGRPC();
    final TestRunner testRunner = TestRunners.newTestRunner(processor);
    testRunner.setProperty(ListenGRPC.PROP_SERVICE_PORT, String.valueOf(port));
    final ProcessContext ctx = testRunner.getProcessContext();
    final ProcessSessionFactory sessionFactory = testRunner.getProcessSessionFactory();

    try {
        // Bring the server up. Per the original note, the order of these two calls should not
        // matter because startServer() waits for a processSessionFactory to become available.
        processor.startServer(ctx);
        processor.onTrigger(ctx, sessionFactory);

        final FlowFileRequest request = FlowFileRequest.newBuilder()
                .putAttributes("FOO", "BAR")
                .putAttributes(CoreAttributes.UUID.key(), SOURCE_SYSTEM_UUID)
                .setContent(ByteString.copyFrom("content".getBytes()))
                .build();
        final FlowFileReply response = blockingStub.send(request);

        // The server must acknowledge the FlowFile.
        assertThat(response.getResponseCode(), equalTo(FlowFileReply.ResponseCode.SUCCESS));
        assertThat(response.getBody(), equalTo("FlowFile successfully received."));

        // Exactly one FlowFile must have been routed to success.
        testRunner.assertTransferCount(ListenGRPC.REL_SUCCESS, 1);
        final List<MockFlowFile> received = testRunner.getFlowFilesForRelationship(ListenGRPC.REL_SUCCESS);
        assertThat(received.size(), equalTo(1));

        final MockFlowFile receivedFile = received.get(0);
        assertThat(receivedFile.getAttribute("FOO"), equalTo("BAR"));
        assertThat(receivedFile.getAttribute(ListenGRPC.REMOTE_HOST), equalTo("127.0.0.1"));
        assertThat(receivedFile.getAttribute(ListenGRPC.REMOTE_USER_DN), equalTo(FlowFileIngestServiceInterceptor.DEFAULT_FOUND_SUBJECT));
    } finally {
        // Tear down the server and the client channel regardless of the outcome.
        processor.stopServer(ctx);
        grpcChannel.shutdown();
    }
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) TestRunner(org.apache.nifi.util.TestRunner) ManagedChannel(io.grpc.ManagedChannel) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) ProcessContext(org.apache.nifi.processor.ProcessContext) Test(org.junit.Test)

Example 53 with ProcessContext

use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

the class ITListenGRPC method testOutOfSpaceRoundTrip.

/**
 * Verifies the error reply produced when the process context reports no available
 * relationships: the reply must carry the ERROR code and the "no space available" message,
 * and nothing may be transferred to the success relationship.
 */
@Test
public void testOutOfSpaceRoundTrip() throws UnrecoverableKeyException, CertificateException, NoSuchAlgorithmException, KeyStoreException, IOException {
    final int port = TestGRPCClient.randomPort();
    final ManagedChannel grpcChannel = TestGRPCClient.buildChannel(HOST, port);
    final FlowFileServiceGrpc.FlowFileServiceBlockingStub blockingStub = FlowFileServiceGrpc.newBlockingStub(grpcChannel);

    final ListenGRPC processor = new ListenGRPC();
    final TestRunner testRunner = TestRunners.newTestRunner(processor);
    testRunner.setProperty(ListenGRPC.PROP_SERVICE_PORT, String.valueOf(port));

    // Wrap the real context in a spy so that getAvailableRelationships() can be stubbed to
    // return an empty set, which prompts the server to answer with an error message.
    final ProcessContext realContext = testRunner.getProcessContext();
    final ProcessContext ctx = spy(realContext);
    when(ctx.getAvailableRelationships()).thenReturn(Sets.newHashSet());
    final ProcessSessionFactory sessionFactory = testRunner.getProcessSessionFactory();

    try {
        // Bring the server up. Per the original note, the order of these two calls should not
        // matter because startServer() waits for a processSessionFactory to become available.
        processor.startServer(ctx);
        processor.onTrigger(ctx, sessionFactory);

        final FlowFileRequest request = FlowFileRequest.newBuilder()
                .putAttributes("FOO", "BAR")
                .setContent(ByteString.copyFrom("content".getBytes()))
                .build();
        final FlowFileReply response = blockingStub.send(request);

        assertThat(response.getResponseCode(), equalTo(FlowFileReply.ResponseCode.ERROR));
        assertThat(response.getBody(), containsString("but no space available; Indicating Service Unavailable"));
        testRunner.assertTransferCount(ListenGRPC.REL_SUCCESS, 0);
    } finally {
        // Tear down the server and the client channel regardless of the outcome.
        processor.stopServer(ctx);
        grpcChannel.shutdown();
    }
}
Also used : TestRunner(org.apache.nifi.util.TestRunner) ManagedChannel(io.grpc.ManagedChannel) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) ProcessContext(org.apache.nifi.processor.ProcessContext) Test(org.junit.Test)

Example 54 with ProcessContext

use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

the class ITListenGRPC method testSecureTwoWaySSLPassAuthorizedDNCheck.

/**
 * Runs a round trip with the secure option enabled and an authorized-DN pattern of
 * {@code CN=localhost.*}. The client channel is built with both keystore and truststore
 * properties. The request must succeed, and the received FlowFile must carry the
 * certificate DN as its remote user DN attribute.
 */
@Test
public void testSecureTwoWaySSLPassAuthorizedDNCheck() throws UnrecoverableKeyException, CertificateException, NoSuchAlgorithmException, KeyStoreException, IOException {
    final int port = TestGRPCClient.randomPort();

    // Combine keystore and truststore settings into a single property map.
    final Map<String, String> tlsProperties = getKeystoreProperties();
    tlsProperties.putAll(getTruststoreProperties());

    final ManagedChannel grpcChannel = TestGRPCClient.buildChannel(HOST, port, tlsProperties);
    final FlowFileServiceGrpc.FlowFileServiceBlockingStub blockingStub = FlowFileServiceGrpc.newBlockingStub(grpcChannel);

    final ListenGRPC processor = new ListenGRPC();
    final TestRunner testRunner = TestRunners.newTestRunner(processor);
    testRunner.setProperty(ListenGRPC.PROP_SERVICE_PORT, String.valueOf(port));
    testRunner.setProperty(ListenGRPC.PROP_USE_SECURE, "true");
    testRunner.setProperty(ListenGRPC.PROP_AUTHORIZED_DN_PATTERN, "CN=localhost.*");
    useSSLContextService(testRunner, tlsProperties);
    final ProcessContext ctx = testRunner.getProcessContext();
    final ProcessSessionFactory sessionFactory = testRunner.getProcessSessionFactory();

    try {
        // Bring the server up. Per the original note, the order of these two calls should not
        // matter because startServer() waits for a processSessionFactory to become available.
        processor.startServer(ctx);
        processor.onTrigger(ctx, sessionFactory);

        final FlowFileRequest request = FlowFileRequest.newBuilder()
                .putAttributes("FOO", "BAR")
                .setContent(ByteString.copyFrom("content".getBytes()))
                .build();
        final FlowFileReply response = blockingStub.send(request);

        // The server must acknowledge the FlowFile.
        assertThat(response.getResponseCode(), equalTo(FlowFileReply.ResponseCode.SUCCESS));
        assertThat(response.getBody(), equalTo("FlowFile successfully received."));

        // Exactly one FlowFile must have been routed to success.
        testRunner.assertTransferCount(ListenGRPC.REL_SUCCESS, 1);
        final List<MockFlowFile> received = testRunner.getFlowFilesForRelationship(ListenGRPC.REL_SUCCESS);
        assertThat(received.size(), equalTo(1));

        final MockFlowFile receivedFile = received.get(0);
        assertThat(receivedFile.getAttribute("FOO"), equalTo("BAR"));
        assertThat(receivedFile.getAttribute(ListenGRPC.REMOTE_HOST), equalTo("127.0.0.1"));
        assertThat(receivedFile.getAttribute(ListenGRPC.REMOTE_USER_DN), equalTo(CERT_DN));
    } finally {
        // Tear down the server and the client channel regardless of the outcome.
        processor.stopServer(ctx);
        grpcChannel.shutdown();
    }
}
Also used : TestRunner(org.apache.nifi.util.TestRunner) StringContains.containsString(org.hamcrest.core.StringContains.containsString) ByteString(com.google.protobuf.ByteString) ProcessContext(org.apache.nifi.processor.ProcessContext) MockFlowFile(org.apache.nifi.util.MockFlowFile) ManagedChannel(io.grpc.ManagedChannel) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) Test(org.junit.Test)

Example 55 with ProcessContext

use of org.apache.nifi.processor.ProcessContext in project nifi by apache.

the class ITListenGRPC method testExceedMaxMessageSize.

/**
 * Verifies that sending a message larger than the configured maximum message size fails.
 *
 * <p>The maximum message size is set to one byte, so the {@code stub.send} call is expected
 * to throw {@code io.grpc.StatusRuntimeException}; the test passes only if that exception
 * propagates out of this method. No assertions follow the send call: code after it runs only
 * if the expected exception was not thrown, in which case the test fails anyway.
 */
@Test(expected = io.grpc.StatusRuntimeException.class)
public void testExceedMaxMessageSize() throws UnrecoverableKeyException, CertificateException, NoSuchAlgorithmException, KeyStoreException, IOException {
    final int randPort = TestGRPCClient.randomPort();
    final ManagedChannel channel = TestGRPCClient.buildChannel(HOST, randPort);
    final FlowFileServiceGrpc.FlowFileServiceBlockingStub stub = FlowFileServiceGrpc.newBlockingStub(channel);
    final ListenGRPC listenGRPC = new ListenGRPC();
    final TestRunner runner = TestRunners.newTestRunner(listenGRPC);
    runner.setProperty(ListenGRPC.PROP_SERVICE_PORT, String.valueOf(randPort));
    // set max message size to 1 byte to force exception to be thrown.
    runner.setProperty(ListenGRPC.PROP_MAX_MESSAGE_SIZE, "1B");
    final ProcessContext processContext = runner.getProcessContext();
    final ProcessSessionFactory processSessionFactory = runner.getProcessSessionFactory();
    try {
        // start the server. The order of the following statements shouldn't matter, because the
        // startServer() method waits for a processSessionFactory to be available to it.
        listenGRPC.startServer(processContext);
        listenGRPC.onTrigger(processContext, processSessionFactory);
        final FlowFileRequest ingestFile = FlowFileRequest.newBuilder().putAttributes("FOO", "BAR").putAttributes(CoreAttributes.UUID.key(), SOURCE_SYSTEM_UUID).setContent(ByteString.copyFrom("content".getBytes())).build();
        // this should throw a runtime exception; if it returns normally, JUnit fails the test
        // because the expected exception was never raised.
        stub.send(ingestFile);
    } finally {
        // stop the server
        listenGRPC.stopServer(processContext);
        channel.shutdown();
    }
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) TestRunner(org.apache.nifi.util.TestRunner) ManagedChannel(io.grpc.ManagedChannel) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) ProcessContext(org.apache.nifi.processor.ProcessContext) Test(org.junit.Test)

Aggregations

ProcessContext (org.apache.nifi.processor.ProcessContext)115 Test (org.junit.Test)67 TestRunner (org.apache.nifi.util.TestRunner)56 ProcessSession (org.apache.nifi.processor.ProcessSession)49 FlowFile (org.apache.nifi.flowfile.FlowFile)40 MockFlowFile (org.apache.nifi.util.MockFlowFile)39 HashSet (java.util.HashSet)35 Relationship (org.apache.nifi.processor.Relationship)35 List (java.util.List)34 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)34 ArrayList (java.util.ArrayList)33 Set (java.util.Set)33 Tags (org.apache.nifi.annotation.documentation.Tags)31 IOException (java.io.IOException)30 HashMap (java.util.HashMap)30 CapabilityDescription (org.apache.nifi.annotation.documentation.CapabilityDescription)30 ProcessException (org.apache.nifi.processor.exception.ProcessException)30 Collections (java.util.Collections)29 InputRequirement (org.apache.nifi.annotation.behavior.InputRequirement)29 ProcessSessionFactory (org.apache.nifi.processor.ProcessSessionFactory)29