
Example 1 with RecordReaderFactoryException

Use of org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException in project nifi by apache.

From the class AbstractPutHDFSRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            // TODO codec extension
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
            final String directoryValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
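            // remoteOwner and remoteGroup are instance fields of the processor, populated
            // elsewhere in the class from its Remote Owner / Remote Group properties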
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);
            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);
            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();
            // if the tempFile or destFile already exist, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                return null;
            }
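            // Checked exceptions cannot propagate out of the session.read callback below,
            // so any failure is captured in exceptionHolder and rethrown on this thread
            // once the read completes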
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
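            // effectively final reference to the FlowFile for use inside the read callback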
            final FlowFile flowFileIn = putFlowFile;
            final StopWatch stopWatch = new StopWatch(true);
            // Read records from the incoming FlowFile and write them to the tempFile
            session.read(putFlowFile, (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;
                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", e);
                        exceptionHolder.set(rrfe);
                        return;
                    }
                    final RecordSet recordSet = recordReader.createRecordSet();
                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile, recordReader.getSchema());
                    writeResult.set(recordWriter.write(recordSet));
                } catch (Exception e) {
                    exceptionHolder.set(e);
                } finally {
                    IOUtils.closeQuietly(recordReader);
                    IOUtils.closeQuietly(recordWriter);
                }
            });
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            tempDotCopyFile = tempFile;
            // if any errors happened within the session.read then throw the exception so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);
            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, destFile, millis, dataRate });
            putFlowFile = postProcess(context, session, putFlowFile, destFile);
            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();
            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);
            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);
        } catch (IOException | FlowFileAccessException e) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
            context.yield();
        } catch (Throwable t) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        }
        return null;
    });
}
Also used: BufferedInputStream (java.io.BufferedInputStream), FileNotFoundException (java.io.FileNotFoundException), IOException (java.io.IOException), InputStream (java.io.InputStream), HashMap (java.util.HashMap), Map (java.util.Map), AtomicReference (java.util.concurrent.atomic.AtomicReference), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), Path (org.apache.hadoop.fs.Path), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation), FlowFile (org.apache.nifi.flowfile.FlowFile), FlowFileAccessException (org.apache.nifi.processor.exception.FlowFileAccessException), ProcessException (org.apache.nifi.processor.exception.ProcessException), FailureException (org.apache.nifi.processors.hadoop.exception.FailureException), RecordReaderFactoryException (org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException), HDFSRecordWriter (org.apache.nifi.processors.hadoop.record.HDFSRecordWriter), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), RecordReader (org.apache.nifi.serialization.RecordReader), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), RecordSet (org.apache.nifi.serialization.record.RecordSet), StopWatch (org.apache.nifi.util.StopWatch)
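
Both examples hinge on the same capture-and-rethrow pattern: session.read takes a callback that cannot throw checked exceptions, so any failure inside it is stashed in an AtomicReference and rethrown once the read returns. Wrapping reader-creation failures in RecordReaderFactoryException lets the outer catch blocks distinguish "could not construct the reader at all" from ordinary I/O errors, which in the HDFS example route to retry rather than failure. Below is a minimal, self-contained sketch of the pattern outside NiFi; the CaptureAndRethrow class name and its read helper are illustrative stand-ins, not NiFi API:

import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;

public class CaptureAndRethrow {

    // Stand-in for ProcessSession.read(FlowFile, InputStreamCallback): the callback
    // interface cannot declare checked exceptions, so failures must be smuggled out.
    static void read(InputStream in, Consumer<InputStream> callback) {
        callback.accept(in);
    }

    static void process(InputStream in) throws Throwable {
        final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);

        read(in, rawIn -> {
            try {
                if (rawIn.read() == -1) {
                    throw new IOException("empty stream");
                }
            } catch (Exception e) {
                exceptionHolder.set(e); // capture instead of throwing across the lambda
            }
        });

        // rethrow on the calling thread so ordinary catch blocks can route the failure
        if (exceptionHolder.get() != null) {
            throw exceptionHolder.get();
        }
    }

    public static void main(String[] args) {
        try {
            process(InputStream.nullInputStream());
        } catch (Throwable t) {
            System.out.println("captured and rethrown: " + t); // java.io.IOException: empty stream
        }
    }
}

In the processors above, the rethrown Throwable lands in catch (IOException | FlowFileAccessException) or catch (Throwable); since RecordReaderFactoryException is not an IOException, it always falls through to the failure relationship.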

Example 2 with RecordReaderFactoryException

Use of org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException in project nifi by apache.

From the class AbstractKudu, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    try {
        if (flowFile == null) {
            return;
        }
        final Map<String, String> attributes = new HashMap<String, String>();
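        // holder for any Throwable raised inside the session.read callback; rethrown below
        // so the outer catch blocks can route the FlowFile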
        final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
        final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
        final KuduSession kuduSession = this.getKuduSession(kuduClient);
        session.read(flowFile, (final InputStream rawIn) -> {
            RecordReader recordReader = null;
            try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                try {
                    recordReader = recordReaderFactory.createRecordReader(flowFile, in, getLogger());
                } catch (Exception ex) {
                    final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", ex);
                    exceptionHolder.set(rrfe);
                    return;
                }
                List<String> fieldNames = recordReader.getSchema().getFieldNames();
                final RecordSet recordSet = recordReader.createRecordSet();
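                // skipHeadLine is an instance field driven by a processor property; when set,
                // discard the first record (e.g. a CSV header line)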
                if (skipHeadLine) {
                    recordSet.next();
                }
                int numOfAddedRecord = 0;
                Record record = recordSet.next();
                while (record != null) {
                    org.apache.kudu.client.Operation oper = null;
                    if (operationType == OperationType.UPSERT) {
                        oper = upsertRecordToKudu(kuduTable, record, fieldNames);
                    } else {
                        oper = insertRecordToKudu(kuduTable, record, fieldNames);
                    }
                    kuduSession.apply(oper);
                    numOfAddedRecord++;
                    record = recordSet.next();
                }
                getLogger().info("KUDU: number of inserted records: " + numOfAddedRecord);
                attributes.put(RECORD_COUNT_ATTR, String.valueOf(numOfAddedRecord));
            } catch (KuduException ex) {
                getLogger().error("Exception occurred while interacting with Kudu due to " + ex.getMessage(), ex);
                exceptionHolder.set(ex);
            } catch (Exception e) {
                exceptionHolder.set(e);
            } finally {
                IOUtils.closeQuietly(recordReader);
            }
        });
        kuduSession.close();
        if (exceptionHolder.get() != null) {
            throw exceptionHolder.get();
        }
        // Update the FlowFile's attributes after the ingestion. putAllAttributes returns
        // a new FlowFile reference, which must be used for the transfer and provenance event
        final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, attributes);
        session.transfer(updatedFlowFile, REL_SUCCESS);
        session.getProvenanceReporter().send(updatedFlowFile, "Successfully added flowfile to kudu");
    } catch (IOException | FlowFileAccessException e) {
        getLogger().error("Failed to write due to {}", new Object[] { e });
        session.transfer(flowFile, REL_FAILURE);
    } catch (Throwable t) {
        getLogger().error("Failed to write due to {}", new Object[] { t });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used: BufferedInputStream (java.io.BufferedInputStream), IOException (java.io.IOException), InputStream (java.io.InputStream), HashMap (java.util.HashMap), AtomicReference (java.util.concurrent.atomic.AtomicReference), KuduException (org.apache.kudu.client.KuduException), KuduSession (org.apache.kudu.client.KuduSession), FlowFile (org.apache.nifi.flowfile.FlowFile), FlowFileAccessException (org.apache.nifi.processor.exception.FlowFileAccessException), ProcessException (org.apache.nifi.processor.exception.ProcessException), RecordReaderFactoryException (org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException), RecordReader (org.apache.nifi.serialization.RecordReader), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), Record (org.apache.nifi.serialization.record.Record), RecordSet (org.apache.nifi.serialization.record.RecordSet)
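
For reference, RecordReaderFactoryException itself is a plain checked wrapper exception. The sketch below is consistent with how both examples construct it, though the actual NiFi class may declare additional constructors:

package org.apache.nifi.processors.hadoop.exception;

// Marks "could not even construct a RecordReader", letting callers route these
// failures differently from ordinary IOExceptions (failure rather than retry).
public class RecordReaderFactoryException extends Exception {

    public RecordReaderFactoryException(final String message) {
        super(message);
    }

    public RecordReaderFactoryException(final String message, final Throwable cause) {
        super(message, cause);
    }
}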

Aggregations

BufferedInputStream (java.io.BufferedInputStream): 2
IOException (java.io.IOException): 2
InputStream (java.io.InputStream): 2
HashMap (java.util.HashMap): 2
AtomicReference (java.util.concurrent.atomic.AtomicReference): 2
FlowFile (org.apache.nifi.flowfile.FlowFile): 2
FlowFileAccessException (org.apache.nifi.processor.exception.FlowFileAccessException): 2
ProcessException (org.apache.nifi.processor.exception.ProcessException): 2
RecordReaderFactoryException (org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException): 2
RecordReader (org.apache.nifi.serialization.RecordReader): 2
RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 2
RecordSet (org.apache.nifi.serialization.record.RecordSet): 2
FileNotFoundException (java.io.FileNotFoundException): 1
Map (java.util.Map): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
Path (org.apache.hadoop.fs.Path): 1
UserGroupInformation (org.apache.hadoop.security.UserGroupInformation): 1
KuduException (org.apache.kudu.client.KuduException): 1
KuduSession (org.apache.kudu.client.KuduSession): 1