Search in sources :

Example 16 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class AbstractRecordProcessor method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final Map<String, String> attributes = new HashMap<>();
    final AtomicInteger recordCount = new AtomicInteger();
    final FlowFile original = flowFile;
    final Map<String, String> originalAttributes = flowFile.getAttributes();
    try {
        flowFile = session.write(flowFile, new StreamCallback() {

            @Override
            public void process(final InputStream in, final OutputStream out) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) {
                        writer.beginRecordSet();
                        Record record;
                        while ((record = reader.nextRecord()) != null) {
                            final Record processed = AbstractRecordProcessor.this.process(record, writeSchema, original, context);
                            writer.write(processed);
                        }
                        final WriteResult writeResult = writer.finishRecordSet();
                        attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                        attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                        attributes.putAll(writeResult.getAttributes());
                        recordCount.set(writeResult.getRecordCount());
                    }
                } catch (final SchemaNotFoundException e) {
                    throw new ProcessException(e.getLocalizedMessage(), e);
                } catch (final MalformedRecordException e) {
                    throw new ProcessException("Could not parse incoming data", e);
                }
            }
        });
    } catch (final Exception e) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    flowFile = session.putAllAttributes(flowFile, attributes);
    session.transfer(flowFile, REL_SUCCESS);
    final int count = recordCount.get();
    session.adjustCounter("Records Processed", count, false);
    getLogger().info("Successfully converted {} records for {}", new Object[] { count, flowFile });
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) StreamCallback(org.apache.nifi.processor.io.StreamCallback) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) ProcessException(org.apache.nifi.processor.exception.ProcessException) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) ProcessException(org.apache.nifi.processor.exception.ProcessException) WriteResult(org.apache.nifi.serialization.WriteResult) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Record(org.apache.nifi.serialization.record.Record) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Example 17 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class AbstractRouteRecord method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final T flowFileContext;
    try {
        flowFileContext = getFlowFileContext(flowFile, context);
    } catch (final Exception e) {
        getLogger().error("Failed to process {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final AtomicInteger numRecords = new AtomicInteger(0);
    final Map<Relationship, Tuple<FlowFile, RecordSetWriter>> writers = new HashMap<>();
    final FlowFile original = flowFile;
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    Record record;
                    while ((record = reader.nextRecord()) != null) {
                        final Set<Relationship> relationships = route(record, writeSchema, original, context, flowFileContext);
                        numRecords.incrementAndGet();
                        for (final Relationship relationship : relationships) {
                            final RecordSetWriter recordSetWriter;
                            Tuple<FlowFile, RecordSetWriter> tuple = writers.get(relationship);
                            if (tuple == null) {
                                FlowFile outFlowFile = session.create(original);
                                final OutputStream out = session.write(outFlowFile);
                                recordSetWriter = writerFactory.createWriter(getLogger(), writeSchema, out);
                                recordSetWriter.beginRecordSet();
                                tuple = new Tuple<>(outFlowFile, recordSetWriter);
                                writers.put(relationship, tuple);
                            } else {
                                recordSetWriter = tuple.getValue();
                            }
                            recordSetWriter.write(record);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Could not parse incoming data", e);
                }
            }
        });
        for (final Map.Entry<Relationship, Tuple<FlowFile, RecordSetWriter>> entry : writers.entrySet()) {
            final Relationship relationship = entry.getKey();
            final Tuple<FlowFile, RecordSetWriter> tuple = entry.getValue();
            final RecordSetWriter writer = tuple.getValue();
            FlowFile childFlowFile = tuple.getKey();
            final WriteResult writeResult = writer.finishRecordSet();
            try {
                writer.close();
            } catch (final IOException ioe) {
                getLogger().warn("Failed to close Writer for {}", new Object[] { childFlowFile });
            }
            final Map<String, String> attributes = new HashMap<>();
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.putAll(writeResult.getAttributes());
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.transfer(childFlowFile, relationship);
            session.adjustCounter("Records Processed", writeResult.getRecordCount(), false);
            session.adjustCounter("Records Routed to " + relationship.getName(), writeResult.getRecordCount(), false);
            session.getProvenanceReporter().route(childFlowFile, relationship);
        }
    } catch (final Exception e) {
        getLogger().error("Failed to process {}", new Object[] { flowFile, e });
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            try {
                tuple.getValue().close();
            } catch (final Exception e1) {
                getLogger().warn("Failed to close Writer for {}; some resources may not be cleaned up appropriately", new Object[] { tuple.getKey() });
            }
            session.remove(tuple.getKey());
        }
        session.transfer(flowFile, REL_FAILURE);
        return;
    } finally {
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            final RecordSetWriter writer = tuple.getValue();
            try {
                writer.close();
            } catch (final Exception e) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be properly cleaned up", new Object[] { tuple.getKey(), e });
            }
        }
    }
    if (isRouteOriginal()) {
        flowFile = session.putAttribute(flowFile, "record.count", String.valueOf(numRecords));
        session.transfer(flowFile, REL_ORIGINAL);
    } else {
        session.remove(flowFile);
    }
    getLogger().info("Successfully processed {}, creating {} derivative FlowFiles and processing {} records", new Object[] { flowFile, writers.size(), numRecords });
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) RecordReader(org.apache.nifi.serialization.RecordReader) OutputStream(java.io.OutputStream) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) ProcessException(org.apache.nifi.processor.exception.ProcessException) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) ProcessException(org.apache.nifi.processor.exception.ProcessException) WriteResult(org.apache.nifi.serialization.WriteResult) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Relationship(org.apache.nifi.processor.Relationship) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) HashMap(java.util.HashMap) Map(java.util.Map) Tuple(org.apache.nifi.util.Tuple)

Example 18 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class GetSolr method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    final AtomicBoolean continuePaging = new AtomicBoolean(true);
    final SolrQuery solrQuery = new SolrQuery();
    try {
        if (id_field == null) {
            id_field = getFieldNameOfUniqueKey();
        }
        final String dateField = context.getProperty(DATE_FIELD).getValue();
        final Map<String, String> stateMap = new HashMap<String, String>();
        stateMap.putAll(context.getStateManager().getState(Scope.CLUSTER).toMap());
        solrQuery.setQuery("*:*");
        final String query = context.getProperty(SOLR_QUERY).getValue();
        if (!StringUtils.isBlank(query) && !query.equals("*:*")) {
            solrQuery.addFilterQuery(query);
        }
        final StringBuilder automatedFilterQuery = (new StringBuilder()).append(dateField).append(":[").append(stateMap.get(STATE_MANAGER_FILTER)).append(" TO *]");
        solrQuery.addFilterQuery(automatedFilterQuery.toString());
        final List<String> fieldList = new ArrayList<String>();
        final String returnFields = context.getProperty(RETURN_FIELDS).getValue();
        if (!StringUtils.isBlank(returnFields)) {
            fieldList.addAll(Arrays.asList(returnFields.trim().split("[,]")));
            if (!fieldList.contains(dateField)) {
                fieldList.add(dateField);
                dateFieldNotInSpecifiedFieldsList.set(true);
            }
            for (String returnField : fieldList) {
                solrQuery.addField(returnField.trim());
            }
        }
        solrQuery.setParam(CursorMarkParams.CURSOR_MARK_PARAM, stateMap.get(STATE_MANAGER_CURSOR_MARK));
        solrQuery.setRows(context.getProperty(BATCH_SIZE).asInteger());
        final StringBuilder sortClause = (new StringBuilder()).append(dateField).append(" asc,").append(id_field).append(" asc");
        solrQuery.setParam("sort", sortClause.toString());
        while (continuePaging.get()) {
            final QueryRequest req = new QueryRequest(solrQuery);
            if (isBasicAuthEnabled()) {
                req.setBasicAuthCredentials(getUsername(), getPassword());
            }
            logger.debug(solrQuery.toQueryString());
            final QueryResponse response = req.process(getSolrClient());
            final SolrDocumentList documentList = response.getResults();
            if (response.getResults().size() > 0) {
                final SolrDocument lastSolrDocument = documentList.get(response.getResults().size() - 1);
                final String latestDateValue = df.format(lastSolrDocument.get(dateField));
                final String newCursorMark = response.getNextCursorMark();
                solrQuery.setParam(CursorMarkParams.CURSOR_MARK_PARAM, newCursorMark);
                stateMap.put(STATE_MANAGER_CURSOR_MARK, newCursorMark);
                stateMap.put(STATE_MANAGER_FILTER, latestDateValue);
                FlowFile flowFile = session.create();
                flowFile = session.putAttribute(flowFile, "solrQuery", solrQuery.toString());
                if (context.getProperty(RETURN_TYPE).getValue().equals(MODE_XML.getValue())) {
                    if (dateFieldNotInSpecifiedFieldsList.get()) {
                        for (SolrDocument doc : response.getResults()) {
                            doc.removeFields(dateField);
                        }
                    }
                    flowFile = session.write(flowFile, SolrUtils.getOutputStreamCallbackToTransformSolrResponseToXml(response));
                    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/xml");
                } else {
                    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
                    final RecordSchema schema = writerFactory.getSchema(null, null);
                    final RecordSet recordSet = SolrUtils.solrDocumentsToRecordSet(response.getResults(), schema);
                    final StringBuffer mimeType = new StringBuffer();
                    flowFile = session.write(flowFile, new OutputStreamCallback() {

                        @Override
                        public void process(final OutputStream out) throws IOException {
                            try {
                                final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out);
                                writer.write(recordSet);
                                writer.flush();
                                mimeType.append(writer.getMimeType());
                            } catch (SchemaNotFoundException e) {
                                throw new ProcessException("Could not parse Solr response", e);
                            }
                        }
                    });
                    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType.toString());
                }
                session.transfer(flowFile, REL_SUCCESS);
            }
            continuePaging.set(response.getResults().size() == Integer.parseInt(context.getProperty(BATCH_SIZE).getValue()));
        }
        context.getStateManager().setState(stateMap, Scope.CLUSTER);
    } catch (SolrServerException | SchemaNotFoundException | IOException e) {
        context.yield();
        session.rollback();
        logger.error("Failed to execute query {} due to {}", new Object[] { solrQuery.toString(), e }, e);
        throw new ProcessException(e);
    } catch (final Throwable t) {
        context.yield();
        session.rollback();
        logger.error("Failed to execute query {} due to {}", new Object[] { solrQuery.toString(), t }, t);
        throw t;
    }
}
Also used : HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) SolrQuery(org.apache.solr.client.solrj.SolrQuery) SolrDocument(org.apache.solr.common.SolrDocument) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) RecordSet(org.apache.nifi.serialization.record.RecordSet) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) QueryRequest(org.apache.solr.client.solrj.request.QueryRequest) SolrDocumentList(org.apache.solr.common.SolrDocumentList) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ProcessException(org.apache.nifi.processor.exception.ProcessException) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException)

Example 19 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class HBase_1_1_2_RecordLookupService method lookup.

@Override
public Optional<Record> lookup(Map<String, Object> coordinates) throws LookupFailureException {
    if (coordinates.get(ROW_KEY_KEY) == null) {
        return Optional.empty();
    }
    final String rowKey = coordinates.get(ROW_KEY_KEY).toString();
    if (StringUtils.isBlank(rowKey)) {
        return Optional.empty();
    }
    final byte[] rowKeyBytes = rowKey.getBytes(StandardCharsets.UTF_8);
    try {
        final Map<String, Object> values = new HashMap<>();
        hBaseClientService.scan(tableName, rowKeyBytes, rowKeyBytes, columns, (byte[] row, ResultCell[] resultCells) -> {
            for (final ResultCell cell : resultCells) {
                final byte[] qualifier = Arrays.copyOfRange(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierOffset() + cell.getQualifierLength());
                final byte[] value = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength());
                values.put(new String(qualifier, charset), new String(value, charset));
            }
        });
        if (values.size() > 0) {
            final List<RecordField> fields = new ArrayList<>();
            for (String key : values.keySet()) {
                fields.add(new RecordField(key, RecordFieldType.STRING.getDataType()));
            }
            final RecordSchema schema = new SimpleRecordSchema(fields);
            return Optional.ofNullable(new MapRecord(schema, values));
        } else {
            return Optional.empty();
        }
    } catch (IOException e) {
        getLogger().error("Error occurred loading {}", new Object[] { coordinates.get("rowKey") }, e);
        throw new LookupFailureException(e);
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ResultCell(org.apache.nifi.hbase.scan.ResultCell) IOException(java.io.IOException) LookupFailureException(org.apache.nifi.lookup.LookupFailureException) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema)

Example 20 with RecordSchema

use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.

the class CSVRecordLookupService method loadCache.

private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }
            final FileReader reader = new FileReader(csvFile);
            final CSVParser records = csvFormat.withFirstRecordAsHeader().parse(reader);
            ConcurrentHashMap<String, Record> cache = new ConcurrentHashMap<>();
            RecordSchema lookupRecordSchema = null;
            for (final CSVRecord record : records) {
                final String key = record.get(lookupKeyColumn);
                if (StringUtils.isBlank(key)) {
                    throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                } else if (!ignoreDuplicates && cache.containsKey(key)) {
                    throw new IllegalStateException("Duplicate lookup key encountered: " + key + " in " + csvFile);
                } else if (ignoreDuplicates && cache.containsKey(key)) {
                    logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                }
                // Put each key/value pair (except the lookup) into the properties
                final Map<String, Object> properties = new HashMap<>();
                record.toMap().forEach((k, v) -> {
                    if (!lookupKeyColumn.equals(k)) {
                        properties.put(k, v);
                    }
                });
                if (lookupRecordSchema == null) {
                    List<RecordField> recordFields = new ArrayList<>(properties.size());
                    properties.forEach((k, v) -> recordFields.add(new RecordField(k, RecordFieldType.STRING.getDataType())));
                    lookupRecordSchema = new SimpleRecordSchema(recordFields);
                }
                cache.put(key, new MapRecord(lookupRecordSchema, properties));
            }
            this.cache = cache;
            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) ComponentLog(org.apache.nifi.logging.ComponentLog) CSVParser(org.apache.commons.csv.CSVParser) FileReader(java.io.FileReader) CSVRecord(org.apache.commons.csv.CSVRecord) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) CSVRecord(org.apache.commons.csv.CSVRecord) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema)

Aggregations

RecordSchema (org.apache.nifi.serialization.record.RecordSchema)243 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)178 Test (org.junit.Test)168 Record (org.apache.nifi.serialization.record.Record)147 RecordField (org.apache.nifi.serialization.record.RecordField)138 ArrayList (java.util.ArrayList)107 MapRecord (org.apache.nifi.serialization.record.MapRecord)94 HashMap (java.util.HashMap)88 InputStream (java.io.InputStream)79 ByteArrayInputStream (java.io.ByteArrayInputStream)64 FileInputStream (java.io.FileInputStream)56 ComponentLog (org.apache.nifi.logging.ComponentLog)54 IOException (java.io.IOException)44 LinkedHashMap (java.util.LinkedHashMap)36 DataType (org.apache.nifi.serialization.record.DataType)36 File (java.io.File)31 Schema (org.apache.avro.Schema)29 SchemaIdentifier (org.apache.nifi.serialization.record.SchemaIdentifier)29 MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException)28 ByteArrayOutputStream (java.io.ByteArrayOutputStream)26