use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class AbstractRecordProcessor method onTrigger.
/**
 * Rewrites the content of one incoming FlowFile record by record.
 *
 * Each record obtained from the configured Record Reader is handed to
 * {@code process(record, writeSchema, original, context)} and the returned record is
 * written with the configured Record Writer. On success the FlowFile receives the
 * {@code record.count} and MIME type attributes plus whatever attributes the
 * {@code WriteResult} carries, and is transferred to {@code REL_SUCCESS}. Any exception
 * thrown while rewriting sends the untouched incoming FlowFile to {@code REL_FAILURE}.
 *
 * @param context the process context supplying the reader/writer controller services
 * @param session the session the FlowFile is pulled from and transferred through
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        // Nothing queued for this invocation.
        return;
    }

    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);

    final Map<String, String> originalAttributes = original.getAttributes();
    // Filled in by the callback; applied to the FlowFile only after the write succeeded.
    final Map<String, String> resultAttributes = new HashMap<>();
    final AtomicInteger processedCount = new AtomicInteger();

    final FlowFile rewritten;
    try {
        rewritten = session.write(original, (final InputStream in, final OutputStream out) -> {
            try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());

                try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) {
                    writer.beginRecordSet();

                    for (Record current = reader.nextRecord(); current != null; current = reader.nextRecord()) {
                        writer.write(process(current, writeSchema, original, context));
                    }

                    final WriteResult writeResult = writer.finishRecordSet();
                    final int written = writeResult.getRecordCount();
                    resultAttributes.put("record.count", String.valueOf(written));
                    resultAttributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                    // Writer-supplied attributes go last so they may override the two above.
                    resultAttributes.putAll(writeResult.getAttributes());
                    processedCount.set(written);
                }
            } catch (final SchemaNotFoundException e) {
                throw new ProcessException(e.getLocalizedMessage(), e);
            } catch (final MalformedRecordException e) {
                throw new ProcessException("Could not parse incoming data", e);
            }
        });
    } catch (final Exception e) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { original, e });
        session.transfer(original, REL_FAILURE);
        return;
    }

    final FlowFile updated = session.putAllAttributes(rewritten, resultAttributes);
    session.transfer(updated, REL_SUCCESS);

    final int count = processedCount.get();
    session.adjustCounter("Records Processed", count, false);
    getLogger().info("Successfully converted {} records for {}", new Object[] { count, updated });
}
use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class AbstractRouteRecord method onTrigger.
/**
 * Splits the records of one incoming FlowFile across relationships.
 *
 * For every record read from the incoming FlowFile, {@code route(...)} returns the set of
 * relationships the record belongs to. One child FlowFile and one {@code RecordSetWriter}
 * are created lazily per relationship, and the record is written to each of them. After
 * the whole input has been read, every child gets {@code record.count}, the MIME type and
 * the writer's attributes, and is transferred to its relationship.
 *
 * If {@code getFlowFileContext} fails, or anything fails while reading/routing/finishing,
 * the incoming FlowFile goes to {@code REL_FAILURE}; in the latter case the child
 * FlowFiles created so far are removed from the session.
 *
 * @param context the process context supplying the reader/writer controller services
 * @param session the session the FlowFile is pulled from and children are created in
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final T flowFileContext;
    try {
        flowFileContext = getFlowFileContext(flowFile, context);
    } catch (final Exception e) {
        getLogger().error("Failed to process {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);

    final AtomicInteger numRecords = new AtomicInteger(0);
    // One (child FlowFile, open writer) pair per relationship that received at least one record.
    final Map<Relationship, Tuple<FlowFile, RecordSetWriter>> writers = new HashMap<>();
    final FlowFile original = flowFile;
    final Map<String, String> originalAttributes = original.getAttributes();

    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());

                    Record record;
                    while ((record = reader.nextRecord()) != null) {
                        final Set<Relationship> relationships = route(record, writeSchema, original, context, flowFileContext);
                        // Counted once per input record, regardless of how many relationships it fans out to.
                        numRecords.incrementAndGet();

                        for (final Relationship relationship : relationships) {
                            final RecordSetWriter recordSetWriter;
                            Tuple<FlowFile, RecordSetWriter> tuple = writers.get(relationship);
                            if (tuple == null) {
                                // First record for this relationship: create the child and its writer.
                                FlowFile outFlowFile = session.create(original);
                                final OutputStream out = session.write(outFlowFile);
                                recordSetWriter = writerFactory.createWriter(getLogger(), writeSchema, out);
                                recordSetWriter.beginRecordSet();

                                tuple = new Tuple<>(outFlowFile, recordSetWriter);
                                writers.put(relationship, tuple);
                            } else {
                                recordSetWriter = tuple.getValue();
                            }

                            recordSetWriter.write(record);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Could not parse incoming data", e);
                }
            }
        });

        for (final Map.Entry<Relationship, Tuple<FlowFile, RecordSetWriter>> entry : writers.entrySet()) {
            final Relationship relationship = entry.getKey();
            final Tuple<FlowFile, RecordSetWriter> tuple = entry.getValue();
            final RecordSetWriter writer = tuple.getValue();
            FlowFile childFlowFile = tuple.getKey();

            final WriteResult writeResult = writer.finishRecordSet();

            // The writer is closed here, before the child is updated and transferred
            // (presumably the session needs the child's output stream closed first — confirm).
            try {
                writer.close();
            } catch (final IOException ioe) {
                // Pass the exception along so the cause is not lost, as the finally handler below does.
                getLogger().warn("Failed to close Writer for {}", new Object[] { childFlowFile, ioe });
            }

            final Map<String, String> attributes = new HashMap<>();
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            // Writer-supplied attributes go last so they may override the two above.
            attributes.putAll(writeResult.getAttributes());

            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.transfer(childFlowFile, relationship);
            session.adjustCounter("Records Processed", writeResult.getRecordCount(), false);
            session.adjustCounter("Records Routed to " + relationship.getName(), writeResult.getRecordCount(), false);

            session.getProvenanceReporter().route(childFlowFile, relationship);
        }
    } catch (final Exception e) {
        getLogger().error("Failed to process {}", new Object[] { flowFile, e });

        // Discard every child created so far; only the incoming FlowFile is routed to failure.
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            try {
                tuple.getValue().close();
            } catch (final Exception e1) {
                getLogger().warn("Failed to close Writer for {}; some resources may not be cleaned up appropriately", new Object[] { tuple.getKey(), e1 });
            }

            session.remove(tuple.getKey());
        }

        session.transfer(flowFile, REL_FAILURE);
        return;
    } finally {
        // Runs on both paths, so writers already closed above are closed a second time here.
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            final RecordSetWriter writer = tuple.getValue();
            try {
                writer.close();
            } catch (final Exception e) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be properly cleaned up", new Object[] { tuple.getKey(), e });
            }
        }
    }

    if (isRouteOriginal()) {
        flowFile = session.putAttribute(flowFile, "record.count", String.valueOf(numRecords));
        session.transfer(flowFile, REL_ORIGINAL);
    } else {
        session.remove(flowFile);
    }

    getLogger().info("Successfully processed {}, creating {} derivative FlowFiles and processing {} records", new Object[] { flowFile, writers.size(), numRecords });
}
use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class GetSolr method onTrigger.
/**
 * Pages through Solr results newer than the stored state and emits one FlowFile per page.
 *
 * The query is built from the configured query (as a filter query), an automatic
 * date-range filter starting at the stored {@code STATE_MANAGER_FILTER} value, the
 * optional return-field list, and a sort on the date field followed by the unique-key
 * field. Paging uses the Solr cursor mark and continues while a page comes back with
 * exactly {@code BATCH_SIZE} documents. Each non-empty page is written either as XML or
 * through the configured Record Writer and transferred to {@code REL_SUCCESS}. The
 * cursor mark and latest date are stored to cluster state once paging has finished.
 *
 * On any failure the context yields, the session is rolled back, the error is logged,
 * and the exception is rethrown (checked ones wrapped in {@code ProcessException}).
 *
 * @param context the process context supplying properties and the state manager
 * @param session the session in which result FlowFiles are created
 * @throws ProcessException if querying Solr, resolving the schema, or I/O fails
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    final AtomicBoolean continuePaging = new AtomicBoolean(true);
    final SolrQuery solrQuery = new SolrQuery();

    try {
        // Resolved once and kept in a member declared outside this method.
        if (id_field == null) {
            id_field = getFieldNameOfUniqueKey();
        }

        final String dateField = context.getProperty(DATE_FIELD).getValue();

        // Mutable working copy of the stored state; written back only after paging completes.
        final Map<String, String> stateMap = new HashMap<String, String>();
        stateMap.putAll(context.getStateManager().getState(Scope.CLUSTER).toMap());

        solrQuery.setQuery("*:*");
        final String query = context.getProperty(SOLR_QUERY).getValue();
        if (!StringUtils.isBlank(query) && !query.equals("*:*")) {
            solrQuery.addFilterQuery(query);
        }
        final StringBuilder automatedFilterQuery = (new StringBuilder()).append(dateField).append(":[").append(stateMap.get(STATE_MANAGER_FILTER)).append(" TO *]");
        solrQuery.addFilterQuery(automatedFilterQuery.toString());

        final List<String> fieldList = new ArrayList<String>();
        final String returnFields = context.getProperty(RETURN_FIELDS).getValue();
        if (!StringUtils.isBlank(returnFields)) {
            fieldList.addAll(Arrays.asList(returnFields.trim().split("[,]")));
            // The date field is always fetched because the state update needs it; remember
            // that it was added here so the XML branch can strip it from the output again.
            if (!fieldList.contains(dateField)) {
                fieldList.add(dateField);
                dateFieldNotInSpecifiedFieldsList.set(true);
            }
            for (String returnField : fieldList) {
                solrQuery.addField(returnField.trim());
            }
        }

        solrQuery.setParam(CursorMarkParams.CURSOR_MARK_PARAM, stateMap.get(STATE_MANAGER_CURSOR_MARK));
        solrQuery.setRows(context.getProperty(BATCH_SIZE).asInteger());

        final StringBuilder sortClause = (new StringBuilder()).append(dateField).append(" asc,").append(id_field).append(" asc");
        solrQuery.setParam("sort", sortClause.toString());

        while (continuePaging.get()) {
            final QueryRequest req = new QueryRequest(solrQuery);
            if (isBasicAuthEnabled()) {
                req.setBasicAuthCredentials(getUsername(), getPassword());
            }

            logger.debug(solrQuery.toQueryString());
            final QueryResponse response = req.process(getSolrClient());
            final SolrDocumentList documentList = response.getResults();

            if (response.getResults().size() > 0) {
                // The last document of the page supplies the date stored as the next filter value.
                final SolrDocument lastSolrDocument = documentList.get(response.getResults().size() - 1);
                final String latestDateValue = df.format(lastSolrDocument.get(dateField));
                final String newCursorMark = response.getNextCursorMark();

                solrQuery.setParam(CursorMarkParams.CURSOR_MARK_PARAM, newCursorMark);
                stateMap.put(STATE_MANAGER_CURSOR_MARK, newCursorMark);
                stateMap.put(STATE_MANAGER_FILTER, latestDateValue);

                FlowFile flowFile = session.create();
                flowFile = session.putAttribute(flowFile, "solrQuery", solrQuery.toString());

                if (context.getProperty(RETURN_TYPE).getValue().equals(MODE_XML.getValue())) {
                    if (dateFieldNotInSpecifiedFieldsList.get()) {
                        for (SolrDocument doc : response.getResults()) {
                            doc.removeFields(dateField);
                        }
                    }
                    flowFile = session.write(flowFile, SolrUtils.getOutputStreamCallbackToTransformSolrResponseToXml(response));
                    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/xml");
                } else {
                    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
                    final RecordSchema schema = writerFactory.getSchema(null, null);
                    final RecordSet recordSet = SolrUtils.solrDocumentsToRecordSet(response.getResults(), schema);
                    // Holder the callback can append to; the MIME type is only known once the writer exists.
                    final StringBuffer mimeType = new StringBuffer();
                    flowFile = session.write(flowFile, new OutputStreamCallback() {
                        @Override
                        public void process(final OutputStream out) throws IOException {
                            // try-with-resources so the writer is closed even when writing fails.
                            try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) {
                                writer.write(recordSet);
                                writer.flush();
                                mimeType.append(writer.getMimeType());
                            } catch (SchemaNotFoundException e) {
                                throw new ProcessException("Could not parse Solr response", e);
                            }
                        }
                    });
                    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType.toString());
                }
                session.transfer(flowFile, REL_SUCCESS);
            }
            // A page smaller than the batch size means there is nothing further to fetch.
            continuePaging.set(response.getResults().size() == Integer.parseInt(context.getProperty(BATCH_SIZE).getValue()));
        }
        context.getStateManager().setState(stateMap, Scope.CLUSTER);
    } catch (SolrServerException | SchemaNotFoundException | IOException e) {
        context.yield();
        session.rollback();
        logger.error("Failed to execute query {} due to {}", new Object[] { solrQuery.toString(), e }, e);
        throw new ProcessException(e);
    } catch (final Throwable t) {
        context.yield();
        session.rollback();
        logger.error("Failed to execute query {} due to {}", new Object[] { solrQuery.toString(), t }, t);
        throw t;
    }
}
use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class HBase_1_1_2_RecordLookupService method lookup.
/**
 * Looks up a single HBase row and returns its cells as a record of string fields.
 *
 * The row key is taken from {@code coordinates} under {@code ROW_KEY_KEY}. The table is
 * scanned with the row key as both start and end row, and every returned cell contributes
 * one entry: qualifier as field name, cell value as field value, both decoded with the
 * service's {@code charset}.
 *
 * @param coordinates lookup coordinates; must contain the row key under {@code ROW_KEY_KEY}
 * @return a record with one STRING field per returned qualifier, or an empty
 *         {@code Optional} when the row key is missing or blank or the scan yields no cells
 * @throws LookupFailureException if the scan fails with an {@code IOException}
 */
@Override
public Optional<Record> lookup(Map<String, Object> coordinates) throws LookupFailureException {
    final Object rowKeyValue = coordinates.get(ROW_KEY_KEY);
    if (rowKeyValue == null) {
        return Optional.empty();
    }

    final String rowKey = rowKeyValue.toString();
    if (StringUtils.isBlank(rowKey)) {
        return Optional.empty();
    }

    final byte[] rowKeyBytes = rowKey.getBytes(StandardCharsets.UTF_8);
    try {
        final Map<String, Object> values = new HashMap<>();
        // Start row == end row, so the scan is restricted to the one requested key.
        hBaseClientService.scan(tableName, rowKeyBytes, rowKeyBytes, columns, (byte[] row, ResultCell[] resultCells) -> {
            for (final ResultCell cell : resultCells) {
                // Qualifier and value are slices of larger backing arrays; copy out just the slice.
                final byte[] qualifier = Arrays.copyOfRange(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierOffset() + cell.getQualifierLength());
                final byte[] value = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength());
                values.put(new String(qualifier, charset), new String(value, charset));
            }
        });

        if (values.isEmpty()) {
            return Optional.empty();
        }

        final List<RecordField> fields = new ArrayList<>(values.size());
        for (final String key : values.keySet()) {
            fields.add(new RecordField(key, RecordFieldType.STRING.getDataType()));
        }
        final RecordSchema schema = new SimpleRecordSchema(fields);
        // The record is freshly constructed and never null, so Optional.of is sufficient.
        return Optional.of(new MapRecord(schema, values));
    } catch (IOException e) {
        // Log the key that was actually resolved via ROW_KEY_KEY instead of re-reading a hard-coded name.
        getLogger().error("Error occurred loading {}", new Object[] { rowKey }, e);
        throw new LookupFailureException(e);
    }
}
use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class CSVRecordLookupService method loadCache.
/**
 * Reloads the lookup table from {@code csvFile} into a new cache and swaps it in.
 *
 * The first CSV record is treated as the header. Each following record is stored under
 * the value of its {@code lookupKeyColumn}; all other columns become STRING fields of the
 * cached record. The schema is derived from the first data record and reused for the rest.
 * The call does nothing when {@code lock} cannot be acquired immediately.
 *
 * @throws IllegalStateException if a record has a blank lookup key, or a duplicate key is
 *         found while {@code ignoreDuplicates} is false
 * @throws IOException if the file cannot be opened, parsed or closed
 */
private void loadCache() throws IllegalStateException, IOException {
    if (!lock.tryLock()) {
        // The lock is held elsewhere; skip this load.
        return;
    }

    try {
        final ComponentLog logger = getLogger();
        if (logger.isDebugEnabled()) {
            logger.debug("Loading lookup table from file: " + csvFile);
        }

        final ConcurrentHashMap<String, Record> cache = new ConcurrentHashMap<>();

        // Reader and parser are closed on every exit path so the file handle is not leaked.
        // NOTE(review): FileReader decodes with the platform default charset — confirm that is intended.
        try (final FileReader reader = new FileReader(csvFile);
             final CSVParser records = csvFormat.withFirstRecordAsHeader().parse(reader)) {
            RecordSchema lookupRecordSchema = null;
            for (final CSVRecord record : records) {
                final String key = record.get(lookupKeyColumn);

                if (StringUtils.isBlank(key)) {
                    throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                } else if (!ignoreDuplicates && cache.containsKey(key)) {
                    throw new IllegalStateException("Duplicate lookup key encountered: " + key + " in " + csvFile);
                } else if (ignoreDuplicates && cache.containsKey(key)) {
                    // Tolerated duplicate: the later record replaces the earlier one below.
                    logger.warn("Duplicate lookup key encountered: {} in {}", new Object[] { key, csvFile });
                }

                // Put each key/value pair (except the lookup) into the properties
                final Map<String, Object> properties = new HashMap<>();
                record.toMap().forEach((k, v) -> {
                    if (!lookupKeyColumn.equals(k)) {
                        properties.put(k, v);
                    }
                });

                if (lookupRecordSchema == null) {
                    // Built once from the first data record's columns and shared by every cached record.
                    final List<RecordField> recordFields = new ArrayList<>(properties.size());
                    properties.forEach((k, v) -> recordFields.add(new RecordField(k, RecordFieldType.STRING.getDataType())));
                    lookupRecordSchema = new SimpleRecordSchema(recordFields);
                }

                cache.put(key, new MapRecord(lookupRecordSchema, properties));
            }
        }

        // Publish only after the file was read and closed without error.
        this.cache = cache;

        if (cache.isEmpty()) {
            logger.warn("Lookup table is empty after reading file: " + csvFile);
        }
    } finally {
        lock.unlock();
    }
}
Aggregations