Use of org.apache.nifi.serialization.MalformedRecordException in project nifi by apache.
The class TestJsonTreeRowRecordReader, method testSingleJsonElementWithChoiceFields:
@Test
public void testSingleJsonElementWithChoiceFields() throws IOException, MalformedRecordException {
    // Wrap each default field in a CHOICE data type to test mapping to a Choice type.
    final List<RecordField> choiceFields = getDefaultFields().stream()
        .map(f -> new RecordField(f.getFieldName(), RecordFieldType.CHOICE.getChoiceDataType(f.getDataType())))
        .collect(Collectors.toList());
    final RecordSchema schema = new SimpleRecordSchema(choiceFields);

    try (final InputStream in = new FileInputStream(new File("src/test/resources/json/single-bank-account.json"));
        final JsonTreeRowRecordReader reader = new JsonTreeRowRecordReader(in, Mockito.mock(ComponentLog.class), schema, dateFormat, timeFormat, timestampFormat)) {

        final List<String> fieldNames = schema.getFieldNames();
        final List<String> expectedFieldNames = Arrays.asList(new String[] { "id", "name", "balance", "address", "city", "state", "zipCode", "country" });
        assertEquals(expectedFieldNames, fieldNames);

        final List<RecordFieldType> expectedTypes = Arrays.asList(new RecordFieldType[] { RecordFieldType.INT, RecordFieldType.STRING, RecordFieldType.DOUBLE,
            RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING });

        final List<RecordField> fields = schema.getFields();
        for (int i = 0; i < schema.getFields().size(); i++) {
            assertTrue(fields.get(i).getDataType() instanceof ChoiceDataType);
            final ChoiceDataType choiceDataType = (ChoiceDataType) fields.get(i).getDataType();
            assertEquals(expectedTypes.get(i), choiceDataType.getPossibleSubTypes().get(0).getFieldType());
        }

        final Object[] firstRecordValues = reader.nextRecord().getValues();
        Assert.assertArrayEquals(new Object[] { 1, "John Doe", 4750.89, "123 My Street", "My City", "MS", "11111", "USA" }, firstRecordValues);

        assertNull(reader.nextRecord());
    }
}
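As background for the assertions above, here is a minimal standalone sketch of how RecordFieldType.CHOICE.getChoiceDataType(...) wraps other data types and how ChoiceDataType.getPossibleSubTypes() exposes them in declaration order; the class name and printed output are illustrative only:

import java.util.List;

import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.type.ChoiceDataType;

public class ChoiceFieldSketch {
    public static void main(String[] args) {
        // Wrap an INT data type in a CHOICE that also allows STRING.
        final DataType intOrString = RecordFieldType.CHOICE.getChoiceDataType(
            RecordFieldType.INT.getDataType(), RecordFieldType.STRING.getDataType());
        final RecordField id = new RecordField("id", intOrString);

        // The field's data type is a ChoiceDataType whose sub-types preserve declaration order,
        // which is why the test above checks getPossibleSubTypes().get(0).
        final ChoiceDataType choice = (ChoiceDataType) id.getDataType();
        final List<DataType> subTypes = choice.getPossibleSubTypes();
        System.out.println(subTypes.get(0).getFieldType()); // INT
        System.out.println(subTypes.get(1).getFieldType()); // STRING
    }
}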
Use of org.apache.nifi.serialization.MalformedRecordException in project nifi by apache.
The class TestJsonTreeRowRecordReader, method testElementWithNestedData:
@Test
public void testElementWithNestedData() throws IOException, MalformedRecordException {
    final DataType accountType = RecordFieldType.RECORD.getRecordDataType(getAccountSchema());
    final List<RecordField> fields = getDefaultFields();
    fields.add(new RecordField("account", accountType));
    fields.remove(new RecordField("balance", RecordFieldType.DOUBLE.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);

    try (final InputStream in = new FileInputStream(new File("src/test/resources/json/single-element-nested.json"));
        final JsonTreeRowRecordReader reader = new JsonTreeRowRecordReader(in, Mockito.mock(ComponentLog.class), schema, dateFormat, timeFormat, timestampFormat)) {

        final List<RecordFieldType> dataTypes = schema.getDataTypes().stream().map(dt -> dt.getFieldType()).collect(Collectors.toList());
        final List<RecordFieldType> expectedTypes = Arrays.asList(new RecordFieldType[] { RecordFieldType.INT, RecordFieldType.STRING, RecordFieldType.STRING,
            RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.RECORD });
        assertEquals(expectedTypes, dataTypes);

        final Object[] firstRecordValues = reader.nextRecord().getValues();
        final Object[] allButLast = Arrays.copyOfRange(firstRecordValues, 0, firstRecordValues.length - 1);
        Assert.assertArrayEquals(new Object[] { 1, "John Doe", "123 My Street", "My City", "MS", "11111", "USA" }, allButLast);

        final Object last = firstRecordValues[firstRecordValues.length - 1];
        assertTrue(Record.class.isAssignableFrom(last.getClass()));

        final Record record = (Record) last;
        assertEquals(42, record.getValue("id"));
        assertEquals(4750.89, record.getValue("balance"));

        assertNull(reader.nextRecord());
    }
}
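Both tests declare MalformedRecordException because JsonTreeRowRecordReader raises it when the input cannot be parsed as JSON. A minimal sketch of that failure path, assuming an empty schema and illustrative date/time format strings:

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.Collections;

import org.apache.nifi.json.JsonTreeRowRecordReader;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.RecordSchema;
import org.mockito.Mockito;

public class MalformedJsonSketch {
    public static void main(String[] args) throws Exception {
        final RecordSchema schema = new SimpleRecordSchema(Collections.emptyList());
        // "not json" cannot be parsed, so the reader surfaces MalformedRecordException.
        try (final InputStream in = new ByteArrayInputStream("not json".getBytes());
            final JsonTreeRowRecordReader reader = new JsonTreeRowRecordReader(
                in, Mockito.mock(ComponentLog.class), schema, "yyyy-MM-dd", "HH:mm:ss", "yyyy-MM-dd HH:mm:ss")) {
            reader.nextRecord();
        } catch (final MalformedRecordException e) {
            System.out.println("Rejected as expected: " + e.getMessage());
        }
    }
}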
Use of org.apache.nifi.serialization.MalformedRecordException in project nifi by apache.
The class PublishKafkaRecord_0_10, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(1, DataUnit.MB, 500));
    if (flowFiles.isEmpty()) {
        return;
    }

    final PublisherPool pool = getPublisherPool(context);
    if (pool == null) {
        context.yield();
        return;
    }

    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);

    final long startTime = System.nanoTime();
    try (final PublisherLease lease = pool.obtainPublisher()) {
        // Send each FlowFile to Kafka asynchronously.
        for (final FlowFile flowFile : flowFiles) {
            if (!isScheduled()) {
                // If stopped, re-queue the FlowFile instead of sending it.
                session.transfer(flowFile);
                continue;
            }

            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
            final String messageKeyField = context.getProperty(MESSAGE_KEY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
            final Map<String, String> attributes = flowFile.getAttributes();

            try {
                session.read(flowFile, new InputStreamCallback() {
                    @Override
                    public void process(final InputStream rawIn) throws IOException {
                        try (final InputStream in = new BufferedInputStream(rawIn)) {
                            final RecordReader reader = readerFactory.createRecordReader(attributes, in, getLogger());
                            final RecordSet recordSet = reader.createRecordSet();
                            final RecordSchema schema = writerFactory.getSchema(attributes, recordSet.getSchema());
                            lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
                        } catch (final SchemaNotFoundException | MalformedRecordException e) {
                            // The callback may only throw IOException, so wrap checked record exceptions.
                            throw new ProcessException(e);
                        }
                    }
                });
            } catch (final Exception e) {
                // The FlowFile will be obtained and the error logged below, when calling publishResult.getFailedFlowFiles()
                lease.getTracker().fail(flowFile, e);
                continue;
            }
        }

        // Complete the send.
        final PublishResult publishResult = lease.complete();

        // Transfer any successful FlowFiles.
        final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
        for (FlowFile success : publishResult.getSuccessfulFlowFiles()) {
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();

            final int msgCount = publishResult.getSuccessfulMessageCount(success);
            success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
            session.adjustCounter("Messages Sent", msgCount, true);

            final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic);
            session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
            session.transfer(success, REL_SUCCESS);
        }

        // Transfer any failures.
        for (final FlowFile failure : publishResult.getFailedFlowFiles()) {
            final int successCount = publishResult.getSuccessfulMessageCount(failure);
            if (successCount > 0) {
                getLogger().error("Failed to send some messages for {} to Kafka, but {} messages were acknowledged by Kafka. Routing to failure due to {}",
                    new Object[] { failure, successCount, publishResult.getReasonForFailure(failure) });
            } else {
                getLogger().error("Failed to send all messages for {} to Kafka; routing to failure due to {}",
                    new Object[] { failure, publishResult.getReasonForFailure(failure) });
            }

            session.transfer(failure, REL_FAILURE);
        }
    }
}
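One detail worth calling out: InputStreamCallback.process may only throw IOException, so the checked SchemaNotFoundException and MalformedRecordException are rethrown wrapped in NiFi's unchecked ProcessException and then caught by the surrounding catch block. A minimal sketch of that wrapping pattern in isolation; the parse helper is hypothetical:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.serialization.MalformedRecordException;

public class WrapCheckedExceptionSketch {

    // Hypothetical stand-in for a record-parsing step that rejects its input.
    static void parse(final InputStream in) throws IOException, MalformedRecordException {
        throw new MalformedRecordException("bad record");
    }

    // Mirrors the callback in onTrigger: MalformedRecordException cannot cross the
    // IOException-only boundary of InputStreamCallback.process, so it is rethrown
    // wrapped in the unchecked ProcessException and handled by the outer catch.
    static void process(final InputStream in) throws IOException {
        try {
            parse(in);
        } catch (final MalformedRecordException e) {
            throw new ProcessException(e);
        }
    }

    public static void main(String[] args) throws IOException {
        try {
            process(new ByteArrayInputStream(new byte[0]));
        } catch (final ProcessException e) {
            System.out.println("Wrapped cause: " + e.getCause().getMessage());
        }
    }
}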
Use of org.apache.nifi.serialization.MalformedRecordException in project nifi by apache.
The class MockRecordParser, method createRecordReader:
@Override
public RecordReader createRecordReader(Map<String, String> variables, InputStream in, ComponentLog logger) throws IOException, SchemaNotFoundException {
    final Iterator<Object[]> itr = records.iterator();

    return new RecordReader() {
        private int recordCount = 0;

        @Override
        public void close() throws IOException {
        }

        @Override
        public Record nextRecord(final boolean coerceTypes, final boolean dropUnknown) throws IOException, MalformedRecordException {
            // Simulate a corrupt stream: fail once the configured number of records has been read
            // (failAfterN < 0 means never fail).
            if (failAfterN >= 0 && recordCount >= failAfterN) {
                throw new MalformedRecordException("Intentional Unit Test Exception because " + recordCount + " records have been read");
            }
            recordCount++;

            if (!itr.hasNext()) {
                return null;
            }

            final Object[] values = itr.next();
            final Map<String, Object> valueMap = new HashMap<>();
            int i = 0;
            for (final RecordField field : fields) {
                final String fieldName = field.getFieldName();
                valueMap.put(fieldName, values[i++]);
            }

            return new MapRecord(new SimpleRecordSchema(fields), valueMap);
        }

        @Override
        public RecordSchema getSchema() {
            return new SimpleRecordSchema(fields);
        }
    };
}
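A sketch of how a test might drive this mock, assuming the companion helpers on MockRecordParser (the MockRecordParser(int failAfterN) constructor, addSchemaField, and addRecord) from the same test utility class; the mock ignores its InputStream and logger arguments, so nulls are passed:

import java.util.Collections;

import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.junit.Test;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;

public class TestMockRecordParserFailure {

    @Test
    public void testThrowsAfterConfiguredNumberOfRecords() throws Exception {
        final MockRecordParser parser = new MockRecordParser(2); // fail once two records have been read
        parser.addSchemaField("id", RecordFieldType.INT);
        parser.addRecord(1);
        parser.addRecord(2);
        parser.addRecord(3);

        final RecordReader reader = parser.createRecordReader(Collections.emptyMap(), null, null);
        assertNotNull(reader.nextRecord());
        assertNotNull(reader.nextRecord());
        try {
            reader.nextRecord();
            fail("Expected MalformedRecordException after two records");
        } catch (final MalformedRecordException expected) {
            // The mock throws intentionally once failAfterN records have been read.
        }
    }
}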
Use of org.apache.nifi.serialization.MalformedRecordException in project nifi by apache.
The class PutElasticsearchHttpRecord, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);

    // Authentication
    final String username = context.getProperty(USERNAME).evaluateAttributeExpressions(flowFile).getValue();
    final String password = context.getProperty(PASSWORD).evaluateAttributeExpressions(flowFile).getValue();

    OkHttpClient okHttpClient = getClient();
    final ComponentLog logger = getLogger();

    final String baseUrl = trimToEmpty(context.getProperty(ES_URL).evaluateAttributeExpressions().getValue());
    HttpUrl.Builder urlBuilder = HttpUrl.parse(baseUrl).newBuilder().addPathSegment("_bulk");

    // Find the user-added properties and set them as query parameters on the URL
    for (Map.Entry<PropertyDescriptor, String> property : context.getProperties().entrySet()) {
        PropertyDescriptor pd = property.getKey();
        if (pd.isDynamic()) {
            if (property.getValue() != null) {
                urlBuilder = urlBuilder.addQueryParameter(pd.getName(), context.getProperty(pd).evaluateAttributeExpressions().getValue());
            }
        }
    }
    final URL url = urlBuilder.build().url();

    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    if (StringUtils.isEmpty(index)) {
        logger.error("No value for index for {}, transferring to failure", new Object[] { flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();

    String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(flowFile).getValue();
    if (StringUtils.isEmpty(indexOp)) {
        logger.error("No index operation specified for {}, transferring to failure.", new Object[] { flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    switch (indexOp.toLowerCase()) {
        case "index":
        case "update":
        case "upsert":
        case "delete":
            break;
        default:
            logger.error("Index operation {} not supported for {}, transferring to failure.", new Object[] { indexOp, flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
    }

    this.nullSuppression = context.getProperty(SUPPRESS_NULLS).getValue();

    final String id_path = context.getProperty(ID_RECORD_PATH).evaluateAttributeExpressions(flowFile).getValue();
    final RecordPath recordPath = StringUtils.isEmpty(id_path) ? null : recordPathCache.getCompiled(id_path);
    final StringBuilder sb = new StringBuilder();

    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {

        Record record;
        while ((record = reader.nextRecord()) != null) {

            final String id;
            if (recordPath != null) {
                Optional<FieldValue> idPathValue = recordPath.evaluate(record).getSelectedFields().findFirst();
                if (!idPathValue.isPresent() || idPathValue.get().getValue() == null) {
                    throw new IdentifierNotFoundException("Identifier Record Path specified but no value was found, transferring {} to failure.");
                }
                id = idPathValue.get().getValue().toString();
            } else {
                id = null;
            }

            // A missing ID indicates one is to be auto-generated by Elasticsearch
            if (id == null && !indexOp.equalsIgnoreCase("index")) {
                throw new IdentifierNotFoundException("Index operation {} requires a valid identifier value from a flow file attribute, transferring to failure.");
            }

            final StringBuilder json = new StringBuilder();

            ByteArrayOutputStream out = new ByteArrayOutputStream();
            JsonGenerator generator = factory.createJsonGenerator(out);
            writeRecord(record, record.getSchema(), generator);
            generator.flush();
            generator.close();
            json.append(out.toString());

            if (indexOp.equalsIgnoreCase("index")) {
                sb.append("{\"index\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\"");
                if (!StringUtils.isEmpty(id)) {
                    sb.append(", \"_id\": \"");
                    sb.append(id);
                    sb.append("\"");
                }
                sb.append("}}\n");
                sb.append(json);
                sb.append("\n");
            } else if (indexOp.equalsIgnoreCase("upsert") || indexOp.equalsIgnoreCase("update")) {
                sb.append("{\"update\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\", \"_id\": \"");
                sb.append(id);
                sb.append("\" }\n");
                sb.append("{\"doc\": ");
                sb.append(json);
                sb.append(", \"doc_as_upsert\": ");
                sb.append(indexOp.equalsIgnoreCase("upsert"));
                sb.append(" }\n");
            } else if (indexOp.equalsIgnoreCase("delete")) {
                sb.append("{\"delete\": { \"_index\": \"");
                sb.append(index);
                sb.append("\", \"_type\": \"");
                sb.append(docType);
                sb.append("\", \"_id\": \"");
                sb.append(id);
                sb.append("\" }\n");
            }
        }
    } catch (IdentifierNotFoundException infe) {
        logger.error(infe.getMessage(), new Object[] { flowFile });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (final IOException | SchemaNotFoundException | MalformedRecordException e) {
        logger.error("Could not parse incoming data", e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    RequestBody requestBody = RequestBody.create(MediaType.parse("application/json"), sb.toString());

    final Response getResponse;
    try {
        getResponse = sendRequestToElasticsearch(okHttpClient, url, username, password, "PUT", requestBody);
    } catch (final Exception e) {
        logger.error("Routing to {} due to exception: {}", new Object[] { REL_FAILURE.getName(), e }, e);
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final int statusCode = getResponse.code();
    if (isSuccess(statusCode)) {
        ResponseBody responseBody = getResponse.body();
        try {
            final byte[] bodyBytes = responseBody.bytes();
            JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
            boolean errors = responseJson.get("errors").asBoolean(false);
            // ES has no rollback, so if errors occur, log them and route the whole flow file to failure
            if (errors) {
                ArrayNode itemNodeArray = (ArrayNode) responseJson.get("items");
                if (itemNodeArray.size() > 0) {
                    // All items are returned whether they succeeded or failed, so iterate through the item array
                    // at the same time as the flow file list, moving each to success or failure accordingly,
                    // but only keep the first error for logging
                    String errorReason = null;
                    for (int i = itemNodeArray.size() - 1; i >= 0; i--) {
                        JsonNode itemNode = itemNodeArray.get(i);
                        int status = itemNode.findPath("status").asInt();
                        if (!isSuccess(status)) {
                            if (errorReason == null) {
// Use "result" if it is present; this happens for status codes like 404 Not Found, which may not have an error/reason
String reason = itemNode.findPath("//result").asText();
if (StringUtils.isEmpty(reason)) {
// If there was no result, we expect an error with a string description in the "reason" field
reason = itemNode.findPath("//error/reason").asText();
}
                                errorReason = reason;
                                logger.error("Failed to process {} due to {}, transferring to failure", new Object[] { flowFile, errorReason });
                            }
                        }
                    }
                }
                session.transfer(flowFile, REL_FAILURE);
            } else {
                session.transfer(flowFile, REL_SUCCESS);
                session.getProvenanceReporter().send(flowFile, url.toString());
            }
        } catch (IOException ioe) {
            // Something went wrong when parsing the response, log the error and route to failure
            logger.error("Error parsing Bulk API response: {}", new Object[] { ioe.getMessage() }, ioe);
            session.transfer(flowFile, REL_FAILURE);
            context.yield();
        }
    } else if (statusCode / 100 == 5) {
        // 5xx -> RETRY, but a server error might last a while, so yield
        logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to retry. This is likely a server problem, yielding...",
            new Object[] { statusCode, getResponse.message() });
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } else {
        // 1xx, 3xx, 4xx, etc. -> NO RETRY
        logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to failure",
            new Object[] { statusCode, getResponse.message() });
        session.transfer(flowFile, REL_FAILURE);
    }

    getResponse.close();
}
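The StringBuilder in onTrigger accumulates an Elasticsearch Bulk API body: one action-metadata line followed by one document-source line per record, each newline-terminated, sent as a single PUT to the _bulk endpoint. A minimal sketch of the "index" branch with hypothetical sample values:

public class BulkIndexSketch {
    public static void main(String[] args) {
        // Hypothetical sample values standing in for the processor's per-record state.
        final String index = "accounts";
        final String docType = "account";
        final String id = "1";
        final String json = "{\"id\":1,\"balance\":4750.89}";

        final StringBuilder sb = new StringBuilder();
        sb.append("{\"index\": { \"_index\": \"").append(index)
            .append("\", \"_type\": \"").append(docType).append("\"");
        if (id != null && !id.isEmpty()) {
            sb.append(", \"_id\": \"").append(id).append("\"");
        }
        sb.append("}}\n"); // action metadata line
        sb.append(json).append("\n"); // document source line

        // Prints the two NDJSON lines that would be sent to _bulk:
        // {"index": { "_index": "accounts", "_type": "account", "_id": "1"}}
        // {"id":1,"balance":4750.89}
        System.out.print(sb);
    }
}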