Search in sources :

Example 11 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in the Apache NiFi project.

From the class MongoDBLookupService, method lookup.

@Override
public Optional<Object> lookup(Map<String, Object> coordinates) throws LookupFailureException {
    // Fail fast BEFORE building the query: an empty key set would make Mongo
    // match arbitrary documents instead of performing a deterministic lookup.
    // (The original built the Document first and checked afterwards.)
    if (coordinates == null || coordinates.isEmpty()) {
        throw new LookupFailureException("No keys were configured. Mongo query would return random documents.");
    }
    // Defensive copy so the query Document cannot mutate the caller's map.
    final Document query = new Document(new HashMap<>(coordinates));
    try {
        final Document result = this.findOne(query);
        if (result == null) {
            // No matching document found.
            return Optional.empty();
        } else if (!StringUtils.isEmpty(lookupValueField)) {
            // A single value field was configured: return just that field.
            return Optional.ofNullable(result.get(lookupValueField));
        } else {
            // No value field configured: return the whole document as a Record
            // with a flat all-STRING schema derived from its keys (minus _id).
            final List<RecordField> fields = new ArrayList<>();
            for (final String key : result.keySet()) {
                if ("_id".equals(key)) {
                    continue;
                }
                fields.add(new RecordField(key, RecordFieldType.STRING.getDataType()));
            }
            final RecordSchema schema = new SimpleRecordSchema(fields);
            return Optional.ofNullable(new MapRecord(schema, result));
        }
    } catch (final Exception ex) {
        getLogger().error("Error during lookup {}", new Object[] { query.toJson() }, ex);
        throw new LookupFailureException(ex);
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) Document(org.bson.Document) InitializationException(org.apache.nifi.reporting.InitializationException) LookupFailureException(org.apache.nifi.lookup.LookupFailureException) IOException(java.io.IOException) LookupFailureException(org.apache.nifi.lookup.LookupFailureException) ArrayList(java.util.ArrayList) List(java.util.List) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema)

Example 12 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in the Apache NiFi project.

From the class CSVRecordReader, method nextRecord.

@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields) throws IOException, MalformedRecordException {
    // Resolve the schema and its ordered field list once per call.
    final RecordSchema schema = getSchema();
    final List<RecordField> recordFields = getRecordFields();
    final int fieldCount = recordFields.size();

    // The parser yields one CSV record per iteration; we consume exactly one
    // and return it, or fall out of the loop when the input is exhausted.
    for (final CSVRecord csvRecord : csvParser) {
        final Map<String, Object> values = new LinkedHashMap<>(recordFields.size() * 2);

        for (int column = 0; column < csvRecord.size(); column++) {
            final String rawValue = csvRecord.get(column);

            // Columns beyond the schema have no declared field: either keep
            // them under a synthetic name or drop them, as requested.
            if (column >= fieldCount) {
                if (!dropUnknownFields) {
                    values.put("unknown_field_index_" + column, rawValue);
                }
                continue;
            }

            final RecordField recordField = recordFields.get(column);
            final String fieldName = recordField.getFieldName();
            final DataType dataType = recordField.getDataType();

            // CSV cannot express field types, so every raw value is a String.
            // With coercion enabled we convert fully to the schema type;
            // otherwise we only attempt conversion for simple types.
            final Object value = coerceTypes
                    ? convert(rawValue, dataType, fieldName)
                    : convertSimpleIfPossible(rawValue, dataType, fieldName);

            values.put(fieldName, value);
        }

        return new MapRecord(schema, values, coerceTypes, dropUnknownFields);
    }

    // Parser exhausted: no more records available.
    return null;
}
Also used : MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) DataType(org.apache.nifi.serialization.record.DataType) CSVRecord(org.apache.commons.csv.CSVRecord) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) LinkedHashMap(java.util.LinkedHashMap)

Example 13 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in the Apache NiFi project.

From the class GrokReader, method createRecordSchema.

static RecordSchema createRecordSchema(final Grok grok) {
    // Derive one STRING field per named capture in the Grok expression, then
    // append the two nullable synthetic columns every Grok record carries.
    final List<RecordField> fields = new ArrayList<>();
    populateSchemaFieldNames(grok, grok.getOriginalGrokPattern(), fields);
    fields.add(new RecordField(GrokRecordReader.STACK_TRACE_COLUMN_NAME, RecordFieldType.STRING.getDataType(), true));
    fields.add(new RecordField(GrokRecordReader.RAW_MESSAGE_NAME, RecordFieldType.STRING.getDataType(), true));
    return new SimpleRecordSchema(fields);
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema)

Example 14 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in the Apache NiFi project.

From the class GrokReader, method populateSchemaFieldNames.

/**
 * Walks a Grok expression, adding one STRING RecordField per named capture
 * ("subname") and recursing into referenced sub-patterns ("pattern").
 */
private static void populateSchemaFieldNames(final Grok grok, String grokExpression, final List<RecordField> fields) {
    while (grokExpression.length() > 0) {
        final Matcher matcher = GrokUtils.GROK_PATTERN.matcher(grokExpression);
        if (!matcher.find()) {
            // No more %{...} tokens in the expression.
            break;
        }
        final Map<String, String> extractedGroups = GrokUtils.namedGroups(matcher, grokExpression);
        final String subName = extractedGroups.get("subname");
        if (subName != null) {
            // Named capture: record it as a STRING field.
            fields.add(new RecordField(subName, RecordFieldType.STRING.getDataType()));
        } else {
            final String subPatternName = extractedGroups.get("pattern");
            if (subPatternName != null) {
                // Unnamed reference to a registered pattern: recurse into it.
                populateSchemaFieldNames(grok, grok.getPatterns().get(subPatternName), fields);
            }
            // BUG FIX: the original code 'continue'd here when both groups were
            // null, re-matching the same token on an unchanged expression — an
            // infinite loop. Fall through so the match is always consumed.
        }
        if (grokExpression.length() > matcher.end() + 1) {
            grokExpression = grokExpression.substring(matcher.end());
        } else {
            break;
        }
    }
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) Matcher(java.util.regex.Matcher) DataType(org.apache.nifi.serialization.record.DataType)

Example 15 with RecordField

Use of org.apache.nifi.serialization.record.RecordField in the Apache NiFi project.

From the class GrokRecordReader, method createRecord.

/**
 * Builds a Record from Grok capture results: normalizes captured values,
 * optionally coerces them to the schema's types, appends any trailing
 * (non-matching) text to the last populated field, and attaches the stack
 * trace and raw message columns.
 */
private Record createRecord(final Map<String, Object> valueMap, final StringBuilder trailingText, final String stackTrace, final String raw, final boolean coerceTypes, final boolean dropUnknown) {
    final Map<String, Object> converted = new HashMap<>();
    for (final Map.Entry<String, Object> entry : valueMap.entrySet()) {
        final String fieldName = entry.getKey();
        final Object rawValue = entry.getValue();

        // Normalize Grok values: a List becomes a String[] (element-wise
        // toString, nulls preserved); anything else becomes its toString().
        final Object normalizedValue;
        if (rawValue instanceof List) {
            final List<?> list = (List<?>) rawValue;
            final String[] array = new String[list.size()];
            for (int i = 0; i < list.size(); i++) {
                final Object rawObject = list.get(i);
                array[i] = rawObject == null ? null : rawObject.toString();
            }
            normalizedValue = array;
        } else {
            normalizedValue = rawValue == null ? null : rawValue.toString();
        }

        // Coerce to the declared type only when requested AND the field is
        // actually present in the schema; otherwise keep the normalized value.
        final Optional<RecordField> optionalRecordField = schema.getField(fieldName);
        final Object coercedValue;
        if (coerceTypes && optionalRecordField.isPresent()) {
            final DataType fieldType = optionalRecordField.get().getDataType();
            coercedValue = convert(fieldType, normalizedValue, fieldName);
        } else {
            coercedValue = normalizedValue;
        }
        converted.put(fieldName, coercedValue);
    }

    // and then append the trailing text to it.
    if (append && trailingText.length() > 0) {
        String lastPopulatedFieldName = null;
        final List<RecordField> schemaFields = schemaFromGrok.getFields();
        // Scan the Grok schema from the end to find the last populated field
        // (by field name or by any of its aliases).
        outer:
        for (int i = schemaFields.size() - 1; i >= 0; i--) {
            final RecordField field = schemaFields.get(i);
            Object value = converted.get(field.getFieldName());
            if (value != null) {
                lastPopulatedFieldName = field.getFieldName();
                break;
            }
            for (final String alias : field.getAliases()) {
                value = converted.get(alias);
                if (value != null) {
                    lastPopulatedFieldName = alias;
                    // BUG FIX: the original 'break' only exited the alias loop,
                    // letting the outer loop overwrite the match with an
                    // earlier field. A labeled break stops the whole search.
                    break outer;
                }
            }
        }
        if (lastPopulatedFieldName != null) {
            final Object value = converted.get(lastPopulatedFieldName);
            if (value == null) {
                converted.put(lastPopulatedFieldName, trailingText.toString());
            } else if (value instanceof String) {
                // if not a String it is a List and we will just drop the trailing text
                converted.put(lastPopulatedFieldName, (String) value + trailingText.toString());
            }
        }
    }

    converted.put(STACK_TRACE_COLUMN_NAME, stackTrace);
    converted.put(RAW_MESSAGE_NAME, raw);
    return new MapRecord(schema, converted);
}
Also used : MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) DataType(org.apache.nifi.serialization.record.DataType) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

RecordField (org.apache.nifi.serialization.record.RecordField)173 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)133 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)130 ArrayList (java.util.ArrayList)116 Test (org.junit.Test)108 Record (org.apache.nifi.serialization.record.Record)97 MapRecord (org.apache.nifi.serialization.record.MapRecord)73 HashMap (java.util.HashMap)52 InputStream (java.io.InputStream)48 FileInputStream (java.io.FileInputStream)44 ByteArrayInputStream (java.io.ByteArrayInputStream)43 ComponentLog (org.apache.nifi.logging.ComponentLog)39 DataType (org.apache.nifi.serialization.record.DataType)37 LinkedHashMap (java.util.LinkedHashMap)36 File (java.io.File)21 ByteArrayOutputStream (java.io.ByteArrayOutputStream)20 SchemaNameAsAttribute (org.apache.nifi.schema.access.SchemaNameAsAttribute)17 RecordDataType (org.apache.nifi.serialization.record.type.RecordDataType)17 Schema (org.apache.avro.Schema)16 RecordFieldType (org.apache.nifi.serialization.record.RecordFieldType)16