Search in sources:

Example 1 with DataType

use of org.apache.nifi.serialization.record.DataType in project nifi by apache.

From the class PutElasticsearchHttpRecord, method writeValue:

/**
 * Writes a single field value to the JSON generator, dispatching on the field's
 * resolved data type. CHOICE types are first narrowed to the concrete type that
 * matches the value, and the value is coerced to that type before writing.
 *
 * @param generator the Jackson generator to write to
 * @param value     the raw record value (may be null)
 * @param fieldName the fully-qualified field name (used for coercion error context
 *                  and null-suppression decisions)
 * @param dataType  the declared data type of the field
 * @throws IOException if the generator fails to write
 */
@SuppressWarnings("unchecked")
private void writeValue(final JsonGenerator generator, final Object value, final String fieldName, final DataType dataType) throws IOException {
    if (value == null) {
        // Write an explicit JSON null only when the configured suppression mode allows it;
        // SUPPRESS_MISSING still writes nulls for fields that are present-but-null.
        if (nullSuppression.equals(NEVER_SUPPRESS.getValue()) || ((nullSuppression.equals(SUPPRESS_MISSING.getValue())) && fieldName != null && !fieldName.equals(""))) {
            generator.writeNullField(fieldName);
        }
        return;
    }
    // For CHOICE types, pick the member type that best matches the actual value.
    final DataType chosenDataType = dataType.getFieldType() == RecordFieldType.CHOICE ? DataTypeUtils.chooseDataType(value, (ChoiceDataType) dataType) : dataType;
    final Object coercedValue = DataTypeUtils.convertType(value, chosenDataType, fieldName);
    if (coercedValue == null) {
        generator.writeNull();
        return;
    }
    switch(chosenDataType.getFieldType()) {
        case DATE:
            writeTemporalValue(generator, coercedValue, fieldName, RecordFieldType.DATE.getDefaultFormat());
            break;
        case TIME:
            writeTemporalValue(generator, coercedValue, fieldName, RecordFieldType.TIME.getDefaultFormat());
            break;
        case TIMESTAMP:
            writeTemporalValue(generator, coercedValue, fieldName, RecordFieldType.TIMESTAMP.getDefaultFormat());
            break;
        case DOUBLE:
            generator.writeNumber(DataTypeUtils.toDouble(coercedValue, fieldName));
            break;
        case FLOAT:
            generator.writeNumber(DataTypeUtils.toFloat(coercedValue, fieldName));
            break;
        case LONG:
            generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
            break;
        case INT:
        case BYTE:
        case SHORT:
            generator.writeNumber(DataTypeUtils.toInteger(coercedValue, fieldName));
            break;
        case CHAR:
        case STRING:
            generator.writeString(coercedValue.toString());
            break;
        case BIGINT:
            // convertType may yield either a Long or a BigInteger for BIGINT fields.
            if (coercedValue instanceof Long) {
                generator.writeNumber((Long) coercedValue);
            } else {
                generator.writeNumber((BigInteger) coercedValue);
            }
            break;
        case BOOLEAN:
            // Only write a real JSON boolean for unambiguous true/false text;
            // anything else is preserved as a string.
            final String stringValue = coercedValue.toString();
            if ("true".equalsIgnoreCase(stringValue)) {
                generator.writeBoolean(true);
            } else if ("false".equalsIgnoreCase(stringValue)) {
                generator.writeBoolean(false);
            } else {
                generator.writeString(stringValue);
            }
            break;
        case RECORD:
            {
                final Record record = (Record) coercedValue;
                final RecordDataType recordDataType = (RecordDataType) chosenDataType;
                final RecordSchema childSchema = recordDataType.getChildSchema();
                writeRecord(record, childSchema, generator);
                break;
            }
        case MAP:
            {
                final MapDataType mapDataType = (MapDataType) chosenDataType;
                final DataType valueDataType = mapDataType.getValueType();
                final Map<String, ?> map = (Map<String, ?>) coercedValue;
                generator.writeStartObject();
                for (final Map.Entry<String, ?> entry : map.entrySet()) {
                    final String mapKey = entry.getKey();
                    final Object mapValue = entry.getValue();
                    generator.writeFieldName(mapKey);
                    // Qualify nested field names for error reporting / suppression context.
                    writeValue(generator, mapValue, fieldName + "." + mapKey, valueDataType);
                }
                generator.writeEndObject();
                break;
            }
        case ARRAY:
        default:
            if (coercedValue instanceof Object[]) {
                final Object[] values = (Object[]) coercedValue;
                // BUG FIX: must cast the resolved chosenDataType, not the declared dataType.
                // If the declared type was a CHOICE containing an ARRAY, dataType is a
                // ChoiceDataType and the original cast threw ClassCastException.
                final ArrayDataType arrayDataType = (ArrayDataType) chosenDataType;
                final DataType elementType = arrayDataType.getElementType();
                writeArray(values, fieldName, generator, elementType);
            } else {
                generator.writeString(coercedValue.toString());
            }
            break;
    }
}

/**
 * Writes a DATE/TIME/TIMESTAMP value: as an epoch number when the formatted value
 * is long-compatible, otherwise as a formatted string using the given default format.
 */
private void writeTemporalValue(final JsonGenerator generator, final Object coercedValue, final String fieldName, final String defaultFormat) throws IOException {
    final String stringValue = DataTypeUtils.toString(coercedValue, () -> DataTypeUtils.getDateFormat(defaultFormat));
    if (DataTypeUtils.isLongTypeCompatible(stringValue)) {
        generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
    } else {
        generator.writeString(stringValue);
    }
}
Also used : MapDataType(org.apache.nifi.serialization.record.type.MapDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) Record(org.apache.nifi.serialization.record.Record) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Map(java.util.Map)

Example 2 with DataType

use of org.apache.nifi.serialization.record.DataType in project nifi by apache.

From the class PutElasticsearchHttpRecord, method writeRecord:

/**
 * Serializes a Record as a JSON object, writing one field per entry in the
 * record's own schema. Null values are written or suppressed according to the
 * configured null-suppression strategy.
 *
 * @param record      the record to serialize
 * @param writeSchema the schema requested by the caller (currently unused; the
 *                    record's own schema drives iteration — NOTE(review): confirm
 *                    this is intentional)
 * @param generator   the Jackson generator to write to
 * @throws IOException if the generator fails to write
 */
private void writeRecord(final Record record, final RecordSchema writeSchema, final JsonGenerator generator) throws IOException {
    RecordSchema schema = record.getSchema();
    generator.writeStartObject();
    for (int i = 0; i < schema.getFieldCount(); i++) {
        final RecordField field = schema.getField(i);
        final String fieldName = field.getFieldName();
        final Object value = record.getValue(field);
        if (value == null) {
            // SUPPRESS_MISSING still emits a null for fields that were explicitly present.
            if (nullSuppression.equals(NEVER_SUPPRESS.getValue()) || (nullSuppression.equals(SUPPRESS_MISSING.getValue())) && record.getRawFieldNames().contains(fieldName)) {
                generator.writeNullField(fieldName);
            }
            continue;
        }
        generator.writeFieldName(fieldName);
        // The field was obtained from this schema, so its DataType is available
        // directly — avoids a redundant name lookup and an unchecked Optional.get().
        final DataType dataType = field.getDataType();
        writeValue(generator, value, fieldName, dataType);
    }
    generator.writeEndObject();
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Example 3 with DataType

use of org.apache.nifi.serialization.record.DataType in project nifi by apache.

From the class CSVRecordReader, method nextRecord:

/**
 * Reads the next CSV row and materializes it as a Record, or returns null when
 * the input is exhausted. Columns beyond the schema are either dropped or kept
 * under a synthetic "unknown_field_index_N" name, per dropUnknownFields.
 */
@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields) throws IOException, MalformedRecordException {
    final RecordSchema schema = getSchema();
    final List<RecordField> fields = getRecordFields();
    final int knownFieldCount = fields.size();
    for (final CSVRecord csvRecord : csvParser) {
        final Map<String, Object> rowValues = new LinkedHashMap<>(fields.size() * 2);
        for (int column = 0; column < csvRecord.size(); column++) {
            final String rawValue = csvRecord.get(column);
            if (column >= knownFieldCount) {
                // Column has no schema field; keep it under a synthetic name unless dropped.
                if (!dropUnknownFields) {
                    rowValues.put("unknown_field_index_" + column, rawValue);
                }
                continue;
            }
            final RecordField recordField = fields.get(column);
            final String name = recordField.getFieldName();
            final DataType type = recordField.getDataType();
            // The CSV Reader is going to return all fields as Strings, because CSV doesn't have any way to
            // dictate a field type. As a result, we will use the schema that we have to attempt to convert
            // the value into the desired type if it's a simple type (when not coercing fully).
            final Object converted = coerceTypes
                    ? convert(rawValue, type, name)
                    : convertSimpleIfPossible(rawValue, type, name);
            rowValues.put(name, converted);
        }
        // One row per call: return after processing the first record from the parser.
        return new MapRecord(schema, rowValues, coerceTypes, dropUnknownFields);
    }
    return null;
}
Also used : MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) DataType(org.apache.nifi.serialization.record.DataType) CSVRecord(org.apache.commons.csv.CSVRecord) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) LinkedHashMap(java.util.LinkedHashMap)

Example 4 with DataType

use of org.apache.nifi.serialization.record.DataType in project nifi by apache.

From the class GrokReader, method populateSchemaFieldNames:

/**
 * Walks a Grok expression, adding a STRING RecordField for each named sub-group
 * and recursing into referenced sub-patterns that have no explicit sub-name.
 *
 * @param grok           the Grok instance whose pattern table resolves sub-pattern names
 * @param grokExpression the expression to scan (consumed left to right)
 * @param fields         output list that discovered fields are appended to
 */
private static void populateSchemaFieldNames(final Grok grok, String grokExpression, final List<RecordField> fields) {
    while (grokExpression.length() > 0) {
        final Matcher matcher = GrokUtils.GROK_PATTERN.matcher(grokExpression);
        if (!matcher.find()) {
            break;
        }
        final Map<String, String> extractedGroups = GrokUtils.namedGroups(matcher, grokExpression);
        final String subName = extractedGroups.get("subname");
        if (subName == null) {
            // BUG FIX: the original issued `continue` when the "pattern" group was
            // absent, which never advanced grokExpression and spun this loop forever.
            // Instead, fall through to the advance logic below.
            final String subPatternName = extractedGroups.get("pattern");
            if (subPatternName != null) {
                final String subExpression = grok.getPatterns().get(subPatternName);
                // Guard against an unknown pattern name to avoid an NPE in the recursion.
                if (subExpression != null) {
                    populateSchemaFieldNames(grok, subExpression, fields);
                }
            }
        } else {
            DataType dataType = RecordFieldType.STRING.getDataType();
            final RecordField recordField = new RecordField(subName, dataType);
            fields.add(recordField);
        }
        // Advance past the matched token; stop when nothing meaningful remains.
        if (grokExpression.length() > matcher.end() + 1) {
            grokExpression = grokExpression.substring(matcher.end());
        } else {
            break;
        }
    }
}
Also used : RecordField(org.apache.nifi.serialization.record.RecordField) Matcher(java.util.regex.Matcher) DataType(org.apache.nifi.serialization.record.DataType)

Example 5 with DataType

use of org.apache.nifi.serialization.record.DataType in project nifi by apache.

From the class GrokRecordReader, method createRecord:

/**
 * Builds a Record from the raw Grok capture map. Each captured value is normalized
 * (lists become String arrays, scalars become Strings), optionally coerced to the
 * schema's declared type, and stored under its field name. When trailing text is
 * present and append mode is on, it is concatenated onto the last populated field.
 * The stack trace and raw message are always stored under their reserved column names.
 *
 * @param valueMap     raw capture-group name -> value map produced by Grok
 * @param trailingText text that followed the matched portion of the line(s)
 * @param stackTrace   stack-trace text, stored under STACK_TRACE_COLUMN_NAME (may be null)
 * @param raw          the original unparsed message, stored under RAW_MESSAGE_NAME
 * @param coerceTypes  whether to convert values to the schema's declared types
 * @param dropUnknown  unused here — NOTE(review): confirm whether this should affect
 *                     which fields are kept
 * @return the assembled MapRecord using this reader's schema
 */
private Record createRecord(final Map<String, Object> valueMap, final StringBuilder trailingText, final String stackTrace, final String raw, final boolean coerceTypes, final boolean dropUnknown) {
    final Map<String, Object> converted = new HashMap<>();
    for (final Map.Entry<String, Object> entry : valueMap.entrySet()) {
        final String fieldName = entry.getKey();
        final Object rawValue = entry.getValue();
        final Object normalizedValue;
        if (rawValue instanceof List) {
            // A repeated capture: normalize each element to its String form (nulls preserved).
            final List<?> list = (List<?>) rawValue;
            final String[] array = new String[list.size()];
            for (int i = 0; i < list.size(); i++) {
                final Object rawObject = list.get(i);
                array[i] = rawObject == null ? null : rawObject.toString();
            }
            normalizedValue = array;
        } else {
            normalizedValue = rawValue == null ? null : rawValue.toString();
        }
        final Optional<RecordField> optionalRecordField = schema.getField(fieldName);
        final Object coercedValue;
        if (coerceTypes && optionalRecordField.isPresent()) {
            // Only coerce fields the schema knows about; unknown fields pass through as-is.
            final RecordField field = optionalRecordField.get();
            final DataType fieldType = field.getDataType();
            coercedValue = convert(fieldType, normalizedValue, fieldName);
        } else {
            coercedValue = normalizedValue;
        }
        converted.put(fieldName, coercedValue);
    }
    // In append mode, find the last field (scanning the Grok schema back-to-front,
    // checking aliases too) that actually received a value, and then append the
    // trailing text to it.
    if (append && trailingText.length() > 0) {
        String lastPopulatedFieldName = null;
        final List<RecordField> schemaFields = schemaFromGrok.getFields();
        for (int i = schemaFields.size() - 1; i >= 0; i--) {
            final RecordField field = schemaFields.get(i);
            Object value = converted.get(field.getFieldName());
            if (value != null) {
                lastPopulatedFieldName = field.getFieldName();
                break;
            }
            // The primary name was empty; the value may have been stored under an alias.
            for (final String alias : field.getAliases()) {
                value = converted.get(alias);
                if (value != null) {
                    lastPopulatedFieldName = alias;
                    break;
                }
            }
        }
        if (lastPopulatedFieldName != null) {
            final Object value = converted.get(lastPopulatedFieldName);
            if (value == null) {
                converted.put(lastPopulatedFieldName, trailingText.toString());
            } else if (value instanceof String) {
                // if not a String it is a List and we will just drop the trailing text
                converted.put(lastPopulatedFieldName, (String) value + trailingText.toString());
            }
        }
    }
    converted.put(STACK_TRACE_COLUMN_NAME, stackTrace);
    converted.put(RAW_MESSAGE_NAME, raw);
    return new MapRecord(schema, converted);
}
Also used : MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) DataType(org.apache.nifi.serialization.record.DataType) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

DataType (org.apache.nifi.serialization.record.DataType)45 RecordField (org.apache.nifi.serialization.record.RecordField)36 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)27 ArrayDataType (org.apache.nifi.serialization.record.type.ArrayDataType)24 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)22 RecordDataType (org.apache.nifi.serialization.record.type.RecordDataType)22 ChoiceDataType (org.apache.nifi.serialization.record.type.ChoiceDataType)21 MapDataType (org.apache.nifi.serialization.record.type.MapDataType)20 ArrayList (java.util.ArrayList)17 RecordFieldType (org.apache.nifi.serialization.record.RecordFieldType)17 HashMap (java.util.HashMap)15 Record (org.apache.nifi.serialization.record.Record)14 Map (java.util.Map)13 MapRecord (org.apache.nifi.serialization.record.MapRecord)13 Test (org.junit.Test)13 LinkedHashMap (java.util.LinkedHashMap)11 List (java.util.List)11 ComponentLog (org.apache.nifi.logging.ComponentLog)10 File (java.io.File)9 IOException (java.io.IOException)9