Search in sources:

Example 41 with DataType

Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.

In class TestWriteAvroResult, method testDataTypes.

/**
 * Builds one record that carries string, numeric, boolean, byte-array, nullable, array,
 * nested-record and map fields, writes it through the writer returned by {@code createWriter},
 * then reads the produced bytes back and checks them against the original record.
 *
 * @throws IOException if the Avro schema file cannot be parsed or the record cannot be
 *         written or read back
 */
@Test
public void testDataTypes() throws IOException {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/avro/datatypes.avsc"));
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();

    // Nested record type, reused both as a direct field and as the value type of the map field.
    final List<RecordField> subRecordFields = Collections.singletonList(new RecordField("field1", RecordFieldType.STRING.getDataType()));
    final RecordSchema subRecordSchema = new SimpleRecordSchema(subRecordFields);
    final DataType subRecordDataType = RecordFieldType.RECORD.getRecordDataType(subRecordSchema);

    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("string", RecordFieldType.STRING.getDataType()));
    fields.add(new RecordField("int", RecordFieldType.INT.getDataType()));
    fields.add(new RecordField("long", RecordFieldType.LONG.getDataType()));
    fields.add(new RecordField("double", RecordFieldType.DOUBLE.getDataType()));
    fields.add(new RecordField("float", RecordFieldType.FLOAT.getDataType()));
    fields.add(new RecordField("boolean", RecordFieldType.BOOLEAN.getDataType()));
    fields.add(new RecordField("bytes", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType())));
    fields.add(new RecordField("nullOrLong", RecordFieldType.LONG.getDataType()));
    fields.add(new RecordField("array", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.INT.getDataType())));
    fields.add(new RecordField("record", subRecordDataType));
    fields.add(new RecordField("map", RecordFieldType.MAP.getMapDataType(subRecordDataType)));
    final RecordSchema recordSchema = new SimpleRecordSchema(fields);

    final Record innerRecord = new MapRecord(subRecordSchema, Collections.singletonMap("field1", "hello"));
    final Map<String, Object> innerMap = new HashMap<>();
    innerMap.put("key1", innerRecord);

    final Map<String, Object> values = new HashMap<>();
    values.put("string", "hello");
    values.put("int", 8);
    values.put("long", 42L);
    values.put("double", 3.14159D);
    values.put("float", 1.23456F);
    values.put("boolean", true);
    // Encode with an explicit charset so the byte payload does not depend on the platform default.
    values.put("bytes", AvroTypeUtil.convertByteArray("hello".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    values.put("nullOrLong", null);
    values.put("array", new Integer[] { 1, 2, 3 });
    values.put("record", innerRecord);
    values.put("map", innerMap);
    final Record record = new MapRecord(recordSchema, values);

    final WriteResult writeResult;
    // The writer is closed before the buffer is read so that everything it holds is flushed.
    try (final RecordSetWriter writer = createWriter(schema, baos)) {
        writeResult = writer.write(RecordSet.of(record.getSchema(), record));
    }
    verify(writeResult);

    final byte[] data = baos.toByteArray();
    try (final InputStream in = new ByteArrayInputStream(data)) {
        final GenericRecord avroRecord = readRecord(in, schema);
        assertMatch(record, avroRecord);
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) WriteResult(org.apache.nifi.serialization.WriteResult) ByteArrayInputStream(java.io.ByteArrayInputStream) DataType(org.apache.nifi.serialization.record.DataType) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 42 with DataType

Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.

In class TestJsonPathRowRecordReader, method testElementWithNestedData.

/**
 * Reads a single JSON element whose "account" field is declared as a nested record and checks
 * the schema's field names and types, the flat values of the record, and two values of the
 * nested account record. Also checks that no second record is returned.
 */
@Test
public void testElementWithNestedData() throws IOException, MalformedRecordException {
    // Start from the shared paths and add one more that selects the nested account object.
    final LinkedHashMap<String, JsonPath> pathsByField = new LinkedHashMap<>(allJsonPaths);
    pathsByField.put("account", JsonPath.compile("$.account"));

    final DataType accountDataType = RecordFieldType.RECORD.getRecordDataType(getAccountSchema());
    final List<RecordField> schemaFields = getDefaultFields();
    schemaFields.add(new RecordField("account", accountDataType));
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    try (final InputStream jsonStream = new FileInputStream(new File("src/test/resources/json/single-element-nested.json"));
        final JsonPathRowRecordReader recordReader = new JsonPathRowRecordReader(pathsByField, schema, jsonStream, Mockito.mock(ComponentLog.class), dateFormat, timeFormat, timestampFormat)) {

        final List<String> actualNames = schema.getFieldNames();
        final List<String> expectedNames = Arrays.asList("id", "name", "balance", "address", "city", "state", "zipCode", "country", "account");
        assertEquals(expectedNames, actualNames);

        final List<RecordFieldType> actualTypes = schema.getDataTypes().stream()
            .map(DataType::getFieldType)
            .collect(Collectors.toList());
        final List<RecordFieldType> expectedTypes = Arrays.asList(
            RecordFieldType.INT, RecordFieldType.STRING, RecordFieldType.DOUBLE,
            RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING,
            RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.RECORD);
        assertEquals(expectedTypes, actualTypes);

        // The last value is the nested record; everything before it is compared as plain values.
        final Object[] allValues = recordReader.nextRecord().getValues();
        final int lastIndex = allValues.length - 1;
        final Object[] flatValues = Arrays.copyOfRange(allValues, 0, lastIndex);
        Assert.assertArrayEquals(new Object[] { 1, "John Doe", null, "123 My Street", "My City", "MS", "11111", "USA" }, flatValues);

        final Object nestedValue = allValues[lastIndex];
        assertTrue(nestedValue instanceof Record);
        final Record accountRecord = (Record) nestedValue;
        assertEquals(42, accountRecord.getValue("id"));
        assertEquals(4750.89D, accountRecord.getValue("balance"));

        assertNull(recordReader.nextRecord());
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Arrays(java.util.Arrays) DataType(org.apache.nifi.serialization.record.DataType) ComponentLog(org.apache.nifi.logging.ComponentLog) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Record(org.apache.nifi.serialization.record.Record) Before(org.junit.Before) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) Assert.assertNotNull(org.junit.Assert.assertNotNull) RecordField(org.apache.nifi.serialization.record.RecordField) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test) FileInputStream(java.io.FileInputStream) JsonPath(com.jayway.jsonpath.JsonPath) Collectors(java.util.stream.Collectors) File(java.io.File) Mockito(org.mockito.Mockito) List(java.util.List) Assert.assertNull(org.junit.Assert.assertNull) Assert(org.junit.Assert) RecordFieldType(org.apache.nifi.serialization.record.RecordFieldType) Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) RecordField(org.apache.nifi.serialization.record.RecordField) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) JsonPath(com.jayway.jsonpath.JsonPath) ComponentLog(org.apache.nifi.logging.ComponentLog) FileInputStream(java.io.FileInputStream) LinkedHashMap(java.util.LinkedHashMap) DataType(org.apache.nifi.serialization.record.DataType) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) File(java.io.File) RecordFieldType(org.apache.nifi.serialization.record.RecordFieldType) Test(org.junit.Test)

Example 43 with DataType

Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.

In class TestJsonTreeRowRecordReader, method testIncorrectSchema.

/**
 * Expects reading the record to fail with a {@link MalformedRecordException} whose cause
 * message mentions the offending field, the offending value and both type names.
 * NOTE(review): presumably the JSON file holds a boolean {@code true} for account.balance
 * where the account schema expects a double; confirm against the fixture.
 */
@Test
public void testIncorrectSchema() throws IOException, MalformedRecordException {
    final DataType accountDataType = RecordFieldType.RECORD.getRecordDataType(getAccountSchema());
    final List<RecordField> schemaFields = getDefaultFields();
    schemaFields.add(new RecordField("account", accountDataType));
    // Drop the top-level balance field from the default field list.
    schemaFields.remove(new RecordField("balance", RecordFieldType.DOUBLE.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final File jsonFile = new File("src/test/resources/json/single-bank-account-wrong-field-type.json");
    try (final InputStream jsonStream = new FileInputStream(jsonFile);
        final JsonTreeRowRecordReader recordReader = new JsonTreeRowRecordReader(jsonStream, Mockito.mock(ComponentLog.class), schema, dateFormat, timeFormat, timestampFormat)) {
        recordReader.nextRecord().getValues();
        // Reaching this line means no exception was raised, which is the failure case.
        Assert.fail("Was able to read record with invalid schema.");
    } catch (final MalformedRecordException malformed) {
        final String causeMessage = malformed.getCause().getMessage();
        final String[] expectedFragments = { "account.balance", "true", "Double", "Boolean" };
        for (final String fragment : expectedFragments) {
            assertTrue(causeMessage.contains(fragment));
        }
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) File(java.io.File) ComponentLog(org.apache.nifi.logging.ComponentLog) FileInputStream(java.io.FileInputStream) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) Test(org.junit.Test)

Example 44 with DataType

Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.

In class TestJsonTreeRowRecordReader, method testReadArrayDifferentSchemasWithOverride.

/**
 * Reads a JSON array of three bank-account elements against a schema made of the default
 * fields plus an extra string field "address2", and checks the schema's field names and types
 * and the values of each record. Fields absent from an element are expected to read as
 * {@code null}.
 *
 * @throws IOException if the JSON fixture cannot be opened or read
 * @throws MalformedRecordException if a record cannot be parsed against the schema
 */
@Test
public void testReadArrayDifferentSchemasWithOverride() throws IOException, MalformedRecordException {
    // The extra field is declared directly in the schema; the reader takes no separate
    // override map, so none is built here.
    final List<RecordField> fields = getDefaultFields();
    fields.add(new RecordField("address2", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);

    try (final InputStream in = new FileInputStream(new File("src/test/resources/json/bank-account-array-different-schemas.json"));
        final JsonTreeRowRecordReader reader = new JsonTreeRowRecordReader(in, Mockito.mock(ComponentLog.class), schema, dateFormat, timeFormat, timestampFormat)) {
        final List<String> fieldNames = schema.getFieldNames();
        final List<String> expectedFieldNames = Arrays.asList("id", "name", "balance", "address", "city", "state", "zipCode", "country", "address2");
        assertEquals(expectedFieldNames, fieldNames);

        final List<RecordFieldType> dataTypes = schema.getDataTypes().stream().map(dt -> dt.getFieldType()).collect(Collectors.toList());
        final List<RecordFieldType> expectedTypes = Arrays.asList(RecordFieldType.INT, RecordFieldType.STRING, RecordFieldType.DOUBLE, RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING, RecordFieldType.STRING);
        assertEquals(expectedTypes, dataTypes);

        final Object[] firstRecordValues = reader.nextRecord().getValues();
        Assert.assertArrayEquals(new Object[] { 1, "John Doe", 4750.89, "123 My Street", "My City", "MS", "11111", "USA", null }, firstRecordValues);

        final Object[] secondRecordValues = reader.nextRecord().getValues();
        Assert.assertArrayEquals(new Object[] { 2, "Jane Doe", 4820.09, "321 Your Street", "Your City", "NY", "33333", null, null }, secondRecordValues);

        // Only the third expected record carries a value for address2.
        final Object[] thirdRecordValues = reader.nextRecord().getValues();
        Assert.assertArrayEquals(new Object[] { 3, "Jake Doe", 4751.89, "124 My Street", "My City", "MS", "11111", "USA", "Apt. #12" }, thirdRecordValues);

        assertNull(reader.nextRecord());
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Arrays(java.util.Arrays) DataType(org.apache.nifi.serialization.record.DataType) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) ArrayList(java.util.ArrayList) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) ByteArrayInputStream(java.io.ByteArrayInputStream) Map(java.util.Map) Record(org.apache.nifi.serialization.record.Record) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) Files(java.nio.file.Files) RecordField(org.apache.nifi.serialization.record.RecordField) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test) FileInputStream(java.io.FileInputStream) Collectors(java.util.stream.Collectors) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) List(java.util.List) Assert.assertNull(org.junit.Assert.assertNull) Ignore(org.junit.Ignore) Assert(org.junit.Assert) Collections(java.util.Collections) RecordFieldType(org.apache.nifi.serialization.record.RecordFieldType) Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) RecordField(org.apache.nifi.serialization.record.RecordField) HashMap(java.util.HashMap) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ComponentLog(org.apache.nifi.logging.ComponentLog) FileInputStream(java.io.FileInputStream) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) File(java.io.File) 
RecordFieldType(org.apache.nifi.serialization.record.RecordFieldType) Test(org.junit.Test)

Example 45 with DataType

Use of org.apache.nifi.serialization.record.DataType in the Apache NiFi project.

In class TestWriteJsonResult, method testDataTypes.

/**
 * Builds a schema with one field per {@link RecordFieldType} value (field name = lower-cased
 * enum name), writes a single record through {@link WriteJsonResult}, and compares the JSON
 * text with the expected output file.
 *
 * @throws IOException if the expected-output file cannot be read or writing fails
 * @throws ParseException if the fixed reference timestamp cannot be parsed
 */
@Test
public void testDataTypes() throws IOException, ParseException {
    final List<RecordField> fields = new ArrayList<>();
    for (final RecordFieldType fieldType : RecordFieldType.values()) {
        // Locale.ROOT keeps the names stable; under e.g. a Turkish default locale "INT" would
        // not lower-case to "int" and the field names would no longer match the value map keys.
        final String fieldName = fieldType.name().toLowerCase(java.util.Locale.ROOT);
        if (fieldType == RecordFieldType.CHOICE) {
            final List<DataType> possibleTypes = new ArrayList<>();
            possibleTypes.add(RecordFieldType.INT.getDataType());
            possibleTypes.add(RecordFieldType.LONG.getDataType());
            fields.add(new RecordField(fieldName, fieldType.getChoiceDataType(possibleTypes)));
        } else if (fieldType == RecordFieldType.MAP) {
            fields.add(new RecordField(fieldName, fieldType.getMapDataType(RecordFieldType.INT.getDataType())));
        } else {
            fields.add(new RecordField(fieldName, fieldType.getDataType()));
        }
    }
    final RecordSchema schema = new SimpleRecordSchema(fields);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();

    // Fixed reference instant, parsed in GMT so it does not depend on the machine's time zone.
    // "GMT" is spelled in its canonical form rather than relying on the unknown-ID fallback.
    final DateFormat df = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS");
    df.setTimeZone(TimeZone.getTimeZone("GMT"));
    final long time = df.parse("2017/01/01 17:00:00.000").getTime();

    final Map<String, Object> map = new LinkedHashMap<>();
    map.put("height", 48);
    map.put("width", 96);

    final Map<String, Object> valueMap = new LinkedHashMap<>();
    valueMap.put("string", "string");
    valueMap.put("boolean", true);
    valueMap.put("byte", (byte) 1);
    valueMap.put("char", 'c');
    valueMap.put("short", (short) 8);
    valueMap.put("int", 9);
    valueMap.put("bigint", BigInteger.valueOf(8L));
    valueMap.put("long", 8L);
    valueMap.put("float", 8.0F);
    valueMap.put("double", 8.0D);
    valueMap.put("date", new Date(time));
    valueMap.put("time", new Time(time));
    valueMap.put("timestamp", new Timestamp(time));
    valueMap.put("record", null);
    valueMap.put("array", null);
    valueMap.put("choice", 48L);
    valueMap.put("map", map);

    final Record record = new MapRecord(schema, valueMap);
    final RecordSet rs = RecordSet.of(schema, record);
    // The writer is closed before the buffer is read so that all output has been flushed.
    try (final WriteJsonResult writer = new WriteJsonResult(Mockito.mock(ComponentLog.class), schema, new SchemaNameAsAttribute(), baos, true, NullSuppression.NEVER_SUPPRESS, RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat())) {
        writer.write(rs);
    }

    // Decode both sides with the same explicit charset instead of the platform default.
    // NOTE(review): assumes the writer emits UTF-8 and the fixture is stored as UTF-8; confirm.
    final String output = new String(baos.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
    final String expected = new String(Files.readAllBytes(Paths.get("src/test/resources/json/output/dataTypes.json")), java.nio.charset.StandardCharsets.UTF_8);
    assertEquals(expected, output);
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ArrayList(java.util.ArrayList) Time(java.sql.Time) Timestamp(java.sql.Timestamp) LinkedHashMap(java.util.LinkedHashMap) DataType(org.apache.nifi.serialization.record.DataType) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) RecordSet(org.apache.nifi.serialization.record.RecordSet) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) MapRecord(org.apache.nifi.serialization.record.MapRecord) SchemaNameAsAttribute(org.apache.nifi.schema.access.SchemaNameAsAttribute) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ComponentLog(org.apache.nifi.logging.ComponentLog) Date(java.sql.Date) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) RecordFieldType(org.apache.nifi.serialization.record.RecordFieldType) SimpleDateFormat(java.text.SimpleDateFormat) Test(org.junit.Test)

Aggregations

DataType (org.apache.nifi.serialization.record.DataType)45 RecordField (org.apache.nifi.serialization.record.RecordField)36 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)27 ArrayDataType (org.apache.nifi.serialization.record.type.ArrayDataType)24 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)22 RecordDataType (org.apache.nifi.serialization.record.type.RecordDataType)22 ChoiceDataType (org.apache.nifi.serialization.record.type.ChoiceDataType)21 MapDataType (org.apache.nifi.serialization.record.type.MapDataType)20 ArrayList (java.util.ArrayList)17 RecordFieldType (org.apache.nifi.serialization.record.RecordFieldType)17 HashMap (java.util.HashMap)15 Record (org.apache.nifi.serialization.record.Record)14 Map (java.util.Map)13 MapRecord (org.apache.nifi.serialization.record.MapRecord)13 Test (org.junit.Test)13 LinkedHashMap (java.util.LinkedHashMap)11 List (java.util.List)11 ComponentLog (org.apache.nifi.logging.ComponentLog)10 File (java.io.File)9 IOException (java.io.IOException)9