Search in sources :

Example 31 with RecordSchema

Use of org.apache.nifi.serialization.record.RecordSchema in the Apache NiFi project.

From the class TestAvroReaderWithEmbeddedSchema, method testLogicalTypes.

/**
 * Round-trips one Avro record that uses logical types and verifies how it is read back.
 *
 * <p>A record with the fields {@code timeMillis}, {@code timeMicros}, {@code timestampMillis},
 * {@code timestampMicros}, {@code date} and {@code decimal} is written to an in-memory Avro data
 * file using the supplied schema. The bytes are then read with an
 * {@code AvroReaderWithEmbeddedSchema}; the test asserts the field types reported by the reader's
 * schema and the values of the first record.
 *
 * @param schema the Avro schema used for writing; it must declare the six fields named above
 * @throws ParseException if the reference timestamp cannot be parsed
 * @throws IOException if writing or reading the in-memory Avro data fails
 * @throws MalformedRecordException if the reader cannot produce the record
 */
private void testLogicalTypes(Schema schema) throws ParseException, IOException, MalformedRecordException {
    // Reference instant 2017-04-04 14:20:33.000, parsed with the formatter's zone set via "gmt".
    final String referenceTimestamp = "2017-04-04 14:20:33.000";
    final DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    timestampFormat.setTimeZone(TimeZone.getTimeZone("gmt"));
    final long epochMillis = timestampFormat.parse(referenceTimestamp).getTime();

    // The time-of-day part of the reference instant (14:20:33) as an offset from midnight.
    final long secondsOfDay = 33 + (20 * 60) + (14 * 60 * 60);
    final long millisOfDay = secondsOfDay * 1000L;

    final BigDecimal decimalValue = new BigDecimal("123.45");

    // Serialize a single record into an in-memory Avro data file.
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final byte[] avroBytes;
    final DatumWriter<GenericRecord> genericWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(genericWriter);
        final DataFileWriter<GenericRecord> avroWriter = fileWriter.create(schema, sink)) {
        final GenericRecord written = new GenericData.Record(schema);
        written.put("timeMillis", (int) millisOfDay);
        written.put("timeMicros", millisOfDay * 1000L);
        written.put("timestampMillis", epochMillis);
        written.put("timestampMicros", epochMillis * 1000L);
        // 17260 is written as the raw value of the "date" field; the assertion below expects it
        // to format to the same calendar day as the reference instant.
        written.put("date", 17260);
        // The decimal is stored as the bytes of its unscaled value.
        written.put("decimal", ByteBuffer.wrap(decimalValue.unscaledValue().toByteArray()));

        avroWriter.append(written);
        avroWriter.flush();
        avroBytes = sink.toByteArray();
    }

    try (final InputStream source = new ByteArrayInputStream(avroBytes)) {
        final AvroRecordReader avroReader = new AvroReaderWithEmbeddedSchema(source);

        // Field types reported by the reader's schema.
        final RecordSchema derivedSchema = avroReader.getSchema();
        assertEquals(RecordFieldType.TIME, derivedSchema.getDataType("timeMillis").get().getFieldType());
        assertEquals(RecordFieldType.TIME, derivedSchema.getDataType("timeMicros").get().getFieldType());
        assertEquals(RecordFieldType.TIMESTAMP, derivedSchema.getDataType("timestampMillis").get().getFieldType());
        assertEquals(RecordFieldType.TIMESTAMP, derivedSchema.getDataType("timestampMicros").get().getFieldType());
        assertEquals(RecordFieldType.DATE, derivedSchema.getDataType("date").get().getFieldType());
        assertEquals(RecordFieldType.DOUBLE, derivedSchema.getDataType("decimal").get().getFieldType());

        // Field values of the record that was written above.
        final Record parsed = avroReader.nextRecord();
        assertEquals(new java.sql.Time(millisOfDay), parsed.getValue("timeMillis"));
        assertEquals(new java.sql.Time(millisOfDay), parsed.getValue("timeMicros"));
        assertEquals(new java.sql.Timestamp(epochMillis), parsed.getValue("timestampMillis"));
        assertEquals(new java.sql.Timestamp(epochMillis), parsed.getValue("timestampMicros"));

        // Dates are compared by their yyyy-MM-dd rendering so the time of day is ignored.
        final DateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        dayFormat.setTimeZone(TimeZone.getTimeZone("gmt"));
        final String expectedDay = dayFormat.format(new java.sql.Date(epochMillis));
        final String actualDay = dayFormat.format(parsed.getValue("date"));
        assertEquals(expectedDay, actualDay);

        assertEquals(decimalValue.doubleValue(), parsed.getValue("decimal"));
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) BigDecimal(java.math.BigDecimal) ByteArrayInputStream(java.io.ByteArrayInputStream) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) SimpleDateFormat(java.text.SimpleDateFormat) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema)

Example 32 with RecordSchema

Use of org.apache.nifi.serialization.record.RecordSchema in the Apache NiFi project.

From the class TestAvroReaderWithEmbeddedSchema, method testMultipleTypes.

/**
 * Verifies reading a field that may hold values of several types.
 *
 * <p>Three records are written to an in-memory Avro data file using the schema parsed from
 * {@code src/test/resources/avro/multiple-types.avsc}. Their {@code field} values are an integer,
 * a list of integers and a string. The test asserts that the reader reports {@code field} as
 * {@code RecordFieldType.CHOICE} and returns the three values in the order they were written,
 * with the list coming back as an {@code Object[]}.
 */
@Test
public void testMultipleTypes() throws IOException, ParseException, MalformedRecordException, SchemaNotFoundException {
    final File schemaFile = new File("src/test/resources/avro/multiple-types.avsc");
    final Schema schema = new Schema.Parser().parse(schemaFile);

    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final byte[] avroBytes;
    final DatumWriter<GenericRecord> genericWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(genericWriter);
        final DataFileWriter<GenericRecord> avroWriter = fileWriter.create(schema, sink)) {
        // When a union field offers several type options, a value is expected to be mapped to
        // the first type it is compatible with.
        final GenericRecord intRecord = new GenericData.Record(schema);
        intRecord.put("field", 123);

        final GenericRecord listRecord = new GenericData.Record(schema);
        listRecord.put("field", Arrays.asList(1, 2, 3));

        final GenericRecord textRecord = new GenericData.Record(schema);
        textRecord.put("field", "not a number");

        avroWriter.append(intRecord);
        avroWriter.append(listRecord);
        avroWriter.append(textRecord);
        avroWriter.flush();
        avroBytes = sink.toByteArray();
    }

    try (final InputStream source = new ByteArrayInputStream(avroBytes)) {
        final AvroRecordReader avroReader = new AvroReaderWithEmbeddedSchema(source);

        final RecordSchema derivedSchema = avroReader.getSchema();
        assertEquals(RecordFieldType.CHOICE, derivedSchema.getDataType("field").get().getFieldType());

        // Records must come back in write order: integer, list, string.
        final Record firstRead = avroReader.nextRecord();
        assertEquals(123, firstRead.getValue("field"));

        final Record secondRead = avroReader.nextRecord();
        assertArrayEquals(new Object[] { 1, 2, 3 }, (Object[]) secondRead.getValue("field"));

        final Record thirdRead = avroReader.nextRecord();
        assertEquals("not a number", thirdRead.getValue("field"));
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) DataFileWriter(org.apache.avro.file.DataFileWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayInputStream(java.io.ByteArrayInputStream) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 33 with RecordSchema

Use of org.apache.nifi.serialization.record.RecordSchema in the Apache NiFi project.

From the class TestCSVRecordReader, method testExtraFieldNotInHeader.

/**
 * Verifies how a data row with more columns than the header is read.
 *
 * <p>The header names eight columns while the single data row carries nine values. The test
 * asserts that the eight named values are returned as trimmed strings, that the ninth value is
 * available under the name {@code unknown_field_index_8}, and that no second record follows.
 */
@Test
public void testExtraFieldNotInHeader() throws IOException, MalformedRecordException {
    final List<RecordField> schemaFields = getDefaultFields();
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final String headerLine = "id, name, balance, address, city, state, zipCode, country";
    final String inputRecord = "1, John, 40.80, 123 My Street, My City, MS, 11111, USA, North America";
    final byte[] inputData = String.join("\n", headerLine, inputRecord).getBytes();

    // Field name / expected value pairs, in the order they are asserted. The trailing value has
    // no header column (no 'continent' field), so it is looked up by its generated name.
    final String[][] expectedByField = {
        { "id", "1" },
        { "name", "John" },
        { "balance", "40.80" },
        { "address", "123 My Street" },
        { "city", "My City" },
        { "state", "MS" },
        { "zipCode", "11111" },
        { "country", "USA" },
        { "unknown_field_index_8", "North America" }
    };

    try (final InputStream csvStream = new ByteArrayInputStream(inputData);
        final CSVRecordReader csvReader = createReader(csvStream, schema, format)) {
        final Record parsed = csvReader.nextRecord(false, false);
        assertNotNull(parsed);
        for (final String[] expectation : expectedByField) {
            assertEquals(expectation[1], parsed.getValue(expectation[0]));
        }
        // The input holds exactly one data row.
        assertNull(csvReader.nextRecord(false, false));
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 34 with RecordSchema

Use of org.apache.nifi.serialization.record.RecordSchema in the Apache NiFi project.

From the class TestCSVRecordReader, method testDateNullFormat.

/**
 * Verifies that a value in a {@code DATE} column is returned as the raw string
 * {@code "1983-01-01"} when the reader is constructed with {@code null} in place of one of its
 * format arguments.
 *
 * <p>NOTE(review): judging by the test name and the TIME/TIMESTAMP defaults that follow it, the
 * {@code null} constructor argument is presumably the date format; confirm against the
 * {@code CSVRecordReader} constructor.
 */
@Test
public void testDateNullFormat() throws IOException, MalformedRecordException {
    final String text = "date\n1983-01-01";
    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("date", RecordFieldType.DATE.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);
    // Encode the input explicitly as UTF-8 so it matches the "UTF-8" encoding handed to the
    // reader below instead of depending on the platform default charset.
    final byte[] inputData = text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    try (final InputStream bais = new ByteArrayInputStream(inputData);
        final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false, null, RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
        final Record record = reader.nextRecord(false, false);
        // The cast doubles as a check that the value really is a String.
        assertEquals("1983-01-01", (String) record.getValue("date"));
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) ComponentLog(org.apache.nifi.logging.ComponentLog) Test(org.junit.Test)

Example 35 with RecordSchema

Use of org.apache.nifi.serialization.record.RecordSchema in the Apache NiFi project.

From the class TestCSVRecordReader, method testSimpleParse.

/**
 * Verifies parsing of {@code src/test/resources/csv/single-bank-account.csv}.
 *
 * <p>The default field list is used with the {@code balance} field swapped for one typed as
 * {@code doubleDataType}. The test asserts that the first record's values match the expected
 * array (with the balance as a {@code double}) and that no second record follows.
 */
@Test
public void testSimpleParse() throws IOException, MalformedRecordException {
    final List<RecordField> schemaFields = getDefaultFields();
    // Swap only the "balance" field for a double-typed one; every other field is kept as is.
    schemaFields.replaceAll(field -> {
        if (!field.getFieldName().equals("balance")) {
            return field;
        }
        return new RecordField("balance", doubleDataType);
    });
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final File csvFile = new File("src/test/resources/csv/single-bank-account.csv");
    final Object[] expectedValues = new Object[] { "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA" };

    try (final InputStream csvStream = new FileInputStream(csvFile);
        final CSVRecordReader csvReader = createReader(csvStream, schema, format)) {
        final Object[] actualValues = csvReader.nextRecord().getValues();
        Assert.assertArrayEquals(expectedValues, actualValues);
        // The file holds exactly one data row.
        assertNull(csvReader.nextRecord());
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) File(java.io.File) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Aggregations

RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 243; SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema): 178; Test (org.junit.Test): 168; Record (org.apache.nifi.serialization.record.Record): 147; RecordField (org.apache.nifi.serialization.record.RecordField): 138; ArrayList (java.util.ArrayList): 107; MapRecord (org.apache.nifi.serialization.record.MapRecord): 94; HashMap (java.util.HashMap): 88; InputStream (java.io.InputStream): 79; ByteArrayInputStream (java.io.ByteArrayInputStream): 64; FileInputStream (java.io.FileInputStream): 56; ComponentLog (org.apache.nifi.logging.ComponentLog): 54; IOException (java.io.IOException): 44; LinkedHashMap (java.util.LinkedHashMap): 36; DataType (org.apache.nifi.serialization.record.DataType): 36; File (java.io.File): 31; Schema (org.apache.avro.Schema): 29; SchemaIdentifier (org.apache.nifi.serialization.record.SchemaIdentifier): 29; MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 28; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 26