Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class TestAvroReaderWithEmbeddedSchema, method testLogicalTypes.
private void testLogicalTypes(Schema schema) throws ParseException, IOException, MalformedRecordException {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final String expectedTime = "2017-04-04 14:20:33.000";
    final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    df.setTimeZone(TimeZone.getTimeZone("gmt"));
    final long timeLong = df.parse(expectedTime).getTime();

    // 14:20:33, the time-of-day portion of expectedTime, as seconds/millis since midnight
    final long secondsSinceMidnight = 33 + (20 * 60) + (14 * 60 * 60);
    final long millisSinceMidnight = secondsSinceMidnight * 1000L;

    final BigDecimal bigDecimal = new BigDecimal("123.45");

    final byte[] serialized;
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
        final DataFileWriter<GenericRecord> writer = dataFileWriter.create(schema, baos)) {

        final GenericRecord record = new GenericData.Record(schema);
        record.put("timeMillis", (int) millisSinceMidnight);
        record.put("timeMicros", millisSinceMidnight * 1000L);
        record.put("timestampMillis", timeLong);
        record.put("timestampMicros", timeLong * 1000L);
        // 17260 days after the epoch is 2017-04-04, the date portion of expectedTime
        record.put("date", 17260);
        // the Avro decimal logical type stores the unscaled value as two's-complement bytes
        record.put("decimal", ByteBuffer.wrap(bigDecimal.unscaledValue().toByteArray()));

        writer.append(record);
        writer.flush();

        serialized = baos.toByteArray();
    }

    try (final InputStream in = new ByteArrayInputStream(serialized)) {
        final AvroRecordReader reader = new AvroReaderWithEmbeddedSchema(in);
        final RecordSchema recordSchema = reader.getSchema();
        assertEquals(RecordFieldType.TIME, recordSchema.getDataType("timeMillis").get().getFieldType());
        assertEquals(RecordFieldType.TIME, recordSchema.getDataType("timeMicros").get().getFieldType());
        assertEquals(RecordFieldType.TIMESTAMP, recordSchema.getDataType("timestampMillis").get().getFieldType());
        assertEquals(RecordFieldType.TIMESTAMP, recordSchema.getDataType("timestampMicros").get().getFieldType());
        assertEquals(RecordFieldType.DATE, recordSchema.getDataType("date").get().getFieldType());
        assertEquals(RecordFieldType.DOUBLE, recordSchema.getDataType("decimal").get().getFieldType());

        final Record record = reader.nextRecord();
        assertEquals(new java.sql.Time(millisSinceMidnight), record.getValue("timeMillis"));
        assertEquals(new java.sql.Time(millisSinceMidnight), record.getValue("timeMicros"));
        assertEquals(new java.sql.Timestamp(timeLong), record.getValue("timestampMillis"));
        assertEquals(new java.sql.Timestamp(timeLong), record.getValue("timestampMicros"));

        final DateFormat noTimeOfDayDateFormat = new SimpleDateFormat("yyyy-MM-dd");
        noTimeOfDayDateFormat.setTimeZone(TimeZone.getTimeZone("gmt"));
        assertEquals(noTimeOfDayDateFormat.format(new java.sql.Date(timeLong)), noTimeOfDayDateFormat.format(record.getValue("date")));
        assertEquals(bigDecimal.doubleValue(), record.getValue("decimal"));
    }
}
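The schema handed to this helper needs the six field names used above with matching Avro logical types. A minimal sketch of such a schema, built inline rather than loaded from the repository's test resource (the actual .avsc may differ), could drive the helper like this:

@Test
public void testLogicalTypesSketch() throws Exception {
    // Hypothetical schema; field names and logical types mirror what testLogicalTypes populates.
    final String schemaJson = "{ \"type\": \"record\", \"name\": \"logicalTypesSketch\", \"fields\": ["
        + "{ \"name\": \"timeMillis\", \"type\": { \"type\": \"int\", \"logicalType\": \"time-millis\" } },"
        + "{ \"name\": \"timeMicros\", \"type\": { \"type\": \"long\", \"logicalType\": \"time-micros\" } },"
        + "{ \"name\": \"timestampMillis\", \"type\": { \"type\": \"long\", \"logicalType\": \"timestamp-millis\" } },"
        + "{ \"name\": \"timestampMicros\", \"type\": { \"type\": \"long\", \"logicalType\": \"timestamp-micros\" } },"
        + "{ \"name\": \"date\", \"type\": { \"type\": \"int\", \"logicalType\": \"date\" } },"
        + "{ \"name\": \"decimal\", \"type\": { \"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 5, \"scale\": 2 } }"
        + "] }";
    testLogicalTypes(new Schema.Parser().parse(schemaJson));
}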
Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class TestAvroReaderWithEmbeddedSchema, method testMultipleTypes.
@Test
public void testMultipleTypes() throws IOException, ParseException, MalformedRecordException, SchemaNotFoundException {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/avro/multiple-types.avsc"));
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();

    final byte[] serialized;
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
        final DataFileWriter<GenericRecord> writer = dataFileWriter.create(schema, baos)) {

        // If a union field has multiple type options, a value should be mapped to the first compatible type.
        final GenericRecord r1 = new GenericData.Record(schema);
        r1.put("field", 123);
        final GenericRecord r2 = new GenericData.Record(schema);
        r2.put("field", Arrays.asList(1, 2, 3));
        final GenericRecord r3 = new GenericData.Record(schema);
        r3.put("field", "not a number");

        writer.append(r1);
        writer.append(r2);
        writer.append(r3);
        writer.flush();

        serialized = baos.toByteArray();
    }

    try (final InputStream in = new ByteArrayInputStream(serialized)) {
        final AvroRecordReader reader = new AvroReaderWithEmbeddedSchema(in);
        final RecordSchema recordSchema = reader.getSchema();
        assertEquals(RecordFieldType.CHOICE, recordSchema.getDataType("field").get().getFieldType());

        Record record = reader.nextRecord();
        assertEquals(123, record.getValue("field"));
        record = reader.nextRecord();
        assertArrayEquals(new Object[] { 1, 2, 3 }, (Object[]) record.getValue("field"));
        record = reader.nextRecord();
        assertEquals("not a number", record.getValue("field"));
    }
}
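The union schema is loaded from src/test/resources/avro/multiple-types.avsc, which is not reproduced in this listing. A schema along the following lines would exercise the same CHOICE mapping (a sketch, not the repository file):

final Schema unionSchema = new Schema.Parser().parse(
    "{ \"type\": \"record\", \"name\": \"multipleTypesSketch\", \"fields\": ["
    + "{ \"name\": \"field\", \"type\": [ \"int\", { \"type\": \"array\", \"items\": \"int\" }, \"string\" ] }"
    + "] }");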
Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class TestCSVRecordReader, method testExtraFieldNotInHeader.
@Test
public void testExtraFieldNotInHeader() throws IOException, MalformedRecordException {
    final List<RecordField> fields = getDefaultFields();
    final RecordSchema schema = new SimpleRecordSchema(fields);

    final String headerLine = "id, name, balance, address, city, state, zipCode, country";
    final String inputRecord = "1, John, 40.80, 123 My Street, My City, MS, 11111, USA, North America";
    final String csvData = headerLine + "\n" + inputRecord;
    final byte[] inputData = csvData.getBytes();

    // the trailing value has no header column, so it should be retained under a
    // generated field name rather than appearing as a 'continent' field
    try (final InputStream bais = new ByteArrayInputStream(inputData);
        final CSVRecordReader reader = createReader(bais, schema, format)) {

        final Record record = reader.nextRecord(false, false);
        assertNotNull(record);
        assertEquals("1", record.getValue("id"));
        assertEquals("John", record.getValue("name"));
        assertEquals("40.80", record.getValue("balance"));
        assertEquals("123 My Street", record.getValue("address"));
        assertEquals("My City", record.getValue("city"));
        assertEquals("MS", record.getValue("state"));
        assertEquals("11111", record.getValue("zipCode"));
        assertEquals("USA", record.getValue("country"));
        assertEquals("North America", record.getValue("unknown_field_index_8"));

        assertNull(reader.nextRecord(false, false));
    }
}
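The two booleans passed to nextRecord are coerceTypes and dropUnknownFields; with both false, the extra column survives under the generated name asserted above. A sketch of the opposite behavior, assuming the same reader setup: with dropping enabled, the unmapped ninth column should simply disappear.

try (final InputStream bais = new ByteArrayInputStream(inputData);
    final CSVRecordReader reader = createReader(bais, schema, format)) {
    // nextRecord(coerceTypes, dropUnknownFields): dropping unknown fields
    // should leave no trace of the column that has no header entry
    final Record record = reader.nextRecord(false, true);
    assertNull(record.getValue("unknown_field_index_8"));
}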
Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class TestCSVRecordReader, method testDateNullFormat.
@Test
public void testDateNullFormat() throws IOException, MalformedRecordException {
    final String text = "date\n1983-01-01";

    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("date", RecordFieldType.DATE.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);

    try (final InputStream bais = new ByteArrayInputStream(text.getBytes());
        final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
            null, RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {

        final Record record = reader.nextRecord(false, false);
        assertEquals("1983-01-01", (String) record.getValue("date"));
    }
}
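The null argument is the date format: without one, the reader leaves the value as the raw string even though the schema declares a DATE field. Supplying a format in that position should instead allow coercion to java.sql.Date; a sketch under the same setup, assuming the no-argument nextRecord applies type coercion:

try (final InputStream bais = new ByteArrayInputStream(text.getBytes());
    final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
        "yyyy-MM-dd", RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
    // with a date format supplied, the DATE-typed field should coerce to java.sql.Date
    final Record record = reader.nextRecord();
    assertTrue(record.getValue("date") instanceof java.sql.Date);
}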
Use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
The class TestCSVRecordReader, method testSimpleParse.
@Test
public void testSimpleParse() throws IOException, MalformedRecordException {
    final List<RecordField> fields = getDefaultFields();
    fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
    final RecordSchema schema = new SimpleRecordSchema(fields);

    try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/single-bank-account.csv"));
        final CSVRecordReader reader = createReader(fis, schema, format)) {

        final Object[] record = reader.nextRecord().getValues();
        final Object[] expectedValues = new Object[] { "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA" };
        Assert.assertArrayEquals(expectedValues, record);

        assertNull(reader.nextRecord());
    }
}
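Both CSV tests rely on a getDefaultFields() helper that this listing does not show; judging from the header columns, it presumably builds string-typed fields in header order, along these lines (a hypothetical reconstruction):

private List<RecordField> getDefaultFields() {
    // one string-typed field per CSV column, in header order
    final List<RecordField> fields = new ArrayList<>();
    for (final String fieldName : new String[] { "id", "name", "balance", "address", "city", "state", "zipCode", "country" }) {
        fields.add(new RecordField(fieldName, RecordFieldType.STRING.getDataType()));
    }
    return fields;
}

testSimpleParse then swaps the balance field to a double type, which is why its expected values contain 4750.89D while every other column remains a string.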