use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class TestCSVRecordReader method testReadRawWithDifferentFieldName.
/**
 * Verifies how a header column named {@code continent} is reported by the two
 * {@code nextRecord} variants.
 *
 * <p>With {@code nextRecord()} the record is expected to return {@code null} for
 * {@code continent}; with {@code nextRecord(false, false)} the raw value
 * {@code "North America"} is expected. {@code country} is expected to be {@code null} in
 * both cases.
 *
 * <p>NOTE(review): this presumably relies on the schema built from {@code getDefaultFields()}
 * declaring {@code country} but not {@code continent} -- confirm against that helper.
 *
 * @throws IOException if reading the in-memory CSV fails
 * @throws MalformedRecordException if the reader rejects the CSV data
 */
@Test
public void testReadRawWithDifferentFieldName() throws IOException, MalformedRecordException {
    final List<RecordField> fields = getDefaultFields();
    final RecordSchema schema = new SimpleRecordSchema(fields);

    final String headerLine = "id, name, balance, address, city, state, zipCode, continent";
    final String inputRecord = "1, John, 40.80, 123 My Street, My City, MS, 11111, North America";
    final String csvData = headerLine + "\n" + inputRecord;
    // Encode with an explicit charset so the test bytes never depend on the platform default.
    // The data is ASCII-only, so UTF-8 yields the same bytes as any ASCII-compatible encoding.
    final byte[] inputData = csvData.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    // nextRecord() must not expose the 'continent' column.
    try (final InputStream bais = new ByteArrayInputStream(inputData);
         final CSVRecordReader reader = createReader(bais, schema, format)) {
        final Record record = reader.nextRecord();
        assertNotNull(record);

        assertEquals("1", record.getValue("id"));
        assertEquals("John", record.getValue("name"));
        assertEquals("40.80", record.getValue("balance"));
        assertEquals("123 My Street", record.getValue("address"));
        assertEquals("My City", record.getValue("city"));
        assertEquals("MS", record.getValue("state"));
        assertEquals("11111", record.getValue("zipCode"));
        assertNull(record.getValue("country"));
        assertNull(record.getValue("continent"));

        // Only one data row was supplied.
        assertNull(reader.nextRecord());
    }

    // nextRecord(false, false) must expose the 'continent' column with its raw value.
    try (final InputStream bais = new ByteArrayInputStream(inputData);
         final CSVRecordReader reader = createReader(bais, schema, format)) {
        final Record record = reader.nextRecord(false, false);
        assertNotNull(record);

        assertEquals("1", record.getValue("id"));
        assertEquals("John", record.getValue("name"));
        assertEquals("40.80", record.getValue("balance"));
        assertEquals("123 My Street", record.getValue("address"));
        assertEquals("My City", record.getValue("city"));
        assertEquals("MS", record.getValue("state"));
        assertEquals("11111", record.getValue("zipCode"));
        assertNull(record.getValue("country"));
        assertEquals("North America", record.getValue("continent"));

        // Only one data row was supplied.
        assertNull(reader.nextRecord(false, false));
    }
}
use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class TestCSVRecordReader method testTimeNullFormat.
/**
 * Reads a TIME-typed field through {@code nextRecord(false, false)} from a reader that is
 * constructed with {@code null} in the position where the sibling tests pass the TIME format,
 * and expects the raw string {@code "01:02:03"} back.
 *
 * @throws IOException if reading the in-memory CSV fails
 * @throws MalformedRecordException if the reader rejects the CSV data
 */
@Test
public void testTimeNullFormat() throws IOException, MalformedRecordException {
    final String text = "time\n01:02:03";

    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("time", RecordFieldType.TIME.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);

    // Encode with the same charset the reader is told to decode with ("UTF-8"),
    // instead of whatever the platform default happens to be.
    final byte[] inputData = text.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    try (final InputStream bais = new ByteArrayInputStream(inputData);
         final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
                 RecordFieldType.DATE.getDefaultFormat(), null, RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {

        final Record record = reader.nextRecord(false, false);
        // Fail with a clear assertion rather than a NullPointerException if no record is read.
        assertNotNull(record);
        assertEquals("01:02:03", (String) record.getValue("time"));
    }
}
use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class TestCSVRecordReader method testTimestampNoCoersionUnexpectedFormat.
/**
 * Reads a TIMESTAMP-typed field through {@code nextRecord(false, false)} from a reader that is
 * constructed with the pattern {@code "HH-MM-SS"} in the position where the sibling tests pass
 * the TIMESTAMP format. The input value {@code "01:02:03"} does not follow that pattern, and
 * the test expects it to be returned unchanged as a string.
 *
 * @throws IOException if reading the in-memory CSV fails
 * @throws MalformedRecordException if the reader rejects the CSV data
 */
@Test
public void testTimestampNoCoersionUnexpectedFormat() throws IOException, MalformedRecordException {
    final String text = "timestamp\n01:02:03";

    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("timestamp", RecordFieldType.TIMESTAMP.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);

    // Encode with the same charset the reader is told to decode with ("UTF-8"),
    // instead of whatever the platform default happens to be.
    final byte[] inputData = text.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    try (final InputStream bais = new ByteArrayInputStream(inputData);
         final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
                 RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), "HH-MM-SS", "UTF-8")) {

        final Record record = reader.nextRecord(false, false);
        // Fail with a clear assertion rather than a NullPointerException if no record is read.
        assertNotNull(record);
        assertEquals("01:02:03", (String) record.getValue("timestamp"));
    }
}
use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class TestJacksonCSVRecordReader method testFieldInSchemaButNotHeader.
/**
 * Checks what happens to a data value that has no matching header column.
 *
 * <p>The header lists seven columns (no {@code country}) while the data row carries an eighth
 * value, {@code "USA"}. Two readers are exercised over the same bytes:
 * <ol>
 *   <li>the reader from {@code createReader(...)}, for which {@code country} is expected to be
 *       {@code null};</li>
 *   <li>a reader constructed directly with both boolean flags set to {@code true}, for which
 *       {@code country} is expected to be {@code "USA"}.</li>
 * </ol>
 *
 * @throws IOException if reading the in-memory CSV fails
 * @throws MalformedRecordException if the reader rejects the CSV data
 */
@Test
public void testFieldInSchemaButNotHeader() throws IOException, MalformedRecordException {
    final List<RecordField> fields = getDefaultFields();
    final RecordSchema schema = new SimpleRecordSchema(fields);

    final String headerLine = "id, name, balance, address, city, state, zipCode";
    final String inputRecord = "1, John, 40.80, 123 My Street, My City, MS, 11111, USA";
    final String csvData = headerLine + "\n" + inputRecord;
    // Encode with an explicit charset so the test bytes never depend on the platform default;
    // the second reader below is explicitly told to decode as "UTF-8".
    final byte[] inputData = csvData.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    try (final InputStream bais = new ByteArrayInputStream(inputData);
         final JacksonCSVRecordReader reader = createReader(bais, schema, format)) {
        final Record record = reader.nextRecord();
        assertNotNull(record);

        assertEquals("1", record.getValue("id"));
        assertEquals("John", record.getValue("name"));
        assertEquals("40.80", record.getValue("balance"));
        assertEquals("123 My Street", record.getValue("address"));
        assertEquals("My City", record.getValue("city"));
        assertEquals("MS", record.getValue("state"));
        assertEquals("11111", record.getValue("zipCode"));

        // If schema says that there are fields a, b, c
        // and the CSV has a header line that says field names are a, b
        // and then the data has values 1,2,3
        // then a=1, b=2, c=null
        assertNull(record.getValue("country"));

        // Only one data row was supplied.
        assertNull(reader.nextRecord());
    }

    // Create a second reader, this time configured to ignore the header line. This should cause
    // our schema to be the definitive list of what fields exist.
    // NOTE(review): presumably the two 'true' arguments mean "header present" and "ignore header";
    // confirm against the JacksonCSVRecordReader constructor.
    try (final InputStream bais = new ByteArrayInputStream(inputData);
         final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, true,
                 RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
        final Record record = reader.nextRecord();
        assertNotNull(record);

        assertEquals("1", record.getValue("id"));
        assertEquals("John", record.getValue("name"));
        assertEquals("40.80", record.getValue("balance"));
        assertEquals("123 My Street", record.getValue("address"));
        assertEquals("My City", record.getValue("city"));
        assertEquals("MS", record.getValue("state"));
        assertEquals("11111", record.getValue("zipCode"));

        // If schema says that there are fields a, b, c
        // and the CSV has a header line that says field names are a, b
        // and then the data has values 1,2,3
        // then a=1, b=2, c=null
        // But if we configure the reader to Ignore the header, then this will not occur!
        assertEquals("USA", record.getValue("country"));

        // Only one data row was supplied.
        assertNull(reader.nextRecord());
    }
}
use of org.apache.nifi.serialization.record.RecordSchema in project nifi by apache.
the class TestJacksonCSVRecordReader method testExtraFieldNotInHeader.
/**
 * Checks that a data value with no corresponding header column is still returned by
 * {@code nextRecord(false, false)}.
 *
 * <p>The header lists eight columns while the data row carries a ninth value,
 * {@code "North America"}. The test expects that value to be available under the name
 * {@code unknown_field_index_8}, where 8 is the value's zero-based position in the row.
 *
 * @throws IOException if reading the in-memory CSV fails
 * @throws MalformedRecordException if the reader rejects the CSV data
 */
@Test
public void testExtraFieldNotInHeader() throws IOException, MalformedRecordException {
    final List<RecordField> fields = getDefaultFields();
    final RecordSchema schema = new SimpleRecordSchema(fields);

    final String headerLine = "id, name, balance, address, city, state, zipCode, country";
    final String inputRecord = "1, John, 40.80, 123 My Street, My City, MS, 11111, USA, North America";
    final String csvData = headerLine + "\n" + inputRecord;
    // Encode with an explicit charset so the test bytes never depend on the platform default.
    // The data is ASCII-only, so UTF-8 yields the same bytes as any ASCII-compatible encoding.
    final byte[] inputData = csvData.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    // nextRecord(false, false) must keep the trailing value that has no header column and
    // expose it under a generated 'unknown_field_index_<n>' name.
    try (final InputStream bais = new ByteArrayInputStream(inputData);
         final JacksonCSVRecordReader reader = createReader(bais, schema, format)) {
        final Record record = reader.nextRecord(false, false);
        assertNotNull(record);

        assertEquals("1", record.getValue("id"));
        assertEquals("John", record.getValue("name"));
        assertEquals("40.80", record.getValue("balance"));
        assertEquals("123 My Street", record.getValue("address"));
        assertEquals("My City", record.getValue("city"));
        assertEquals("MS", record.getValue("state"));
        assertEquals("11111", record.getValue("zipCode"));
        assertEquals("USA", record.getValue("country"));
        assertEquals("North America", record.getValue("unknown_field_index_8"));

        // Only one data row was supplied.
        assertNull(reader.nextRecord(false, false));
    }
}
Aggregations