Search in sources:

Example 26 with RecordField

use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

the class TestCSVRecordReader method testReadRawWithDifferentFieldName.

/**
 * Reads one CSV row whose header names its last column {@code continent} and checks the
 * record twice: once through {@code nextRecord()} and once through
 * {@code nextRecord(false, false)}.
 *
 * <p>In both reads the seven leading columns are expected as raw strings and {@code country}
 * is expected to be {@code null}. The reads differ only in {@code continent}: it is expected
 * to be {@code null} after {@code nextRecord()} and {@code "North America"} after
 * {@code nextRecord(false, false)}.
 *
 * @throws IOException if reading from the in-memory stream fails
 * @throws MalformedRecordException if the reader rejects the CSV input
 */
@Test
public void testReadRawWithDifferentFieldName() throws IOException, MalformedRecordException {
    final List<RecordField> schemaFields = getDefaultFields();
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    final String header = "id, name, balance, address, city, state, zipCode, continent";
    final String row = "1, John, 40.80, 123 My Street, My City, MS, 11111, North America";
    final byte[] payload = (header + "\n" + row).getBytes();

    // {field name, expected value} pairs that must hold for both reads below.
    final String[][] commonExpectations = {
        {"id", "1"},
        {"name", "John"},
        {"balance", "40.80"},
        {"address", "123 My Street"},
        {"city", "My City"},
        {"state", "MS"},
        {"zipCode", "11111"}
    };

    // nextRecord(): the 'continent' column must not show up in the record.
    try (final InputStream in = new ByteArrayInputStream(payload);
        final CSVRecordReader csvReader = createReader(in, recordSchema, format)) {
        final Record parsed = csvReader.nextRecord();
        assertNotNull(parsed);
        for (final String[] expectation : commonExpectations) {
            assertEquals(expectation[1], parsed.getValue(expectation[0]));
        }
        assertNull(parsed.getValue("country"));
        assertNull(parsed.getValue("continent"));
        // Only one data row was supplied.
        assertNull(csvReader.nextRecord());
    }

    // nextRecord(false, false): the 'continent' column must be carried through.
    try (final InputStream in = new ByteArrayInputStream(payload);
        final CSVRecordReader csvReader = createReader(in, recordSchema, format)) {
        final Record parsed = csvReader.nextRecord(false, false);
        assertNotNull(parsed);
        for (final String[] expectation : commonExpectations) {
            assertEquals(expectation[1], parsed.getValue(expectation[0]));
        }
        assertNull(parsed.getValue("country"));
        assertEquals("North America", parsed.getValue("continent"));
        // Only one data row was supplied.
        assertNull(csvReader.nextRecord(false, false));
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 27 with RecordField

use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

the class TestCSVRecordReader method testTimeNullFormat.

/**
 * Checks that a TIME-typed column is returned as its raw text {@code "01:02:03"} when the
 * reader is constructed with {@code null} in the argument slot between the date and
 * timestamp formats (presumably the time format; confirm against the
 * {@code CSVRecordReader} constructor) and the record is read via
 * {@code nextRecord(false, false)}.
 *
 * @throws IOException if reading from the in-memory stream fails
 * @throws MalformedRecordException if the reader rejects the CSV input
 */
@Test
public void testTimeNullFormat() throws IOException, MalformedRecordException {
    final String text = "time\n01:02:03";
    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("time", RecordFieldType.TIME.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);
    // Encode explicitly as UTF-8 so the bytes agree with the "UTF-8" encoding handed to the
    // reader below, instead of depending on the platform default charset.
    final byte[] inputData = text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    try (final InputStream bais = new ByteArrayInputStream(inputData);
        final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false, RecordFieldType.DATE.getDefaultFormat(), null, RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
        final Record record = reader.nextRecord(false, false);
        // No (String) cast: if the value comes back as some other type, the test should fail
        // with a readable assertion message rather than error out with ClassCastException.
        assertEquals("01:02:03", record.getValue("time"));
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) ComponentLog(org.apache.nifi.logging.ComponentLog) Test(org.junit.Test)

Example 28 with RecordField

use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

the class TestCSVRecordReader method testTimestampNoCoersionUnexpectedFormat.

/**
 * Checks that a TIMESTAMP-typed column whose text ({@code "01:02:03"}) does not match the
 * configured {@code "HH-MM-SS"} pattern is still returned as the raw text when the record
 * is read via {@code nextRecord(false, false)}.
 *
 * @throws IOException if reading from the in-memory stream fails
 * @throws MalformedRecordException if the reader rejects the CSV input
 */
@Test
public void testTimestampNoCoersionUnexpectedFormat() throws IOException, MalformedRecordException {
    final String text = "timestamp\n01:02:03";
    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("timestamp", RecordFieldType.TIMESTAMP.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(fields);
    // Encode explicitly as UTF-8 so the bytes agree with the "UTF-8" encoding handed to the
    // reader below, instead of depending on the platform default charset.
    final byte[] inputData = text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    try (final InputStream bais = new ByteArrayInputStream(inputData);
        final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false, RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), "HH-MM-SS", "UTF-8")) {
        final Record record = reader.nextRecord(false, false);
        // No (String) cast: if the value comes back as some other type, the test should fail
        // with a readable assertion message rather than error out with ClassCastException.
        assertEquals("01:02:03", record.getValue("timestamp"));
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) ComponentLog(org.apache.nifi.logging.ComponentLog) Test(org.junit.Test)

Example 29 with RecordField

use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

the class TestJacksonCSVRecordReader method testFieldInSchemaButNotHeader.

/**
 * Feeds a CSV whose header lists seven names but whose data row carries eight values, and
 * checks how the eighth value ({@code "USA"}) is exposed by two differently configured
 * readers.
 *
 * <p>With the reader from {@code createReader}, {@code country} is expected to be
 * {@code null}. With a reader constructed directly with the flags {@code (true, true)},
 * {@code country} is expected to be {@code "USA"}.
 *
 * @throws IOException if reading from the in-memory stream fails
 * @throws MalformedRecordException if the reader rejects the CSV input
 */
@Test
public void testFieldInSchemaButNotHeader() throws IOException, MalformedRecordException {
    final List<RecordField> fields = getDefaultFields();
    final RecordSchema schema = new SimpleRecordSchema(fields);
    final String headerLine = "id, name, balance, address, city, state, zipCode";
    final String inputRecord = "1, John, 40.80, 123 My Street, My City, MS, 11111, USA";
    final String csvData = headerLine + "\n" + inputRecord;
    // Encode explicitly as UTF-8 so the bytes agree with the "UTF-8" encoding given to the
    // second reader below, instead of depending on the platform default charset. The data is
    // plain ASCII, so the first reader sees the same bytes under any ASCII-compatible default.
    final byte[] inputData = csvData.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    try (final InputStream bais = new ByteArrayInputStream(inputData);
        final JacksonCSVRecordReader reader = createReader(bais, schema, format)) {
        final Record record = reader.nextRecord();
        assertNotNull(record);
        assertEquals("1", record.getValue("id"));
        assertEquals("John", record.getValue("name"));
        assertEquals("40.80", record.getValue("balance"));
        assertEquals("123 My Street", record.getValue("address"));
        assertEquals("My City", record.getValue("city"));
        assertEquals("MS", record.getValue("state"));
        assertEquals("11111", record.getValue("zipCode"));
        // If schema says that there are fields a, b, c
        // and the CSV has a header line that says field names are a, b
        // and then the data has values 1,2,3
        // then a=1, b=2, c=null
        assertNull(record.getValue("country"));
        assertNull(reader.nextRecord());
    }
    // Build a second reader directly, passing (true, true) for the two boolean flags.
    // Per the note further down, this configures the reader to ignore the header names,
    // so we expect our schema to be the definitive list of what fields exist.
    // NOTE(review): which of the two flags means "ignore header" is not visible here; confirm
    // against the JacksonCSVRecordReader constructor.
    try (final InputStream bais = new ByteArrayInputStream(inputData);
        final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, true, RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
        final Record record = reader.nextRecord();
        assertNotNull(record);
        assertEquals("1", record.getValue("id"));
        assertEquals("John", record.getValue("name"));
        assertEquals("40.80", record.getValue("balance"));
        assertEquals("123 My Street", record.getValue("address"));
        assertEquals("My City", record.getValue("city"));
        assertEquals("MS", record.getValue("state"));
        assertEquals("11111", record.getValue("zipCode"));
        // If schema says that there are fields a, b, c
        // and the CSV has a header line that says field names are a, b
        // and then the data has values 1,2,3
        // then a=1, b=2, c=null
        // But if we configure the reader to Ignore the header, then this will not occur!
        assertEquals("USA", record.getValue("country"));
        assertNull(reader.nextRecord());
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) ComponentLog(org.apache.nifi.logging.ComponentLog) Test(org.junit.Test)

Example 30 with RecordField

use of org.apache.nifi.serialization.record.RecordField in project nifi by apache.

the class TestJacksonCSVRecordReader method testExtraFieldNotInHeader.

/**
 * Feeds a CSV whose header lists eight names but whose data row carries nine values, reads
 * it with {@code nextRecord(false, false)}, and checks that the ninth value, which has no
 * header name, is exposed under the key {@code unknown_field_index_8}.
 *
 * @throws IOException if reading from the in-memory stream fails
 * @throws MalformedRecordException if the reader rejects the CSV input
 */
@Test
public void testExtraFieldNotInHeader() throws IOException, MalformedRecordException {
    final List<RecordField> schemaFields = getDefaultFields();
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    final String header = "id, name, balance, address, city, state, zipCode, country";
    final String row = "1, John, 40.80, 123 My Street, My City, MS, 11111, USA, North America";
    final byte[] payload = (header + "\n" + row).getBytes();

    // {key, expected value} pairs, in column order; the last key is the one expected for
    // the trailing value that the header does not name.
    final String[][] expectations = {
        {"id", "1"},
        {"name", "John"},
        {"balance", "40.80"},
        {"address", "123 My Street"},
        {"city", "My City"},
        {"state", "MS"},
        {"zipCode", "11111"},
        {"country", "USA"},
        {"unknown_field_index_8", "North America"}
    };

    try (final InputStream in = new ByteArrayInputStream(payload);
        final JacksonCSVRecordReader csvReader = createReader(in, recordSchema, format)) {
        final Record parsed = csvReader.nextRecord(false, false);
        assertNotNull(parsed);
        for (final String[] expectation : expectations) {
            assertEquals(expectation[1], parsed.getValue(expectation[0]));
        }
        // Only one data row was supplied.
        assertNull(csvReader.nextRecord(false, false));
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) RecordField(org.apache.nifi.serialization.record.RecordField) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Aggregations

RecordField (org.apache.nifi.serialization.record.RecordField)173 SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema)133 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)130 ArrayList (java.util.ArrayList)116 Test (org.junit.Test)108 Record (org.apache.nifi.serialization.record.Record)97 MapRecord (org.apache.nifi.serialization.record.MapRecord)73 HashMap (java.util.HashMap)52 InputStream (java.io.InputStream)48 FileInputStream (java.io.FileInputStream)44 ByteArrayInputStream (java.io.ByteArrayInputStream)43 ComponentLog (org.apache.nifi.logging.ComponentLog)39 DataType (org.apache.nifi.serialization.record.DataType)37 LinkedHashMap (java.util.LinkedHashMap)36 File (java.io.File)21 ByteArrayOutputStream (java.io.ByteArrayOutputStream)20 SchemaNameAsAttribute (org.apache.nifi.schema.access.SchemaNameAsAttribute)17 RecordDataType (org.apache.nifi.serialization.record.type.RecordDataType)17 Schema (org.apache.avro.Schema)16 RecordFieldType (org.apache.nifi.serialization.record.RecordFieldType)16