Search in sources :

Example 1 with SeekableByteArrayInput

use of org.apache.avro.file.SeekableByteArrayInput in project kylo by Teradata.

the class GetTableDataTest method testAvro.

/**
 * Verify Avro output.
 *
 * <p>Runs the processor with the AVRO output type, checks that exactly one flow file is routed to
 * success with a row count of 2, then decodes the flow file content as an Avro data file and
 * compares every field of both records by position.</p>
 *
 * @throws IOException if the Avro content cannot be read
 */
@Test
public void testAvro() throws IOException {
    // Trigger processor
    runner.setProperty(GetTableData.OUTPUT_TYPE, GetTableDataSupport.OutputType.AVRO.toString());
    runner.enqueue(new byte[0]);
    runner.run();

    // Verify routing and attributes
    final List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(CommonProperties.REL_SUCCESS);
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(CommonProperties.REL_FAILURE).size());
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(GetTableData.REL_NO_DATA).size());
    Assert.assertEquals(1, flowFiles.size());
    final MockFlowFile flowFile = flowFiles.get(0);
    Assert.assertEquals("2", flowFile.getAttribute(GetTableData.RESULT_ROW_COUNT));
    Assert.assertEquals("2", flowFile.getAttribute(ComponentAttributes.NUM_SOURCE_RECORDS.key()));

    // Build Avro record reader
    final Schema schema = SchemaBuilder.record("NiFi_ExecuteSQL_Record").namespace("any.data").fields()
        .name("id").type().nullable().intType().noDefault()
        .name("first_name").type().nullable().stringType().noDefault()
        .name("last_name").type().nullable().stringType().noDefault()
        .name("email").type().nullable().stringType().noDefault()
        .name("last_updated").type().nullable().stringType().noDefault()
        .endRecord();
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);

    // Read every record, closing the reader (and its input) even if decoding fails
    final List<GenericRecord> records;
    try (SeekableInput avroInput = new SeekableByteArrayInput(flowFile.toByteArray());
         DataFileReader<GenericRecord> dataReader = new DataFileReader<>(avroInput, datumReader)) {
        records = StreamSupport.stream(dataReader.spliterator(), false).collect(Collectors.toList());
    }

    // Verify Avro records
    Assert.assertEquals(2, records.size());

    final GenericRecord first = records.get(0);
    Assert.assertEquals(1, first.get(0));
    Assert.assertEquals(new Utf8("Mike"), first.get(1));
    Assert.assertEquals(new Utf8("Hillyer"), first.get(2));
    Assert.assertEquals(new Utf8("Mike.Hillyer@sakilastaff.com"), first.get(3));
    Assert.assertEquals(new Utf8("2006-02-15T03:57:16.000Z"), first.get(4));

    final GenericRecord second = records.get(1);
    Assert.assertEquals(2, second.get(0));
    Assert.assertEquals(new Utf8("Jon"), second.get(1));
    Assert.assertEquals(new Utf8("Stephens"), second.get(2));
    Assert.assertEquals(new Utf8("Jon.Stephens@sakilastaff.com"), second.get(3));
    Assert.assertEquals(new Utf8("2006-02-15T03:57:16.000Z"), second.get(4));
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) DataFileReader(org.apache.avro.file.DataFileReader) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) Utf8(org.apache.avro.util.Utf8) SeekableInput(org.apache.avro.file.SeekableInput) GenericRecord(org.apache.avro.generic.GenericRecord) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput) Test(org.junit.Test)

Example 2 with SeekableByteArrayInput

use of org.apache.avro.file.SeekableByteArrayInput in project kylo by Teradata.

the class JdbcCommonTest method convertToAvroStream.

/**
 * Verify converting results to avro.
 *
 * <p>Mocks a single-row result set with 14 columns of assorted JDBC types, converts it with
 * {@code JdbcCommon.convertToAvroStream}, checks that the row visitor is called once per column
 * in column order, and finally decodes the produced Avro bytes and compares each field.</p>
 *
 * @throws Exception if mocking, conversion or Avro decoding fails
 */
@Test
public void convertToAvroStream() throws Exception {
    // Mock result set metadata; array index i describes JDBC column i + 1
    final String[] columnNames = {
        "event", "empty", "binary", "byte", "decimal", "maxlong", "date",
        "time", "timestamp", "bool", "int", "id", "float", "double"
    };
    final int[] columnTypes = {
        Types.VARCHAR, Types.VARCHAR, Types.VARBINARY, Types.TINYINT, Types.DECIMAL, Types.BIGINT, Types.DATE,
        Types.TIME, Types.TIMESTAMP, Types.BOOLEAN, Types.INTEGER, Types.ROWID, Types.FLOAT, Types.DOUBLE
    };
    final ResultSetMetaData metadata = Mockito.mock(ResultSetMetaData.class);
    Mockito.when(metadata.getColumnCount()).thenReturn(columnNames.length);
    for (int i = 0; i < columnNames.length; i++) {
        Mockito.when(metadata.getColumnName(i + 1)).thenReturn(columnNames[i]);
        Mockito.when(metadata.getColumnType(i + 1)).thenReturn(columnTypes[i]);
    }
    Mockito.when(metadata.getTableName(Mockito.anyInt())).thenReturn("mockito");
    Mockito.when(metadata.isSigned(11)).thenReturn(true);

    // Mock result set
    final ResultSet results = Mockito.mock(ResultSet.class);
    Mockito.when(results.getByte(4)).thenReturn((byte) 42);
    Mockito.when(results.getBytes(3)).thenReturn(new byte[] { 72, 73 });
    Mockito.when(results.getDate(7)).thenReturn(new Date(1483660800000L));
    Mockito.when(results.getMetaData()).thenReturn(metadata);
    Mockito.when(results.getObject(1)).thenReturn("Fun Friday");
    Mockito.when(results.getObject(2)).thenReturn(null);
    Mockito.when(results.getObject(3)).thenReturn(new byte[] { 72, 73 });
    Mockito.when(results.getObject(4)).thenReturn((byte) 42);
    Mockito.when(results.getObject(5)).thenReturn(new BigDecimal("3.14159265359"));
    Mockito.when(results.getObject(6)).thenReturn(Long.MAX_VALUE);
    Mockito.when(results.getObject(7)).thenReturn(new Date(1483660800000L));
    Mockito.when(results.getObject(8)).thenReturn(new Time(42600000L));
    Mockito.when(results.getObject(9)).thenReturn(new Timestamp(1483703400000L));
    Mockito.when(results.getObject(10)).thenReturn(Boolean.TRUE);
    Mockito.when(results.getObject(11)).thenReturn(12);
    Mockito.when(results.getObject(12)).thenReturn((RowId) () -> new byte[] { 1 });
    Mockito.when(results.getObject(13)).thenReturn(2.5f);
    Mockito.when(results.getObject(14)).thenReturn(1.61803);
    Mockito.when(results.getTime(8)).thenReturn(new Time(42600000L));
    // Reading the DATE column as a timestamp is stubbed to fail
    Mockito.when(results.getTimestamp(7)).thenThrow(SQLException.class);
    Mockito.when(results.getTimestamp(9)).thenReturn(new Timestamp(1483703400000L));
    // Exactly one row
    Mockito.when(results.next()).thenReturn(true).thenReturn(false);

    // Test converting to avro
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final RowVisitor visitor = Mockito.mock(RowVisitor.class);
    final Schema avroSchema = JdbcCommon.createSchema(results);
    JdbcCommon.convertToAvroStream(results, out, visitor, avroSchema);

    // Verify the visitor saw the row, then every column in order, and nothing else
    final InOrder inOrder = Mockito.inOrder(visitor);
    inOrder.verify(visitor).visitRow(results);
    inOrder.verify(visitor).visitColumn("event", Types.VARCHAR, "Fun Friday");
    inOrder.verify(visitor).visitColumn("empty", Types.VARCHAR, (String) null);
    inOrder.verify(visitor).visitColumn(Mockito.eq("binary"), Mockito.eq(Types.VARBINARY), Mockito.anyString());
    inOrder.verify(visitor).visitColumn("byte", Types.TINYINT, "42");
    inOrder.verify(visitor).visitColumn("decimal", Types.DECIMAL, "3.14159265359");
    inOrder.verify(visitor).visitColumn("maxlong", Types.BIGINT, Long.toString(Long.MAX_VALUE));
    inOrder.verify(visitor).visitColumn("date", Types.DATE, new Date(1483660800000L));
    inOrder.verify(visitor).visitColumn("time", Types.TIME, new Time(42600000L));
    inOrder.verify(visitor).visitColumn("timestamp", Types.TIMESTAMP, new Timestamp(1483703400000L));
    inOrder.verify(visitor).visitColumn("bool", Types.BOOLEAN, "true");
    inOrder.verify(visitor).visitColumn("int", Types.INTEGER, "12");
    inOrder.verify(visitor).visitColumn(Mockito.eq("id"), Mockito.eq(Types.ROWID), Mockito.anyString());
    inOrder.verify(visitor).visitColumn("float", Types.FLOAT, "2.5");
    inOrder.verify(visitor).visitColumn("double", Types.DOUBLE, "1.61803");
    inOrder.verifyNoMoreInteractions();

    // Reader schema used to decode the generated Avro bytes
    final Schema schema = SchemaBuilder.record("mockito").namespace("any.data").fields()
        .name("event").type().nullable().stringType().noDefault()
        .name("empty").type().nullable().stringType().noDefault()
        .name("binary").type().nullable().bytesType().noDefault()
        .name("byte").type().nullable().intType().noDefault()
        .name("decimal").type().nullable().stringType().noDefault()
        .name("maxlong").type().nullable().longType().noDefault()
        .name("date").type().nullable().stringType().noDefault()
        .name("time").type().nullable().stringType().noDefault()
        .name("timestamp").type().nullable().stringType().noDefault()
        .name("bool").type().nullable().booleanType().noDefault()
        .name("int").type().nullable().intType().noDefault()
        .name("id").type().nullable().stringType().noDefault()
        .name("float").type().nullable().floatType().noDefault()
        .name("double").type().nullable().doubleType().noDefault()
        .endRecord();
    final SeekableInput input = new SeekableByteArrayInput(out.toByteArray());
    final GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);

    // Decode and verify the single record; the reader is closed even when an assertion fails
    try (DataFileReader<GenericRecord> dataReader = new DataFileReader<>(input, datumReader)) {
        final GenericRecord record = dataReader.next();
        assertEquals(new Utf8("Fun Friday"), record.get(0));
        Assert.assertNull(record.get(1));
        assertEquals(ByteBuffer.wrap(new byte[] { 72, 73 }), record.get(2));
        assertEquals(42, record.get(3));
        assertEquals(new Utf8("3.14159265359"), record.get(4));
        assertEquals(Long.MAX_VALUE, record.get(5));
        assertEquals(new Utf8("2017-01-06T00:00:00.000Z"), record.get(6));
        assertEquals(new Utf8("11:50:00.000Z"), record.get(7));
        assertEquals(new Utf8("2017-01-06T11:50:00.000Z"), record.get(8));
        assertEquals(Boolean.TRUE, record.get(9));
        assertEquals(12, record.get(10));
        Assert.assertNotNull(record.get(11));
        assertEquals(2.5f, record.get(12));
        assertEquals(1.61803, record.get(13));
        Assert.assertFalse(dataReader.hasNext());
    }
}
Also used : InOrder(org.mockito.InOrder) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) Time(java.sql.Time) SeekableInput(org.apache.avro.file.SeekableInput) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Timestamp(java.sql.Timestamp) Date(java.sql.Date) BigDecimal(java.math.BigDecimal) ResultSetMetaData(java.sql.ResultSetMetaData) DataFileReader(org.apache.avro.file.DataFileReader) ResultSet(java.sql.ResultSet) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) RowVisitor(com.thinkbiganalytics.nifi.thrift.api.RowVisitor) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput) Test(org.junit.Test)

Example 3 with SeekableByteArrayInput

use of org.apache.avro.file.SeekableByteArrayInput in project h2o-3 by h2oai.

the class AvroParser method runOnPreview.

/**
 * Opens the given bytes as an Avro data file and hands the first record to {@code processor}.
 *
 * <p>The header passed to the processor is the prefix of {@code bits} up to the reader's
 * {@code previousSync()} position right after opening. Any {@link IOException} raised while
 * closing the reader is deliberately ignored.</p>
 *
 * @param bits      raw bytes of an Avro data file
 * @param processor callback receiving the header bytes, the first record, and the reader's
 *                  current block count and block size
 * @param <T>       result type produced by the processor
 * @return whatever {@code processor.process(...)} returns
 * @throws IOException      if the Avro reader cannot be opened or read
 * @throws RuntimeException if the file contains no records
 */
static <T> T runOnPreview(byte[] bits, AvroPreviewProcessor<T> processor) throws IOException {
    final DatumReader<GenericRecord> recordReader = new GenericDatumReader<GenericRecord>();
    final SeekableByteArrayInput input = new SeekableByteArrayInput(bits);
    DataFileReader<GenericRecord> reader = null;
    try {
        reader = new DataFileReader<>(input, recordReader);
        // Everything before the first sync position is treated as the file header
        final byte[] header = Arrays.copyOf(bits, (int) reader.previousSync());
        if (!reader.hasNext()) {
            throw new RuntimeException("Empty Avro file - cannot run preview! ");
        }
        final GenericRecord firstRecord = reader.next();
        return processor.process(header, firstRecord, reader.getBlockCount(), reader.getBlockSize());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException safeToIgnore) {
                // best-effort close; a failure here must not mask the result or the original error
            }
        }
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput)

Example 4 with SeekableByteArrayInput

use of org.apache.avro.file.SeekableByteArrayInput in project h2o-3 by h2oai.

the class AvroParser method parseChunk.

/**
 * Parses the Avro records of one chunk and writes them to {@code dout}.
 *
 * <p>The Avro header stored in {@code this.header} is re-read to obtain a header object, which is
 * then used to open a reader over the chunk data. Records are consumed only while the reader stays
 * within the sync block it started in, and only if the input adaptor reports a chunk count of 0.
 * Any failure is printed and otherwise swallowed, so {@code dout} is always returned.</p>
 *
 * @param cidx index of the chunk to parse
 * @param din  source of the chunk data
 * @param dout writer receiving the parsed values
 * @return {@code dout}
 */
@Override
protected final ParseWriter parseChunk(int cidx, ParseReader din, ParseWriter dout) {
    // We will read GenericRecord and load them based on schema
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    final H2OSeekableInputAdaptor sbai = new H2OSeekableInputAdaptor(cidx, din);
    // NOTE(review): this reader is never closed; closing it would also close sbai - confirm that is safe before adding a close
    DataFileReader<GenericRecord> dataFileReader = null;
    int cnt = 0;
    try {
        // Reconstruct Avro header; the temporary reader over the header bytes is closed right away
        final DataFileStream.Header fakeHeader;
        try (DataFileReader<GenericRecord> headerReader = new DataFileReader<>(new SeekableByteArrayInput(this.header), datumReader)) {
            fakeHeader = headerReader.getHeader();
        }
        dataFileReader = DataFileReader.openReader(sbai, datumReader, fakeHeader, true);
        Schema schema = dataFileReader.getSchema();
        GenericRecord gr = new GenericData.Record(schema);
        Schema.Field[] flatSchema = flatSchema(schema);
        long sync = dataFileReader.previousSync();
        if (sbai.chunkCnt == 0) {
            // Find data in first chunk
            while (dataFileReader.hasNext() && dataFileReader.previousSync() == sync) {
                gr = dataFileReader.next(gr);
                // Write values to the output
                // FIXME: what if user change input names, or ignore an input column?
                write2frame(gr, _setup.getColumnNames(), flatSchema, _setup.getColumnTypes(), dout);
                cnt++;
            }
        }
        // else first chunk does not contain synchronization block, so give up and let another reader to use it
    } catch (Throwable e) {
        e.printStackTrace();
    }
    // The reader is still null when opening it failed above; report -1 instead of
    // turning the already-handled failure into a NullPointerException.
    final long blockCount = dataFileReader != null ? dataFileReader.getBlockCount() : -1;
    final long blockSize = dataFileReader != null ? dataFileReader.getBlockSize() : -1;
    Log.trace(String.format("Avro: ChunkIdx: %d read %d records, start at %d off, block count: %d, block size: %d", cidx, cnt, din.getChunkDataStart(cidx), blockCount, blockSize));
    return dout;
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileStream(org.apache.avro.file.DataFileStream) GenericRecord(org.apache.avro.generic.GenericRecord) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput)

Example 5 with SeekableByteArrayInput

use of org.apache.avro.file.SeekableByteArrayInput in project sling by apache.

the class AvroContentSerializer method readAvroResources.

/**
 * Decodes an Avro data file held in memory into its {@code AvroShallowResource} records.
 *
 * @param bytes raw bytes of an Avro data file
 * @return the decoded resources, in the order they appear in the file
 * @throws IOException if the Avro reader cannot be opened or closed
 */
private Collection<AvroShallowResource> readAvroResources(byte[] bytes) throws IOException {
    DatumReader<AvroShallowResource> resourceReader = new SpecificDatumReader<AvroShallowResource>(AvroShallowResource.class);
    SeekableByteArrayInput input = new SeekableByteArrayInput(bytes);
    DataFileReader<AvroShallowResource> fileReader = new DataFileReader<AvroShallowResource>(input, resourceReader);
    Collection<AvroShallowResource> result = new LinkedList<AvroShallowResource>();
    try {
        // Drain the reader record by record
        while (fileReader.hasNext()) {
            result.add(fileReader.next());
        }
    } finally {
        fileReader.close();
    }
    return result;
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput) LinkedList(java.util.LinkedList)

Aggregations

SeekableByteArrayInput (org.apache.avro.file.SeekableByteArrayInput)6 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)5 GenericRecord (org.apache.avro.generic.GenericRecord)5 DataFileReader (org.apache.avro.file.DataFileReader)4 Schema (org.apache.avro.Schema)3 SeekableInput (org.apache.avro.file.SeekableInput)2 Utf8 (org.apache.avro.util.Utf8)2 Test (org.junit.Test)2 RowVisitor (com.thinkbiganalytics.nifi.thrift.api.RowVisitor)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 BigDecimal (java.math.BigDecimal)1 Date (java.sql.Date)1 ResultSet (java.sql.ResultSet)1 ResultSetMetaData (java.sql.ResultSetMetaData)1 Time (java.sql.Time)1 Timestamp (java.sql.Timestamp)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 DataFileStream (org.apache.avro.file.DataFileStream)1