
Example 46 with DataFileStream

use of org.apache.avro.file.DataFileStream in project nifi by apache.

the class TestJdbcTypesDerby method testSQLTypesMapping.

@Test
public void testSQLTypesMapping() throws ClassNotFoundException, SQLException, IOException {
    // remove previous test database, if any
    folder.delete();
    final Connection con = createConnection(folder.getRoot().getAbsolutePath());
    final Statement st = con.createStatement();
    try {
        st.executeUpdate(dropTable);
    } catch (final Exception e) {
        // the table may not exist; this is not a serious problem.
    }
    st.executeUpdate(createTable);
    st.executeUpdate("insert into users (email, password, activation_code, created, active) " + " values ('robert.gates@cold.com', '******', 'CAS', '2005-12-09', 'Y')");
    final ResultSet resultSet = st.executeQuery("select U.*, ROW_NUMBER() OVER () as rownr from users U");
    final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    JdbcCommon.convertToAvroStream(resultSet, outStream, false);
    final byte[] serializedBytes = outStream.toByteArray();
    assertNotNull(serializedBytes);
    System.out.println("Avro serialized result size in bytes: " + serializedBytes.length);
    st.close();
    con.close();
    // Deserialize bytes to records
    final InputStream instream = new ByteArrayInputStream(serializedBytes);
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            System.out.println(record);
        }
    }
}
Also used : Statement(java.sql.Statement) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Connection(java.sql.Connection) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) IOException(java.io.IOException) SQLException(java.sql.SQLException) ResultSet(java.sql.ResultSet) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)
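
Because the Avro container format embeds the writer schema, the SQL-to-Avro type mapping that this test exercises can be inspected directly from the DataFileStream. A minimal sketch, assuming serializedBytes was produced by JdbcCommon.convertToAvroStream as above; printAvroFieldTypes is a hypothetical helper, not part of the NiFi test:

private static void printAvroFieldTypes(final byte[] serializedBytes) throws IOException {
    try (final DataFileStream<GenericRecord> reader = new DataFileStream<>(new ByteArrayInputStream(serializedBytes), new GenericDatumReader<>())) {
        // The embedded writer schema records how each SQL column was mapped to an Avro type.
        for (final Schema.Field field : reader.getSchema().getFields()) {
            System.out.println(field.name() + " -> " + field.schema());
        }
    }
}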

Example 47 with DataFileStream

use of org.apache.avro.file.DataFileStream in project nifi by apache.

the class TestJdbcTypesH2 method testSQLTypesMapping.

@Test
public void testSQLTypesMapping() throws ClassNotFoundException, SQLException, IOException {
    final Connection con = createConnection(folder.getRoot().getAbsolutePath());
    final Statement st = con.createStatement();
    try {
        st.executeUpdate(dropTable);
    } catch (final Exception e) {
        // the table may not exist; this is not a serious problem.
    }
    st.executeUpdate(createTable);
    // st.executeUpdate("insert into users (email, password, activation_code, forgotten_password_code, forgotten_password_time, created, active, home_module_id) "
    // + " values ('robert.gates@cold.com', '******', 'CAS', 'ounou', '2005-12-09', '2005-12-03', 1, 5)");
    st.executeUpdate("insert into users (email, password, activation_code, created, active, somebinary, somebinary2, somebinary3, someblob, someclob) " + " values ('mari.gates@cold.com', '******', 'CAS', '2005-12-03', 3, '66FF', 'ABDF', 'EE64', 'BB22', 'CC88')");
    final ResultSet resultSet = st.executeQuery("select U.*, ROW_NUMBER() OVER () as rownr from users U");
    // final ResultSet resultSet = st.executeQuery("select U.active from users U");
    // final ResultSet resultSet = st.executeQuery("select U.somebinary from users U");
    final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    JdbcCommon.convertToAvroStream(resultSet, outStream, false);
    final byte[] serializedBytes = outStream.toByteArray();
    assertNotNull(serializedBytes);
    System.out.println("Avro serialized result size in bytes: " + serializedBytes.length);
    st.close();
    con.close();
    // Deserialize bytes to records
    final InputStream instream = new ByteArrayInputStream(serializedBytes);
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            System.out.println(record);
        }
    }
}
Also used : Statement(java.sql.Statement) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Connection(java.sql.Connection) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) IOException(java.io.IOException) SQLException(java.sql.SQLException) ResultSet(java.sql.ResultSet) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 48 with DataFileStream

use of org.apache.avro.file.DataFileStream in project cdap by caskdata.

the class DynamicPartitionerWithAvroTest method readOutput.

private Set<GenericRecord> readOutput(Location location) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(SCHEMA);
    Set<GenericRecord> records = new HashSet<>();
    for (Location file : location.list()) {
        if (file.getName().endsWith(".avro")) {
            DataFileStream<GenericRecord> fileStream = new DataFileStream<>(file.getInputStream(), datumReader);
            Iterables.addAll(records, fileStream);
            fileStream.close();
        }
    }
    return records;
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) DataFileStream(org.apache.avro.file.DataFileStream) HashSet(java.util.HashSet) Location(org.apache.twill.filesystem.Location)
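
The snippet above closes each stream by hand, so an exception thrown while reading would leak the underlying input stream. A try-with-resources variant of the same loop, shown here as a sketch rather than the CDAP code, keeps the behavior but guarantees the stream is closed:

private Set<GenericRecord> readOutput(Location location) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(SCHEMA);
    Set<GenericRecord> records = new HashSet<>();
    for (Location file : location.list()) {
        if (file.getName().endsWith(".avro")) {
            // try-with-resources closes the stream even if Iterables.addAll fails part-way through
            try (DataFileStream<GenericRecord> fileStream = new DataFileStream<>(file.getInputStream(), datumReader)) {
                Iterables.addAll(records, fileStream);
            }
        }
    }
    return records;
}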

Example 49 with DataFileStream

use of org.apache.avro.file.DataFileStream in project kafka-connect-storage-cloud by confluentinc.

the class DataWriterAvroTest method testCompressFile.

@Test
public void testCompressFile() throws Exception {
    String avroCodec = "snappy";
    localProps.put(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG, avroCodec);
    setUp();
    task = new S3SinkTask(connectorConfig, context, storage, partitioner, format, SYSTEM_TIME);
    List<SinkRecord> sinkRecords = createRecords(7);
    // Perform write
    task.put(sinkRecords);
    task.close(context.assignment());
    task.stop();
    List<S3ObjectSummary> summaries = listObjects(S3_TEST_BUCKET_NAME, "/", s3);
    for (S3ObjectSummary summary : summaries) {
        InputStream in = s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent();
        DatumReader<Object> reader = new GenericDatumReader<>();
        DataFileStream<Object> streamReader = new DataFileStream<>(in, reader);
        // make sure that produced Avro file has proper codec set
        Assert.assertEquals(avroCodec, streamReader.getMetaString(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG));
        streamReader.close();
    }
    long[] validOffsets = { 0, 3, 6 };
    verify(sinkRecords, validOffsets);
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) DataFileStream(org.apache.avro.file.DataFileStream) Test(org.junit.Test)
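
The codec name that this test reads back is written by Avro itself: when a file is created with a compression codec, DataFileWriter records it in the container metadata under the key "avro.codec", which is why the test above can retrieve it with getMetaString. A minimal round trip, assuming snappy-java is on the classpath and using a throwaway schema and record purely for illustration:

static void writeAndReadCodec() throws IOException {
    Schema schema = SchemaBuilder.record("Example").fields().requiredString("name").endRecord();
    GenericRecord record = new GenericData.Record(schema);
    record.put("name", "value");
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        // the codec chosen here is stored in the file metadata as "avro.codec"
        writer.setCodec(CodecFactory.snappyCodec());
        writer.create(schema, out);
        writer.append(record);
    }
    try (DataFileStream<GenericRecord> reader = new DataFileStream<>(new ByteArrayInputStream(out.toByteArray()), new GenericDatumReader<>())) {
        // prints "snappy"
        System.out.println(reader.getMetaString("avro.codec"));
    }
}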

Example 50 with DataFileStream

use of org.apache.avro.file.DataFileStream in project components by Talend.

the class MiniDfsResource method assertReadAvroFile.

/**
 * Tests that a file on the HDFS cluster contains the given Avro records.
 *
 * @param fs the file system of the HDFS cluster
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected Avro records in the file
 * @param part true when called recursively on a single file inside a directory, so the
 *        completeness check is deferred until all files have been read
 */
public static void assertReadAvroFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part) throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(new BufferedInputStream(fs.open(new Path(path))), new GenericDatumReader<GenericRecord>())) {
            IndexedRecord record = null;
            while (reader.hasNext()) {
                record = reader.iterator().next();
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadAvroFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) IndexedRecord(org.apache.avro.generic.IndexedRecord) BufferedInputStream(java.io.BufferedInputStream) GenericRecord(org.apache.avro.generic.GenericRecord) DataFileStream(org.apache.avro.file.DataFileStream)
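
DataFileStream also implements Iterable<GenericRecord>, so the explicit hasNext()/next() loop (or reader.iterator().next() as above) can be replaced with a plain for-each. A brief sketch reusing fs and p from the method above; it gives up the record-reuse optimization shown in Examples 46 and 47:

try (DataFileStream<GenericRecord> reader = new DataFileStream<>(new BufferedInputStream(fs.open(p)), new GenericDatumReader<GenericRecord>())) {
    for (GenericRecord record : reader) {
        // a fresh record is allocated on every iteration, unlike next(record), which reuses one
        System.out.println(record);
    }
}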

Aggregations

DataFileStream (org.apache.avro.file.DataFileStream): 59
GenericRecord (org.apache.avro.generic.GenericRecord): 39
GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 34
Test (org.junit.Test): 26
Schema (org.apache.avro.Schema): 21
ByteArrayInputStream (java.io.ByteArrayInputStream): 20
InputStream (java.io.InputStream): 19
IOException (java.io.IOException): 13
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 11
File (java.io.File): 9
FileInputStream (java.io.FileInputStream): 9
ResultSet (java.sql.ResultSet): 9
HashMap (java.util.HashMap): 9
MockFlowFile (org.apache.nifi.util.MockFlowFile): 9
Statement (java.sql.Statement): 8
BufferedInputStream (java.io.BufferedInputStream): 7
HashSet (java.util.HashSet): 7
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 7
DataFileWriter (org.apache.avro.file.DataFileWriter): 7
Path (org.apache.hadoop.fs.Path): 7