Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From the class TestJdbcTypesDerby, method testSQLTypesMapping.
    @Test
    public void testSQLTypesMapping() throws ClassNotFoundException, SQLException, IOException {
        // remove the previous test database, if any
        folder.delete();
        final Connection con = createConnection(folder.getRoot().getAbsolutePath());
        final Statement st = con.createStatement();
        try {
            st.executeUpdate(dropTable);
        } catch (final Exception e) {
            // the table may not exist; this is not a serious problem
        }
        st.executeUpdate(createTable);
        st.executeUpdate("insert into users (email, password, activation_code, created, active) "
                + " values ('robert.gates@cold.com', '******', 'CAS', '2005-12-09', 'Y')");
        final ResultSet resultSet = st.executeQuery("select U.*, ROW_NUMBER() OVER () as rownr from users U");
        final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        JdbcCommon.convertToAvroStream(resultSet, outStream, false);
        final byte[] serializedBytes = outStream.toByteArray();
        assertNotNull(serializedBytes);
        System.out.println("Avro serialized result size in bytes: " + serializedBytes.length);
        st.close();
        con.close();
        // Deserialize the bytes back into records
        final InputStream instream = new ByteArrayInputStream(serializedBytes);
        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
            GenericRecord record = null;
            while (dataFileReader.hasNext()) {
                // Reuse the record object by passing it to next(). This saves us from
                // allocating and garbage collecting many objects for files with many items.
                record = dataFileReader.next(record);
                System.out.println(record);
            }
        }
    }
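Because the Avro container format stores the writer schema in the file header, the GenericDatumReader above is constructed without any schema. A minimal sketch (assuming serializedBytes is still in scope) of pulling the embedded schema and a single field back out; the "email" field name comes from the insert statement above:

    try (DataFileStream<GenericRecord> reader = new DataFileStream<>(
            new ByteArrayInputStream(serializedBytes), new GenericDatumReader<GenericRecord>())) {
        // The writer schema travels with the data, so it can be inspected directly.
        System.out.println("Embedded schema: " + reader.getSchema().toString(true));
        // DataFileStream implements Iterable<GenericRecord>, so for-each works.
        for (GenericRecord rec : reader) {
            System.out.println("email = " + rec.get("email"));
        }
    }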
Use of org.apache.avro.file.DataFileStream in project nifi by apache.
From the class TestJdbcTypesH2, method testSQLTypesMapping.
    @Test
    public void testSQLTypesMapping() throws ClassNotFoundException, SQLException, IOException {
        final Connection con = createConnection(folder.getRoot().getAbsolutePath());
        final Statement st = con.createStatement();
        try {
            st.executeUpdate(dropTable);
        } catch (final Exception e) {
            // the table may not exist; this is not a serious problem
        }
        st.executeUpdate(createTable);
        // st.executeUpdate("insert into users (email, password, activation_code, forgotten_password_code, forgotten_password_time, created, active, home_module_id) "
        //     + " values ('robert.gates@cold.com', '******', 'CAS', 'ounou', '2005-12-09', '2005-12-03', 1, 5)");
        st.executeUpdate("insert into users (email, password, activation_code, created, active, somebinary, somebinary2, somebinary3, someblob, someclob) "
                + " values ('mari.gates@cold.com', '******', 'CAS', '2005-12-03', 3, '66FF', 'ABDF', 'EE64', 'BB22', 'CC88')");
        final ResultSet resultSet = st.executeQuery("select U.*, ROW_NUMBER() OVER () as rownr from users U");
        // final ResultSet resultSet = st.executeQuery("select U.active from users U");
        // final ResultSet resultSet = st.executeQuery("select U.somebinary from users U");
        final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        JdbcCommon.convertToAvroStream(resultSet, outStream, false);
        final byte[] serializedBytes = outStream.toByteArray();
        assertNotNull(serializedBytes);
        System.out.println("Avro serialized result size in bytes: " + serializedBytes.length);
        st.close();
        con.close();
        // Deserialize the bytes back into records
        final InputStream instream = new ByteArrayInputStream(serializedBytes);
        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
            GenericRecord record = null;
            while (dataFileReader.hasNext()) {
                // Reuse the record object by passing it to next(). This saves us from
                // allocating and garbage collecting many objects for files with many items.
                record = dataFileReader.next(record);
                System.out.println(record);
            }
        }
    }
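The loop above only prints each record; a test would normally assert something about them. A minimal sketch (assuming serializedBytes is in scope and JUnit's assertEquals is statically imported) that counts the deserialized rows against the single row inserted above:

    int rowCount = 0;
    try (DataFileStream<GenericRecord> reader = new DataFileStream<>(
            new ByteArrayInputStream(serializedBytes), new GenericDatumReader<GenericRecord>())) {
        while (reader.hasNext()) {
            reader.next();
            rowCount++;
        }
    }
    assertEquals(1, rowCount);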
Use of org.apache.avro.file.DataFileStream in project cdap by caskdata.
From the class DynamicPartitionerWithAvroTest, method readOutput.
    private Set<GenericRecord> readOutput(Location location) throws IOException {
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(SCHEMA);
        Set<GenericRecord> records = new HashSet<>();
        for (Location file : location.list()) {
            if (file.getName().endsWith(".avro")) {
                // DataFileStream implements Iterable<GenericRecord>, so it can be drained
                // straight into the set; try-with-resources ensures the stream is closed
                // even if reading fails.
                try (DataFileStream<GenericRecord> fileStream =
                        new DataFileStream<>(file.getInputStream(), datumReader)) {
                    Iterables.addAll(records, fileStream);
                }
            }
        }
        return records;
    }
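Passing SCHEMA to the GenericDatumReader sets an explicit reader schema, so Avro resolves each file's embedded writer schema against it (schema evolution). A minimal sketch of the same drain without Guava, assuming a plain java.io.InputStream named in rather than a CDAP Location:

    Set<GenericRecord> records = new HashSet<>();
    try (DataFileStream<GenericRecord> stream = new DataFileStream<>(
            in, new GenericDatumReader<GenericRecord>(SCHEMA))) {
        for (GenericRecord record : stream) {
            records.add(record);
        }
    }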
Use of org.apache.avro.file.DataFileStream in project kafka-connect-storage-cloud by confluentinc.
From the class DataWriterAvroTest, method testCompressFile.
    @Test
    public void testCompressFile() throws Exception {
        String avroCodec = "snappy";
        localProps.put(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG, avroCodec);
        setUp();
        task = new S3SinkTask(connectorConfig, context, storage, partitioner, format, SYSTEM_TIME);
        List<SinkRecord> sinkRecords = createRecords(7);
        // Perform the write
        task.put(sinkRecords);
        task.close(context.assignment());
        task.stop();
        List<S3ObjectSummary> summaries = listObjects(S3_TEST_BUCKET_NAME, "/", s3);
        for (S3ObjectSummary summary : summaries) {
            InputStream in = s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent();
            DatumReader<Object> reader = new GenericDatumReader<>();
            // make sure the produced Avro file has the proper codec set;
            // try-with-resources closes the stream even if the assertion fails
            try (DataFileStream<Object> streamReader = new DataFileStream<>(in, reader)) {
                Assert.assertEquals(avroCodec, streamReader.getMetaString(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG));
            }
        }
        long[] validOffsets = { 0, 3, 6 };
        verify(sinkRecords, validOffsets);
    }
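The assertion implicitly relies on StorageSinkConnectorConfig.AVRO_CODEC_CONFIG resolving to "avro.codec", the reserved container-metadata key under which Avro records the compression codec of a file. A standalone sketch of the same check using Avro's own constant for that key (the file path is hypothetical):

    try (DataFileStream<Object> stream = new DataFileStream<>(
            new FileInputStream("/tmp/example.avro"), new GenericDatumReader<>())) {
        // DataFileConstants.CODEC is the reserved "avro.codec" metadata key.
        System.out.println("codec = " + stream.getMetaString(DataFileConstants.CODEC));
    }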
Use of org.apache.avro.file.DataFileStream in project components by Talend.
From the class MiniDfsResource, method assertReadAvroFile.
    /**
     * Tests that a file or directory on the HDFS cluster contains the given Avro records.
     *
     * @param fs the filesystem of the HDFS cluster
     * @param path the name of the file (or directory of part files) on the HDFS cluster
     * @param expected the expected Avro records in the file; found records are removed from this set
     * @param part whether this call covers a single part file of a directory, in which case the
     *        completeness check is deferred to the caller
     */
    public static void assertReadAvroFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part) throws IOException {
        Path p = new Path(path);
        if (fs.isFile(p)) {
            try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
                    new BufferedInputStream(fs.open(new Path(path))), new GenericDatumReader<GenericRecord>())) {
                while (reader.hasNext()) {
                    IndexedRecord record = reader.next();
                    // Find the matching expected record, comparing in the direction of the
                    // expected record's equals() implementation, then remove it from the set.
                    IndexedRecord eqRecord = null;
                    for (IndexedRecord indexedRecord : expected) {
                        if (indexedRecord.equals(record)) {
                            eqRecord = indexedRecord;
                            break;
                        }
                    }
                    expected.remove(eqRecord);
                }
            }
            // Check before asserting, so the failure message can name a missing record.
            if (!part && expected.size() != 0)
                assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
        } else if (fs.isDirectory(p)) {
            for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
                assertReadAvroFile(fs, fstatus.getPath().toString(), expected, true);
            }
            // Check before asserting, so the failure message can name a missing record.
            if (expected.size() != 0)
                assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
        } else {
            fail("No such path: " + path);
        }
    }
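Note that the helper consumes the expected set, removing each record as it is found, so callers must pass a mutable copy. A hypothetical usage sketch (fs, record1, record2, and the output path are stand-ins for a test's own fixtures):

    Set<IndexedRecord> expected = new HashSet<>(Arrays.asList(record1, record2));
    MiniDfsResource.assertReadAvroFile(fs, "/user/test/output", expected, false);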