Use of org.apache.avro.generic.GenericRecord in project drill by apache.
The class AvroTestUtil, method generateLinkedList: writes RECORD_COUNT linked "LongList" records to a temporary Avro file and returns the file path.
public static String generateLinkedList() throws Exception {
  final File file = File.createTempFile("avro-linkedlist", ".avro");
  file.deleteOnExit();
  final Schema schema = SchemaBuilder.record("LongList")
      .namespace("org.apache.drill.exec.store.avro")
      .aliases("LinkedLongs")
      .fields()
      .name("value").type().optional().longType()
      .name("next").type().optional().type("LongList")
      .endRecord();
  final DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema));
  writer.create(schema, file);
  GenericRecord previousRecord = null;
  try {
    for (int i = 0; i < RECORD_COUNT; i++) {
      // The current record is the "next" placeholder created on the previous
      // iteration, so each written record already links to its successor.
      GenericRecord record = (GenericRecord) (previousRecord == null
          ? new GenericData.Record(schema)
          : previousRecord.get("next"));
      record.put("value", (long) i);
      // A record is appended only after its "next" pointer is filled in,
      // which is why the write of previousRecord trails by one iteration.
      if (previousRecord != null) {
        writer.append(previousRecord);
      }
      GenericRecord nextRecord = new GenericData.Record(record.getSchema());
      record.put("next", nextRecord);
      previousRecord = record;
    }
    writer.append(previousRecord);
  } finally {
    writer.close();
  }
  return file.getAbsolutePath();
}
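As a usage note, the returned path can be read back with stock Avro APIs. A minimal sketch (not part of the Drill test; only the call to generateLinkedList() is taken from the code above, everything else is standard Avro):

import java.io.File;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class LinkedListReadSketch {
  public static void main(String[] args) throws Exception {
    File file = new File(AvroTestUtil.generateLinkedList());
    try (DataFileReader<GenericRecord> reader =
        new DataFileReader<>(file, new GenericDatumReader<GenericRecord>())) {
      while (reader.hasNext()) {
        GenericRecord rec = reader.next();
        // Each stored record carries its own "value" plus the eagerly
        // nested "next" record written by the generator above.
        System.out.println("value=" + rec.get("value"));
      }
    }
  }
}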
Use of org.apache.avro.generic.GenericRecord in project drill by apache.
The class AvroTestUtil, method generateNestedArraySchema: writes numRecords records, each carrying an array of numArrayItems nested records.
public static AvroTestRecordWriter generateNestedArraySchema(int numRecords, int numArrayItems) throws IOException {
  final File file = File.createTempFile("avro-nested-test", ".avro");
  file.deleteOnExit();
  final Schema schema = SchemaBuilder.record("AvroRecordReaderTest")
      .namespace("org.apache.drill.exec.store.avro")
      .fields()
      .name("a_int").type().intType().noDefault()
      .name("b_array").type().array().items()
          .record("my_record_1").namespace("foo.blah.org")
          .fields()
          .name("nested_1_int").type().optional().intType()
          .endRecord()
      .arrayDefault(Collections.emptyList())
      .endRecord();
  final Schema arraySchema = schema.getField("b_array").schema();
  final Schema itemSchema = arraySchema.getElementType();
  final AvroTestRecordWriter record = new AvroTestRecordWriter(schema, file);
  try {
    for (int i = 0; i < numRecords; i++) {
      record.startRecord();
      record.put("a_int", i);
      // ARRAY_SIZE is only an initial capacity hint; the array grows to
      // hold all numArrayItems elements regardless.
      GenericArray<GenericRecord> array = new GenericData.Array<>(ARRAY_SIZE, arraySchema);
      for (int j = 0; j < numArrayItems; j++) {
        final GenericRecord nestedRecord = new GenericData.Record(itemSchema);
        nestedRecord.put("nested_1_int", j);
        array.add(nestedRecord);
      }
      record.put("b_array", array);
      record.endRecord();
    }
  } finally {
    record.close();
  }
  return record;
}
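The builder chain above is dense; one quick way to sanity-check what it produces is to print the schema. A short sketch, reusing the schema variable from the method above:

// optional() expands nested_1_int to a ["null", "int"] union with a null
// default, and arrayDefault(Collections.emptyList()) becomes "default": [].
System.out.println(schema.toString(true)); // pretty-printed Avro schema JSON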
Use of org.apache.avro.generic.GenericRecord in project drill by apache.
The class AvroTestUtil, method generateSimpleNestedSchema_NoNullValues: writes records with a mandatory nested record field, with no null values anywhere.
public static AvroTestRecordWriter generateSimpleNestedSchema_NoNullValues() throws Exception {
  final File file = File.createTempFile("avro-nested-test", ".avro");
  file.deleteOnExit();
  final Schema schema = SchemaBuilder.record("AvroRecordReaderTest")
      .namespace("org.apache.drill.exec.store.avro")
      .fields()
      .name("a_string").type().stringType().noDefault()
      .name("b_int").type().intType().noDefault()
      .name("c_record").type()
          .record("my_record_1").namespace("foo.blah.org")
          .fields()
          .name("nested_1_string").type().stringType().noDefault()
          .name("nested_1_int").type().intType().noDefault()
          .endRecord()
      .noDefault()
      .endRecord();
  final Schema nestedSchema = schema.getField("c_record").schema();
  final AvroTestRecordWriter record = new AvroTestRecordWriter(schema, file);
  try {
    for (int i = 0; i < RECORD_COUNT; i++) {
      record.startRecord();
      record.put("a_string", "a_" + i);
      record.put("b_int", i);
      final GenericRecord nestedRecord = new GenericData.Record(nestedSchema);
      nestedRecord.put("nested_1_string", "nested_1_string_" + i);
      nestedRecord.put("nested_1_int", i * i);
      record.put("c_record", nestedRecord);
      record.endRecord();
    }
  } finally {
    record.close();
  }
  return record;
}
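To see what the nested field looks like to a consumer, here is a hedged read-back sketch (not from the Drill test; avroFile is assumed to point at the file written above, and imports of org.apache.avro.file.DataFileReader, org.apache.avro.generic.GenericDatumReader, and org.apache.avro.generic.GenericRecord are implied):

static void dumpNested(File avroFile) throws IOException {
  try (DataFileReader<GenericRecord> reader =
      new DataFileReader<>(avroFile, new GenericDatumReader<GenericRecord>())) {
    while (reader.hasNext()) {
      GenericRecord rec = reader.next();
      GenericRecord nested = (GenericRecord) rec.get("c_record");
      // Avro's generic API returns strings as org.apache.avro.util.Utf8,
      // so convert explicitly before comparing against java.lang.String.
      System.out.println(nested.get("nested_1_string").toString()
          + " / " + nested.get("nested_1_int")); // i * i for record i
    }
  }
}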
Use of org.apache.avro.generic.GenericRecord in project drill by apache.
The class AvroTestUtil, method generateUnionNestedArraySchema_withNullValues: writes records whose array field is an optional (nullable) union, populated only for even-numbered records.
public static AvroTestRecordWriter generateUnionNestedArraySchema_withNullValues() throws Exception {
  final File file = File.createTempFile("avro-nested-test", ".avro");
  file.deleteOnExit();
  final Schema schema = SchemaBuilder.record("AvroRecordReaderTest")
      .namespace("org.apache.drill.exec.store.avro")
      .fields()
      .name("a_string").type().stringType().noDefault()
      .name("b_int").type().intType().noDefault()
      .name("c_array").type().optional().array().items()
          .record("my_record_1").namespace("foo.blah.org")
          .fields()
          .name("nested_1_string").type().optional().stringType()
          .name("nested_1_int").type().optional().intType()
          .endRecord()
      .endRecord();
  final Schema nestedSchema = schema.getField("c_array").schema();
  // optional() wraps the array in a ["null", array] union, so the array
  // schema itself sits at index 1 of the union's branches.
  final Schema arraySchema = nestedSchema.getTypes().get(1);
  final Schema itemSchema = arraySchema.getElementType();
  final AvroTestRecordWriter record = new AvroTestRecordWriter(schema, file);
  try {
    for (int i = 0; i < RECORD_COUNT; i++) {
      record.startRecord();
      record.put("a_string", "a_" + i);
      record.put("b_int", i);
      // Only even-numbered records get an array; odd ones leave c_array null.
      if (i % 2 == 0) {
        GenericArray<GenericRecord> array = new GenericData.Array<>(1, arraySchema);
        final GenericRecord nestedRecord = new GenericData.Record(itemSchema);
        nestedRecord.put("nested_1_string", "nested_1_string_" + i);
        nestedRecord.put("nested_1_int", i * i);
        array.add(nestedRecord);
        record.put("c_array", array);
      }
      record.endRecord();
    }
  } finally {
    record.close();
  }
  return record;
}
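The getTypes().get(1) lookup above relies on optional() always placing null first in the union. A defensive variant (an illustration, not what the Drill test does) resolves the non-null branch without assuming its position:

static Schema nonNullBranch(Schema union) {
  // Scan the union's branches and return the first non-null schema.
  for (Schema branch : union.getTypes()) {
    if (branch.getType() != Schema.Type.NULL) {
      return branch;
    }
  }
  throw new IllegalArgumentException("Union has no non-null branch: " + union);
}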
Use of org.apache.avro.generic.GenericRecord in project samza by apache.
The class TestHdfsSystemConsumer, method testHdfsSystemConsumerE2E.
/*
* A simple end to end test that covers the workflow from system admin to
* partitioner, system consumer, and so on, making sure the basic functionality
* works as expected.
*/
@Test
public void testHdfsSystemConsumerE2E() throws Exception {
  Config config = generateDefaultConfig();
  HdfsSystemFactory systemFactory = new HdfsSystemFactory();
  // create admin and do partitioning
  HdfsSystemAdmin systemAdmin = systemFactory.getAdmin(SYSTEM_NAME, config);
  String streamName = WORKING_DIRECTORY;
  Set<String> streamNames = new HashSet<>();
  streamNames.add(streamName);
  generateAvroDataFiles();
  Map<String, SystemStreamMetadata> streamMetadataMap = systemAdmin.getSystemStreamMetadata(streamNames);
  SystemStreamMetadata systemStreamMetadata = streamMetadataMap.get(streamName);
  Assert.assertEquals(NUM_FILES, systemStreamMetadata.getSystemStreamPartitionMetadata().size());
  // create consumer and read from files
  HdfsSystemConsumer systemConsumer = systemFactory.getConsumer(SYSTEM_NAME, config, new NoOpMetricsRegistry());
  Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> metadataMap = systemStreamMetadata.getSystemStreamPartitionMetadata();
  Set<SystemStreamPartition> systemStreamPartitionSet = new HashSet<>();
  metadataMap.forEach((partition, metadata) -> {
    SystemStreamPartition ssp = new SystemStreamPartition(SYSTEM_NAME, streamName, partition);
    systemStreamPartitionSet.add(ssp);
    String offset = metadata.getOldestOffset();
    systemConsumer.register(ssp, offset);
  });
  systemConsumer.start();
  // verify events read from consumer
  int eventsReceived = 0;
  // each file contributes NUM_EVENTS data events plus one "end of stream" event
  int totalEvents = (NUM_EVENTS + 1) * NUM_FILES;
  int remainingRetries = 100;
  Map<SystemStreamPartition, List<IncomingMessageEnvelope>> overallResults = new HashMap<>();
  while (eventsReceived < totalEvents && remainingRetries > 0) {
    remainingRetries--;
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> result = systemConsumer.poll(systemStreamPartitionSet, 200);
    for (SystemStreamPartition ssp : result.keySet()) {
      List<IncomingMessageEnvelope> messageEnvelopeList = result.get(ssp);
      overallResults.putIfAbsent(ssp, new ArrayList<>());
      overallResults.get(ssp).addAll(messageEnvelopeList);
      // Stop polling a partition once it has delivered all of its events.
      if (overallResults.get(ssp).size() >= NUM_EVENTS + 1) {
        systemStreamPartitionSet.remove(ssp);
      }
      eventsReceived += messageEnvelopeList.size();
    }
  }
  Assert.assertEquals(totalEvents, eventsReceived);
  Assert.assertEquals(NUM_FILES, overallResults.size());
  overallResults.values().forEach(messages -> {
    Assert.assertEquals(NUM_EVENTS + 1, messages.size());
    for (int index = 0; index < NUM_EVENTS; index++) {
      GenericRecord record = (GenericRecord) messages.get(index).getMessage();
      Assert.assertEquals(index % NUM_EVENTS, record.get(FIELD_1));
      Assert.assertEquals("string_" + (index % NUM_EVENTS), record.get(FIELD_2).toString());
    }
    // The final envelope in each partition must be the end-of-stream marker.
    Assert.assertEquals(IncomingMessageEnvelope.END_OF_STREAM_OFFSET, messages.get(NUM_EVENTS).getOffset());
  });
}
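The test counts envelopes to decide when a partition is done; an alternative is to key off the end-of-stream sentinel directly. A minimal sketch (not from the Samza test; it uses only APIs that appear above plus IncomingMessageEnvelope.getSystemStreamPartition()):

for (IncomingMessageEnvelope envelope : messageEnvelopeList) {
  if (IncomingMessageEnvelope.END_OF_STREAM_OFFSET.equals(envelope.getOffset())) {
    // This partition is exhausted; stop polling it.
    systemStreamPartitionSet.remove(envelope.getSystemStreamPartition());
  }
}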