Use of org.apache.avro.generic.GenericRecordBuilder in project gobblin by apache.
Class TimeBasedAvroWriterPartitionerTest, method testWriter.
/**
 * Exercises the time-partitioned Avro writer with a record timestamp of type long. Verifies:
 * 1. The number of records each writer reports as written
 * 2. The partition path (year/month/day) under which each record's file is created
 */
@Test
public void testWriter() throws IOException {
  Schema recordSchema = getRecordSchema("long");
  State writerState = getBasicState();

  // Write three records, each should be written to a different file
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(recordSchema);

  DataWriter<GenericRecord> millisPartitionWriter = getWriter(recordSchema, writerState);

  // This timestamp corresponds to 2015/01/01
  recordBuilder.set("timestamp", 1420099200000L);
  GenericRecord firstDayRecord = recordBuilder.build();
  millisPartitionWriter.writeEnvelope(new RecordEnvelope<>(firstDayRecord));

  // This timestamp corresponds to 2015/01/02
  recordBuilder.set("timestamp", 1420185600000L);
  GenericRecord secondDayRecord = recordBuilder.build();
  millisPartitionWriter.writeEnvelope(new RecordEnvelope<>(secondDayRecord));

  millisPartitionWriter.close();
  millisPartitionWriter.commit();

  // Check that the writer reports that 2 records have been written
  Assert.assertEquals(millisPartitionWriter.recordsWritten(), 2);

  // Second writer: same schema, but the partitioner time unit is switched to seconds
  writerState.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_TIMEUNIT, "seconds");
  DataWriter<GenericRecord> secsPartitionWriter = getWriter(recordSchema, writerState);

  // This timestamp corresponds to 2015/01/03
  recordBuilder.set("timestamp", 1420272000L);
  GenericRecord thirdDayRecord = recordBuilder.build();
  secsPartitionWriter.writeEnvelope(new RecordEnvelope<>(thirdDayRecord));

  secsPartitionWriter.close();
  secsPartitionWriter.commit();

  // Check that the writer reports that 1 record has been written
  Assert.assertEquals(secsPartitionWriter.recordsWritten(), 1);

  // Check that 3 files were created
  Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "avro" }, true).size(), 3);

  // Check if each file exists, and in the correct location
  File baseOutputDir = new File(OUTPUT_DIR, BASE_FILE_PATH);
  Assert.assertTrue(baseOutputDir.exists());

  // All three records fall in January 2015; only the day directory differs
  String yearMonthPrefix = "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR;

  File outputFile20150101 = new File(baseOutputDir, yearMonthPrefix + "01" + Path.SEPARATOR + FILE_NAME);
  Assert.assertTrue(outputFile20150101.exists());

  File outputFile20150102 = new File(baseOutputDir, yearMonthPrefix + "02" + Path.SEPARATOR + FILE_NAME);
  Assert.assertTrue(outputFile20150102.exists());

  File outputFile20150103 = new File(baseOutputDir, yearMonthPrefix + "03" + Path.SEPARATOR + FILE_NAME);
  Assert.assertTrue(outputFile20150103.exists());
}
Use of org.apache.avro.generic.GenericRecordBuilder in project daikon by Talend.
Class TestMessageHeaderExtractor, method testStringAsFirstField.
/**
 * Expects header extraction to fail with an IllegalArgumentException when the message's
 * first field is a string.
 */
@Test
public void testStringAsFirstField() throws Exception {
  // Record schema whose only (and therefore first) field is a plain string
  Schema stringFirstSchema = SchemaBuilder.record("message")
      .fields()
      .name("fakeHeader").type().stringType().noDefault()
      .endRecord();

  GenericRecordBuilder messageBuilder = new GenericRecordBuilder(stringFirstSchema);
  messageBuilder.set("fakeHeader", "hello");
  IndexedRecord message = messageBuilder.build();

  // Expectations must be registered before the call that is supposed to throw
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("Provided message's first field is not a record but STRING");

  new MessageHeaderExtractor().extractHeader(message);
}
Use of org.apache.avro.generic.GenericRecordBuilder in project daikon by Talend.
Class JsonGenericRecordConverter, method getOutputRecord.
/**
 * Build an Avro Generic Record from the fields of a Json Node.
 *
 * Each field is handled according to its node type: a Json null is set as null, a value node is
 * converted with getValue, an object node is converted recursively using a schema inferred from
 * its Json text, and an array node becomes a list of converted elements. Fields of any other
 * node type are not set on the record.
 *
 * @param jsonNode to convert to Avro Generic Record
 * @param schema of jsonNode
 * @return Avro Generic Record
 */
private GenericRecord getOutputRecord(final JsonNode jsonNode, Schema schema) {
  final GenericRecordBuilder builder = new GenericRecordBuilder(schema);

  for (Iterator<Map.Entry<String, JsonNode>> fieldIt = jsonNode.fields(); fieldIt.hasNext();) {
    final Map.Entry<String, JsonNode> field = fieldIt.next();
    final String fieldName = field.getKey();
    final JsonNode fieldValue = field.getValue();

    // The null check must come first so that a Json null is never passed to getValue
    if (fieldValue instanceof NullNode) {
      builder.set(fieldName, null);
    } else if (fieldValue instanceof ValueNode) {
      builder.set(fieldName, getValue(fieldValue));
    } else if (fieldValue instanceof ObjectNode) {
      // Nested object: infer its schema from its own Json text, then recurse
      final Schema nestedSchema = jsonSchemaInferrer.inferSchema(fieldValue.toString());
      builder.set(fieldName, getOutputRecord(fieldValue, nestedSchema));
    } else if (fieldValue instanceof ArrayNode) {
      final List<Object> convertedItems = new ArrayList<Object>();
      for (Iterator<JsonNode> itemIt = ((ArrayNode) fieldValue).elements(); itemIt.hasNext();) {
        final JsonNode item = itemIt.next();
        if (item instanceof ValueNode) {
          convertedItems.add(getValue(item));
        } else {
          // Any non-value element is converted as a record with an inferred schema
          final Schema itemSchema = jsonSchemaInferrer.inferSchema(item.toString());
          convertedItems.add(getOutputRecord(item, itemSchema));
        }
      }
      builder.set(fieldName, convertedItems);
    }
  }

  return builder.build();
}
Use of org.apache.avro.generic.GenericRecordBuilder in project trino by trinodb.
Class TestAvroDecoder, method testRowWithNulls.
/**
 * Decodes a row column from records that leave fields unset: first a record built with no
 * fields set at all, then a record whose array, map and row fields hold nested records that
 * themselves have no fields set (the array additionally holds a null element).
 */
@Test
public void testRowWithNulls() {
  // Avro schema: twelve optional fields covering primitives, enum, fixed, bytes, array, map and record
  Schema schema = SchemaBuilder.record("record_field")
      .fields()
      .name("f1").type().optional().floatType()
      .name("f2").type().optional().doubleType()
      .name("f3").type().optional().intType()
      .name("f4").type().optional().longType()
      .name("f5").type().optional().stringType()
      .name("f6").type().optional().enumeration("color").symbols("red", "blue", "green")
      .name("f7").type().optional().fixed("fixed5").size(5)
      .name("f8").type().optional().bytesType()
      .name("f9").type().optional().booleanType()
      .name("f10").type().optional().array().items().unionOf().nullType().and()
          .record("sub_array_field")
          .fields()
          .name("sf1").type().optional().stringType()
          .name("sf2").type().optional().longType()
          .endRecord()
          .endUnion()
      .name("f11").type().optional().map().values().unionOf().nullType().and()
          .record("sub_map_field")
          .fields()
          .name("sf1").type().optional().doubleType()
          .name("sf2").type().optional().booleanType()
          .endRecord()
          .endUnion()
      .name("f12").type().optional()
          .record("sub_row_field")
          .fields()
          .name("sf1").type().optional().intType()
          .name("sf2").type().optional().enumeration("state").symbols("initialized", "running", "finished", "failed")
          .endRecord()
      .endRecord();

  // Row type mirroring the Avro schema field by field
  RowType arrayElementRowType = RowType.from(ImmutableList.<RowType.Field>builder()
      .add(RowType.field("sf1", VARCHAR))
      .add(RowType.field("sf2", BIGINT))
      .build());
  RowType subRowType = RowType.from(ImmutableList.<RowType.Field>builder()
      .add(RowType.field("sf1", INTEGER))
      .add(RowType.field("sf2", VARCHAR))
      .build());
  RowType rowType = RowType.from(ImmutableList.<RowType.Field>builder()
      .add(RowType.field("f1", REAL))
      .add(RowType.field("f2", DOUBLE))
      .add(RowType.field("f3", INTEGER))
      .add(RowType.field("f4", BIGINT))
      .add(RowType.field("f5", VARCHAR))
      .add(RowType.field("f6", VARCHAR))
      .add(RowType.field("f7", VARBINARY))
      .add(RowType.field("f8", VARBINARY))
      .add(RowType.field("f9", BOOLEAN))
      .add(RowType.field("f10", new ArrayType(arrayElementRowType)))
      .add(RowType.field("f11", MAP_OF_RECORD))
      .add(RowType.field("f12", subRowType))
      .build());

  DecoderTestColumnHandle row = new DecoderTestColumnHandle(0, "record_field", rowType, "record_field", null, null, false, false, false);

  // Phase 1: a record built without setting any field
  GenericRecord emptyRecord = new GenericRecordBuilder(schema).build();
  Map<DecoderColumnHandle, FieldValueProvider> decodedEmptyRow = buildAndDecodeColumn(row, "record_field", schema.toString(), emptyRecord);
  checkRowValue(decodedEmptyRow, row, emptyRecord);

  // Check nested fields with null values
  // Index 1 of each optional union is the non-null branch
  Schema arraySchema = schema.getField("f10").schema().getTypes().get(1);
  Schema arrayItemRecordSchema = arraySchema.getElementType().getTypes().get(1);
  Schema mapValueRecordSchema = schema.getField("f11").schema().getTypes().get(1).getValueType().getTypes().get(1);
  Schema subRowSchema = schema.getField("f12").schema().getTypes().get(1);

  GenericData.Array<GenericRecord> array = new GenericData.Array<GenericRecord>(
      arraySchema,
      Arrays.asList(new GenericRecordBuilder(arrayItemRecordSchema).build(), null));

  GenericRecord nestedRecord = new GenericRecordBuilder(schema)
      .set("f10", array)
      .set("f11", ImmutableMap.builder().put("key1", new GenericRecordBuilder(mapValueRecordSchema).build()).buildOrThrow())
      .set("f12", new GenericRecordBuilder(subRowSchema).build())
      .build();

  Map<DecoderColumnHandle, FieldValueProvider> decodedNestedRow = buildAndDecodeColumn(row, "record_field", schema.toString(), nestedRecord);
  checkRowValue(decodedNestedRow, row, nestedRecord);
}
Use of org.apache.avro.generic.GenericRecordBuilder in project java-bigquerystorage by googleapis.
Class ITBigQueryStorageTest, method ReadAllRows.
/**
 * Reads all the rows from the specified table reference and returns them as a list of generic
 * Avro records. Rows are processed without a snapshot time (null is passed for it).
 *
 * @param tableReference the table to read from
 * @param filter Optional. If specified, it will be used to restrict returned data.
 * @return a copy of every record handed to the row consumer, in the order received
 * @throws IOException declared by this method; presumably raised by the underlying row
 *     processing (confirm against ProcessRowsAtSnapshot)
 */
List<GenericData.Record> ReadAllRows(TableReference tableReference, String filter) throws IOException {
  final List<GenericData.Record> collected = new ArrayList<>();

  final AvroRowConsumer copyingConsumer = new AvroRowConsumer() {
    @Override
    public void accept(GenericData.Record record) {
      // clone the record since that reference will be reused by the reader.
      GenericData.Record copy = new GenericRecordBuilder(record).build();
      collected.add(copy);
    }
  };

  ProcessRowsAtSnapshot(
      /* tableReference = */ tableReference,
      /* snapshotInMillis = */ null,
      /* filter = */ filter,
      copyingConsumer);

  return collected;
}
Aggregations