Example 51 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project gobblin by apache.

the class TimeBasedAvroWriterPartitionerTest method testWriter.

/**
 * Tests:
 *  1. Handling of a record timestamp of type long
 *  2. The partition path generated for a given record
 */
@Test
public void testWriter() throws IOException {
    Schema schema = getRecordSchema("long");
    State state = getBasicState();
    // Write three records in total (two with a millisecond-based writer, one with a second-based writer); each should be written to a different file
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema);
    DataWriter<GenericRecord> millisPartitionWriter = getWriter(schema, state);
    // This timestamp, in milliseconds, corresponds to 2015/01/01
    genericRecordBuilder.set("timestamp", 1420099200000l);
    millisPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));
    // This timestamp corresponds to 2015/01/02
    genericRecordBuilder.set("timestamp", 1420185600000l);
    millisPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));
    millisPartitionWriter.close();
    millisPartitionWriter.commit();
    // Check that the writer reports that 2 records have been written
    Assert.assertEquals(millisPartitionWriter.recordsWritten(), 2);
    state.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_TIMEUNIT, "seconds");
    DataWriter<GenericRecord> secsPartitionWriter = getWriter(schema, state);
    // This timestamp, in seconds, corresponds to 2015/01/03
    genericRecordBuilder.set("timestamp", 1420272000L);
    secsPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));
    secsPartitionWriter.close();
    secsPartitionWriter.commit();
    // Check that the writer reports that 1 record has been written
    Assert.assertEquals(secsPartitionWriter.recordsWritten(), 1);
    // Check that 3 files were created
    Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "avro" }, true).size(), 3);
    // Check if each file exists, and in the correct location
    File baseOutputDir = new File(OUTPUT_DIR, BASE_FILE_PATH);
    Assert.assertTrue(baseOutputDir.exists());
    File outputDir20150101 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "01" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150101.exists());
    File outputDir20150102 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "02" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150102.exists());
    File outputDir20150103 = new File(baseOutputDir, "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "03" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150103.exists());
}
Also used : State(org.apache.gobblin.configuration.State) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.testng.annotations.Test)
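Note: getRecordSchema and getWriter are test helpers that are not shown in this snippet. As a rough, hypothetical sketch of what getRecordSchema("long") could return, the records written above only need a single long field named "timestamp"; the record name below is made up for illustration:

// Hypothetical sketch of the schema shape assumed above (not the actual test helper).
Schema schema = SchemaBuilder.record("partitionTestRecord")
    .fields()
    .name("timestamp").type().longType().noDefault()
    .endRecord();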

Example 52 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project daikon by Talend.

the class TestMessageHeaderExtractor method testStringAsFirstField.

@Test
public void testStringAsFirstField() throws Exception {
    Schema messageSchema = SchemaBuilder.record("message").fields().name("fakeHeader").type().stringType().noDefault().endRecord();
    IndexedRecord message = new GenericRecordBuilder(messageSchema).set("fakeHeader", "hello").build();
    expectedException.expect(IllegalArgumentException.class);
    expectedException.expectMessage("Provided message's first field is not a record but STRING");
    MessageHeaderExtractor extractor = new MessageHeaderExtractor();
    extractor.extractHeader(message);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) MessageHeaderExtractor(org.talend.daikon.messages.header.consumer.MessageHeaderExtractor) Test(org.junit.Test)
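For contrast, a minimal hypothetical sketch (not taken from the daikon sources) of a message whose first field is itself a record, which is the shape the extractor expects; the "header" and "messageId" names are made up for illustration and are not the real daikon header schema:

// Hypothetical message whose first field is a nested record rather than a string.
Schema headerSchema = SchemaBuilder.record("header").fields()
    .name("messageId").type().stringType().noDefault()
    .endRecord();
Schema messageSchema = SchemaBuilder.record("message").fields()
    .name("header").type(headerSchema).noDefault()
    .endRecord();
IndexedRecord header = new GenericRecordBuilder(headerSchema).set("messageId", "id-1").build();
IndexedRecord message = new GenericRecordBuilder(messageSchema).set("header", header).build();
// extractor.extractHeader(message) would now see a record, not a STRING, as the first field.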

Example 53 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project daikon by Talend.

the class JsonGenericRecordConverter method getOutputRecord.

/**
 * Generates an Avro GenericRecord from a JSON node.
 *
 * Iterates over the JSON node's fields and constructs the corresponding Avro GenericRecord.
 *
 * @param jsonNode the JSON node to convert to an Avro GenericRecord
 * @param schema the Avro schema of jsonNode
 * @return the resulting Avro GenericRecord
 */
private GenericRecord getOutputRecord(final JsonNode jsonNode, Schema schema) {
    GenericRecordBuilder outputRecord = new GenericRecordBuilder(schema);
    final Iterator<Map.Entry<String, JsonNode>> elements = jsonNode.fields();
    Map.Entry<String, JsonNode> mapEntry;
    while (elements.hasNext()) {
        mapEntry = elements.next();
        final JsonNode nextNode = mapEntry.getValue();
        if (!(nextNode instanceof NullNode)) {
            if (nextNode instanceof ValueNode) {
                outputRecord.set(mapEntry.getKey(), getValue(nextNode));
            } else if (nextNode instanceof ObjectNode) {
                Schema schemaTo = jsonSchemaInferrer.inferSchema(nextNode.toString());
                GenericRecord record = getOutputRecord(nextNode, schemaTo);
                outputRecord.set(mapEntry.getKey(), record);
            } else if (nextNode instanceof ArrayNode) {
                List<Object> listRecords = new ArrayList<Object>();
                Iterator<JsonNode> elementsIterator = ((ArrayNode) nextNode).elements();
                while (elementsIterator.hasNext()) {
                    JsonNode nodeTo = elementsIterator.next();
                    if (nodeTo instanceof ValueNode) {
                        listRecords.add(getValue(nodeTo));
                    } else {
                        Schema schemaTo = jsonSchemaInferrer.inferSchema(nodeTo.toString());
                        listRecords.add(getOutputRecord(nodeTo, schemaTo));
                    }
                }
                outputRecord.set(mapEntry.getKey(), listRecords);
            }
        } else {
            outputRecord.set(mapEntry.getKey(), null);
        }
    }
    return outputRecord.build();
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) JsonNode(com.fasterxml.jackson.databind.JsonNode) ValueNode(com.fasterxml.jackson.databind.node.ValueNode) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) GenericRecord(org.apache.avro.generic.GenericRecord) Map(java.util.Map) NullNode(com.fasterxml.jackson.databind.node.NullNode)
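A rough usage sketch of this method, assuming jsonSchemaInferrer, getValue, and getOutputRecord belong to the surrounding JsonGenericRecordConverter class, and that Jackson's ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) is available to parse the input; the sample JSON is made up:

// Hypothetical caller, e.g. inside a method that declares throws IOException.
ObjectMapper mapper = new ObjectMapper();
JsonNode jsonNode = mapper.readTree("{\"name\":\"Alice\",\"scores\":[1,2,3]}");
Schema schema = jsonSchemaInferrer.inferSchema(jsonNode.toString());
GenericRecord record = getOutputRecord(jsonNode, schema);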

Example 54 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project trino by trinodb.

the class TestAvroDecoder method testRowWithNulls.

@Test
public void testRowWithNulls() {
    Schema schema = SchemaBuilder.record("record_field").fields().name("f1").type().optional().floatType().name("f2").type().optional().doubleType().name("f3").type().optional().intType().name("f4").type().optional().longType().name("f5").type().optional().stringType().name("f6").type().optional().enumeration("color").symbols("red", "blue", "green").name("f7").type().optional().fixed("fixed5").size(5).name("f8").type().optional().bytesType().name("f9").type().optional().booleanType().name("f10").type().optional().array().items().unionOf().nullType().and().record("sub_array_field").fields().name("sf1").type().optional().stringType().name("sf2").type().optional().longType().endRecord().endUnion().name("f11").type().optional().map().values().unionOf().nullType().and().record("sub_map_field").fields().name("sf1").type().optional().doubleType().name("sf2").type().optional().booleanType().endRecord().endUnion().name("f12").type().optional().record("sub_row_field").fields().name("sf1").type().optional().intType().name("sf2").type().optional().enumeration("state").symbols("initialized", "running", "finished", "failed").endRecord().endRecord();
    RowType rowType = RowType.from(ImmutableList.<RowType.Field>builder().add(RowType.field("f1", REAL)).add(RowType.field("f2", DOUBLE)).add(RowType.field("f3", INTEGER)).add(RowType.field("f4", BIGINT)).add(RowType.field("f5", VARCHAR)).add(RowType.field("f6", VARCHAR)).add(RowType.field("f7", VARBINARY)).add(RowType.field("f8", VARBINARY)).add(RowType.field("f9", BOOLEAN)).add(RowType.field("f10", new ArrayType(RowType.from(ImmutableList.<RowType.Field>builder().add(RowType.field("sf1", VARCHAR)).add(RowType.field("sf2", BIGINT)).build())))).add(RowType.field("f11", MAP_OF_RECORD)).add(RowType.field("f12", RowType.from(ImmutableList.<RowType.Field>builder().add(RowType.field("sf1", INTEGER)).add(RowType.field("sf2", VARCHAR)).build()))).build());
    GenericRecord data = new GenericRecordBuilder(schema).build();
    DecoderTestColumnHandle row = new DecoderTestColumnHandle(0, "record_field", rowType, "record_field", null, null, false, false, false);
    Map<DecoderColumnHandle, FieldValueProvider> decodedRow = buildAndDecodeColumn(row, "record_field", schema.toString(), data);
    checkRowValue(decodedRow, row, data);
    // Check nested fields with null values
    GenericData.Array<GenericRecord> array = new GenericData.Array<GenericRecord>(
            schema.getField("f10").schema().getTypes().get(1),
            Arrays.asList(
                    new GenericRecordBuilder(schema.getField("f10").schema().getTypes().get(1).getElementType().getTypes().get(1)).build(),
                    null));
    data = new GenericRecordBuilder(schema)
            .set("f10", array)
            .set("f11", ImmutableMap.builder()
                    .put("key1", new GenericRecordBuilder(schema.getField("f11").schema().getTypes().get(1).getValueType().getTypes().get(1)).build())
                    .buildOrThrow())
            .set("f12", new GenericRecordBuilder(schema.getField("f12").schema().getTypes().get(1)).build())
            .build();
    decodedRow = buildAndDecodeColumn(row, "record_field", schema.toString(), data);
    checkRowValue(decodedRow, row, data);
}
Also used : DecoderTestColumnHandle(io.trino.decoder.DecoderTestColumnHandle) FieldValueProvider(io.trino.decoder.FieldValueProvider) Schema(org.apache.avro.Schema) RowType(io.trino.spi.type.RowType) DecoderColumnHandle(io.trino.decoder.DecoderColumnHandle) GenericData(org.apache.avro.generic.GenericData) ArrayType(io.trino.spi.type.ArrayType) GenericArray(org.apache.avro.generic.GenericArray) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.testng.annotations.Test)
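A side note on the first assertion: new GenericRecordBuilder(schema).build() fills every unset field with its schema default, and fields declared with optional() default to null, so the bare build() above produces an all-null record. A minimal standalone illustration (not from the Trino test; names are made up):

// optional() fields default to null, so building without set(...) yields null values.
Schema s = SchemaBuilder.record("r").fields()
    .name("f1").type().optional().intType()
    .name("f2").type().optional().stringType()
    .endRecord();
GenericRecord allNulls = new GenericRecordBuilder(s).build();
// allNulls.get("f1") == null and allNulls.get("f2") == null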

Example 55 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project java-bigquerystorage by googleapis.

the class ITBigQueryStorageTest method ReadAllRows.

/**
 * Reads all the rows from the specified table reference and returns them as a list of generic
 * Avro records.
 *
 * @param tableReference the table to read from
 * @param filter Optional. If specified, it will be used to restrict returned data.
 * @return the rows read, as generic Avro records
 */
List<GenericData.Record> ReadAllRows(TableReference tableReference, String filter) throws IOException {
    final List<GenericData.Record> rows = new ArrayList<>();
    ProcessRowsAtSnapshot(
        /* tableReference = */ tableReference,
        /* snapshotInMillis = */ null,
        /* filter = */ filter,
        new AvroRowConsumer() {

        @Override
        public void accept(GenericData.Record record) {
            // clone the record since that reference will be reused by the reader.
            rows.add(new GenericRecordBuilder(record).build());
        }
    });
    return rows;
}
Also used : AvroRowConsumer(com.google.cloud.bigquery.storage.v1beta1.it.SimpleRowReader.AvroRowConsumer) ArrayList(java.util.ArrayList) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericData(org.apache.avro.generic.GenericData)
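The copy in accept(...) relies on GenericRecordBuilder's copy constructor, which takes an existing GenericData.Record and copies its field values, so the built record no longer shares state with the instance the reader reuses. A small standalone sketch (schema and values are made up):

// Copying a record with GenericRecordBuilder detaches it from the original instance.
Schema rowSchema = SchemaBuilder.record("row").fields()
    .name("value").type().stringType().noDefault()
    .endRecord();
GenericData.Record source = new GenericData.Record(rowSchema);
source.put("value", "first");
GenericData.Record copy = new GenericRecordBuilder(source).build();
source.put("value", "second");
// copy.get("value") is still "first"; mutating source does not affect copy.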

Aggregations

GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 399
GenericRecord (org.apache.avro.generic.GenericRecord): 263
Test (org.junit.Test): 263
Schema (org.apache.avro.Schema): 216
GenericData (org.apache.avro.generic.GenericData): 69
ArrayList (java.util.ArrayList): 45
EnumTest (foo.bar.EnumTest): 41
File (java.io.File): 41
IndexedRecord (org.apache.avro.generic.IndexedRecord): 39
Schema (org.apache.kafka.connect.data.Schema): 39
SchemaAndValue (org.apache.kafka.connect.data.SchemaAndValue): 35
Path (org.apache.hadoop.fs.Path): 33
List (java.util.List): 30
ByteBuffer (java.nio.ByteBuffer): 29
HashMap (java.util.HashMap): 29
AvroSchema (io.confluent.kafka.schemaregistry.avro.AvroSchema): 28
Struct (org.apache.kafka.connect.data.Struct): 28
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 27
Record (org.apache.avro.generic.GenericData.Record): 25
SchemaBuilder (org.apache.avro.SchemaBuilder): 22