Search in sources :

Example 26 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project crunch by cloudera.

the class AvroFileReaderFactoryTest method populateGenericFile.

/**
 * Writes the given records to {@code this.avroFile} as an Avro data file.
 *
 * <p>Both the Avro writer and the underlying file stream are released in
 * {@code finally} blocks, so a failure while creating the file or appending a
 * record does not leak the open file handle.
 *
 * @param genericRecords records to append, in iteration order
 * @param outputSchema schema used for the datum writer and the file header
 * @throws IOException if the file cannot be opened, written, or closed
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema outputSchema) throws IOException {
    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
    try {
        GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(outputSchema);
        DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
        try {
            dataFileWriter.create(outputSchema, outputStream);
            for (GenericRecord record : genericRecords) {
                dataFileWriter.append(record);
            }
        } finally {
            // Close the writer first so buffered data reaches the stream before it is closed.
            dataFileWriter.close();
        }
    } finally {
        // Always release the file handle, even when the writer failed or was never opened.
        outputStream.close();
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 27 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project trevni by cutting.

the class RandomData method main.

/**
 * Command-line entry point: reads a schema file, then writes the data produced
 * by a {@code RandomData} instance for that schema to an Avro data file.
 *
 * @param args {@code <schemafile> <outputfile> <count>}
 * @throws Exception on any parse or I/O failure
 */
public static void main(String[] args) throws Exception {
    final int expectedArgCount = 3;
    if (args.length != expectedArgCount) {
        System.out.println("Usage: RandomData <schemafile> <outputfile> <count>");
        System.exit(-1);
    }

    File schemaFile = new File(args[0]);
    Schema schema = Schema.parse(schemaFile);

    DataFileWriter<Object> unopened = new DataFileWriter<Object>(new GenericDatumWriter<Object>());
    File outputFile = new File(args[1]);
    DataFileWriter<Object> out = unopened.create(schema, outputFile);
    try {
        // The count is parsed only after the output file has been created.
        int count = Integer.parseInt(args[2]);
        RandomData generator = new RandomData(schema, count);
        for (Object generated : generator) {
            out.append(generated);
        }
    } finally {
        out.close();
    }
}
Also used : Schema(org.apache.avro.Schema) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) File(java.io.File)

Example 28 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project druid by druid-io.

the class AvroStreamInputRowParserTest method testParse.

/**
 * Serializes the parser to JSON and back, then checks that the deserialized
 * parser turns a schema-id-prefixed Avro payload into the expected input row.
 */
@Test
public void testParse() throws SchemaValidationException, IOException {
    // Round-trip the parser through the JSON mapper.
    Repository repository = new InMemoryRepository(null);
    SchemaRepoBasedAvroBytesDecoder<String, Integer> originalDecoder = new SchemaRepoBasedAvroBytesDecoder<String, Integer>(new Avro1124SubjectAndIdConverter(TOPIC), repository);
    AvroStreamInputRowParser parser = new AvroStreamInputRowParser(PARSE_SPEC, originalDecoder);
    String parserJson = jsonMapper.writeValueAsString(parser);
    ByteBufferInputRowParser roundTripped = jsonMapper.readValue(parserJson, ByteBufferInputRowParser.class);

    // From here on, use the repository held by the deserialized parser's decoder.
    AvroStreamInputRowParser roundTrippedAvroParser = (AvroStreamInputRowParser) roundTripped;
    SchemaRepoBasedAvroBytesDecoder roundTrippedDecoder = (SchemaRepoBasedAvroBytesDecoder) roundTrippedAvroParser.getAvroBytesDecoder();
    repository = roundTrippedDecoder.getSchemaRepository();

    // Build the datum that will be encoded.
    GenericRecord datum = buildSomeAvroDatum();

    // Register the schema and write the subject/id prefix into a 4-byte buffer.
    Avro1124SubjectAndIdConverter subjectAndIdConverter = new Avro1124SubjectAndIdConverter(TOPIC);
    TypedSchemaRepository<Integer, Schema, String> typedRepository = new TypedSchemaRepository<Integer, Schema, String>(repository, new IntegerConverter(), new AvroSchemaConverter(), new IdentityConverter());
    Integer schemaId = typedRepository.registerSchema(TOPIC, SomeAvroDatum.getClassSchema());
    ByteBuffer prefix = ByteBuffer.allocate(4);
    subjectAndIdConverter.putSubjectAndId(TOPIC, schemaId, prefix);
    ByteArrayOutputStream payload = new ByteArrayOutputStream();
    payload.write(prefix.array());

    // Append the Avro binary encoding of the datum after the prefix.
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(datum.getSchema());
    datumWriter.write(datum, EncoderFactory.get().directBinaryEncoder(payload, null));

    // Parse the full payload with the deserialized parser and verify the row.
    ByteBuffer encoded = ByteBuffer.wrap(payload.toByteArray());
    InputRow inputRow = roundTripped.parse(encoded);
    assertInputRowCorrect(inputRow);
}
Also used : Avro1124SubjectAndIdConverter(io.druid.data.input.schemarepo.Avro1124SubjectAndIdConverter) AvroSchemaConverter(org.schemarepo.api.converter.AvroSchemaConverter) InMemoryRepository(org.schemarepo.InMemoryRepository) TypedSchemaRepository(org.schemarepo.api.TypedSchemaRepository) Schema(org.apache.avro.Schema) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteBuffer(java.nio.ByteBuffer) IntegerConverter(org.schemarepo.api.converter.IntegerConverter) Repository(org.schemarepo.Repository) InMemoryRepository(org.schemarepo.InMemoryRepository) TypedSchemaRepository(org.schemarepo.api.TypedSchemaRepository) IdentityConverter(org.schemarepo.api.converter.IdentityConverter) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 29 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project druid by druid-io.

the class InlineSchemasAvroBytesDecoderTest method testParse.

/**
 * Encodes a datum behind a one-byte prefix and a four-byte schema id, decodes it
 * with an {@code InlineSchemasAvroBytesDecoder} that maps that id to the schema,
 * and checks that the decoded record carries the same {@code id} field.
 */
@Test
public void testParse() throws Exception {
    final int schemaId = 10;
    GenericRecord expected = AvroStreamInputRowParserTest.buildSomeAvroDatum();
    Schema schema = SomeAvroDatum.getClassSchema();

    // Payload layout: a single byte of value 1, the schema id as a 4-byte int, then the Avro-encoded record.
    ByteArrayOutputStream payload = new ByteArrayOutputStream();
    byte[] leadingByte = new byte[] { 1 };
    payload.write(leadingByte);
    byte[] schemaIdBytes = ByteBuffer.allocate(4).putInt(schemaId).array();
    payload.write(schemaIdBytes);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    datumWriter.write(expected, EncoderFactory.get().directBinaryEncoder(payload, null));

    InlineSchemasAvroBytesDecoder decoder = new InlineSchemasAvroBytesDecoder(ImmutableMap.of(schemaId, schema));
    ByteBuffer encoded = ByteBuffer.wrap(payload.toByteArray());
    GenericRecord actual = decoder.parse(encoded);

    Assert.assertEquals(expected.get("id"), actual.get("id"));
}
Also used : Schema(org.apache.avro.Schema) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test) AvroStreamInputRowParserTest(io.druid.data.input.AvroStreamInputRowParserTest)

Example 30 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project h2o-3 by h2oai.

the class AvroFileGenerator method generateUnionTypes.

/**
 * Generates an Avro data file whose record schema consists of optional
 * (nullable) fields of several primitive types.
 *
 * <p>Row 0 has every field set to {@code null}; every later row {@code i} holds
 * values derived from {@code i}.
 *
 * @param filename name of the file to create inside a fresh temporary directory
 * @param nrows number of records to write
 * @return the generated file
 * @throws IOException if the file cannot be created or written
 */
public static File generateUnionTypes(String filename, int nrows) throws IOException {
    File tempDir = Files.createTempDir();
    File target = new File(tempDir, filename);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>();
    DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    // Per the SchemaBuilder javadoc, these two field declarations are equivalent:
    //   .name("f").type().unionOf().nullType().and().longType().endUnion().nullDefault()
    //   .name("f").type().optional().longType()
    Schema schema = SchemaBuilder.builder()
        .record("test_union_types")
        .fields()
        .name("CUString").type().optional().stringType()
        .name("CUBytes").type().optional().bytesType()
        .name("CUInt").type().optional().intType()
        .name("CULong").type().optional().longType()
        .name("CUFloat").type().optional().floatType()
        .name("CUDouble").type().optional().doubleType()
        .name("CUBoolean").type().optional().booleanType()
        .endRecord();
    try {
        fileWriter.create(schema, target);
        for (int row = 0; row < nrows; row++) {
            // Only the first row exercises the null branch of each union.
            boolean allNull = row == 0;
            String stringValue = allNull ? null : String.valueOf(row);
            ByteBuffer bytesValue = allNull ? null : ByteBuffer.wrap(StringUtils.toBytes(row));
            Integer intValue = allNull ? null : row;
            Long longValue = allNull ? null : Long.valueOf(row);
            Float floatValue = allNull ? null : Float.valueOf(row);
            Double doubleValue = allNull ? null : Double.valueOf(row);
            Boolean booleanValue = allNull ? null : (row & 1) == 1;

            GenericRecord record = new GenericData.Record(schema);
            record.put("CUString", stringValue);
            record.put("CUBytes", bytesValue);
            record.put("CUInt", intValue);
            record.put("CULong", longValue);
            record.put("CUFloat", floatValue);
            record.put("CUDouble", doubleValue);
            record.put("CUBoolean", booleanValue);
            fileWriter.append(record);
        }
        return target;
    } finally {
        fileWriter.close();
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) File(java.io.File)

Aggregations

GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 49, GenericRecord (org.apache.avro.generic.GenericRecord): 46, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 24, Schema (org.apache.avro.Schema): 23, DataFileWriter (org.apache.avro.file.DataFileWriter): 17, BinaryEncoder (org.apache.avro.io.BinaryEncoder): 17, IOException (java.io.IOException): 13, Encoder (org.apache.avro.io.Encoder): 12, File (java.io.File): 9, Test (org.junit.Test): 6, FileOutputStream (java.io.FileOutputStream): 4, GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 4, ArrayList (java.util.ArrayList): 3, Properties (java.util.Properties): 3, Producer (kafka.javaapi.producer.Producer): 3, ProducerConfig (kafka.producer.ProducerConfig): 3, GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 3, JsonEncoder (org.apache.avro.io.JsonEncoder): 3, AvroAdapter (com.linkedin.data.avro.AvroAdapter): 2, DbusEventInfo (com.linkedin.databus.core.DbusEventInfo): 2