Search in sources :

Example 31 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class AvroContainerOutputFormat method getHiveRecordWriter.

/**
 * Creates a Hive record writer that writes Avro container-file output to {@code path}.
 *
 * <p>The Avro schema is resolved from the job configuration and the table properties via
 * {@code HaivvreoUtils.determineSchemaOrThrowException}. When {@code isCompressed} is set,
 * the codec name is read from {@code OUTPUT_CODEC} (falling back to {@code DEFLATE_CODEC});
 * the deflate codec is built with the level read from {@code DEFLATE_LEVEL_KEY} (falling
 * back to {@code DEFAULT_DEFLATE_LEVEL}), any other codec name is handed to
 * {@code CodecFactory.fromString}.
 *
 * @param jobConf       job configuration; source of the schema, codec settings and file system
 * @param path          destination of the Avro container file
 * @param valueClass    not consulted by this implementation
 * @param isCompressed  whether a compression codec should be configured on the writer
 * @param properties    table properties used for schema resolution
 * @param progressable  not consulted by this implementation
 * @return a record writer wrapping the opened Avro {@code DataFileWriter}
 * @throws IOException if the schema cannot be determined (the {@code HaivvreoException} is
 *                     kept as the cause) or the output file cannot be created
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = HaivvreoUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (HaivvreoException e) {
        // Callers only deal with IOException; keep the original failure as the cause.
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        // Only the deflate codec takes a level; every other codec is looked up by name.
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        dfw.setCodec(factory);
    }
    // Keep a handle on the raw stream so it can be released if the Avro writer
    // fails to initialise; otherwise nothing would ever close it.
    java.io.OutputStream out = path.getFileSystem(jobConf).create(path);
    boolean created = false;
    try {
        dfw.create(schema, out);
        created = true;
    } finally {
        if (!created) {
            try {
                out.close();
            } catch (IOException ignored) {
                // Best-effort cleanup: the failure from create() is the one to report.
            }
        }
    }
    return new AvroGenericRecordWriter(dfw);
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) IOException(java.io.IOException) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) CodecFactory(org.apache.avro.file.CodecFactory) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 32 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class TestAvroSerializer method canSerializeUnions.

/**
 * Round-trips a value through each branch of a float/boolean/string union field and
 * checks that the value read back equals the value written.
 */
@Test
public void canSerializeUnions() throws SerDeException, IOException {
    final String unionField = "{ \"name\":\"union1\", \"type\":[\"float\", \"boolean\", \"string\"] }";

    // float branch
    GenericRecord floatRecord = serializeAndDeserialize(unionField, "union1", 424.4f);
    assertEquals(424.4f, floatRecord.get("union1"));

    // boolean branch
    GenericRecord booleanRecord = serializeAndDeserialize(unionField, "union1", true);
    assertEquals(true, booleanRecord.get("union1"));

    // string branch
    GenericRecord stringRecord = serializeAndDeserialize(unionField, "union1", "hello");
    assertEquals("hello", stringRecord.get("union1"));
}
Also used : GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 33 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class TestAvroSerializer method canSerializeBytes.

/**
 * Round-trips a {@code bytes} field and checks that the buffer read back equals the
 * buffer that was written.
 */
@Test
public void canSerializeBytes() throws SerDeException, IOException {
    final String bytesField = "{ \"name\":\"bytes1\", \"type\":\"bytes\" }";

    byte[] payload = "easy as one two three".getBytes();
    ByteBuffer expected = ByteBuffer.wrap(payload);
    expected.rewind();

    GenericRecord roundTripped = serializeAndDeserialize(bytesField, "bytes1", expected);
    Object actual = roundTripped.get("bytes1");
    assertEquals(expected, actual);
}
Also used : GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Example 34 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class TestAvroSerializer method canSerializeNullableTypes.

/**
 * Round-trips an {@code ["int", "null"]} union field, once with an int value and once
 * with {@code null}.
 */
@Test
public void canSerializeNullableTypes() throws SerDeException, IOException {
    final String nullableField = "{ \"name\":\"nullableint\", \"type\":[\"int\", \"null\"] }";

    // A present value comes back unchanged.
    GenericRecord withValue = serializeAndDeserialize(nullableField, "nullableint", 42);
    assertEquals(42, withValue.get("nullableint"));

    // An absent value comes back as null.
    GenericRecord withNull = serializeAndDeserialize(nullableField, "nullableint", null);
    assertNull(withNull.get("nullableint"));
}
Also used : GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 35 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class TestAvroSerializer method canSerializeLists.

/**
 * Round-trips an array-of-int field holding the values 1, 2, 3 and checks that the list
 * read back equals the list that was written.
 */
@Test
public void canSerializeLists() throws SerDeException, IOException {
    final String listField = "{ \"name\":\"list1\", \"type\":{\"type\":\"array\", \"items\":\"int\"} }";

    List<Integer> expected = new ArrayList<Integer>();
    for (int value = 1; value <= 3; value++) {
        expected.add(value);
    }

    GenericRecord roundTripped = serializeAndDeserialize(listField, "list1", expected);
    Object actual = roundTripped.get("list1");
    assertEquals(expected, actual);
}
Also used : GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord)262 Schema (org.apache.avro.Schema)101 Test (org.junit.Test)80 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)46 File (java.io.File)35 IOException (java.io.IOException)34 GenericData (org.apache.avro.generic.GenericData)30 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)30 ArrayList (java.util.ArrayList)29 ByteArrayOutputStream (java.io.ByteArrayOutputStream)27 DataFileWriter (org.apache.avro.file.DataFileWriter)20 HashMap (java.util.HashMap)19 ByteBuffer (java.nio.ByteBuffer)18 BinaryEncoder (org.apache.avro.io.BinaryEncoder)17 Field (org.apache.avro.Schema.Field)14 DataFileStream (org.apache.avro.file.DataFileStream)14 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)14 Utf8 (org.apache.avro.util.Utf8)14 Encoder (org.apache.avro.io.Encoder)12 DatasetRepository (com.cloudera.cdk.data.DatasetRepository)11