Search in sources :

Example 11 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project hive by apache.

the class TestAvroSerializer method canSerializeArraysWithNullablePrimitiveElements.

/**
 * Passes a list of ints containing a null element, declared as an Avro array whose
 * items are the union ["null", "int"], through the serializeAndDeserialize helper
 * (defined elsewhere in this class) and checks that the value read back is equal to,
 * but not the same instance as, the input list.
 */
@Test
public void canSerializeArraysWithNullablePrimitiveElements() throws SerDeException, IOException {
    final String fieldName = "listWithNulls";
    final String fieldSchema = "{ \"name\":\"listWithNulls\", \"type\": {\"type\":\"array\", \"items\": [\"null\", \"int\"]} }";

    // The null entry is the point of the test: the element type is a nullable union.
    List<Integer> expected = new ArrayList<Integer>();
    Collections.addAll(expected, 1, 2, null, 3);

    GenericRecord roundTripped = serializeAndDeserialize(fieldSchema, fieldName, expected);
    Object actual = roundTripped.get(fieldName);

    assertNotSame(expected, actual);
    assertEquals(expected, actual);
}
Also used : ArrayList(java.util.ArrayList) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 12 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project hive by apache.

the class TestAvroSerializer method canSerializeMaps.

/**
 * Checks that a String-to-Boolean map field comes back with equal contents after going
 * through the serializeAndDeserialize helper (defined elsewhere in this class).
 */
@Test
public void canSerializeMaps() throws SerDeException, IOException {
    final String fieldName = "map1";
    final String fieldSchema = "{ \"name\":\"map1\", \"type\":{\"type\":\"map\", \"values\":\"boolean\"} }";

    Map<String, Boolean> expected = new HashMap<String, Boolean>();
    expected.put("yes", Boolean.TRUE);
    expected.put("no", Boolean.FALSE);

    GenericRecord roundTripped = serializeAndDeserialize(fieldSchema, fieldName, expected);
    Object actual = roundTripped.get(fieldName);
    assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 13 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project hive by apache.

the class TestAvroSerializer method canSerializeMapsWithNullablePrimitiveValues.

/**
 * Checks that a map whose values are declared as the union ["null", "boolean"] comes
 * back with equal contents, including the entry mapped to null, after going through the
 * serializeAndDeserialize helper (defined elsewhere in this class).
 */
@Test
public void canSerializeMapsWithNullablePrimitiveValues() throws SerDeException, IOException {
    final String fieldName = "mapWithNulls";
    final String fieldSchema = "{ \"name\":\"mapWithNulls\", \"type\": {\"type\":\"map\", \"values\": [\"null\", \"boolean\"]} }";

    Map<String, Boolean> expected = new HashMap<String, Boolean>();
    expected.put("yes", Boolean.TRUE);
    expected.put("no", Boolean.FALSE);
    // The null value exercises the nullable branch of the union.
    expected.put("maybe", null);

    GenericRecord roundTripped = serializeAndDeserialize(fieldSchema, fieldName, expected);
    assertEquals(expected, roundTripped.get(fieldName));
}
Also used : HashMap(java.util.HashMap) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 14 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project hive by apache.

the class TestHBaseSerDe method getTestAvroBytesFromSchema.

/**
 * Builds, in memory, the bytes of an Avro data file holding one test record for the
 * given schema.
 *
 * <p>The record's "aRecord" field is set to an inner record with fixed values for
 * "int1", "boolean1" and "long1"; "string1" is also set when the schema text equals
 * {@code RECORD_SCHEMA_EVOLVED}.
 *
 * @param schemaToUse JSON text of the Avro schema; it must declare an "aRecord" field
 * @return the bytes produced by the data file writer
 * @throws IOException if writing the record fails
 */
private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException {
    // Schema.parse(String) is deprecated; Schema.Parser is the supported entry point.
    Schema s = new Schema.Parser().parse(schemaToUse);
    GenericData.Record record = new GenericData.Record(s);
    GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
    innerRecord.put("int1", 42);
    innerRecord.put("boolean1", true);
    // Upper-case suffix: a lower-case 'l' is easily misread as the digit 1.
    innerRecord.put("long1", 42432234234L);
    if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) {
        innerRecord.put("string1", "new value");
    }
    record.put("aRecord", innerRecord);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(s);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    try {
        dataFileWriter.create(s, out);
        dataFileWriter.append(record);
    } finally {
        // Close on every path so the writer is released even if create/append throws.
        // The bytes are read only after close, as in the original ordering.
        dataFileWriter.close();
    }
    return out.toByteArray();
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) DatumWriter(org.apache.avro.io.DatumWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericRecord(org.apache.avro.generic.GenericRecord) GenericData(org.apache.avro.generic.GenericData)

Example 15 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project hive by apache.

the class AvroContainerOutputFormat method getHiveRecordWriter.

/**
 * Creates a record writer that writes an Avro data file at {@code path}.
 *
 * <p>The schema is obtained from {@code AvroSerdeUtils.determineSchemaOrThrowException}.
 * When {@code isCompressed} is set, a codec is applied to the writer: the one named by
 * {@code OUTPUT_CODEC} in the job configuration, falling back to {@code DEFLATE_CODEC};
 * for deflate the level is read from {@code DEFLATE_LEVEL_KEY}.
 *
 * @param jobConf job configuration used for schema lookup, codec settings and file system access
 * @param path destination file
 * @param valueClass not used by this implementation
 * @param isCompressed whether to set a compression codec on the writer
 * @param properties properties passed to the schema lookup
 * @param progressable not used by this implementation
 * @return a writer wrapping the opened Avro data file
 * @throws IOException if the schema cannot be determined or the file cannot be opened
 */
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (AvroSerdeException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        dfw.setCodec(factory);
    }
    // Open the stream separately so it can be released if the Avro header cannot be written.
    java.io.OutputStream out = path.getFileSystem(jobConf).create(path);
    boolean created = false;
    try {
        dfw.create(schema, out);
        created = true;
    } finally {
        if (!created) {
            try {
                out.close();
            } catch (IOException ignored) {
                // Best effort: the failure from create() is the one to surface.
            }
        }
    }
    return new AvroGenericRecordWriter(dfw);
}
Also used : AvroSerdeException(org.apache.hadoop.hive.serde2.avro.AvroSerdeException) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) IOException(java.io.IOException) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) CodecFactory(org.apache.avro.file.CodecFactory) GenericRecord(org.apache.avro.generic.GenericRecord)

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord): 262; Schema (org.apache.avro.Schema): 101; Test (org.junit.Test): 80; GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 46; File (java.io.File): 35; IOException (java.io.IOException): 34; GenericData (org.apache.avro.generic.GenericData): 30; GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 30; ArrayList (java.util.ArrayList): 29; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 27; DataFileWriter (org.apache.avro.file.DataFileWriter): 20; HashMap (java.util.HashMap): 19; ByteBuffer (java.nio.ByteBuffer): 18; BinaryEncoder (org.apache.avro.io.BinaryEncoder): 17; Field (org.apache.avro.Schema.Field): 14; DataFileStream (org.apache.avro.file.DataFileStream): 14; GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 14; Utf8 (org.apache.avro.util.Utf8): 14; Encoder (org.apache.avro.io.Encoder): 12; DatasetRepository (com.cloudera.cdk.data.DatasetRepository): 11