Search in sources :

Example 6 with Record

use of org.apache.avro.generic.GenericData.Record in project pinot by linkedin.

the class PinotSegmentToAvroConverter method convert.

/**
 * Reads every row from the Pinot segment under {@code _segmentDir} and appends it,
 * as an Avro {@link Record}, to the Avro file at {@code _outputFile}.
 *
 * <p>The Avro schema is built from the schema reported by the segment reader.
 * Values that are {@code Object[]} are wrapped as a {@code List} before being
 * stored in the record; all other values are stored unchanged.
 *
 * @throws Exception if reading the segment or writing the Avro file fails
 */
@Override
public void convert() throws Exception {
    final PinotSegmentRecordReader segmentReader = new PinotSegmentRecordReader(new File(_segmentDir));
    try {
        segmentReader.init();
        final Schema schema = buildAvroSchemaFromPinotSchema(segmentReader.getSchema());
        final GenericDatumWriter<Record> datumWriter = new GenericDatumWriter<Record>(schema);
        try (DataFileWriter<Record> avroWriter = new DataFileWriter<>(datumWriter)) {
            avroWriter.create(schema, new File(_outputFile));
            while (segmentReader.hasNext()) {
                final GenericRow pinotRow = segmentReader.next();
                final Record avroRecord = new Record(schema);
                for (String column : pinotRow.getFieldNames()) {
                    final Object cell = pinotRow.getValue(column);
                    // Array-valued cells are handed to Avro as a List view over the array.
                    avroRecord.put(column, cell instanceof Object[] ? Arrays.asList((Object[]) cell) : cell);
                }
                avroWriter.append(avroRecord);
            }
        }
    } finally {
        // The reader is closed whether or not the conversion succeeded.
        segmentReader.close();
    }
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) Record(org.apache.avro.generic.GenericData.Record) File(java.io.File) PinotSegmentRecordReader(com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader)

Example 7 with Record

use of org.apache.avro.generic.GenericData.Record in project pinot by linkedin.

the class AvroRecordReader method transformAvroArrayToObjectArray.

/**
 * Copies the elements of an Avro array into a plain {@code Object[]}.
 *
 * <p>A {@code null} or empty input yields a single-element array holding the
 * default null value for {@code spec}. Otherwise each element is converted as
 * follows: a {@link Record} is replaced by its field at position 0, a
 * {@link Utf8} is replaced by its {@code String} form, and a {@code null}
 * (after those steps) is replaced by the default null value for {@code spec}.
 *
 * @param arr  the Avro array to convert; may be {@code null}
 * @param spec the field spec passed to {@code getDefaultNullValue}
 * @return the converted elements, never empty
 */
public static Object[] transformAvroArrayToObjectArray(Array arr, FieldSpec spec) {
    if (arr == null || arr.size() == 0) {
        return new Object[] { getDefaultNullValue(spec) };
    }
    final Object[] converted = new Object[arr.size()];
    final Iterator elements = arr.iterator();
    for (int pos = 0; elements.hasNext(); pos++) {
        Object element = elements.next();
        // Unwrap a nested record to its first field before the remaining checks.
        if (element instanceof Record) {
            element = ((Record) element).get(0);
        }
        if (element instanceof Utf8) {
            element = ((Utf8) element).toString();
        }
        if (element == null) {
            element = getDefaultNullValue(spec);
        }
        converted[pos] = element;
    }
    return converted;
}
Also used : Iterator(java.util.Iterator) Utf8(org.apache.avro.util.Utf8) Record(org.apache.avro.generic.GenericData.Record) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 8 with Record

use of org.apache.avro.generic.GenericData.Record in project pinot by linkedin.

the class SegmentTestUtils method transformAvroArrayToObjectArray.

/**
 * Copies the elements of an Avro array into a plain {@code Object[]}.
 *
 * <p>A {@code null} input yields an empty array. Each element that is a
 * {@link Record} is replaced by its field at position 0, and each
 * {@link Utf8} is replaced by its {@code String} form; other values
 * (including {@code null}) are copied as they are.
 *
 * @param arr the Avro array to convert; may be {@code null}
 * @return the converted elements, in iteration order
 */
private static Object[] transformAvroArrayToObjectArray(Array arr) {
    if (arr == null) {
        return new Object[0];
    }
    final Object[] converted = new Object[arr.size()];
    final Iterator elements = arr.iterator();
    for (int pos = 0; elements.hasNext(); pos++) {
        Object element = elements.next();
        // Unwrap a nested record to its first field before the Utf8 check.
        if (element instanceof Record) {
            element = ((Record) element).get(0);
        }
        if (element instanceof Utf8) {
            element = ((Utf8) element).toString();
        }
        converted[pos] = element;
    }
    return converted;
}
Also used : Iterator(java.util.Iterator) Utf8(org.apache.avro.util.Utf8) Record(org.apache.avro.generic.GenericData.Record) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 9 with Record

use of org.apache.avro.generic.GenericData.Record in project pinot by linkedin.

the class KafkaAvroMessageDecoder method decode.

/**
 * Decodes one Avro-encoded message into {@code destination}.
 *
 * <p>The bytes between {@code SCHEMA_HASH_START_OFFSET} and
 * {@code SCHEMA_HASH_END_OFFSET} (relative to {@code offset}) are hex-encoded
 * and used as the key into {@code md5ToAvroSchemaMap}. On a miss the schema is
 * fetched from {@code schemaRegistryBaseUrl + "/id=" + key} and stored in the
 * map; if that fails, {@code defaultAvroSchema} is used instead and the failure
 * is logged. The record body is read starting {@code HEADER_LENGTH} bytes after
 * {@code offset}.
 *
 * @param payload     the raw message bytes
 * @param offset      index in {@code payload} at which the message starts
 * @param length      number of bytes belonging to the message
 * @param destination the row handed to the record converter
 * @return the converter's result, or {@code null} when the payload is
 *         {@code null} or empty, {@code length} is zero, or reading the
 *         record throws an {@link IOException}
 */
@Override
public GenericRow decode(byte[] payload, int offset, int length, GenericRow destination) {
    final boolean nothingToDecode = payload == null || payload.length == 0 || length == 0;
    if (nothingToDecode) {
        return null;
    }

    final byte[] hashBytes = Arrays.copyOfRange(payload, SCHEMA_HASH_START_OFFSET + offset, SCHEMA_HASH_END_OFFSET + offset);
    final String schemaKey = hex(hashBytes);

    org.apache.avro.Schema writerSchema = null;
    boolean fetchFailed = false;
    if (!md5ToAvroSchemaMap.containsKey(schemaKey)) {
        final String registryUrl = schemaRegistryBaseUrl + "/id=" + schemaKey;
        try {
            writerSchema = fetchSchema(new URL(registryUrl));
            md5ToAvroSchemaMap.put(schemaKey, writerSchema);
        } catch (Exception e) {
            // Fall back to the default schema so decoding can still be attempted.
            writerSchema = defaultAvroSchema;
            LOGGER.error("Error fetching schema using url {}. Attempting to continue with previous schema", registryUrl, e);
            fetchFailed = true;
        }
    } else {
        writerSchema = md5ToAvroSchemaMap.get(schemaKey);
    }

    final DatumReader<Record> datumReader = new GenericDatumReader<Record>(writerSchema);
    try {
        final Record decoded = datumReader.read(null, decoderFactory.createBinaryDecoder(payload, HEADER_LENGTH + offset, length - HEADER_LENGTH, null));
        return avroRecordConvetrer.transform(decoded, writerSchema, destination);
    } catch (IOException e) {
        LOGGER.error("Caught exception while reading message using schema {}{}", (writerSchema == null ? "null" : writerSchema.getName()), (fetchFailed ? "(possibly due to schema update failure)" : ""), e);
        return null;
    }
}
Also used : Record(org.apache.avro.generic.GenericData.Record) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) IOException(java.io.IOException) GenericData(org.apache.avro.generic.GenericData) URL(java.net.URL)

Example 10 with Record

use of org.apache.avro.generic.GenericData.Record in project crunch by cloudera.

the class AvroTypeTest method testGetDetachedValue_GenericAvroType.

/**
 * Checks that {@code getDetachedValue} on a generic Avro type returns a record
 * that is equal to the input but is not the same object.
 */
@Test
public void testGetDetachedValue_GenericAvroType() {
    final AvroType<Record> personType = Avros.generics(Person.SCHEMA$);

    final Record original = new Record(Person.SCHEMA$);
    original.put("name", "name value");
    original.put("age", 42);
    original.put("siblingnames", Lists.newArrayList());

    final Record detached = personType.getDetachedValue(original);

    // Equal content, distinct instance.
    assertEquals(original, detached);
    assertNotSame(original, detached);
}
Also used : Record(org.apache.avro.generic.GenericData.Record) GenericData(org.apache.avro.generic.GenericData) Test(org.junit.Test)

Aggregations

Record (org.apache.avro.generic.GenericData.Record) 13, Utf8 (org.apache.avro.util.Utf8) 6, GenericData (org.apache.avro.generic.GenericData) 5, Test (org.junit.Test) 5, GenericRecord (org.apache.avro.generic.GenericRecord) 4, Iterator (java.util.Iterator) 3, Schema (org.apache.avro.Schema) 3, GenericRow (com.linkedin.pinot.core.data.GenericRow) 1, PinotSegmentRecordReader (com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader) 1, File (java.io.File) 1, IOException (java.io.IOException) 1, URL (java.net.URL) 1, HashMap (java.util.HashMap) 1, DataFileWriter (org.apache.avro.file.DataFileWriter) 1, GenericDatumReader (org.apache.avro.generic.GenericDatumReader) 1, Pipeline (org.apache.crunch.Pipeline) 1, MRPipeline (org.apache.crunch.impl.mr.MRPipeline) 1, Path (org.apache.hadoop.fs.Path) 1