Search in sources :

Example 66 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

the class UploadRefreshDeleteIntegrationTest method generateAndUploadRandomSegment.

/**
 * Generates an Avro file of random integers, builds a segment from it and uploads every file
 * found in the segment tar directory to the controller at {@code localhost:8998}.
 *
 * <p>The temporary Avro file and the segment tar directory are removed when the method exits,
 * whether it completes normally or throws.
 *
 * @param segmentName name of the segment; the token after the first underscore must be an
 *     integer, which is used as the segment index
 * @param rowCount number of rows to write; each row sets field 0 to a random {@code int}
 * @throws Exception if the Avro file cannot be written, or the segment cannot be built or
 *     uploaded
 */
protected void generateAndUploadRandomSegment(String segmentName, int rowCount) throws Exception {
    // Parse the index first so a malformed segment name fails before any file is created.
    int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
    Schema schema = new Schema.Parser().parse(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
    File avroFile = new File(_tmpDir, segmentName + ".avro");
    File segmentTarDir = new File(_tarsDir, segmentName);
    try {
        ThreadLocalRandom random = ThreadLocalRandom.current();
        // A single record instance is reused for every row; only field 0 is overwritten.
        GenericRecord record = new GenericData.Record(schema);
        // try-with-resources guarantees the writer is closed even if create/append fails.
        try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            fileWriter.create(schema, avroFile);
            for (int i = 0; i < rowCount; i++) {
                record.put(0, random.nextInt());
                fileWriter.append(record);
            }
        }

        ensureDirectoryExistsAndIsEmpty(segmentTarDir);
        ExecutorService executor = MoreExecutors.sameThreadExecutor();
        try {
            buildSegmentsFromAvro(Collections.singletonList(avroFile), executor, segmentIndex, new File(_segmentsDir, segmentName), segmentTarDir, this.tableName, false, null);
        } finally {
            executor.shutdown();
        }
        executor.awaitTermination(1L, TimeUnit.MINUTES);

        // File.list() returns null when the path is not a directory or an I/O error occurs.
        String[] segmentFileNames = segmentTarDir.list();
        if (segmentFileNames == null) {
            throw new IllegalStateException("Unable to list segment tar directory: " + segmentTarDir);
        }
        for (String segmentFileName : segmentFileNames) {
            File file = new File(segmentTarDir, segmentFileName);
            FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, file.length());
        }
    } finally {
        // Best-effort cleanup of the temporary artifacts, also on failure.
        avroFile.delete();
        FileUtils.deleteQuietly(segmentTarDir);
    }
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ExecutorService(java.util.concurrent.ExecutorService) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 67 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

the class QueryGenerator method addAvroData.

/**
   * Reads every record of an Avro file and folds its column values into the stored value sets.
   *
   * <p>For columns tracked in the multi-value map, the recorded maximum element count is raised
   * when a longer array is seen and each element is stored individually. Null values are skipped.
   *
   * @param avroFile Avro file.
   */
private void addAvroData(File avroFile) {
    GenericDatumReader<GenericRecord> recordReader = new GenericDatumReader<>();
    try (DataFileReader<GenericRecord> avroReader = new DataFileReader<>(avroFile, recordReader)) {
        for (GenericRecord row : avroReader) {
            for (String column : _columnNames) {
                Set<String> valueSet = _columnToValueSet.get(column);
                Object cell = row.get(column);
                if (cell == null) {
                    // Nothing to record for a missing value.
                    continue;
                }

                Integer knownMax = _multiValueColumnMaxNumElements.get(column);
                if (knownMax == null) {
                    // Single-value column: store the value itself.
                    storeAvroValueIntoValueSet(valueSet, cell);
                    continue;
                }

                // Multi-value column: track the longest array seen and store each element.
                GenericData.Array<?> elements = (GenericData.Array<?>) cell;
                int elementCount = elements.size();
                if (elementCount > knownMax) {
                    _multiValueColumnMaxNumElements.put(column, elementCount);
                }
                for (Object item : elements) {
                    storeAvroValueIntoValueSet(valueSet, item);
                }
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) GenericData(org.apache.avro.generic.GenericData) JSONArray(org.json.JSONArray) DataFileReader(org.apache.avro.file.DataFileReader) JSONObject(org.json.JSONObject) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 68 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project core by s4.

the class AvroSerDeser method deserialize.

/**
 * Decodes a wrapped Avro message into an event map.
 *
 * <p>The raw bytes are first decoded with the event wrapper schema. The wrapper's
 * {@code eventType} field names the schema used to decode the bytes held in its
 * {@code rawdata} field. The decoded record is copied into a map, and the schema name is
 * stored under {@code EVENT_NAME_KEY} when the event does not already carry a value there.
 *
 * @param rawMessage serialized wrapper message
 * @return the event as a {@code Map<String, Object>}
 * @throws RuntimeException if the wrapper lacks {@code eventType} or {@code rawdata}, or if
 *     decoding fails with an {@link IOException} (wrapped as the cause)
 */
@Override
public Object deserialize(byte[] rawMessage) {
    Schema wrapperSchema = avroSchemaManager.getCompiledSchema(MiscConstants.EVENT_WRAPPER_SCHEMA_NAME);
    try {
        GenericRecord wrapper = deserialize(wrapperSchema, rawMessage);
        Utf8 schemaNameUtf8 = (Utf8) wrapper.get("eventType");
        if (schemaNameUtf8 == null) {
            throw new RuntimeException("Wrapper message does not contain eventType field");
        }
        String schemaName = schemaNameUtf8.toString();
        Schema eventSchema = avroSchemaManager.getCompiledSchema(schemaName);
        ByteBuffer byteBuffer = (ByteBuffer) wrapper.get("rawdata");
        if (byteBuffer == null) {
            throw new RuntimeException("Wrapper message does not contain rawdata field");
        }
        // Copy only the readable region (position..limit). ByteBuffer.array() would expose the
        // whole backing array regardless of offset/limit and fails for read-only or direct
        // buffers. The duplicate keeps the wrapper's own buffer position untouched.
        ByteBuffer payloadView = byteBuffer.duplicate();
        byte[] byteData = new byte[payloadView.remaining()];
        payloadView.get(byteData);
        GenericRecord avroEvent = deserialize(eventSchema, byteData);
        // convert the avro version of the event into a Map
        Map<String, Object> event = new HashMap<String, Object>();
        copyRecord(avroEvent, event);
        if (event.get(EVENT_NAME_KEY) == null) {
            event.put(EVENT_NAME_KEY, schemaName);
        }
        return event;
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
Also used : Schema(org.apache.avro.Schema) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer)

Example 69 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project core by s4.

the class AvroSerDeser method copyArray.

/**
 * Appends one Avro record per map in {@code list} to {@code avroArray}.
 *
 * <p>Only arrays of records are supported: when {@code elementSchema} is not of type
 * {@code RECORD} the array is returned without any additions.
 *
 * @param list source maps, one per array element
 * @param elementSchema schema of the array's elements
 * @param avroArray destination array; modified in place
 * @return {@code avroArray}
 */
public static GenericArray<GenericRecord> copyArray(List<Map<String, Object>> list, Schema elementSchema, GenericArray<GenericRecord> avroArray) {
    boolean holdsRecords = elementSchema.getType().equals(Schema.Type.RECORD);
    if (holdsRecords) {
        for (Map<String, Object> source : list) {
            // The empty record is added to the array first and populated afterwards.
            GenericRecord target = new GenericData.Record(elementSchema);
            avroArray.add(target);
            copyRecord(source, elementSchema, target);
        }
    }
    // For any non-record element type the array is handed back untouched.
    return avroArray;
}
Also used : GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 70 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project core by s4.

the class AvroSerDeser method serialize.

/**
 * Serializes an event map into a wrapped Avro message.
 *
 * <p>The event is encoded with the schema named by its {@code EVENT_NAME_KEY} entry. The
 * resulting bytes are placed in the {@code rawdata} field of a wrapper record, together with
 * the schema name ({@code eventType}) and the event's {@code traceId} ({@code -1} when the
 * event has none). The wrapper is then encoded with the event wrapper schema.
 *
 * @param message the event; must be a {@code Map<String, Object>}
 * @return the serialized wrapper message
 * @throws RuntimeException if encoding fails with an {@link IOException} (wrapped as the cause)
 */
public byte[] serialize(Object message) {
    // The interface accepts Object, but events are always handled as String-keyed maps here.
    @SuppressWarnings("unchecked")
    Map<String, Object> event = (Map<String, Object>) message;
    Schema wrapperSchema = avroSchemaManager.getCompiledSchema(MiscConstants.EVENT_WRAPPER_SCHEMA_NAME);
    GenericRecord wrapper = new GenericData.Record(wrapperSchema);
    // The event name doubles as the name of the schema used to encode the event.
    String schemaName = (String) event.get(io.s4.collector.Event.EVENT_NAME_KEY);
    wrapper.put("eventType", new Utf8(schemaName));
    Object traceId = event.get("traceId");
    // Long.valueOf replaces the deprecated Long(long) constructor.
    wrapper.put("traceId", traceId != null ? traceId : Long.valueOf(-1L));
    Schema eventSchema = avroSchemaManager.getCompiledSchema(schemaName);
    GenericRecord avroRecord = new GenericData.Record(eventSchema);
    copyRecord(event, eventSchema, avroRecord);
    try {
        byte[] serializedEvent = serialize(eventSchema, avroRecord);
        // put the serialized event in the wrapper; wrap() yields position 0 and
        // limit == length, the same state as allocate + put + rewind, without the copy
        wrapper.put("rawdata", ByteBuffer.wrap(serializedEvent));
        // serialize the wrapper for transmission
        return serialize(wrapperSchema, wrapper);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
Also used : Schema(org.apache.avro.Schema) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord): 262; Schema (org.apache.avro.Schema): 101; Test (org.junit.Test): 80; GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 46; File (java.io.File): 35; IOException (java.io.IOException): 34; GenericData (org.apache.avro.generic.GenericData): 30; GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 30; ArrayList (java.util.ArrayList): 29; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 27; DataFileWriter (org.apache.avro.file.DataFileWriter): 20; HashMap (java.util.HashMap): 19; ByteBuffer (java.nio.ByteBuffer): 18; BinaryEncoder (org.apache.avro.io.BinaryEncoder): 17; Field (org.apache.avro.Schema.Field): 14; DataFileStream (org.apache.avro.file.DataFileStream): 14; GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 14; Utf8 (org.apache.avro.util.Utf8): 14; Encoder (org.apache.avro.io.Encoder): 12; DatasetRepository (com.cloudera.cdk.data.DatasetRepository): 11