Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin: class UploadRefreshDeleteIntegrationTest, method generateAndUploadRandomSegment.
/**
 * Generates an Avro file of {@code rowCount} random integer rows, builds a Pinot segment from
 * it, uploads the resulting segment tar file(s) to the local controller, and removes the
 * temporary Avro file and tar directory afterwards.
 *
 * @param segmentName segment name; must contain an underscore-separated numeric index
 *                    (e.g. "mytable_3") — the part after the first underscore is parsed as int
 * @param rowCount    number of random rows to write into the Avro file
 * @throws Exception if segment generation, upload, or executor termination fails
 */
protected void generateAndUploadRandomSegment(String segmentName, int rowCount) throws Exception {
  ThreadLocalRandom random = ThreadLocalRandom.current();
  Schema schema = new Schema.Parser().parse(
      new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource("dummy.avsc"))));
  GenericRecord record = new GenericData.Record(schema);
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
  File avroFile = new File(_tmpDir, segmentName + ".avro");
  // try-with-resources guarantees the writer is closed even if create/append throws;
  // the original closed it only on the happy path, leaking the file handle on failure.
  try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<GenericRecord>(datumWriter)) {
    fileWriter.create(schema, avroFile);
    for (int i = 0; i < rowCount; i++) {
      record.put(0, random.nextInt());
      fileWriter.append(record);
    }
  }
  int segmentIndex = Integer.parseInt(segmentName.split("_")[1]);
  File segmentTarDir = new File(_tarsDir, segmentName);
  ensureDirectoryExistsAndIsEmpty(segmentTarDir);
  // Same-thread executor: segment build runs synchronously on this thread.
  ExecutorService executor = MoreExecutors.sameThreadExecutor();
  buildSegmentsFromAvro(Collections.singletonList(avroFile), executor, segmentIndex,
      new File(_segmentsDir, segmentName), segmentTarDir, this.tableName, false, null);
  executor.shutdown();
  executor.awaitTermination(1L, TimeUnit.MINUTES);
  // Upload every tarred segment file produced by the build step.
  for (String segmentFileName : segmentTarDir.list()) {
    File file = new File(segmentTarDir, segmentFileName);
    FileUploadUtils.sendSegmentFile("localhost", "8998", segmentFileName, file, file.length());
  }
  avroFile.delete();
  FileUtils.deleteQuietly(segmentTarDir);
}
Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin: class QueryGenerator, method addAvroData.
/**
* Helper method to read in an Avro file and add data to the storage.
*
* @param avroFile Avro file.
*/
private void addAvroData(File avroFile) {
// Read in records and update the values stored.
GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(avroFile, datumReader)) {
for (GenericRecord genericRecord : fileReader) {
for (String columnName : _columnNames) {
Set<String> values = _columnToValueSet.get(columnName);
// Turn the Avro value into a valid SQL String token.
Object avroValue = genericRecord.get(columnName);
if (avroValue != null) {
Integer storedMaxNumElements = _multiValueColumnMaxNumElements.get(columnName);
if (storedMaxNumElements != null) {
// Multi-value column
GenericData.Array array = (GenericData.Array) avroValue;
int numElements = array.size();
if (storedMaxNumElements < numElements) {
_multiValueColumnMaxNumElements.put(columnName, numElements);
}
for (Object element : array) {
storeAvroValueIntoValueSet(values, element);
}
} else {
// Single-value column
storeAvroValueIntoValueSet(values, avroValue);
}
}
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
Use of org.apache.avro.generic.GenericRecord in project core by s4: class AvroSerDeser, method deserialize.
/**
 * Deserializes a raw wrapper message into an event Map. The wrapper record carries the
 * event schema name in its "eventType" field and the Avro-serialized event payload in
 * "rawdata"; the payload is deserialized with the looked-up schema and flattened into a
 * String-to-Object map. The event name key is filled in from the schema name when absent.
 *
 * @param rawMessage Avro-serialized wrapper bytes
 * @return the event as a Map&lt;String, Object&gt;
 * @throws RuntimeException if a required wrapper field is missing or deserialization fails
 */
@Override
public Object deserialize(byte[] rawMessage) {
  Schema wrapperSchema = avroSchemaManager.getCompiledSchema(MiscConstants.EVENT_WRAPPER_SCHEMA_NAME);
  try {
    // deserialize() returns the wrapper record; the original pre-allocated a
    // GenericData.Record here that was immediately overwritten — dead allocation removed.
    GenericRecord wrapper = deserialize(wrapperSchema, rawMessage);
    Utf8 schemaNameUtf8 = (Utf8) wrapper.get("eventType");
    if (schemaNameUtf8 == null) {
      throw new RuntimeException("Wrapper message does not contain eventType field");
    }
    String schemaName = schemaNameUtf8.toString();
    Schema eventSchema = avroSchemaManager.getCompiledSchema(schemaName);
    ByteBuffer byteBuffer = (ByteBuffer) wrapper.get("rawdata");
    if (byteBuffer == null) {
      throw new RuntimeException("Wrapper message does not contain rawdata field");
    }
    byte[] byteData = byteBuffer.array();
    GenericRecord avroEvent = deserialize(eventSchema, byteData);
    // Convert the Avro record into a plain Map and tag it with its event name if missing.
    Map<String, Object> event = new HashMap<String, Object>();
    copyRecord(avroEvent, event);
    if (event.get(EVENT_NAME_KEY) == null) {
      event.put(EVENT_NAME_KEY, schemaName);
    }
    return event;
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
Use of org.apache.avro.generic.GenericRecord in project core by s4: class AvroSerDeser, method copyArray.
/**
 * Copies a list of Map-based records into the supplied Avro array, one new
 * GenericData.Record per list element. Only arrays whose element schema is of type
 * RECORD are supported; for any other element type the array is returned untouched.
 *
 * @param list          source records, one Map per array element
 * @param elementSchema Avro schema of each array element
 * @param avroArray     destination array, mutated in place
 * @return the destination array (possibly unmodified)
 */
public static GenericArray<GenericRecord> copyArray(List<Map<String, Object>> list, Schema elementSchema, GenericArray<GenericRecord> avroArray) {
  // Bail out early: only arrays of records can be copied.
  if (elementSchema.getType() != Schema.Type.RECORD) {
    return avroArray;
  }
  for (Map<String, Object> source : list) {
    GenericRecord target = new GenericData.Record(elementSchema);
    avroArray.add(target);
    copyRecord(source, elementSchema, target);
  }
  return avroArray;
}
Use of org.apache.avro.generic.GenericRecord in project core by s4: class AvroSerDeser, method serialize.
/**
 * Serializes an event Map into the wrapper Avro format: the event itself is Avro-serialized
 * with the schema named by its event-name key, then embedded as "rawdata" in a wrapper
 * record that also carries the schema name ("eventType") and an optional trace id.
 *
 * @param message the event; expected to be a Map&lt;String, Object&gt;
 * @return Avro-serialized wrapper bytes
 * @throws RuntimeException if Avro serialization fails
 */
public byte[] serialize(Object message) {
  Map<String, Object> event = (Map<String, Object>) message;
  Schema wrapperSchema = avroSchemaManager.getCompiledSchema(MiscConstants.EVENT_WRAPPER_SCHEMA_NAME);
  GenericRecord wrapper = new GenericData.Record(wrapperSchema);
  // The event name doubles as the schema name (the original aliased it through a
  // redundant second variable).
  String schemaName = (String) event.get(io.s4.collector.Event.EVENT_NAME_KEY);
  wrapper.put("eventType", new Utf8(schemaName));
  // Propagate the trace id if present; -1 marks "no trace". Long.valueOf replaces the
  // deprecated `new Long(-1)` constructor.
  Object traceId = event.get("traceId");
  wrapper.put("traceId", traceId != null ? traceId : Long.valueOf(-1L));
  Schema eventSchema = avroSchemaManager.getCompiledSchema(schemaName);
  GenericRecord avroRecord = new GenericData.Record(eventSchema);
  copyRecord(event, eventSchema, avroRecord);
  try {
    byte[] serializedEvent = serialize(eventSchema, avroRecord);
    // Wrap the bytes directly (position 0, same contents) instead of the original
    // allocate/put/rewind dance — the array is not reused, so sharing it is safe.
    wrapper.put("rawdata", ByteBuffer.wrap(serializedEvent));
    // Serialize the wrapper for transmission.
    return serialize(wrapperSchema, wrapper);
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
Aggregations