use of org.apache.avro.util.Utf8 in project pinot by linkedin.
the class DictionariesTest method before.
/**
 * Builds a segment from the {@code AVRO_DATA} resource into {@code INDEX_DIR} and then
 * re-reads the same Avro file to collect, per column, the set of distinct values it contains.
 *
 * <p>The collected values are stored in {@code uniqueEntries}, keyed by Avro field name, after
 * being converted with {@code getAppropriateType} according to the column's data type in the
 * schema extracted from the Avro file.
 *
 * @throws Exception if segment generation or reading the Avro file fails
 */
@BeforeClass
public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    final File avroFile = new File(filePath);
    // Start from a clean index directory so a previous run cannot influence this one.
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avroFile, INDEX_DIR, "time_day", TimeUnit.DAYS, "test");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    segmentDirectory = new File(INDEX_DIR, driver.getSegmentName());

    final Schema schema = AvroUtils.extractSchemaFromAvro(avroFile);
    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(avroFile);
    try {
        final org.apache.avro.Schema avroSchema = avroReader.getSchema();
        final String[] columns = new String[avroSchema.getFields().size()];
        uniqueEntries = new HashMap<String, Set<Object>>();
        int i = 0;
        for (final Field f : avroSchema.getFields()) {
            columns[i] = f.name();
            uniqueEntries.put(f.name(), new HashSet<Object>());
            i++;
        }
        while (avroReader.hasNext()) {
            final GenericRecord rec = avroReader.next();
            for (final String column : columns) {
                Object val = rec.get(column);
                // Avro hands strings back as Utf8; normalise to java.lang.String before converting.
                if (val instanceof Utf8) {
                    val = ((Utf8) val).toString();
                }
                uniqueEntries.get(column).add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
            }
        }
    } finally {
        // The reader holds the underlying file stream open; release it even if reading fails.
        avroReader.close();
    }
}
use of org.apache.avro.util.Utf8 in project core by s4.
the class AvroSerDeser method deserialize.
/**
 * Converts a serialized wrapper message into an event map.
 *
 * <p>The raw bytes are first decoded with the wrapper schema. The wrapper's {@code eventType}
 * field names the schema used to decode the {@code rawdata} payload, and the decoded record is
 * then copied into a {@code Map<String, Object>}. If the resulting map has no value under
 * {@code EVENT_NAME_KEY}, the schema name is stored there.
 *
 * @param rawMessage the serialized wrapper message
 * @return the event as a {@code Map<String, Object>}
 * @throws RuntimeException if the wrapper lacks the {@code eventType} or {@code rawdata} field,
 *         or if decoding fails with an {@link IOException} (wrapped as the cause)
 */
@Override
public Object deserialize(byte[] rawMessage) {
    Schema wrapperSchema = avroSchemaManager.getCompiledSchema(MiscConstants.EVENT_WRAPPER_SCHEMA_NAME);
    try {
        GenericRecord wrapper = deserialize(wrapperSchema, rawMessage);
        Utf8 schemaNameUtf8 = (Utf8) wrapper.get("eventType");
        if (schemaNameUtf8 == null) {
            throw new RuntimeException("Wrapper message does not contain eventType field");
        }
        String schemaName = schemaNameUtf8.toString();
        Schema eventSchema = avroSchemaManager.getCompiledSchema(schemaName);
        ByteBuffer byteBuffer = (ByteBuffer) wrapper.get("rawdata");
        if (byteBuffer == null) {
            throw new RuntimeException("Wrapper message does not contain rawdata field");
        }
        // Copy only the bytes between position and limit. ByteBuffer.array() would expose the
        // entire backing array (and fails for read-only or direct buffers), which can include
        // bytes that are not part of the payload. A duplicate is used so the wrapper's buffer
        // position is left untouched.
        ByteBuffer payload = byteBuffer.duplicate();
        byte[] byteData = new byte[payload.remaining()];
        payload.get(byteData);
        GenericRecord avroEvent = deserialize(eventSchema, byteData);
        // convert the avro version of the event into a Map
        Map<String, Object> event = new HashMap<String, Object>();
        copyRecord(avroEvent, event);
        if (event.get(EVENT_NAME_KEY) == null) {
            event.put(EVENT_NAME_KEY, schemaName);
        }
        return event;
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
use of org.apache.avro.util.Utf8 in project core by s4.
the class AvroSerDeser method serialize.
/**
 * Serializes an event map into a wrapper message.
 *
 * <p>The event's name (the value under {@code Event.EVENT_NAME_KEY}) selects the schema used to
 * encode the event. The encoded event is placed in the wrapper's {@code rawdata} field, the
 * name in {@code eventType}, and the event's {@code traceId} (or {@code -1} when absent) in
 * {@code traceId}. The wrapper is then encoded with the wrapper schema.
 *
 * @param message the event, expected to be a {@code Map<String, Object>}
 * @return the serialized wrapper message
 * @throws IllegalArgumentException if the event has no name under {@code Event.EVENT_NAME_KEY}
 * @throws RuntimeException if encoding fails with an {@link IOException} (wrapped as the cause)
 */
public byte[] serialize(Object message) {
    // Callers pass events as Map<String, Object>; the cast cannot be checked at runtime.
    @SuppressWarnings("unchecked")
    Map<String, Object> event = (Map<String, Object>) message;
    Schema wrapperSchema = avroSchemaManager.getCompiledSchema(MiscConstants.EVENT_WRAPPER_SCHEMA_NAME);
    GenericRecord wrapper = new GenericData.Record(wrapperSchema);
    String schemaName = (String) event.get(io.s4.collector.Event.EVENT_NAME_KEY);
    if (schemaName == null) {
        // Fail with a descriptive message instead of passing null on to Utf8 and the schema lookup.
        throw new IllegalArgumentException("Event does not contain " + io.s4.collector.Event.EVENT_NAME_KEY + " entry");
    }
    wrapper.put("eventType", new Utf8(schemaName));
    Object traceId = event.get("traceId");
    wrapper.put("traceId", traceId != null ? traceId : Long.valueOf(-1L));
    Schema eventSchema = avroSchemaManager.getCompiledSchema(schemaName);
    GenericRecord avroRecord = new GenericData.Record(eventSchema);
    copyRecord(event, eventSchema, avroRecord);
    try {
        byte[] serializedEvent = serialize(eventSchema, avroRecord);
        // put the serialized event in the wrapper; wrap() yields position 0 and
        // limit == length without copying the array
        wrapper.put("rawdata", ByteBuffer.wrap(serializedEvent));
        // serialize the wrapper for transmission
        return serialize(wrapperSchema, wrapper);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
use of org.apache.avro.util.Utf8 in project flink by apache.
the class DataInputDecoder method readString.
// --------------------------------------------------------------------------------------------
// strings
// --------------------------------------------------------------------------------------------
/**
 * Reads a length-prefixed string from the input into a {@link Utf8}.
 *
 * <p>The length is read with {@code readInt()}, followed by that many bytes read directly
 * into the result's byte array.
 *
 * @param old an instance to reuse for the result, or {@code null} to allocate a new one
 * @return {@code old} if it was non-null, otherwise a new {@code Utf8}, holding the bytes read
 * @throws IOException if the length prefix is negative or the input cannot be read
 */
@Override
public Utf8 readString(Utf8 old) throws IOException {
    final int length = readInt();
    if (length < 0) {
        // A negative length can only come from corrupt input; reject it before it is
        // stored as the byte length of the result.
        throw new IOException("Malformed data. String length is negative: " + length);
    }
    final Utf8 result = (old != null ? old : new Utf8());
    result.setByteLength(length);
    if (length > 0) {
        in.readFully(result.getBytes(), 0, length);
    }
    return result;
}
use of org.apache.avro.util.Utf8 in project flink by apache.
the class AvroRecordInputFormatTest method testDeserialisation.
/**
 * Test if the AvroInputFormat is able to properly read data from an avro file.
 *
 * <p>Checks the name, string/boolean arrays, enum and map fields of the first record, then
 * verifies that exactly one more record follows before the format reports its end.
 *
 * @throws IOException if the test file cannot be opened or read
 */
@Test
public void testDeserialisation() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(1);
    // JUnit's signature is (expected, actual); keep that order so failure messages read correctly.
    assertEquals(1, splits.length);
    format.open(splits[0]);
    try {
        User u = format.nextRecord(null);
        assertNotNull(u);
        String name = u.getName().toString();
        assertNotNull("empty record", name);
        assertEquals("name not equal", TEST_NAME, name);
        // check arrays
        List<CharSequence> sl = u.getTypeArrayString();
        assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
        assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());
        List<Boolean> bl = u.getTypeArrayBoolean();
        assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
        assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));
        // check enums
        Colors enumValue = u.getTypeEnum();
        assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);
        // check maps
        Map<CharSequence, Long> lm = u.getTypeMap();
        assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
        assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());
        assertFalse("expecting second element", format.reachedEnd());
        assertNotNull("expecting second element", format.nextRecord(u));
        assertNull(format.nextRecord(u));
        assertTrue(format.reachedEnd());
    } finally {
        // Close the format even when an assertion above fails, so the file handle is released.
        format.close();
    }
}
Aggregations