Search in sources :

Example 16 with Utf8

use of org.apache.avro.util.Utf8 in project pinot by linkedin.

the class DictionariesTest method before.

/**
 * Prepares the fixtures shared by the tests in this class.
 *
 * <p>Runs the segment creation driver over the Avro resource into {@code INDEX_DIR} (clearing
 * any previous contents first), remembers the resulting segment directory, and then scans the
 * Avro file to collect, per Avro field name, the set of distinct values it contains. Values are
 * converted through {@code getAppropriateType} using the data type that the extracted schema
 * declares for the column.
 *
 * @throws Exception if the segment cannot be built or the Avro file cannot be read
 */
@BeforeClass
public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    final File avroFile = new File(filePath);
    if (INDEX_DIR.exists()) {
        FileUtils.deleteQuietly(INDEX_DIR);
    }
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avroFile, INDEX_DIR, "time_day", TimeUnit.DAYS, "test");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    segmentDirectory = new File(INDEX_DIR, driver.getSegmentName());
    final Schema schema = AvroUtils.extractSchemaFromAvro(avroFile);
    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(avroFile);
    // The reader holds an open stream on the Avro file; release it even if reading fails.
    try {
        final org.apache.avro.Schema avroSchema = avroReader.getSchema();
        final String[] columns = new String[avroSchema.getFields().size()];
        int i = 0;
        for (final Field f : avroSchema.getFields()) {
            columns[i] = f.name();
            i++;
        }
        uniqueEntries = new HashMap<String, Set<Object>>();
        for (final String column : columns) {
            uniqueEntries.put(column, new HashSet<Object>());
        }
        while (avroReader.hasNext()) {
            final GenericRecord rec = avroReader.next();
            for (final String column : columns) {
                Object val = rec.get(column);
                // Normalise Avro's Utf8 to String before converting to the column's data type.
                if (val instanceof Utf8) {
                    val = ((Utf8) val).toString();
                }
                uniqueEntries.get(column).add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
            }
        }
    } finally {
        avroReader.close();
    }
}
Also used : SegmentIndexCreationDriver(com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver) HashSet(java.util.HashSet) Set(java.util.Set) Schema(com.linkedin.pinot.common.data.Schema) Field(org.apache.avro.Schema.Field) SegmentGeneratorConfig(com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 17 with Utf8

use of org.apache.avro.util.Utf8 in project core by s4.

the class AvroSerDeser method deserialize.

/**
 * Decodes a wrapped event message into a map of event fields.
 *
 * <p>The raw bytes are first decoded with the event wrapper schema. The wrapper's
 * {@code eventType} field names the schema used to decode the payload stored in the wrapper's
 * {@code rawdata} field. The decoded payload is copied into a map; if the map has no value under
 * {@code EVENT_NAME_KEY}, the schema name is stored there.
 *
 * @param rawMessage the serialized wrapper message
 * @return a {@code Map<String, Object>} holding the event's fields
 * @throws RuntimeException if the wrapper lacks {@code eventType} or {@code rawdata}, or if
 *         decoding fails with an {@link IOException} (which is kept as the cause)
 */
@Override
public Object deserialize(byte[] rawMessage) {
    Schema wrapperSchema = avroSchemaManager.getCompiledSchema(MiscConstants.EVENT_WRAPPER_SCHEMA_NAME);
    try {
        GenericRecord wrapper = deserialize(wrapperSchema, rawMessage);
        // Accept any CharSequence so both Utf8 and String representations work.
        CharSequence eventType = (CharSequence) wrapper.get("eventType");
        if (eventType == null) {
            throw new RuntimeException("Wrapper message does not contain eventType field");
        }
        String schemaName = eventType.toString();
        Schema eventSchema = avroSchemaManager.getCompiledSchema(schemaName);
        ByteBuffer byteBuffer = (ByteBuffer) wrapper.get("rawdata");
        if (byteBuffer == null) {
            throw new RuntimeException("Wrapper message does not contain rawdata field");
        }
        // Copy only the readable window. ByteBuffer.array() would expose the whole backing
        // array regardless of position/limit/offset and fails for buffers without an
        // accessible array. duplicate() keeps the original buffer's position untouched.
        ByteBuffer readable = byteBuffer.duplicate();
        byte[] byteData = new byte[readable.remaining()];
        readable.get(byteData);
        GenericRecord avroEvent = deserialize(eventSchema, byteData);
        // convert the avro version of the event into a Map
        Map<String, Object> event = new HashMap<String, Object>();
        copyRecord(avroEvent, event);
        if (event.get(EVENT_NAME_KEY) == null) {
            event.put(EVENT_NAME_KEY, schemaName);
        }
        return event;
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
Also used : Schema(org.apache.avro.Schema) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer)

Example 18 with Utf8

use of org.apache.avro.util.Utf8 in project core by s4.

the class AvroSerDeser method serialize.

/**
 * Serializes an event map into a wrapped message.
 *
 * <p>The event's name (read from {@code Event.EVENT_NAME_KEY}) selects the schema used to
 * encode the event. The encoded bytes are placed in the wrapper record's {@code rawdata} field,
 * the name in {@code eventType}, and the event's {@code traceId} (or {@code -1} when the event
 * has none) in {@code traceId}. The wrapper is then encoded with the wrapper schema.
 *
 * @param message the event, expected to be a {@code Map<String, Object>}
 * @return the serialized wrapper message
 * @throws RuntimeException if encoding fails with an {@link IOException} (kept as the cause)
 */
public byte[] serialize(Object message) {
    @SuppressWarnings("unchecked") // callers are expected to pass the event as a String-keyed map
    Map<String, Object> event = (Map<String, Object>) message;
    Schema wrapperSchema = avroSchemaManager.getCompiledSchema(MiscConstants.EVENT_WRAPPER_SCHEMA_NAME);
    GenericRecord wrapper = new GenericData.Record(wrapperSchema);
    String schemaName = (String) event.get(io.s4.collector.Event.EVENT_NAME_KEY);
    wrapper.put("eventType", new Utf8(schemaName));
    // -1 marks an event that carries no trace id.
    Object traceId = event.get("traceId");
    wrapper.put("traceId", traceId != null ? traceId : Long.valueOf(-1L));
    Schema eventSchema = avroSchemaManager.getCompiledSchema(schemaName);
    GenericRecord avroRecord = new GenericData.Record(eventSchema);
    copyRecord(event, eventSchema, avroRecord);
    try {
        byte[] serializedEvent = serialize(eventSchema, avroRecord);
        // put the serialized event in the wrapper; wrap() yields position 0 and
        // limit == length without copying the freshly produced array
        wrapper.put("rawdata", ByteBuffer.wrap(serializedEvent));
        // serialize the wrapper for transmission
        return serialize(wrapperSchema, wrapper);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
Also used : Schema(org.apache.avro.Schema) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HashMap(java.util.HashMap) Map(java.util.Map)

Example 19 with Utf8

use of org.apache.avro.util.Utf8 in project flink by apache.

the class DataInputDecoder method readString.

// --------------------------------------------------------------------------------------------
// strings
// --------------------------------------------------------------------------------------------
/**
 * Reads a string encoded as an int length followed by that many bytes.
 *
 * @param old an instance to reuse for the result, or {@code null} to allocate a new one
 * @return {@code old} when it was supplied, otherwise a new {@link Utf8}, holding the bytes read
 * @throws IOException if reading from the underlying input fails
 */
@Override
public Utf8 readString(Utf8 old) throws IOException {
    final int byteCount = readInt();
    final Utf8 target;
    if (old == null) {
        target = new Utf8();
    } else {
        target = old;
    }
    target.setByteLength(byteCount);
    // Nothing further to read for an empty string.
    if (byteCount <= 0) {
        return target;
    }
    in.readFully(target.getBytes(), 0, byteCount);
    return target;
}
Also used : Utf8(org.apache.avro.util.Utf8)

Example 20 with Utf8

use of org.apache.avro.util.Utf8 in project flink by apache.

the class AvroRecordInputFormatTest method testDeserialisation.

/**
 * Tests that the AvroInputFormat properly reads data from an Avro file: the first record's
 * name, string/boolean arrays, enum and map fields are checked against the expected test
 * constants, and exactly two records are expected in the file.
 *
 * @throws IOException if the input format fails to open, read or close the test file
 */
@Test
public void testDeserialisation() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(1);
    // JUnit's argument order is (expected, actual).
    assertEquals(1, splits.length);
    format.open(splits[0]);
    // Close the format even when an assertion fails so the file handle is not leaked.
    try {
        User u = format.nextRecord(null);
        assertNotNull(u);
        String name = u.getName().toString();
        assertNotNull("empty record", name);
        assertEquals("name not equal", TEST_NAME, name);
        // check arrays
        List<CharSequence> sl = u.getTypeArrayString();
        assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
        assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());
        List<Boolean> bl = u.getTypeArrayBoolean();
        assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
        assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));
        // check enums
        Colors enumValue = u.getTypeEnum();
        assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);
        // check maps
        Map<CharSequence, Long> lm = u.getTypeMap();
        assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
        assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());
        assertFalse("expecting second element", format.reachedEnd());
        assertNotNull("expecting second element", format.nextRecord(u));
        assertNull(format.nextRecord(u));
        assertTrue(format.reachedEnd());
    } finally {
        format.close();
    }
}
Also used : Path(org.apache.flink.core.fs.Path) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Colors(org.apache.flink.api.io.avro.generated.Colors) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Aggregations

Utf8 (org.apache.avro.util.Utf8) 123, Test (org.junit.Test) 34, WebPage (org.apache.gora.examples.generated.WebPage) 32, GenericRecord (org.apache.avro.generic.GenericRecord) 17, Schema (org.apache.avro.Schema) 14, GenericData (org.apache.avro.generic.GenericData) 13, ByteBuffer (java.nio.ByteBuffer) 12, HashMap (java.util.HashMap) 12, Map (java.util.Map) 12, Employee (org.apache.gora.examples.generated.Employee) 11, IOException (java.io.IOException) 7, ArrayList (java.util.ArrayList) 7, Field (org.apache.avro.Schema.Field) 6, Record (org.apache.avro.generic.GenericData.Record) 5, File (java.io.File) 4, SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader) 4, Metadata (org.apache.gora.examples.generated.Metadata) 4, ByteArrayInputStream (java.io.ByteArrayInputStream) 3, Iterator (java.util.Iterator) 3, List (java.util.List) 3