Search in sources :

Example 11 with Utf8

use of org.apache.avro.util.Utf8 in project crunch by cloudera.

the class AvrosTest method testTriples.

/**
 * Builds a triple-of-strings Avro type, a {@code Tuple3} of three strings, and a generic record
 * whose three positional fields hold the same strings wrapped in {@link Utf8}, then passes all
 * three to {@code testInputOutputFn}.
 */
@Test
@SuppressWarnings("rawtypes")
public void testTriples() throws Exception {
    final String[] parts = { "a", "b", "c" };
    AvroType tripleType = Avros.triples(Avros.strings(), Avros.strings(), Avros.strings());

    // Avro-side representation: one Utf8 per positional field.
    GenericData.Record avroRecord = new GenericData.Record(tripleType.getSchema());
    for (int position = 0; position < parts.length; position++) {
        avroRecord.put(position, new Utf8(parts[position]));
    }

    // Crunch-side representation of the same three values.
    Tuple3 tuple = Tuple3.of(parts[0], parts[1], parts[2]);

    testInputOutputFn(tripleType, tuple, avroRecord);
}
Also used : Tuple3(org.apache.crunch.Tuple3) Utf8(org.apache.avro.util.Utf8) GenericData(org.apache.avro.generic.GenericData) Test(org.junit.Test)

Example 12 with Utf8

use of org.apache.avro.util.Utf8 in project rest.li by linkedin.

the class AnyRecordTranslator method avroGenericToData.

/**
 * Translates an Avro generic record carrying {@code TYPE} and {@code VALUE} string fields into a
 * {@link DataMap} with a single entry: the key is the type string and the value is the map that
 * {@code _codec.bytesToMap} produces from the value string's UTF-8 bytes.
 *
 * <p>Problems are reported through {@code context.appendMessage} rather than thrown; in every
 * such case the method returns {@code null}.
 *
 * @param context    translation context that collects error messages
 * @param avroData   the Avro datum to translate; must be a {@link GenericRecord}
 * @param avroSchema not read by this method
 * @param schema     not read by this method
 * @return the translated {@link DataMap}, or {@code null} if a message was appended
 */
@Override
public Object avroGenericToData(DataTranslatorContext context, Object avroData, Schema avroSchema, DataSchema schema) {
    // instanceof also rejects null, which a plain cast lets through only to fail later with an
    // uncaught NullPointerException.
    if (!(avroData instanceof GenericRecord)) {
        context.appendMessage("Error translating %1$s, it is not a GenericRecord", avroData);
        return null;
    }
    final GenericRecord genericRecord = (GenericRecord) avroData;
    final String notUtf8Format = "Error translating %1$s, \"type\" or \"value\" is not a %2$s";

    // "value" is looked up only after "type" has passed its check.
    final Object rawType = genericRecord.get(TYPE);
    if (rawType != null && !(rawType instanceof Utf8)) {
        context.appendMessage(notUtf8Format, avroData, Utf8.class.getSimpleName());
        return null;
    }
    final Object rawValue = genericRecord.get(VALUE);
    if (rawValue != null && !(rawValue instanceof Utf8)) {
        context.appendMessage(notUtf8Format, avroData, Utf8.class.getSimpleName());
        return null;
    }
    if (rawType == null || rawValue == null) {
        context.appendMessage("Error translating %1$s, \"type\" or \"value\" is null", avroData);
        return null;
    }
    final Utf8 type = (Utf8) rawType;
    final Utf8 value = (Utf8) rawValue;

    try {
        // Utf8.getBytes() exposes the backing buffer, which may be longer than the encoded
        // string; re-encode so the codec only ever sees the string's own bytes.
        DataMap valueDataMap = _codec.bytesToMap(value.toString().getBytes("UTF-8"));
        DataMap anyDataMap = new DataMap(2);
        anyDataMap.put(type.toString(), valueDataMap);
        return anyDataMap;
    } catch (IOException e) {
        context.appendMessage("Error translating %1$s, %2$s", avroData, e);
        return null;
    }
}
Also used : Utf8(org.apache.avro.util.Utf8) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) DataMap(com.linkedin.data.DataMap)

Example 13 with Utf8

use of org.apache.avro.util.Utf8 in project pinot by linkedin.

the class AvroRecordToPinotRowGenerator method transform.

/**
 * Copies every column named by {@code indexingSchema} from an Avro record into
 * {@code destination}, converting values on the way.
 *
 * <ul>
 *   <li>Avro arrays, and null values of fields that are not single-valued, go through
 *       {@code AvroRecordReader.transformAvroArrayToObjectArray}; for STRING / STRING_ARRAY
 *       fields each non-null element is then replaced by its {@code toString()}.</li>
 *   <li>Null values of single-valued fields become
 *       {@code AvroRecordReader.getDefaultNullValue(fieldSpec)}.</li>
 *   <li>Other values are stringified when they are {@link Utf8} or the field is STRING.</li>
 * </ul>
 *
 * @param record      source Avro record
 * @param schema      not read by this method
 * @param destination row that receives one {@code putField} call per column
 * @return {@code destination}
 */
public GenericRow transform(GenericData.Record record, org.apache.avro.Schema schema, GenericRow destination) {
    for (String column : indexingSchema.getColumnNames()) {
        final Object rawValue = record.get(column);
        final FieldSpec fieldSpec = indexingSchema.getFieldSpecFor(column);

        // A null value of a multi-value field takes the same route as a real array; the helper
        // is then handed a null Array (presumably it supplies a default array - confirm).
        final boolean multiValuePath =
            rawValue instanceof Array || (rawValue == null && !fieldSpec.isSingleValueField());

        Object converted;
        if (multiValuePath) {
            converted = AvroRecordReader.transformAvroArrayToObjectArray((Array) rawValue, fieldSpec);
            if (fieldSpec.getDataType() == DataType.STRING || fieldSpec.getDataType() == DataType.STRING_ARRAY) {
                final Object[] elements = (Object[]) converted;
                for (int idx = 0; idx < elements.length; ++idx) {
                    if (elements[idx] != null) {
                        elements[idx] = elements[idx].toString();
                    }
                }
            }
        } else if (rawValue == null) {
            // Single-value field without a value.
            converted = AvroRecordReader.getDefaultNullValue(fieldSpec);
        } else {
            converted = rawValue;
            if (converted instanceof Utf8) {
                converted = ((Utf8) converted).toString();
            }
            if (fieldSpec.getDataType() == DataType.STRING) {
                converted = converted.toString();
            }
        }
        destination.putField(column, converted);
    }
    return destination;
}
Also used : Array(org.apache.avro.generic.GenericData.Array) Utf8(org.apache.avro.util.Utf8) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 14 with Utf8

use of org.apache.avro.util.Utf8 in project pinot by linkedin.

the class BaseClusterIntegrationTest method createH2SchemaAndInsertAvroFiles.

/**
 * Recreates the H2 table {@code mytable} from the schema of the first Avro file and inserts the
 * records of every given Avro file into it.
 *
 * <p>Column mapping, derived from the Avro field type:
 * <ul>
 *   <li>UNION: one column typed after the first union branch; {@code not null} only when the
 *       union has a single branch.</li>
 *   <li>ARRAY: {@code MAX_ELEMENTS_IN_MULTI_VALUE} columns named {@code <field>__MV<i>}.</li>
 *   <li>BOOLEAN / INT / LONG / FLOAT / DOUBLE / STRING: one {@code not null} column.</li>
 *   <li>RECORD and any other type: skipped (other types are logged).</li>
 * </ul>
 * {@code int} is mapped to {@code bigint} and {@code string} to {@code varchar(128)}.
 *
 * @param avroFiles  Avro files to load; the first one supplies the schema
 * @param connection open JDBC connection to the H2 database
 * @throws RuntimeException wrapping any exception raised while reading or inserting
 */
public static void createH2SchemaAndInsertAvroFiles(List<File> avroFiles, Connection connection) {
    try {
        // The DROP must actually be executed; merely preparing it leaves the old table in place.
        PreparedStatement dropStatement = connection.prepareCall("DROP TABLE IF EXISTS mytable");
        try {
            dropStatement.execute();
        } finally {
            dropStatement.close();
        }

        List<String> columnNamesAndTypes;
        // Record positions of the fields that became columns. Skipped fields (records, unsupported
        // types) must not shift the positions read during insertion.
        List<Integer> includedPositions;
        DataFileReader<GenericRecord> schemaReader =
            new DataFileReader<GenericRecord>(avroFiles.get(0), new GenericDatumReader<GenericRecord>());
        try {
            List<Schema.Field> fields = schemaReader.getSchema().getFields();
            columnNamesAndTypes = new ArrayList<String>(fields.size());
            includedPositions = new ArrayList<Integer>(fields.size());
            for (Schema.Field field : fields) {
                String fieldName = field.name();
                Schema.Type fieldType = field.schema().getType();
                switch (fieldType) {
                    case UNION: {
                        List<Schema> types = field.schema().getTypes();
                        String typeName = types.get(0).getName();
                        if (typeName.equalsIgnoreCase("int")) {
                            typeName = "bigint";
                        }
                        String columnNameAndType;
                        if (types.size() == 1) {
                            columnNameAndType = fieldName + " " + typeName + " not null";
                        } else {
                            columnNameAndType = fieldName + " " + typeName;
                        }
                        columnNamesAndTypes.add(columnNameAndType.replace("string", "varchar(128)"));
                        includedPositions.add(field.pos());
                        break;
                    }
                    case ARRAY: {
                        String elementTypeName = field.schema().getElementType().getName();
                        if (elementTypeName.equalsIgnoreCase("int")) {
                            elementTypeName = "bigint";
                        }
                        elementTypeName = elementTypeName.replace("string", "varchar(128)");
                        for (int i = 0; i < MAX_ELEMENTS_IN_MULTI_VALUE; i++) {
                            columnNamesAndTypes.add(fieldName + "__MV" + i + " " + elementTypeName);
                        }
                        includedPositions.add(field.pos());
                        break;
                    }
                    case BOOLEAN:
                    case INT:
                    case LONG:
                    case FLOAT:
                    case DOUBLE:
                    case STRING: {
                        String fieldTypeName = fieldType.getName();
                        if (fieldTypeName.equalsIgnoreCase("int")) {
                            fieldTypeName = "bigint";
                        }
                        String columnNameAndType = fieldName + " " + fieldTypeName + " not null";
                        columnNamesAndTypes.add(columnNameAndType.replace("string", "varchar(128)"));
                        includedPositions.add(field.pos());
                        break;
                    }
                    case RECORD:
                        // Ignore records
                        break;
                    default:
                        // Ignore other avro types
                        LOGGER.warn("Ignoring field {} of type {}", fieldName, field.schema());
                }
            }
        } finally {
            schemaReader.close();
        }

        PreparedStatement createStatement = connection.prepareCall("create table mytable(" + StringUtil.join(",", columnNamesAndTypes.toArray(new String[columnNamesAndTypes.size()])) + ")");
        try {
            createStatement.execute();
        } finally {
            createStatement.close();
        }

        long start = System.currentTimeMillis();
        StringBuilder params = new StringBuilder("?");
        for (int i = 0; i < columnNamesAndTypes.size() - 1; i++) {
            params.append(",?");
        }
        PreparedStatement statement = connection.prepareStatement("INSERT INTO mytable VALUES (" + params.toString() + ")");
        try {
            for (File avroFile : avroFiles) {
                DataFileReader<GenericRecord> dataFileReader =
                    new DataFileReader<GenericRecord>(avroFile, new GenericDatumReader<GenericRecord>());
                try {
                    GenericRecord record = null;
                    while (dataFileReader.hasNext()) {
                        record = dataFileReader.next(record);
                        int jdbcIndex = 1;
                        for (int avroIndex : includedPositions) {
                            Object value = record.get(avroIndex);
                            // NOTE(review): a null value in an ARRAY field takes the single-value
                            // branch and binds one parameter instead of
                            // MAX_ELEMENTS_IN_MULTI_VALUE - confirm inputs never contain null arrays.
                            if (value instanceof GenericData.Array) {
                                GenericData.Array array = (GenericData.Array) value;
                                // Pad short arrays with nulls so every row binds the same number
                                // of parameters.
                                for (int i = 0; i < MAX_ELEMENTS_IN_MULTI_VALUE; i++) {
                                    if (i < array.size()) {
                                        value = array.get(i);
                                        if (value instanceof Utf8) {
                                            value = value.toString();
                                        }
                                    } else {
                                        value = null;
                                    }
                                    statement.setObject(jdbcIndex, value);
                                    ++jdbcIndex;
                                }
                            } else {
                                if (value instanceof Utf8) {
                                    value = value.toString();
                                }
                                statement.setObject(jdbcIndex, value);
                                ++jdbcIndex;
                            }
                        }
                        statement.execute();
                    }
                } finally {
                    dataFileReader.close();
                }
            }
        } finally {
            statement.close();
        }
        LOGGER.info("Insertion took " + (System.currentTimeMillis() - start));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) PreparedStatement(java.sql.PreparedStatement) GenericData(org.apache.avro.generic.GenericData) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) JSONArray(org.json.JSONArray) Utf8(org.apache.avro.util.Utf8) JSONObject(org.json.JSONObject) File(java.io.File)

Example 15 with Utf8

use of org.apache.avro.util.Utf8 in project pinot by linkedin.

the class BitmapInvertedIndexTest method testBitMapInvertedIndex.

/**
 * Loads the segment in {@code segmentDirectory} with the given read mode and checks its inverted
 * indexes against the Avro file named by {@code AVRO_DATA}.
 *
 * <p>For every record (numbered from 0 as {@code docId}) and every column in the segment
 * metadata, the bitmap for the record's dictionary id must contain {@code docId}, and the bitmap
 * of every other dictionary id must not.
 *
 * @param readMode read mode passed to {@code ColumnarSegmentLoader.load}
 * @throws Exception if loading the segment or reading the Avro file fails
 */
void testBitMapInvertedIndex(ReadMode readMode) throws Exception {
    IndexLoadingConfigMetadata indexLoadingConfig = new IndexLoadingConfigMetadata(new PropertiesConfiguration());
    indexLoadingConfig.initLoadingInvertedIndexColumnSet(invertedIndexColumns);
    final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, readMode, indexLoadingConfig);
    // compare the loaded inverted index with the record in avro file
    final DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(new FileInputStream(new File(getClass().getClassLoader().getResource(AVRO_DATA).getFile())), new GenericDatumReader<GenericRecord>());
    try {
        int docId = 0;
        while (reader.hasNext()) {
            final GenericRecord rec = reader.next();
            for (final String column : ((SegmentMetadataImpl) mmapSegment.getSegmentMetadata()).getColumnMetadataMap().keySet()) {
                Object entry = rec.get(column);
                // The dictionary is looked up with String values, so unwrap Avro's Utf8 first.
                if (entry instanceof Utf8) {
                    entry = ((Utf8) entry).toString();
                }
                final int dicId = mmapSegment.getDictionaryFor(column).indexOf(entry);
                // make sure that docId for dicId exist in the inverted index
                Assert.assertTrue(mmapSegment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));
                final int size = mmapSegment.getDictionaryFor(column).length();
                for (int i = 0; i < size; ++i) {
                    // remove this for-loop for quick test
                    if (i == dicId) {
                        continue;
                    }
                    // make sure that docId for dicId does not exist in the inverted index
                    Assert.assertFalse(mmapSegment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
                }
            }
            ++docId;
        }
    } finally {
        // Release the Avro stream (and the file handle beneath it) even when an assertion fails.
        reader.close();
    }
}
Also used : IndexLoadingConfigMetadata(com.linkedin.pinot.common.metadata.segment.IndexLoadingConfigMetadata) DataFileStream(org.apache.avro.file.DataFileStream) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) FileInputStream(java.io.FileInputStream) IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) Utf8(org.apache.avro.util.Utf8) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Aggregations

Utf8 (org.apache.avro.util.Utf8)123 Test (org.junit.Test)34 WebPage (org.apache.gora.examples.generated.WebPage)32 GenericRecord (org.apache.avro.generic.GenericRecord)17 Schema (org.apache.avro.Schema)14 GenericData (org.apache.avro.generic.GenericData)13 ByteBuffer (java.nio.ByteBuffer)12 HashMap (java.util.HashMap)12 Map (java.util.Map)12 Employee (org.apache.gora.examples.generated.Employee)11 IOException (java.io.IOException)7 ArrayList (java.util.ArrayList)7 Field (org.apache.avro.Schema.Field)6 Record (org.apache.avro.generic.GenericData.Record)5 File (java.io.File)4 SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader)4 Metadata (org.apache.gora.examples.generated.Metadata)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 Iterator (java.util.Iterator)3 List (java.util.List)3