Search in sources :

Example 61 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class SolrStore method deserializeFieldValue.

/**
 * Converts a raw value read from Solr into the value stored for an Avro field,
 * dispatching on the type of {@code fieldSchema}.
 *
 * <ul>
 *   <li>MAP, ARRAY and RECORD values are treated as {@code byte[]} and decoded
 *       through the datum reader obtained for the schema.</li>
 *   <li>ENUM values are resolved from their {@code String} form.</li>
 *   <li>BYTES values are wrapped in a {@link ByteBuffer}; STRING values become
 *       {@link Utf8}.</li>
 *   <li>A two-branch union for which {@code isNullable} holds is unwrapped to
 *       its non-null branch and deserialized recursively; any other union is
 *       decoded from {@code byte[]} like a record.</li>
 *   <li>All remaining types are returned unchanged.</li>
 * </ul>
 *
 * @param field the Avro field being populated; its position selects the
 *        current value in {@code persistent} that is handed to the deserializer
 * @param fieldSchema schema describing how {@code solrValue} must be interpreted
 * @param solrValue the raw value read from Solr
 * @param persistent the persistent object that owns the field
 * @return the deserialized field value
 * @throws IOException if the schema type is FIXED (not supported) or if the
 *         underlying deserialization fails
 */
@SuppressWarnings("unchecked")
private Object deserializeFieldValue(Field field, Schema fieldSchema, Object solrValue, T persistent) throws IOException {
    Object fieldValue = null;
    switch(fieldSchema.getType()) {
        case MAP:
        case ARRAY:
        case RECORD:
            @SuppressWarnings("rawtypes") SpecificDatumReader reader = getDatumReader(fieldSchema);
            fieldValue = IOUtils.deserialize((byte[]) solrValue, reader, persistent.get(field.pos()));
            break;
        case ENUM:
            fieldValue = AvroUtils.getEnumValue(fieldSchema, (String) solrValue);
            break;
        case FIXED:
            // FIXED is not handled by this method; fail with an actionable message.
            throw new IOException("Avro FIXED type is not supported (field '" + field.name() + "')");
        case BYTES:
            fieldValue = ByteBuffer.wrap((byte[]) solrValue);
            break;
        case STRING:
            fieldValue = new Utf8(solrValue.toString());
            break;
        case UNION:
            if (fieldSchema.getTypes().size() == 2 && isNullable(fieldSchema)) {
                // schema is ["null","type"] or ["type","null"]: unwrap to the non-null branch.
                Type type0 = fieldSchema.getTypes().get(0).getType();
                Type type1 = fieldSchema.getTypes().get(1).getType();
                // Branch 1 is chosen only when branch 0 is NULL and branch 1 is not;
                // in every other case (including two identical types) branch 0 is used.
                int branchIndex = (type0.equals(Schema.Type.NULL) && !type0.equals(type1)) ? 1 : 0;
                Schema branchSchema = fieldSchema.getTypes().get(branchIndex);
                fieldValue = deserializeFieldValue(field, branchSchema, solrValue, persistent);
            } else {
                // Any other union is stored as serialized bytes and needs a datum reader.
                @SuppressWarnings("rawtypes") SpecificDatumReader unionReader = getDatumReader(fieldSchema);
                fieldValue = IOUtils.deserialize((byte[]) solrValue, unionReader, persistent.get(field.pos()));
            }
            break;
        default:
            // Remaining types are passed through unchanged.
            fieldValue = solrValue;
    }
    return fieldValue;
}
Also used : Type(org.apache.avro.Schema.Type) Utf8(org.apache.avro.util.Utf8) IOException(java.io.IOException) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader)

Example 62 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class AccumuloStore method firstNotNullSchemaTypeIndex.

/**
 * Finds the position of the first branch of a union schema whose type is not
 * {@code NULL}.
 *
 * @param toSchema schema whose {@code getTypes()} list is scanned
 * @return the index of the first non-null branch, or 0 when there is none
 */
private int firstNotNullSchemaTypeIndex(Schema toSchema) {
    final List<Schema> branches = toSchema.getTypes();
    int position = 0;
    for (Schema branch : branches) {
        // FIXME HUGE kludge to pass tests
        if (branch.getType() != Type.NULL) {
            return position;
        }
        position++;
    }
    // No non-null branch found: fall back to the first position.
    return 0;
}
Also used : Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Example 63 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class HiveQueryBuilder method getNullValue.

/**
 * Builds the representation of a null value for the given schema type.
 *
 * <p>BYTES, MAP and ARRAY yield string expressions (MAP and ARRAY recurse into
 * their value/element schema), UNION is delegated to {@code serializeUnion}
 * with a null value, RECORD is delegated to {@code serializeRecord} with a
 * freshly created record instance, and every other type yields {@code null}.
 *
 * @param parameterList carries the list of parameters to be injected into sql
 * @param schema schema for which the null value is generated
 * @return the null value for the schema type
 * @throws GoraException if the class of a RECORD schema cannot be loaded
 */
private Object getNullValue(List<Object> parameterList, Schema schema) throws GoraException {
    switch(schema.getType()) {
        case BYTES:
            return "binary(null)";
        case MAP: {
            final Object nullMapValue = getNullValue(parameterList, schema.getValueType());
            return "map(null," + nullMapValue + CLOSE_BRACKET_SYMBOL;
        }
        case ARRAY: {
            final Object nullElement = getNullValue(parameterList, schema.getElementType());
            return "array(" + nullElement + CLOSE_BRACKET_SYMBOL;
        }
        case UNION:
            return serializeUnion(parameterList, schema, null);
        case RECORD: {
            // Load the record class by the schema's full name.
            Class<?> recordClass;
            try {
                recordClass = ClassLoadingUtils.loadClass(schema.getFullName());
            } catch (ClassNotFoundException cause) {
                throw new GoraException(cause);
            }
            // Serialize a newly created instance of that class as the record's null value.
            @SuppressWarnings("unchecked") final PersistentBase blankRecord = (PersistentBase) new BeanFactoryImpl(hiveStore.getKeyClass(), recordClass).newPersistent();
            return serializeRecord(parameterList, schema, blankRecord);
        }
        default:
            return null;
    }
}
Also used : Type(org.apache.avro.Schema.Type) GoraException(org.apache.gora.util.GoraException) PersistentBase(org.apache.gora.persistency.impl.PersistentBase) BeanFactoryImpl(org.apache.gora.persistency.impl.BeanFactoryImpl)

Example 64 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class HBaseByteInterface method fromBytes.

/**
 * Deserializes an array of bytes matching the given schema to the proper basic
 * (enum, Utf8,...) or complex type (Persistent/Record).
 *
 * Does not handle <code>arrays/maps</code> if not inside a <code>record</code> type.
 *
 * <p>A two-branch union containing exactly one <code>null</code> branch is
 * decoded as if the schema were its non-null branch. Every other union, and
 * every MAP or RECORD, is decoded with a {@link SpecificDatumReader} cached per
 * schema in <code>readerMap</code>.
 *
 * @param schema Avro schema describing the expected data
 * @param val array of bytes with the data serialized
 * @return Enum|Utf8|ByteBuffer|Integer|Long|Float|Double|Boolean|Persistent|Null
 * @throws IOException if the datum reader fails to decode <code>val</code>
 * @throws RuntimeException if the schema type is not handled by this method
 */
@SuppressWarnings({ "rawtypes" })
public static Object fromBytes(Schema schema, byte[] val) throws IOException {
    Type type = schema.getType();
    switch(type) {
        case ENUM:
            // The enum is looked up from the first byte of the value.
            return AvroUtils.getEnumValue(schema, val[0]);
        case STRING:
            return new Utf8(Bytes.toString(val));
        case BYTES:
            return ByteBuffer.wrap(val);
        case INT:
            return Bytes.toInt(val);
        case LONG:
            return Bytes.toLong(val);
        case FLOAT:
            return Bytes.toFloat(val);
        case DOUBLE:
            return Bytes.toDouble(val);
        case BOOLEAN:
            return val[0] != 0;
        case UNION:
            if (schema.getTypes().size() == 2) {
                // schema [type0, type1]
                Type type0 = schema.getTypes().get(0).getType();
                Type type1 = schema.getTypes().get(1).getType();
                // Check if types are different and there's a "null", like ["null","type"] or ["type","null"]
                if (!type0.equals(type1) && (type0.equals(Schema.Type.NULL) || type1.equals(Schema.Type.NULL))) {
                    if (type0.equals(Schema.Type.NULL))
                        schema = schema.getTypes().get(1);
                    else
                        schema = schema.getTypes().get(0);
                    // Deserialize as if schema was ["type"]
                    return fromBytes(schema, val);
                }
            }
            // Intentional fall through: any other union is decoded with a datum reader below.
        case MAP:
        case RECORD:
            // Readers are cached per schema; unions have no name of their own,
            // so the schema object itself is the cache key.
            SpecificDatumReader<?> reader = readerMap.get(schema);
            if (reader == null) {
                // ignore dirty bits
                reader = new SpecificDatumReader(schema);
                // If another thread registered a reader first, use that one instead.
                SpecificDatumReader localReader = readerMap.putIfAbsent(schema, reader);
                if (localReader != null) {
                    reader = localReader;
                }
            }
            // Initialize a decoder, handing the cached one (if any) to the factory
            // so it can be reused instead of allocating a new decoder on every call.
            BinaryDecoder decoderFromCache = decoders.get();
            BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(val, decoderFromCache);
            // put in the cache if the initial get was empty
            if (decoderFromCache == null) {
                decoders.set(decoder);
            }
            return reader.read(null, decoder);
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}
Also used : Type(org.apache.avro.Schema.Type) Utf8(org.apache.avro.util.Utf8) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) BinaryDecoder(org.apache.avro.io.BinaryDecoder)

Example 65 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class PersistentUtils method persistentField2PigType.

/**
 * Recursively converts PersistentBase fields to Pig type: Tuple | Bag | String | Long | ...
 *
 * The mapping is as follows:
 * null         -> null
 * Boolean      -> Boolean
 * Enum         -> Integer (the enum ordinal)
 * ByteBuffer   -> DataByteArray (copy of the buffer's remaining bytes)
 * String       -> String
 * Float        -> Float
 * Double       -> Double
 * Integer      -> Integer
 * Long         -> Long
 * Union        -> X (the conversion of the branch that matches the data)
 * Record       -> Tuple
 * Array        -> Bag
 * Map<String,X> -> HashMap<String,Object>
 *
 * @param schema Source schema
 * @param data Source data: PersistentBase | String | Long,...
 * @return Pig type: Tuple | Bag | String | Long | ...
 * @throws ExecException if a value cannot be set on a Pig tuple
 * @throws RuntimeException if the schema type is FIXED or otherwise unexpected
 */
@SuppressWarnings("unchecked")
private static Object persistentField2PigType(Schema schema, Object data) throws ExecException {
    Type schemaType = schema.getType();
    switch(schemaType) {
        case NULL:
            return null;
        case BOOLEAN:
            return (Boolean) data;
        case ENUM:
            return Integer.valueOf(((Enum<?>) data).ordinal());
        case BYTES:
            // Copy only the readable window of the buffer. Calling array() directly
            // would ignore position/limit/offset and fail on read-only or direct buffers.
            // duplicate() keeps the caller's buffer position untouched.
            ByteBuffer readable = ((ByteBuffer) data).duplicate();
            byte[] content = new byte[readable.remaining()];
            readable.get(content);
            return new DataByteArray(content);
        case STRING:
            return data.toString();
        case FLOAT:
        case DOUBLE:
        case INT:
        case LONG:
            return data;
        case UNION:
            // Resolve which union branch the data belongs to and convert with that schema.
            int unionIndex = GenericData.get().resolveUnion(schema, data);
            Schema unionTypeSchema = schema.getTypes().get(unionIndex);
            return persistentField2PigType(unionTypeSchema, data);
        case RECORD:
            // One tuple slot per record field, converted recursively in field order.
            List<Field> recordFields = schema.getFields();
            int numRecordElements = recordFields.size();
            Tuple recordTuple = TupleFactory.getInstance().newTuple(numRecordElements);
            for (int i = 0; i < numRecordElements; i++) {
                recordTuple.set(i, persistentField2PigType(recordFields.get(i).schema(), ((PersistentBase) data).get(i)));
            }
            return recordTuple;
        case ARRAY:
            DataBag bag = BagFactory.getInstance().newDefaultBag();
            Schema arrValueSchema = schema.getElementType();
            for (Object element : (List<?>) data) {
                Object pigElement = persistentField2PigType(arrValueSchema, element);
                if (pigElement instanceof Tuple) {
                    bag.add((Tuple) pigElement);
                } else {
                    // Elements that are not tuples are wrapped in a single-field tuple before being added.
                    Tuple arrElemTuple = TupleFactory.getInstance().newTuple(1);
                    arrElemTuple.set(0, pigElement);
                    bag.add(arrElemTuple);
                }
            }
            return bag;
        case MAP:
            HashMap<String, Object> map = new HashMap<String, Object>();
            for (Entry<CharSequence, ?> e : ((Map<CharSequence, ?>) data).entrySet()) {
                map.put(e.getKey().toString(), persistentField2PigType(schema.getValueType(), e.getValue()));
            }
            return map;
        case FIXED:
            // TODO: Implement FIXED data type
            LOG.error("FIXED type not implemented");
            throw new RuntimeException("Fixed type not implemented");
        default:
            LOG.error("Unexpected schema type {}", schemaType);
            throw new RuntimeException("Unexpected schema type " + schemaType);
    }
}
Also used : PersistentBase(org.apache.gora.persistency.impl.PersistentBase) DataBag(org.apache.pig.data.DataBag) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) List(java.util.List) DataByteArray(org.apache.pig.data.DataByteArray) HashMap(java.util.HashMap) Map(java.util.Map) Tuple(org.apache.pig.data.Tuple)

Aggregations

Type (org.apache.avro.Schema.Type)80 Schema (org.apache.avro.Schema)58 Field (org.apache.avro.Schema.Field)32 Map (java.util.Map)20 List (java.util.List)16 HashMap (java.util.HashMap)15 ArrayList (java.util.ArrayList)13 ByteBuffer (java.nio.ByteBuffer)11 Collectors (java.util.stream.Collectors)11 IOException (java.io.IOException)10 LogicalType (org.apache.avro.LogicalType)8 LinkedHashMap (java.util.LinkedHashMap)7 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Arrays (java.util.Arrays)5 PersistentBase (org.apache.gora.persistency.impl.PersistentBase)5 Test (org.junit.Test)5 BaseRuntimeChildDefinition (ca.uhn.fhir.context.BaseRuntimeChildDefinition)4 BaseRuntimeElementDefinition (ca.uhn.fhir.context.BaseRuntimeElementDefinition)4 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)4