Search in sources:

Example 31 with Type

use of org.apache.parquet.schema.Type in project presto by prestodb.

the class TestDataWritableWriter method writeGroupFields.

/**
 * Emits each non-null field of a struct value to the RecordConsumer, one field at a time.
 * Fields are visited by position in the group schema; a null struct value writes nothing.
 *
 * @param value The struct object whose field values are written.
 * @param inspector The struct inspector used to extract the field values and per-field inspectors.
 * @param type The group schema whose fields (by index) name and type each written field.
 */
private void writeGroupFields(final Object value, final StructObjectInspector inspector, final GroupType type) {
    // Nothing to emit for an absent struct.
    if (value == null) {
        return;
    }

    List<? extends StructField> structFields = inspector.getAllStructFieldRefs();
    List<Object> structValues = inspector.getStructFieldsDataAsList(value);

    for (int index = 0; index < type.getFieldCount(); index++) {
        Type schemaField = type.getType(index);
        String name = schemaField.getName();
        Object current = structValues.get(index);

        // Null fields are skipped entirely: no startField/endField pair is emitted for them.
        if (current == null) {
            continue;
        }

        ObjectInspector currentInspector = structFields.get(index).getFieldObjectInspector();
        recordConsumer.startField(name, index);
        writeValue(current, currentInspector, schemaField);
        recordConsumer.endField(name, index);
    }
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) GroupType(org.apache.parquet.schema.GroupType) Type(org.apache.parquet.schema.Type) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)

Example 32 with Type

use of org.apache.parquet.schema.Type in project presto by prestodb.

the class TestDataWritableWriter method writeArray.

/**
 * Writes a list value and its elements to the Parquet RecordConsumer.
 * This is called when the original type (LIST) is detected by writeValue().
 * The schema is expected to have the following shape:
 * optional group arrayCol (LIST) {
 * repeated group array {
 * optional TYPE array_element;
 * }
 * }
 * Every element gets its own inner group; a null element produces an empty inner group.
 *
 * @param value The object that holds the list to write.
 * @param inspector The list inspector used to read the list and obtain the element inspector.
 * @param type The group (LIST) schema; its first field is the repeated inner group.
 */
private void writeArray(final Object value, final ListObjectInspector inspector, final GroupType type) {
    // First child of the LIST group is the repeated wrapper group
    GroupType repeatedGroup = type.getType(0).asGroupType();

    recordConsumer.startGroup();
    List<?> items = inspector.getList(value);

    // An empty list is written as an empty outer group with no repeated field.
    if (items.isEmpty()) {
        recordConsumer.endGroup();
        return;
    }

    String repeatedName = repeatedGroup.getName();
    recordConsumer.startField(repeatedName, 0);

    ObjectInspector itemInspector = inspector.getListElementObjectInspector();
    Type itemType = repeatedGroup.getType(0);
    String itemName = itemType.getName();

    for (Object item : items) {
        recordConsumer.startGroup();
        if (item != null) {
            recordConsumer.startField(itemName, 0);
            writeValue(item, itemInspector, itemType);
            recordConsumer.endField(itemName, 0);
        }
        recordConsumer.endGroup();
    }

    recordConsumer.endField(repeatedName, 0);
    recordConsumer.endGroup();
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) OriginalType(org.apache.parquet.schema.OriginalType) GroupType(org.apache.parquet.schema.GroupType) Type(org.apache.parquet.schema.Type) GroupType(org.apache.parquet.schema.GroupType)

Example 33 with Type

use of org.apache.parquet.schema.Type in project presto by prestodb.

the class TestDataWritableWriter method writeSingleLevelArray.

/**
 * Writes a list value whose elements sit directly under the list group, without an
 * intermediate wrapper group per element. All elements are written inside a single
 * startField/endField pair named after the group's first field.
 *
 * @param value The object that holds the list to write.
 * @param inspector The list inspector used to read the list and obtain the element inspector.
 * @param type The group schema; its first field is used as the element type.
 * @throws IllegalArgumentException if any element of the list is null.
 */
private void writeSingleLevelArray(final Object value, final ListObjectInspector inspector, final GroupType type) {
    // First child of the group is the element type itself
    Type itemType = type.getType(0);

    recordConsumer.startGroup();
    List<?> items = inspector.getList(value);

    // An empty list is written as an empty group with no field.
    if (items.isEmpty()) {
        recordConsumer.endGroup();
        return;
    }

    recordConsumer.startField(itemType.getName(), 0);
    ObjectInspector itemInspector = inspector.getListElementObjectInspector();
    for (Object item : items) {
        // This layout has no per-element wrapper, so a null element cannot be written.
        if (item == null) {
            throw new IllegalArgumentException("Array elements are requires in given schema definition");
        }
        writeValue(item, itemInspector, itemType);
    }
    recordConsumer.endField(itemType.getName(), 0);

    recordConsumer.endGroup();
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) GroupType(org.apache.parquet.schema.GroupType) Type(org.apache.parquet.schema.Type) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)

Example 34 with Type

use of org.apache.parquet.schema.Type in project presto by prestodb.

the class SingleLevelArrayMapKeyValuesSchemaConverter method convertMapType.

// Builds the group for a map column: a group with the caller-supplied repetition
// that wraps a group named "map" holding 2 elements, "key" and "value".
// The key is converted as REQUIRED; the value uses the two-argument convertType overload.
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo, final Repetition repetition) {
    final String keyFieldName = ParquetHiveSerDe.MAP_KEY.toString();
    final Type mapKey = convertType(keyFieldName, typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);

    final String valueFieldName = ParquetHiveSerDe.MAP_VALUE.toString();
    final Type mapValue = convertType(valueFieldName, typeInfo.getMapValueTypeInfo());

    return mapType(repetition, name, "map", mapKey, mapValue);
}
Also used : GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) OriginalType(org.apache.parquet.schema.OriginalType)

Example 35 with Type

use of org.apache.parquet.schema.Type in project presto by prestodb.

the class MapKeyValuesSchemaConverter method convertMapType.

// Builds the group for a map column: an OPTIONAL group that wraps a group
// named "map" holding 2 elements, "key" and "value".
// The key is converted as REQUIRED; the value uses the two-argument convertType overload.
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) {
    final String keyFieldName = ParquetHiveSerDe.MAP_KEY.toString();
    final Type mapKey = convertType(keyFieldName, typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);

    final String valueFieldName = ParquetHiveSerDe.MAP_VALUE.toString();
    final Type mapValue = convertType(valueFieldName, typeInfo.getMapValueTypeInfo());

    return mapType(Repetition.OPTIONAL, name, "map", mapKey, mapValue);
}
Also used : GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) OriginalType(org.apache.parquet.schema.OriginalType)

Aggregations

Type (org.apache.parquet.schema.Type): 88, MessageType (org.apache.parquet.schema.MessageType): 72, GroupType (org.apache.parquet.schema.GroupType): 69, OriginalType (org.apache.parquet.schema.OriginalType): 35, PrimitiveType (org.apache.parquet.schema.PrimitiveType): 35, ArrayList (java.util.ArrayList): 25, HashMap (java.util.HashMap): 10, SchemaPath (org.apache.drill.common.expression.SchemaPath): 10, TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 10, ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor): 10, PathSegment (org.apache.drill.common.expression.PathSegment): 8, Converter (org.apache.parquet.io.api.Converter): 6, GroupConverter (org.apache.parquet.io.api.GroupConverter): 6, MinorType (org.apache.drill.common.types.TypeProtos.MinorType): 5, MaterializedField (org.apache.drill.exec.record.MaterializedField): 5, LogicalTypeAnnotation (org.apache.parquet.schema.LogicalTypeAnnotation): 5, Collection (java.util.Collection): 4, List (java.util.List): 4, Function (java.util.function.Function): 4, LogicalType (org.apache.avro.LogicalType): 4