Search in sources:

Example 51 with Type

use of org.apache.parquet.schema.Type in project parquet-mr by apache.

The method writeMap of the class AvroWriteSupport.

/**
 * Writes one map to the record consumer.
 *
 * <p>The output is an outer group that, for a non-empty map, holds the
 * repeated {@code MAP_REPEATED_NAME} field at index 0 with one nested group
 * per entry. Inside each entry group the key goes to field 0
 * ({@code MAP_KEY_NAME}) and a non-null value goes to field 1
 * ({@code MAP_VALUE_NAME}). An empty map produces an empty outer group.
 *
 * @param schema     Parquet group for the map; its first field is taken as
 *                   the group holding the key type (0) and value type (1)
 * @param avroSchema Avro schema supplying the value schema and the name used
 *                   in the error message
 * @param map        entries to write
 * @throws RuntimeException if a value is null and the Parquet value type is
 *                          not OPTIONAL
 */
private <V> void writeMap(GroupType schema, Schema avroSchema, Map<CharSequence, V> map) {
    final GroupType keyValueGroup = schema.getType(0).asGroupType();
    final Type parquetKeyType = keyValueGroup.getType(0);
    final Type parquetValueType = keyValueGroup.getType(1);

    // Outer wrapper group (original type MAP).
    recordConsumer.startGroup();
    if (!map.isEmpty()) {
        recordConsumer.startField(MAP_REPEATED_NAME, 0);
        for (Map.Entry<CharSequence, V> pair : map.entrySet()) {
            // Middle layer: one repeated key_value group per entry.
            recordConsumer.startGroup();

            recordConsumer.startField(MAP_KEY_NAME, 0);
            writeValue(parquetKeyType, MAP_KEY_SCHEMA, pair.getKey());
            recordConsumer.endField(MAP_KEY_NAME, 0);

            final V mapped = pair.getValue();
            if (mapped == null) {
                // A missing value is only legal when the value column is optional.
                if (!parquetValueType.isRepetition(Type.Repetition.OPTIONAL)) {
                    throw new RuntimeException("Null map value for " + avroSchema.getName());
                }
            } else {
                recordConsumer.startField(MAP_VALUE_NAME, 1);
                writeValue(parquetValueType, avroSchema.getValueType(), mapped);
                recordConsumer.endField(MAP_VALUE_NAME, 1);
            }

            recordConsumer.endGroup();
        }
        recordConsumer.endField(MAP_REPEATED_NAME, 0);
    }
    recordConsumer.endGroup();
}
Also used : GroupType(org.apache.parquet.schema.GroupType) LogicalType(org.apache.avro.LogicalType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) GroupType(org.apache.parquet.schema.GroupType) HashMap(java.util.HashMap) Map(java.util.Map)

Example 52 with Type

use of org.apache.parquet.schema.Type in project parquet-mr by apache.

The method writeRecordFields of the class AvroWriteSupport.

/**
 * Writes every non-NULL-typed Avro field of {@code record} to the record
 * consumer.
 *
 * <p>Avro fields whose schema type is NULL are skipped and do not consume a
 * Parquet field position, so the Parquet index is tracked separately from
 * the Avro index. A null value is written as an absent field.
 *
 * @param schema     Parquet group whose fields are matched, in order, with
 *                   the non-NULL Avro fields
 * @param avroSchema Avro record schema listing the fields to read
 * @param record     object the field values are read from via {@code model}
 * @throws RuntimeException if a value is null for a REQUIRED Parquet field
 */
private void writeRecordFields(GroupType schema, Schema avroSchema, Object record) {
    final List<Type> parquetFields = schema.getFields();
    final List<Schema.Field> declaredAvroFields = avroSchema.getFields();

    // Position in the Parquet schema; lags behind avroPos once a NULL field is skipped.
    int parquetPos = 0;
    int avroPos = -1;
    for (Schema.Field avroField : declaredAvroFields) {
        avroPos++;
        if (avroField.schema().getType().equals(Schema.Type.NULL)) {
            continue;
        }

        final Type parquetField = parquetFields.get(parquetPos);
        final Object fieldValue = model.getField(record, avroField.name(), avroPos);
        if (fieldValue == null) {
            if (parquetField.isRepetition(Type.Repetition.REQUIRED)) {
                throw new RuntimeException("Null-value for required field: " + avroField.name());
            }
        } else {
            recordConsumer.startField(parquetField.getName(), parquetPos);
            writeValue(parquetField, avroField.schema(), fieldValue);
            recordConsumer.endField(parquetField.getName(), parquetPos);
        }
        parquetPos++;
    }
}
Also used : GroupType(org.apache.parquet.schema.GroupType) LogicalType(org.apache.avro.LogicalType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) Schema(org.apache.avro.Schema)

Example 53 with Type

use of org.apache.parquet.schema.Type in project parquet-mr by apache.

The method validate of the class ValidatingRecordConsumer.

/**
 * Checks that a primitive value of type {@code p} is legal at the current
 * position.
 *
 * <p>The current field is looked up from the tops of the {@code types} and
 * {@code fields} stacks, and the top of {@code fieldValueCount} is
 * incremented before any check runs. The value is rejected when a
 * non-repeated field receives more than one value, when the field is not
 * primitive, or when its primitive type name differs from {@code p}.
 *
 * @param p primitive type name of the value being added
 * @throws InvalidRecordException if any of the checks above fails or the
 *                                repetition is not a known constant
 */
private void validate(PrimitiveTypeName p) {
    final Type fieldType = types.peek().asGroupType().getType(fields.peek());

    // Bump the number of values seen for the current field.
    final int valuesSeen = fieldValueCount.pop() + 1;
    fieldValueCount.push(valuesSeen);

    LOG.debug("validate {} for {}", p, fieldType.getName());

    switch (fieldType.getRepetition()) {
        case REPEATED:
            // Any number of values is allowed.
            break;
        case OPTIONAL:
        case REQUIRED:
            if (valuesSeen > 1) {
                throw new InvalidRecordException("repeated value when the type is not repeated in " + fieldType);
            }
            break;
        default:
            throw new InvalidRecordException("unknown repetition " + fieldType.getRepetition() + " in " + fieldType);
    }

    final boolean typeMatches =
        fieldType.isPrimitive() && fieldType.asPrimitiveType().getPrimitiveTypeName() == p;
    if (!typeMatches) {
        throw new InvalidRecordException("expected type " + p + " but got " + fieldType);
    }
}
Also used : MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type)

Example 54 with Type

use of org.apache.parquet.schema.Type in project parquet-mr by apache.

The method convertFields of the class PigSchemaConverter.

/**
 * Builds a Pig {@code Schema} with one field per Parquet type, in order.
 *
 * <p>A REPEATED Parquet type is wrapped: its converted field becomes the
 * single member of an inner schema, which is added as an unnamed field of
 * type {@code DataType.BAG}. Any other type contributes its converted field
 * directly.
 *
 * @param parquetFields Parquet types to convert
 * @return a schema holding the converted fields
 * @throws SchemaConversionException if converting a field raises a
 *                                   {@code FrontendException}; the cause is kept
 */
private Schema convertFields(List<Type> parquetFields) {
    final List<FieldSchema> converted = new ArrayList<FieldSchema>();
    for (Type source : parquetFields) {
        try {
            final FieldSchema element = getFieldSchema(source);
            final FieldSchema target = source.isRepetition(Repetition.REPEATED)
                ? new FieldSchema(null, new Schema(Arrays.asList(element)), DataType.BAG)
                : element;
            converted.add(target);
        } catch (FrontendException fe) {
            throw new SchemaConversionException("can't convert " + source, fe);
        }
    }
    return new Schema(converted);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) DataType(org.apache.pig.data.DataType) OriginalType(org.apache.parquet.schema.OriginalType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)

Example 55 with Type

use of org.apache.parquet.schema.Type in project parquet-mr by apache.

The method testOptionalRequiredInteraction of the class ValidatingColumnWriteStore.

/**
 * Runs {@code testSchema} over 18 generated schemas ("schema0".."schema17")
 * built from a binary leaf named "primitive" wrapped in groups named
 * "req0", "req1", ...
 *
 * <ul>
 *   <li>Schemas 0-5: REQUIRED leaf under 0..5 REQUIRED groups, with one
 *       fully populated record.</li>
 *   <li>Schemas 6-11: OPTIONAL leaf under 0..5 REQUIRED groups, with one
 *       record that sets the leaf and one that leaves it unset.</li>
 *   <li>Schemas 12-17: OPTIONAL leaf under six groups of which exactly one
 *       is OPTIONAL, with one fully populated record and one record whose
 *       group chain is only partially built.</li>
 * </ul>
 */
@Test
public void testOptionalRequiredInteraction() {
    // Phase 1: required leaf, all wrappers required.
    for (int depth = 0; depth < 6; depth++) {
        Type nested = new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, "primitive");
        for (int level = 0; level < depth; level++) {
            nested = new GroupType(Repetition.REQUIRED, "req" + level, nested);
        }
        MessageType messageType = new MessageType("schema" + depth, nested);
        GroupFactory factory = new SimpleGroupFactory(messageType);

        Group record = factory.newGroup();
        Group cursor = record;
        for (int level = 0; level < depth; level++) {
            cursor = cursor.addGroup(0);
        }
        cursor.add(0, Binary.fromString("foo"));

        List<Group> records = new ArrayList<Group>();
        records.add(record);
        testSchema(messageType, records);
    }

    // Phase 2: optional leaf, all wrappers required.
    for (int depth = 0; depth < 6; depth++) {
        Type nested = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "primitive");
        for (int level = 0; level < depth; level++) {
            nested = new GroupType(Repetition.REQUIRED, "req" + level, nested);
        }
        MessageType messageType = new MessageType("schema" + (depth + 6), nested);
        GroupFactory factory = new SimpleGroupFactory(messageType);

        Group withValue = factory.newGroup();
        Group withoutValue = factory.newGroup();
        Group valueCursor = withValue;
        Group emptyCursor = withoutValue;
        for (int level = 0; level < depth; level++) {
            valueCursor = valueCursor.addGroup(0);
            emptyCursor = emptyCursor.addGroup(0);
        }
        // Only the first record receives the leaf value.
        valueCursor.add(0, Binary.fromString("foo"));

        List<Group> records = new ArrayList<Group>();
        records.add(withValue);
        records.add(withoutValue);
        testSchema(messageType, records);
    }

    // Phase 3: optional leaf, six wrappers, the one at position optionalAt is optional.
    for (int optionalAt = 0; optionalAt < 6; optionalAt++) {
        Type nested = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "primitive");
        for (int level = 0; level < 6; level++) {
            Repetition wrapperRepetition = optionalAt == level ? Repetition.OPTIONAL : Repetition.REQUIRED;
            nested = new GroupType(wrapperRepetition, "req" + level, nested);
        }
        MessageType messageType = new MessageType("schema" + (optionalAt + 12), nested);
        GroupFactory factory = new SimpleGroupFactory(messageType);

        Group withValue = factory.newGroup();
        Group withoutValue = factory.newGroup();
        Group valueCursor = withValue;
        Group emptyCursor = withoutValue;
        for (int level = 0; level < 6; level++) {
            valueCursor = valueCursor.addGroup(0);
            // The second record only descends on iterations past optionalAt.
            if (optionalAt < level) {
                emptyCursor = emptyCursor.addGroup(0);
            }
        }
        valueCursor.add(0, Binary.fromString("foo"));

        List<Group> records = new ArrayList<Group>();
        records.add(withValue);
        records.add(withoutValue);
        testSchema(messageType, records);
    }
}
Also used : Group(org.apache.parquet.example.data.Group) PrimitiveType(org.apache.parquet.schema.PrimitiveType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) GroupType(org.apache.parquet.schema.GroupType) ArrayList(java.util.ArrayList) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) GroupFactory(org.apache.parquet.example.data.GroupFactory) PrimitiveType(org.apache.parquet.schema.PrimitiveType) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

Type (org.apache.parquet.schema.Type)78 GroupType (org.apache.parquet.schema.GroupType)67 MessageType (org.apache.parquet.schema.MessageType)62 OriginalType (org.apache.parquet.schema.OriginalType)39 PrimitiveType (org.apache.parquet.schema.PrimitiveType)34 ArrayList (java.util.ArrayList)24 SchemaPath (org.apache.drill.common.expression.SchemaPath)10 HashMap (java.util.HashMap)9 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)9 PathSegment (org.apache.drill.common.expression.PathSegment)8 Converter (org.apache.parquet.io.api.Converter)6 GroupConverter (org.apache.parquet.io.api.GroupConverter)6 MinorType (org.apache.drill.common.types.TypeProtos.MinorType)5 MaterializedField (org.apache.drill.exec.record.MaterializedField)5 Collection (java.util.Collection)4 List (java.util.List)4 Function (java.util.function.Function)4 LogicalType (org.apache.avro.LogicalType)4 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)4 ExecConstants (org.apache.drill.exec.ExecConstants)4