use of org.apache.parquet.schema.Type in project parquet-mr by apache.
the class AvroWriteSupport method writeMap.
/**
 * Emits an Avro map to the record consumer as a wrapper group that holds a
 * repeated key/value group.
 *
 * <p>The repeated group is read from position 0 of {@code schema}; its key and
 * value types are read from positions 0 and 1 of that group. An empty map
 * produces only the wrapper group's start/end events.
 *
 * @param schema     Parquet group type describing the map
 * @param avroSchema Avro schema of the map; queried for its value type and name
 * @param map        entries to write
 * @throws RuntimeException if an entry has a null value and the Parquet value
 *                          type is not OPTIONAL
 */
private <V> void writeMap(GroupType schema, Schema avroSchema, Map<CharSequence, V> map) {
  final GroupType keyValueGroup = schema.getType(0).asGroupType();
  final Type parquetKeyType = keyValueGroup.getType(0);
  final Type parquetValueType = keyValueGroup.getType(1);

  // Outer wrapper group (original type MAP).
  recordConsumer.startGroup();
  if (!map.isEmpty()) {
    recordConsumer.startField(MAP_REPEATED_NAME, 0);
    for (Map.Entry<CharSequence, V> keyValue : map.entrySet()) {
      // One repeated key_value group (the middle layer) per entry.
      recordConsumer.startGroup();

      recordConsumer.startField(MAP_KEY_NAME, 0);
      writeValue(parquetKeyType, MAP_KEY_SCHEMA, keyValue.getKey());
      recordConsumer.endField(MAP_KEY_NAME, 0);

      final V mapValue = keyValue.getValue();
      if (mapValue == null) {
        // A null value is written as an absent field, which is only legal
        // when the value type is OPTIONAL.
        if (!parquetValueType.isRepetition(Type.Repetition.OPTIONAL)) {
          throw new RuntimeException("Null map value for " + avroSchema.getName());
        }
      } else {
        recordConsumer.startField(MAP_VALUE_NAME, 1);
        writeValue(parquetValueType, avroSchema.getValueType(), mapValue);
        recordConsumer.endField(MAP_VALUE_NAME, 1);
      }

      recordConsumer.endGroup();
    }
    recordConsumer.endField(MAP_REPEATED_NAME, 0);
  }
  recordConsumer.endGroup();
}
use of org.apache.parquet.schema.Type in project parquet-mr by apache.
the class AvroWriteSupport method writeRecordFields.
/**
 * Writes each field of an Avro record to the record consumer.
 *
 * <p>Avro fields whose schema type is NULL are skipped and do not consume a
 * Parquet field position, so the Parquet index can lag behind the Avro index.
 *
 * @param schema     Parquet group type supplying the target fields
 * @param avroSchema Avro record schema supplying the source fields
 * @param record     the record whose field values are read through the model
 * @throws RuntimeException if a field value is null while the matching Parquet
 *                          field is REQUIRED
 */
private void writeRecordFields(GroupType schema, Schema avroSchema, Object record) {
  final List<Type> parquetFields = schema.getFields();

  int parquetIndex = 0;
  int avroPosition = 0;
  for (Schema.Field avroField : avroSchema.getFields()) {
    // Always advance the Avro position, even for fields that are skipped below.
    final int avroIndex = avroPosition++;
    if (avroField.schema().getType() == Schema.Type.NULL) {
      continue;
    }

    final Type parquetField = parquetFields.get(parquetIndex);
    final Object fieldValue = model.getField(record, avroField.name(), avroIndex);
    if (fieldValue == null) {
      // A null is written as an absent field; that is an error for REQUIRED fields.
      if (parquetField.isRepetition(Type.Repetition.REQUIRED)) {
        throw new RuntimeException("Null-value for required field: " + avroField.name());
      }
    } else {
      final String parquetName = parquetField.getName();
      recordConsumer.startField(parquetName, parquetIndex);
      writeValue(parquetField, avroField.schema(), fieldValue);
      recordConsumer.endField(parquetName, parquetIndex);
    }
    parquetIndex++;
  }
}
use of org.apache.parquet.schema.Type in project parquet-mr by apache.
the class ValidatingRecordConsumer method validate.
/**
 * Validates one primitive value against the field currently on top of the
 * {@code types}/{@code fields} stacks.
 *
 * <p>Increments the value counter on top of {@code fieldValueCount}, then
 * checks that a non-repeated field has not received more than one value and
 * that the field is a primitive of the given type.
 *
 * @param p primitive type name of the value being added
 * @throws InvalidRecordException if a second value arrives for an OPTIONAL or
 *         REQUIRED field, if the repetition is unrecognised, or if the field
 *         is not a primitive of type {@code p}
 */
private void validate(PrimitiveTypeName p) {
  final Type fieldType = types.peek().asGroupType().getType(fields.peek());

  // Bump the count of values seen so far for the current field.
  final int valuesSeen = fieldValueCount.pop() + 1;
  fieldValueCount.push(valuesSeen);

  LOG.debug("validate {} for {}", p, fieldType.getName());

  switch (fieldType.getRepetition()) {
    case REPEATED:
      // Any number of values is acceptable.
      break;
    case OPTIONAL:
    case REQUIRED:
      if (valuesSeen > 1) {
        throw new InvalidRecordException("repeated value when the type is not repeated in " + fieldType);
      }
      break;
    default:
      throw new InvalidRecordException("unknown repetition " + fieldType.getRepetition() + " in " + fieldType);
  }

  final boolean primitiveMatches =
      fieldType.isPrimitive() && fieldType.asPrimitiveType().getPrimitiveTypeName() == p;
  if (!primitiveMatches) {
    throw new InvalidRecordException("expected type " + p + " but got " + fieldType);
  }
}
use of org.apache.parquet.schema.Type in project parquet-mr by apache.
the class PigSchemaConverter method convertFields.
/**
 * Converts a list of Parquet types into a Pig schema with one field per type.
 *
 * <p>A type with REPEATED repetition is wrapped in an unnamed BAG field whose
 * inner schema holds the single converted field; any other type is added as
 * converted.
 *
 * @param parquetFields Parquet types to convert
 * @return a schema built from the converted fields, in input order
 * @throws SchemaConversionException if converting a type raises a
 *         {@code FrontendException}
 */
private Schema convertFields(List<Type> parquetFields) {
  final List<FieldSchema> pigFields = new ArrayList<Schema.FieldSchema>();
  for (Type parquetType : parquetFields) {
    try {
      final FieldSchema elementSchema = getFieldSchema(parquetType);
      if (!parquetType.isRepetition(Repetition.REPEATED)) {
        pigFields.add(elementSchema);
        continue;
      }
      // Repeated types become a bag wrapping the single element schema.
      final Schema bagContents = new Schema(Arrays.asList(elementSchema));
      pigFields.add(new FieldSchema(null, bagContents, DataType.BAG));
    } catch (FrontendException fe) {
      throw new SchemaConversionException("can't convert " + parquetType, fe);
    }
  }
  return new Schema(pigFields);
}
use of org.apache.parquet.schema.Type in project parquet-mr by apache.
the class ValidatingColumnWriteStore method testOptionalRequiredInteraction.
/**
 * Runs {@code testSchema} over three families of schemas that nest a BINARY
 * primitive inside groups with different REQUIRED/OPTIONAL combinations.
 */
@Test
public void testOptionalRequiredInteraction() {
  // Phase 1: a REQUIRED primitive wrapped in 0..5 REQUIRED groups, one fully populated record.
  for (int depth = 0; depth < 6; depth++) {
    Type nested = new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, "primitive");
    for (int level = 0; level < depth; level++) {
      nested = new GroupType(Repetition.REQUIRED, "req" + level, nested);
    }
    final MessageType message = new MessageType("schema" + depth, nested);
    final GroupFactory factory = new SimpleGroupFactory(message);
    final List<Group> records = new ArrayList<Group>();

    final Group record = factory.newGroup();
    Group cursor = record;
    for (int level = 0; level < depth; level++) {
      cursor = cursor.addGroup(0);
    }
    cursor.add(0, Binary.fromString("foo"));

    records.add(record);
    testSchema(message, records);
  }

  // Phase 2: an OPTIONAL primitive wrapped in 0..5 REQUIRED groups; one record sets the
  // primitive, the other builds the same group chain but leaves the primitive unset.
  for (int depth = 0; depth < 6; depth++) {
    Type nested = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "primitive");
    for (int level = 0; level < depth; level++) {
      nested = new GroupType(Repetition.REQUIRED, "req" + level, nested);
    }
    final MessageType message = new MessageType("schema" + (depth + 6), nested);
    final GroupFactory factory = new SimpleGroupFactory(message);
    final List<Group> records = new ArrayList<Group>();

    final Group withValue = factory.newGroup();
    final Group withoutValue = factory.newGroup();
    Group valueCursor = withValue;
    Group emptyCursor = withoutValue;
    for (int level = 0; level < depth; level++) {
      valueCursor = valueCursor.addGroup(0);
      emptyCursor = emptyCursor.addGroup(0);
    }
    valueCursor.add(0, Binary.fromString("foo"));

    records.add(withValue);
    records.add(withoutValue);
    testSchema(message, records);
  }

  // Phase 3: an OPTIONAL primitive inside six groups where exactly one group (index
  // optionalLevel) is OPTIONAL. The second record only descends on iterations where
  // optionalLevel < level.
  for (int optionalLevel = 0; optionalLevel < 6; optionalLevel++) {
    Type nested = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "primitive");
    for (int level = 0; level < 6; level++) {
      final Repetition repetition =
          optionalLevel == level ? Repetition.OPTIONAL : Repetition.REQUIRED;
      nested = new GroupType(repetition, "req" + level, nested);
    }
    final MessageType message = new MessageType("schema" + (optionalLevel + 12), nested);
    final GroupFactory factory = new SimpleGroupFactory(message);
    final List<Group> records = new ArrayList<Group>();

    final Group withValue = factory.newGroup();
    final Group withoutValue = factory.newGroup();
    Group valueCursor = withValue;
    Group emptyCursor = withoutValue;
    for (int level = 0; level < 6; level++) {
      valueCursor = valueCursor.addGroup(0);
      if (optionalLevel < level) {
        emptyCursor = emptyCursor.addGroup(0);
      }
    }
    valueCursor.add(0, Binary.fromString("foo"));

    records.add(withValue);
    records.add(withoutValue);
    testSchema(message, records);
  }
}
Aggregations