Use of org.apache.parquet.thrift.struct.ThriftField in project parquet-mr by apache.
Method visit of the class ThriftSchemaConvertVisitor.
/**
 * Converts a Thrift map while applying the current projection.
 *
 * <p>Key and value are each converted with this visitor. The outcome is:
 * <ul>
 *   <li>{@link Drop} when neither key nor value is kept;</li>
 *   <li>{@link Keep} with the converted key and value when both are kept;</li>
 *   <li>{@link Keep} with the converted key and a stand-in value (obtained by
 *       re-converting the value with a {@code KeepOnlyFirstPrimitiveFilter})
 *       when only the key is kept.</li>
 * </ul>
 *
 * @param mapType the Thrift map being converted
 * @param state   path, repetition and name under which the map is converted
 * @return the conversion result for the map
 * @throws ThriftProjectionException if only the value is selected, or if the
 *         projected key type differs from the fully converted key type
 */
@Override
public ConvertedField visit(MapType mapType, State state) {
  final ThriftField key = mapType.getKey();
  final ThriftField value = mapType.getValue();

  final State keyState = new State(state.path.push(key), REQUIRED, "key");
  // TODO: This is a bug! this should be REQUIRED but changing this will
  // break the schema compatibility check against old data
  // Thrift does not support null / missing map values.
  final State valueState = new State(state.path.push(value), OPTIONAL, "value");

  // convert the key first, then the value
  final ConvertedField projectedKey = key.getType().accept(this, keyState);
  final ConvertedField projectedValue = value.getType().accept(this, valueState);

  final boolean keyKept = projectedKey.isKeep();
  final boolean valueKept = projectedValue.isKeep();

  if (!keyKept) {
    if (!valueKept) {
      // neither key nor value was requested
      return new Drop(state.path);
    }
    throw new ThriftProjectionException("Cannot select only the values of a map, you must keep the keys as well: " + state.path);
  }

  // The key must not be partially projected: compare it with the key
  // converted under ALL_COLUMNS.
  // NOTE: doProjections prevents us from infinite recursion here.
  if (doProjection) {
    ConvertedField unprojectedKey = key.getType().accept(new ThriftSchemaConvertVisitor(FieldProjectionFilter.ALL_COLUMNS, false, keepOneOfEachUnion), keyState);
    boolean keyIsComplete = unprojectedKey.asKeep().getType().equals(projectedKey.asKeep().getType());
    if (!keyIsComplete) {
      throw new ThriftProjectionException("Cannot select only a subset of the fields in a map key, " + "for path " + state.path);
    }
  }

  final Type valueType;
  if (valueKept) {
    // keep both key and value
    valueType = projectedValue.asKeep().getType();
  } else {
    // keep only the key, not the value
    ConvertedField sentinelValue = value.getType().accept(new ThriftSchemaConvertVisitor(new KeepOnlyFirstPrimitiveFilter(), true, keepOneOfEachUnion), valueState);
    // signals to mapType method to project the value
    valueType = sentinelValue.asKeep().getType();
  }

  Type mapField = mapType(state.repetition, state.name, projectedKey.asKeep().getType(), valueType);
  return new Keep(state.path, mapField);
}
Use of org.apache.parquet.thrift.struct.ThriftField in project parquet-mr by apache.
Method visit of the class ThriftSchemaConvertVisitor.
/**
 * Converts a Thrift struct or union into a group, converting and projecting
 * its children in a single pass.
 *
 * <p>The result is a {@link Keep} when at least one child was selected, a
 * {@link SentinelUnion} when no child was selected but sentinel columns were
 * retained, and a {@link Drop} otherwise.
 *
 * @param structType the Thrift struct or union being converted
 * @param state      path, repetition and name under which it is converted
 * @return the conversion result for the struct
 */
@Override
public ConvertedField visit(StructType structType, State state) {
  // Unions get special handling because conversion and projection are
  // done together here.
  final boolean needsToKeepOneOfEachUnion = keepOneOfEachUnion && isUnion(structType.getStructOrUnionType());

  boolean keptSentinelColumn = false;
  boolean keptSelectedColumn = false;
  List<Type> groupFields = new ArrayList<Type>();

  for (ThriftField child : structType.getChildren()) {
    State childState = new State(state.path.push(child), getRepetition(child), child.getName());
    ConvertedField converted = child.getType().accept(this, childState);

    if (needsToKeepOneOfEachUnion && !converted.isKeep()) {
      // The user did not select this "kind" of the union, yet one of its
      // primitives has to stay so the "kind" of each record remains known.
      // TODO: in the future, we should just filter these records out instead
      // Recurse again with a filter that keeps only the first primitive
      // it encounters.
      ConvertedField firstPrimitive = child.getType().accept(new ThriftSchemaConvertVisitor(new KeepOnlyFirstPrimitiveFilter(), true, keepOneOfEachUnion), childState);
      groupFields.add(firstPrimitive.asKeep().getType().withId(child.getFieldId()));
      keptSentinelColumn = true;
    }

    if (converted.isSentinelUnion()) {
      // A sentinel union child is dropped unless the field is required.
      if (childState.repetition == REQUIRED) {
        groupFields.add(converted.asSentinelUnion().getType().withId(child.getFieldId()));
        keptSentinelColumn = true;
      }
    } else if (converted.isKeep()) {
      // The user selected this column, so it stays.
      groupFields.add(converted.asKeep().getType().withId(child.getFieldId()));
      keptSelectedColumn = true;
    }
  }

  if (keptSelectedColumn) {
    // some fields of this struct were requested, so the struct is kept
    return new Keep(state.path, new GroupType(state.repetition, state.name, groupFields));
  }
  if (keptSentinelColumn) {
    // nothing was requested but sentinel columns were retained; tag the
    // group as a sentinel so the caller can decide whether to drop it
    return new SentinelUnion(state.path, new GroupType(state.repetition, state.name, groupFields));
  }
  // none of the fields of this struct were requested, so it is dropped
  return new Drop(state.path);
}
Use of org.apache.parquet.thrift.struct.ThriftField in project parquet-mr by apache.
Method visit of the class DefaultEventsVisitor.
/**
 * Appends the placeholder events for a struct to {@code dummyEvents}: a
 * struct-begin event, then for every child a field-begin event, the events
 * produced by visiting the child's type, and a field-end event, followed by
 * the field-stop and struct-end events.
 *
 * @param structType the struct whose events are generated
 * @param v          unused
 * @return always {@code null}
 */
@Override
public Void visit(ThriftType.StructType structType, Void v) {
  dummyEvents.add(new StructBeginProtocol("struct"));

  // every declared field of the struct gets events; none is skipped
  for (ThriftField field : structType.getChildren()) {
    dummyEvents.add(new ReadFieldBeginProtocol(field));
    field.getType().accept(this, null);
    dummyEvents.add(DefaultProtocolEventsGenerator.READ_FIELD_END);
  }

  dummyEvents.add(DefaultProtocolEventsGenerator.READ_FIELD_STOP);
  dummyEvents.add(DefaultProtocolEventsGenerator.READ_STRUCT_END);
  return null;
}
Use of org.apache.parquet.thrift.struct.ThriftField in project parquet-mr by apache.
Method readOneStruct of the class BufferedProtocolReadToWrite.
/**
 * Reads one struct from {@code in} and appends the matching write actions to
 * {@code buffer}.
 *
 * <p>Fields whose id is not found in {@code type} are passed to
 * {@code handleUnrecognizedField} and get no write action. The number of
 * recognized fields is handed to {@code assertUnionHasExactlyOneChild}.
 *
 * @param in     protocol to read the struct from
 * @param buffer list receiving the write actions, in read order
 * @param type   expected Thrift type of the struct
 * @return {@code true} if a field was ignored in this struct or reported as
 *         ignored by {@code readOneValue} for one of its fields
 * @throws TException if reading from the protocol fails
 */
private boolean readOneStruct(TProtocol in, List<Action> buffer, StructType type) throws TException {
  final TStruct structHeader = in.readStructBegin();
  buffer.add(new Action() {
    @Override
    public void write(TProtocol out) throws TException {
      out.writeStructBegin(structHeader);
    }

    @Override
    public String toDebugString() {
      return "(";
    }
  });

  boolean ignoredSomething = false;
  int recognizedFields = 0;

  while (true) {
    final TField fieldHeader = in.readFieldBegin();
    if (fieldHeader.type == TType.STOP) {
      break;
    }

    final ThriftField declaredField = type.getChildById(fieldHeader.id);
    if (declaredField == null) {
      // not part of the expected type: no write action is buffered for it
      handleUnrecognizedField(fieldHeader, type, in);
      ignoredSomething = true;
      continue;
    }

    recognizedFields++;
    buffer.add(new Action() {
      @Override
      public void write(TProtocol out) throws TException {
        out.writeFieldBegin(fieldHeader);
      }

      @Override
      public String toDebugString() {
        return "f=" + fieldHeader.id + "<t=" + typeName(fieldHeader.type) + ">: ";
      }
    });

    boolean ignoredInValue = readOneValue(in, fieldHeader.type, buffer, declaredField.getType());
    ignoredSomething = ignoredSomething | ignoredInValue;
    in.readFieldEnd();
    buffer.add(FIELD_END);
  }

  // check that union had exactly 1 (no more no less) child fields.
  assertUnionHasExactlyOneChild(type, recognizedFields);

  in.readStructEnd();
  buffer.add(STRUCT_END);
  return ignoredSomething;
}
Use of org.apache.parquet.thrift.struct.ThriftField in project parquet-mr by apache.
Method toDelimitedString of the class FieldsPath.
/**
 * Joins the segments of this path into one string, with {@code delim}
 * between consecutive segments.
 *
 * <p>A segment is normally the field's name. A field that is the key or the
 * value of the map preceding it in the path is rendered as {@code "key"} or
 * {@code "value"} instead.
 *
 * <p>The delimiter is written between segments rather than appended after
 * each one and trimmed afterwards, so delimiters of any length (including
 * the empty string) give a correct result.
 *
 * @param delim separator placed between two consecutive segments
 * @return the delimited path, or the empty string when the path has no fields
 */
public String toDelimitedString(String delim) {
  StringBuilder delimited = new StringBuilder();
  for (int i = 0; i < fields.size(); i++) {
    if (i > 0) {
      delimited.append(delim);
    }

    ThriftField currentField = fields.get(i);
    String segment;
    if (i > 0 && FieldsPath.isKeyFieldOfMap(currentField, fields.get(i - 1))) {
      segment = "key";
    } else if (i > 0 && FieldsPath.isValueFieldOfMap(currentField, fields.get(i - 1))) {
      segment = "value";
    } else {
      segment = currentField.getName();
    }
    delimited.append(segment);
  }
  return delimited.toString();
}
Aggregations