use of org.apache.parquet.thrift.struct.ThriftType.MapType in project parquet-mr by apache.
the class ThriftSchemaConvertVisitor method visit.
/**
 * Converts a Thrift map type into its (possibly projected) schema field.
 *
 * <p>The key and the value are each visited with this visitor, and the result
 * depends on which of the two were kept:
 * <ul>
 *   <li>neither kept: the whole map is dropped;</li>
 *   <li>only the value kept: rejected, a map cannot be read without its keys;</li>
 *   <li>both kept: a map built from the converted key and value types;</li>
 *   <li>only the key kept: a map whose value type is obtained by converting the
 *       value again with a {@code KeepOnlyFirstPrimitiveFilter} visitor.</li>
 * </ul>
 *
 * <p>When {@code doProjection} is set, the key is additionally converted with an
 * {@code ALL_COLUMNS} visitor and must come out equal to the converted key type,
 * i.e. a key may not be partially selected.
 *
 * @param mapType the Thrift map type being converted
 * @param state   path, repetition and name under which the map is converted
 * @return a {@code Drop} when neither key nor value is kept, otherwise a {@code Keep}
 *         wrapping the resulting map type
 * @throws ThriftProjectionException if only the value is kept, or if the key is
 *         only partially selected while {@code doProjection} is set
 */
@Override
public ConvertedField visit(MapType mapType, State state) {
  final ThriftField key = mapType.getKey();
  final ThriftField value = mapType.getValue();

  final State keyState = new State(state.path.push(key), REQUIRED, "key");
  // TODO: This is a bug! this should be REQUIRED but changing this will
  // break the schema compatibility check against old data.
  // Thrift does not support null / missing map values.
  final State valueState = new State(state.path.push(value), OPTIONAL, "value");

  final ConvertedField projectedKey = key.getType().accept(this, keyState);
  final ConvertedField projectedValue = value.getType().accept(this, valueState);

  if (!projectedKey.isKeep()) {
    if (!projectedValue.isKeep()) {
      // neither key nor value was requested
      return new Drop(state.path);
    }
    throw new ThriftProjectionException("Cannot select only the values of a map, you must keep the keys as well: " + state.path);
  }

  final Type keyType = projectedKey.asKeep().getType();

  // NOTE: doProjections prevents us from infinite recursion here.
  // (The nested visitor is built with `false` in the position this visitor
  // presumably reads back as doProjection -- confirm against the constructor.)
  if (doProjection) {
    final ThriftSchemaConvertVisitor wholeKeyVisitor =
        new ThriftSchemaConvertVisitor(FieldProjectionFilter.ALL_COLUMNS, false, keepOneOfEachUnion);
    final ConvertedField wholeKey = key.getType().accept(wholeKeyVisitor, keyState);
    if (!wholeKey.asKeep().getType().equals(keyType)) {
      throw new ThriftProjectionException("Cannot select only a subset of the fields in a map key, " + "for path " + state.path);
    }
  }

  final Type valueType;
  if (projectedValue.isKeep()) {
    // keep both key and value
    valueType = projectedValue.asKeep().getType();
  } else {
    // keep only the key, not the value: the sentinel value type
    // signals to mapType method to project the value
    final ThriftSchemaConvertVisitor sentinelVisitor =
        new ThriftSchemaConvertVisitor(new KeepOnlyFirstPrimitiveFilter(), true, keepOneOfEachUnion);
    final ConvertedField sentinelValue = value.getType().accept(sentinelVisitor, valueState);
    valueType = sentinelValue.asKeep().getType();
  }

  return new Keep(state.path, mapType(state.repetition, state.name, keyType, valueType));
}
use of org.apache.parquet.thrift.struct.ThriftType.MapType in project parquet-mr by apache.
the class ParquetWriteProtocol method getProtocol.
/**
 * Creates the write protocol that matches the Thrift type of {@code field}.
 *
 * <p>Scalar types (BOOL, BYTE, DOUBLE, I16, I32, I64, STRING) and ENUM cast
 * {@code columnIO} to {@code PrimitiveColumnIO}; STRUCT, MAP, SET and LIST cast it
 * to {@code GroupColumnIO}. SET and LIST both use {@code ListWriteProtocol},
 * parameterized with the field describing their elements.
 *
 * @param field        the Thrift field whose type selects the protocol
 * @param columnIO     the column IO handed to the created protocol (cast as described above)
 * @param returnClause forwarded unchanged to the created protocol
 * @return the protocol for the field's type
 * @throws UnsupportedOperationException for STOP, VOID and any type id not listed above
 */
private TProtocol getProtocol(ThriftField field, ColumnIO columnIO, Events returnClause) {
  final ThriftType type = field.getType();
  switch (type.getType()) {
    case BOOL:
    case BYTE:
    case DOUBLE:
    case I16:
    case I32:
    case I64:
    case STRING:
      return new PrimitiveWriteProtocol((PrimitiveColumnIO) columnIO, returnClause);
    case ENUM:
      return new EnumWriteProtocol((PrimitiveColumnIO) columnIO, (EnumType) type, returnClause);
    case STRUCT:
      return new StructWriteProtocol((GroupColumnIO) columnIO, (StructType) type, returnClause);
    case MAP:
      return new MapWriteProtocol((GroupColumnIO) columnIO, (MapType) type, returnClause);
    case SET:
      return new ListWriteProtocol((GroupColumnIO) columnIO, ((SetType) type).getValues(), returnClause);
    case LIST:
      return new ListWriteProtocol((GroupColumnIO) columnIO, ((ListType) type).getValues(), returnClause);
    case STOP:
    case VOID:
    default:
      // Nothing can be written for these type ids.
      throw new UnsupportedOperationException("can't convert type of " + field);
  }
}
use of org.apache.parquet.thrift.struct.ThriftType.MapType in project parquet-mr by apache.
the class BufferedProtocolReadToWrite method readOneValue.
/**
 * Reads a single value of wire type {@code type} from {@code in} and hands it to
 * the matching helper together with {@code buffer}.
 *
 * <p>Scalars are read from the protocol and passed to the corresponding
 * {@code write*Action} helper. Lists, maps, sets and structs are delegated to
 * {@code readOneList}, {@code readOneMap}, {@code readOneSet} and
 * {@code readOneStruct}. STOP and VOID read nothing.
 *
 * @param in           the protocol to read the value from
 * @param type         the Thrift wire type ({@code TType} constant) of the value
 * @param buffer       the action list passed on to the helpers
 * @param expectedType the type the value is expected to have; when {@code null} the
 *                     wire-type check is skipped and {@code null} is passed on to the helpers
 * @return {@code false} for scalars, STOP and VOID; for lists, maps, sets and structs
 *         whatever the delegated helper returns. NOTE(review): the original code stored
 *         this in a variable named {@code hasFieldsIgnored}, and fully consumed scalars
 *         yield {@code false}, so {@code true} presumably means "some fields were ignored
 *         because they are not defined in expectedType" -- confirm against
 *         {@code readOneStruct}.
 * @throws DecodingSchemaMismatchException if {@code expectedType} is non-null and its
 *         serialized Thrift type differs from {@code type}
 * @throws TException if {@code type} is not a known wire type, or if raised by the
 *         underlying reads
 */
private boolean readOneValue(TProtocol in, byte type, List<Action> buffer, ThriftType expectedType) throws TException {
  if (expectedType != null) {
    if (expectedType.getType().getSerializedThriftType() != type) {
      throw new DecodingSchemaMismatchException("the data type does not match the expected thrift structure: expected " + expectedType + " got " + typeName(type));
    }
  }

  switch (type) {
    // Containers and structs: the helper decides the result.
    case TType.LIST:
      return readOneList(in, buffer, (ListType) expectedType);
    case TType.MAP:
      return readOneMap(in, buffer, (MapType) expectedType);
    case TType.SET:
      return readOneSet(in, buffer, (SetType) expectedType);
    case TType.STRUCT:
      return readOneStruct(in, buffer, (StructType) expectedType);

    // Nothing to read.
    case TType.STOP:
    case TType.VOID:
      return false;

    // Scalars: read the value and pass it straight to the matching helper.
    case TType.BOOL:
      writeBoolAction(buffer, in.readBool());
      return false;
    case TType.BYTE:
      writeByteAction(buffer, in.readByte());
      return false;
    case TType.DOUBLE:
      writeDoubleAction(buffer, in.readDouble());
      return false;
    case TType.I16:
      writeShortAction(buffer, in.readI16());
      return false;
    // same as i32 => actually never seen in the protocol layer as enums are written as a i32 field
    case TType.ENUM:
    case TType.I32: {
      final int intValue = in.readI32();
      checkEnum(expectedType, intValue);
      writeIntAction(buffer, intValue);
      return false;
    }
    case TType.I64:
      writeLongAction(buffer, in.readI64());
      return false;
    case TType.STRING:
      writeStringAction(buffer, in.readBinary());
      return false;

    default:
      throw new TException("Unknown type: " + type);
  }
}
Aggregations