use of org.apache.parquet.thrift.projection.ThriftProjectionException in project parquet-mr by apache.
the class ThriftSchemaConvertVisitor method visit.
/**
 * Converts a Thrift map type into a projected Parquet map field.
 *
 * <p>The key and the value are each converted with this visitor first. The outcome then
 * depends on which of them the projection kept:
 * <ul>
 *   <li>neither kept: the whole map is dropped;</li>
 *   <li>only the value kept: rejected, keys must be kept along with values;</li>
 *   <li>key kept, value kept: a map of the two converted types;</li>
 *   <li>key kept, value dropped: a map whose value type is a sentinel produced by
 *       re-converting the value with a {@code KeepOnlyFirstPrimitiveFilter}.</li>
 * </ul>
 *
 * @param mapType the Thrift map type being converted
 * @param state   the path, repetition and name under which the map is being converted
 * @return a {@code Drop} for the map's path, or a {@code Keep} wrapping the Parquet map field
 * @throws ThriftProjectionException if only the values are selected, or if (while projecting)
 *         the projected key type differs from the fully converted key type
 */
@Override
public ConvertedField visit(MapType mapType, State state) {
  final ThriftField key = mapType.getKey();
  final ThriftField value = mapType.getValue();

  final State keyState = new State(state.path.push(key), REQUIRED, "key");
  // TODO: This is a bug! The value should be REQUIRED, but changing it would
  // break the schema compatibility check against old data.
  // Thrift does not support null / missing map values.
  final State valueState = new State(state.path.push(value), OPTIONAL, "value");

  // Convert the key before the value, each with the current visitor.
  final ConvertedField projectedKey = key.getType().accept(this, keyState);
  final ConvertedField projectedValue = value.getType().accept(this, valueState);
  final boolean valueKept = projectedValue.isKeep();

  if (!projectedKey.isKeep()) {
    if (!valueKept) {
      // neither key nor value was requested
      return new Drop(state.path);
    }
    throw new ThriftProjectionException("Cannot select only the values of a map, you must keep the keys as well: " + state.path);
  }

  // NOTE: doProjection prevents us from infinite recursion here.
  if (doProjection) {
    // Convert the key again with every column selected and projection checks disabled,
    // then require that the projected key has exactly the same type.
    final ThriftSchemaConvertVisitor allColumnsVisitor =
        new ThriftSchemaConvertVisitor(FieldProjectionFilter.ALL_COLUMNS, false, keepOneOfEachUnion);
    final Type completeKeyType = key.getType().accept(allColumnsVisitor, keyState).asKeep().getType();
    if (!completeKeyType.equals(projectedKey.asKeep().getType())) {
      throw new ThriftProjectionException("Cannot select only a subset of the fields in a map key, for path " + state.path);
    }
  }

  final Type valueType;
  if (valueKept) {
    // keep both key and value
    valueType = projectedValue.asKeep().getType();
  } else {
    // Keep only the key, not the value: the sentinel value type signals to the
    // mapType method to project the value.
    final ThriftSchemaConvertVisitor sentinelVisitor =
        new ThriftSchemaConvertVisitor(new KeepOnlyFirstPrimitiveFilter(), true, keepOneOfEachUnion);
    valueType = value.getType().accept(sentinelVisitor, valueState).asKeep().getType();
  }

  return new Keep(state.path, mapType(state.repetition, state.name, projectedKey.asKeep().getType(), valueType));
}
use of org.apache.parquet.thrift.projection.ThriftProjectionException in project parquet-mr by apache.
the class ThriftReadSupport method init.
/**
 * Determines the schema to read from the file and wraps it in a {@code ReadContext}.
 *
 * <p>The requested projection defaults to the file schema. It is replaced either by the
 * partial schema string found under {@code ReadSupport.PARQUET_READ_SCHEMA} or by the schema
 * derived from the configured field projection filter; setting both is an error. The
 * resulting projection is then passed, together with the file schema, to
 * {@code getSchemaForRead}.
 *
 * @param context supplies the configuration, the file schema and the key/value metadata
 * @return the read context built from the resolved read schema
 * @throws ThriftProjectionException if both a partial schema and a field projection filter
 *         are configured, or if the thrift class cannot be found while applying the filter
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
  final Configuration configuration = context.getConfiguration();
  final MessageType fileMessageType = context.getFileSchema();
  MessageType requestedProjection = fileMessageType;
  String partialSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
  FieldProjectionFilter projectionFilter = getFieldProjectionFilter(configuration);
  if (partialSchemaString != null && projectionFilter != null) {
    // The two sentences are separate literals; the trailing space keeps them from
    // running together in the rendered message.
    throw new ThriftProjectionException(String.format(
        "You cannot provide both a partial schema and field projection filter. "
            + "Only one of (%s, %s, %s) should be set.",
        PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY, THRIFT_COLUMN_FILTER_KEY));
  }
  // set requestedProjection only when it's specified
  if (partialSchemaString != null) {
    requestedProjection = getSchemaForRead(fileMessageType, partialSchemaString);
  } else if (projectionFilter != null) {
    try {
      // The thrift class must be initialized before the projected schema is computed.
      initThriftClassFromMultipleFiles(context.getKeyValueMetadata(), configuration);
      requestedProjection = getProjectedSchema(projectionFilter);
    } catch (ClassNotFoundException e) {
      throw new ThriftProjectionException("can not find thriftClass from configuration", e);
    }
  }
  MessageType schemaForRead = getSchemaForRead(fileMessageType, requestedProjection);
  return new ReadContext(schemaForRead);
}
use of org.apache.parquet.thrift.projection.ThriftProjectionException in project parquet-mr by apache.
the class ThriftSchemaConvertVisitor method convert.
/**
 * Converts a Thrift struct into a Parquet {@code MessageType}, applying the given projection.
 *
 * <p>The struct is visited under a root state named {@code "ParquetSchema"} with an empty
 * path. The fields of the resulting group type become the fields of the message.
 *
 * @param struct             the Thrift struct to convert
 * @param filter             the field projection filter handed to the visitor
 * @param keepOneOfEachUnion flag forwarded unchanged to the visitor
 * @return a message type named after the root state, holding the converted fields
 * @throws ThriftProjectionException if the conversion of the struct is not kept
 */
public static MessageType convert(StructType struct, FieldProjectionFilter filter, boolean keepOneOfEachUnion) {
  final State rootState = new State(new FieldsPath(), REPEATED, "ParquetSchema");
  final ThriftSchemaConvertVisitor visitor = new ThriftSchemaConvertVisitor(filter, true, keepOneOfEachUnion);
  final ConvertedField root = struct.accept(visitor, rootState);

  if (root.isKeep()) {
    return new MessageType(rootState.name, root.asKeep().getType().asGroupType().getFields());
  }
  throw new ThriftProjectionException("No columns have been selected");
}
Aggregations