Use of org.apache.parquet.thrift.projection.FieldProjectionFilter in the project parquet-mr by Apache.
The init method of the class ThriftReadSupport.
/**
 * Builds the {@code ReadContext} for a read by choosing which schema to request from the file.
 *
 * <p>The requested projection comes from one of two mutually exclusive sources: a partial schema
 * string stored in the configuration under {@code ReadSupport.PARQUET_READ_SCHEMA}, or the field
 * projection filter returned by {@code getFieldProjectionFilter(configuration)}. When neither is
 * present, the file schema itself is used as the requested projection.
 *
 * @param context the init context supplying the configuration, the file schema and the
 *     key/value metadata
 * @return a {@code ReadContext} wrapping the schema returned by
 *     {@code getSchemaForRead(fileMessageType, requestedProjection)}
 * @throws ThriftProjectionException if both a partial schema and a field projection filter are
 *     configured, or if a {@code ClassNotFoundException} is raised while preparing the
 *     filter-based projection
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
  final Configuration configuration = context.getConfiguration();
  final MessageType fileMessageType = context.getFileSchema();
  // Default: request everything the file contains.
  MessageType requestedProjection = fileMessageType;
  String partialSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
  FieldProjectionFilter projectionFilter = getFieldProjectionFilter(configuration);

  // The two projection mechanisms cannot be combined; fail fast and name the keys involved.
  if (partialSchemaString != null && projectionFilter != null) {
    throw new ThriftProjectionException(String.format(
        "You cannot provide both a partial schema and field projection filter. "
            + "Only one of (%s, %s, %s) should be set.",
        PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY, THRIFT_COLUMN_FILTER_KEY));
  }

  // Override the default projection only when one was explicitly specified.
  if (partialSchemaString != null) {
    requestedProjection = getSchemaForRead(fileMessageType, partialSchemaString);
  } else if (projectionFilter != null) {
    try {
      // The thrift class is initialized from the file metadata and configuration before the
      // projected schema is derived from the filter.
      initThriftClassFromMultipleFiles(context.getKeyValueMetadata(), configuration);
      requestedProjection = getProjectedSchema(projectionFilter);
    } catch (ClassNotFoundException e) {
      throw new ThriftProjectionException("can not find thriftClass from configuration", e);
    }
  }

  // Whatever projection was chosen is always passed through getSchemaForRead against the
  // file schema before being handed to the ReadContext.
  MessageType schemaForRead = getSchemaForRead(fileMessageType, requestedProjection);
  return new ReadContext(schemaForRead);
}
Aggregations