Use of org.apache.parquet.schema.Type in project druid by druid-io.
Class ParquetGroupConverter, method convertField.
/**
 * Three-argument form of the conversion; see {@link ParquetGroupConverter#convertField(Group, String)}.
 *
 * <p>Resolves {@code fieldName} on the group and converts whatever is stored there:
 * <ul>
 * <li>a repeated primitive becomes a list of converted primitive values;</li>
 * <li>a non-repeated primitive becomes a single converted primitive value;</li>
 * <li>a repeated group is handed to {@code convertRepeatedFieldToList};</li>
 * <li>a logical map or logical list group is converted from its first occurrence;</li>
 * <li>any other group is returned as the nested {@code Group} itself.</li>
 * </ul>
 *
 * @param g group to read the field from
 * @param fieldName name of the field to convert
 * @param binaryAsString passed through unchanged to the primitive, map and list converters
 * @return the converted value, or {@code null} when the group's type has no such field or the
 *         field has no occurrences in this group
 */
@Nullable
private static Object convertField(Group g, String fieldName, boolean binaryAsString) {
  // the schema does not know this field at all
  if (!g.getType().containsField(fieldName)) {
    return null;
  }

  final int index = g.getType().getFieldIndex(fieldName);
  final int occurrences = g.getFieldRepetitionCount(index);
  // the field is declared but holds no value in this group
  if (occurrences <= 0) {
    return null;
  }

  final Type type = g.getType().getFields().get(index);
  final boolean isRepeated = type.isRepetition(Type.Repetition.REPEATED);

  if (type.isPrimitive()) {
    if (!isRepeated) {
      return convertPrimitiveField(g, index, binaryAsString);
    }
    // primitive list: convert every occurrence in order
    final List<Object> converted = new ArrayList<>();
    for (int occurrence = 0; occurrence < occurrences; occurrence++) {
      converted.add(convertPrimitiveField(g, index, occurrence, binaryAsString));
    }
    return converted;
  }

  // from here on the field is a group
  if (isRepeated) {
    return convertRepeatedFieldToList(g, index, binaryAsString);
  }
  if (isLogicalMapType(type)) {
    return convertLogicalMap(g.getGroup(index, 0), binaryAsString);
  }
  if (isLogicalListType(type)) {
    return convertLogicalList(g.getGroup(index, 0), binaryAsString);
  }
  // neither a list nor a primitive: return the nested group as-is
  return g.getGroup(index, 0);
}
Use of org.apache.parquet.schema.Type in project druid by druid-io.
Class ParquetGroupConverter, method convertRepeatedFieldToList.
/**
 * Collects every occurrence of a repeated field into a list.
 *
 * <p>Primitive occurrences are converted with {@code convertPrimitiveField}; group occurrences
 * are added as the nested {@code Group} objects without further conversion.
 *
 * @param g group holding the repeated field
 * @param fieldIndex index of the field within the group's type
 * @param binaryAsString passed through unchanged to the primitive converter
 * @return list with one entry per occurrence, in occurrence order
 */
private static List<Object> convertRepeatedFieldToList(Group g, int fieldIndex, boolean binaryAsString) {
  final Type fieldType = g.getType().getFields().get(fieldIndex);
  assert fieldType.getRepetition().equals(Type.Repetition.REPEATED);

  final int occurrences = g.getFieldRepetitionCount(fieldIndex);
  final List<Object> collected = new ArrayList<>();

  // the field's kind is the same for every occurrence, so decide once
  if (fieldType.isPrimitive()) {
    for (int occurrence = 0; occurrence < occurrences; occurrence++) {
      collected.add(convertPrimitiveField(g, fieldIndex, occurrence, binaryAsString));
    }
  } else {
    for (int occurrence = 0; occurrence < occurrences; occurrence++) {
      collected.add(g.getGroup(fieldIndex, occurrence));
    }
  }
  return collected;
}
Use of org.apache.parquet.schema.Type in project drill by apache.
Class DrillParquetReader, method adaptColumnsToParquetSchema.
/**
 * Adjusts a collection of SchemaPath projection columns so that they better match the columns
 * of the given schema. For every projected path it:
 * <ul>
 * <li>skips ArraySegments if present (only named segments contribute to the result);</li>
 * <li>stops descending at a Parquet MAP so that EvaluationVisitor can manage the get-by-key logic;</li>
 * <li>appends the listName and elementName of a logical list, because they exist in the schema
 * but are absent from the original projection columns.</li>
 * </ul>
 *
 * @param columns original projection columns
 * @param schema Parquet file schema
 * @return adjusted projection columns, one per input column, in iteration order
 */
private static List<SchemaPath> adaptColumnsToParquetSchema(Collection<SchemaPath> columns, MessageType schema) {
  List<SchemaPath> adapted = new LinkedList<>();

  for (SchemaPath column : columns) {
    List<String> names = new ArrayList<>();
    Type currentType = schema;

    PathSegment current = column.getRootSegment();
    while (current != null) {
      if (current.isNamed()) {
        names.add(current.getNameSegment().getPath());
      }

      currentType = getSegmentType(currentType, current);

      if (currentType != null && !currentType.isPrimitive()) {
        GroupType currentGroup = currentType.asGroupType();

        if (ParquetReaderUtility.isLogicalMapType(currentGroup)) {
          // stop at the MAP column: values obtained from the map by key
          // differ from the actual column's path
          break;
        }

        if (ParquetReaderUtility.isLogicalListType(currentGroup)) {
          Type listType = currentGroup.getType(0);
          // 'list' or 'bag'
          String listName = listType.getName();
          // 'element' or 'array_element'
          String elementName = listType.asGroupType().getType(0).getName();
          names.add(listName);
          names.add(elementName);
        }
      }

      current = current.getChild();
    }

    adapted.add(SchemaPath.getCompoundPath(names.toArray(new String[0])));
  }

  return adapted;
}
Use of org.apache.parquet.schema.Type in project drill by apache.
Class DrillParquetReader, method getSegmentType.
/**
 * Builds the type for the path {@code pathSegments}, starting at position {@code depth}.
 *
 * <p>The type for the prefix {@code pathSegments[0..depth]} is looked up in {@code schema}. If
 * that prefix is the whole path, the looked-up type is returned as-is. Otherwise a new group is
 * built with the same repetition, original type and name, containing only the single child that
 * lies on the path (computed recursively).
 *
 * @param pathSegments full column path, one name per nesting level
 * @param depth index of the segment to resolve in this call
 * @param schema Parquet schema the path is resolved against
 * @return the leaf type for the last segment, or a single-child group wrapping the rest of the path
 */
private static Type getSegmentType(String[] pathSegments, int depth, MessageType schema) {
  final int prefixLength = depth + 1;
  final String[] prefix = Arrays.copyOfRange(pathSegments, 0, prefixLength);
  final Type resolved = schema.getType(prefix);

  // last segment of the path: nothing more to wrap
  if (prefixLength == pathSegments.length) {
    return resolved;
  }

  // the path continues, so the resolved type must be a group
  Preconditions.checkState(!resolved.isPrimitive());
  final Type child = getSegmentType(pathSegments, prefixLength, schema);
  return Types.buildGroup(resolved.getRepetition())
      .as(resolved.getOriginalType())
      .addField(child)
      .named(resolved.getName());
}
Use of org.apache.parquet.schema.Type in project drill by apache.
Class DrillParquetReader, method getSegmentType.
/**
 * Resolves the child type of {@code parentSegmentType} that corresponds to {@code segment}.
 *
 * <p>A named segment is matched against the parent's fields by name, ignoring case. A non-named
 * segment (an array index) is resolved only when the parent is a logical list, in which case the
 * list's element type is returned.
 *
 * @param parentSegmentType type to extract the field corresponding to the segment from
 * @param segment segment whose type will be returned
 * @return type corresponding to the {@code segment}, or {@code null} if the parent is
 *         {@code null} or primitive, or no matching field is found in it
 */
private static Type getSegmentType(Type parentSegmentType, PathSegment segment) {
  // only a group type has children to descend into
  if (parentSegmentType == null || parentSegmentType.isPrimitive()) {
    return null;
  }

  GroupType parentGroup = parentSegmentType.asGroupType();

  if (segment.isNamed()) {
    String wantedName = segment.getNameSegment().getPath();
    for (Type candidate : parentGroup.getFields()) {
      if (candidate.getName().equalsIgnoreCase(wantedName)) {
        // look the field up again by its exact name, as stored in the group
        return parentGroup.getType(candidate.getName());
      }
    }
    return null;
  }

  if (ParquetReaderUtility.isLogicalListType(parentGroup)) {
    // the segment is an array index: return the element type of the list
    return parentGroup.getType(0).asGroupType().getType(0);
  }

  return null;
}
Aggregations