use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto.
the class FieldPathHelper method schemaPathToFieldPath.
/**
* Returns {@link FieldPath} equivalent of the specified {@link SchemaPath}.
*
* @param schemaPath {@link SchemaPath} instance that should be converted
* @return {@link FieldPath} equivalent of the specified {@link SchemaPath}.
*/
public static FieldPath schemaPathToFieldPath(SchemaPath schemaPath) {
Deque<PathSegment> pathSegments = Queues.newArrayDeque();
PathSegment pathSegment = schemaPath.getRootSegment();
while (pathSegment != null) {
pathSegments.push(pathSegment);
pathSegment = pathSegment.getChild();
}
FieldSegment child = null;
while (!pathSegments.isEmpty()) {
pathSegment = pathSegments.pop();
if (pathSegment.isNamed()) {
child = new FieldSegment.NameSegment(((PathSegment.NameSegment) pathSegment).getPath(), child, false);
} else {
child = new FieldSegment.IndexSegment(String.valueOf(((PathSegment.ArraySegment) pathSegment).getIndex()), child);
}
}
return new FieldPath((FieldSegment.NameSegment) child);
}
use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto.
the class DrillParquetReader method getProjection.
public static MessageType getProjection(MessageType schema, Collection<SchemaPath> columns, List<SchemaPath> columnsNotFound) {
MessageType projection = null;
String messageName = schema.getName();
List<ColumnDescriptor> schemaColumns = schema.getColumns();
// parquet type.union() seems to lose ConvertedType info when merging two columns that are the same type. This can
// happen when selecting two elements from an array. So to work around this, we use set of SchemaPath to avoid duplicates
// and then merge the types at the end
Set<SchemaPath> selectedSchemaPaths = Sets.newLinkedHashSet();
// get a list of modified columns which have the array elements removed from the schema path since parquet schema doesn't include array elements
List<SchemaPath> modifiedColumns = Lists.newLinkedList();
for (SchemaPath path : columns) {
List<String> segments = Lists.newArrayList();
PathSegment seg = path.getRootSegment();
do {
if (seg.isNamed()) {
segments.add(seg.getNameSegment().getPath());
}
} while ((seg = seg.getChild()) != null);
String[] pathSegments = new String[segments.size()];
segments.toArray(pathSegments);
SchemaPath modifiedSchemaPath = SchemaPath.getCompoundPath(pathSegments);
modifiedColumns.add(modifiedSchemaPath);
}
// convert the columns in the parquet schema to a list of SchemaPath columns so that they can be compared in case insensitive manner
// to the projection columns
List<SchemaPath> schemaPaths = Lists.newLinkedList();
for (ColumnDescriptor columnDescriptor : schemaColumns) {
String[] schemaColDesc = Arrays.copyOf(columnDescriptor.getPath(), columnDescriptor.getPath().length);
SchemaPath schemaPath = SchemaPath.getCompoundPath(schemaColDesc);
schemaPaths.add(schemaPath);
}
// loop through projection columns and add any columns that are missing from parquet schema to columnsNotFound list
for (SchemaPath columnPath : modifiedColumns) {
boolean notFound = true;
for (SchemaPath schemaPath : schemaPaths) {
if (schemaPath.contains(columnPath)) {
selectedSchemaPaths.add(schemaPath);
notFound = false;
}
}
if (notFound) {
columnsNotFound.add(columnPath);
}
}
// convert SchemaPaths from selectedSchemaPaths and convert to parquet type, and merge into projection schema
for (SchemaPath schemaPath : selectedSchemaPaths) {
List<String> segments = Lists.newArrayList();
PathSegment seg = schemaPath.getRootSegment();
do {
segments.add(seg.getNameSegment().getPath());
} while ((seg = seg.getChild()) != null);
String[] pathSegments = new String[segments.size()];
segments.toArray(pathSegments);
Type t = getType(pathSegments, 0, schema);
if (projection == null) {
projection = new MessageType(messageName, t);
} else {
projection = projection.union(new MessageType(messageName, t));
}
}
return projection;
}
Aggregations