
Example 16 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by apache.

In the class DrillParquetReader, the method convertSelectedColumnsToMessageType:

/**
 * Converts the SchemaPaths in selectedSchemaPaths to Parquet types and merges them into one projection schema.
 *
 * @param schema Parquet file schema
 * @param selectedSchemaPaths columns found in schema
 * @return projection schema
 */
private static MessageType convertSelectedColumnsToMessageType(MessageType schema, Set<SchemaPath> selectedSchemaPaths) {
    MessageType projection = null;
    String messageName = schema.getName();
    for (SchemaPath schemaPath : selectedSchemaPaths) {
        List<String> segments = new ArrayList<>();
        // Flatten the SchemaPath into its ordered list of segment names, root to leaf.
        PathSegment seg = schemaPath.getRootSegment();
        do {
            segments.add(seg.getNameSegment().getPath());
        } while ((seg = seg.getChild()) != null);
        String[] pathSegments = new String[segments.size()];
        segments.toArray(pathSegments);
        Type t = getSegmentType(pathSegments, 0, schema);
        // Merge each selected column's type into the projection by schema union.
        if (projection == null) {
            projection = new MessageType(messageName, t);
        } else {
            projection = projection.union(new MessageType(messageName, t));
        }
    }
    return projection;
}
Also used : GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) SchemaPath(org.apache.drill.common.expression.SchemaPath) ArrayList(java.util.ArrayList) PathSegment(org.apache.drill.common.expression.PathSegment)
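
For context, the union-based projection technique above can be reproduced with plain parquet-mr and no Drill classes. The sketch below is a minimal standalone example (class name ProjectionSketch and the columns id, name, address are invented for illustration): it builds a single-column MessageType per selected top-level column and unions them; nested columns would need the recursive getSegmentType step used in the reader.

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.Type;

public class ProjectionSketch {
    public static void main(String[] args) {
        // Full file schema (hypothetical columns).
        MessageType schema = MessageTypeParser.parseMessageType(
            "message root { required int64 id; optional binary name (UTF8); "
                + "optional group address { optional binary city (UTF8); } }");

        // Selected top-level columns.
        String[] selected = {"id", "address"};

        MessageType projection = null;
        for (String col : selected) {
            Type t = schema.getType(col);  // look up the column's Parquet type by name
            MessageType single = new MessageType(schema.getName(), t);
            projection = (projection == null) ? single : projection.union(single);
        }
        System.out.println(projection);
    }
}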

Example 17 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by apache.

In the class PrelUtil, the method getColumns:

public static ProjectPushInfo getColumns(RelDataType rowType, List<RexNode> projects) {
    final List<String> fieldNames = rowType.getFieldNames();
    if (fieldNames.isEmpty()) {
        return null;
    }
    RefFieldsVisitor v = new RefFieldsVisitor(rowType);
    for (RexNode exp : projects) {
        PathSegment segment = exp.accept(v);
        v.addColumn(segment);
    }
    return v.getInfo();
}
Also used : PathSegment(org.apache.drill.common.expression.PathSegment) RexNode(org.apache.calcite.rex.RexNode)

Example 18 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto.

In the class DrillPushProjectIntoScanRule, the method getFieldsInformation:

private ProjectPushInfo getFieldsInformation(RelDataType rowType, List<RexNode> projects) {
    ProjectFieldsVisitor fieldsVisitor = new ProjectFieldsVisitor(rowType);
    for (RexNode exp : projects) {
        PathSegment segment = exp.accept(fieldsVisitor);
        fieldsVisitor.addField(segment);
    }
    return fieldsVisitor.getInfo();
}
Also used : PathSegment(org.apache.drill.common.expression.PathSegment) RexNode(org.apache.calcite.rex.RexNode)
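
Both of these visitors return one PathSegment chain per projected expression, which the push-down info then wraps into SchemaPath columns. Below is a minimal sketch of how such a chain is built and walked directly (the column names and the class name PathSegmentSketch are invented):

import org.apache.drill.common.expression.PathSegment;
import org.apache.drill.common.expression.PathSegment.ArraySegment;
import org.apache.drill.common.expression.PathSegment.NameSegment;
import org.apache.drill.common.expression.SchemaPath;

public class PathSegmentSketch {
    public static void main(String[] args) {
        // `orders.items[0]`: a name segment, a child name segment, then an array index.
        NameSegment chain = new NameSegment("orders",
                new NameSegment("items", new ArraySegment(0)));
        SchemaPath column = new SchemaPath(chain);

        // Walk the chain root-to-leaf, as the readers in the later examples do.
        for (PathSegment seg = column.getRootSegment(); seg != null; seg = seg.getChild()) {
            System.out.println(seg.isNamed() ? seg.getNameSegment().getPath() : "[array index]");
        }
    }
}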

Example 19 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto.

In the class JsonReaderUtils, the method ensureAtLeastOneField:

public static void ensureAtLeastOneField(BaseWriter.ComplexWriter writer, Collection<SchemaPath> columns, boolean allTextMode, List<BaseWriter.ListWriter> emptyArrayWriters) {
    List<BaseWriter.MapWriter> writerList = Lists.newArrayList();
    List<PathSegment> fieldPathList = Lists.newArrayList();
    BitSet emptyStatus = new BitSet(columns.size());
    int i = 0;
    // first pass: collect which fields are empty
    for (SchemaPath sp : columns) {
        PathSegment fieldPath = sp.getRootSegment();
        BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
        while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
            fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
            fieldPath = fieldPath.getChild();
        }
        writerList.add(fieldWriter);
        fieldPathList.add(fieldPath);
        if (fieldWriter.isEmptyMap()) {
            emptyStatus.set(i, true);
        }
        if (i == 0 && !allTextMode) {
            // When allTextMode is false the field types are unknown, so only one default
            // field is created; with allTextMode true every field is read as varchar, and
            // creating all of them is necessary in order to avoid schema change exceptions
            // by downstream operators.
            break;
        }
        i++;
    }
    // second pass: write a default-typed field for each empty path. The same fieldWriter
    // may be shared by several fields, so each field's emptiness is tracked independently
    // via emptyStatus.
    for (int j = 0; j < fieldPathList.size(); j++) {
        BaseWriter.MapWriter fieldWriter = writerList.get(j);
        PathSegment fieldPath = fieldPathList.get(j);
        if (emptyStatus.get(j)) {
            if (allTextMode) {
                fieldWriter.varChar(fieldPath.getNameSegment().getPath());
            } else {
                fieldWriter.integer(fieldPath.getNameSegment().getPath());
            }
        }
    }
    for (BaseWriter.ListWriter field : emptyArrayWriters) {
        // checks that array has not been initialized
        if (field.getValueCapacity() == 0) {
            if (allTextMode) {
                field.varChar();
            } else {
                field.integer();
            }
        }
    }
}
Also used : BaseWriter(org.apache.drill.exec.vector.complex.writer.BaseWriter) SchemaPath(org.apache.drill.common.expression.SchemaPath) BitSet(java.util.BitSet) PathSegment(org.apache.drill.common.expression.PathSegment)
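
The first loop above walks each SchemaPath down to its last non-array segment before picking a writer. That descent step, isolated into a hypothetical helper (mapsOnPath is not a Drill method), might look like this:

import java.util.ArrayList;
import java.util.List;
import org.apache.drill.common.expression.PathSegment;
import org.apache.drill.common.expression.SchemaPath;

// Collect the intermediate map names the writer descends through, stopping
// before the leaf and before any array segment.
static List<String> mapsOnPath(SchemaPath column) {
    List<String> maps = new ArrayList<>();
    PathSegment seg = column.getRootSegment();
    while (seg.getChild() != null && !seg.getChild().isArray()) {
        maps.add(seg.getNameSegment().getPath());
        seg = seg.getChild();
    }
    return maps;  // e.g. SchemaPath.getCompoundPath("a", "b", "c") -> ["a", "b"]
}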

Example 20 with PathSegment

use of org.apache.drill.common.expression.PathSegment in project drill by axbaretto.

In the class HBaseRecordReader, the method transformColumns:

/**
 * Provides the projected columns information to the HBase Scan instance. If the
 * projected columns list contains a column family and also a column in the
 * column family, only the column family is passed to the Scan instance.
 *
 * For example, if the projection list is {cf1, cf1.col1, cf2.col1} then we only
 * pass {cf1, cf2.col1} to the Scan instance.
 *
 * @param columns collection of projected columns
 * @return transformed collection of projected columns (column family paths, plus the row key path when projected)
 */
@Override
protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> columns) {
    Set<SchemaPath> transformed = Sets.newLinkedHashSet();
    completeFamilies = Sets.newHashSet();
    rowKeyOnly = true;
    if (!isStarQuery()) {
        for (SchemaPath column : columns) {
            if (column.getRootSegment().getPath().equalsIgnoreCase(ROW_KEY)) {
                transformed.add(ROW_KEY_PATH);
                continue;
            }
            rowKeyOnly = false;
            NameSegment root = column.getRootSegment();
            byte[] family = root.getPath().getBytes();
            transformed.add(SchemaPath.getSimplePath(root.getPath()));
            PathSegment child = root.getChild();
            if (child != null && child.isNamed()) {
                byte[] qualifier = child.getNameSegment().getPath().getBytes();
                hbaseScanColumnsOnly.addColumn(family, qualifier);
                if (!completeFamilies.contains(root.getPath())) {
                    hbaseScan.addColumn(family, qualifier);
                }
            } else {
                hbaseScan.addFamily(family);
                completeFamilies.add(root.getPath());
            }
        }
        /* If only the row key was requested, add a FirstKeyOnlyFilter to the scan
         * to fetch only one KV from each row. If a filter is already part of this
         * scan, add the FirstKeyOnlyFilter as the LAST filter of a MUST_PASS_ALL
         * FilterList.
         */
        if (rowKeyOnly) {
            hbaseScan.setFilter(HBaseUtils.andFilterAtIndex(hbaseScan.getFilter(), HBaseUtils.LAST_FILTER, new FirstKeyOnlyFilter()));
        }
    } else {
        rowKeyOnly = false;
        transformed.add(ROW_KEY_PATH);
    }
    return transformed;
}
Also used : NameSegment(org.apache.drill.common.expression.PathSegment.NameSegment) SchemaPath(org.apache.drill.common.expression.SchemaPath) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) PathSegment(org.apache.drill.common.expression.PathSegment)
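
The family/qualifier split used by transformColumns can also be seen in isolation with a hand-built SchemaPath. The sketch below is a hypothetical helper (addProjectedColumn is not part of the reader; the family and column names are invented):

import org.apache.drill.common.expression.PathSegment;
import org.apache.drill.common.expression.PathSegment.NameSegment;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.hadoop.hbase.client.Scan;

// Mirror of the family/qualifier handling above for a single projected column.
static void addProjectedColumn(Scan scan, SchemaPath column) {
    NameSegment root = column.getRootSegment();   // e.g. "cf1" -> column family
    PathSegment child = root.getChild();          // e.g. "col1" -> qualifier, if present
    byte[] family = root.getPath().getBytes();
    if (child != null && child.isNamed()) {
        scan.addColumn(family, child.getNameSegment().getPath().getBytes());
    } else {
        scan.addFamily(family);
    }
}

// Usage: addProjectedColumn(new Scan(), SchemaPath.getCompoundPath("cf1", "col1"));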

Aggregations

PathSegment (org.apache.drill.common.expression.PathSegment) 32
SchemaPath (org.apache.drill.common.expression.SchemaPath) 14
BaseWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter) 6
FieldSegment (org.ojai.FieldSegment) 6
ArrayList (java.util.ArrayList) 5
MajorType (org.apache.drill.common.types.TypeProtos.MajorType) 5
GroupType (org.apache.parquet.schema.GroupType) 5
Type (org.apache.parquet.schema.Type) 5
FieldPath (org.ojai.FieldPath) 5
Stack (java.util.Stack) 4
RexNode (org.apache.calcite.rex.RexNode) 4
MessageType (org.apache.parquet.schema.MessageType) 4
BitSet (java.util.BitSet) 3
ValueVector (org.apache.drill.exec.vector.ValueVector) 3
NameSegment (org.apache.drill.common.expression.PathSegment.NameSegment) 2
MinorType (org.apache.drill.common.types.TypeProtos.MinorType) 2
TypedFieldId (org.apache.drill.exec.record.TypedFieldId) 2
ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata) 2
ListWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) 2
MapWriter (org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter) 2