Search in sources:

Example 1 with NameSegment

use of org.apache.drill.common.expression.PathSegment.NameSegment in project drill by apache.

the class ProjectRecordBatch method classifyExpr.

/**
 * Classifies one project expression and records the resulting output column names in {@code result}.
 *
 * <p>The root segment of the expression path and of the reference (output) path are inspected for
 * {@code StarColumnHelper.PREFIX_DELIMITER} and {@code StarColumnHelper.STAR_COLUMN}. Depending on which
 * of them carries a prefix and/or a star, one of several branches fills {@code result.outputNames}
 * (and, through {@code addToResultMaps} or directly, {@code result.outputMap}). Before branching, the
 * method may also set {@code result.prefix}, {@code result.isStar} and update {@code result.prefixMap}.
 *
 * <p>Note that {@code ex.getExpr()} is cast to {@code SchemaPath} without an instanceof check, so the
 * caller is expected to pass only expressions of that type.
 *
 * @param ex       the named expression (expression path plus reference path) to classify
 * @param incoming the incoming batch; its schema size and its value vectors' field paths are read
 * @param result   the classifier state that is mutated in place
 */
private void classifyExpr(final NamedExpression ex, final RecordBatch incoming, final ClassifierResult result) {
    final NameSegment expr = ((SchemaPath) ex.getExpr()).getRootSegment();
    final NameSegment ref = ex.getRef().getRootSegment();
    final boolean exprHasPrefix = expr.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
    final boolean refHasPrefix = ref.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
    final boolean exprIsStar = expr.getPath().equals(StarColumnHelper.STAR_COLUMN);
    final boolean refContainsStar = ref.getPath().contains(StarColumnHelper.STAR_COLUMN);
    final boolean exprContainsStar = expr.getPath().contains(StarColumnHelper.STAR_COLUMN);
    final boolean refEndsWithStar = ref.getPath().endsWith(StarColumnHelper.STAR_COLUMN);
    // without a prefix, the prefix is empty and the suffix is the whole expression path
    String exprPrefix = EMPTY_STRING;
    String exprSuffix = expr.getPath();
    if (exprHasPrefix) {
        // get the prefix of the expr
        final String[] exprComponents = expr.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
        assert (exprComponents.length == 2);
        exprPrefix = exprComponents[0];
        exprSuffix = exprComponents[1];
        result.prefix = exprPrefix;
    }
    boolean exprIsFirstWildcard = false;
    if (exprContainsStar) {
        result.isStar = true;
        // count the wildcard expressions seen so far for this prefix; only the first one is flagged
        final Integer value = (Integer) result.prefixMap.get(exprPrefix);
        if (value == null) {
            final Integer n = 1;
            result.prefixMap.put(exprPrefix, n);
            exprIsFirstWildcard = true;
        } else {
            final Integer n = value + 1;
            result.prefixMap.put(exprPrefix, n);
        }
    }
    final int incomingSchemaSize = incoming.getSchema().getFieldCount();
    // input is '*' and output is 'prefix_*'
    if (exprIsStar && refHasPrefix && refEndsWithStar) {
        final String[] components = ref.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
        assert (components.length == 2);
        final String prefix = components[0];
        result.outputNames = Lists.newArrayList();
        for (final VectorWrapper<?> wrapper : incoming) {
            final ValueVector vvIn = wrapper.getValueVector();
            final String name = vvIn.getField().getPath();
            // add the prefix to the incoming column name
            final String newName = prefix + StarColumnHelper.PREFIX_DELIMITER + name;
            addToResultMaps(newName, result, false);
        }
    } else // input and output are the same
    if (expr.getPath().equalsIgnoreCase(ref.getPath()) && (!exprContainsStar || exprIsFirstWildcard)) {
        if (exprContainsStar && exprHasPrefix) {
            assert exprPrefix != null;
            // k tracks the position of the current incoming column so names can be set by index
            int k = 0;
            result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
            for (int j = 0; j < incomingSchemaSize; j++) {
                // initialize: one empty placeholder per incoming column
                result.outputNames.add(EMPTY_STRING);
            }
            for (final VectorWrapper<?> wrapper : incoming) {
                final ValueVector vvIn = wrapper.getValueVector();
                final String incomingName = vvIn.getField().getPath();
                // get the prefix of the name
                final String[] nameComponents = incomingName.split(StarColumnHelper.PREFIX_DELIMITER, 2);
                // if incoming valuevector does not have a prefix, ignore it since this expression is not referencing it
                if (nameComponents.length <= 1) {
                    k++;
                    continue;
                }
                final String namePrefix = nameComponents[0];
                if (exprPrefix.equalsIgnoreCase(namePrefix)) {
                    final String newName = incomingName;
                    // only the first occurrence of a name is recorded; later duplicates keep the placeholder
                    if (!result.outputMap.containsKey(newName)) {
                        result.outputNames.set(k, newName);
                        result.outputMap.put(newName, newName);
                    }
                }
                k++;
            }
        } else {
            result.outputNames = Lists.newArrayList();
            if (exprContainsStar) {
                for (final VectorWrapper<?> wrapper : incoming) {
                    final ValueVector vvIn = wrapper.getValueVector();
                    final String incomingName = vvIn.getField().getPath();
                    if (refContainsStar) {
                        // allow dups since this is likely top-level project
                        addToResultMaps(incomingName, result, true);
                    } else {
                        addToResultMaps(incomingName, result, false);
                    }
                }
            } else {
                final String newName = expr.getPath();
                if (!refHasPrefix && !exprHasPrefix) {
                    // allow dups since this is likely top-level project
                    addToResultMaps(newName, result, true);
                } else {
                    addToResultMaps(newName, result, false);
                }
            }
        }
    } else // input is wildcard and it is not the first wildcard
    if (exprIsStar) {
        result.outputNames = Lists.newArrayList();
        for (final VectorWrapper<?> wrapper : incoming) {
            final ValueVector vvIn = wrapper.getValueVector();
            final String incomingName = vvIn.getField().getPath();
            // allow dups since this is likely top-level project
            addToResultMaps(incomingName, result, true);
        }
    } else // only the output has prefix
    if (!exprHasPrefix && refHasPrefix) {
        result.outputNames = Lists.newArrayList();
        final String newName = ref.getPath();
        addToResultMaps(newName, result, false);
    } else // input has prefix but output does not
    if (exprHasPrefix && !refHasPrefix) {
        // k tracks the position of the current incoming column so names can be set by index
        int k = 0;
        result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
        for (int j = 0; j < incomingSchemaSize; j++) {
            // initialize: one empty placeholder per incoming column
            result.outputNames.add(EMPTY_STRING);
        }
        for (final VectorWrapper<?> wrapper : incoming) {
            final ValueVector vvIn = wrapper.getValueVector();
            final String name = vvIn.getField().getPath();
            final String[] components = name.split(StarColumnHelper.PREFIX_DELIMITER, 2);
            // incoming columns without a prefix cannot match this prefixed expression; skip them
            if (components.length <= 1) {
                k++;
                continue;
            }
            final String namePrefix = components[0];
            final String nameSuffix = components[1];
            if (exprPrefix.equalsIgnoreCase(namePrefix)) {
                // case insensitive matching of prefix.
                if (refContainsStar) {
                    // remove the prefix from the incoming column names
                    // for top level we need to make names unique
                    final String newName = getUniqueName(nameSuffix, result);
                    result.outputNames.set(k, newName);
                } else if (exprSuffix.equalsIgnoreCase(nameSuffix)) {
                    // case insensitive matching of field name.
                    // example: ref: $f1, expr: T0<PREFIX><column_name>
                    final String newName = ref.getPath();
                    result.outputNames.set(k, newName);
                }
            } else {
                // NOTE(review): the list was already pre-filled with incomingSchemaSize placeholders above,
                // so this appends an extra entry beyond the schema size for every prefixed column whose
                // prefix does not match -- confirm whether consumers rely on this or it is redundant.
                result.outputNames.add(EMPTY_STRING);
            }
            k++;
        }
    } else // input and output have prefixes although they could be different...
    if (exprHasPrefix && refHasPrefix) {
        // NOTE(review): 'input' is only used by the assert below; with assertions disabled this branch
        // does nothing and leaves result.outputNames untouched.
        final String[] input = expr.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
        assert (input.length == 2);
        // not handled yet
        assert false : "Unexpected project expression or reference";
    } else {
        // if the incoming schema's column name matches the expression name of the Project,
        // then we just want to pick the ref name as the output column name
        result.outputNames = Lists.newArrayList();
        for (final VectorWrapper<?> wrapper : incoming) {
            final ValueVector vvIn = wrapper.getValueVector();
            final String incomingName = vvIn.getField().getPath();
            if (expr.getPath().equalsIgnoreCase(incomingName)) {
                // case insensitive matching of field name.
                final String newName = ref.getPath();
                addToResultMaps(newName, result, true);
            }
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) NameSegment(org.apache.drill.common.expression.PathSegment.NameSegment) SchemaPath(org.apache.drill.common.expression.SchemaPath) VectorWrapper(org.apache.drill.exec.record.VectorWrapper)

Example 2 with NameSegment

use of org.apache.drill.common.expression.PathSegment.NameSegment in project drill by apache.

the class ProjectRecordBatch method isClassificationNeeded.

/**
 * Reports whether any of the given project expressions requires classification.
 *
 * <p>Only expressions whose {@code getExpr()} is a {@code SchemaPath} are examined. Classification is
 * needed as soon as one of them has a reference root path containing
 * {@code StarColumnHelper.PREFIX_DELIMITER} or an expression root path containing
 * {@code StarColumnHelper.STAR_COLUMN}.
 *
 * @param exprs the project expressions to inspect
 * @return {@code true} if at least one expression matches the criteria above, {@code false} otherwise
 */
private boolean isClassificationNeeded(final List<NamedExpression> exprs) {
    for (final NamedExpression namedExpr : exprs) {
        if (namedExpr.getExpr() instanceof SchemaPath) {
            final NameSegment exprRoot = ((SchemaPath) namedExpr.getExpr()).getRootSegment();
            final NameSegment refRoot = namedExpr.getRef().getRootSegment();
            // evaluate both checks up front, in the same order, before combining them
            final boolean outputIsPrefixed = refRoot.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
            final boolean inputHasStar = exprRoot.getPath().contains(StarColumnHelper.STAR_COLUMN);
            if (outputIsPrefixed || inputHasStar) {
                // a single match is enough; no need to look at the remaining expressions
                return true;
            }
        }
    }
    return false;
}
Also used : NameSegment(org.apache.drill.common.expression.PathSegment.NameSegment) SchemaPath(org.apache.drill.common.expression.SchemaPath) NamedExpression(org.apache.drill.common.logical.data.NamedExpression)

Example 3 with NameSegment

use of org.apache.drill.common.expression.PathSegment.NameSegment in project drill by apache.

the class HBaseRecordReader method transformColumns.

/**
 * Translates the projected columns into HBase scan settings and returns the set of top-level
 * paths to project.
 *
 * <p>For a non-star query, every column whose root name equals {@code ROW_KEY} (ignoring case) maps to
 * {@code ROW_KEY_PATH}; every other column is treated as {@code family[.qualifier]} and is added to
 * {@code hbaseScan} either as a single column (when a named child segment exists) or as a whole
 * family. If only the row key was requested, a {@code FirstKeyOnlyFilter} is appended to the scan's
 * filter. For a star query only {@code ROW_KEY_PATH} is added to the returned set.
 *
 * <p>Family and qualifier names are encoded explicitly as UTF-8 so that the bytes sent to HBase do
 * not depend on the JVM's platform default charset.
 *
 * <p>Side effects: updates the {@code rowKeyOnly} field and mutates {@code hbaseScan}.
 *
 * @param columns the columns requested by the query
 * @return the transformed columns, in insertion order and without duplicates
 */
@Override
protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> columns) {
    Set<SchemaPath> transformed = Sets.newLinkedHashSet();
    rowKeyOnly = true;
    if (!isStarQuery()) {
        for (SchemaPath column : columns) {
            NameSegment root = column.getRootSegment();
            String rootName = root.getPath();
            if (rootName.equalsIgnoreCase(ROW_KEY)) {
                transformed.add(ROW_KEY_PATH);
                continue;
            }
            // at least one non-row-key column was requested
            rowKeyOnly = false;
            byte[] family = rootName.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            transformed.add(SchemaPath.getSimplePath(rootName));
            PathSegment child = root.getChild();
            if (child != null && child.isNamed()) {
                // family.qualifier: restrict the scan to that single column
                byte[] qualifier = child.getNameSegment().getPath().getBytes(java.nio.charset.StandardCharsets.UTF_8);
                hbaseScan.addColumn(family, qualifier);
            } else {
                // no named child segment: fetch the whole column family
                hbaseScan.addFamily(family);
            }
        }
        /*
         * If only the row key was requested, add a FirstKeyOnlyFilter to the scan
         * to fetch only one KV from each row. If a filter is already part of this
         * scan, add the FirstKeyOnlyFilter as the LAST filter of a MUST_PASS_ALL
         * FilterList.
         */
        if (rowKeyOnly) {
            hbaseScan.setFilter(HBaseUtils.andFilterAtIndex(hbaseScan.getFilter(), HBaseUtils.LAST_FILTER, new FirstKeyOnlyFilter()));
        }
    } else {
        rowKeyOnly = false;
        transformed.add(ROW_KEY_PATH);
    }
    return transformed;
}
Also used : NameSegment(org.apache.drill.common.expression.PathSegment.NameSegment) SchemaPath(org.apache.drill.common.expression.SchemaPath) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) PathSegment(org.apache.drill.common.expression.PathSegment)

Aggregations

NameSegment (org.apache.drill.common.expression.PathSegment.NameSegment): 3, SchemaPath (org.apache.drill.common.expression.SchemaPath): 3, PathSegment (org.apache.drill.common.expression.PathSegment): 1, NamedExpression (org.apache.drill.common.logical.data.NamedExpression): 1, VectorWrapper (org.apache.drill.exec.record.VectorWrapper): 1, ValueVector (org.apache.drill.exec.vector.ValueVector): 1, FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter): 1