Search in sources :

Example 1 with MajorType

use of org.apache.drill.common.types.TypeProtos.MajorType in project drill by apache.

the class FieldIdUtil method getFieldIdIfMatches.

/**
 * Recursively resolves the remaining path segments against {@code vector}, recording the
 * result on {@code builder}.
 *
 * @param vector          vector the current segment is resolved against. For a non-null
 *                        {@code seg} it must be an AbstractContainerVector or a ListVector.
 * @param builder         builder that accumulates ids, intermediate/final types and the remainder
 * @param addToBreadCrumb while true, child ordinals and intermediate types are recorded on the
 *                        builder; it is switched off as soon as a remainder segment has been set
 * @param seg             the segment to resolve, or null when the path has been fully consumed
 * @return the built TypedFieldId, or null when the path cannot be matched against the vector
 * @throws UnsupportedOperationException if a non-null segment has to be resolved against a vector
 *         that is neither an AbstractContainerVector nor a ListVector
 */
public static TypedFieldId getFieldIdIfMatches(ValueVector vector, TypedFieldId.Builder builder, boolean addToBreadCrumb, PathSegment seg) {
    // Array access into a repeated map that is followed by further path segments.
    if (vector instanceof RepeatedMapVector && seg != null && seg.isArray() && !seg.isLastPath()) {
        if (addToBreadCrumb) {
            // Everything from this array segment onwards becomes the remainder; stop recording ids.
            addToBreadCrumb = false;
            builder.remainder(seg);
        }
        // skip the first array segment as there is no corresponding child vector.
        // The child is non-null here because !seg.isLastPath() was checked above.
        seg = seg.getChild();
        // Two consecutive array segments on a repeated map are treated as "no match".
        if (seg.isArray()) {
            return null;
        }
    }
    // Path exhausted: the vector itself is the target.
    if (seg == null) {
        if (addToBreadCrumb) {
            builder.intermediateType(vector.getField().getType());
        }
        return builder.finalType(vector.getField().getType()).build();
    }
    if (seg.isArray()) {
        if (seg.isLastPath()) {
            // Trailing array index: the final type comes from the container's last-path type or
            // from the list's data vector.
            MajorType type;
            if (vector instanceof AbstractContainerVector) {
                type = ((AbstractContainerVector) vector).getLastPathType();
            } else if (vector instanceof ListVector) {
                type = ((ListVector) vector).getDataVector().getField().getType();
                builder.listVector();
            } else {
                throw new UnsupportedOperationException("FieldIdUtil does not support vector of type " + vector.getField().getType());
            }
            // Mark the id as indexed and record the element type as the final type.
            builder.withIndex().finalType(type);
            // only set remainder when it's the only array segment.
            if (addToBreadCrumb) {
                addToBreadCrumb = false;
                builder.remainder(seg);
            }
            return builder.build();
        } else {
            // Array index in the middle of the path: record the remainder once and keep descending.
            if (addToBreadCrumb) {
                addToBreadCrumb = false;
                builder.remainder(seg);
            }
        }
    } else {
        // A named segment cannot be resolved against a list.
        if (vector instanceof ListVector) {
            return null;
        }
    }
    // Locate the child vector that the current segment refers to.
    ValueVector v;
    if (vector instanceof AbstractContainerVector) {
        // Array segments are looked up with a null name, named segments by their path.
        VectorWithOrdinal vord = ((AbstractContainerVector) vector).getChildVectorWithOrdinal(seg.isArray() ? null : seg.getNameSegment().getPath());
        if (vord == null) {
            return null;
        }
        v = vord.vector;
        if (addToBreadCrumb) {
            builder.intermediateType(v.getField().getType());
            builder.addId(vord.ordinal);
        }
    } else if (vector instanceof ListVector) {
        v = ((ListVector) vector).getDataVector();
    } else {
        throw new UnsupportedOperationException("FieldIdUtil does not support vector of type " + vector.getField().getType());
    }
    // Descend into the child with the next segment, dispatching on the child's kind.
    if (v instanceof AbstractContainerVector) {
        // we're looking for a multi path.
        AbstractContainerVector c = (AbstractContainerVector) v;
        return getFieldIdIfMatches(c, builder, addToBreadCrumb, seg.getChild());
    } else if (v instanceof ListVector) {
        ListVector list = (ListVector) v;
        return getFieldIdIfMatches(list, builder, addToBreadCrumb, seg.getChild());
    } else if (v instanceof UnionVector) {
        return getFieldIdIfMatchesUnion((UnionVector) v, builder, addToBreadCrumb, seg.getChild());
    } else {
        // Any other child is handled as a leaf.
        if (seg.isNamed()) {
            if (addToBreadCrumb) {
                builder.intermediateType(v.getField().getType());
            }
            builder.finalType(v.getField().getType());
        } else {
            // Reached through an array segment: the final type is the child's type forced to OPTIONAL.
            builder.finalType(v.getField().getType().toBuilder().setMode(DataMode.OPTIONAL).build());
        }
        if (seg.isLastPath()) {
            return builder.build();
        } else {
            PathSegment child = seg.getChild();
            // The only thing allowed after a leaf is a single trailing array index.
            if (child.isLastPath() && child.isArray()) {
                if (addToBreadCrumb) {
                    builder.remainder(child);
                }
                builder.withIndex();
                builder.finalType(v.getField().getType().toBuilder().setMode(DataMode.OPTIONAL).build());
                return builder.build();
            } else {
                logger.warn("You tried to request a complex type inside a scalar object or path or type is wrong.");
                return null;
            }
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) PathSegment(org.apache.drill.common.expression.PathSegment)

Example 2 with MajorType

use of org.apache.drill.common.types.TypeProtos.MajorType in project drill by apache.

the class FieldIdUtil method getFieldId.

/**
 * Builds a TypedFieldId for {@code expectedPath} when its root segment names the given
 * top-level vector.
 *
 * @param vector       top-level vector to match against
 * @param id           id recorded on the builder for this vector
 * @param expectedPath path being resolved; its root name is compared case-insensitively with
 *                     the vector's field path
 * @param hyper        when true, the builder is flagged via {@code hyper()}
 * @return the resolved TypedFieldId, or null when the root name differs or the rest of the
 *         path cannot be matched
 */
public static TypedFieldId getFieldId(ValueVector vector, int id, SchemaPath expectedPath, boolean hyper) {
    final String rootName = expectedPath.getRootSegment().getNameSegment().getPath();
    final boolean rootMatches = rootName.equalsIgnoreCase(vector.getField().getPath());
    if (!rootMatches) {
        return null;
    }

    PathSegment seg = expectedPath.getRootSegment();
    TypedFieldId.Builder builder = TypedFieldId.newBuilder();
    if (hyper) {
        builder.hyper();
    }

    // Union vectors: describe the union by its sub types and hand the rest of the path to the
    // union-specific resolver.
    if (vector instanceof UnionVector) {
        final UnionVector union = (UnionVector) vector;
        builder.addId(id).remainder(expectedPath.getRootSegment().getChild());
        List<MinorType> subTypes = union.getSubTypes();
        MajorType.Builder unionTypeBuilder = MajorType.newBuilder().setMinorType(MinorType.UNION);
        for (MinorType subType : subTypes) {
            unionTypeBuilder.addSubType(subType);
        }
        final MajorType unionType = unionTypeBuilder.build();
        builder.intermediateType(unionType);
        if (!seg.isLastPath()) {
            return getFieldIdIfMatchesUnion(union, builder, false, seg.getChild());
        }
        builder.finalType(unionType);
        return builder.build();
    }

    // Lists: record this level, then resolve the child segment against the list.
    if (vector instanceof ListVector) {
        ListVector listVector = (ListVector) vector;
        builder.intermediateType(vector.getField().getType());
        builder.addId(id);
        return getFieldIdIfMatches(listVector, builder, true, expectedPath.getRootSegment().getChild());
    }

    // Other containers: record this level, then resolve the child segment against the container.
    if (vector instanceof AbstractContainerVector) {
        AbstractContainerVector containerVector = (AbstractContainerVector) vector;
        builder.intermediateType(vector.getField().getType());
        builder.addId(id);
        return getFieldIdIfMatches(containerVector, builder, true, expectedPath.getRootSegment().getChild());
    }

    // Anything else is handled as a leaf vector.
    builder.intermediateType(vector.getField().getType());
    builder.addId(id);
    builder.finalType(vector.getField().getType());
    if (seg.isLastPath()) {
        return builder.build();
    }

    // The only continuation accepted on a leaf is a single trailing array index.
    PathSegment next = seg.getChild();
    if (!(next.isArray() && next.isLastPath())) {
        return null;
    }
    builder.remainder(next);
    builder.withIndex();
    builder.finalType(vector.getField().getType().toBuilder().setMode(DataMode.OPTIONAL).build());
    return builder.build();
}
Also used : TypedFieldId(org.apache.drill.exec.record.TypedFieldId) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) PathSegment(org.apache.drill.common.expression.PathSegment)

Example 3 with MajorType

use of org.apache.drill.common.types.TypeProtos.MajorType in project drill by apache.

the class HiveAbstractReader method setup.

/**
 * Runs {@code init()} through {@code context.runCallableAs(proxyUgi, ...)}, waits for it to
 * finish, and then registers one output vector per selected column and per selected partition.
 *
 * @param context operator context used to run the initializer
 * @param output  mutator on which the output fields are registered
 * @throws ExecutionSetupException if the initializer fails or a field cannot be added because
 *         of a schema change
 */
@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    // The callable's only job is to invoke init(); it yields no value.
    final ListenableFuture<Void> initFuture = context.runCallableAs(proxyUgi, new Callable<Void>() {

        @Override
        public Void call() throws Exception {
            init();
            return null;
        }
    });

    try {
        initFuture.get();
    } catch (InterruptedException interrupted) {
        initFuture.cancel(true);
        // Restore the interrupt flag so callers further up the stack can observe the interruption
        // and react to it if they choose to.
        Thread.currentThread().interrupt();
    } catch (ExecutionException failure) {
        throw ExecutionSetupException.fromThrowable(failure.getMessage(), failure);
    }

    try {
        final OptionManager options = fragmentContext.getOptions();

        // Regular (non-partition) columns.
        for (int col = 0; col < selectedColumnNames.size(); col++) {
            MajorType columnType = HiveUtilities.getMajorTypeFromHiveTypeInfo(selectedColumnTypes.get(col), options);
            MaterializedField columnField = MaterializedField.create(selectedColumnNames.get(col), columnType);
            Class<? extends ValueVector> columnVectorClass = TypeHelper.getValueVectorClass(columnType.getMinorType(), columnType.getMode());
            vectors.add(output.addField(columnField, columnVectorClass));
        }

        // Partition columns.
        for (int part = 0; part < selectedPartitionNames.size(); part++) {
            MajorType partitionType = HiveUtilities.getMajorTypeFromHiveTypeInfo(selectedPartitionTypes.get(part), options);
            MaterializedField partitionField = MaterializedField.create(selectedPartitionNames.get(part), partitionType);
            Class<? extends ValueVector> partitionVectorClass = TypeHelper.getValueVectorClass(partitionField.getType().getMinorType(), partitionField.getDataMode());
            pVectors.add(output.addField(partitionField, partitionVectorClass));
        }
    } catch (SchemaChangeException schemaChange) {
        throw new ExecutionSetupException(schemaChange);
    }
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) MaterializedField(org.apache.drill.exec.record.MaterializedField) Callable(java.util.concurrent.Callable) OptionManager(org.apache.drill.exec.server.options.OptionManager) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) ExecutionException(java.util.concurrent.ExecutionException)

Example 4 with MajorType

use of org.apache.drill.common.types.TypeProtos.MajorType in project drill by apache.

the class TypeCastRules method getCost.

/**
 * Computes the cost of invoking {@code holder} with arguments of the given types using
 * implicit casts.
 *
 * @param argumentTypes types of the actual arguments, in call order
 * @param holder        candidate function implementation
 * @return -1 when an implicit cast is not allowed; 0 when every parameter has exactly the
 *         argument's type so no implicit cast is needed; otherwise the cost associated with
 *         the implicit casts
 * @throws RuntimeException if no precedence is defined for a parameter or argument minor type
 */
public static int getCost(List<MajorType> argumentTypes, DrillFuncHolder holder) {
    int cost = 0;
    if (argumentTypes.size() != holder.getParamCount()) {
        return -1;
    }

    // Set once any argument had to fall back to the secondary cast rules.
    boolean usedSecondaryRules = false;
    // Count of arguments that either needed no cast or could be cast via the precedence map.
    int precedenceCasts = 0;

    // For decimal holders, the function's output type must be able to hold the precision
    // required by the input types.
    if (holder.checkPrecisionRange()) {
        List<LogicalExpression> argExpressions = Lists.newArrayList();
        for (MajorType argumentType : argumentTypes) {
            argExpressions.add(new MajorTypeInLogicalExpression(argumentType));
        }
        final int maxPrecision = DecimalUtility.getMaxPrecision(holder.getReturnType().getMinorType());
        final int requiredPrecision = holder.getReturnType(argExpressions).getPrecision();
        if (maxPrecision < requiredPrecision) {
            return -1;
        }
    }

    final int numOfArgs = holder.getParamCount();
    for (int i = 0; i < numOfArgs; i++) {
        final MajorType argType = argumentTypes.get(i);
        final MajorType parmType = holder.getParmMajorType(i);

        // A field-reader parameter accepts any type; charge the maximum implicit cast cost for
        // it and move on to the next argument.
        if (holder.isFieldReader(i)) {
            cost += ResolverTypePrecedence.MAX_IMPLICIT_CAST_COST;
            continue;
        }

        if (!TypeCastRules.isCastableWithNullHandling(argType, parmType, holder.getNullHandling())) {
            return -1;
        }

        final Integer parmVal = ResolverTypePrecedence.precedenceMap.get(parmType.getMinorType());
        final Integer argVal = ResolverTypePrecedence.precedenceMap.get(argType.getMinorType());
        if (parmVal == null) {
            throw new RuntimeException(String.format("Precedence for type %s is not defined", parmType.getMinorType().name()));
        }
        if (argVal == null) {
            throw new RuntimeException(String.format("Precedence for type %s is not defined", argType.getMinorType().name()));
        }

        final int precedenceGap = parmVal - argVal;
        if (precedenceGap < 0) {
            // The precedence rules forbid this implicit cast; see whether the secondary rules
            // permit it instead.
            final Set<MinorType> secondaryRules = ResolverTypePrecedence.secondaryImplicitCastRules.get(parmType.getMinorType());
            if (secondaryRules == null || !secondaryRules.contains(argType.getMinorType())) {
                return -1;
            }
            usedSecondaryRules = true;
        }

        // Data modes differ and the function handles nulls itself: a nullable argument cannot
        // feed a required parameter, while a required argument feeding a nullable parameter
        // only costs extra, so a non-nullable implementation is preferred.
        if (argType.getMode() != parmType.getMode() && holder.getNullHandling() == NullHandling.INTERNAL) {
            if (parmType.getMode() == DataMode.REQUIRED && argType.getMode() == DataMode.OPTIONAL) {
                return -1;
            } else if (parmType.getMode() == DataMode.OPTIONAL && argType.getMode() == DataMode.REQUIRED) {
                cost += DATAMODE_CAST_COST;
            }
        }

        if (precedenceGap >= 0) {
            precedenceCasts++;
            cost += precedenceGap;
        }
    }

    if (!usedSecondaryRules) {
        return cost;
    }

    // At least one argument used a secondary cast: start from a very high cost, subtract the
    // maximum possible implicit cost of the precedence-cast arguments, then add the cost
    // actually accumulated for them.
    // NOTE(review): cost also includes field-reader and data-mode charges for arguments that are
    // not counted in precedenceCasts, so this int sum may wrap around - confirm against the
    // values of MAX_IMPLICIT_CAST_COST and DATAMODE_CAST_COST.
    int secondaryCastCost = Integer.MAX_VALUE - 1;
    secondaryCastCost -= (precedenceCasts * (ResolverTypePrecedence.MAX_IMPLICIT_CAST_COST + DATAMODE_CAST_COST));
    secondaryCastCost += cost;
    return secondaryCastCost;
}
Also used : MajorTypeInLogicalExpression(org.apache.drill.common.expression.MajorTypeInLogicalExpression) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) MajorTypeInLogicalExpression(org.apache.drill.common.expression.MajorTypeInLogicalExpression) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) MinorType(org.apache.drill.common.types.TypeProtos.MinorType)

Example 5 with MajorType

use of org.apache.drill.common.types.TypeProtos.MajorType in project drill by apache.

the class UnionAllRecordBatch method doWork.

/**
 * Produces one output batch for the Union-All operator from the {@code current} input.
 *
 * For each incoming column this either registers a transfer pair or adds a generated write
 * expression (with a nullable conversion and/or cast where the types differ), then runs the
 * generated code over the records of {@code current}.
 *
 * @return {@code OK_NEW_SCHEMA} when both inputs are empty, {@code OUT_OF_MEMORY} when
 *         {@code doAlloc()} fails, otherwise {@code OK}
 * @throws SchemaChangeException if an input column cannot be materialized, converted or cast
 */
@SuppressWarnings("resource")
private IterOutcome doWork() throws ClassTransformationException, IOException, SchemaChangeException {
    // Release the vectors allocated for the previous batch before starting a new list.
    if (allocationVectors != null) {
        for (ValueVector v : allocationVectors) {
            v.clear();
        }
    }
    allocationVectors = Lists.newArrayList();
    transfers.clear();
    // If both sides of Union-All are empty
    if (unionAllInput.isBothSideEmpty()) {
        // Every output column is created as an OPTIONAL INT vector under its output name.
        for (int i = 0; i < outputFields.size(); ++i) {
            final String colName = outputFields.get(i).getPath();
            final MajorType majorType = MajorType.newBuilder().setMinorType(MinorType.INT).setMode(DataMode.OPTIONAL).build();
            MaterializedField outputField = MaterializedField.create(colName, majorType);
            ValueVector vv = container.addOrGet(outputField, callBack);
            allocationVectors.add(vv);
        }
        container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
        return IterOutcome.OK_NEW_SCHEMA;
    }
    final ClassGenerator<UnionAller> cg = CodeGenerator.getRoot(UnionAller.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    cg.getCodeGenerator().plainJavaCapable(true);
    // Uncomment out this line to debug the generated code.
    //    cg.getCodeGenerator().saveCodeForDebugging(true);
    // index tracks the position in outputFields that corresponds to the current input column.
    int index = 0;
    for (VectorWrapper<?> vw : current) {
        ValueVector vvIn = vw.getValueVector();
        // get the original input column names
        SchemaPath inputPath = SchemaPath.getSimplePath(vvIn.getField().getPath());
        // get the renamed column names
        SchemaPath outputPath = SchemaPath.getSimplePath(outputFields.get(index).getPath());
        final ErrorCollector collector = new ErrorCollectorImpl();
        // When input and output agree on type and mode no cast is needed; otherwise
        // cast data types (Minortype or DataMode)
        if (hasSameTypeAndMode(outputFields.get(index), vw.getValueVector().getField())) {
            // Transfer column
            MajorType outputFieldType = outputFields.get(index).getType();
            MaterializedField outputField = MaterializedField.create(outputPath.getAsUnescapedPath(), outputFieldType);
            /*
          todo: Fix if condition when DRILL-4824 is merged
          If condition should be changed to:
          `if (outputFields.get(index).getPath().equals(inputPath.getAsUnescapedPath())) {`
          DRILL-5419 has changed condition to correct one but this caused regression (DRILL-5521).
          Root cause is missing indication of child column in map types when it is null.
          DRILL-4824 is re-working json reader implementation, including map types and will fix this problem.
          Reverting condition to previous one to avoid regression till DRILL-4824 is merged.
          Unit test - TestJsonReader.testKvgenWithUnionAll().
         */
            // NOTE(review): getPath() is assigned to a String above while inputPath is a SchemaPath,
            // so this equals() looks like it can never be true and the copy branch is always taken.
            // Per the todo above this is deliberate; do not "fix" it without DRILL-4824.
            if (outputFields.get(index).getPath().equals(inputPath)) {
                ValueVector vvOut = container.addOrGet(outputField);
                TransferPair tp = vvIn.makeTransferPair(vvOut);
                transfers.add(tp);
            // Copy data in order to rename the column
            } else {
                final LogicalExpression expr = ExpressionTreeMaterializer.materialize(inputPath, current, collector, context.getFunctionRegistry());
                if (collector.hasErrors()) {
                    throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema.  Errors:\n %s.", collector.toErrorString()));
                }
                ValueVector vv = container.addOrGet(outputField, callBack);
                allocationVectors.add(vv);
                TypedFieldId fid = container.getValueVectorId(SchemaPath.getSimplePath(outputField.getPath()));
                // The third argument is always true on this path (presumably "use setSafe", going by
                // the useSetSafe local in the cast branch - confirm).
                ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
                cg.addExpr(write);
            }
        // Cast is necessary
        } else {
            LogicalExpression expr = ExpressionTreeMaterializer.materialize(inputPath, current, collector, context.getFunctionRegistry());
            if (collector.hasErrors()) {
                throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema.  Errors:\n %s.", collector.toErrorString()));
            }
            // cast to the one with the least restriction
            // (a REQUIRED input feeding a non-REQUIRED output is converted to a nullable type first)
            if (vvIn.getField().getType().getMode() == DataMode.REQUIRED && outputFields.get(index).getType().getMode() != DataMode.REQUIRED) {
                expr = ExpressionTreeMaterializer.convertToNullableType(expr, vvIn.getField().getType().getMinorType(), context.getFunctionRegistry(), collector);
                if (collector.hasErrors()) {
                    throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema.  Errors:\n %s.", collector.toErrorString()));
                }
            }
            // Insert a cast before the Union operation
            // (only when the minor types differ)
            if (vvIn.getField().getType().getMinorType() != outputFields.get(index).getType().getMinorType()) {
                expr = ExpressionTreeMaterializer.addCastExpression(expr, outputFields.get(index).getType(), context.getFunctionRegistry(), collector);
                if (collector.hasErrors()) {
                    throw new SchemaChangeException(String.format("Failure while trying to materialize incoming schema.  Errors:\n %s.", collector.toErrorString()));
                }
            }
            // The output field takes its type from the (possibly converted/cast) expression.
            final MaterializedField outputField = MaterializedField.create(outputPath.getAsUnescapedPath(), expr.getMajorType());
            ValueVector vector = container.addOrGet(outputField, callBack);
            allocationVectors.add(vector);
            TypedFieldId fid = container.getValueVectorId(SchemaPath.getSimplePath(outputField.getPath()));
            // Fixed-width vectors are written with useSetSafe == false, all others with true.
            boolean useSetSafe = !(vector instanceof FixedWidthVector);
            ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, useSetSafe);
            cg.addExpr(write);
        }
        ++index;
    }
    // Obtain the generated implementation and bind it to the input, this batch and the transfers.
    unionall = context.getImplementationClass(cg.getCodeGenerator());
    unionall.setup(context, current, this, transfers);
    // The output schema is built only once, the first time this point is reached.
    if (!schemaAvailable) {
        container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
        schemaAvailable = true;
    }
    if (!doAlloc()) {
        return IterOutcome.OUT_OF_MEMORY;
    }
    // Run the generated code over all records of the current input and publish the count.
    recordCount = unionall.unionRecords(0, current.getRecordCount(), 0);
    setValueCount(recordCount);
    return IterOutcome.OK;
}
Also used : TransferPair(org.apache.drill.exec.record.TransferPair) FixedWidthVector(org.apache.drill.exec.vector.FixedWidthVector) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) MaterializedField(org.apache.drill.exec.record.MaterializedField) ErrorCollector(org.apache.drill.common.expression.ErrorCollector) ValueVector(org.apache.drill.exec.vector.ValueVector) ErrorCollectorImpl(org.apache.drill.common.expression.ErrorCollectorImpl) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) SchemaChangeException(org.apache.drill.exec.exception.SchemaChangeException) SchemaPath(org.apache.drill.common.expression.SchemaPath) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) ValueVectorWriteExpression(org.apache.drill.exec.expr.ValueVectorWriteExpression)

Aggregations

MajorType (org.apache.drill.common.types.TypeProtos.MajorType)34 MaterializedField (org.apache.drill.exec.record.MaterializedField)13 MinorType (org.apache.drill.common.types.TypeProtos.MinorType)8 ValueVector (org.apache.drill.exec.vector.ValueVector)8 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)7 SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException)5 JVar (com.sun.codemodel.JVar)4 HoldingContainer (org.apache.drill.exec.expr.ClassGenerator.HoldingContainer)4 TypedFieldId (org.apache.drill.exec.record.TypedFieldId)4 ArrayList (java.util.ArrayList)3 ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException)3 ErrorCollector (org.apache.drill.common.expression.ErrorCollector)3 FunctionCall (org.apache.drill.common.expression.FunctionCall)3 SchemaPath (org.apache.drill.common.expression.SchemaPath)3 ImmutableList (com.google.common.collect.ImmutableList)2 JClass (com.sun.codemodel.JClass)2 JExpression (com.sun.codemodel.JExpression)2 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)2 ErrorCollectorImpl (org.apache.drill.common.expression.ErrorCollectorImpl)2 IfCondition (org.apache.drill.common.expression.IfExpression.IfCondition)2