Example 1 with TypeInfoFactory.binaryTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.binaryTypeInfo in project hive by apache.

From the class TestLazyBinarySerDe, method testLazyBinaryObjectInspector.

/**
   * Test that LazyBinaryObjectInspector generates a byte[] with the correct
   * contents from an input ByteArrayRef.
   * @throws Throwable
   */
public void testLazyBinaryObjectInspector() throws Throwable {
    //create input ByteArrayRef
    ByteArrayRef inpBARef = new ByteArrayRef();
    inpBARef.setData(inpBArray);
    AbstractPrimitiveLazyObjectInspector<?> binInspector = LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.binaryTypeInfo, false, (byte) 0);
    //create LazyBinary initialized with inpBARef
    LazyBinary lazyBin = (LazyBinary) LazyFactory.createLazyObject(binInspector);
    lazyBin.init(inpBARef, 0, inpBArray.length);
    //use inspector to get a byte[] out of LazyBinary
    byte[] outBARef = (byte[]) binInspector.getPrimitiveJavaObject(lazyBin);
    assertTrue("compare input and output BAs", Arrays.equals(inpBArray, outBARef));
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyBinary(org.apache.hadoop.hive.serde2.lazy.LazyBinary)
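
As background, a minimal standalone sketch (ours, not taken from the Hive sources above) of what TypeInfoFactory.binaryTypeInfo is: the shared TypeInfo instance for Hive's BINARY type, from which matching object inspectors are obtained. The class name BinaryTypeInfoSketch is hypothetical.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class BinaryTypeInfoSketch {
    public static void main(String[] args) {
        // binaryTypeInfo is the shared TypeInfo for Hive's BINARY type
        PrimitiveTypeInfo binary = TypeInfoFactory.binaryTypeInfo;
        // prints "binary"
        System.out.println(binary.getTypeName());
        // the matching writable object inspector, as used in the examples on this page
        ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(binary);
        // also prints "binary"
        System.out.println(oi.getTypeName());
    }
}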

Example 2 with TypeInfoFactory.binaryTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.binaryTypeInfo in project hive by apache.

From the class TestGenericUDFAesDecrypt, method testAesDecKeyNullConstBin.

@Test
public void testAesDecKeyNullConstBin() throws HiveException {
    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    BytesWritable keyWr = null;
    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.binaryTypeInfo, keyWr);
    ObjectInspector[] arguments = { valueOI0, valueOI1 };
    udf.initialize(arguments);
    runAndVerifyBin("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, null, udf);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) Test(org.junit.Test)

Example 3 with TypeInfoFactory.binaryTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.binaryTypeInfo in project hive by apache.

From the class TestGenericUDFAesEncrypt, method testAesEnc128ConstBin.

@Test
public void testAesEnc128ConstBin() throws HiveException {
    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    BytesWritable keyWr = new BytesWritable("1234567890123456".getBytes());
    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.binaryTypeInfo, keyWr);
    ObjectInspector[] arguments = { valueOI0, valueOI1 };
    udf.initialize(arguments);
    runAndVerifyBin(new byte[] { 65, 66, 67 }, keyWr, "y6Ss+zCYObpCbgfWfyNWTw==", udf);
    runAndVerifyBin(new byte[0], keyWr, "BQGHoM3lqYcsurCRq3PlUw==", udf);
    // null
    runAndVerifyBin(null, keyWr, null, udf);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) Test(org.junit.Test)
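
The helper runAndVerifyBin is private to these test classes and its body is not shown on this page. A plausible sketch of what it does for the encrypt case, assuming it wraps the inputs in deferred objects, evaluates the UDF, and base64-encodes the resulting BytesWritable; the body below is our reconstruction (using JUnit's assertEquals/assertNull), not the actual Hive test code.

private void runAndVerifyBinSketch(byte[] input, BytesWritable keyWr, String expectedB64, GenericUDFAesEncrypt udf) throws HiveException {
    // wrap the raw inputs the way the initialized object inspectors expect them
    GenericUDF.DeferredObject valueObj = new GenericUDF.DeferredJavaObject(input != null ? new BytesWritable(input) : null);
    GenericUDF.DeferredObject keyObj = new GenericUDF.DeferredJavaObject(keyWr);
    BytesWritable out = (BytesWritable) udf.evaluate(new GenericUDF.DeferredObject[] { valueObj, keyObj });
    if (expectedB64 == null) {
        // a null input (or null key) is expected to yield a null result
        assertNull(out);
    } else {
        assertEquals(expectedB64, java.util.Base64.getEncoder().encodeToString(out.copyBytes()));
    }
}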

Example 4 with TypeInfoFactory.binaryTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.binaryTypeInfo in project hive by apache.

From the class DynamicPartitionPruningOptimization, method generateSemiJoinOperatorPlan.

// Generates plan for min/max when dynamic partition pruning is ruled out.
private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContext parseContext, TableScanOperator ts, String keyBaseAlias) throws SemanticException {
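    // Plan shape built below (when this method returns true):
    //   parentOfRS -> Select(key) -> GroupBy(HASH: min/max/bloom_filter, PARTIAL1)
    //   -> ReduceSink -> GroupBy(FINAL: min/max/bloom_filter) -> ReduceSink,
    //   whose output is registered as runtime values for the TableScan ts.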
    // we will put a fork in the plan at the source of the reduce sink
    Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
    // we need the expr that generated the key of the reduce sink
    ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
    String internalColName = null;
    ExprNodeDesc exprNodeDesc = key;
    // Find the ExprNodeColumnDesc
    while (!(exprNodeDesc instanceof ExprNodeColumnDesc) && (exprNodeDesc.getChildren() != null)) {
        exprNodeDesc = exprNodeDesc.getChildren().get(0);
    }
    if (!(exprNodeDesc instanceof ExprNodeColumnDesc)) {
        // Bail out
        return false;
    }
    internalColName = ((ExprNodeColumnDesc) exprNodeDesc).getColumn();
    if (parentOfRS instanceof SelectOperator) {
        // Make sure the semijoin branch is not on partition column.
        ExprNodeDesc expr = parentOfRS.getColumnExprMap().get(internalColName);
        while (!(expr instanceof ExprNodeColumnDesc) && (expr.getChildren() != null)) {
            expr = expr.getChildren().get(0);
        }
        if (!(expr instanceof ExprNodeColumnDesc)) {
            // Bail out
            return false;
        }
        ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc) expr;
        String colName = ExprNodeDescUtils.extractColName(colExpr);
        // Fetch the TableScan Operator.
        Operator<?> op = parentOfRS.getParentOperators().get(0);
        while (op != null && !(op instanceof TableScanOperator)) {
            op = op.getParentOperators().get(0);
        }
        assert op != null;
        Table table = ((TableScanOperator) op).getConf().getTableMetadata();
        if (table.isPartitionKey(colName)) {
            // The column is partition column, skip the optimization.
            return false;
        }
    }
    List<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
    keyExprs.add(key);
    // group by requires "ArrayList", don't ask.
    ArrayList<String> outputNames = new ArrayList<String>();
    outputNames.add(HiveConf.getColumnInternalName(0));
    // project the relevant key column
    SelectDesc select = new SelectDesc(keyExprs, outputNames);
    // Create the new RowSchema for the projected column
    ColumnInfo columnInfo = parentOfRS.getSchema().getColumnInfo(internalColName);
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
    signature.add(columnInfo);
    RowSchema rowSchema = new RowSchema(signature);
    // Create the column expr map
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ExprNodeDesc exprNode = null;
    if (parentOfRS.getColumnExprMap() != null) {
        exprNode = parentOfRS.getColumnExprMap().get(internalColName).clone();
    } else {
        exprNode = new ExprNodeColumnDesc(columnInfo);
    }
    if (exprNode instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc encd = (ExprNodeColumnDesc) exprNode;
        encd.setColumn(internalColName);
    }
    colExprMap.put(internalColName, exprNode);
    // Create the Select Operator
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select, rowSchema, colExprMap, parentOfRS);
    // do a group by to aggregate min,max and bloom filter.
    float groupByMemoryUsage = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    ArrayList<ExprNodeDesc> groupByExprs = new ArrayList<ExprNodeDesc>();
    // Add min/max and bloom filter aggregations
    List<ObjectInspector> aggFnOIs = new ArrayList<ObjectInspector>();
    aggFnOIs.add(key.getWritableObjectInspector());
    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
    params.add(new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), "", false));
    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    try {
        AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
        bloomFilterEval.setSourceOperator(selectOp);
        bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
        bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
        aggs.add(min);
        aggs.add(max);
        aggs.add(bloomFilter);
    } catch (SemanticException e) {
        LOG.error("Error creating min/max aggregations on key", e);
        throw new IllegalStateException("Error creating min/max aggregations on key", e);
    }
    // Create the Group by Operator
    ArrayList<String> gbOutputNames = new ArrayList<String>();
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
    GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggs, false, groupByMemoryUsage, memoryThreshold, null, false, 0, false);
    ArrayList<ColumnInfo> groupbyColInfos = new ArrayList<ColumnInfo>();
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(0), key.getTypeInfo(), "", false));
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(1), key.getTypeInfo(), "", false));
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(2), key.getTypeInfo(), "", false));
    GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, new RowSchema(groupbyColInfos), selectOp);
    groupByOp.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
    // Get the column names of the aggregations for reduce sink
    int colPos = 0;
    ArrayList<ExprNodeDesc> rsValueCols = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < aggs.size() - 1; i++) {
        ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(key.getTypeInfo(), gbOutputNames.get(colPos++), "", false);
        rsValueCols.add(colExpr);
    }
    // Bloom Filter uses binary
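    // (the partial bloom filter is shipped between operators as a serialized
    // byte blob, hence TypeInfoFactory.binaryTypeInfo rather than the key's type)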
    ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo, gbOutputNames.get(colPos++), "", false);
    rsValueCols.add(colExpr);
    // Create the reduce sink operator
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(groupByOp.getSchema()), groupByOp);
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    rsOp.setColumnExprMap(columnExprMap);
    // Create the final Group By Operator
    ArrayList<AggregationDesc> aggsFinal = new ArrayList<AggregationDesc>();
    try {
        List<ObjectInspector> minFinalFnOIs = new ArrayList<ObjectInspector>();
        List<ObjectInspector> maxFinalFnOIs = new ArrayList<ObjectInspector>();
        List<ObjectInspector> bloomFilterFinalFnOIs = new ArrayList<ObjectInspector>();
        ArrayList<ExprNodeDesc> minFinalParams = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> maxFinalParams = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> bloomFilterFinalParams = new ArrayList<ExprNodeDesc>();
        // Use the expressions from Reduce Sink.
        minFinalFnOIs.add(rsValueCols.get(0).getWritableObjectInspector());
        maxFinalFnOIs.add(rsValueCols.get(1).getWritableObjectInspector());
        bloomFilterFinalFnOIs.add(rsValueCols.get(2).getWritableObjectInspector());
        // Coming from a ReduceSink the aggregations would be in the form VALUE._col0, VALUE._col1
        minFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(0).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(0), "", false));
        maxFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(1).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(1), "", false));
        bloomFilterFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(2).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(2), "", false));
        AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", minFinalFnOIs, false, false), minFinalParams, false, Mode.FINAL);
        AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", maxFinalFnOIs, false, false), maxFinalParams, false, Mode.FINAL);
        AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", bloomFilterFinalFnOIs, false, false), bloomFilterFinalParams, false, Mode.FINAL);
        GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
        bloomFilterEval.setSourceOperator(selectOp);
        bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
        bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
        aggsFinal.add(min);
        aggsFinal.add(max);
        aggsFinal.add(bloomFilter);
    } catch (SemanticException e) {
        LOG.error("Error creating min/max aggregations on key", e);
        throw new IllegalStateException("Error creating min/max aggregations on key", e);
    }
    GroupByDesc groupByDescFinal = new GroupByDesc(GroupByDesc.Mode.FINAL, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggsFinal, false, groupByMemoryUsage, memoryThreshold, null, false, 0, false);
    GroupByOperator groupByOpFinal = (GroupByOperator) OperatorFactory.getAndMakeChild(groupByDescFinal, new RowSchema(rsOp.getSchema()), rsOp);
    groupByOpFinal.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
    // for explain purpose
    if (parseContext.getContext().getExplainConfig() != null && parseContext.getContext().getExplainConfig().isFormatted()) {
        List<String> outputOperators = new ArrayList<>();
        outputOperators.add(groupByOpFinal.getOperatorId());
        rsOp.getConf().setOutputOperators(outputOperators);
    }
    // Create the final Reduce Sink Operator
    ReduceSinkDesc rsDescFinal = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID);
    ReduceSinkOperator rsOpFinal = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDescFinal, new RowSchema(groupByOpFinal.getSchema()), groupByOpFinal);
    rsOpFinal.setColumnExprMap(columnExprMap);
    LOG.debug("DynamicMinMaxPushdown: Saving RS to TS mapping: " + rsOpFinal + ": " + ts);
    parseContext.getRsOpToTsOpMap().put(rsOpFinal, ts);
    // for explain purpose
    if (parseContext.getContext().getExplainConfig() != null && parseContext.getContext().getExplainConfig().isFormatted()) {
        List<String> outputOperators = new ArrayList<>();
        outputOperators.add(ts.getOperatorId());
        rsOpFinal.getConf().setOutputOperators(outputOperators);
    }
    // Save the info that is required at query time to resolve dynamic/runtime values.
    RuntimeValuesInfo runtimeValuesInfo = new RuntimeValuesInfo();
    TableDesc rsFinalTableDesc = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(rsValueCols, "_col"));
    List<String> dynamicValueIDs = new ArrayList<String>();
    dynamicValueIDs.add(keyBaseAlias + "_min");
    dynamicValueIDs.add(keyBaseAlias + "_max");
    dynamicValueIDs.add(keyBaseAlias + "_bloom_filter");
    runtimeValuesInfo.setTableDesc(rsFinalTableDesc);
    runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs);
    runtimeValuesInfo.setColExprs(rsValueCols);
    parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo);
    return true;
}
Also used : HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), ArrayList (java.util.ArrayList), RuntimeValuesInfo (org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo), GenericUDAFBloomFilterEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), Table (org.apache.hadoop.hive.ql.metadata.Table)
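
The dynamic value IDs saved above (keyBaseAlias + "_min", "_max", "_bloom_filter") are resolved at query time on the TableScan side. As a hedged illustration only, assuming Hive's DynamicValue and ExprNodeDynamicValueDesc classes; the actual consumer wiring lives elsewhere in the planner, not in this method.

// illustrative sketch: a runtime value reference matching one of the IDs saved above
DynamicValue minDynamic = new DynamicValue(keyBaseAlias + "_min", key.getTypeInfo());
ExprNodeDesc minValueExpr = new ExprNodeDynamicValueDesc(minDynamic);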

Example 5 with TypeInfoFactory.binaryTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.binaryTypeInfo in project hive by apache.

From the class TestGenericUDFAesDecrypt, method testAesDec128ConstBin.

@Test
public void testAesDec128ConstBin() throws HiveException {
    GenericUDFAesDecrypt udf = new GenericUDFAesDecrypt();
    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    BytesWritable keyWr = new BytesWritable("1234567890123456".getBytes());
    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.binaryTypeInfo, keyWr);
    ObjectInspector[] arguments = { valueOI0, valueOI1 };
    udf.initialize(arguments);
    runAndVerifyBin("y6Ss+zCYObpCbgfWfyNWTw==", keyWr, "ABC", udf);
    runAndVerifyBin("BQGHoM3lqYcsurCRq3PlUw==", keyWr, "", udf);
    // null
    runAndVerifyBin(null, keyWr, null, udf);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) Test(org.junit.Test)

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 5
BytesWritable (org.apache.hadoop.io.BytesWritable): 4
Test (org.junit.Test): 4
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
LinkedHashMap (java.util.LinkedHashMap): 1
Table (org.apache.hadoop.hive.ql.metadata.Table): 1
RuntimeValuesInfo (org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo): 1
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 1
GenericUDAFBloomFilterEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator): 1
ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef): 1
LazyBinary (org.apache.hadoop.hive.serde2.lazy.LazyBinary): 1