Search in sources:

Example 16 with GenericUDFOPEqualOrLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.

From the class TestAccumuloPredicateHandler, method testCreateIteratorSettings.

@Test
public void testCreateIteratorSettings() throws Exception {
    // Override what's placed in the Configuration by setup()
    conf = new JobConf();
    List<String> columnNames = Arrays.asList("field1", "field2", "rid");
    List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
    conf.set(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
    String columnMappingStr = "cf:f1,cf:f2,:rowID";
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
    columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes);
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    String filterExpr = SerializationUtilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
    assertEquals(2, iterators.size());
    IteratorSetting is1 = iterators.get(0);
    IteratorSetting is2 = iterators.get(1);
    boolean foundQual = false;
    boolean foundPCompare = false;
    boolean foundCOpt = false;
    boolean foundConst = false;
    for (Map.Entry<String, String> option : is1.getOptions().entrySet()) {
        String optKey = option.getKey();
        if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) {
            foundQual = true;
            assertEquals("cf:f1", option.getValue());
        } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) {
            foundConst = true;
            assertEquals(Base64.getEncoder().encodeToString("aaa".getBytes()), option.getValue());
        } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) {
            foundCOpt = true;
            assertEquals(LessThanOrEqual.class.getName(), option.getValue());
        } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) {
            foundPCompare = true;
            assertEquals(StringCompare.class.getName(), option.getValue());
        }
    }
    assertTrue(foundConst && foundCOpt && foundPCompare && foundQual);
    foundQual = false;
    foundPCompare = false;
    foundCOpt = false;
    foundConst = false;
    for (Map.Entry<String, String> option : is2.getOptions().entrySet()) {
        String optKey = option.getKey();
        if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) {
            foundQual = true;
            assertEquals("cf:f2", option.getValue());
        } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) {
            foundConst = true;
            byte[] intVal = new byte[4];
            ByteBuffer.wrap(intVal).putInt(5);
            assertEquals(Base64.getEncoder().encodeToString(intVal), option.getValue());
        } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) {
            foundCOpt = true;
            assertEquals(GreaterThan.class.getName(), option.getValue());
        } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) {
            foundPCompare = true;
            assertEquals(IntCompare.class.getName(), option.getValue());
        }
    }
    assertTrue(foundConst && foundCOpt && foundPCompare && foundQual);
}
Also used:
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan)
LessThanOrEqual (org.apache.hadoop.hive.accumulo.predicate.compare.LessThanOrEqual)
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)
GreaterThan (org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThan)
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
JobConf (org.apache.hadoop.mapred.JobConf)
Map (java.util.Map)
ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper)
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)
Test (org.junit.Test)
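
The CONST_VAL assertions above rely on a simple encoding: the constant's bytes, base64-encoded (raw UTF-8 bytes for the string constant, a 4-byte big-endian value for the int constant). A minimal, self-contained sketch of that encoding using only the JDK; the class name is made up for illustration:

import java.nio.ByteBuffer;
import java.util.Base64;

public class ConstValEncodingSketch {
    public static void main(String[] args) {
        // String constant: raw bytes, base64-encoded.
        System.out.println(Base64.getEncoder().encodeToString("aaa".getBytes())); // YWFh

        // Int constant: 4 bytes in big-endian order (ByteBuffer's default).
        byte[] intVal = new byte[4];
        ByteBuffer.wrap(intVal).putInt(5);
        System.out.println(Base64.getEncoder().encodeToString(intVal)); // AAAABQ==
    }
}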

Example 17 with GenericUDFOPEqualOrLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.

From the class TestAccumuloPredicateHandler, method testDisjointRanges.

@Test
public void testDisjointRanges() throws SerDeException {
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    String filterExpr = SerializationUtilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    Collection<Range> ranges = handler.getRanges(conf, columnMapper);
    // Impossible to get ranges for row <= 'aaa' and row > 'bbb'
    assertEquals(0, ranges.size());
}
Also used:
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan)
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
Range (org.apache.accumulo.core.data.Range)
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)
Test (org.junit.Test)
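
Why zero ranges come back: rid <= 'aaa' covers (-inf, 'aaa'] while rid > 'bbb' covers ('bbb', +inf), so their intersection is empty. A hedged sketch of the same disjointness check made directly against the Accumulo client API (variable and class names are illustrative; assumes accumulo-core on the classpath):

import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Text;

public class DisjointRangeSketch {
    public static void main(String[] args) {
        Range lessOrEqualAaa = new Range(null, true, new Text("aaa"), true);  // (-inf, "aaa"]
        Range greaterThanBbb = new Range(new Text("bbb"), false, null, true); // ("bbb", +inf)
        // clip(range, returnNullIfDisjoint=true) yields null when the ranges do not
        // overlap, which is the situation that leaves the handler with zero ranges.
        System.out.println(lessOrEqualAaa.clip(greaterThanBbb, true)); // null
    }
}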

Example 18 with GenericUDFOPEqualOrLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.

From the class TestKuduPredicateHandler, method testOrPredicates.

@Test
public void testOrPredicates() throws Exception {
    for (ColumnSchema col : SCHEMA.getColumns()) {
        // Skip the null, default, and binary columns; binary predicates are not supported (HIVE-11370).
        if (col.getName().equals("null") || col.getName().equals("default") || col.getName().equals("binary")) {
            continue;
        }
        PrimitiveTypeInfo typeInfo = toHiveType(col.getType(), col.getTypeAttributes());
        ExprNodeDesc colExpr = new ExprNodeColumnDesc(typeInfo, col.getName(), null, false);
        ExprNodeDesc constExpr = new ExprNodeConstantDesc(typeInfo, ROW.getObject(col.getName()));
        List<ExprNodeDesc> children = Lists.newArrayList();
        children.add(colExpr);
        children.add(constExpr);
        ExprNodeGenericFuncDesc gePredicateExpr = new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
        ExprNodeGenericFuncDesc lePredicateExpr = new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPEqualOrLessThan(), children);
        List<ExprNodeDesc> orChildren = Lists.newArrayList();
        orChildren.add(gePredicateExpr);
        orChildren.add(lePredicateExpr);
        ExprNodeGenericFuncDesc predicateExpr = new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPOr(), orChildren);
        // Verify KuduPredicateHandler.decompose
        HiveStoragePredicateHandler.DecomposedPredicate decompose = KuduPredicateHandler.decompose(predicateExpr, SCHEMA);
        // OR predicates are currently not supported.
        assertNull(decompose);
        List<KuduPredicate> predicates = expressionToPredicates(predicateExpr);
        assertEquals(0, predicates.size());
        // Also test NOT OR.
        List<ExprNodeDesc> notChildren = Lists.newArrayList();
        notChildren.add(predicateExpr);
        ExprNodeGenericFuncDesc notPredicateExpr = new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPNot(), notChildren);
        // Verify KuduPredicateHandler.decompose
        HiveStoragePredicateHandler.DecomposedPredicate decomposeNot = KuduPredicateHandler.decompose(notPredicateExpr, SCHEMA);
        // See note in KuduPredicateHandler.newAnalyzer.
        assertNull(decomposeNot);
        List<KuduPredicate> notPredicates = expressionToPredicates(notPredicateExpr);
        assertEquals(2, notPredicates.size());
    }
}
Also used:
HiveStoragePredicateHandler (org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler)
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan)
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
ColumnSchema (org.apache.kudu.ColumnSchema)
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)
KuduPredicate (org.apache.kudu.client.KuduPredicate)
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
GenericUDFOPNot (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot)
GenericUDFOPOr (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr)
Test (org.junit.Test)
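
The final assertion (two predicates from the NOT of the OR) is De Morgan's law at work: NOT (col >= v OR col <= v) rewrites to col < v AND col > v, i.e. two comparison predicates, even though their conjunction can never match a non-null value. A hedged sketch of those two predicates built directly with the Kudu client API (the column and value are made up for illustration):

import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Type;
import org.apache.kudu.client.KuduPredicate;
import org.apache.kudu.client.KuduPredicate.ComparisonOp;

public class NotOrPredicateSketch {
    public static void main(String[] args) {
        ColumnSchema col = new ColumnSchema.ColumnSchemaBuilder("field", Type.INT64).build();
        long v = 5L;
        // NOT (field >= 5 OR field <= 5)  ==  field < 5 AND field > 5
        KuduPredicate less = KuduPredicate.newComparisonPredicate(col, ComparisonOp.LESS, v);
        KuduPredicate greater = KuduPredicate.newComparisonPredicate(col, ComparisonOp.GREATER, v);
        System.out.println(less);    // e.g. `field` < 5
        System.out.println(greater); // e.g. `field` > 5
    }
}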

Example 19 with GenericUDFOPEqualOrLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.

From the class TestVectorFilterCompare, method doTestsWithDiffColumnScalar.

private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, TypeInfo typeInfo2, ColumnScalarMode columnScalarMode, Comparison comparison, boolean tryDecimal64) throws Exception {
    String typeName1 = typeInfo1.getTypeName();
    PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) typeInfo1).getPrimitiveCategory();
    String typeName2 = typeInfo2.getTypeName();
    PrimitiveCategory primitiveCategory2 = ((PrimitiveTypeInfo) typeInfo2).getPrimitiveCategory();
    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
    List<String> columns = new ArrayList<String>();
    int columnNum = 1;
    ExprNodeDesc col1Expr;
    Object scalar1Object = null;
    final boolean decimal64Enable1 = checkDecimal64(tryDecimal64, typeInfo1);
    if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
        generationSpecList.add(GenerationSpec.createSameType(typeInfo1));
        explicitDataTypePhysicalVariationList.add(decimal64Enable1 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
        String columnName = "col" + (columnNum++);
        col1Expr = new ExprNodeColumnDesc(typeInfo1, columnName, "table", false);
        columns.add(columnName);
    } else {
        scalar1Object = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo1);
        // Adjust the decimal type to the scalar's type...
        if (typeInfo1 instanceof DecimalTypeInfo) {
            typeInfo1 = getDecimalScalarTypeInfo(scalar1Object);
        }
        col1Expr = new ExprNodeConstantDesc(typeInfo1, scalar1Object);
    }
    ExprNodeDesc col2Expr;
    Object scalar2Object = null;
    final boolean decimal64Enable2 = checkDecimal64(tryDecimal64, typeInfo2);
    if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
        generationSpecList.add(GenerationSpec.createSameType(typeInfo2));
        explicitDataTypePhysicalVariationList.add(decimal64Enable2 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
        String columnName = "col" + (columnNum++);
        col2Expr = new ExprNodeColumnDesc(typeInfo2, columnName, "table", false);
        columns.add(columnName);
    } else {
        scalar2Object = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo2);
        // Adjust the decimal type to the scalar's type...
        if (typeInfo2 instanceof DecimalTypeInfo) {
            typeInfo2 = getDecimalScalarTypeInfo(scalar2Object);
        }
        col2Expr = new ExprNodeConstantDesc(typeInfo2, scalar2Object);
    }
    List<ObjectInspector> objectInspectorList = new ArrayList<ObjectInspector>();
    objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo1));
    objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo2));
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(col1Expr);
    children.add(col2Expr);
    // ----------------------------------------------------------------------------------------------
    String[] columnNames = columns.toArray(new String[0]);
    VectorRandomRowSource rowSource = new VectorRandomRowSource();
    rowSource.initGenerationSpecSchema(random, generationSpecList,
        /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true,
        explicitDataTypePhysicalVariationList);
    Object[][] randomRows = rowSource.randomRows(100000);
    VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
    GenericUDF genericUdf;
    switch(comparison) {
        case EQUALS:
            genericUdf = new GenericUDFOPEqual();
            break;
        case LESS_THAN:
            genericUdf = new GenericUDFOPLessThan();
            break;
        case LESS_THAN_EQUAL:
            genericUdf = new GenericUDFOPEqualOrLessThan();
            break;
        case GREATER_THAN:
            genericUdf = new GenericUDFOPGreaterThan();
            break;
        case GREATER_THAN_EQUAL:
            genericUdf = new GenericUDFOPEqualOrGreaterThan();
            break;
        case NOT_EQUALS:
            genericUdf = new GenericUDFOPNotEqual();
            break;
        default:
            throw new RuntimeException("Unexpected arithmetic " + comparison);
    }
    ObjectInspector[] objectInspectors = objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]);
    ObjectInspector outputObjectInspector = null;
    try {
        outputObjectInspector = genericUdf.initialize(objectInspectors);
    } catch (Exception e) {
        Assert.fail(e.toString());
    }
    TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children);
    final int rowCount = randomRows.length;
    Object[][] resultObjectsArray = new Object[FilterCompareTestMode.count][];
    for (int i = 0; i < FilterCompareTestMode.count; i++) {
        Object[] resultObjects = new Object[rowCount];
        resultObjectsArray[i] = resultObjects;
        FilterCompareTestMode filterCompareTestMode = FilterCompareTestMode.values()[i];
        switch(filterCompareTestMode) {
            case ROW_MODE:
                doRowFilterCompareTest(typeInfo1, typeInfo2, columns, children, exprDesc, comparison, randomRows, columnScalarMode, rowSource.rowStructObjectInspector(), outputTypeInfo, resultObjects);
                break;
            case ADAPTOR:
            case FILTER_VECTOR_EXPRESSION:
            case COMPARE_VECTOR_EXPRESSION:
                doVectorFilterCompareTest(typeInfo1, typeInfo2, columns, columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, exprDesc, comparison, filterCompareTestMode, columnScalarMode, batchSource, exprDesc.getWritableObjectInspector(), outputTypeInfo, resultObjects);
                break;
            default:
                throw new RuntimeException("Unexpected IF statement test mode " + filterCompareTestMode);
        }
    }
    for (int i = 0; i < rowCount; i++) {
        // Row-mode is the expected value.
        Object expectedResult = resultObjectsArray[0][i];
        for (int v = 1; v < FilterCompareTestMode.count; v++) {
            FilterCompareTestMode filterCompareTestMode = FilterCompareTestMode.values()[v];
            Object vectorResult = resultObjectsArray[v][i];
            if (filterCompareTestMode == FilterCompareTestMode.FILTER_VECTOR_EXPRESSION && expectedResult == null && vectorResult != null) {
                // This is OK as long as the filter returned false: in filter context a NULL
                // predicate result drops the row, exactly as false does.
                boolean vectorBoolean = ((BooleanWritable) vectorResult).get();
                if (vectorBoolean) {
                    Assert.fail("Row " + i + " typeName1 " + typeName1 + " typeName2 " + typeName2 +
                        " outputTypeName " + outputTypeInfo.getTypeName() +
                        " " + comparison + " " + filterCompareTestMode + " " + columnScalarMode +
                        " result is NOT NULL and true" +
                        " does not match row-mode expected result is NULL which means false here" +
                        (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN ? " scalar1 " + scalar1Object.toString() : "") +
                        " row values " + Arrays.toString(randomRows[i]) +
                        (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR ? " scalar2 " + scalar2Object.toString() : ""));
                }
            } else if (expectedResult == null || vectorResult == null) {
                if (expectedResult != null || vectorResult != null) {
                    Assert.fail("Row " + i + " typeName1 " + typeName1 + " typeName2 " + typeName2 +
                        " outputTypeName " + outputTypeInfo.getTypeName() +
                        " " + comparison + " " + filterCompareTestMode + " " + columnScalarMode +
                        " result is NULL " + (vectorResult == null) +
                        " does not match row-mode expected result is NULL " + (expectedResult == null) +
                        (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN ? " scalar1 " + scalar1Object.toString() : "") +
                        " row values " + Arrays.toString(randomRows[i]) +
                        (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR ? " scalar2 " + scalar2Object.toString() : ""));
                }
            } else {
                if (!expectedResult.equals(vectorResult)) {
                    Assert.fail("Row " + i + " typeName1 " + typeName1 + " typeName2 " + typeName2 +
                        " outputTypeName " + outputTypeInfo.getTypeName() +
                        " " + comparison + " " + filterCompareTestMode + " " + columnScalarMode +
                        " result " + vectorResult.toString() +
                        " (" + vectorResult.getClass().getSimpleName() + ")" +
                        " does not match row-mode expected result " + expectedResult.toString() +
                        " (" + expectedResult.getClass().getSimpleName() + ")" +
                        (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN ? " scalar1 " + scalar1Object.toString() : "") +
                        " row values " + Arrays.toString(randomRows[i]) +
                        (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR ? " scalar2 " + scalar2Object.toString() : ""));
                }
            }
        }
    }
}
Also used:
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan)
ArrayList (java.util.ArrayList)
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)
GenericUDFOPNotEqual (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual)
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)
GenericUDFOPEqual (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource)
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
GenericUDFOPLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan)
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
GenerationSpec (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec)
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)
BooleanWritable (org.apache.hadoop.io.BooleanWritable)
VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)
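
The FILTER_VECTOR_EXPRESSION special case above reflects SQL's three-valued logic: in filter context a NULL predicate result drops the row just as false does, so a vectorized filter may legitimately report false where row mode reports NULL. A tiny illustrative sketch of that rule (plain Java; the class and method names are made up):

public class FilterNullSemanticsSketch {
    // A filter keeps a row only when the predicate is definitely TRUE;
    // NULL (unknown) and FALSE both drop the row.
    static boolean keepsRow(Boolean predicateResult) {
        return Boolean.TRUE.equals(predicateResult);
    }

    public static void main(String[] args) {
        System.out.println(keepsRow(null));  // false: NULL collapses to "drop"
        System.out.println(keepsRow(false)); // false
        System.out.println(keepsRow(true));  // true
    }
}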

Example 20 with GenericUDFOPEqualOrLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.

From the class TestOrcSplitElimination, method testSplitEliminationLargeMaxSplit.

@Test
public void testSplitEliminationLargeMaxSplit() throws Exception {
    ObjectInspector inspector = createIO();
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000, CompressionKind.NONE, 10000, 10000);
    writeData(writer);
    writer.close();
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
    HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 150000);
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    GenericUDF udf = new GenericUDFOPEqualOrLessThan();
    List<ExprNodeDesc> childExpr = Lists.newArrayList();
    ExprNodeConstantDesc con;
    ExprNodeGenericFuncDesc en;
    String sargStr;
    createTestSarg(inspector, udf, childExpr);
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(2, splits.length);
    con = new ExprNodeConstantDesc(0);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // no stripe satisfies the condition
    assertEquals(0, splits.length);
    con = new ExprNodeConstantDesc(2);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // only the first stripe satisfies the condition, hence a single split
    assertEquals(1, splits.length);
    con = new ExprNodeConstantDesc(5);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // the first stripe satisfies the predicate and forms a single split; the last stripe is a
    // separate split
    assertEquals(2, splits.length);
    con = new ExprNodeConstantDesc(13);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // the first two stripes satisfy the predicate and are merged into a single split; the last
    // stripe is a separate split
    assertEquals(2, splits.length);
    con = new ExprNodeConstantDesc(29);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // the first three stripes satisfy the predicate and are merged into a single split; the last
    // stripe is a separate split
    assertEquals(2, splits.length);
    con = new ExprNodeConstantDesc(70);
    childExpr.set(1, con);
    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    // the first two stripes satisfy the predicate and are merged into a single split; the last
    // two stripes form a separate split
    assertEquals(2, splits.length);
}
Also used:
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan)
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
InputSplit (org.apache.hadoop.mapred.InputSplit)
Test (org.junit.Test)
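
For quick reference, the constants exercised above and the split counts the test expects, as stated in its assertions and comments (with the max split size of 150000, adjacent qualifying stripes merge into one split):

constant    expected splits    stripes satisfying the predicate (per the test's comments)
0           0                  none
2           1                  first stripe only
5           2                  first stripe; last stripe as a separate split
13          2                  first two stripes merged; last stripe separate
29          2                  first three stripes merged; last stripe separate
70          2                  first two stripes merged; last two stripes in a separate split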

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 22 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 22 usages
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 22 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 21 usages
Test (org.junit.Test): 19 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 18 usages
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 14 usages
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan): 13 usages
ArrayList (java.util.ArrayList): 9 usages
Range (org.apache.accumulo.core.data.Range): 9 usages
HashMap (java.util.HashMap): 8 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 8 usages
DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 7 usages
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 7 usages
Node (org.apache.hadoop.hive.ql.lib.Node): 7 usages
SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher): 7 usages
SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker): 7 usages
List (java.util.List): 6 usages
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 6 usages
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 6 usages