Example 1 with GenericUDFOPGreaterThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan in the Apache Hive project.

From class TestVectorizationContext, method testFilterStringColCompareStringColumnExpressions.

@Test
public void testFilterStringColCompareStringColumnExpressions() throws HiveException {
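    // Each string-family column pairing (STRING, CHAR, VARCHAR) should vectorize to the same
    // FilterStringGroupColGreaterStringGroupColumn filter expression.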
    // Strings test
    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
    ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(String.class, "col2", "table", false);
    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc();
    exprDesc.setGenericUDF(udf);
    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(col2Expr);
    exprDesc.setChildren(children1);
    List<String> columns = new ArrayList<String>();
    columns.add("col0");
    columns.add("col1");
    columns.add("col2");
    VectorizationContext vc = new VectorizationContext("name", columns);
    VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn);
    // 2 CHAR test
    CharTypeInfo charTypeInfo = new CharTypeInfo(10);
    col1Expr = new ExprNodeColumnDesc(charTypeInfo, "col1", "table", false);
    col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false);
    udf = new GenericUDFOPGreaterThan();
    exprDesc = new ExprNodeGenericFuncDesc();
    exprDesc.setGenericUDF(udf);
    children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(col2Expr);
    exprDesc.setChildren(children1);
    vc = new VectorizationContext("name", columns);
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn);
    // 2 VARCHAR test
    VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10);
    col1Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false);
    col2Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col2", "table", false);
    udf = new GenericUDFOPGreaterThan();
    exprDesc = new ExprNodeGenericFuncDesc();
    exprDesc.setGenericUDF(udf);
    children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(col2Expr);
    exprDesc.setChildren(children1);
    vc = new VectorizationContext("name", columns);
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn);
    // Some mix tests (STRING, CHAR), (VARCHAR, CHAR), (VARCHAR, STRING)...
    col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
    col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false);
    udf = new GenericUDFOPGreaterThan();
    exprDesc = new ExprNodeGenericFuncDesc();
    exprDesc.setGenericUDF(udf);
    children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(col2Expr);
    exprDesc.setChildren(children1);
    vc = new VectorizationContext("name", columns);
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn);
    col1Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false);
    col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false);
    udf = new GenericUDFOPGreaterThan();
    exprDesc = new ExprNodeGenericFuncDesc();
    exprDesc.setGenericUDF(udf);
    children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(col2Expr);
    exprDesc.setChildren(children1);
    vc = new VectorizationContext("name", columns);
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn);
    col1Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false);
    col2Expr = new ExprNodeColumnDesc(String.class, "col2", "table", false);
    udf = new GenericUDFOPGreaterThan();
    exprDesc = new ExprNodeGenericFuncDesc();
    exprDesc.setGenericUDF(udf);
    children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(col2Expr);
    exprDesc.setChildren(children1);
    vc = new VectorizationContext("name", columns);
    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn);
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) ArrayList(java.util.ArrayList) FilterStringGroupColGreaterStringGroupColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterStringGroupColumn) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Test(org.junit.Test)

Example 2 with GenericUDFOPGreaterThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan in the Apache Hive project.

From class TestVectorizationContext, method testStringFilterExpressions.

@Test
public void testStringFilterExpressions() throws HiveException {
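    // col1 > 'Alpha': a string column compared with a string constant should vectorize
    // to a column-vs-scalar string filter.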
    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha");
    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc();
    exprDesc.setGenericUDF(udf);
    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(constDesc);
    exprDesc.setChildren(children1);
    List<String> columns = new ArrayList<String>();
    columns.add("col0");
    columns.add("col1");
    columns.add("col2");
    VectorizationContext vc = new VectorizationContext("name", columns);
    VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(ve instanceof FilterStringGroupColGreaterStringScalar);
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) FilterStringGroupColGreaterStringScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterStringScalar) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Test(org.junit.Test)

Example 3 with GenericUDFOPGreaterThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan in the Apache Hive project.

From class TestVectorizationContext, method testVectorizeAndOrProjectionExpression.

@Test
public void testVectorizeAndOrProjectionExpression() throws HiveException {
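    // Build (col1 > 10) AND col2 and (col1 > 10) OR col2, then vectorize each
    // in both FILTER and PROJECTION modes.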
    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
    ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
    greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    greaterExprDesc.setGenericUDF(udf);
    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(constDesc);
    greaterExprDesc.setChildren(children1);
    ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);
    GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
    ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
    andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    andExprDesc.setGenericUDF(andUdf);
    List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
    children3.add(greaterExprDesc);
    children3.add(col2Expr);
    andExprDesc.setChildren(children3);
    List<String> columns = new ArrayList<String>();
    columns.add("col1");
    columns.add("col2");
    VectorizationContext vc = new VectorizationContext("name", columns);
    VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertEquals(veAnd.getClass(), FilterExprAndExpr.class);
    assertEquals(veAnd.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
    assertEquals(veAnd.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
    veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    assertEquals(veAnd.getClass(), ColAndCol.class);
    assertEquals(1, veAnd.getChildExpressions().length);
    assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
    assertEquals(2, ((ColAndCol) veAnd).getColNum1());
    assertEquals(1, ((ColAndCol) veAnd).getColNum2());
    assertEquals(3, ((ColAndCol) veAnd).getOutputColumn());
    // OR
    GenericUDFOPOr orUdf = new GenericUDFOPOr();
    ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
    orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    orExprDesc.setGenericUDF(orUdf);
    List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
    children4.add(greaterExprDesc);
    children4.add(col2Expr);
    orExprDesc.setChildren(children4);
    // Allocate a new VectorizationContext to reset the intermediate columns.
    vc = new VectorizationContext("name", columns);
    VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertEquals(veOr.getClass(), FilterExprOrExpr.class);
    assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
    assertEquals(veOr.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
    veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    assertEquals(veOr.getClass(), ColOrCol.class);
    assertEquals(1, veOr.getChildExpressions().length);
    assertEquals(veOr.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
    assertEquals(2, ((ColOrCol) veOr).getColNum1());
    assertEquals(1, ((ColOrCol) veOr).getColNum2());
    assertEquals(3, ((ColOrCol) veOr).getOutputColumn());
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)

Example 4 with GenericUDFOPGreaterThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan in the Apache Hive project.

From class TestParquetRowGroupFilter, method testRowGroupFilterTakeEffect.

@Test
public void testRowGroupFilterTakeEffect() throws Exception {
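    // Write a Parquet file with 100 rows (intCol = 0..99) and check that the
    // filter predicate prunes row groups as expected.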
    // define schema
    columnNames = "intCol";
    columnTypes = "int";
    StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes);
    MessageType fileSchema = MessageTypeParser.parseMessageType("message hive_schema {\n" + "  optional int32 intCol;\n" + "}\n");
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "intCol");
    conf.set("columns", "intCol");
    conf.set("columns.types", "int");
    // create Parquet file with specific data
    Path testPath = writeDirect("RowGroupFilterTakeEffect", fileSchema, new DirectWriter() {

        @Override
        public void write(RecordConsumer consumer) {
            for (int i = 0; i < 100; i++) {
                consumer.startMessage();
                consumer.startField("int", 0);
                consumer.addInteger(i);
                consumer.endField("int", 0);
                consumer.endMessage();
            }
        }
    });
    // intCol > 50: the single row group (values 0..99) cannot be pruned, so one block survives the filter.
    GenericUDF udf = new GenericUDFOPGreaterThan();
    List<ExprNodeDesc> children = Lists.newArrayList();
    ExprNodeColumnDesc columnDesc = new ExprNodeColumnDesc(Integer.class, "intCol", "T", false);
    ExprNodeConstantDesc constantDesc = new ExprNodeConstantDesc(50);
    children.add(columnDesc);
    children.add(constantDesc);
    ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
    String searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
    ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
    Assert.assertEquals("row group is not filtered correctly", 1, recordReader.getFiltedBlocks().size());
    // intCol > 100: the row group's maximum is 99, so it is pruned and no blocks remain.
    constantDesc = new ExprNodeConstantDesc(100);
    children.set(1, constantDesc);
    genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
    searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
    recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat().getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
    Assert.assertEquals("row group is not filtered correctly", 0, recordReader.getFiltedBlocks().size());
}
Also used : Path(org.apache.hadoop.fs.Path) GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ParquetRecordReaderWrapper(org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) FileSplit(org.apache.hadoop.mapred.FileSplit) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MessageType(org.apache.parquet.schema.MessageType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 5 with GenericUDFOPGreaterThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan in the Apache Hive project.

From class TestAccumuloPredicateHandler, method testIteratorIgnoreRowIDFields.

@Test
public void testIteratorIgnoreRowIDFields() {
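    // Build rid <= 'aaa' AND rid > 'bbb' against the rowID column; the handler is expected
    // to produce no iterator settings for predicates on the rowID field.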
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    String filterExpr = SerializationUtilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
        assertEquals(iterators.size(), 0);
    } catch (SerDeException e) {
        StringUtils.stringifyException(e);
    }
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)
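
The examples above only ever wrap GenericUDFOPGreaterThan in an ExprNodeGenericFuncDesc and hand it to a planner component. As a supplementary illustration (not taken from the Hive sources above), a minimal sketch of driving the UDF directly through the generic initialize/evaluate protocol, assuming the standard javaIntObjectInspector and DeferredJavaObject helpers, might look like this; the class name GreaterThanDirectCall is purely illustrative:

// Hypothetical standalone sketch, assumed usage of the GenericUDF protocol.
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BooleanWritable;

public class GreaterThanDirectCall {
    public static void main(String[] args) throws HiveException {
        GenericUDFOPGreaterThan gt = new GenericUDFOPGreaterThan();
        // Describe both arguments as plain Java ints.
        ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
        gt.initialize(new ObjectInspector[] { intOI, intOI });
        // Evaluate 2 > 1; comparison UDFs return a BooleanWritable.
        Object result = gt.evaluate(new GenericUDF.DeferredObject[] {
            new GenericUDF.DeferredJavaObject(2),
            new GenericUDF.DeferredJavaObject(1)
        });
        System.out.println(((BooleanWritable) result).get());
    }
}

In query execution the UDF is never called this way directly; it is bound through an ExprNodeGenericFuncDesc as the examples show, but the sketch makes the argument and return conventions of the class explicit.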

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 19 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 19 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 19 usages
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 19 usages
Test (org.junit.Test): 19 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 18 usages
ArrayList (java.util.ArrayList): 10 usages
DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression): 10 usages
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 10 usages
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 9 usages
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 4 usages
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 3 usages
GenericUDFOPLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan): 3 usages
GenericUDFOPOr (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr): 3 usages
Properties (java.util.Properties): 2 usages
Range (org.apache.accumulo.core.data.Range): 2 usages
Configuration (org.apache.hadoop.conf.Configuration): 2 usages
ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper): 2 usages
HiveStoragePredicateHandler (org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler): 2 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2 usages