Search in sources :

Example 16 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class TestVectorizedORCReader method checkVectorizedReader.

private void checkVectorizedReader() throws Exception {
    Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
    RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
    OrcStruct row = null;
    long lastRowNumber = -1;
    // Check Vectorized ORC reader against ORC row reader
    while (vrr.nextBatch(batch)) {
        Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
        for (int i = 0; i < batch.size; i++) {
            Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber() + i);
            lastRowNumber = rr.getRowNumber();
            row = (OrcStruct) rr.next(row);
            for (int j = 0; j < batch.cols.length; j++) {
                Object a = (row.getFieldValue(j));
                ColumnVector cv = batch.cols[j];
                // if the value is repeating, use row 0
                int rowId = cv.isRepeating ? 0 : i;
                // make sure the null flag agrees
                if (a == null) {
                    Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
                } else if (a instanceof BooleanWritable) {
                    // Boolean values are stores a 1's and 0's, so convert and compare
                    Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(temp.toString(), Long.toString(b));
                } else if (a instanceof TimestampWritableV2) {
                    // Timestamps are stored as long, so convert and compare
                    TimestampWritableV2 t = ((TimestampWritableV2) a);
                    TimestampColumnVector tcv = ((TimestampColumnVector) cv);
                    java.sql.Timestamp ts = tcv.asScratchTimestamp(rowId);
                    Assert.assertEquals(t.getTimestamp(), Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos()));
                } else if (a instanceof DateWritableV2) {
                    // Dates are stored as long, so convert and compare
                    DateWritableV2 adt = (DateWritableV2) a;
                    long b = ((LongColumnVector) cv).vector[rowId];
                    Assert.assertEquals(adt.get().toEpochMilli(), DateWritableV2.daysToMillis((int) b));
                } else if (a instanceof HiveDecimalWritable) {
                    // Decimals are stored as BigInteger, so convert and compare
                    HiveDecimalWritable dec = (HiveDecimalWritable) a;
                    HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i];
                    Assert.assertEquals(dec, b);
                } else if (a instanceof DoubleWritable) {
                    double b = ((DoubleColumnVector) cv).vector[rowId];
                    assertEquals(a.toString(), Double.toString(b));
                } else if (a instanceof Text) {
                    BytesColumnVector bcv = (BytesColumnVector) cv;
                    Text b = new Text();
                    b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
                    assertEquals(a, b);
                } else if (a instanceof IntWritable || a instanceof LongWritable || a instanceof ByteWritable || a instanceof ShortWritable) {
                    assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
                } else {
                    assertEquals("huh", a.getClass().getName());
                }
            }
        }
        // Check repeating
        Assert.assertEquals(false, batch.cols[0].isRepeating);
        Assert.assertEquals(false, batch.cols[1].isRepeating);
        Assert.assertEquals(false, batch.cols[2].isRepeating);
        Assert.assertEquals(true, batch.cols[3].isRepeating);
        Assert.assertEquals(false, batch.cols[4].isRepeating);
        Assert.assertEquals(false, batch.cols[5].isRepeating);
        Assert.assertEquals(false, batch.cols[6].isRepeating);
        Assert.assertEquals(false, batch.cols[7].isRepeating);
        Assert.assertEquals(false, batch.cols[8].isRepeating);
        Assert.assertEquals(false, batch.cols[9].isRepeating);
        // Check non null
        Assert.assertEquals(false, batch.cols[0].noNulls);
        Assert.assertEquals(false, batch.cols[1].noNulls);
        Assert.assertEquals(true, batch.cols[2].noNulls);
        Assert.assertEquals(true, batch.cols[3].noNulls);
        Assert.assertEquals(false, batch.cols[4].noNulls);
        Assert.assertEquals(false, batch.cols[5].noNulls);
        Assert.assertEquals(false, batch.cols[6].noNulls);
        Assert.assertEquals(false, batch.cols[7].noNulls);
        Assert.assertEquals(false, batch.cols[8].noNulls);
        Assert.assertEquals(false, batch.cols[9].noNulls);
    }
    Assert.assertEquals(false, rr.nextBatch(batch));
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) Text(org.apache.hadoop.io.Text) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BooleanWritable(org.apache.hadoop.io.BooleanWritable)

Example 17 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class TestParquetSerDe method testParquetHiveSerDe.

@Test
public void testParquetHiveSerDe() throws Throwable {
    try {
        // Create the SerDe
        System.out.println("test: testParquetHiveSerDe");
        final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
        final Configuration conf = new Configuration();
        final Properties tbl = createProperties();
        serDe.initialize(conf, tbl, null);
        // Data
        final Writable[] arr = new Writable[9];
        // primitive types
        arr[0] = new ByteWritable((byte) 123);
        arr[1] = new ShortWritable((short) 456);
        arr[2] = new IntWritable(789);
        arr[3] = new LongWritable(1000l);
        arr[4] = new DoubleWritable((double) 5.3);
        arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
        arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));
        final Writable[] map = new Writable[3];
        for (int i = 0; i < 3; ++i) {
            final Writable[] pair = new Writable[2];
            pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
            pair[1] = new IntWritable(i);
            map[i] = new ArrayWritable(Writable.class, pair);
        }
        arr[7] = new ArrayWritable(Writable.class, map);
        final Writable[] array = new Writable[5];
        for (int i = 0; i < 5; ++i) {
            array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
        }
        arr[8] = new ArrayWritable(Writable.class, array);
        final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
        // Test
        deserializeAndSerializeLazySimple(serDe, arrWritable);
        System.out.println("test: testParquetHiveSerDe - OK");
    } catch (final Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Configuration(org.apache.hadoop.conf.Configuration) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 18 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class GenericUDFTrunc method evaluateNumber.

private Object evaluateNumber(DeferredObject[] arguments) throws HiveException, UDFArgumentTypeException {
    if (arguments[0] == null) {
        return null;
    }
    Object input = arguments[0].get();
    if (input == null) {
        return null;
    }
    if (arguments.length == 2 && arguments[1] != null && arguments[1].get() != null && !inputSacleConst) {
        Object scaleObj = null;
        switch(inputScaleOI.getPrimitiveCategory()) {
            case BYTE:
                scaleObj = byteConverter.convert(arguments[1].get());
                scale = ((ByteWritable) scaleObj).get();
                break;
            case SHORT:
                scaleObj = shortConverter.convert(arguments[1].get());
                scale = ((ShortWritable) scaleObj).get();
                break;
            case INT:
                scaleObj = intConverter.convert(arguments[1].get());
                scale = ((IntWritable) scaleObj).get();
                break;
            case LONG:
                scaleObj = longConverter.convert(arguments[1].get());
                long l = ((LongWritable) scaleObj).get();
                if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) {
                    throw new UDFArgumentException(getFuncName().toUpperCase() + " scale argument out of allowed range");
                }
                scale = (int) l;
            default:
                break;
        }
    }
    switch(inputType1) {
        case VOID:
            return null;
        case DECIMAL:
            HiveDecimalWritable decimalWritable = (HiveDecimalWritable) inputOI.getPrimitiveWritableObject(input);
            HiveDecimal dec = trunc(decimalWritable.getHiveDecimal(), scale);
            if (dec == null) {
                return null;
            }
            return new HiveDecimalWritable(dec);
        case BYTE:
            ByteWritable byteWritable = (ByteWritable) inputOI.getPrimitiveWritableObject(input);
            if (scale >= 0) {
                return byteWritable;
            } else {
                return new ByteWritable((byte) trunc(byteWritable.get(), scale));
            }
        case SHORT:
            ShortWritable shortWritable = (ShortWritable) inputOI.getPrimitiveWritableObject(input);
            if (scale >= 0) {
                return shortWritable;
            } else {
                return new ShortWritable((short) trunc(shortWritable.get(), scale));
            }
        case INT:
            IntWritable intWritable = (IntWritable) inputOI.getPrimitiveWritableObject(input);
            if (scale >= 0) {
                return intWritable;
            } else {
                return new IntWritable((int) trunc(intWritable.get(), scale));
            }
        case LONG:
            LongWritable longWritable = (LongWritable) inputOI.getPrimitiveWritableObject(input);
            if (scale >= 0) {
                return longWritable;
            } else {
                return new LongWritable(trunc(longWritable.get(), scale));
            }
        case FLOAT:
            float f = ((FloatWritable) inputOI.getPrimitiveWritableObject(input)).get();
            return new FloatWritable((float) trunc(f, scale));
        case DOUBLE:
            return trunc(((DoubleWritable) inputOI.getPrimitiveWritableObject(input)), scale);
        default:
            throw new UDFArgumentTypeException(0, "Only numeric or string group data types are allowed for TRUNC function. Got " + inputType1.name());
    }
}
Also used : HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable)

Example 19 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class TestVectorGroupByOperator method testKeyTypeAggregate.

private void testKeyTypeAggregate(String aggregateName, FakeVectorRowBatchFromObjectIterables data, Map<Object, Object> expected) throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("Key");
    mapColumnNames.add("Value");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    Set<Object> keys = new HashSet<Object>();
    AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "Value", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[1]));
    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    aggs.add(agg);
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    outputColumnNames.add("_col1");
    GroupByDesc desc = new GroupByDesc();
    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
    ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0]));
    ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
    keysDesc.add(keyExp);
    desc.setKeys(keysDesc);
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
    if (vgo == null) {
        assertTrue(false);
    }
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

        private int rowIndex;

        private String aggregateName;

        private Map<Object, Object> expected;

        private Set<Object> keys;

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            assertTrue(row instanceof Object[]);
            Object[] fields = (Object[]) row;
            assertEquals(2, fields.length);
            Object key = fields[0];
            Object keyValue = null;
            if (null == key) {
                keyValue = null;
            } else if (key instanceof ByteWritable) {
                ByteWritable bwKey = (ByteWritable) key;
                keyValue = bwKey.get();
            } else if (key instanceof ShortWritable) {
                ShortWritable swKey = (ShortWritable) key;
                keyValue = swKey.get();
            } else if (key instanceof IntWritable) {
                IntWritable iwKey = (IntWritable) key;
                keyValue = iwKey.get();
            } else if (key instanceof LongWritable) {
                LongWritable lwKey = (LongWritable) key;
                keyValue = lwKey.get();
            } else if (key instanceof TimestampWritableV2) {
                TimestampWritableV2 twKey = (TimestampWritableV2) key;
                keyValue = twKey.getTimestamp().toSqlTimestamp();
            } else if (key instanceof DoubleWritable) {
                DoubleWritable dwKey = (DoubleWritable) key;
                keyValue = dwKey.get();
            } else if (key instanceof FloatWritable) {
                FloatWritable fwKey = (FloatWritable) key;
                keyValue = fwKey.get();
            } else if (key instanceof BooleanWritable) {
                BooleanWritable bwKey = (BooleanWritable) key;
                keyValue = bwKey.get();
            } else if (key instanceof HiveDecimalWritable) {
                HiveDecimalWritable hdwKey = (HiveDecimalWritable) key;
                keyValue = hdwKey.getHiveDecimal();
            } else {
                Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
            }
            String keyValueAsString = String.format("%s", keyValue);
            assertTrue(expected.containsKey(keyValue));
            Object expectedValue = expected.get(keyValue);
            Object value = fields[1];
            Validator validator = getValidator(aggregateName);
            validator.validate(keyValueAsString, expectedValue, new Object[] { value });
            keys.add(keyValue);
        }

        private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
            this.aggregateName = aggregateName;
            this.expected = expected;
            this.keys = keys;
            return this;
        }
    }.init(aggregateName, expected, keys));
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    assertEquals(expected.size(), outBatchList.size());
    assertEquals(expected.size(), keys.size());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) ArrayList(java.util.ArrayList) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HashSet(java.util.HashSet) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) FakeCaptureVectorToRowOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator) Map(java.util.Map) HashMap(java.util.HashMap)

Example 20 with ByteWritable

use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.

the class TestVectorGroupByOperator method testMultiKey.

private void testMultiKey(String aggregateName, FakeVectorRowBatchFromObjectIterables data, HashMap<Object, Object> expected) throws HiveException {
    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
    Set<Object> keys = new HashSet<Object>();
    // The types array tells us the number of columns in the data
    final String[] columnTypes = data.getTypes();
    // Columns 0..N-1 are keys. Column N is the aggregate value input
    int i = 0;
    for (; i < columnTypes.length - 1; ++i) {
        String columnName = String.format("_col%d", i);
        mapColumnNames.put(columnName, i);
        outputColumnNames.add(columnName);
    }
    mapColumnNames.put("value", i);
    outputColumnNames.add("value");
    VectorizationContext ctx = new VectorizationContext("name", outputColumnNames);
    ArrayList<AggregationDesc> aggs = new ArrayList(1);
    aggs.add(buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
    for (i = 0; i < columnTypes.length - 1; ++i) {
        String columnName = String.format("_col%d", i);
        keysDesc.add(buildColumnDesc(ctx, columnName, TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
    }
    GroupByDesc desc = new GroupByDesc();
    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    desc.setKeys(keysDesc);
    vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

        private int rowIndex;

        private String aggregateName;

        private Map<Object, Object> expected;

        private Set<Object> keys;

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            assertTrue(row instanceof Object[]);
            Object[] fields = (Object[]) row;
            assertEquals(columnTypes.length, fields.length);
            ArrayList<Object> keyValue = new ArrayList<Object>(columnTypes.length - 1);
            for (int i = 0; i < columnTypes.length - 1; ++i) {
                Object key = fields[i];
                if (null == key) {
                    keyValue.add(null);
                } else if (key instanceof Text) {
                    Text txKey = (Text) key;
                    keyValue.add(txKey.toString());
                } else if (key instanceof ByteWritable) {
                    ByteWritable bwKey = (ByteWritable) key;
                    keyValue.add(bwKey.get());
                } else if (key instanceof ShortWritable) {
                    ShortWritable swKey = (ShortWritable) key;
                    keyValue.add(swKey.get());
                } else if (key instanceof IntWritable) {
                    IntWritable iwKey = (IntWritable) key;
                    keyValue.add(iwKey.get());
                } else if (key instanceof LongWritable) {
                    LongWritable lwKey = (LongWritable) key;
                    keyValue.add(lwKey.get());
                } else if (key instanceof TimestampWritableV2) {
                    TimestampWritableV2 twKey = (TimestampWritableV2) key;
                    keyValue.add(twKey.getTimestamp());
                } else if (key instanceof DoubleWritable) {
                    DoubleWritable dwKey = (DoubleWritable) key;
                    keyValue.add(dwKey.get());
                } else if (key instanceof FloatWritable) {
                    FloatWritable fwKey = (FloatWritable) key;
                    keyValue.add(fwKey.get());
                } else if (key instanceof BooleanWritable) {
                    BooleanWritable bwKey = (BooleanWritable) key;
                    keyValue.add(bwKey.get());
                } else {
                    Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
                }
            }
            String keyAsString = Arrays.deepToString(keyValue.toArray());
            assertTrue(expected.containsKey(keyValue));
            Object expectedValue = expected.get(keyValue);
            Object value = fields[columnTypes.length - 1];
            Validator validator = getValidator(aggregateName);
            validator.validate(keyAsString, expectedValue, new Object[] { value });
            keys.add(keyValue);
        }

        private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
            this.aggregateName = aggregateName;
            this.expected = expected;
            this.keys = keys;
            return this;
        }
    }.init(aggregateName, expected, keys));
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    assertEquals(expected.size(), outBatchList.size());
    assertEquals(expected.size(), keys.size());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HashSet(java.util.HashSet) Text(org.apache.hadoop.io.Text) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) FakeCaptureVectorToRowOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)81 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)56 IntWritable (org.apache.hadoop.io.IntWritable)56 LongWritable (org.apache.hadoop.io.LongWritable)52 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)47 Text (org.apache.hadoop.io.Text)47 Test (org.junit.Test)44 FloatWritable (org.apache.hadoop.io.FloatWritable)40 BooleanWritable (org.apache.hadoop.io.BooleanWritable)38 BytesWritable (org.apache.hadoop.io.BytesWritable)30 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)29 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)27 ArrayList (java.util.ArrayList)22 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)22 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)21 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)21 HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable)18 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)17 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)17 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)17