Search in sources :

Example 21 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class TestVectorGroupByOperator method testMultiKey.

/**
 * Runs a HASH-mode vectorized GROUP BY with several key columns and a single aggregate over
 * {@code data}, and validates every emitted row against {@code expected}.
 *
 * <p>All columns of {@code data} except the last are grouping keys, named {@code _col0},
 * {@code _col1}, ...; the last column is the aggregate input, named {@code value}.
 *
 * @param aggregateName name of the aggregate function; also used to select the validator
 * @param data input batches; {@code data.getTypes()} supplies one type name per column
 * @param expected maps the list of unwrapped key values of a group to its expected aggregate
 * @throws HiveException propagated from operator setup, processing, or row inspection
 */
private void testMultiKey(String aggregateName, FakeVectorRowBatchFromObjectIterables data, HashMap<Object, Object> expected) throws HiveException {
    // The types array tells us the number of columns in the data.
    final String[] columnTypes = data.getTypes();
    // Columns 0..keyCount-1 are keys; column keyCount is the aggregate value input.
    final int keyCount = columnTypes.length - 1;

    ArrayList<String> outputColumnNames = new ArrayList<String>();
    for (int i = 0; i < keyCount; ++i) {
        outputColumnNames.add(String.format("_col%d", i));
    }
    outputColumnNames.add("value");
    VectorizationContext ctx = new VectorizationContext("name", outputColumnNames);

    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>(1);
    aggs.add(buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[keyCount])));

    ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < keyCount; ++i) {
        String columnName = String.format("_col%d", i);
        keysDesc.add(buildColumnDesc(ctx, columnName, TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
    }

    GroupByDesc desc = new GroupByDesc();
    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    desc.setKeys(keysDesc);
    vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);

    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
    // Fail with a clear assertion instead of a NullPointerException further down.
    assertNotNull(vgo);
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);

    // Distinct key tuples seen by the inspector; compared with expected.size() at the end.
    Set<Object> keys = new HashSet<Object>();
    out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

        private String aggregateName;

        private Map<Object, Object> expected;

        private Set<Object> keys;

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            assertTrue(row instanceof Object[]);
            Object[] fields = (Object[]) row;
            assertEquals(columnTypes.length, fields.length);
            // Unwrap each key writable into the plain value used as part of the lookup key.
            ArrayList<Object> keyValue = new ArrayList<Object>(keyCount);
            for (int col = 0; col < keyCount; ++col) {
                Object key = fields[col];
                if (null == key) {
                    keyValue.add(null);
                } else if (key instanceof Text) {
                    keyValue.add(((Text) key).toString());
                } else if (key instanceof ByteWritable) {
                    keyValue.add(((ByteWritable) key).get());
                } else if (key instanceof ShortWritable) {
                    keyValue.add(((ShortWritable) key).get());
                } else if (key instanceof IntWritable) {
                    keyValue.add(((IntWritable) key).get());
                } else if (key instanceof LongWritable) {
                    keyValue.add(((LongWritable) key).get());
                } else if (key instanceof TimestampWritableV2) {
                    keyValue.add(((TimestampWritableV2) key).getTimestamp());
                } else if (key instanceof DoubleWritable) {
                    keyValue.add(((DoubleWritable) key).get());
                } else if (key instanceof FloatWritable) {
                    keyValue.add(((FloatWritable) key).get());
                } else if (key instanceof BooleanWritable) {
                    keyValue.add(((BooleanWritable) key).get());
                } else {
                    Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
                }
            }
            String keyAsString = Arrays.deepToString(keyValue.toArray());
            assertTrue(expected.containsKey(keyValue));
            Object expectedValue = expected.get(keyValue);
            // The aggregate result is the last field of the output row.
            Object value = fields[keyCount];
            Validator validator = getValidator(aggregateName);
            validator.validate(keyAsString, expectedValue, new Object[] { value });
            keys.add(keyValue);
        }

        // Hands the enclosing method's state to the anonymous class and returns it for chaining.
        private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
            this.aggregateName = aggregateName;
            this.expected = expected;
            this.keys = keys;
            return this;
        }
    }.init(aggregateName, expected, keys));

    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);

    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    // One output row per expected group, and every expected group seen exactly once.
    assertEquals(expected.size(), outBatchList.size());
    assertEquals(expected.size(), keys.size());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HashSet(java.util.HashSet) Text(org.apache.hadoop.io.Text) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) FakeCaptureVectorToRowOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator) Map(java.util.Map) HashMap(java.util.HashMap)

Example 22 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class TestVectorGroupByOperator method testKeyTypeAggregate.

/**
 * Runs a HASH-mode vectorized GROUP BY with one key column ({@code Key}) and one aggregate
 * input column ({@code Value}) over {@code data}, and validates every emitted row against
 * {@code expected}.
 *
 * @param aggregateName name of the aggregate function; also used to select the validator
 * @param data input batches; {@code data.getTypes()[0]} is the key type name and
 *     {@code data.getTypes()[1]} the value type name
 * @param expected maps each unwrapped key value (possibly {@code null}) to its expected aggregate
 * @throws HiveException propagated from operator setup, processing, or row inspection
 */
private void testKeyTypeAggregate(String aggregateName, FakeVectorRowBatchFromObjectIterables data, Map<Object, Object> expected) throws HiveException {
    // Column 0 is the grouping key, column 1 is the aggregate input.
    final String[] columnTypes = data.getTypes();

    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("Key");
    mapColumnNames.add("Value");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);

    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    aggs.add(buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "Value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[1])));

    ArrayList<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    outputColumnNames.add("_col1");

    GroupByDesc desc = new GroupByDesc();
    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);

    ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
    keysDesc.add(buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[0])));
    desc.setKeys(keysDesc);

    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
    assertNotNull("GROUP BY operator was not vectorized", vgo);
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);

    // Distinct keys seen by the inspector; compared with expected.size() at the end.
    Set<Object> keys = new HashSet<Object>();
    out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

        private String aggregateName;

        private Map<Object, Object> expected;

        private Set<Object> keys;

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            assertTrue(row instanceof Object[]);
            Object[] fields = (Object[]) row;
            assertEquals(2, fields.length);
            Object key = fields[0];
            // Unwrap the key writable into the plain value used to look up the expectation.
            Object keyValue = null;
            if (null == key) {
                // A null group key is looked up in the expected map under a null map key.
            } else if (key instanceof ByteWritable) {
                keyValue = ((ByteWritable) key).get();
            } else if (key instanceof ShortWritable) {
                keyValue = ((ShortWritable) key).get();
            } else if (key instanceof IntWritable) {
                keyValue = ((IntWritable) key).get();
            } else if (key instanceof LongWritable) {
                keyValue = ((LongWritable) key).get();
            } else if (key instanceof TimestampWritableV2) {
                keyValue = ((TimestampWritableV2) key).getTimestamp().toSqlTimestamp();
            } else if (key instanceof DoubleWritable) {
                keyValue = ((DoubleWritable) key).get();
            } else if (key instanceof FloatWritable) {
                keyValue = ((FloatWritable) key).get();
            } else if (key instanceof BooleanWritable) {
                keyValue = ((BooleanWritable) key).get();
            } else if (key instanceof HiveDecimalWritable) {
                keyValue = ((HiveDecimalWritable) key).getHiveDecimal();
            } else {
                Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
            }
            String keyValueAsString = String.format("%s", keyValue);
            assertTrue(expected.containsKey(keyValue));
            Object expectedValue = expected.get(keyValue);
            Object value = fields[1];
            Validator validator = getValidator(aggregateName);
            validator.validate(keyValueAsString, expectedValue, new Object[] { value });
            keys.add(keyValue);
        }

        // Hands the enclosing method's state to the anonymous class and returns it for chaining.
        private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
            this.aggregateName = aggregateName;
            this.expected = expected;
            this.keys = keys;
            return this;
        }
    }.init(aggregateName, expected, keys));

    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);

    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    // One output row per expected group, and every expected group seen exactly once.
    assertEquals(expected.size(), outBatchList.size());
    assertEquals(expected.size(), keys.size());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) ArrayList(java.util.ArrayList) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HashSet(java.util.HashSet) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) FakeCaptureVectorToRowOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator) Map(java.util.Map) HashMap(java.util.HashMap)

Example 23 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class TestRCFile method testSimpleReadAndWrite.

/**
 * Writes two eight-column records to an RCFile (with metadata), reads them back, deserializes
 * each row with {@code serDe}, and compares every field with the expected writable.
 *
 * @throws IOException propagated from RCFile writer/reader operations
 * @throws SerDeException propagated from deserialization or object-inspector access
 */
@Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
    cleanup();
    byte[][] record_1 = { "123".getBytes(StandardCharsets.UTF_8), "456".getBytes(StandardCharsets.UTF_8), "789".getBytes(StandardCharsets.UTF_8), "1000".getBytes(StandardCharsets.UTF_8), "5.3".getBytes(StandardCharsets.UTF_8), "hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8) };
    byte[][] record_2 = { "100".getBytes(StandardCharsets.UTF_8), "200".getBytes(StandardCharsets.UTF_8), "123".getBytes(StandardCharsets.UTF_8), "1000".getBytes(StandardCharsets.UTF_8), "5.3".getBytes(StandardCharsets.UTF_8), "hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8) };
    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")), new DefaultCodec());
    try {
        BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
        for (int i = 0; i < record_1.length; i++) {
            BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
            bytes.set(i, cu);
        }
        writer.append(bytes);
        // Reuse the same container for the second record.
        bytes.clear();
        for (int i = 0; i < record_2.length; i++) {
            BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
            bytes.set(i, cu);
        }
        writer.append(bytes);
    } finally {
        // Close even when an append fails so the file handle is not leaked.
        writer.close();
    }
    Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200), new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
    // Indexed by record number, in the order the records were appended.
    Object[][] expectedRecords = { expectedRecord_1, expectedRecord_2 };
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    try {
        assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
        assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
        assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
        LongWritable rowID = new LongWritable();
        for (int i = 0; i < expectedRecords.length; i++) {
            reader.next(rowID);
            BytesRefArrayWritable cols = new BytesRefArrayWritable();
            reader.getCurrentRow(cols);
            cols.resetValid(8);
            Object row = serDe.deserialize(cols);
            StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
            List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
            assertEquals("Field size should be 8", 8, fieldRefs.size());
            for (int j = 0; j < fieldRefs.size(); j++) {
                Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
                Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
                // JUnit order is (message, expected, actual); report both indices on failure.
                assertEquals("Record " + i + ", field " + j, expectedRecords[i][j], standardWritableData);
            }
        }
    } finally {
        // Close even when an assertion fails so the reader is not leaked.
        reader.close();
    }
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) RecordReader(org.apache.hadoop.mapred.RecordReader) Text(org.apache.hadoop.io.Text) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 24 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class TestGenericUDFOPDivide method testByteDivideShort.

/**
 * Checks that dividing a byte (4) by a short (6) with {@code GenericUDFOPDivide} reports a
 * {@code decimal(9,6)} result type and evaluates to {@code 0.666667}.
 *
 * @throws HiveException propagated from UDF initialization or evaluation
 */
@Test
public void testByteDivideShort() throws HiveException {
    GenericUDFOPDivide udf = new GenericUDFOPDivide();
    ByteWritable left = new ByteWritable((byte) 4);
    ShortWritable right = new ShortWritable((short) 6);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableByteObjectInspector, PrimitiveObjectInspectorFactory.writableShortObjectInspector };
    DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    // JUnit order is (expected, actual), so a failure message labels the values correctly.
    Assert.assertEquals(TypeInfoFactory.getDecimalTypeInfo(9, 6), oi.getTypeInfo());
    HiveDecimalWritable res = (HiveDecimalWritable) udf.evaluate(args);
    Assert.assertEquals(HiveDecimal.create("0.666667"), res.getHiveDecimal());
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Test(org.junit.Test)

Example 25 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class TestGenericUDFOPPlus method testBytePlusShort.

/**
 * Checks that adding a byte (4) and a short (6) with {@code GenericUDFOPPlus} reports a
 * short result type and evaluates to 10.
 *
 * @throws HiveException propagated from UDF initialization or evaluation
 */
@Test
public void testBytePlusShort() throws HiveException {
    GenericUDFOPPlus udf = new GenericUDFOPPlus();
    // Left operand is a byte, right operand is a short.
    ByteWritable left = new ByteWritable((byte) 4);
    ShortWritable right = new ShortWritable((short) 6);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableByteObjectInspector, PrimitiveObjectInspectorFactory.writableShortObjectInspector };
    DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    // JUnit order is (expected, actual), so a failure message labels the values correctly.
    Assert.assertEquals(TypeInfoFactory.shortTypeInfo, oi.getTypeInfo());
    ShortWritable res = (ShortWritable) udf.evaluate(args);
    Assert.assertEquals(10, res.get());
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Test(org.junit.Test)

Aggregations

ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 92, IntWritable (org.apache.hadoop.io.IntWritable): 61, ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 53, LongWritable (org.apache.hadoop.io.LongWritable): 53, DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 48, Test (org.junit.Test): 47, Text (org.apache.hadoop.io.Text): 42, FloatWritable (org.apache.hadoop.io.FloatWritable): 40, BooleanWritable (org.apache.hadoop.io.BooleanWritable): 37, BytesWritable (org.apache.hadoop.io.BytesWritable): 29, HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 28, ArrayList (java.util.ArrayList): 25, ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 23, HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable): 18, HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 17, HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 17, HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 17, HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable): 17, Writable (org.apache.hadoop.io.Writable): 17, PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 15