Search in sources :

Example 6 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class GroupByOperator method shouldBeFlushed.

/**
 * Decides, based on user-parameters, whether the hash table should be flushed.
 *
 * <p>When no capacity estimate exists yet ({@code numEntriesHashTable == 0}) or the current
 * number of entries is a multiple of {@code NUMROWSESTIMATESIZE}, the heap usage is compared
 * against {@code memoryThreshold} and the estimate of how many entries fit in the hash table
 * is refreshed from the variable-size portion of the given keys and their aggregation buffers.
 *
 * @param newKeys
 *          keys for the row under consideration
 * @return {@code true} if the hash table should be flushed, {@code false} otherwise
 */
private boolean shouldBeFlushed(KeyWrapper newKeys) {
    final int entryCount = hashAggregations.size();
    // Re-estimate the variable portion of the row size every NUMROWSESTIMATESIZE rows,
    // and whenever no estimate has been computed yet.
    final boolean reestimate = (numEntriesHashTable == 0) || ((entryCount % NUMROWSESTIMATESIZE) == 0);
    if (reestimate) {
        // Check how much heap memory is currently in use.
        long heapUsed = memoryMXBean.getHeapMemoryUsage().getUsed();
        // TODO: there is no easy and reliable way to compute the memory used by the executor threads and on-heap cache.
        // Assuming the used memory is equally divided among all executors.
        if (isLlap) {
            heapUsed = heapUsed / numExecutors;
        }
        final float usedFraction = (float) heapUsed / (float) maxMemory;
        if (usedFraction > memoryThreshold) {
            // Over the threshold: flush, except on Tez while no capacity estimate exists yet.
            return !(isTez && numEntriesHashTable == 0);
        }

        // Accumulate the length of every variable-size key field; null fields are ignored.
        for (Integer position : keyPositionsSize) {
            final Object keyField = newKeys.getKeyArray()[position.intValue()];
            if (keyField == null) {
                continue;
            }
            if (keyField instanceof LazyString) {
                totalVariableSize += ((LazyPrimitive<LazyStringObjectInspector, Text>) keyField).getWritableObject().getLength();
            } else if (keyField instanceof String) {
                totalVariableSize += ((String) keyField).length();
            } else if (keyField instanceof Text) {
                totalVariableSize += ((Text) keyField).getLength();
            } else if (keyField instanceof LazyBinary) {
                totalVariableSize += ((LazyPrimitive<LazyBinaryObjectInspector, BytesWritable>) keyField).getWritableObject().getLength();
            } else if (keyField instanceof BytesWritable) {
                totalVariableSize += ((BytesWritable) keyField).getLength();
            } else if (keyField instanceof ByteArrayRef) {
                totalVariableSize += ((ByteArrayRef) keyField).getData().length;
            }
        }

        // Accumulate the size of the aggregation buffers stored for these keys.
        final AggregationBuffer[] buffers = hashAggregations.get(newKeys);
        for (int idx = 0; idx < buffers.length; idx++) {
            final AggregationBuffer buffer = buffers[idx];
            if (estimableAggregationEvaluators[idx]) {
                // The buffer reports its own size estimate.
                totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) buffer).estimate();
            } else if (aggrPositions[idx] != null) {
                totalVariableSize += estimateSize(buffer, aggrPositions[idx]);
            }
        }

        numEntriesVarSize++;
        // Update the number of entries that can fit in the hash table
        numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
        if (isLogTraceEnabled) {
            LOG.trace("Hash Aggr: #hash table = " + entryCount + " #max in hash table = " + numEntriesHashTable);
        }
    }
    // Flush once the table holds at least as many entries as the estimate allows.
    return entryCount >= numEntriesHashTable;
}
Also used : GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) LazyBinaryObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) LazyBinary(org.apache.hadoop.hive.serde2.lazy.LazyBinary) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)

Example 7 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class TestLazyHBaseObject method testLazyHBaseRow3.

/**
 * Test the LazyHBaseRow class with a one-to-one/onto mapping between Hive columns and
 * HBase column family/column qualifier pairs. The column types are primitive and fields
 * are stored in binary format in HBase (except the row key and the string column, which
 * are stored as strings).
 *
 * @throws SerDeException if the HBase column mapping cannot be parsed
 */
public void testLazyHBaseRow3() throws SerDeException {
    List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,tinyint,smallint,bigint,float,double,string,boolean");
    List<String> fieldNames = Arrays.asList("key", "c_int", "c_byte", "c_short", "c_long", "c_float", "c_double", "c_string", "c_bool");
    Text nullSequence = new Text("\\N");
    String hbaseColumnsMapping = ":key#str,cf-int:cq-int#bin,cf-byte:cq-byte#bin," + "cf-short:cq-short#bin,cf-long:cq-long#bin,cf-float:cq-float#bin,cf-double:cq-double#bin," + "cf-string:cq-string#str,cf-bool:cq-bool#bin";
    // Let a parse failure propagate: the method already declares SerDeException, and
    // propagating keeps the full stack trace instead of reducing it to a message.
    ColumnMappings columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
    ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
    for (int i = 0; i < columnsMapping.length; i++) {
        // Columns 0 (row key) and 7 (string) use string storage; all others are binary.
        columnsMapping[i].binaryStorage.add(i != 0 && i != 7);
    }
    ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
    LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
    byte[] rowKey = "row-key".getBytes();
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    byte[] value;
    // Index 0 is the row key itself, so cell values start at column 1.
    for (int i = 1; i < columnsMapping.length; i++) {
        switch(i) {
            case 1:
                value = Bytes.toBytes(1);
                break;
            case 2:
                value = new byte[] { (byte) 1 };
                break;
            case 3:
                value = Bytes.toBytes((short) 1);
                break;
            case 4:
                value = Bytes.toBytes(1L);
                break;
            case 5:
                value = Bytes.toBytes(1.0F);
                break;
            case 6:
                value = Bytes.toBytes(1.0);
                break;
            case 7:
                value = "Hadoop, Hive, with HBase storage handler.".getBytes();
                break;
            case 8:
                value = Bytes.toBytes(true);
                break;
            default:
                throw new RuntimeException("Not expected: " + i);
        }
        ColumnMapping colMap = columnsMapping[i];
        kvs.add(new KeyValue(rowKey, colMap.familyNameBytes, colMap.qualifierNameBytes, value));
    }
    Collections.sort(kvs, KeyValue.COMPARATOR);
    Result result = new Result(kvs);
    o.init(result);
    List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = ((StructObjectInspector) oi).getStructFieldData(o, fieldRefs.get(i));
        // Use JUnit assertions rather than the `assert` statement: `assert` is skipped
        // unless the JVM is started with -ea, which would silently disable these checks.
        assertNotNull("Field " + i, fieldData);
        assertTrue("Field " + i + " is not a LazyPrimitive", fieldData instanceof LazyPrimitive<?, ?>);
        Writable writable = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
        switch(i) {
            case 0:
                Text text = new Text("row-key");
                assertEquals(text, writable);
                break;
            case 1:
                IntWritable iw = new IntWritable(1);
                assertEquals(iw, writable);
                break;
            case 2:
                ByteWritable bw = new ByteWritable((byte) 1);
                assertEquals(bw, writable);
                break;
            case 3:
                ShortWritable sw = new ShortWritable((short) 1);
                assertEquals(sw, writable);
                break;
            case 4:
                LongWritable lw = new LongWritable(1);
                assertEquals(lw, writable);
                break;
            case 5:
                FloatWritable fw = new FloatWritable(1.0F);
                assertEquals(fw, writable);
                break;
            case 6:
                DoubleWritable dw = new DoubleWritable(1.0);
                assertEquals(dw, writable);
                break;
            case 7:
                Text t = new Text("Hadoop, Hive, with HBase storage handler.");
                assertEquals(t, writable);
                break;
            case 8:
                BooleanWritable boolWritable = new BooleanWritable(true);
                assertEquals(boolWritable, writable);
                break;
            default:
                fail("Error: Unanticipated value in deserializing fields for HBaseSerDe.");
                break;
        }
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Result(org.apache.hadoop.hbase.client.Result) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping) IntWritable(org.apache.hadoop.io.IntWritable) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 8 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class TestLazySimpleSerDe method deserializeAndSerialize.

/**
 * Deserializes {@code t} with the given SerDe, checks every field against the expected
 * data, then serializes the row again and checks the resulting text against {@code s}.
 *
 * @param serDe the SerDe under test
 * @param t the serialized row to deserialize
 * @param s the text expected when the deserialized row is serialized again
 * @param expectedFieldsData the expected writable value of each field, in field order;
 *          an entry is compared against {@code null} when the field data is {@code null}
 * @throws SerDeException if deserialization or serialization fails
 */
private void deserializeAndSerialize(LazySimpleSerDe serDe, Text t, String s, Object[] expectedFieldsData) throws SerDeException {
    // Get the row structure
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals(expectedFieldsData.length, fieldRefs.size());
    // Deserialize
    Object row = serDe.deserialize(t);
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        if (fieldData != null) {
            // Unwrap the lazy object so it can be compared with the expected writable.
            // The wildcard-parameterized cast avoids a raw-type warning.
            fieldData = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
        }
        assertEquals("Field " + i, expectedFieldsData[i], fieldData);
    }
    // Serialize
    assertEquals(Text.class, serDe.getSerializedClass());
    Text serializedText = (Text) serDe.serialize(row, oi);
    assertEquals("Serialized data", s, serializedText.toString());
}
Also used : Text(org.apache.hadoop.io.Text) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 9 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class LazyHBaseCellMap method parse.

/**
 * Rebuilds {@code cachedMap} from the cells that {@code result} holds for the column
 * family {@code columnFamilyBytes}, then marks this object as parsed.
 *
 * <p>Cells with a null or empty value are skipped, as are cells whose qualifier does not
 * start with {@code qualPrefix} when a prefix is set.
 */
private void parse() {
    if (cachedMap != null) {
        cachedMap.clear();
    } else {
        cachedMap = new LinkedHashMap<Object, Object>();
    }
    final NavigableMap<byte[], byte[]> cells = result.getFamilyMap(columnFamilyBytes);
    if (cells != null) {
        for (Entry<byte[], byte[]> cell : cells.entrySet()) {
            final byte[] qualifier = cell.getKey();
            final byte[] cellValue = cell.getValue();
            // null values and values of zero length are not added to the cachedMap
            if (cellValue == null || cellValue.length == 0) {
                continue;
            }
            // Skip qualifiers that do not carry the configured prefix.
            if (qualPrefix != null && !Bytes.startsWith(qualifier, qualPrefix)) {
                continue;
            }
            final LazyMapObjectInspector mapInspector = getInspector();

            // Keys are always primitive
            final LazyPrimitive<? extends ObjectInspector, ? extends Writable> lazyKey = LazyFactory.createLazyPrimitiveClass((PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector(), binaryStorage.get(0));
            final ByteArrayRef keyBytesRef = new ByteArrayRef();
            // Cut the prefix from hive's map key when it should be hidden;
            // otherwise (including non-prefix maps) use the qualifier as is.
            final byte[] keyBytes = (qualPrefix != null && hideQualPrefix)
                ? Bytes.tail(qualifier, qualifier.length - qualPrefix.length)
                : qualifier;
            keyBytesRef.setData(keyBytes);
            lazyKey.init(keyBytesRef, 0, keyBytesRef.getData().length);

            // Value
            final LazyObject<?> lazyValue = LazyFactory.createLazyObject(mapInspector.getMapValueObjectInspector(), binaryStorage.get(1));
            if (isNull(oi.getNullSequence(), cellValue, 0, cellValue.length)) {
                lazyValue.setNull();
            } else {
                final ByteArrayRef valueBytesRef = new ByteArrayRef();
                valueBytesRef.setData(cellValue);
                lazyValue.init(valueBytesRef, 0, valueBytesRef.getData().length);
            }

            // Put the key/value into the map
            cachedMap.put(lazyKey.getObject(), lazyValue.getObject());
        }
    }
    setParsed(true);
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector)

Example 10 with LazyPrimitive

use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.

the class TestHBaseSerDe method deserializeAndSerializeHivePrefixColumnFamily.

/**
 * Deserializes {@code r} with the given SerDe, verifies every field, and serializes the
 * row back into a {@code Put}.
 *
 * <p>Primitive fields are compared with {@code expectedFieldsData}. For each
 * {@code LazyHBaseCellMap} field, the value stored under the next expected qualifier, the
 * map size, and the absence of {@code notPresentKey} are checked. Any other field type
 * fails the test.
 *
 * @param serDe the SerDe under test
 * @param r the HBase result to deserialize
 * @param p the expected serialized put, or {@code null} to skip that comparison
 * @param expectedFieldsData expected value per field, in field order
 * @param expectedMapSize expected size of each map field, in order of appearance
 * @param expectedQualifiers qualifier to look up in each map field, in order of appearance
 * @param notPresentKey a key that must not be present in any map field
 * @throws SerDeException declared for SerDe failures
 * @throws IOException declared for I/O failures
 */
private void deserializeAndSerializeHivePrefixColumnFamily(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData, int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey) throws SerDeException, IOException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    Object row = serDe.deserialize(new ResultWritable(r));

    // Counts the map-typed fields seen so far; indexes the per-map expectations.
    int mapFieldIndex = 0;
    final int fieldCount = fieldRefs.size();
    for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
        final Object fieldData = soi.getStructFieldData(row, fieldRefs.get(fieldIndex));
        assertNotNull(fieldData);
        if (fieldData instanceof LazyPrimitive<?, ?>) {
            final LazyPrimitive<?, ?> primitive = (LazyPrimitive<?, ?>) fieldData;
            assertEquals(expectedFieldsData[fieldIndex], primitive.getWritableObject());
            continue;
        }
        if (fieldData instanceof LazyHBaseCellMap) {
            final LazyHBaseCellMap cellMap = (LazyHBaseCellMap) fieldData;
            final Object qualifier = expectedQualifiers.get(mapFieldIndex);
            assertEquals(expectedFieldsData[fieldIndex], cellMap.getMapValueElement(qualifier).toString().trim());
            assertEquals(expectedMapSize[mapFieldIndex], cellMap.getMapSize());
            // Make sure that the unwanted key is not present in the map
            assertNull(cellMap.getMapValueElement(notPresentKey));
            mapFieldIndex++;
            continue;
        }
        fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
    }

    // The returned JSON string is not inspected here.
    SerDeUtils.getJSONString(row, soi);

    // Now serialize
    final PutWritable serialized = (PutWritable) serDe.serialize(row, soi);
    final Put put = serialized.getPut();
    if (p != null) {
        assertEquals("Serialized put:", p.toString(), put.toString());
    }
}
Also used : LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Put(org.apache.hadoop.hbase.client.Put) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

LazyPrimitive (org.apache.hadoop.hive.serde2.lazy.LazyPrimitive)12 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)9 Text (org.apache.hadoop.io.Text)8 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)5 BooleanWritable (org.apache.hadoop.io.BooleanWritable)5 BytesWritable (org.apache.hadoop.io.BytesWritable)5 FloatWritable (org.apache.hadoop.io.FloatWritable)5 IntWritable (org.apache.hadoop.io.IntWritable)5 LongWritable (org.apache.hadoop.io.LongWritable)5 Put (org.apache.hadoop.hbase.client.Put)4 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)4 LazyMap (org.apache.hadoop.hive.serde2.lazy.LazyMap)4 LazyObject (org.apache.hadoop.hive.serde2.lazy.LazyObject)4 LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector)4 Writable (org.apache.hadoop.io.Writable)4 ArrayList (java.util.ArrayList)3 Map (java.util.Map)3 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)3 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)3 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)3