use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class GroupByOperator method shouldBeFlushed.
/**
 * Decides, from the user-configured limits, whether the aggregation hash table
 * should be flushed.
 *
 * <p>On the first call (no capacity estimate yet) and whenever the current entry count is a
 * multiple of NUMROWSESTIMATESIZE, the method checks heap usage against the memory threshold
 * and refreshes the estimate of how many entries fit in the hash table.
 *
 * @param newKeys
 *          keys for the row under consideration
 * @return true if the hash table should be flushed, false otherwise
 **/
private boolean shouldBeFlushed(KeyWrapper newKeys) {
  final int entryCount = hashAggregations.size();

  // Re-estimate the variable portion of the row size every NUMROWSESTIMATESIZE rows.
  final boolean refreshEstimate =
      (numEntriesHashTable == 0) || ((entryCount % NUMROWSESTIMATESIZE) == 0);
  if (refreshEstimate) {
    // Check how much heap memory is currently in use.
    long heapUsed = memoryMXBean.getHeapMemoryUsage().getUsed();
    // TODO: there is no easy and reliable way to compute the memory used by the executor threads and on-heap cache.
    // Assuming the used memory is equally divided among all executors.
    if (isLlap) {
      heapUsed = heapUsed / numExecutors;
    }
    final float usedFraction = (float) heapUsed / (float) maxMemory;
    if (usedFraction > memoryThreshold) {
      // Over the threshold: flush, unless running on Tez with no capacity estimate yet.
      return !(isTez && numEntriesHashTable == 0);
    }

    // Add the size of the variable-length key columns of this row.
    for (Integer position : keyPositionsSize) {
      final Object keyColumn = newKeys.getKeyArray()[position];
      if (keyColumn == null) {
        // Ignore nulls
        continue;
      }
      if (keyColumn instanceof LazyString) {
        totalVariableSize += ((LazyPrimitive<LazyStringObjectInspector, Text>) keyColumn).getWritableObject().getLength();
      } else if (keyColumn instanceof String) {
        totalVariableSize += ((String) keyColumn).length();
      } else if (keyColumn instanceof Text) {
        totalVariableSize += ((Text) keyColumn).getLength();
      } else if (keyColumn instanceof LazyBinary) {
        totalVariableSize += ((LazyPrimitive<LazyBinaryObjectInspector, BytesWritable>) keyColumn).getWritableObject().getLength();
      } else if (keyColumn instanceof BytesWritable) {
        totalVariableSize += ((BytesWritable) keyColumn).getLength();
      } else if (keyColumn instanceof ByteArrayRef) {
        totalVariableSize += ((ByteArrayRef) keyColumn).getData().length;
      }
    }

    // Add the size of the aggregation buffers stored for this key.
    final AggregationBuffer[] buffers = hashAggregations.get(newKeys);
    for (int idx = 0; idx < buffers.length; idx++) {
      if (estimableAggregationEvaluators[idx]) {
        // The buffer can report its own size estimate.
        totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) buffers[idx]).estimate();
      } else if (aggrPositions[idx] != null) {
        totalVariableSize += estimateSize(buffers[idx], aggrPositions[idx]);
      }
    }

    numEntriesVarSize++;
    // Update the number of entries that can fit in the hash table
    numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
    if (isLogTraceEnabled) {
      LOG.trace("Hash Aggr: #hash table = " + entryCount + " #max in hash table = " + numEntriesHashTable);
    }
  }

  // Flush once the table holds at least as many entries as the estimate allows.
  return entryCount >= numEntriesHashTable;
}
use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class TestLazyHBaseObject method testLazyHBaseRow3.
/**
 * Test the LazyHBaseRow class with a one-to-one/onto mapping between Hive columns and
 * HBase column family/column qualifier pairs. The column types are primitive and fields
 * are stored in binary format in HBase.
 *
 * <p>The row key and the string column use string storage; every other column is written
 * with {@code Bytes.toBytes} and read back through the binary storage path.
 *
 * @throws SerDeException if the lazy struct inspector or row cannot be set up
 */
public void testLazyHBaseRow3() throws SerDeException {
  List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,tinyint,smallint,bigint,float,double,string,boolean");
  List<String> fieldNames = Arrays.asList("key", "c_int", "c_byte", "c_short", "c_long", "c_float", "c_double", "c_string", "c_bool");
  Text nullSequence = new Text("\\N");
  String hbaseColumnsMapping = ":key#str,cf-int:cq-int#bin,cf-byte:cq-byte#bin," + "cf-short:cq-short#bin,cf-long:cq-long#bin,cf-float:cq-float#bin,cf-double:cq-double#bin," + "cf-string:cq-string#str,cf-bool:cq-bool#bin";
  ColumnMappings columnMappings = null;
  try {
    columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
  } catch (SerDeException e) {
    fail(e.toString());
  }

  // Index 0 (row key) and index 7 (c_string) are stored as strings; the rest are binary.
  ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
  for (int i = 0; i < columnsMapping.length; i++) {
    ColumnMapping colMap = columnsMapping[i];
    if (i == 0 || i == 7) {
      colMap.binaryStorage.add(false);
    } else {
      colMap.binaryStorage.add(true);
    }
  }

  ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
  LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);

  // Build one KeyValue per non-key column, each holding the value 1 (or true / a fixed string).
  byte[] rowKey = "row-key".getBytes();
  List<KeyValue> kvs = new ArrayList<KeyValue>();
  byte[] value;
  for (int i = 1; i < columnsMapping.length; i++) {
    switch(i) {
      case 1:
        value = Bytes.toBytes(1);
        break;
      case 2:
        value = new byte[] { (byte) 1 };
        break;
      case 3:
        value = Bytes.toBytes((short) 1);
        break;
      case 4:
        value = Bytes.toBytes((long) 1);
        break;
      case 5:
        value = Bytes.toBytes((float) 1.0F);
        break;
      case 6:
        value = Bytes.toBytes((double) 1.0);
        break;
      case 7:
        value = "Hadoop, Hive, with HBase storage handler.".getBytes();
        break;
      case 8:
        value = Bytes.toBytes(true);
        break;
      default:
        throw new RuntimeException("Not expected: " + i);
    }
    ColumnMapping colMap = columnsMapping[i];
    kvs.add(new KeyValue(rowKey, colMap.familyNameBytes, colMap.qualifierNameBytes, value));
  }
  Collections.sort(kvs, KeyValue.COMPARATOR);
  Result result = new Result(kvs);
  o.init(result);

  // Read every field back through the struct inspector and compare with the expected writable.
  List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
  for (int i = 0; i < fieldRefs.size(); i++) {
    Object fieldData = ((StructObjectInspector) oi).getStructFieldData(o, fieldRefs.get(i));
    // Fail explicitly instead of using the Java `assert` keyword: `assert` is skipped when the
    // JVM runs without -ea, which would let these checks pass silently.
    if (fieldData == null) {
      fail("Error: field " + i + " deserialized to null.");
    }
    if (!(fieldData instanceof LazyPrimitive<?, ?>)) {
      fail("Error: field " + i + " is not an instance of LazyPrimitive<?, ?>.");
    }
    Writable writable = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
    switch(i) {
      case 0:
        Text text = new Text("row-key");
        assertEquals(text, writable);
        break;
      case 1:
        IntWritable iw = new IntWritable(1);
        assertEquals(iw, writable);
        break;
      case 2:
        ByteWritable bw = new ByteWritable((byte) 1);
        assertEquals(bw, writable);
        break;
      case 3:
        ShortWritable sw = new ShortWritable((short) 1);
        assertEquals(sw, writable);
        break;
      case 4:
        LongWritable lw = new LongWritable(1);
        assertEquals(lw, writable);
        break;
      case 5:
        FloatWritable fw = new FloatWritable(1.0F);
        assertEquals(fw, writable);
        break;
      case 6:
        DoubleWritable dw = new DoubleWritable(1.0);
        assertEquals(dw, writable);
        break;
      case 7:
        Text t = new Text("Hadoop, Hive, with HBase storage handler.");
        assertEquals(t, writable);
        break;
      case 8:
        BooleanWritable boolWritable = new BooleanWritable(true);
        assertEquals(boolWritable, writable);
        break;
      default:
        fail("Error: Unanticipated value in deserializing fields for HBaseSerDe.");
        break;
    }
  }
}
use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class TestLazySimpleSerDe method deserializeAndSerialize.
/**
 * Round-trips one row through the given SerDe and verifies both directions.
 *
 * <p>The text {@code t} is deserialized and each non-null field is unwrapped to its writable
 * object and compared with {@code expectedFieldsData}; the row is then serialized again and
 * its string form compared with {@code s}.
 *
 * @param serDe the SerDe under test
 * @param t the serialized input row
 * @param s the expected string form of the re-serialized row
 * @param expectedFieldsData expected writable (or null) for each field, in field order
 * @throws SerDeException if deserialization or serialization fails
 */
private void deserializeAndSerialize(LazySimpleSerDe serDe, Text t, String s, Object[] expectedFieldsData) throws SerDeException {
  // Get the row structure
  StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
  assertEquals(expectedFieldsData.length, fieldRefs.size());

  // Deserialize
  Object row = serDe.deserialize(t);
  for (int i = 0; i < fieldRefs.size(); i++) {
    Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
    if (fieldData != null) {
      // Wildcard cast instead of the raw type: avoids an unchecked/rawtypes warning and
      // matches how the other tests unwrap LazyPrimitive values.
      fieldData = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
    }
    assertEquals("Field " + i, expectedFieldsData[i], fieldData);
  }

  // Serialize
  assertEquals(Text.class, serDe.getSerializedClass());
  Text serializedText = (Text) serDe.serialize(row, oi);
  assertEquals("Serialized data", s, serializedText.toString());
}
use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class LazyHBaseCellMap method parse.
/**
 * Rebuilds {@code cachedMap} from the cells of this column family in {@code result}, then
 * marks the object as parsed.
 *
 * <p>Cells with a null or zero-length value are skipped, as are cells whose qualifier does
 * not start with {@code qualPrefix} when a prefix is set.
 */
private void parse() {
  // Start from an empty map, reusing the existing instance when there is one.
  if (cachedMap != null) {
    cachedMap.clear();
  } else {
    cachedMap = new LinkedHashMap<Object, Object>();
  }

  NavigableMap<byte[], byte[]> cells = result.getFamilyMap(columnFamilyBytes);
  if (cells != null) {
    for (Entry<byte[], byte[]> cell : cells.entrySet()) {
      byte[] cellValue = cell.getValue();
      // null values and values of zero length are not added to the cachedMap
      if (cellValue == null || cellValue.length == 0) {
        continue;
      }
      byte[] qualifier = cell.getKey();
      // Skip qualifiers that do not carry the configured prefix.
      if (qualPrefix != null && !Bytes.startsWith(qualifier, qualPrefix)) {
        continue;
      }

      LazyMapObjectInspector mapInspector = getInspector();

      // Keys are always primitive
      LazyPrimitive<? extends ObjectInspector, ? extends Writable> lazyKey = LazyFactory.createLazyPrimitiveClass((PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector(), binaryStorage.get(0));
      ByteArrayRef keyBytesRef = new ByteArrayRef();
      // With a hidden prefix, cut the prefix from Hive's map key; otherwise use the
      // qualifier unchanged.
      byte[] keyBytes = (qualPrefix != null && hideQualPrefix)
          ? Bytes.tail(qualifier, qualifier.length - qualPrefix.length)
          : qualifier;
      keyBytesRef.setData(keyBytes);
      lazyKey.init(keyBytesRef, 0, keyBytesRef.getData().length);

      // Value
      LazyObject<?> lazyValue = LazyFactory.createLazyObject(mapInspector.getMapValueObjectInspector(), binaryStorage.get(1));
      if (isNull(oi.getNullSequence(), cellValue, 0, cellValue.length)) {
        lazyValue.setNull();
      } else {
        ByteArrayRef valueBytesRef = new ByteArrayRef();
        valueBytesRef.setData(cellValue);
        lazyValue.init(valueBytesRef, 0, valueBytesRef.getData().length);
      }

      // Put the key/value into the map
      cachedMap.put(lazyKey.getObject(), lazyValue.getObject());
    }
  }
  setParsed(true);
}
use of org.apache.hadoop.hive.serde2.lazy.LazyPrimitive in project hive by apache.
the class TestHBaseSerDe method deserializeAndSerializeHivePrefixColumnFamily.
/**
 * Deserializes an HBase result with the given SerDe, checks every field, and then checks
 * the re-serialized Put.
 *
 * <p>Primitive fields are compared with {@code expectedFieldsData} directly. For each map
 * field, the value stored under the next entry of {@code expectedQualifiers} and the map
 * size are checked, and {@code notPresentKey} must be absent.
 *
 * @param serDe the SerDe under test
 * @param r the HBase result to deserialize
 * @param p the expected Put, or null to skip the serialization comparison
 * @param expectedFieldsData expected value for each field, in field order
 * @param expectedMapSize expected size of each map field, in order of appearance
 * @param expectedQualifiers map key to look up in each map field, in order of appearance
 * @param notPresentKey a key that must not be present in any map field
 * @throws SerDeException if deserialization or serialization fails
 * @throws IOException declared by the original signature
 */
private void deserializeAndSerializeHivePrefixColumnFamily(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData, int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey) throws SerDeException, IOException {
  StructObjectInspector rowInspector = (StructObjectInspector) serDe.getObjectInspector();
  List<? extends StructField> fields = rowInspector.getAllStructFieldRefs();
  Object row = serDe.deserialize(new ResultWritable(r));

  int mapIndex = 0;
  int fieldIndex = 0;
  for (StructField field : fields) {
    Object fieldData = rowInspector.getStructFieldData(row, field);
    assertNotNull(fieldData);
    if (fieldData instanceof LazyPrimitive<?, ?>) {
      LazyPrimitive<?, ?> primitive = (LazyPrimitive<?, ?>) fieldData;
      assertEquals(expectedFieldsData[fieldIndex], primitive.getWritableObject());
    } else if (fieldData instanceof LazyHBaseCellMap) {
      LazyHBaseCellMap cellMap = (LazyHBaseCellMap) fieldData;
      assertEquals(expectedFieldsData[fieldIndex], cellMap.getMapValueElement(expectedQualifiers.get(mapIndex)).toString().trim());
      assertEquals(expectedMapSize[mapIndex], cellMap.getMapSize());
      // Make sure that the unwanted key is not present in the map
      assertNull(cellMap.getMapValueElement(notPresentKey));
      mapIndex++;
    } else {
      fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
    }
    fieldIndex++;
  }

  // The JSON string is discarded; this call only has to complete without throwing.
  SerDeUtils.getJSONString(row, rowInspector);

  // Now serialize
  PutWritable serialized = (PutWritable) serDe.serialize(row, rowInspector);
  Put put = serialized.getPut();
  if (p == null) {
    return;
  }
  assertEquals("Serialized put:", p.toString(), put.toString());
}
Aggregations