Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.
The class TestAccumuloSerDe, method testMapSerialization.
@Test
public void testMapSerialization() throws Exception {
  Properties properties = new Properties();
  Configuration conf = new Configuration();
  properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
  properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
  properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,map<string,string>");
  properties.setProperty(serdeConstants.COLLECTION_DELIM, ":");
  properties.setProperty(serdeConstants.MAPKEY_DELIM, "=");
  // Get one of the default separators to avoid having to set a custom separator
  char collectionSeparator = ':', kvSeparator = '=';
  serde.initialize(conf, properties);
  AccumuloHiveRow row = new AccumuloHiveRow();
  row.setRowId("r1");
  row.add("cf", "vals", ("k1" + kvSeparator + "v1" + collectionSeparator + "k2" + kvSeparator + "v2"
      + collectionSeparator + "k3" + kvSeparator + "v3").getBytes());
  Object obj = serde.deserialize(row);
  assertNotNull(obj);
  assertTrue(obj instanceof LazyAccumuloRow);
  LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
  Object field0 = lazyRow.getField(0);
  assertNotNull(field0);
  assertTrue(field0 instanceof LazyString);
  assertEquals(row.getRowId(), ((LazyString) field0).getWritableObject().toString());
  Object field1 = lazyRow.getField(1);
  assertNotNull(field1);
  assertTrue(field1 instanceof LazyMap);
  LazyMap map = (LazyMap) field1;
  Map<Object, Object> untypedMap = map.getMap();
  assertEquals(3, map.getMapSize());
  Set<String> expectedKeys = new HashSet<String>();
  expectedKeys.add("k1");
  expectedKeys.add("k2");
  expectedKeys.add("k3");
  for (Entry<Object, Object> entry : untypedMap.entrySet()) {
    assertNotNull(entry.getKey());
    assertTrue(entry.getKey() instanceof LazyString);
    LazyString key = (LazyString) entry.getKey();
    assertNotNull(entry.getValue());
    assertTrue(entry.getValue() instanceof LazyString);
    LazyString value = (LazyString) entry.getValue();
    String strKey = key.getWritableObject().toString(), strValue = value.getWritableObject().toString();
    assertTrue(expectedKeys.remove(strKey));
    assertEquals(2, strValue.length());
    assertTrue(strValue.startsWith("v"));
    assertTrue(strValue.endsWith(Character.toString(strKey.charAt(1))));
  }
  assertTrue("Did not find expected keys: " + expectedKeys, expectedKeys.isEmpty());
}
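For reference, the value the test stores in the cf:vals cell is just a flat, delimiter-separated string. The helper below is not part of Hive; it is a minimal sketch (the names DelimitedMapPayloadExample and delimitedMapPayload are made up for illustration) showing how such a payload could be assembled with the same collection (':') and map-key ('=') delimiters the test configures.

import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;

public class DelimitedMapPayloadExample {
  // Builds a "k1=v1:k2=v2:k3=v3"-style payload from a map, mirroring the bytes
  // the test writes into the "cf:vals" column.
  static byte[] delimitedMapPayload(Map<String, String> map, char collectionSep, char kvSep) {
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<String, String> e : map.entrySet()) {
      if (sb.length() > 0) {
        sb.append(collectionSep);
      }
      sb.append(e.getKey()).append(kvSep).append(e.getValue());
    }
    return sb.toString().getBytes(StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    Map<String, String> m = new LinkedHashMap<>();
    m.put("k1", "v1");
    m.put("k2", "v2");
    m.put("k3", "v3");
    // Prints: k1=v1:k2=v2:k3=v3
    System.out.println(new String(delimitedMapPayload(m, ':', '='), StandardCharsets.UTF_8));
  }
}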
Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.
The class TestAccumuloSerDe, method testCompositeKeyDeserialization.
@Test
public void testCompositeKeyDeserialization() throws Exception {
  Properties properties = new Properties();
  Configuration conf = new Configuration();
  properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1");
  properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1");
  properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:string,col2:string,col3:string>,string");
  properties.setProperty(DelimitedAccumuloRowIdFactory.ACCUMULO_COMPOSITE_DELIMITER, "_");
  properties.setProperty(AccumuloSerDeParameters.COMPOSITE_ROWID_FACTORY,
      DelimitedAccumuloRowIdFactory.class.getName());
  serde.initialize(conf, properties);
  AccumuloHiveRow row = new AccumuloHiveRow();
  row.setRowId("p1_p2_p3");
  row.add("cf", "f1", "v1".getBytes());
  Object obj = serde.deserialize(row);
  assertTrue(obj instanceof LazyAccumuloRow);
  LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
  Object field0 = lazyRow.getField(0);
  assertNotNull(field0);
  assertTrue(field0 instanceof LazyStruct);
  LazyStruct struct = (LazyStruct) field0;
  List<Object> fields = struct.getFieldsAsList();
  assertEquals(3, fields.size());
  for (int i = 0; i < fields.size(); i++) {
    assertEquals(LazyString.class, fields.get(i).getClass());
    assertEquals("p" + (i + 1), fields.get(i).toString());
  }
  Object field1 = lazyRow.getField(1);
  assertNotNull(field1);
  assertTrue("Expected instance of LazyString but was " + field1.getClass(), field1 instanceof LazyString);
  assertEquals(field1.toString(), "v1");
}
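Composite row-id handling amounts to splitting the Accumulo row id on the configured delimiter and exposing each part as a struct field. The snippet below only illustrates that splitting; it is not the DelimitedAccumuloRowIdFactory implementation, and the names CompositeRowIdExample and splitRowId are assumptions made for the example.

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

public class CompositeRowIdExample {
  // Splits a composite row id such as "p1_p2_p3" on the configured delimiter,
  // mirroring how the struct fields col1..col3 are populated in the test above.
  static List<String> splitRowId(String rowId, String delimiter) {
    // Pattern.quote keeps the delimiter literal even if it is a regex metacharacter.
    return Arrays.asList(rowId.split(Pattern.quote(delimiter)));
  }

  public static void main(String[] args) {
    // Prints: [p1, p2, p3]
    System.out.println(splitRowId("p1_p2_p3", "_"));
  }
}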
Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.
The class HBaseRowSerializer, method serializeKeyField.
byte[] serializeKeyField(Object keyValue, StructField keyField, ColumnMapping keyMapping) throws IOException {
  if (keyValue == null) {
    throw new IOException("HBase row key cannot be NULL");
  }
  ObjectInspector keyFieldOI = keyField.getFieldObjectInspector();
  if (!keyFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE)
      && keyMapping.isCategory(ObjectInspector.Category.PRIMITIVE)) {
    // we always serialize the String type using the escaped algorithm for LazyString
    return serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI),
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
  }
  // use the serialization option switch to write primitive values as either a variable
  // length UTF8 string or a fixed width bytes if serializing in binary format
  boolean writeBinary = keyMapping.binaryStorage.get(0);
  return serialize(keyValue, keyFieldOI, 1, writeBinary);
}
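The writeBinary switch selects between a UTF-8 string rendering and a fixed-width binary rendering of a primitive key. The sketch below only illustrates that difference for an int key using plain JDK calls; it does not invoke the serialize(...) helper of HBaseRowSerializer, and the big-endian fixed-width layout shown for the binary case is an assumption made for illustration.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class KeyEncodingExample {
  public static void main(String[] args) {
    int key = 42;
    // String storage: the key becomes the UTF-8 bytes of its decimal rendering.
    byte[] asString = Integer.toString(key).getBytes(StandardCharsets.UTF_8);
    // Binary storage (assumed big-endian, fixed width): 4 bytes for an int.
    byte[] asBinary = ByteBuffer.allocate(Integer.BYTES).putInt(key).array();
    System.out.println(Arrays.toString(asString)); // [52, 50]
    System.out.println(Arrays.toString(asBinary)); // [0, 0, 0, 42]
  }
}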
Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.
The class HBaseRowSerializer, method serializeField.
private void serializeField(Object value, StructField field, ColumnMapping colMap, Put put) throws IOException {
  if (value == null) {
    // a null object, we do not serialize it
    return;
  }
  // Get the field objectInspector and the field object.
  ObjectInspector foi = field.getFieldObjectInspector();
  // If the field corresponds to a column family in HBase
  if (colMap.qualifierName == null) {
    MapObjectInspector moi = (MapObjectInspector) foi;
    Map<?, ?> map = moi.getMap(value);
    if (map == null) {
      return;
    }
    ObjectInspector koi = moi.getMapKeyObjectInspector();
    ObjectInspector voi = moi.getMapValueObjectInspector();
    for (Map.Entry<?, ?> entry : map.entrySet()) {
      // Get the Key
      // Map keys are required to be primitive and may be serialized in binary format
      byte[] columnQualifierBytes = serialize(entry.getKey(), koi, 3, colMap.binaryStorage.get(0));
      if (columnQualifierBytes == null) {
        continue;
      }
      // Map values may be serialized in binary format when they are primitive and binary
      // serialization is the option selected
      byte[] bytes = serialize(entry.getValue(), voi, 3, colMap.binaryStorage.get(1));
      if (bytes == null) {
        continue;
      }
      put.add(colMap.familyNameBytes, columnQualifierBytes, bytes);
    }
  } else {
    byte[] bytes;
    // If the field is not a primitive but the column mapping declares a primitive,
    // serialize it as a JSON string; otherwise serialize it in the delimited way.
    if (!foi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)
        && colMap.isCategory(ObjectInspector.Category.PRIMITIVE)) {
      // we always serialize the String type using the escaped algorithm for LazyString
      bytes = serialize(SerDeUtils.getJSONString(value, foi),
          PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
    } else {
      // use the serialization option switch to write primitive values as either a variable
      // length UTF8 string or a fixed width bytes if serializing in binary format
      bytes = serialize(value, foi, 1, colMap.binaryStorage.get(0));
    }
    if (bytes == null) {
      return;
    }
    put.add(colMap.familyNameBytes, colMap.qualifierNameBytes, bytes);
  }
}
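When colMap.qualifierName is null (a family-level mapping such as cf:*), the Hive column is a map and serializeField writes one HBase cell per map entry, using the map key as the column qualifier. The stand-alone sketch below (the class name FamilyFanOutExample is made up) illustrates that fan-out with plain strings; it does not call the real serialize(...) helper or the HBase Put API.

import java.util.LinkedHashMap;
import java.util.Map;

public class FamilyFanOutExample {
  public static void main(String[] args) {
    String family = "cf";
    Map<String, String> hiveMapColumn = new LinkedHashMap<>();
    hiveMapColumn.put("a", "1");
    hiveMapColumn.put("b", "2");
    // One cell per map entry: the map key becomes the column qualifier.
    for (Map.Entry<String, String> e : hiveMapColumn.entrySet()) {
      System.out.println(family + ":" + e.getKey() + " -> " + e.getValue());
    }
    // Prints:
    //   cf:a -> 1
    //   cf:b -> 2
  }
}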
Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.
The class GroupByOperator, method shouldBeFlushed.
/**
* Based on user-parameters, should the hash table be flushed.
*
* @param newKeys
* keys for the row under consideration
**/
private boolean shouldBeFlushed(KeyWrapper newKeys) {
  int numEntries = hashAggregations.size();
  long usedMemory;
  float rate;
  // The fixed size for the aggregation class is already known. Get the
  // variable portion of the size every NUMROWSESTIMATESIZE rows.
  if ((numEntriesHashTable == 0) || ((numEntries % NUMROWSESTIMATESIZE) == 0)) {
    // check how much memory is left
    usedMemory = memoryMXBean.getHeapMemoryUsage().getUsed();
    // TODO: there is no easy and reliable way to compute the memory used by the executor threads and on-heap cache.
    // Assuming the used memory is equally divided among all executors.
    usedMemory = isLlap ? usedMemory / numExecutors : usedMemory;
    rate = (float) usedMemory / (float) maxMemory;
    if (rate > memoryThreshold) {
      if (isTez && numEntriesHashTable == 0) {
        return false;
      } else {
        return true;
      }
    }
    for (Integer pos : keyPositionsSize) {
      Object key = newKeys.getKeyArray()[pos.intValue()];
      // Ignore nulls
      if (key != null) {
        if (key instanceof LazyString) {
          totalVariableSize += ((LazyPrimitive<LazyStringObjectInspector, Text>) key).getWritableObject().getLength();
        } else if (key instanceof String) {
          totalVariableSize += ((String) key).length();
        } else if (key instanceof Text) {
          totalVariableSize += ((Text) key).getLength();
        } else if (key instanceof LazyBinary) {
          totalVariableSize += ((LazyPrimitive<LazyBinaryObjectInspector, BytesWritable>) key).getWritableObject().getLength();
        } else if (key instanceof BytesWritable) {
          totalVariableSize += ((BytesWritable) key).getLength();
        } else if (key instanceof ByteArrayRef) {
          totalVariableSize += ((ByteArrayRef) key).getData().length;
        }
      }
    }
    AggregationBuffer[] aggs = hashAggregations.get(newKeys);
    for (int i = 0; i < aggs.length; i++) {
      AggregationBuffer agg = aggs[i];
      if (estimableAggregationEvaluators[i]) {
        totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) agg).estimate();
        continue;
      }
      if (aggrPositions[i] != null) {
        totalVariableSize += estimateSize(agg, aggrPositions[i]);
      }
    }
    numEntriesVarSize++;
    // Update the number of entries that can fit in the hash table
    numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
    if (isLogTraceEnabled) {
      LOG.trace("Hash Aggr: #hash table = " + numEntries + " #max in hash table = " + numEntriesHashTable);
    }
  }
  // flush if necessary
  if (numEntries >= numEntriesHashTable) {
    return true;
  }
  return false;
}
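The flush decision hinges on the estimate numEntriesHashTable = maxHashTblMemory / (fixedRowSize + totalVariableSize / numEntriesVarSize). The worked example below plugs made-up numbers into that formula (none of the constants are Hive defaults) just to show the arithmetic.

public class HashAggrEstimateExample {
  public static void main(String[] args) {
    long maxHashTblMemory = 256L * 1024 * 1024; // assumed 256 MB budget for the hash table
    long fixedRowSize = 200;                    // assumed fixed bytes per aggregation row
    long totalVariableSize = 50_000;            // variable-size bytes accumulated so far
    long numEntriesVarSize = 1_000;             // rows sampled for the variable portion
    // Same formula as in shouldBeFlushed: average row size = fixed part + average variable part.
    int numEntriesHashTable =
        (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
    // 256 MB / (200 + 50) bytes per row ~= 1,073,741 entries before a flush is forced.
    System.out.println(numEntriesHashTable);
  }
}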