Example 6 with LazyString

Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

From the class TestAccumuloSerDe, method testMapSerialization.

@Test
public void testMapSerialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,map<string,string>");
    properties.setProperty(serdeConstants.COLLECTION_DELIM, ":");
    properties.setProperty(serdeConstants.MAPKEY_DELIM, "=");
    // Mirror the delimiters configured above via COLLECTION_DELIM and MAPKEY_DELIM,
    // so the hand-built test payload matches what the SerDe expects
    char collectionSeparator = ':', kvSeparator = '=';
    serde.initialize(conf, properties);
    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("r1");
    row.add("cf", "vals", ("k1" + kvSeparator + "v1" + collectionSeparator + "k2" + kvSeparator + "v2" + collectionSeparator + "k3" + kvSeparator + "v3").getBytes());
    Object obj = serde.deserialize(row);
    assertNotNull(obj);
    assertTrue(obj instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
    Object field0 = lazyRow.getField(0);
    assertNotNull(field0);
    assertTrue(field0 instanceof LazyString);
    assertEquals(row.getRowId(), ((LazyString) field0).getWritableObject().toString());
    Object field1 = lazyRow.getField(1);
    assertNotNull(field1);
    assertTrue(field1 instanceof LazyMap);
    LazyMap map = (LazyMap) field1;
    Map<Object, Object> untypedMap = map.getMap();
    assertEquals(3, map.getMapSize());
    Set<String> expectedKeys = new HashSet<String>();
    expectedKeys.add("k1");
    expectedKeys.add("k2");
    expectedKeys.add("k3");
    for (Entry<Object, Object> entry : untypedMap.entrySet()) {
        assertNotNull(entry.getKey());
        assertTrue(entry.getKey() instanceof LazyString);
        LazyString key = (LazyString) entry.getKey();
        assertNotNull(entry.getValue());
        assertTrue(entry.getValue() instanceof LazyString);
        LazyString value = (LazyString) entry.getValue();
        String strKey = key.getWritableObject().toString(), strValue = value.getWritableObject().toString();
        assertTrue(expectedKeys.remove(strKey));
        assertEquals(2, strValue.length());
        assertTrue(strValue.startsWith("v"));
        assertTrue(strValue.endsWith(Character.toString(strKey.charAt(1))));
    }
    assertTrue("Did not find expected keys: " + expectedKeys, expectedKeys.isEmpty());
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow), LazyMap (org.apache.hadoop.hive.serde2.lazy.LazyMap), LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), Properties (java.util.Properties), AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow), HashSet (java.util.HashSet), Test (org.junit.Test)
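To make the byte layout this test depends on concrete, here is a minimal standalone sketch (plain Java, no Hive or Accumulo dependencies) of how the cf:vals payload is assembled: key=value pairs joined by the collection delimiter, which LazyMap later splits lazily on deserialization. The helper name buildDelimitedMap is hypothetical, not part of any Hive API.

import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;

public class DelimitedMapSketch {
    // Hypothetical helper: encode a map the way the test's byte payload is laid out,
    // using the same delimiters configured via COLLECTION_DELIM (':') and MAPKEY_DELIM ('=')
    static byte[] buildDelimitedMap(Map<String, String> map, char collectionSep, char kvSep) {
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> e : map.entrySet()) {
            if (sb.length() > 0) {
                sb.append(collectionSep);
            }
            sb.append(e.getKey()).append(kvSep).append(e.getValue());
        }
        return sb.toString().getBytes(StandardCharsets.UTF_8);
    }

    public static void main(String[] args) {
        Map<String, String> m = new LinkedHashMap<>();
        m.put("k1", "v1");
        m.put("k2", "v2");
        m.put("k3", "v3");
        // Prints k1=v1:k2=v2:k3=v3, the exact payload written to cf:vals in the test
        System.out.println(new String(buildDelimitedMap(m, ':', '='), StandardCharsets.UTF_8));
    }
}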

Example 7 with LazyString

Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

From the class TestAccumuloSerDe, method testCompositeKeyDeserialization.

@Test
public void testCompositeKeyDeserialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:string,col2:string,col3:string>,string");
    properties.setProperty(DelimitedAccumuloRowIdFactory.ACCUMULO_COMPOSITE_DELIMITER, "_");
    properties.setProperty(AccumuloSerDeParameters.COMPOSITE_ROWID_FACTORY, DelimitedAccumuloRowIdFactory.class.getName());
    serde.initialize(conf, properties);
    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("p1_p2_p3");
    row.add("cf", "f1", "v1".getBytes());
    Object obj = serde.deserialize(row);
    assertTrue(obj instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
    Object field0 = lazyRow.getField(0);
    assertNotNull(field0);
    assertTrue(field0 instanceof LazyStruct);
    LazyStruct struct = (LazyStruct) field0;
    List<Object> fields = struct.getFieldsAsList();
    assertEquals(3, fields.size());
    for (int i = 0; i < fields.size(); i++) {
        assertEquals(LazyString.class, fields.get(i).getClass());
        assertEquals("p" + (i + 1), fields.get(i).toString());
    }
    Object field1 = lazyRow.getField(1);
    assertNotNull(field1);
    assertTrue("Expected instance of LazyString but was " + field1.getClass(), field1 instanceof LazyString);
    assertEquals("v1", field1.toString());
}
Also used: LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), Configuration (org.apache.hadoop.conf.Configuration), LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow), Properties (java.util.Properties), LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct), AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow), Test (org.junit.Test)
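Conceptually, the composite-key deserialization above amounts to splitting the row id on the configured delimiter and exposing each part as one struct field. A minimal sketch of that idea; this is an illustration under those assumptions, not the actual DelimitedAccumuloRowIdFactory implementation.

import java.util.Arrays;
import java.util.List;

public class CompositeRowIdSketch {
    // Split a composite row id on the delimiter configured via
    // ACCUMULO_COMPOSITE_DELIMITER ("_" in the test above)
    static List<String> splitRowId(String rowId, String delimiter) {
        // the limit of -1 keeps trailing empty components instead of dropping them
        return Arrays.asList(rowId.split(java.util.regex.Pattern.quote(delimiter), -1));
    }

    public static void main(String[] args) {
        // Yields [p1, p2, p3], matching col1..col3 of the struct in the test
        System.out.println(splitRowId("p1_p2_p3", "_"));
    }
}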

Example 8 with LazyString

Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

From the class HBaseRowSerializer, method serializeKeyField.

byte[] serializeKeyField(Object keyValue, StructField keyField, ColumnMapping keyMapping) throws IOException {
    if (keyValue == null) {
        throw new IOException("HBase row key cannot be NULL");
    }
    ObjectInspector keyFieldOI = keyField.getFieldObjectInspector();
    if (!keyFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE) && keyMapping.isCategory(ObjectInspector.Category.PRIMITIVE)) {
        // a non-primitive field mapped to a primitive column is rendered as a JSON string,
        // which is then serialized with the escaped algorithm used for LazyString
        return serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI), PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
    }
    // the binary-storage option decides whether a primitive value is written as a
    // variable-length UTF-8 string or as fixed-width bytes
    boolean writeBinary = keyMapping.binaryStorage.get(0);
    return serialize(keyValue, keyFieldOI, 1, writeBinary);
}
Also used: ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector), MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector), IOException (java.io.IOException)
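The writeBinary switch at the end of serializeKeyField chooses between two encodings for a primitive key. A rough standalone illustration of the difference for a long value, assuming nothing about Hive's internal serialize method; the class and method names here are hypothetical.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class KeyEncodingSketch {
    // Variable-length form: the decimal string rendering, as LazyString would store it
    static byte[] asUtf8String(long value) {
        return Long.toString(value).getBytes(StandardCharsets.UTF_8);
    }

    // Fixed-width form: always 8 bytes, big-endian, suitable for byte-ordered row keys
    static byte[] asFixedWidth(long value) {
        return ByteBuffer.allocate(Long.BYTES).putLong(value).array();
    }

    public static void main(String[] args) {
        System.out.println(asUtf8String(42L).length);   // 2 bytes ("42")
        System.out.println(asFixedWidth(42L).length);   // 8 bytes
    }
}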

Example 9 with LazyString

Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

From the class HBaseRowSerializer, method serializeField.

private void serializeField(Object value, StructField field, ColumnMapping colMap, Put put) throws IOException {
    if (value == null) {
        // null values are not serialized
        return;
    }
    // Get the field objectInspector and the field object.
    ObjectInspector foi = field.getFieldObjectInspector();
    // If the field corresponds to a column family in HBase
    if (colMap.qualifierName == null) {
        MapObjectInspector moi = (MapObjectInspector) foi;
        Map<?, ?> map = moi.getMap(value);
        if (map == null) {
            return;
        }
        ObjectInspector koi = moi.getMapKeyObjectInspector();
        ObjectInspector voi = moi.getMapValueObjectInspector();
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            // Get the Key
            // Map keys are required to be primitive and may be serialized in binary format
            byte[] columnQualifierBytes = serialize(entry.getKey(), koi, 3, colMap.binaryStorage.get(0));
            if (columnQualifierBytes == null) {
                continue;
            }
            // Map values may be serialized in binary format when they are primitive and binary
            // serialization is the option selected
            byte[] bytes = serialize(entry.getValue(), voi, 3, colMap.binaryStorage.get(1));
            if (bytes == null) {
                continue;
            }
            put.add(colMap.familyNameBytes, columnQualifierBytes, bytes);
        }
    } else {
        byte[] bytes;
        // Serialize a non-primitive field mapped to a primitive column as a JSON string;
        // otherwise serialize it in the plain delimited way.
        if (!foi.getCategory().equals(ObjectInspector.Category.PRIMITIVE) && colMap.isCategory(ObjectInspector.Category.PRIMITIVE)) {
            // the JSON string is serialized with the escaped algorithm used for LazyString
            bytes = serialize(SerDeUtils.getJSONString(value, foi), PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
        } else {
            // the binary-storage option decides whether a primitive value is written as a
            // variable-length UTF-8 string or as fixed-width bytes
            bytes = serialize(value, foi, 1, colMap.binaryStorage.get(0));
        }
        if (bytes == null) {
            return;
        }
        put.add(colMap.familyNameBytes, colMap.qualifierNameBytes, bytes);
    }
}
Also used: ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector), MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector), Map (java.util.Map)
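The column-family branch above reduces to one HBase cell per map entry: the map key becomes the column qualifier and the map value becomes the cell value, with null keys or values skipped. A schematic sketch with plain Java types in place of ObjectInspectors; putCell is a hypothetical stand-in for Put.add, and serializeFamilyMap is not a real Hive method.

import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;

public class FamilyMapSketch {
    // Hypothetical stand-in for Put.add(family, qualifier, value)
    static void putCell(byte[] family, byte[] qualifier, byte[] value) {
        System.out.printf("cell: %s:%s = %s%n",
                new String(family, StandardCharsets.UTF_8),
                new String(qualifier, StandardCharsets.UTF_8),
                new String(value, StandardCharsets.UTF_8));
    }

    // One cell per map entry: the key becomes the qualifier, the value the cell value.
    // Entries with a null key or value are skipped, mirroring the continue statements above.
    static void serializeFamilyMap(byte[] family, Map<String, String> map) {
        for (Map.Entry<String, String> e : map.entrySet()) {
            if (e.getKey() == null || e.getValue() == null) {
                continue;
            }
            putCell(family,
                    e.getKey().getBytes(StandardCharsets.UTF_8),
                    e.getValue().getBytes(StandardCharsets.UTF_8));
        }
    }

    public static void main(String[] args) {
        Map<String, String> m = new LinkedHashMap<>();
        m.put("q1", "a");
        m.put("q2", "b");
        serializeFamilyMap("cf".getBytes(StandardCharsets.UTF_8), m);
    }
}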

Example 10 with LazyString

Use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

From the class GroupByOperator, method shouldBeFlushed.

/**
   * Decide, based on user parameters, whether the hash table should be flushed.
   *
   * @param newKeys
   *          keys for the row under consideration
   **/
private boolean shouldBeFlushed(KeyWrapper newKeys) {
    int numEntries = hashAggregations.size();
    long usedMemory;
    float rate;
    // Re-estimate the variable portion of the row size every NUMROWSESTIMATESIZE rows.
    if ((numEntriesHashTable == 0) || ((numEntries % NUMROWSESTIMATESIZE) == 0)) {
        // check how much heap memory is currently in use
        usedMemory = memoryMXBean.getHeapMemoryUsage().getUsed();
        // TODO: there is no easy and reliable way to compute the memory used by the executor threads and on-heap cache.
        // Assuming the used memory is equally divided among all executors.
        usedMemory = isLlap ? usedMemory / numExecutors : usedMemory;
        rate = (float) usedMemory / (float) maxMemory;
        if (rate > memoryThreshold) {
            // on Tez, do not flush before the first capacity estimate has been computed
            return !(isTez && numEntriesHashTable == 0);
        }
        for (Integer pos : keyPositionsSize) {
            Object key = newKeys.getKeyArray()[pos.intValue()];
            // Ignore nulls
            if (key != null) {
                if (key instanceof LazyString) {
                    totalVariableSize += ((LazyPrimitive<LazyStringObjectInspector, Text>) key).getWritableObject().getLength();
                } else if (key instanceof String) {
                    totalVariableSize += ((String) key).length();
                } else if (key instanceof Text) {
                    totalVariableSize += ((Text) key).getLength();
                } else if (key instanceof LazyBinary) {
                    totalVariableSize += ((LazyPrimitive<LazyBinaryObjectInspector, BytesWritable>) key).getWritableObject().getLength();
                } else if (key instanceof BytesWritable) {
                    totalVariableSize += ((BytesWritable) key).getLength();
                } else if (key instanceof ByteArrayRef) {
                    totalVariableSize += ((ByteArrayRef) key).getData().length;
                }
            }
        }
        AggregationBuffer[] aggs = hashAggregations.get(newKeys);
        for (int i = 0; i < aggs.length; i++) {
            AggregationBuffer agg = aggs[i];
            if (estimableAggregationEvaluators[i]) {
                totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) agg).estimate();
                continue;
            }
            if (aggrPositions[i] != null) {
                totalVariableSize += estimateSize(agg, aggrPositions[i]);
            }
        }
        numEntriesVarSize++;
        // Update the number of entries that can fit in the hash table
        numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
        if (isLogTraceEnabled) {
            LOG.trace("Hash Aggr: #hash table = " + numEntries + " #max in hash table = " + numEntriesHashTable);
        }
    }
    // flush if necessary
    return numEntries >= numEntriesHashTable;
}
Also used: GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator), LazyBinaryObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector), Text (org.apache.hadoop.io.Text), BytesWritable (org.apache.hadoop.io.BytesWritable), LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), LazyPrimitive (org.apache.hadoop.hive.serde2.lazy.LazyPrimitive), LazyBinary (org.apache.hadoop.hive.serde2.lazy.LazyBinary), ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef), UnionObject (org.apache.hadoop.hive.serde2.objectinspector.UnionObject), AggregationBuffer (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)
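The sizing arithmetic buried in this method can be isolated: the capacity estimate is the memory budget divided by the fixed row size plus the running average of the variable part, and a flush triggers on either heap pressure or reaching that capacity. A toy sketch of just the heuristic, with every input passed as a parameter; the names mirror the fields above but the class itself is hypothetical, and the Tez special-casing is omitted.

public class HashAggrSizingSketch {
    // Estimate how many hash-table entries fit in the memory budget, following
    // numEntriesHashTable = maxHashTblMemory / (fixedRowSize + avgVariableSize)
    static int estimateCapacity(long maxHashTblMemory, long fixedRowSize,
                                long totalVariableSize, long numEntriesVarSize) {
        long avgVariableSize = totalVariableSize / Math.max(1, numEntriesVarSize);
        return (int) (maxHashTblMemory / (fixedRowSize + avgVariableSize));
    }

    // Flush when heap pressure crosses the threshold or the estimated capacity is reached
    static boolean shouldFlush(long usedMemory, long maxMemory, float memoryThreshold,
                               int numEntries, int capacity) {
        float rate = (float) usedMemory / (float) maxMemory;
        return rate > memoryThreshold || numEntries >= capacity;
    }

    public static void main(String[] args) {
        int capacity = estimateCapacity(64L << 20, 256, 12_000, 100); // ~64 MB budget
        System.out.println("estimated capacity = " + capacity);
        System.out.println("flush? " + shouldFlush(900L << 20, 1024L << 20, 0.9f, 50_000, capacity));
    }
}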

Aggregations

LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString): 10
Text (org.apache.hadoop.io.Text): 10
Test (org.junit.Test): 9
Configuration (org.apache.hadoop.conf.Configuration): 8
Properties (java.util.Properties): 6
AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow): 6
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 6
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 6
LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow): 4
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 3
Path (org.apache.hadoop.fs.Path): 3
ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef): 3
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 3
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 3
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 3
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 3
DataOutputStream (java.io.DataOutputStream): 2
IOException (java.io.IOException): 2
Date (java.sql.Date): 2
Timestamp (java.sql.Timestamp): 2