Search in sources :

Example 1 with HiveAccumuloMapColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping in project hive by apache.

Class AccumuloRowSerializer, method serialize:

/**
 * Serializes a Hive struct into an Accumulo {@link Mutation}, using the configured
 * rowId offset for the row and the column mappings for every other field.
 *
 * @param obj the Hive row object to serialize
 * @param objInspector inspector for {@code obj}; must be a STRUCT inspector
 * @return a Mutation keyed on the serialized rowId with one update per non-null column
 * @throws SerDeException if the inspector is not a struct inspector
 * @throws IOException on serialization failure from the RowIdFactory
 * @throws IllegalStateException if the rowId offset is outside the struct definition
 * @throws IllegalArgumentException if a column mapping is of an unsupported type
 */
public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDeException, IOException {
    if (objInspector.getCategory() != ObjectInspector.Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
    }
    // Prepare the field ObjectInspectors
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> columnValues = soi.getStructFieldsDataAsList(obj);
    // Fail fast if the configured rowId offset is out of bounds for this struct
    if (rowIdOffset >= fields.size()) {
        throw new IllegalStateException("Attempted to access field outside of definition for struct. Have " + fields.size() + " fields and tried to access offset " + rowIdOffset);
    }
    StructField rowIdField = fields.get(rowIdOffset);
    Object rowIdValue = columnValues.get(rowIdOffset);
    // Serialize the row component using the RowIdFactory. In the normal case, this will just
    // delegate back to the "local" serializeRowId method
    byte[] rowIdBytes = rowIdFactory.serializeRowId(rowIdValue, rowIdField, output);
    // Set that as the row id in the mutation
    Mutation mutation = new Mutation(rowIdBytes);
    // Each column in the row
    for (int i = 0; i < fields.size(); i++) {
        // The rowId field was already consumed above
        if (rowIdOffset == i) {
            continue;
        }
        // Get the relevant information for this column
        StructField field = fields.get(i);
        Object value = columnValues.get(i);
        // Despite having a fixed schema from Hive, we have sparse columns in Accumulo
        if (null == value) {
            continue;
        }
        // The ObjectInspector for the current column (declared here; the previous
        // version also assigned it before the loop, which was a dead store)
        ObjectInspector fieldObjectInspector = field.getFieldObjectInspector();
        // Make sure we got the right implementation of a ColumnMapping
        ColumnMapping mapping = mappings.get(i);
        if (mapping instanceof HiveAccumuloColumnMapping) {
            serializeColumnMapping((HiveAccumuloColumnMapping) mapping, fieldObjectInspector, value, mutation);
        } else if (mapping instanceof HiveAccumuloMapColumnMapping) {
            serializeColumnMapping((HiveAccumuloMapColumnMapping) mapping, fieldObjectInspector, value, mutation);
        } else {
            throw new IllegalArgumentException("Mapping for " + field.getFieldName() + " was not a HiveColumnMapping, but was " + mapping.getClass());
        }
    }
    return mutation;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Mutation(org.apache.accumulo.core.data.Mutation) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 2 with HiveAccumuloMapColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping in project hive by apache.

Class HiveAccumuloTableInputFormat, method getPairCollection:

/**
   * Create col fam/qual pairs from pipe separated values, usually from config object. Ignores
   * rowID.
   *
   * @param columnMappings
   *          The list of ColumnMappings for the given query
   * @return a Set of Pairs of colfams and colquals
   */
protected HashSet<Pair<Text, Text>> getPairCollection(List<ColumnMapping> columnMappings) {
    final HashSet<Pair<Text, Text>> columnsToFetch = new HashSet<Pair<Text, Text>>();
    for (ColumnMapping mapping : columnMappings) {
        if (mapping instanceof HiveAccumuloColumnMapping) {
            HiveAccumuloColumnMapping column = (HiveAccumuloColumnMapping) mapping;
            Text family = new Text(column.getColumnFamily());
            // A null qualifier implies an empty column qualifier
            Text qualifier = (null == column.getColumnQualifier()) ? null : new Text(column.getColumnQualifier());
            columnsToFetch.add(new Pair<Text, Text>(family, qualifier));
        } else if (mapping instanceof HiveAccumuloMapColumnMapping) {
            HiveAccumuloMapColumnMapping mapColumn = (HiveAccumuloMapColumnMapping) mapping;
            // Can't fetch prefix on colqual, must pull the entire qualifier
            // TODO use an iterator to do the filter, server-side.
            columnsToFetch.add(new Pair<Text, Text>(new Text(mapColumn.getColumnFamily()), null));
        }
        // rowID mappings are intentionally skipped — they are not Accumulo columns
    }
    log.info("Computed columns to fetch (" + columnsToFetch + ") from " + columnMappings);
    return columnsToFetch;
}
Also used : HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) Text(org.apache.hadoop.io.Text) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) HashSet(java.util.HashSet) Pair(org.apache.accumulo.core.util.Pair)

Example 3 with HiveAccumuloMapColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping in project hive by apache.

Class TestLazyAccumuloMap, method testMixedSerializationMap:

@Test
public void testMixedSerializationMap() throws SerDeException, IOException {
    AccumuloHiveRow row = new AccumuloHiveRow("row");
    row.add(new Text("cf1"), new Text(toBytes(1)), "2".getBytes());
    row.add(new Text("cf1"), new Text(toBytes(2)), "4".getBytes());
    row.add(new Text("cf1"), new Text(toBytes(3)), "6".getBytes());
    HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, ColumnEncoding.BINARY, ColumnEncoding.STRING, "column", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString());
    // Map of Integer to String
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<int,int>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
    LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
    map.init(row, mapping);
    Assert.assertEquals(3, map.getMapSize());
    Object o = map.getMapValueElement(new IntWritable(1));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject());
    o = map.getMapValueElement(new IntWritable(2));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject());
    o = map.getMapValueElement(new IntWritable(3));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject());
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) Text(org.apache.hadoop.io.Text) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 4 with HiveAccumuloMapColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping in project hive by apache.

Class TestLazyAccumuloMap, method testStringMapWithProjection:

@Test
@Test
public void testStringMapWithProjection() throws SerDeException {
    // Row with cells across three families; the mapping projects only cf1
    AccumuloHiveRow row = new AccumuloHiveRow("row");
    row.add("cf1", "foo", "bar".getBytes());
    row.add("cf1", "bar", "foo".getBytes());
    row.add("cf2", "foo1", "bar1".getBytes());
    row.add("cf3", "bar1", "foo1".getBytes());
    HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, ColumnEncoding.STRING, ColumnEncoding.STRING, "column", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo).toString());
    // Map of String to String, both STRING-encoded
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
    LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
    map.init(row, mapping);
    // Only the two cf1 cells should survive the projection
    Assert.assertEquals(2, map.getMapSize());
    Object o = map.getMapValueElement(new Text("foo"));
    Assert.assertNotNull(o);
    Assert.assertEquals(new Text("bar"), ((LazyString) o).getWritableObject());
    o = map.getMapValueElement(new Text("bar"));
    Assert.assertNotNull(o);
    Assert.assertEquals(new Text("foo"), ((LazyString) o).getWritableObject());
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) Text(org.apache.hadoop.io.Text) Test(org.junit.Test)

Example 5 with HiveAccumuloMapColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping in project hive by apache.

Class TestLazyAccumuloMap, method testBinaryIntMap:

@Test
@Test
public void testBinaryIntMap() throws SerDeException, IOException {
    // Row with three cells in family cf1; keys and values are binary-encoded ints
    AccumuloHiveRow row = new AccumuloHiveRow("row");
    row.add(new Text("cf1"), new Text(toBytes(1)), toBytes(2));
    row.add(new Text("cf1"), new Text(toBytes(2)), toBytes(4));
    row.add(new Text("cf1"), new Text(toBytes(3)), toBytes(6));
    HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, ColumnEncoding.BINARY, ColumnEncoding.BINARY, "column", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString());
    // Map of int to int, both BINARY-encoded
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<int,int>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
    LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
    map.init(row, mapping);
    // All three cells should be visible as map entries: {1:2, 2:4, 3:6}
    Assert.assertEquals(3, map.getMapSize());
    Object o = map.getMapValueElement(new IntWritable(1));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject());
    o = map.getMapValueElement(new IntWritable(2));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject());
    o = map.getMapValueElement(new IntWritable(3));
    Assert.assertNotNull(o);
    Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject());
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) Text(org.apache.hadoop.io.Text) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Aggregations

HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping)7 Text (org.apache.hadoop.io.Text)6 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)5 LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector)4 Test (org.junit.Test)4 ColumnMapping (org.apache.hadoop.hive.accumulo.columns.ColumnMapping)3 HiveAccumuloColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping)3 IntWritable (org.apache.hadoop.io.IntWritable)3 HashSet (java.util.HashSet)1 Mutation (org.apache.accumulo.core.data.Mutation)1 Pair (org.apache.accumulo.core.util.Pair)1 HiveAccumuloRowIdColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping)1 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)1 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)1 LazyObjectBase (org.apache.hadoop.hive.serde2.lazy.LazyObjectBase)1 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)1 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)1 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)1 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)1 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)1