Search in sources :

Example 76 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class TestAccumuloSerDe method testStructOfMapSerialization.

/**
 * Round-trips a row whose row ID is a struct of two string-to-string maps:
 * the lazy struct is serialized into a Mutation, then the resulting row is
 * read back through a LazyAccumuloRow and both maps are compared with the
 * expected contents.
 */
@Test
public void testStructOfMapSerialization() throws IOException, SerDeException {
    // Hive schema: row struct<map1:map<string,string>,map2:map<string,string>>, col string
    List<String> columns = Arrays.asList("row", "col");
    List<String> structColNames = Arrays.asList("map1", "map2");
    TypeInfo stringToStringMap = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    TypeInfo rowIdType = TypeInfoFactory.getStructTypeInfo(structColNames, Arrays.asList(stringToStringMap, stringToStringMap));
    List<TypeInfo> types = Arrays.<TypeInfo>asList(rowIdType, TypeInfoFactory.stringTypeInfo);

    // No delimiter properties are set, so the SerDe parameters carry the default separators.
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    byte[] seps = serDeParams.getSeparators();

    // Object inspectors, built innermost first: string -> map -> row ID struct -> whole row.
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(
        stringTypeInfo, new byte[] { 0 }, 0,
        serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
        stringOI, stringOI, seps[3], seps[4],
        serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector rowStructOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
        structColNames, Arrays.<ObjectInspector>asList(mapOI, mapOI), seps[2],
        serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
        columns, Arrays.<ObjectInspector>asList(rowStructOI, stringOI), seps[1],
        serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());

    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());

    // The test data matches the inspectors above: \2 (seps[1]) splits the Hive columns,
    // \3 (seps[2]) splits the two maps of the row ID struct, \4 (seps[3]) splits map
    // entries and \5 (seps[4]) splits a key from its value.
    String accumuloRow = "key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21";
    ByteArrayRef byteRef = new ByteArrayRef();
    LazyStruct entireStruct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    byteRef.setData((accumuloRow + "\2foo").getBytes());
    entireStruct.init(byteRef, 0, byteRef.getData().length);

    // Serialization: the struct becomes the row ID, "foo" becomes the single cf:cq update.
    Mutation m = serializer.serialize(entireStruct, structOI);
    Assert.assertArrayEquals(accumuloRow.getBytes(), m.getRow());
    Assert.assertEquals(1, m.getUpdates().size());
    ColumnUpdate update = m.getUpdates().get(0);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getValue()));

    // Deserialization: rebuild the row from the mutation's row ID and read it lazily.
    AccumuloHiveRow haRow = new AccumuloHiveRow(new String(m.getRow()));
    haRow.add("cf", "cq", "foo".getBytes());
    LazyAccumuloRow lazyAccumuloRow = new LazyAccumuloRow(structOI);
    lazyAccumuloRow.init(haRow, accumuloSerDeParams.getColumnMappings(), accumuloSerDeParams.getRowIdFactory());
    List<Object> objects = lazyAccumuloRow.getFieldsAsList();
    Assert.assertEquals(2, objects.size());
    Assert.assertEquals("foo", objects.get(1).toString());
    LazyStruct rowStruct = (LazyStruct) objects.get(0);
    List<Object> rowObjects = rowStruct.getFieldsAsList();
    Assert.assertEquals(2, rowObjects.size());

    // Expected contents of the two maps held by the row ID struct.
    Map<String, String> map1 = new HashMap<String, String>();
    map1.put("key10", "value10");
    map1.put("key11", "value11");
    Map<String, String> map2 = new HashMap<String, String>();
    map2.put("key20", "value20");
    map2.put("key21", "value21");
    List<Map<String, String>> expectedMaps = Arrays.asList(map1, map2);
    String[] logPrefixes = { "Actual map 1: ", "Actual map 2: " };

    // Compare each lazy map with its expectation after converting keys and values to Strings.
    for (int i = 0; i < expectedMaps.size(); i++) {
        LazyMap lazyMap = (LazyMap) rowObjects.get(i);
        Map<?, ?> actualMap = lazyMap.getMap();
        System.out.println(logPrefixes[i] + actualMap);
        Map<String, String> actualAsStrings = new HashMap<String, String>();
        for (Map.Entry<?, ?> entry : actualMap.entrySet()) {
            actualAsStrings.put(entry.getKey().toString(), entry.getValue().toString());
        }
        Assert.assertEquals(expectedMaps.get(i), actualAsStrings);
    }
}
Also used : ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) HashMap(java.util.HashMap) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) Properties(java.util.Properties) AccumuloHiveRow(org.apache.hadoop.hive.accumulo.AccumuloHiveRow) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyAccumuloRow(org.apache.hadoop.hive.accumulo.LazyAccumuloRow) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Mutation(org.apache.accumulo.core.data.Mutation) Test(org.junit.Test)

Example 77 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class TestAccumuloRowSerializer method testMapSerialization.

/**
 * Serializes a row of (string row ID, map<string,string>) where the map column is
 * mapped to the whole "cf" column family, and checks that every map entry turns
 * into its own column update on the resulting Mutation.
 */
@Test
public void testMapSerialization() throws IOException, SerDeException {
    List<String> columns = Arrays.asList("row", "col");
    TypeInfo mapOfStrings = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, mapOfStrings);
    List<String> typeNames = new ArrayList<String>(types.size());
    for (TypeInfo columnType : types) {
        typeNames.add(columnType.getTypeName());
    }

    // Explicit delimiters: ' ' between columns, ',' between map entries, ':' between key and value.
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:*");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.COLLECTION_DELIM, ",");
    tableProperties.setProperty(serdeConstants.MAPKEY_DELIM, ":");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();

    // Object inspectors that use the same delimiters as the table properties above.
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(
        stringTypeInfo, new byte[] { 0 }, 0,
        serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
        stringOI, stringOI, (byte) ',', (byte) ':',
        serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
        columns, Arrays.asList(stringOI, mapOI), (byte) ' ',
        serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());

    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());

    // Back a LazyStruct with the raw row text and serialize it.
    LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef byteRef = new ByteArrayRef();
    byteRef.setData("row1 cq1:10,cq2:20,cq3:value".getBytes());
    obj.init(byteRef, 0, byteRef.getData().length);
    Mutation m = (Mutation) serializer.serialize(obj, structOI);

    Assert.assertArrayEquals("row1".getBytes(), m.getRow());

    // One update per map entry, in input order: { qualifier, value }.
    String[][] expectedCells = { { "cq1", "10" }, { "cq2", "20" }, { "cq3", "value" } };
    List<ColumnUpdate> updates = m.getUpdates();
    Assert.assertEquals(expectedCells.length, updates.size());
    for (int i = 0; i < expectedCells.length; i++) {
        ColumnUpdate update = updates.get(i);
        Assert.assertEquals("cf", new String(update.getColumnFamily()));
        Assert.assertEquals(expectedCells[i][0], new String(update.getColumnQualifier()));
        Assert.assertEquals(expectedCells[i][1], new String(update.getValue()));
    }
}
Also used : LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) ArrayList(java.util.ArrayList) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Test(org.junit.Test)

Example 78 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class TestDefaultAccumuloRowIdFactory method testCorrectComplexInspectors.

/**
 * Initializes an AccumuloSerDe with a struct row ID and a map column, then checks
 * the class of each column object inspector it builds and the separators those
 * inspectors were given.
 */
@Test
public void testCorrectComplexInspectors() throws SerDeException {
    Properties properties = new Properties();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:int,col2:int>,map<string,string>");

    Configuration conf = new Configuration();
    AccumuloSerDe accumuloSerDe = new AccumuloSerDe();
    accumuloSerDe.initialize(conf, properties);

    AccumuloSerDeParameters params = accumuloSerDe.getParams();
    AccumuloRowIdFactory factory = params.getRowIdFactory();
    List<TypeInfo> columnTypes = params.getHiveColumnTypes();
    LazySerDeParameters serDeParams = params.getSerDeParameters();
    List<ObjectInspector> inspectors = accumuloSerDe.getColumnObjectInspectors(
        columnTypes, serDeParams, params.getColumnMapper().getColumnMappings(), factory);

    // One inspector per Hive column, each of the expected class.
    Assert.assertEquals(2, inspectors.size());
    Assert.assertEquals(LazySimpleStructObjectInspector.class, inspectors.get(0).getClass());
    Assert.assertEquals(LazyMapObjectInspector.class, inspectors.get(1).getClass());

    // Separator expected on the struct inspector.
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) inspectors.get(0);
    Assert.assertEquals(2, (int) structOI.getSeparator());

    // Item and key/value separators expected on the map inspector.
    LazyMapObjectInspector mapOI = (LazyMapObjectInspector) inspectors.get(1);
    Assert.assertEquals(2, (int) mapOI.getItemSeparator());
    Assert.assertEquals(3, (int) mapOI.getKeyValueSeparator());
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyIntObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 79 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class TestDefaultAccumuloRowIdFactory method testCorrectPrimitiveInspectors.

/**
 * Initializes an AccumuloSerDe with a string row ID and an int column, then checks
 * that the column object inspectors it builds are the lazy string and lazy int
 * inspectors, in that order.
 */
@Test
public void testCorrectPrimitiveInspectors() throws SerDeException {
    Properties properties = new Properties();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,int");

    Configuration conf = new Configuration();
    AccumuloSerDe accumuloSerDe = new AccumuloSerDe();
    accumuloSerDe.initialize(conf, properties);

    AccumuloSerDeParameters params = accumuloSerDe.getParams();
    AccumuloRowIdFactory factory = params.getRowIdFactory();
    List<TypeInfo> columnTypes = params.getHiveColumnTypes();
    LazySerDeParameters serDeParams = params.getSerDeParameters();
    List<ObjectInspector> inspectors = accumuloSerDe.getColumnObjectInspectors(
        columnTypes, serDeParams, params.getColumnMapper().getColumnMappings(), factory);

    // One inspector per Hive column, matching the declared primitive types.
    Assert.assertEquals(2, inspectors.size());
    Assert.assertEquals(LazyStringObjectInspector.class, inspectors.get(0).getClass());
    Assert.assertEquals(LazyIntObjectInspector.class, inspectors.get(1).getClass());
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyIntObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 80 with SerDeException

use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.

the class ColumnMappings method setHiveColumnDescription.

/**
 * Attaches the Hive column name and type to each entry of {@code columnsMapping}
 * and validates that the types are compatible with the mapping.
 *
 * @param serdeName   prefix used in every error message
 * @param columnNames Hive column names, one per mapping entry
 * @param columnTypes Hive column types, read at the same indexes as {@code columnNames}
 * @throws SerDeException if the number of column names differs from the number of
 *         mapping entries, if a mapping entry without a qualifier (that is neither the
 *         row key nor the timestamp) is not typed as a map with a primitive key, or if
 *         a timestamp entry is neither of timestamp nor of long category
 */
void setHiveColumnDescription(String serdeName, List<String> columnNames, List<TypeInfo> columnTypes) throws SerDeException {
    final int columnCount = columnNames.size();
    if (columnsMapping.length != columnCount) {
        throw new SerDeException(serdeName + ": columns has " + columnCount + " elements while hbase.columns.mapping has " + columnsMapping.length + " elements" + " (counting the key if implicit)");
    }
    for (int idx = 0; idx < columnCount; idx++) {
        final ColumnMapping mapping = columnsMapping[idx];
        mapping.columnName = columnNames.get(idx);
        final TypeInfo typeInfo = columnTypes.get(idx);
        // columnType must be assigned before the isCategory(...) checks further down.
        mapping.columnType = typeInfo;

        // An entry with no qualifier that is neither row key nor timestamp must be typed
        // Map<key, ?> where key extends LazyPrimitive<?, ?> and thus has Category.PRIMITIVE.
        final boolean needsMapType = mapping.qualifierName == null && !mapping.hbaseRowKey && !mapping.hbaseTimestamp;
        if (needsMapType) {
            // The cast is only reached once the category is known to be MAP.
            final boolean mapWithPrimitiveKey = typeInfo.getCategory() == ObjectInspector.Category.MAP
                && ((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory() == ObjectInspector.Category.PRIMITIVE;
            if (!mapWithPrimitiveKey) {
                throw new SerDeException(serdeName + ": hbase column family '" + mapping.familyName + "' should be mapped to Map<? extends LazyPrimitive<?, ?>,?>, that is " + "the Key for the map should be of primitive type, but is mapped to " + typeInfo.getTypeName());
            }
        }

        // The timestamp entry accepts only the timestamp or long primitive categories.
        if (mapping.hbaseTimestamp) {
            final boolean timestampCompatible = mapping.isCategory(PrimitiveCategory.TIMESTAMP) || mapping.isCategory(PrimitiveCategory.LONG);
            if (!timestampCompatible) {
                throw new SerDeException(serdeName + ": timestamp columns should be of " + "timestamp or bigint type, but is mapped to " + typeInfo.getTypeName());
            }
        }
    }
}
Also used : MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Aggregations

SerDeException (org.apache.hadoop.hive.serde2.SerDeException)124 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)108 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)100 ArrayList (java.util.ArrayList)98 Properties (java.util.Properties)59 Test (org.junit.Test)59 Configuration (org.apache.hadoop.conf.Configuration)52 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)52 Text (org.apache.hadoop.io.Text)50 IOException (java.io.IOException)37 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)33 Schema (org.apache.avro.Schema)31 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)31 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)28 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)28 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)24 Put (org.apache.hadoop.hbase.client.Put)22 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)22 IntWritable (org.apache.hadoop.io.IntWritable)22 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)21