Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
The class TestLazyArrayMapStruct, method testNestedinArrayAtLevel.
/**
 * Tests deserialization and serialization round-trip of a type of the given
 * category nested inside the given number of levels of arrays.
 *
 * @param nestingLevel the total number of levels of nesting in the column type
 * @param dtype        the category of the innermost type (LIST, MAP, STRUCT, or UNION)
 * @param tableProp    the table properties used to initialize the SerDe
 * @throws SerDeException if SerDe initialization or serialization fails
 */
private void testNestedinArrayAtLevel(int nestingLevel, ObjectInspector.Category dtype, Properties tableProp)
    throws SerDeException {
  // Create a type with nestingLevel levels of nesting by wrapping the inner
  // schema for dtype in (nestingLevel - 1) levels of array<>
  String inSchema = null;
  switch (dtype) {
    case LIST:
      inSchema = "array<tinyint>";
      break;
    case MAP:
      inSchema = "map<string,int>";
      break;
    case STRUCT:
      inSchema = "struct<s:string,i:tinyint>";
      break;
    case UNION:
      inSchema = "uniontype<string,tinyint>";
      break;
    default:
      fail("type not supported by test case");
  }
  StringBuilder schema = new StringBuilder(inSchema);
  for (int i = 0; i < nestingLevel - 1; i++) {
    schema.insert(0, "array<");
    schema.append(">");
  }
  System.err.println("Testing nesting level " + nestingLevel + ". Using schema " + schema);

  // Create the SerDe
  LazySimpleSerDe serDe = new LazySimpleSerDe();
  Configuration conf = new Configuration();
  tableProp.setProperty("columns", "narray");
  tableProp.setProperty("columns.types", schema.toString());
  serDe.initialize(conf, tableProp, null);
  LazySerDeParameters serdeParams =
      new LazySerDeParameters(conf, tableProp, LazySimpleSerDe.class.getName());

  // Create the serialized bytes for the type; the separator at index
  // nestingLevel delimits the innermost collection
  byte[] separators = serdeParams.getSeparators();
  System.err.println("Using separator " + (char) separators[nestingLevel]);
  byte[] serializedRow = null;
  switch (dtype) {
    case LIST:
      serializedRow = new byte[] { '8', separators[nestingLevel], '9' };
      break;
    case MAP:
      byte kvSep = separators[nestingLevel + 1];
      byte kvPairSep = separators[nestingLevel];
      serializedRow = new byte[] { '1', kvSep, '1', kvPairSep, '2', kvSep, '2' };
      break;
    case STRUCT:
      serializedRow = new byte[] { '8', separators[nestingLevel], '9' };
      break;
    case UNION:
      serializedRow = new byte[] { '0', separators[nestingLevel], '9' };
      break;
    default:
      fail("type not supported by test case");
  }

  // Create a LazyStruct over the serialized bytes with the expected separators
  StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(oi);
  TestLazyPrimitive.initLazyObject(struct, serializedRow, 0, serializedRow.length);

  // Get fields out of the lazy struct and check that they match the expected
  // results.
  // Get the first-level array
  LazyArray array = (LazyArray) struct.getField(0);
  // Peel off the intermediate levels to reach the array that wraps the inner type
  for (int i = 0; i < nestingLevel - 2; i++) {
    array = (LazyArray) array.getListElementObject(0);
  }

  // Verify the deserialized values for dtype
  switch (dtype) {
    case LIST:
      LazyArray array1 = (LazyArray) array.getListElementObject(0);
      // Check the elements of the innermost array
      assertEquals(2, array1.getListLength());
      assertEquals(new ByteWritable((byte) 8), ((LazyByte) array1.getListElementObject(0)).getWritableObject());
      assertEquals(new ByteWritable((byte) 9), ((LazyByte) array1.getListElementObject(1)).getWritableObject());
      break;
    case MAP:
      LazyMap lazyMap = (LazyMap) array.getListElementObject(0);
      Map map = lazyMap.getMap();
      System.err.println(map);
      // Check the entries of the innermost map
      assertEquals(2, map.size());
      Iterator<Map.Entry<LazyString, LazyInteger>> it = map.entrySet().iterator();
      Entry<LazyString, LazyInteger> e1 = it.next();
      assertEquals(e1.getKey().getWritableObject(), new Text(new byte[] { '1' }));
      assertEquals(e1.getValue().getWritableObject(), new IntWritable(1));
      Entry<LazyString, LazyInteger> e2 = it.next();
      assertEquals(e2.getKey().getWritableObject(), new Text(new byte[] { '2' }));
      assertEquals(e2.getValue().getWritableObject(), new IntWritable(2));
      break;
    case STRUCT:
      LazyStruct innerStruct = (LazyStruct) array.getListElementObject(0);
      // Check the fields of the innermost struct
      assertEquals(2, innerStruct.getFieldsAsList().size());
      assertEquals(new Text(new byte[] { '8' }), ((LazyString) innerStruct.getField(0)).getWritableObject());
      assertEquals(new ByteWritable((byte) 9), ((LazyByte) innerStruct.getField(1)).getWritableObject());
      break;
    case UNION:
      LazyUnion lazyUnion = (LazyUnion) array.getListElementObject(0);
      // Check the value of the innermost union; tag 0 selects the string member
      assertEquals(new Text(new byte[] { '9' }), ((LazyString) lazyUnion.getField()).getWritableObject());
      break;
    default:
      fail("type not supported by test case");
  }

  // Test serialization: the round trip should reproduce the original bytes
  Text serializedText = (Text) serDe.serialize(struct.getObject(), serDe.getObjectInspector());
  org.junit.Assert.assertArrayEquals(serializedRow, serializedText.getBytes());
}
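A note on the mechanism this test leans on: LazySimpleSerDe assigns a distinct separator byte to each nesting level of the column type, so the collection at depth n is delimited by getSeparators()[n]. Below is a minimal standalone sketch using only the LazySerDeParameters calls already exercised above; the SeparatorDemo class name and the three-level array schema are illustrative choices, not part of the original test.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class SeparatorDemo {
  public static void main(String[] args) throws SerDeException {
    Properties props = new Properties();
    props.setProperty("columns", "narray");
    props.setProperty("columns.types", "array<array<array<tinyint>>>");
    LazySerDeParameters params =
        new LazySerDeParameters(new Configuration(), props, LazySimpleSerDe.class.getName());
    // Index 0 is the top-level field separator (Ctrl-A by default); each
    // deeper collection level gets the next separator byte in the table
    byte[] seps = params.getSeparators();
    for (int level = 0; level < 4; level++) {
      System.out.printf("level %d -> byte 0x%02x%n", level, seps[level]);
    }
  }
}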
Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
The class MultiDelimitSerDe, method initialize.
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // Get the SerDe parameters
  super.initialize(conf, tbl);
  serdeParams = new LazySerDeParameters(conf, tbl, getClass().getName());
  fieldDelimited = tbl.getProperty(serdeConstants.FIELD_DELIM);
  if (fieldDelimited == null || fieldDelimited.isEmpty()) {
    throw new SerDeException("This table does not have serde property \"field.delim\"!");
  }
  // Get the collection separator and map key separator
  // TODO: use serdeConstants.COLLECTION_DELIM when the typo is fixed
  collSep = LazyUtils.getByte(tbl.getProperty(COLLECTION_DELIM), DEFAULT_SEPARATORS[1]);
  keySep = LazyUtils.getByte(tbl.getProperty(serdeConstants.MAPKEY_DELIM), DEFAULT_SEPARATORS[2]);
  serdeParams.setSeparator(1, collSep);
  serdeParams.setSeparator(2, keySep);
  // Create the ObjectInspectors for the fields
  cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams.getColumnNames(),
      serdeParams.getColumnTypes(), serdeParams.getSeparators(), serdeParams.getNullSequence(),
      serdeParams.isLastColumnTakesRest(), serdeParams.isEscaped(), serdeParams.getEscapeChar());
  cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(cachedObjectInspector);
  assert serdeParams.getColumnNames().size() == serdeParams.getColumnTypes().size();
  numColumns = serdeParams.getColumnNames().size();
}
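For context, MultiDelimitSerDe refuses to initialize without an explicit field.delim, unlike LazySimpleSerDe, which falls back to the Ctrl-A default. A hypothetical initialization sketch follows; only the initialize(Configuration, Properties) entry point and the field.delim requirement are taken from the method above, and the package import and delimiter value are assumptions.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
// Package is an assumption: MultiDelimitSerDe has lived in hive-contrib
// (org.apache.hadoop.hive.contrib.serde2) and later in hive-serde
import org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe;

public class MultiDelimitInitDemo {
  public static void main(String[] args) throws SerDeException {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    // Required: without field.delim, initialize() throws SerDeException
    tbl.setProperty(serdeConstants.FIELD_DELIM, "|~|");
    MultiDelimitSerDe serde = new MultiDelimitSerDe();
    serde.initialize(new Configuration(), tbl);
  }
}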
Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project cdap by caskdata.
The class ObjectSerializer, method fromLazyObject.
// Recursively converts a lazily deserialized Hive object into plain Java
// values and collections, driven by the TypeInfo tree
private Object fromLazyObject(TypeInfo type, Object data) {
  if (data == null) {
    return null;
  }
  switch (type.getCategory()) {
    case PRIMITIVE:
      Writable writable = ((LazyPrimitive) data).getWritableObject();
      return fromWritable(writable);
    case LIST:
      ListTypeInfo listType = (ListTypeInfo) type;
      TypeInfo listElementType = listType.getListElementTypeInfo();
      List<Object> list = ((LazyArray) data).getList();
      if (list.isEmpty()) {
        return ImmutableList.of();
      }
      Object[] arrayContent = new Object[list.size()];
      for (int i = 0; i < arrayContent.length; i++) {
        arrayContent[i] = fromLazyObject(listElementType, list.get(i));
      }
      return arrayContent;
    case MAP:
      MapTypeInfo mapType = (MapTypeInfo) type;
      Map<Object, Object> mapContent = Maps.newConcurrentMap();
      Map<Object, Object> map = ((LazyMap) data).getMap();
      for (Map.Entry<Object, Object> entry : map.entrySet()) {
        mapContent.put(fromLazyObject(mapType.getMapKeyTypeInfo(), entry.getKey()),
            fromLazyObject(mapType.getMapValueTypeInfo(), entry.getValue()));
      }
      return mapContent;
    case STRUCT:
      StructTypeInfo structType = (StructTypeInfo) type;
      List<TypeInfo> info = structType.getAllStructFieldTypeInfos();
      List<String> names = structType.getAllStructFieldNames();
      Map<String, Object> structMap = Maps.newConcurrentMap();
      List<Object> struct = ((LazyStruct) data).getFieldsAsList();
      for (int structIndex = 0; structIndex < info.size(); structIndex++) {
        structMap.put(names.get(structIndex),
            fromLazyObject(info.get(structIndex), struct.get(structIndex)));
      }
      return structMap;
    case UNION:
      throw new UnsupportedOperationException("union not yet supported");
    default:
      return data.toString();
  }
}
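The recursion above is driven entirely by the TypeInfo tree rather than by the runtime classes of the lazy objects. Since fromLazyObject is private to ObjectSerializer, the small sketch below only reproduces the STRUCT branch's traversal of field names and types; TypeInfoUtils.getTypeInfoFromTypeString is a standard Hive utility, and the TypeWalk class and schema string are illustrative assumptions.

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeWalk {
  public static void main(String[] args) {
    StructTypeInfo structType = (StructTypeInfo)
        TypeInfoUtils.getTypeInfoFromTypeString("struct<s:string,i:int,tags:array<string>>");
    List<String> names = structType.getAllStructFieldNames();
    List<TypeInfo> infos = structType.getAllStructFieldTypeInfos();
    // Mirrors the STRUCT case above: pair each field name with its TypeInfo
    for (int i = 0; i < names.size(); i++) {
      System.out.println(names.get(i) + " : " + infos.get(i).getTypeName()
          + " (" + infos.get(i).getCategory() + ")");
    }
  }
}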
Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
The class TestHiveAccumuloTableOutputFormat, method testWriteMap.
@Test
public void testWriteMap() throws Exception {
  Instance inst = new MockInstance(test.getMethodName());
  Connector conn = inst.getConnector("root", new PasswordToken(""));
  HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
  String table = test.getMethodName();
  conn.tableOperations().create(table);
  JobConf conf = new JobConf();
  conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
  conf.set(AccumuloConnectionParameters.USER_NAME, "root");
  conf.set(AccumuloConnectionParameters.USER_PASS, "");
  conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
  conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
  FileSystem local = FileSystem.getLocal(conf);
  outputFormat.checkOutputSpecs(local, conf);
  RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
  List<String> names = Arrays.asList("row", "col1");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
  Properties tableProperties = new Properties();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:*");
  tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(),
      tableProperties, AccumuloSerDe.class.getSimpleName());
  LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
  AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams,
      accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL,
      accumuloSerDeParams.getRowIdFactory());
  TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
  LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(
      stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(),
      serDeParams.getEscapeChar());
  LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI,
      stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(),
      serDeParams.getEscapeChar());
  LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory
      .getLazySimpleStructObjectInspector(Arrays.asList("row", "data"), Arrays.asList(stringOI, mapOI),
          (byte) ' ', serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(),
          serDeParams.isEscaped(), serDeParams.getEscapeChar());
  LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
  ByteArrayRef bytes = new ByteArrayRef();
  bytes.setData("row cq1:value1,cq2:value2".getBytes());
  struct.init(bytes, 0, bytes.getData().length);
  // Serialize the struct into a mutation
  Mutation m = serializer.serialize(struct, structOI);
  // Write the mutation
  recordWriter.write(new Text(table), m);
  // Close the writer
  recordWriter.close(null);
  // Scan the table back and verify that each map entry became its own cell
  Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
  Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
  Entry<Key, Value> entry = iter.next();
  Key k = entry.getKey();
  Value v = entry.getValue();
  Assert.assertEquals("row", k.getRow().toString());
  Assert.assertEquals("cf", k.getColumnFamily().toString());
  Assert.assertEquals("cq1", k.getColumnQualifier().toString());
  Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, k.getColumnVisibilityParsed());
  Assert.assertEquals("value1", new String(v.get()));
  Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
  entry = iter.next();
  k = entry.getKey();
  v = entry.getValue();
  Assert.assertEquals("row", k.getRow().toString());
  Assert.assertEquals("cf", k.getColumnFamily().toString());
  Assert.assertEquals("cq2", k.getColumnQualifier().toString());
  Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, k.getColumnVisibilityParsed());
  Assert.assertEquals("value2", new String(v.get()));
  Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
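The interesting part of this test is the cf:* column mapping: the map-typed column does not become a single cell; each map entry is written as its own Accumulo cell, with the entry key as the column qualifier, which is exactly what the scan assertions check. A plain-Java illustration of that expansion for the row used above; this reimplements only the mapping idea for explanation and is not the serializer's actual code.

public class MapMappingDemo {
  public static void main(String[] args) {
    String row = "row";
    String serializedMap = "cq1:value1,cq2:value2"; // field.delim already split off the rowid
    for (String pair : serializedMap.split(",")) {
      String[] kv = pair.split(":", 2);
      // With the ":rowID,cf:*" mapping, each entry becomes (row, "cf", key) -> value
      System.out.printf("(%s, cf, %s) -> %s%n", row, kv[0], kv[1]);
    }
  }
}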
Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
The class TestAccumuloRowSerializer, method testVisibilityLabel.
@Test
public void testVisibilityLabel() throws IOException, SerDeException {
  List<String> columns = Arrays.asList("row", "cq1", "cq2", "cq3");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
  List<String> typeNames = new ArrayList<String>(types.size());
  for (TypeInfo type : types) {
    typeNames.add(type.getTypeName());
  }
  Properties tableProperties = new Properties();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");
  tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
  AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(),
      tableProperties, AccumuloSerDe.class.getSimpleName());
  LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
  LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(
      columns, types, serDeParams.getSeparators(), serDeParams.getNullSequence(),
      serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
  AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams,
      accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"),
      accumuloSerDeParams.getRowIdFactory());
  // Create the LazyStruct from the LazySimpleStructObjectInspector
  LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(oi);
  ByteArrayRef byteRef = new ByteArrayRef();
  byteRef.setData(new byte[] { 'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e' });
  obj.init(byteRef, 0, byteRef.getData().length);
  Mutation m = (Mutation) serializer.serialize(obj, oi);
  Assert.assertArrayEquals("row1".getBytes(), m.getRow());
  List<ColumnUpdate> updates = m.getUpdates();
  Assert.assertEquals(3, updates.size());
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(baos);
  ColumnUpdate update = updates.get(0);
  Assert.assertEquals("cf", new String(update.getColumnFamily()));
  Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
  Assert.assertEquals("foo", new String(update.getColumnVisibility()));
  // cq1 is mapped with #b, so its value is the binary encoding of the int 10
  out.writeInt(10);
  Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
  update = updates.get(1);
  Assert.assertEquals("cf", new String(update.getColumnFamily()));
  Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
  Assert.assertEquals("foo", new String(update.getColumnVisibility()));
  baos.reset();
  out.writeInt(20);
  Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
  update = updates.get(2);
  Assert.assertEquals("cf", new String(update.getColumnFamily()));
  Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
  Assert.assertEquals("foo", new String(update.getColumnVisibility()));
  // cq3 has no #b suffix, so its value is the plain string encoding
  Assert.assertEquals("value", new String(update.getValue()));
}
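The #b suffix in the column mapping requests binary encoding, which is why the test compares cq1's value against DataOutputStream.writeInt bytes rather than the string "10". A standalone sketch of the difference between the two encodings checked above:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

public class EncodingDemo {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    new DataOutputStream(baos).writeInt(10);
    // #b (binary): four big-endian bytes
    System.out.println(Arrays.toString(baos.toByteArray())); // [0, 0, 0, 10]
    // default (string): the UTF-8 text "10"
    System.out.println(Arrays.toString("10".getBytes()));    // [49, 48]
  }
}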