Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache — class TestAccumuloRowSerializer, method testMapSerialization:
@Test
public void testMapSerialization() throws IOException, SerDeException {
List<String> columns = Arrays.asList("row", "col");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo));
List<String> typeNames = new ArrayList<String>(types.size());
for (TypeInfo type : types) {
typeNames.add(type.getTypeName());
}
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:*");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.COLLECTION_DELIM, ",");
tableProperties.setProperty(serdeConstants.MAPKEY_DELIM, ":");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columns, Arrays.asList(stringOI, mapOI), (byte) ' ', serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
// Create the LazyStruct from the LazyStruct...Inspector
LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(structOI);
ByteArrayRef byteRef = new ByteArrayRef();
byteRef.setData("row1 cq1:10,cq2:20,cq3:value".getBytes());
obj.init(byteRef, 0, byteRef.getData().length);
Mutation m = (Mutation) serializer.serialize(obj, structOI);
Assert.assertArrayEquals("row1".getBytes(), m.getRow());
List<ColumnUpdate> updates = m.getUpdates();
Assert.assertEquals(3, updates.size());
ColumnUpdate update = updates.get(0);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
Assert.assertEquals("10", new String(update.getValue()));
update = updates.get(1);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
Assert.assertEquals("20", new String(update.getValue()));
update = updates.get(2);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
Assert.assertEquals("value", new String(update.getValue()));
}
Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache — class TestDefaultAccumuloRowIdFactory, method testCorrectComplexInspectors:
@Test
public void testCorrectComplexInspectors() throws SerDeException {
  // Table layout: the row id is a struct of two ints, the single data
  // column is a string-to-string map.
  Properties props = new Properties();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq");
  props.setProperty(serdeConstants.LIST_COLUMNS, "row,col");
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:int,col2:int>,map<string,string>");
  Configuration conf = new Configuration();
  AccumuloSerDe serde = new AccumuloSerDe();
  serde.initialize(conf, props);
  AccumuloRowIdFactory rowIdFactory = serde.getParams().getRowIdFactory();
  List<TypeInfo> hiveTypes = serde.getParams().getHiveColumnTypes();
  ColumnMapper columnMapper = serde.getParams().getColumnMapper();
  LazySerDeParameters lazyParams = serde.getParams().getSerDeParameters();
  List<ObjectInspector> inspectors =
      serde.getColumnObjectInspectors(hiveTypes, lazyParams, columnMapper.getColumnMappings(), rowIdFactory);
  // One inspector per declared column, of the expected lazy complex kinds.
  Assert.assertEquals(2, inspectors.size());
  Assert.assertEquals(LazySimpleStructObjectInspector.class, inspectors.get(0).getClass());
  Assert.assertEquals(LazyMapObjectInspector.class, inspectors.get(1).getClass());
  // Separators follow the default SerDe separator hierarchy: 2 at the first
  // nesting level, 3 at the second.
  LazySimpleStructObjectInspector structInspector = (LazySimpleStructObjectInspector) inspectors.get(0);
  Assert.assertEquals(2, (int) structInspector.getSeparator());
  LazyMapObjectInspector mapInspector = (LazyMapObjectInspector) inspectors.get(1);
  Assert.assertEquals(2, (int) mapInspector.getItemSeparator());
  Assert.assertEquals(3, (int) mapInspector.getKeyValueSeparator());
}
Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache — class TestDefaultAccumuloRowIdFactory, method testCorrectPrimitiveInspectors:
@Test
public void testCorrectPrimitiveInspectors() throws SerDeException {
  // Table layout: a string row id and a single int data column.
  Properties props = new Properties();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq");
  props.setProperty(serdeConstants.LIST_COLUMNS, "row,col");
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,int");
  Configuration conf = new Configuration();
  AccumuloSerDe serde = new AccumuloSerDe();
  serde.initialize(conf, props);
  AccumuloRowIdFactory rowIdFactory = serde.getParams().getRowIdFactory();
  List<TypeInfo> hiveTypes = serde.getParams().getHiveColumnTypes();
  ColumnMapper columnMapper = serde.getParams().getColumnMapper();
  LazySerDeParameters lazyParams = serde.getParams().getSerDeParameters();
  List<ObjectInspector> inspectors =
      serde.getColumnObjectInspectors(hiveTypes, lazyParams, columnMapper.getColumnMappings(), rowIdFactory);
  // Primitive Hive types map to the corresponding lazy primitive inspectors.
  Assert.assertEquals(2, inspectors.size());
  Assert.assertEquals(LazyStringObjectInspector.class, inspectors.get(0).getClass());
  Assert.assertEquals(LazyIntObjectInspector.class, inspectors.get(1).getClass());
}
Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project phoenix by apache — class PhoenixSerDe, method initialize:
/**
 * Initializes this SerDe from the table properties.
 *
 * Builds the lazy SerDe parameters and the Phoenix object inspector, then —
 * only when the IN_OUT_WORK property is present — also builds the serializer
 * and the reusable row object.
 *
 * NOTE(review): the assignment order matters — serdeParams is set before
 * createLazyPhoenixInspector is called; presumably the inspector creation
 * reads it. Do not reorder without checking createLazyPhoenixInspector.
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
// Keep the raw table properties for later use by other SerDe methods.
tableProperties = tbl;
if (LOG.isDebugEnabled()) {
LOG.debug("SerDe initialize : " + tbl.getProperty("name"));
}
serdeParams = new LazySerDeParameters(conf, tbl, getClass().getName());
objectInspector = createLazyPhoenixInspector(conf, tbl);
String inOutWork = tbl.getProperty(PhoenixStorageHandlerConstants.IN_OUT_WORK);
// Without an in/out work hint there is nothing to serialize; skip building
// the serializer and row (deserialization-only setup).
if (inOutWork == null) {
return;
}
serializer = new PhoenixSerializer(conf, tbl);
row = new PhoenixRow(serdeParams.getColumnNames());
}
Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache — class AccumuloRowSerializer, method writeWithLevel:
/**
 * Recursively serialize an Object using its {@link ObjectInspector}, respecting the
 * separators defined by the {@link LazySerDeParameters}.
 *
 * Primitives are written directly (binary or string per the column mapping's
 * encoding); lists, maps and structs delegate to one private helper each so
 * every container category has properly scoped locals (the original switch
 * shared variables across case labels, which Java scopes to the whole switch).
 *
 * @param oi ObjectInspector for the current object
 * @param value The current object
 * @param output A buffer output is written to
 * @param mapping The mapping for this Hive column
 * @param level The current level/offset for the SerDe separator
 * @throws IOException if writing to the output buffer fails
 */
protected void writeWithLevel(ObjectInspector oi, Object value, ByteStream.Output output, ColumnMapping mapping, int level) throws IOException {
  switch (oi.getCategory()) {
    case PRIMITIVE:
      // Leaf value: honor the column's configured encoding.
      if (mapping.getEncoding() == ColumnEncoding.BINARY) {
        this.writeBinary(output, value, (PrimitiveObjectInspector) oi);
      } else {
        this.writeString(output, value, (PrimitiveObjectInspector) oi);
      }
      return;
    case LIST:
      writeListWithLevel((ListObjectInspector) oi, value, output, mapping, level);
      return;
    case MAP:
      writeMapWithLevel((MapObjectInspector) oi, value, output, mapping, level);
      return;
    case STRUCT:
      writeStructWithLevel((StructObjectInspector) oi, value, output, mapping, level);
      return;
    default:
      throw new RuntimeException("Unknown category type: " + oi.getCategory());
  }
}

/**
 * Serializes a list: elements recurse at {@code level + 1}, joined by the
 * separator configured for {@code level}. A null list writes nothing.
 */
private void writeListWithLevel(ListObjectInspector loi, Object value, ByteStream.Output output, ColumnMapping mapping, int level) throws IOException {
  List<?> list = loi.getList(value);
  if (list == null) {
    log.debug("No objects found when serializing list");
    return;
  }
  char separator = (char) serDeParams.getSeparators()[level];
  ObjectInspector elementOI = loi.getListElementObjectInspector();
  for (int i = 0; i < list.size(); i++) {
    if (i > 0) {
      output.write(separator);
    }
    writeWithLevel(elementOI, list.get(i), output, mapping, level + 1);
  }
}

/**
 * Serializes a map: entries are joined by the separator for {@code level},
 * keys and values by the separator for {@code level + 1}, and both recurse at
 * {@code level + 2} since the two separators each consume a nesting level.
 * A null map writes nothing.
 */
private void writeMapWithLevel(MapObjectInspector moi, Object value, ByteStream.Output output, ColumnMapping mapping, int level) throws IOException {
  Map<?, ?> map = moi.getMap(value);
  if (map == null) {
    log.debug("No object found when serializing map");
    return;
  }
  char entrySeparator = (char) serDeParams.getSeparators()[level];
  char keyValueSeparator = (char) serDeParams.getSeparators()[level + 1];
  ObjectInspector keyOI = moi.getMapKeyObjectInspector();
  ObjectInspector valueOI = moi.getMapValueObjectInspector();
  boolean first = true;
  for (Map.Entry<?, ?> entry : map.entrySet()) {
    if (!first) {
      output.write(entrySeparator);
    }
    first = false;
    writeWithLevel(keyOI, entry.getKey(), output, mapping, level + 2);
    output.write(keyValueSeparator);
    writeWithLevel(valueOI, entry.getValue(), output, mapping, level + 2);
  }
}

/**
 * Serializes a struct: fields recurse at {@code level + 1}, joined by the
 * separator configured for {@code level}. Null field data writes nothing.
 */
private void writeStructWithLevel(StructObjectInspector soi, Object value, ByteStream.Output output, ColumnMapping mapping, int level) throws IOException {
  List<Object> fieldData = soi.getStructFieldsDataAsList(value);
  if (fieldData == null) {
    log.debug("No object found when serializing struct");
    return;
  }
  char separator = (char) serDeParams.getSeparators()[level];
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  for (int i = 0; i < fieldData.size(); i++) {
    if (i > 0) {
      output.write(separator);
    }
    writeWithLevel(fields.get(i).getFieldObjectInspector(), fieldData.get(i), output, mapping, level + 1);
  }
}
Aggregations