Use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in the Apache Hive project.
Class TestLazyHBaseObject, method testLazyHBaseRow1.
/**
 * Tests the {@code LazyHBaseRow} class with one-for-one mappings between
 * Hive fields and HBase columns.
 *
 * <p>A single row object is initialized five times from different HBase
 * results; after each initialization its JSON rendering is compared with the
 * expected string.
 *
 * @throws SerDeException if the HBase column mapping string cannot be parsed
 */
public void testLazyHBaseRow1() throws SerDeException {
  List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString(
      "string,int,array<string>,map<string,string>,string");
  List<String> fieldNames = Arrays.asList("key", "a", "b", "c", "d");
  Text nullSequence = new Text("\\N");
  String hbaseColsMapping = ":key,cfa:a,cfa:b,cfb:c,cfb:d";

  // This method already declares SerDeException, so a parse failure is left to
  // propagate with its full stack trace instead of being flattened into
  // fail(e.toString()).
  ColumnMappings columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColsMapping);
  for (ColumnMapping colMap : columnMappings) {
    // Every mapping gets one "not binary" flag ...
    colMap.binaryStorage.add(false);
    if (!colMap.hbaseRowKey && colMap.qualifierName == null) {
      // ... and a mapping that is neither the row key nor qualified gets a second one
      // (presumably one flag each for map key and value -- confirm against HBaseSerDe).
      colMap.binaryStorage.add(false);
    }
  }

  // Separators: ' ' at the struct level, ':' between collection items,
  // '=' between a map key and its value (see the expected JSON below).
  ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos,
      new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
  LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);

  // Row key, family and qualifier bytes are the same in every scenario; convert them once.
  byte[] rowKey = Bytes.toBytes("test-row");
  byte[] cfa = Bytes.toBytes("cfa");
  byte[] cfb = Bytes.toBytes("cfb");
  byte[] qualA = Bytes.toBytes("a");
  byte[] qualB = Bytes.toBytes("b");
  byte[] qualC = Bytes.toBytes("c");
  byte[] qualD = Bytes.toBytes("d");

  // Scenario 1: all four columns are present.
  List<KeyValue> kvs = new ArrayList<KeyValue>();
  kvs.add(new KeyValue(rowKey, cfa, qualA, Bytes.toBytes("123")));
  kvs.add(new KeyValue(rowKey, cfa, qualB, Bytes.toBytes("a:b:c")));
  kvs.add(new KeyValue(rowKey, cfb, qualC, Bytes.toBytes("d=e:f=g")));
  kvs.add(new KeyValue(rowKey, cfb, qualD, Bytes.toBytes("hi")));
  Result r = new Result(kvs);
  o.init(r);
  assertEquals(
      ("{'key':'test-row','a':123,'b':['a','b','c']," + "'c':{'d':'e','f':'g'},'d':'hi'}")
          .replace("'", "\""),
      SerDeUtils.getJSONString(o, oi));

  // Scenario 2: columns b and d are absent and must render as null.
  kvs.clear();
  kvs.add(new KeyValue(rowKey, cfa, qualA, Bytes.toBytes("123")));
  kvs.add(new KeyValue(rowKey, cfb, qualC, Bytes.toBytes("d=e:f=g")));
  r = new Result(kvs);
  o.init(r);
  assertEquals(
      ("{'key':'test-row','a':123,'b':null," + "'c':{'d':'e','f':'g'},'d':null}")
          .replace("'", "\""),
      SerDeUtils.getJSONString(o, oi));

  // Scenario 3: column a is absent; the map holds the null sequence and a key without a value.
  kvs.clear();
  kvs.add(new KeyValue(rowKey, cfa, qualB, Bytes.toBytes("a")));
  kvs.add(new KeyValue(rowKey, cfb, qualC, Bytes.toBytes("d=\\N:f=g:h")));
  kvs.add(new KeyValue(rowKey, cfb, qualD, Bytes.toBytes("no")));
  r = new Result(kvs);
  o.init(r);
  assertEquals(
      ("{'key':'test-row','a':null,'b':['a']," + "'c':{'d':null,'f':'g','h':null},'d':'no'}")
          .replace("'", "\""),
      SerDeUtils.getJSONString(o, oi));

  // Scenario 4: the list contains empty elements; columns a and c are absent.
  kvs.clear();
  kvs.add(new KeyValue(rowKey, cfa, qualB, Bytes.toBytes(":a::")));
  kvs.add(new KeyValue(rowKey, cfb, qualD, Bytes.toBytes("no")));
  r = new Result(kvs);
  o.init(r);
  assertEquals(
      ("{'key':'test-row','a':null,'b':['','a','','']," + "'c':null,'d':'no'}")
          .replace("'", "\""),
      SerDeUtils.getJSONString(o, oi));
  // This is intentionally duplicated because of HIVE-3179
  assertEquals(
      ("{'key':'test-row','a':null,'b':['','a','','']," + "'c':null,'d':'no'}")
          .replace("'", "\""),
      SerDeUtils.getJSONString(o, oi));

  // Scenario 5: empty values yield an empty list, an empty map and an empty string.
  kvs.clear();
  kvs.add(new KeyValue(rowKey, cfa, qualA, Bytes.toBytes("123")));
  kvs.add(new KeyValue(rowKey, cfa, qualB, Bytes.toBytes("")));
  kvs.add(new KeyValue(rowKey, cfb, qualC, Bytes.toBytes("")));
  kvs.add(new KeyValue(rowKey, cfb, qualD, Bytes.toBytes("")));
  r = new Result(kvs);
  o.init(r);
  assertEquals(
      "{'key':'test-row','a':123,'b':[],'c':{},'d':''}".replace("'", "\""),
      SerDeUtils.getJSONString(o, oi));
}
Use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in the Apache Hive project.
Class TestLazyAccumuloRow, method testDeserializationOfBinaryEncoding.
/**
 * Verifies that a {@code LazyAccumuloRow} exposes binary-encoded integer cells as
 * {@code LazyDioInteger} fields and string cells as {@code LazyString} fields.
 *
 * <p>The default storage type is set to binary; the row id and the two name columns
 * carry a {@code #s} suffix in the column mapping (presumably selecting string
 * encoding -- confirm against the column-mapping parser). The three integer cells are
 * written with {@link DataOutputStream#writeInt(int)}.
 */
@Test
public void testDeserializationOfBinaryEncoding() throws Exception {
  List<String> columns =
      Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo);
  LazySimpleStructObjectInspector objectInspector =
      (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(
          columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"),
          false, false, (byte) '\\');
  DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();

  Properties tableProps = new Properties();
  tableProps.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS,
      ":rowid#s,personal:given_name#s,personal:surname#s,personal:age,personal:weight,personal:height");
  tableProps.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  tableProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  tableProps.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE,
      ColumnEncoding.BINARY.getName());
  AccumuloSerDeParameters params =
      new AccumuloSerDeParameters(new Configuration(), tableProps, AccumuloSerDe.class.getName());
  rowIdFactory.init(params, tableProps);

  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  DataOutputStream intWriter = new DataOutputStream(buffer);
  LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
  AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
  hiveRow.add("personal", "given_name", "Bob".getBytes());
  hiveRow.add("personal", "surname", "Stevens".getBytes());

  // The integer cells share one buffer, which is emptied before each value is written.
  String[] intQualifiers = { "age", "weight", "height" };
  int[] intValues = { 30, 200, 72 };
  for (int i = 0; i < intQualifiers.length; i++) {
    buffer.reset();
    intWriter.writeInt(intValues[i]);
    hiveRow.add("personal", intQualifiers[i], buffer.toByteArray());
  }

  ColumnMapper columnMapper = params.getColumnMapper();
  lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);

  // Expected lazy wrapper class and rendered text, indexed by field position.
  Class<?>[] expectedClasses = {
    LazyString.class, LazyString.class, LazyString.class,
    LazyDioInteger.class, LazyDioInteger.class, LazyDioInteger.class
  };
  String[] expectedText = { "1", "Bob", "Stevens", "30", "200", "72" };
  for (int fieldId = 0; fieldId < expectedText.length; fieldId++) {
    Object field = lazyRow.getField(fieldId);
    Assert.assertNotNull(field);
    Assert.assertEquals(expectedClasses[fieldId], field.getClass());
    Assert.assertEquals(expectedText[fieldId], field.toString());
  }
}
Use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in the Apache Hive project.
Class TestLazyAccumuloRow, method testExpectedDeserializationOfColumns.
/**
 * Verifies that a {@code LazyAccumuloRow} built from cells holding the textual form of
 * each value exposes the row id and name columns as {@code LazyString} and the three
 * integer columns as {@code LazyInteger}.
 *
 * <p>No storage type is configured here, so whatever default
 * {@code AccumuloSerDeParameters} applies is in effect.
 */
@Test
public void testExpectedDeserializationOfColumns() throws Exception {
  List<String> columns =
      Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo);
  LazySimpleStructObjectInspector objectInspector =
      (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(
          columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"),
          false, false, (byte) '\\');
  DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();

  Properties tableProps = new Properties();
  tableProps.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS,
      ":rowid,personal:given_name,personal:surname,personal:age,personal:weight,personal:height");
  tableProps.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  tableProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  AccumuloSerDeParameters params =
      new AccumuloSerDeParameters(new Configuration(), tableProps, AccumuloSerDe.class.getName());
  rowIdFactory.init(params, tableProps);

  LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
  AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
  // Each cell of the "personal" family as { qualifier, textual value }.
  String[][] cells = {
    { "given_name", "Bob" },
    { "surname", "Stevens" },
    { "age", "30" },
    { "weight", "200" },
    { "height", "72" }
  };
  for (String[] cell : cells) {
    hiveRow.add("personal", cell[0], cell[1].getBytes());
  }

  ColumnMapper columnMapper = params.getColumnMapper();
  lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);

  // Expected lazy wrapper class and rendered text, indexed by field position.
  Class<?>[] expectedClasses = {
    LazyString.class, LazyString.class, LazyString.class,
    LazyInteger.class, LazyInteger.class, LazyInteger.class
  };
  String[] expectedText = { "1", "Bob", "Stevens", "30", "200", "72" };
  for (int fieldId = 0; fieldId < expectedText.length; fieldId++) {
    Object field = lazyRow.getField(fieldId);
    Assert.assertEquals(expectedClasses[fieldId], field.getClass());
    Assert.assertEquals(expectedText[fieldId], field.toString());
  }
}
Use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in the Apache Hive project.
Class AccumuloSerDe, method initialize.
/**
 * Builds the SerDe state from the given configuration and table properties: the parsed
 * {@code AccumuloSerDeParameters}, the cached struct object inspector, the reusable
 * {@code LazyAccumuloRow}, and the {@code AccumuloRowSerializer}.
 *
 * @param conf configuration handed to {@code AccumuloSerDeParameters}
 * @param properties table properties handed to {@code AccumuloSerDeParameters}
 * @throws SerDeException declared by this method; the raising call is not identifiable from this block
 */
public void initialize(Configuration conf, Properties properties) throws SerDeException {
  accumuloSerDeParameters =
      new AccumuloSerDeParameters(conf, properties, getClass().getName());

  final LazySerDeParameters lazyParams = accumuloSerDeParameters.getSerDeParameters();
  final List<ColumnMapping> columnMappings = accumuloSerDeParameters.getColumnMappings();
  final List<TypeInfo> hiveTypes = accumuloSerDeParameters.getHiveColumnTypes();
  final AccumuloRowIdFactory idFactory = accumuloSerDeParameters.getRowIdFactory();

  ArrayList<ObjectInspector> fieldInspectors =
      getColumnObjectInspectors(hiveTypes, lazyParams, columnMappings, idFactory);

  // Only the first separator is passed to the struct inspector.
  cachedObjectInspector = LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
      lazyParams.getColumnNames(),
      fieldInspectors,
      lazyParams.getSeparators()[0],
      lazyParams.getNullSequence(),
      lazyParams.isLastColumnTakesRest(),
      lazyParams.isEscaped(),
      lazyParams.getEscapeChar());

  cachedRow = new LazyAccumuloRow((LazySimpleStructObjectInspector) cachedObjectInspector);

  serializer = new AccumuloRowSerializer(
      accumuloSerDeParameters.getRowIdOffset(),
      accumuloSerDeParameters.getSerDeParameters(),
      accumuloSerDeParameters.getColumnMappings(),
      accumuloSerDeParameters.getTableVisibilityLabel(),
      accumuloSerDeParameters.getRowIdFactory());

  if (log.isInfoEnabled()) {
    log.info("Initialized with {} type: {}",
        accumuloSerDeParameters.getSerDeParameters().getColumnNames(),
        accumuloSerDeParameters.getSerDeParameters().getColumnTypes());
  }
}
Use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in the Apache Hive project.
Class TestAccumuloRowSerializer, method testVisibilityLabel.
/**
 * Serializes one space-delimited row through an {@code AccumuloRowSerializer} constructed
 * with the column visibility {@code "foo"} and checks that each of the three resulting
 * column updates carries that visibility along with the expected family, qualifier and
 * value.
 *
 * <p>{@code cq1} and {@code cq2} are mapped with a {@code #b} suffix and are compared
 * against the bytes produced by {@link DataOutputStream#writeInt(int)}; {@code cq3} is
 * compared as a string.
 */
@Test
public void testVisibilityLabel() throws IOException, SerDeException {
  List<String> columns = Arrays.asList("row", "cq1", "cq2", "cq3");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.stringTypeInfo);
  List<String> typeNames = new ArrayList<String>(types.size());
  for (int i = 0; i < types.size(); i++) {
    typeNames.add(types.get(i).getTypeName());
  }

  Properties tableProperties = new Properties();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS,
      ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");
  tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES,
      Joiner.on(',').join(typeNames));
  AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(
      new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
  LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();

  LazySimpleStructObjectInspector oi =
      (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(
          columns,
          types,
          serDeParams.getSeparators(),
          serDeParams.getNullSequence(),
          serDeParams.isLastColumnTakesRest(),
          serDeParams.isEscaped(),
          serDeParams.getEscapeChar());
  AccumuloRowSerializer serializer = new AccumuloRowSerializer(
      0,
      serDeParams,
      accumuloSerDeParams.getColumnMappings(),
      new ColumnVisibility("foo"),
      accumuloSerDeParams.getRowIdFactory());

  // Create the LazyStruct from the LazyStruct...Inspector
  LazyStruct rowStruct = (LazyStruct) LazyFactory.createLazyObject(oi);
  // Raw row text: "row1 10 20 value"
  ByteArrayRef rawRow = new ByteArrayRef();
  rawRow.setData(new byte[] {
    'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e'
  });
  rowStruct.init(rawRow, 0, rawRow.getData().length);

  Mutation mutation = (Mutation) serializer.serialize(rowStruct, oi);
  Assert.assertArrayEquals("row1".getBytes(), mutation.getRow());
  List<ColumnUpdate> updates = mutation.getUpdates();
  Assert.assertEquals(3, updates.size());

  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  DataOutputStream intWriter = new DataOutputStream(buffer);
  String[] expectedQualifiers = { "cq1", "cq2", "cq3" };
  // Values expected in binary form for the leading updates; the last update is textual.
  int[] expectedBinaryInts = { 10, 20 };
  for (int i = 0; i < expectedQualifiers.length; i++) {
    ColumnUpdate update = updates.get(i);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals(expectedQualifiers[i], new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getColumnVisibility()));
    if (i < expectedBinaryInts.length) {
      buffer.reset();
      intWriter.writeInt(expectedBinaryInts[i]);
      Assert.assertArrayEquals(buffer.toByteArray(), update.getValue());
    } else {
      Assert.assertEquals("value", new String(update.getValue()));
    }
  }
}
Aggregations