use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in project hive by apache.
the class TestHiveAccumuloTableOutputFormat method testWriteToMockInstance.
@Test
public void testWriteToMockInstance() throws Exception {
Instance inst = new MockInstance(test.getMethodName());
Connector conn = inst.getConnector("root", new PasswordToken(""));
HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
String table = test.getMethodName();
conn.tableOperations().create(table);
JobConf conf = new JobConf();
conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
conf.set(AccumuloConnectionParameters.USER_NAME, "root");
conf.set(AccumuloConnectionParameters.USER_PASS, "");
conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
FileSystem local = FileSystem.getLocal(conf);
outputFormat.checkOutputSpecs(local, conf);
RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
List<String> names = Arrays.asList("row", "col1", "col2");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());
TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
ByteArrayRef bytes = new ByteArrayRef();
bytes.setData("row value1 value2".getBytes());
struct.init(bytes, 0, bytes.getData().length);
// Serialize the struct into a mutation
Mutation m = serializer.serialize(struct, structOI);
// Write the mutation
recordWriter.write(new Text(table), m);
// Close the writer
recordWriter.close(null);
Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
Entry<Key, Value> entry = iter.next();
Key k = entry.getKey();
Value v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq1", k.getColumnQualifier().toString());
Assert.assertEquals("", k.getColumnVisibility().toString());
Assert.assertEquals("value1", new String(v.get()));
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
entry = iter.next();
k = entry.getKey();
v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq2", k.getColumnQualifier().toString());
Assert.assertEquals("", k.getColumnVisibility().toString());
Assert.assertEquals("value2", new String(v.get()));
Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in project hive by apache.
the class TestHiveAccumuloTableOutputFormat method testWriteToMockInstanceWithVisibility.
@Test
public void testWriteToMockInstanceWithVisibility() throws Exception {
Instance inst = new MockInstance(test.getMethodName());
Connector conn = inst.getConnector("root", new PasswordToken(""));
Authorizations auths = new Authorizations("foo");
conn.securityOperations().changeUserAuthorizations("root", auths);
HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
String table = test.getMethodName();
conn.tableOperations().create(table);
JobConf conf = new JobConf();
conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
conf.set(AccumuloConnectionParameters.USER_NAME, "root");
conf.set(AccumuloConnectionParameters.USER_PASS, "");
conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
FileSystem local = FileSystem.getLocal(conf);
outputFormat.checkOutputSpecs(local, conf);
RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
List<String> names = Arrays.asList("row", "col1", "col2");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), accumuloSerDeParams.getRowIdFactory());
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
ByteArrayRef bytes = new ByteArrayRef();
bytes.setData("row value1 value2".getBytes());
struct.init(bytes, 0, bytes.getData().length);
// Serialize the struct into a mutation
Mutation m = serializer.serialize(struct, structOI);
// Write the mutation
recordWriter.write(new Text(table), m);
// Close the writer
recordWriter.close(null);
Iterator<Entry<Key, Value>> iter = conn.createScanner(table, auths).iterator();
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
Entry<Key, Value> entry = iter.next();
Key k = entry.getKey();
Value v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq1", k.getColumnQualifier().toString());
Assert.assertEquals("foo", k.getColumnVisibility().toString());
Assert.assertEquals("value1", new String(v.get()));
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
entry = iter.next();
k = entry.getKey();
v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq2", k.getColumnQualifier().toString());
Assert.assertEquals("foo", k.getColumnVisibility().toString());
Assert.assertEquals("value2", new String(v.get()));
Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in project hive by apache.
the class TestHiveAccumuloTableOutputFormat method testBinarySerializationOnStringFallsBackToUtf8.
@Test
public void testBinarySerializationOnStringFallsBackToUtf8() throws Exception {
Instance inst = new MockInstance(test.getMethodName());
Connector conn = inst.getConnector("root", new PasswordToken(""));
HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
String table = test.getMethodName();
conn.tableOperations().create(table);
JobConf conf = new JobConf();
conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
conf.set(AccumuloConnectionParameters.USER_NAME, "root");
conf.set(AccumuloConnectionParameters.USER_PASS, "");
conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
FileSystem local = FileSystem.getLocal(conf);
outputFormat.checkOutputSpecs(local, conf);
RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
List<String> names = Arrays.asList("row", "col1", "col2");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
tableProperties.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());
TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
ByteArrayRef bytes = new ByteArrayRef();
bytes.setData("row value1 value2".getBytes());
struct.init(bytes, 0, bytes.getData().length);
// Serialize the struct into a mutation
Mutation m = serializer.serialize(struct, structOI);
// Write the mutation
recordWriter.write(new Text(table), m);
// Close the writer
recordWriter.close(null);
Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
Entry<Key, Value> entry = iter.next();
Key k = entry.getKey();
Value v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq1", k.getColumnQualifier().toString());
Assert.assertEquals("", k.getColumnVisibility().toString());
Assert.assertEquals("value1", new String(v.get()));
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
entry = iter.next();
k = entry.getKey();
v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq2", k.getColumnQualifier().toString());
Assert.assertEquals("", k.getColumnVisibility().toString());
Assert.assertEquals("value2", new String(v.get()));
Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in project hive by apache.
the class TestLazyAccumuloRow method testNullInit.
@Test
public void testNullInit() throws SerDeException {
List<String> columns = Arrays.asList("row", "1", "2", "3");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME));
LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
Properties props = new Properties();
props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1,cf:cq2,cf:cq3");
props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
rowIdFactory.init(params, props);
ColumnMapper columnMapper = params.getColumnMapper();
LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
hiveRow.add("cf", "cq1", "foo".getBytes());
hiveRow.add("cf", "cq3", "bar".getBytes());
lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
// Noticed that we also suffer from the same issue as HIVE-3179
// Only want to call a field init'ed when it's non-NULL
// Check it twice, make sure we get null both times
Assert.assertEquals("{'row':'1','1':'foo','2':null,'3':'bar'}".replace('\'', '"'), SerDeUtils.getJSONString(lazyRow, objectInspector));
Assert.assertEquals("{'row':'1','1':'foo','2':null,'3':'bar'}".replace('\'', '"'), SerDeUtils.getJSONString(lazyRow, objectInspector));
}
use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector in project hive by apache.
the class TestLazyHBaseObject method testLazyHBaseRow3.
/**
* Test the LazyHBaseRow class with a one-to-one/onto mapping between Hive columns and
* HBase column family/column qualifier pairs. The column types are primitive and fields
* are stored in binary format in HBase.
* @throws SerDeException
*/
public void testLazyHBaseRow3() throws SerDeException {
List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,tinyint,smallint,bigint,float,double,string,boolean");
List<String> fieldNames = Arrays.asList(new String[] { "key", "c_int", "c_byte", "c_short", "c_long", "c_float", "c_double", "c_string", "c_bool" });
Text nullSequence = new Text("\\N");
String hbaseColumnsMapping = ":key#str,cf-int:cq-int#bin,cf-byte:cq-byte#bin," + "cf-short:cq-short#bin,cf-long:cq-long#bin,cf-float:cq-float#bin,cf-double:cq-double#bin," + "cf-string:cq-string#str,cf-bool:cq-bool#bin";
ColumnMappings columnMappings = null;
try {
columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
} catch (SerDeException e) {
fail(e.toString());
}
ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
for (int i = 0; i < columnsMapping.length; i++) {
ColumnMapping colMap = columnsMapping[i];
if (i == 0 || i == 7) {
colMap.binaryStorage.add(false);
} else {
colMap.binaryStorage.add(true);
}
}
ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
byte[] rowKey = "row-key".getBytes();
List<KeyValue> kvs = new ArrayList<KeyValue>();
byte[] value;
for (int i = 1; i < columnsMapping.length; i++) {
switch(i) {
case 1:
value = Bytes.toBytes(1);
break;
case 2:
value = new byte[] { (byte) 1 };
break;
case 3:
value = Bytes.toBytes((short) 1);
break;
case 4:
value = Bytes.toBytes((long) 1);
break;
case 5:
value = Bytes.toBytes((float) 1.0F);
break;
case 6:
value = Bytes.toBytes((double) 1.0);
break;
case 7:
value = "Hadoop, Hive, with HBase storage handler.".getBytes();
break;
case 8:
value = Bytes.toBytes(true);
break;
default:
throw new RuntimeException("Not expected: " + i);
}
ColumnMapping colMap = columnsMapping[i];
kvs.add(new KeyValue(rowKey, colMap.familyNameBytes, colMap.qualifierNameBytes, value));
}
Collections.sort(kvs, KeyValue.COMPARATOR);
Result result = new Result(kvs);
o.init(result);
List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
for (int i = 0; i < fieldRefs.size(); i++) {
Object fieldData = ((StructObjectInspector) oi).getStructFieldData(o, fieldRefs.get(i));
assert (fieldData != null);
assert (fieldData instanceof LazyPrimitive<?, ?>);
Writable writable = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
switch(i) {
case 0:
Text text = new Text("row-key");
assertEquals(text, writable);
break;
case 1:
IntWritable iw = new IntWritable(1);
assertEquals(iw, writable);
break;
case 2:
ByteWritable bw = new ByteWritable((byte) 1);
assertEquals(bw, writable);
break;
case 3:
ShortWritable sw = new ShortWritable((short) 1);
assertEquals(sw, writable);
break;
case 4:
LongWritable lw = new LongWritable(1);
assertEquals(lw, writable);
break;
case 5:
FloatWritable fw = new FloatWritable(1.0F);
assertEquals(fw, writable);
break;
case 6:
DoubleWritable dw = new DoubleWritable(1.0);
assertEquals(dw, writable);
break;
case 7:
Text t = new Text("Hadoop, Hive, with HBase storage handler.");
assertEquals(t, writable);
break;
case 8:
BooleanWritable boolWritable = new BooleanWritable(true);
assertEquals(boolWritable, writable);
break;
default:
fail("Error: Unanticipated value in deserializing fields for HBaseSerDe.");
break;
}
}
}
Aggregations