use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestHiveAccumuloTableOutputFormat method testWriteToMockInstanceWithVisibility.
@Test
public void testWriteToMockInstanceWithVisibility() throws Exception {
Instance inst = new MockInstance(test.getMethodName());
Connector conn = inst.getConnector("root", new PasswordToken(""));
Authorizations auths = new Authorizations("foo");
conn.securityOperations().changeUserAuthorizations("root", auths);
HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
String table = test.getMethodName();
conn.tableOperations().create(table);
JobConf conf = new JobConf();
conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
conf.set(AccumuloConnectionParameters.USER_NAME, "root");
conf.set(AccumuloConnectionParameters.USER_PASS, "");
conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
FileSystem local = FileSystem.getLocal(conf);
outputFormat.checkOutputSpecs(local, conf);
RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
List<String> names = Arrays.asList("row", "col1", "col2");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), accumuloSerDeParams.getRowIdFactory());
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
ByteArrayRef bytes = new ByteArrayRef();
bytes.setData("row value1 value2".getBytes());
struct.init(bytes, 0, bytes.getData().length);
// Serialize the struct into a mutation
Mutation m = serializer.serialize(struct, structOI);
// Write the mutation
recordWriter.write(new Text(table), m);
// Close the writer
recordWriter.close(null);
Iterator<Entry<Key, Value>> iter = conn.createScanner(table, auths).iterator();
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
Entry<Key, Value> entry = iter.next();
Key k = entry.getKey();
Value v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq1", k.getColumnQualifier().toString());
Assert.assertEquals("foo", k.getColumnVisibility().toString());
Assert.assertEquals("value1", new String(v.get()));
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
entry = iter.next();
k = entry.getKey();
v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq2", k.getColumnQualifier().toString());
Assert.assertEquals("foo", k.getColumnVisibility().toString());
Assert.assertEquals("value2", new String(v.get()));
Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestHiveAccumuloTableOutputFormat method testBinarySerializationOnStringFallsBackToUtf8.
@Test
public void testBinarySerializationOnStringFallsBackToUtf8() throws Exception {
Instance inst = new MockInstance(test.getMethodName());
Connector conn = inst.getConnector("root", new PasswordToken(""));
HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
String table = test.getMethodName();
conn.tableOperations().create(table);
JobConf conf = new JobConf();
conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
conf.set(AccumuloConnectionParameters.USER_NAME, "root");
conf.set(AccumuloConnectionParameters.USER_PASS, "");
conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
FileSystem local = FileSystem.getLocal(conf);
outputFormat.checkOutputSpecs(local, conf);
RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
List<String> names = Arrays.asList("row", "col1", "col2");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
tableProperties.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());
TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
ByteArrayRef bytes = new ByteArrayRef();
bytes.setData("row value1 value2".getBytes());
struct.init(bytes, 0, bytes.getData().length);
// Serialize the struct into a mutation
Mutation m = serializer.serialize(struct, structOI);
// Write the mutation
recordWriter.write(new Text(table), m);
// Close the writer
recordWriter.close(null);
Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
Entry<Key, Value> entry = iter.next();
Key k = entry.getKey();
Value v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq1", k.getColumnQualifier().toString());
Assert.assertEquals("", k.getColumnVisibility().toString());
Assert.assertEquals("value1", new String(v.get()));
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
entry = iter.next();
k = entry.getKey();
v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq2", k.getColumnQualifier().toString());
Assert.assertEquals("", k.getColumnVisibility().toString());
Assert.assertEquals("value2", new String(v.get()));
Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestHiveAccumuloTableOutputFormat method testWriteToMockInstance.
@Test
public void testWriteToMockInstance() throws Exception {
Instance inst = new MockInstance(test.getMethodName());
Connector conn = inst.getConnector("root", new PasswordToken(""));
HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
String table = test.getMethodName();
conn.tableOperations().create(table);
JobConf conf = new JobConf();
conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
conf.set(AccumuloConnectionParameters.USER_NAME, "root");
conf.set(AccumuloConnectionParameters.USER_PASS, "");
conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
FileSystem local = FileSystem.getLocal(conf);
outputFormat.checkOutputSpecs(local, conf);
RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
List<String> names = Arrays.asList("row", "col1", "col2");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());
TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
ByteArrayRef bytes = new ByteArrayRef();
bytes.setData("row value1 value2".getBytes());
struct.init(bytes, 0, bytes.getData().length);
// Serialize the struct into a mutation
Mutation m = serializer.serialize(struct, structOI);
// Write the mutation
recordWriter.write(new Text(table), m);
// Close the writer
recordWriter.close(null);
Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
Entry<Key, Value> entry = iter.next();
Key k = entry.getKey();
Value v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq1", k.getColumnQualifier().toString());
Assert.assertEquals("", k.getColumnVisibility().toString());
Assert.assertEquals("value1", new String(v.get()));
Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
entry = iter.next();
k = entry.getKey();
v = entry.getValue();
Assert.assertEquals("row", k.getRow().toString());
Assert.assertEquals("cf", k.getColumnFamily().toString());
Assert.assertEquals("cq2", k.getColumnQualifier().toString());
Assert.assertEquals("", k.getColumnVisibility().toString());
Assert.assertEquals("value2", new String(v.get()));
Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class MultiDelimitSerDe method initialize.
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
super.initialize(configuration, tableProperties, partitionProperties);
serdeParams = new LazySerDeParameters(configuration, tableProperties, getClass().getName());
fieldDelimited = properties.getProperty(serdeConstants.FIELD_DELIM);
if (fieldDelimited == null || fieldDelimited.isEmpty()) {
throw new SerDeException("This table does not have serde property \"field.delim\"!");
}
// get the collection separator and map key separator
collSep = LazyUtils.getByte(properties.getProperty(serdeConstants.COLLECTION_DELIM), DEFAULT_SEPARATORS[1]);
keySep = LazyUtils.getByte(properties.getProperty(serdeConstants.MAPKEY_DELIM), DEFAULT_SEPARATORS[2]);
serdeParams.setSeparator(1, collSep);
serdeParams.setSeparator(2, keySep);
// Create the ObjectInspectors for the fields
cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams.getColumnNames(), serdeParams.getColumnTypes(), serdeParams.getSeparators(), serdeParams.getNullSequence(), serdeParams.isLastColumnTakesRest(), serdeParams.isEscaped(), serdeParams.getEscapeChar());
cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(cachedObjectInspector);
assert serdeParams.getColumnNames().size() == serdeParams.getColumnTypes().size();
numColumns = serdeParams.getColumnNames().size();
}
use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestLazySimpleSerDe method testSerDeParameters.
/**
* Tests the deprecated usage of SerDeParameters.
*/
@Test
@SuppressWarnings("deprecation")
public void testSerDeParameters() throws SerDeException, IOException {
// Setup
LazySimpleSerDe serDe = new LazySimpleSerDe();
Configuration conf = new Configuration();
MyTestClass row = new MyTestClass();
ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
row.randomFill(new Random(1234), extraTypeInfo);
StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
Properties schema = new Properties();
schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
serDe.initialize(conf, schema, null);
SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, schema, "testSerdeName");
// Test
LazyStruct data = (LazyStruct) serializeAndDeserialize(row, rowOI, serDe, serdeParams);
assertEquals((boolean) row.myBool, ((LazyBoolean) data.getField(0)).getWritableObject().get());
assertEquals((int) row.myInt, ((LazyInteger) data.getField(3)).getWritableObject().get());
}
Aggregations