Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.
From the class TestAvroDeserializer, the method canDeserializeBytes:
@Test
public void canDeserializeBytes() throws SerDeException, IOException {
  Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.BYTES_SCHEMA);
  GenericData.Record record = new GenericData.Record(s);
  byte[] bytes = "ANANCIENTBLUEBOX".getBytes();
  ByteBuffer bb = ByteBuffer.wrap(bytes);
  bb.rewind();
  record.put("bytesField", bb);
  assertTrue(GENERIC_DATA.validate(s, record));

  AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
  AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
  AvroDeserializer de = new AvroDeserializer();
  ArrayList<Object> row =
      (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
  assertEquals(1, row.size());

  Object byteObject = row.get(0);
  assertTrue(byteObject instanceof byte[]);
  byte[] outBytes = (byte[]) byteObject;
  // Verify the raw object that's been created
  for (int i = 0; i < bytes.length; i++) {
    assertEquals(bytes[i], outBytes[i]);
  }

  // Now go the correct way, through objectinspectors
  StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
  List<Object> fieldsDataAsList = oi.getStructFieldsDataAsList(row);
  assertEquals(1, fieldsDataAsList.size());
  StructField fieldRef = oi.getStructFieldRef("bytesField");

  outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
  for (int i = 0; i < outBytes.length; i++) {
    assertEquals(bytes[i], outBytes[i]);
  }
}
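The test above reaches the deserialized value two ways: directly from the row list, and then through the ObjectInspector hierarchy. The following minimal sketch is not part of the Hive test suite; the class name StructOISketch and the single string column are invented for illustration. It shows only that second access pattern, using the serde2 ObjectInspector API on plain Java data.

// Standalone sketch: build a standard struct ObjectInspector by hand,
// then read one field out of a positional row through it.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructOISketch {
  public static void main(String[] args) {
    // One string column named "bytesField" stands in for the Avro-derived schema.
    List<String> names = Arrays.asList("bytesField");
    List<ObjectInspector> ois =
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    StandardStructObjectInspector oi =
        ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);

    // A deserialized row is just a positional list (or array) of column values.
    List<Object> row = Arrays.<Object>asList("ANANCIENTBLUEBOX");

    // Field access goes through the struct field reference, as in the test above.
    StructField fieldRef = oi.getStructFieldRef("bytesField");
    Object value = oi.getStructFieldData(row, fieldRef);
    System.out.println(value);   // ANANCIENTBLUEBOX
  }
}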
Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.
From the class TestAvroDeserializer, the method canDeserializeArrays:
@Test
public void canDeserializeArrays() throws SerDeException, IOException {
  Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE);
  GenericData.Record record = new GenericData.Record(s);
  List<String> list = new ArrayList<String>();
  list.add("Eccleston");
  list.add("Tennant");
  list.add("Smith");
  record.put("anArray", list);
  assertTrue(GENERIC_DATA.validate(s, record));
  System.out.println("Array-backed record = " + record);

  AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
  AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
  AvroDeserializer de = new AvroDeserializer();
  ArrayList<Object> row =
      (ArrayList<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
  assertEquals(1, row.size());

  Object theArrayObject = row.get(0);
  assertTrue(theArrayObject instanceof List);
  List theList = (List) theArrayObject;
  // Verify the raw object that's been created
  assertEquals("Eccleston", theList.get(0));
  assertEquals("Tennant", theList.get(1));
  assertEquals("Smith", theList.get(2));

  // Now go the correct way, through objectinspectors
  StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
  StructField fieldRefToArray = oi.getStructFieldRef("anArray");
  Object anArrayData = oi.getStructFieldData(row, fieldRefToArray);
  StandardListObjectInspector anArrayOI =
      (StandardListObjectInspector) fieldRefToArray.getFieldObjectInspector();
  assertEquals(3, anArrayOI.getListLength(anArrayData));

  JavaStringObjectInspector elementOI =
      (JavaStringObjectInspector) anArrayOI.getListElementObjectInspector();

  Object firstElement = anArrayOI.getListElement(anArrayData, 0);
  assertEquals("Eccleston", elementOI.getPrimitiveJavaObject(firstElement));
  assertTrue(firstElement instanceof String);

  Object secondElement = anArrayOI.getListElement(anArrayData, 1);
  assertEquals("Tennant", elementOI.getPrimitiveJavaObject(secondElement));
  assertTrue(secondElement instanceof String);

  Object thirdElement = anArrayOI.getListElement(anArrayData, 2);
  assertEquals("Smith", elementOI.getPrimitiveJavaObject(thirdElement));
  assertTrue(thirdElement instanceof String);
}
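For the array case, the interesting calls are on StandardListObjectInspector. Below is a small standalone sketch of those calls on a plain Java list; it is not taken from the Hive tests, and the class name ListOISketch and the example data are invented.

// Standalone sketch: getListLength / getListElement on ordinary Java data.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ListOISketch {
  public static void main(String[] args) {
    JavaStringObjectInspector elementOI =
        PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    StandardListObjectInspector listOI =
        ObjectInspectorFactory.getStandardListObjectInspector(elementOI);

    List<String> doctors = Arrays.asList("Eccleston", "Tennant", "Smith");

    // The inspector interprets the raw Java List without copying it.
    System.out.println(listOI.getListLength(doctors));              // 3
    Object second = listOI.getListElement(doctors, 1);
    System.out.println(elementOI.getPrimitiveJavaObject(second));   // Tennant
  }
}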
Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.
From the class VectorMapOperator, the method internalSetChildren:
/*
 * Create information for vector map operator.
 * The member oneRootOperator has been set.
 */
private void internalSetChildren(Configuration hconf) throws Exception {

  // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
  // so set it here to none.
  currentReadType = VectorMapOperatorReadType.NONE;

  batchContext = conf.getVectorizedRowBatchCtx();

  /*
   * Use a different batch for vectorized Input File Format readers so they can do their work
   * overlapped with work of the row collection that vector/row deserialization does. This allows
   * the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
   */
  vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
  conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);

  /*
   * This batch is used by vector/row deserializer readers.
   */
  deserializerBatch = batchContext.createVectorizedRowBatch();

  batchCounter = 0;

  dataColumnCount = batchContext.getDataColumnCount();
  partitionColumnCount = batchContext.getPartitionColumnCount();
  partitionValues = new Object[partitionColumnCount];
  dataColumnNums = batchContext.getDataColumnNums();
  Preconditions.checkState(dataColumnNums != null);

  // Form a truncated boolean include array for our vector/row deserializers.
  determineDataColumnsToIncludeTruncated();

  /*
   * Create table related objects
   */
  final String[] rowColumnNames = batchContext.getRowColumnNames();
  final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
  tableStructTypeInfo =
      TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
  tableStandardStructObjectInspector =
      (StandardStructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);

  tableRowTypeInfos = batchContext.getRowColumnTypeInfos();

  /*
   * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
   * the included columns (+ partition columns).
   *
   * For now, we need to model the object inspector rows because there are still several
   * vectorized operators that use them.
   *
   * We need to continue to model the Object[] as having null objects for not included columns
   * until the following has been fixed:
   *   o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
   *   o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
   *     use the row super class to process rows.
   */

  /*
   * The Vectorizer class enforces that there is only one TableScanOperator, so
   * we don't need the more complicated multiple root operator mapping that MapOperator has.
   */
  fileToPartitionContextMap = new HashMap<String, VectorPartitionContext>();

  // Temporary map so we only create one partition context entry.
  HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap =
      new HashMap<PartitionDesc, VectorPartitionContext>();

  for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
    Path path = entry.getKey();
    PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);

    VectorPartitionContext vectorPartitionContext;
    if (!partitionContextMap.containsKey(partDesc)) {
      vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
      partitionContextMap.put(partDesc, vectorPartitionContext);
    } else {
      vectorPartitionContext = partitionContextMap.get(partDesc);
    }

    fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
  }

  // Create list of one.
  List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
  children.add(oneRootOperator);

  setChildOperators(children);
}
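The table-level objects built in the middle of this method come from two serde2 utility calls: TypeInfoFactory.getStructTypeInfo turns column names and TypeInfos into a struct TypeInfo, and TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo derives the standard writable struct ObjectInspector from it. The sketch below isolates just that pair; it is a standalone illustration, and the class name TableOISketch and the id/name columns are invented rather than taken from any Hive partition.

// Standalone sketch of the table-OI construction pattern used above.
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TableOISketch {
  public static void main(String[] args) {
    String[] rowColumnNames = { "id", "name" };   // hypothetical columns
    TypeInfo[] rowColumnTypeInfos = { TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo };

    // Struct TypeInfo over the row's columns.
    TypeInfo tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(
        Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));

    // Standard *writable* struct OI: columns are exposed as Hadoop Writables (IntWritable, Text, ...).
    StandardStructObjectInspector tableOI = (StandardStructObjectInspector)
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);

    System.out.println(tableOI.getTypeName());   // struct<id:int,name:string>
  }
}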
Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project hive by apache.
From the class Utilities, the method constructVectorizedReduceRowOI:
/**
 * Create row key and value object inspectors for reduce vectorization.
 * The row object inspector used by ReduceWork needs to be a **standard**
 * struct object inspector, not just any struct object inspector.
 * @param keyInspector
 * @param valueInspector
 * @return OI
 * @throws HiveException
 */
public static StandardStructObjectInspector constructVectorizedReduceRowOI(
    StructObjectInspector keyInspector, StructObjectInspector valueInspector) throws HiveException {

  ArrayList<String> colNames = new ArrayList<String>();
  ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
  List<? extends StructField> fields = keyInspector.getAllStructFieldRefs();
  for (StructField field : fields) {
    colNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
    ois.add(field.getFieldObjectInspector());
  }
  fields = valueInspector.getAllStructFieldRefs();
  for (StructField field : fields) {
    colNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
    ois.add(field.getFieldObjectInspector());
  }
  StandardStructObjectInspector rowObjectInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois);
  return rowObjectInspector;
}
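The method flattens a key struct OI and a value struct OI into one standard struct OI whose columns carry KEY. and VALUE. prefixes. The following standalone sketch mirrors that merge using only the serde2 API (avoiding hive-exec's Utilities class); the class name ReduceRowOISketch and the userid/clicks columns are invented for illustration.

// Standalone sketch of the same KEY./VALUE. column merge.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ReduceRowOISketch {

  static StandardStructObjectInspector merge(StructObjectInspector key, StructObjectInspector value) {
    List<String> colNames = new ArrayList<String>();
    List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    for (StructField f : key.getAllStructFieldRefs()) {
      colNames.add("KEY." + f.getFieldName());     // Utilities.ReduceField.KEY in the real method
      ois.add(f.getFieldObjectInspector());
    }
    for (StructField f : value.getAllStructFieldRefs()) {
      colNames.add("VALUE." + f.getFieldName());   // Utilities.ReduceField.VALUE in the real method
      ois.add(f.getFieldObjectInspector());
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois);
  }

  public static void main(String[] args) {
    StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("userid"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaLongObjectInspector));
    StructObjectInspector valueOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("clicks"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaIntObjectInspector));

    // Field names come back lowercased, e.g. struct<key.userid:bigint,value.clicks:int>.
    System.out.println(merge(keyOI, valueOI).getTypeName());
  }
}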
Use of org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector in project haivvreo by jghoman.
From the class TestAvroObjectInspectorGenerator, the method primitiveTypesWorkCorrectly:
@Test
public void primitiveTypesWorkCorrectly() throws SerDeException {
  final String bunchOfPrimitives = "{\n" +
      " \"namespace\": \"testing\",\n" +
      " \"name\": \"PrimitiveTypes\",\n" +
      " \"type\": \"record\",\n" +
      " \"fields\": [\n" +
      " {\n" + " \"name\":\"aString\",\n" + " \"type\":\"string\"\n" + " },\n" +
      " {\n" + " \"name\":\"anInt\",\n" + " \"type\":\"int\"\n" + " },\n" +
      " {\n" + " \"name\":\"aBoolean\",\n" + " \"type\":\"boolean\"\n" + " },\n" +
      " {\n" + " \"name\":\"aLong\",\n" + " \"type\":\"long\"\n" + " },\n" +
      " {\n" + " \"name\":\"aFloat\",\n" + " \"type\":\"float\"\n" + " },\n" +
      " {\n" + " \"name\":\"aDouble\",\n" + " \"type\":\"double\"\n" + " },\n" +
      " {\n" + " \"name\":\"aNull\",\n" + " \"type\":\"null\"\n" + " }\n" +
      " ]\n" +
      "}";
  AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(Schema.parse(bunchOfPrimitives));

  String[] expectedColumnNames = { "aString", "anInt", "aBoolean", "aLong", "aFloat", "aDouble", "aNull" };
  verifyColumnNames(expectedColumnNames, aoig.getColumnNames());

  TypeInfo[] expectedColumnTypes = { STRING, INT, BOOLEAN, LONG, FLOAT, DOUBLE, VOID };
  verifyColumnTypes(expectedColumnTypes, aoig.getColumnTypes());

  // Rip apart the object inspector, making sure we got what we expect.
  final ObjectInspector oi = aoig.getObjectInspector();
  assertTrue(oi instanceof StandardStructObjectInspector);
  final StandardStructObjectInspector ssoi = (StandardStructObjectInspector) oi;
  List<? extends StructField> structFields = ssoi.getAllStructFieldRefs();
  assertEquals(expectedColumnNames.length, structFields.size());

  for (int i = 0; i < expectedColumnNames.length; i++) {
    assertEquals("Column names don't match",
        expectedColumnNames[i].toLowerCase(), structFields.get(i).getFieldName());
    assertEquals("Column types don't match",
        expectedColumnTypes[i].getTypeName(), structFields.get(i).getFieldObjectInspector().getTypeName());
  }
}
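The assertion loop at the end walks the generated struct's fields and compares names and type names. Below is a small generic sketch of that field-walking pattern on a hand-built StandardStructObjectInspector; the class name DescribeStructOI and its three columns are made up and are not part of the haivvreo tests.

// Standalone sketch: list field names and Hive type names of any standard struct OI.
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class DescribeStructOI {

  static void describe(StandardStructObjectInspector ssoi) {
    for (StructField f : ssoi.getAllStructFieldRefs()) {
      // Field names come back lowercased; type names are Hive type names (string, int, boolean, ...).
      System.out.println(f.getFieldName() + " : " + f.getFieldObjectInspector().getTypeName());
    }
  }

  public static void main(String[] args) {
    StandardStructObjectInspector ssoi = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("aString", "anInt", "aBoolean"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaBooleanObjectInspector));
    describe(ssoi);   // astring : string, anint : int, aboolean : boolean
  }
}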