use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestInputOutputFormat method testACIDReaderFooterSerializeWithDeltas.
/**
 * Writes one ORC file directly under the table directory and one under a
 * {@code delta_001_002} directory, then checks that split generation with
 * {@code hive.orc.splits.include.file.footer=true} yields a single {@link OrcSplit}
 * that carries the footer, reports a base and one delta, and that opening record
 * readers for the splits costs exactly three read operations on the mock file system.
 *
 * @throws Exception if writing the ORC files, generating splits or opening a reader fails
 */
@Test
public void testACIDReaderFooterSerializeWithDeltas() throws Exception {
  MockFileSystem fs = new MockFileSystem(conf);
  MockPath mockPath = new MockPath(fs, "mock:///mocktable8");
  conf.set("hive.transactional.table.scan", "true");
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
  conf.set("hive.orc.splits.include.file.footer", "true");
  conf.set("mapred.input.dir", mockPath.toString());
  conf.set("fs.defaultFS", "mock:///");
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  try {
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // File written directly under the table directory.
    Writer writer = OrcFile.createWriter(new Path(mockPath + "/0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
      writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    // File written under the delta directory.
    writer = OrcFile.createWriter(new Path(new Path(mockPath + "/delta_001_002") + "/0_1"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
      writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(1, splits.length);
    // Snapshot the read-op counter so only reads triggered by the record readers are counted.
    int readOpsBefore = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
      if (statistics.getScheme().equalsIgnoreCase("mock")) {
        readOpsBefore = statistics.getReadOps();
      }
    }
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    for (InputSplit split : splits) {
      assertTrue("OrcSplit is expected", split instanceof OrcSplit);
      String description = split.toString();
      // ETL strategies will have start=3 (start of first stripe)
      assertTrue(description.contains("start=3"));
      assertTrue(description.contains("hasFooter=true"));
      assertTrue(description.contains("hasBase=true"));
      // NOTE: don't be surprised if deltas value is different
      // in older release deltas=2 as min and max transaction are added separately to delta list.
      // in newer release since both of them are put together deltas=1
      assertTrue(description.contains("deltas=1"));
      // The instanceof assertion above already guarantees this cast is safe.
      assertTrue("Footer serialize test for ACID reader, hasFooter is expected in orc splits.", ((OrcSplit) split).hasFooter());
      orcInputFormat.getRecordReader(split, conf, Reporter.NULL);
    }
    int readOpsDelta = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
      if (statistics.getScheme().equalsIgnoreCase("mock")) {
        readOpsDelta = statistics.getReadOps() - readOpsBefore;
      }
    }
    // call-1: open to read data - split 1 => mock:/mocktable8/0_0
    // call-2: open side file (flush length) of delta directory
    // call-3: fs.exists() check for delta_xxx_xxx/bucket_00000 file
    assertEquals(3, readOpsDelta);
  } finally {
    // revert back to local fs, even when an assertion above failed
    conf.set("fs.defaultFS", "file:///");
  }
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class OperatorTestUtils method assertResults.
/**
 * Feeds every row of {@code sourceData} through the select operator, retrieves the
 * resulting row from the collect operator and asserts that it matches the row at the
 * same index in {@code expected}.
 *
 * <p>For each expected column the column of the same name is looked up in the
 * operator's output; its object inspector must equal the expected one and its value
 * must compare equal via {@link ObjectInspectorUtils#compare}. The select operator is
 * closed once all rows have been checked.
 *
 * @param selectOp operator that receives each source row
 * @param collectOp operator the produced rows are retrieved from
 * @param sourceData rows to push through {@code selectOp}; only the {@code o} member is used
 * @param expected expected rows, one per entry in {@code sourceData}
 * @throws HiveException if processing a row or closing the operator fails
 */
public static void assertResults(Operator<SelectDesc> selectOp, CollectOperator collectOp, InspectableObject[] sourceData, InspectableObject[] expected) throws HiveException {
  InspectableObject resultRef = new InspectableObject();
  for (int i = 0; i < sourceData.length; i++) {
    selectOp.process(sourceData[i].o, 0);
    collectOp.retrieve(resultRef);
    StructObjectInspector expectedOi = (StructObjectInspector) expected[i].oi;
    List<? extends StructField> expectedFields = expectedOi.getAllStructFieldRefs();
    StructObjectInspector destinationOi = (StructObjectInspector) resultRef.oi;
    List<? extends StructField> destinationFields = destinationOi.getAllStructFieldRefs();
    Assert.assertEquals("Source and destination have differing numbers of fields ", expectedFields.size(), destinationFields.size());
    for (StructField field : expectedFields) {
      // Resolve the column in the operator OUTPUT. Looking it up on expectedOi would
      // compare the expected row with itself, so the checks below could never fail.
      StructField dest = destinationOi.getStructFieldRef(field.getFieldName());
      Assert.assertNotNull("Cound not find column named " + field.getFieldName(), dest);
      Assert.assertEquals(field.getFieldObjectInspector(), dest.getFieldObjectInspector());
      Object expectedValue = expectedOi.getStructFieldData(expected[i].o, field);
      Object actualValue = destinationOi.getStructFieldData(resultRef.o, dest);
      Assert.assertEquals("comparing " + expectedValue + " " + field.getFieldObjectInspector().getClass().getSimpleName() + " to " + actualValue + " " + dest.getFieldObjectInspector().getClass().getSimpleName(), 0, ObjectInspectorUtils.compare(expectedValue, field.getFieldObjectInspector(), actualValue, dest.getFieldObjectInspector()));
    }
  }
  selectOp.close(false);
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class ObjectInspectorConverters method getConvertedOI.
/**
 * Produces an object inspector for {@code outputOI} in which every field, at every
 * nesting level, is settable.
 *
 * <p>{@code outputOI} itself is returned unchanged when either
 * <ul>
 * <li>{@code equalsCheck} is true and {@code inputOI.equals(outputOI)}, or</li>
 * <li>{@link ObjectInspectorUtils#hasAllFieldsSettable} reports that it is already
 * fully settable.</li>
 * </ul>
 * Only in the first case may the returned inspector contain non-settable fields.
 * Otherwise a writable primitive inspector or a standard struct/list/map/union
 * inspector is built, converting the nested inspectors recursively.
 *
 * @param inputOI : input object inspector
 * @param outputOI : output object inspector
 * @param oiSettableProperties : object inspector to isSettable mapping used to cache
 *          intermediate results
 * @param equalsCheck : true to return {@code outputOI} directly when it equals
 *          {@code inputOI}; false to skip that equality check
 * @return : an object inspector for the output type; all of its fields are settable
 *         unless it was returned because of the equality check
 */
public static ObjectInspector getConvertedOI(ObjectInspector inputOI, ObjectInspector outputOI, Map<ObjectInspector, Boolean> oiSettableProperties, boolean equalsCheck) {
  // Case 1: the caller asked for an equality check and both inspectors are equal.
  if (equalsCheck && inputOI.equals(outputOI)) {
    return outputOI;
  }
  // Case 2: the output inspector is already settable all the way down.
  if (ObjectInspectorUtils.hasAllFieldsSettable(outputOI, oiSettableProperties)) {
    return outputOI;
  }
  // Otherwise rebuild the inspector from settable parts. The recursive calls pass
  // equalsCheck=false because both arguments are the same nested inspector and the
  // equality shortcut must not fire.
  switch (outputOI.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) outputOI;
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitive.getTypeInfo());
    }
    case STRUCT: {
      List<? extends StructField> fields = ((StructObjectInspector) outputOI).getAllStructFieldRefs();
      int fieldCount = fields.size();
      List<String> names = new ArrayList<String>(fieldCount);
      List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(fieldCount);
      for (StructField structField : fields) {
        names.add(structField.getFieldName());
        ObjectInspector fieldOI = structField.getFieldObjectInspector();
        inspectors.add(getConvertedOI(fieldOI, fieldOI, oiSettableProperties, false));
      }
      return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
    }
    case LIST: {
      ObjectInspector elementOI = ((ListObjectInspector) outputOI).getListElementObjectInspector();
      ObjectInspector settableElementOI = getConvertedOI(elementOI, elementOI, oiSettableProperties, false);
      return ObjectInspectorFactory.getStandardListObjectInspector(settableElementOI);
    }
    case MAP: {
      MapObjectInspector mapOI = (MapObjectInspector) outputOI;
      ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
      ObjectInspector settableKeyOI = getConvertedOI(keyOI, keyOI, oiSettableProperties, false);
      ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
      ObjectInspector settableValueOI = getConvertedOI(valueOI, valueOI, oiSettableProperties, false);
      return ObjectInspectorFactory.getStandardMapObjectInspector(settableKeyOI, settableValueOI);
    }
    case UNION: {
      List<ObjectInspector> alternatives = ((UnionObjectInspector) outputOI).getObjectInspectors();
      List<ObjectInspector> settableAlternatives = new ArrayList<ObjectInspector>(alternatives.size());
      for (ObjectInspector alternative : alternatives) {
        settableAlternatives.add(getConvertedOI(alternative, alternative, oiSettableProperties, false));
      }
      return ObjectInspectorFactory.getStandardUnionObjectInspector(settableAlternatives);
    }
    default:
      // Unsupported in-memory structure.
      throw new RuntimeException("Hive internal error: conversion of " + inputOI.getTypeName() + " to " + outputOI.getTypeName() + " not supported yet.");
  }
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class ObjectInspectorFactory method getReflectionObjectInspectorNoCache.
/**
 * Derives an object inspector for the reflected type {@code t}.
 *
 * <p>Generic arrays and parameterized {@code List}/{@code Set} types map to a standard
 * list inspector, parameterized {@code Map} types to a standard map inspector. Java
 * primitives, primitive wrapper classes and primitive writables map to the matching
 * primitive inspector, and enums are inspected as Java strings. Any other class is
 * treated as a struct whose inspector flavor is chosen by {@code options}.
 *
 * <p>A struct inspector is placed into {@code objectInspectorCache} before it is
 * initialized so that recursive types can be resolved. If another inspector is already
 * cached for the type, that one is returned instead. If initialization does not
 * complete, the entry is removed from the cache again.
 *
 * @param t the reflected type to inspect
 * @param options selects the struct inspector flavor (JAVA, THRIFT or PROTOCOL_BUFFERS)
 * @param ensureInited forwarded unchanged to the nested
 *          {@code getReflectionObjectInspector} calls for element, key and value types
 * @return the object inspector for {@code t}
 * @throws RuntimeException if {@code t} does not resolve to a class or {@code options}
 *           is not one of the handled values
 */
private static ObjectInspector getReflectionObjectInspectorNoCache(Type t, ObjectInspectorOptions options, boolean ensureInited) {
  if (t instanceof GenericArrayType) {
    Type componentType = ((GenericArrayType) t).getGenericComponentType();
    return getStandardListObjectInspector(getReflectionObjectInspector(componentType, options, ensureInited));
  }
  if (t instanceof ParameterizedType) {
    ParameterizedType parameterized = (ParameterizedType) t;
    Class<?> rawClass = (Class<?>) parameterized.getRawType();
    // List or Set: inspected as a list of the first type argument.
    if (List.class.isAssignableFrom(rawClass) || Set.class.isAssignableFrom(rawClass)) {
      ObjectInspector elementOI = getReflectionObjectInspector(parameterized.getActualTypeArguments()[0], options, ensureInited);
      return getStandardListObjectInspector(elementOI);
    }
    // Map: key inspector is resolved before the value inspector.
    if (Map.class.isAssignableFrom(rawClass)) {
      Type[] typeArguments = parameterized.getActualTypeArguments();
      ObjectInspector keyOI = getReflectionObjectInspector(typeArguments[0], options, ensureInited);
      ObjectInspector valueOI = getReflectionObjectInspector(typeArguments[1], options, ensureInited);
      return getStandardMapObjectInspector(keyOI, valueOI);
    }
    // Any other parameterized type is handled through its raw class below.
    t = rawClass;
  }
  // Must be a class.
  if (!(t instanceof Class)) {
    throw new RuntimeException(ObjectInspectorFactory.class.getName() + " internal error:" + t);
  }
  Class<?> c = (Class<?>) t;
  // Java primitive type?
  if (PrimitiveObjectInspectorUtils.isPrimitiveJavaType(c)) {
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaType(c).primitiveCategory);
  }
  // Java primitive class?
  if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(c)) {
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(c).primitiveCategory);
  }
  // Primitive writable class?
  if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(c)) {
    return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveWritableClass(c).primitiveCategory);
  }
  // Enum class?
  if (Enum.class.isAssignableFrom(c)) {
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
  }
  // Must be struct because List and Map need to be ParameterizedType
  assert (!List.class.isAssignableFrom(c));
  assert (!Map.class.isAssignableFrom(c));
  // Pick the struct inspector implementation.
  ReflectionStructObjectInspector oi;
  switch (options) {
    case JAVA:
      oi = new ReflectionStructObjectInspector();
      break;
    case THRIFT:
      if (TUnion.class.isAssignableFrom(c)) {
        oi = new ThriftUnionObjectInspector();
      } else {
        oi = new ThriftStructObjectInspector();
      }
      break;
    case PROTOCOL_BUFFERS:
      oi = new ProtocolBuffersStructObjectInspector();
      break;
    default:
      throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error.");
  }
  // Register in the cache BEFORE initializing so recursive types can be caught.
  ReflectionStructObjectInspector cached = (ReflectionStructObjectInspector) objectInspectorCache.putIfAbsent(t, oi);
  if (cached != null) {
    // An inspector for this type is already registered; use it.
    return cached;
  }
  try {
    oi.init(t, c, options);
  } finally {
    if (!oi.inited) {
      // Failed to init, remove it from cache
      objectInspectorCache.remove(t, oi);
    }
  }
  return oi;
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class ObjectInspectorUtils method hasAllFieldsSettable.
/**
 * Reports whether an object inspector and every object inspector nested inside it are
 * settable.
 *
 * @param oi - Input object inspector
 * @param oiSettableProperties - Lookup map used to cache results (pass null to disable
 *          the lookup)
 * @return - true only if {@code oi} is an instance of a settable object inspector and
 *         all of its embedded object inspectors (struct fields, list elements, map keys
 *         and values, union alternatives) are settable as well; false otherwise.
 * @throws RuntimeException if the category of {@code oi} is not one of the handled ones
 */
public static boolean hasAllFieldsSettable(ObjectInspector oi, Map<ObjectInspector, Boolean> oiSettableProperties) {
  // Serve the answer from the cache when one is supplied and already knows this inspector.
  if (oiSettableProperties != null && oiSettableProperties.containsKey(oi)) {
    return oiSettableProperties.get(oi).booleanValue();
  }
  // A non-settable top-level inspector settles the question immediately.
  if (!isInstanceOfSettableOI(oi)) {
    return setOISettablePropertiesMap(oi, oiSettableProperties, false);
  }
  boolean allSettable = true;
  switch (oi.getCategory()) {
    case PRIMITIVE:
      // Nothing nested to inspect.
      break;
    case STRUCT:
      for (StructField structField : ((StructObjectInspector) oi).getAllStructFieldRefs()) {
        if (!hasAllFieldsSettable(structField.getFieldObjectInspector(), oiSettableProperties)) {
          allSettable = false;
          break;
        }
      }
      break;
    case LIST:
      allSettable = hasAllFieldsSettable(((ListObjectInspector) oi).getListElementObjectInspector(), oiSettableProperties);
      break;
    case MAP:
      MapObjectInspector mapOI = (MapObjectInspector) oi;
      allSettable = hasAllFieldsSettable(mapOI.getMapKeyObjectInspector(), oiSettableProperties);
      if (allSettable) {
        // The value inspector is only examined when the key inspector passed.
        allSettable = hasAllFieldsSettable(mapOI.getMapValueObjectInspector(), oiSettableProperties);
      }
      break;
    case UNION:
      for (ObjectInspector alternative : ((UnionObjectInspector) oi).getObjectInspectors()) {
        if (!hasAllFieldsSettable(alternative, oiSettableProperties)) {
          allSettable = false;
          break;
        }
      }
      break;
    default:
      throw new RuntimeException("Hive internal error inside hasAllFieldsSettable : " + oi.getTypeName() + " not supported yet.");
  }
  return setOISettablePropertiesMap(oi, oiSettableProperties, allSettable);
}
Aggregations