use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
the class SkewJoinHandler method initiliaze.
public void initiliaze(Configuration hconf) {
  this.hconf = hconf;
  JoinDesc desc = joinOp.getConf();
  skewKeyDefinition = desc.getSkewKeyDefinition();
  skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(numAliases);
  tblDesc = desc.getSkewKeysValuesTables();
  tblSerializers = new HashMap<Byte, AbstractSerDe>(numAliases);
  bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
  taskId = Utilities.getTaskId(hconf);
  int[][] filterMap = desc.getFilterMap();
  for (int i = 0; i < numAliases; i++) {
    Byte alias = conf.getTagOrder()[i];
    List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
    StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias];
    StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
    List<? extends StructField> keyFields =
        ((StructObjectInspector) sf.getFieldObjectInspector()).getAllStructFieldRefs();
    int keyFieldSize = keyFields.size();
    for (int k = 0; k < keyFieldSize; k++) {
      skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector());
    }
    TableDesc joinKeyDesc = desc.getKeyTableDesc();
    List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc.getProperties());
    StructObjectInspector structTblKeyInpector =
        ObjectInspectorFactory.getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);
    try {
      AbstractSerDe serializer = (AbstractSerDe) ReflectionUtils.newInstance(
          tblDesc.get(alias).getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(serializer, null, tblDesc.get(alias).getProperties(), null);
      tblSerializers.put((byte) i, serializer);
    } catch (SerDeException e) {
      LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
      joinOp.handleSkewJoin = false;
      break;
    }
    boolean hasFilter = filterMap != null && filterMap[i] != null;
    TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias, joinOp.spillTableDesc, conf, !hasFilter);
    List<String> valColNames = new ArrayList<String>();
    if (valTblDesc != null) {
      valColNames = Utilities.getColumnNames(valTblDesc.getProperties());
    }
    StructObjectInspector structTblValInpector = ObjectInspectorFactory.getStandardStructObjectInspector(
        valColNames, joinOp.joinValuesStandardObjectInspectors[i]);
    StructObjectInspector structTblInpector = ObjectInspectorFactory.getUnionStructObjectInspector(
        Arrays.asList(structTblValInpector, structTblKeyInpector));
    skewKeysTableObjectInspector.put((byte) i, structTblInpector);
  }
  // reset rowcontainer's serde, objectinspector, and tableDesc.
  for (int i = 0; i < numAliases; i++) {
    Byte alias = conf.getTagOrder()[i];
    RowContainer<ArrayList<Object>> rc = (RowContainer) joinOp.storage[i];
    if (rc != null) {
      rc.setSerDe(tblSerializers.get((byte) i), skewKeysTableObjectInspector.get((byte) i));
      rc.setTableDesc(tblDesc.get(alias));
    }
  }
}
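
The last factory call above is the interesting one: the value-side struct and the key-side struct are concatenated into a single flat row inspector via getUnionStructObjectInspector. A minimal, self-contained sketch of that same pattern, with invented field names (v1, k1) and primitive types standing in for the real join columns:

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class UnionStructOISketch {
  public static void main(String[] args) {
    // Value-side struct: a single hypothetical string column "v1".
    StructObjectInspector valOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("v1"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    // Key-side struct: a single hypothetical int column "k1".
    StructObjectInspector keyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("k1"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaIntObjectInspector));
    // Concatenate both structs into one flat row inspector, mirroring the
    // values-then-keys layout the handler builds for its spill tables.
    StructObjectInspector rowOI = ObjectInspectorFactory.getUnionStructObjectInspector(
        Arrays.asList(valOI, keyOI));
    System.out.println(rowOI.getTypeName()); // expected: struct<v1:string,k1:int>
  }
}

The union inspector exposes the fields of both structs in order, which is what lets a spill-table row carry the join values and the join keys side by side.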
use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
the class VectorUDAFAvgDecimal method initPartialResultInspector.
private void initPartialResultInspector() {
  // the output type of the vectorized partial aggregate must match the
  // expected type for the row-mode aggregation
  // For decimal, the type is "same number of integer digits and 4 more decimal digits"
  DecimalTypeInfo dtiSum = GenericUDAFAverage.deriveSumFieldTypeInfo(inputPrecision, inputScale);
  this.sumScale = (short) dtiSum.scale();
  this.sumPrecision = (short) dtiSum.precision();
  List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
  foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
  foi.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(dtiSum));
  List<String> fname = new ArrayList<String>();
  fname.add("count");
  fname.add("sum");
  soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
}
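
The partial result is deliberately shaped as struct<count:bigint,sum:decimal(p,s)> so the vectorized path and the row-mode GenericUDAFAverage agree on the intermediate layout. A standalone sketch of the same construction, assuming a hard-coded decimal(20,4) sum type in place of the one derived from the input precision and scale:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class AvgPartialResultSketch {
  public static void main(String[] args) {
    // Assumed sum type; the real code derives this from the input type.
    DecimalTypeInfo sumType = TypeInfoFactory.getDecimalTypeInfo(20, 4);
    List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
    fieldOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(sumType));
    List<String> fieldNames = new ArrayList<String>();
    fieldNames.add("count");
    fieldNames.add("sum");
    StructObjectInspector partialOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    System.out.println(partialOI.getTypeName()); // expected: struct<count:bigint,sum:decimal(20,4)>
  }
}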
use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
the class RegexSerDe method initialize.
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // We can get the table definition from tbl.
  // Read the configuration parameters
  inputRegex = tbl.getProperty(INPUT_REGEX);
  String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));
  // output format string is not supported anymore, warn user of deprecation
  if (null != tbl.getProperty("output.format.string")) {
    LOG.warn("output.format.string has been deprecated");
  }
  // Parse the configuration parameters
  if (inputRegex != null) {
    inputPattern = Pattern.compile(inputRegex,
        Pattern.DOTALL + (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
  } else {
    inputPattern = null;
    throw new SerDeException("This table does not have serde property \"input.regex\"!");
  }
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  assert columnNames.size() == columnTypes.size();
  numColumns = columnNames.size();
  /* Constructing the row ObjectInspector:
   * The row consists of some set of primitive columns, each column will
   * be a java object of primitive type.
   */
  List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
  for (int c = 0; c < numColumns; c++) {
    TypeInfo typeInfo = columnTypes.get(c);
    if (typeInfo instanceof PrimitiveTypeInfo) {
      PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
      AbstractPrimitiveJavaObjectInspector oi =
          PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
      columnOIs.add(oi);
    } else {
      throw new SerDeException(getClass().getName() + " doesn't allow column [" + c + "] named "
          + columnNames.get(c) + " with type " + columnTypes.get(c));
    }
  }
  // StandardStruct uses ArrayList to store the row.
  rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs,
      Lists.newArrayList(Splitter.on('\0').split(tbl.getProperty("columns.comments"))));
  row = new ArrayList<Object>(numColumns);
  // Constructing the row object, etc, which will be reused for all rows.
  for (int c = 0; c < numColumns; c++) {
    row.add(null);
  }
  outputFields = new Object[numColumns];
  outputRowText = new Text();
}
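
Everything the row inspector needs is derived from the table properties. A minimal sketch of that derivation with the property values inlined as assumptions (hypothetical columns host and status, type string "string,int"), plus a field lookup through the resulting inspector:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class RegexRowOISketch {
  public static void main(String[] args) {
    // Stand-ins for the LIST_COLUMNS and LIST_COLUMN_TYPES table properties.
    List<String> columnNames = Arrays.asList("host", "status");
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("string,int");
    // One Java primitive inspector per column, as the SerDe builds them.
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (TypeInfo t : columnTypes) {
      columnOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) t));
    }
    StructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    // A deserialized row is just a list of Java objects in field order.
    List<Object> row = Arrays.asList((Object) "example.com", 200);
    StructField status = rowOI.getStructFieldRef("status");
    System.out.println(rowOI.getStructFieldData(row, status)); // expected: 200
  }
}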
use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
the class DynamicSerDe method dynamicSerDeStructBaseToObjectInspector.
public static ObjectInspector dynamicSerDeStructBaseToObjectInspector(DynamicSerDeTypeBase bt) throws SerDeException {
  if (bt.isList()) {
    return ObjectInspectorFactory.getStandardListObjectInspector(
        dynamicSerDeStructBaseToObjectInspector(((DynamicSerDeTypeList) bt).getElementType()));
  } else if (bt.isMap()) {
    DynamicSerDeTypeMap btMap = (DynamicSerDeTypeMap) bt;
    return ObjectInspectorFactory.getStandardMapObjectInspector(
        dynamicSerDeStructBaseToObjectInspector(btMap.getKeyType()),
        dynamicSerDeStructBaseToObjectInspector(btMap.getValueType()));
  } else if (bt.isPrimitive()) {
    PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(bt.getRealType());
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pte.primitiveCategory);
  } else {
    // Must be a struct
    DynamicSerDeStructBase btStruct = (DynamicSerDeStructBase) bt;
    DynamicSerDeFieldList fieldList = btStruct.getFieldList();
    DynamicSerDeField[] fields = fieldList.getChildren();
    ArrayList<String> fieldNames = new ArrayList<String>(fields.length);
    ArrayList<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fields.length);
    for (DynamicSerDeField field : fields) {
      fieldNames.add(field.name);
      fieldObjectInspectors.add(dynamicSerDeStructBaseToObjectInspector(field.getFieldType().getMyType()));
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors);
  }
}
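
Because the method recurses, nested Thrift DDL types turn into nested standard inspectors. A minimal sketch of the kind of inspector a struct with a list field and a map field would resolve to, built directly against the factory with invented field names:

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class NestedStructOISketch {
  public static void main(String[] args) {
    // list<string>
    ObjectInspector tagsOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    // map<string,int>
    ObjectInspector countsOI = ObjectInspectorFactory.getStandardMapObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    // struct<tags:list<string>,counts:map<string,int>>
    StructObjectInspector structOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("tags", "counts"),
        Arrays.asList(tagsOI, countsOI));
    System.out.println(structOI.getTypeName());
  }
}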
use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project mongo-hadoop by mongodb.
the class BSONSerDeTest method testStruct.
@Test
public void testStruct() throws SerDeException {
  String columnNames = "m";
  String columnTypes = "struct<one:int,two:string>";
  BasicBSONObject value = new BasicBSONObject();
  int oneValue = 10;
  String twoValue = "key";
  value.put("one", oneValue);
  value.put("two", twoValue);
  // Structs come back as arrays
  ArrayList<Object> returned = new ArrayList<Object>();
  returned.add(oneValue);
  returned.add(twoValue);
  BSONSerDe serde = new BSONSerDe();
  Object result = helpDeserialize(serde, columnNames, columnTypes, value, true);
  assertThat(returned, equalTo(result));
  // A struct must have an array or list of inner inspector types
  ArrayList<ObjectInspector> innerInspectorList = new ArrayList<ObjectInspector>();
  innerInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(Integer.class));
  innerInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(String.class));
  // As well as a fields list
  ArrayList<String> innerFieldsList = new ArrayList<String>();
  innerFieldsList.add("one");
  innerFieldsList.add("two");
  // Then you get that inner struct's inspector
  StructObjectInspector structInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(innerFieldsList, innerInspectorList);
  // Which is used to get the overall struct inspector
  StructObjectInspector oi = createObjectInspector(columnNames, structInspector);
  // This should be how it turns out
  BasicBSONObject bObject = new BasicBSONObject();
  bObject.put(columnNames, value);
  // But structs are stored as array/list inside hive, so this is passed in
  ArrayList<Object> obj = new ArrayList<Object>();
  obj.add(returned);
  Object serialized = serde.serialize(obj, oi);
  assertThat(new BSONWritable(bObject), equalTo(serialized));
}
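
The test hinges on Hive's convention that a struct value travels as a plain list in field declaration order. A minimal sketch that builds the same struct<one:int,two:string> inspector and walks a list-backed row through it:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructAsListSketch {
  public static void main(String[] args) {
    StructObjectInspector oi = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("one", "two"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    // Hive hands struct values around as plain lists in field order,
    // which is why the test above compares against an ArrayList.
    List<Object> struct = new ArrayList<Object>(Arrays.<Object>asList(10, "key"));
    for (StructField f : oi.getAllStructFieldRefs()) {
      System.out.println(f.getFieldName() + " = " + oi.getStructFieldData(struct, f));
    }
  }
}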