Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
The class MaskTransformer, method getIntArg.
int getIntArg(ObjectInspector[] arguments, int index, int defaultValue) {
  int ret = defaultValue;
  ObjectInspector arg = (arguments != null && arguments.length > index) ? arguments[index] : null;
  if (arg != null) {
    if (arg instanceof WritableConstantIntObjectInspector) {
      IntWritable value = ((WritableConstantIntObjectInspector) arg).getWritableConstantValue();
      if (value != null) {
        ret = value.get();
      }
    } else if (arg instanceof WritableConstantLongObjectInspector) {
      LongWritable value = ((WritableConstantLongObjectInspector) arg).getWritableConstantValue();
      if (value != null) {
        ret = (int) value.get();
      }
    } else if (arg instanceof WritableConstantShortObjectInspector) {
      ShortWritable value = ((WritableConstantShortObjectInspector) arg).getWritableConstantValue();
      if (value != null) {
        ret = value.get();
      }
    } else if (arg instanceof ConstantObjectInspector) {
      Object value = ((ConstantObjectInspector) arg).getWritableConstantValue();
      if (value != null) {
        String strValue = value.toString();
        if (strValue != null && strValue.length() > 0) {
          ret = Integer.parseInt(strValue);
        }
      }
    }
  }
  return ret;
}
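For context, a minimal sketch of how getIntArg might be driven from a mask UDF's initialize() call. The argument layout, the constant value 4, and the mask_show_first_n-style call are illustrative assumptions, not taken from the Hive source; the constant inspector is built with the standard factory overload that takes a PrimitiveTypeInfo and a writable value.

// Hypothetical caller: arguments[0] is the column being masked, arguments[1] a constant
// int literal (e.g. mask_show_first_n(col, 4)). Values and layout are illustrative only.
ObjectInspector[] arguments = new ObjectInspector[] {
  PrimitiveObjectInspectorFactory.writableStringObjectInspector,
  PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
      TypeInfoFactory.intTypeInfo, new IntWritable(4))
};
int charCount = getIntArg(arguments, 1, -1);  // constant found -> 4
int missing = getIntArg(arguments, 2, -1);    // index past the array -> default -1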
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
The class CommonJoinOperator, method getFilteredValue.
// all evaluation should be processed here for valid aliasFilterTags
//
// for MapJoin, the filter tag is pre-calculated in MapredLocalTask and stored with the value.
// when reading the hashtable, MapJoinObjectValue calculates the alias filter and provides it to the join
protected List<Object> getFilteredValue(byte alias, Object row) throws HiveException {
  boolean hasFilter = hasFilter(alias);
  List<Object> nr = JoinUtil.computeValues(row, joinValues[alias], joinValuesObjectInspectors[alias], hasFilter);
  if (hasFilter) {
    short filterTag = JoinUtil.isFiltered(row, joinFilters[alias], joinFilterObjectInspectors[alias], filterMaps[alias]);
    nr.add(new ShortWritable(filterTag));
    aliasFilterTags[alias] &= filterTag;
  }
  return nr;
}
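To make the bitwise accumulation concrete, here is a small illustrative sketch (not from the Hive source, tag values made up): aliasFilterTags[alias] starts with all bits set (see initializeOp below) and is ANDed with the tag of every row, so a bit can only survive if it was set on every row of that alias.

// Illustrative values only: three rows for one alias, each contributing a filter tag.
short acc = (short) 0xff;                  // initial value, all bits set
short[] rowTags = { (short) 0xf, (short) 0xf, (short) 0x1 };
for (short tag : rowTags) {
  acc &= tag;                              // same accumulation as aliasFilterTags[alias] &= filterTag
}
// acc == 0x1: only bit 0 stayed set across all rows.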
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
The class CommonJoinOperator, method initializeOp.
@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  closeOpCalled = false;
  this.handleSkewJoin = conf.getHandleSkewJoin();
  this.hconf = hconf;
  heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
  countAfterReport = 0;
  totalSz = 0;
  int tagLen = conf.getTagLength();
  // Map that contains the rows for each alias
  storage = new AbstractRowContainer[tagLen];
  numAliases = conf.getExprs().size();
  joinValues = new List[tagLen];
  joinFilters = new List[tagLen];
  order = conf.getTagOrder();
  condn = conf.getConds();
  nullsafes = conf.getNullSafes();
  noOuterJoin = conf.isNoOuterJoin();
  totalSz = JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), order, NOTSKIPBIGTABLE, hconf);
  // process join filters
  joinFilters = new List[tagLen];
  JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), order, NOTSKIPBIGTABLE, hconf);
  joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, NOTSKIPBIGTABLE, tagLen);
  joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, NOTSKIPBIGTABLE, tagLen);
  joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(joinValuesObjectInspectors, NOTSKIPBIGTABLE, tagLen);
  filterMaps = conf.getFilterMap();
  if (noOuterJoin) {
    rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors;
  } else {
    List<ObjectInspector>[] rowContainerObjectInspectors = new List[tagLen];
    for (Byte alias : order) {
      ArrayList<ObjectInspector> rcOIs = new ArrayList<ObjectInspector>();
      rcOIs.addAll(joinValuesObjectInspectors[alias]);
      // for each alias, add an object inspector for short as the last element
      rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
      rowContainerObjectInspectors[alias] = rcOIs;
    }
    rowContainerStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors, NOTSKIPBIGTABLE, tagLen);
  }
  dummyObj = new ArrayList[numAliases];
  dummyObjVectors = new RowContainer[numAliases];
  joinEmitInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINEMITINTERVAL);
  joinCacheSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINCACHESIZE);
  // construct dummy null row (indicating empty table) and
  // construct spill table serde which is used if input is too
  // large to fit into main memory.
  byte pos = 0;
  for (Byte alias : order) {
    int sz = conf.getExprs().get(alias).size();
    ArrayList<Object> nr = new ArrayList<Object>(sz);
    for (int j = 0; j < sz; j++) {
      nr.add(null);
    }
    if (!noOuterJoin) {
      // add whether the row is filtered or not
      // this value does not matter for the dummyObj
      // because the join values are already null
      nr.add(new ShortWritable());
    }
    dummyObj[pos] = nr;
    // there should be only 1 dummy object in the RowContainer
    RowContainer<List<Object>> values = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors[pos], alias, 1, spillTableDesc, conf, !hasFilter(pos), reporter);
    values.addRow(dummyObj[pos]);
    dummyObjVectors[pos] = values;
    // if serde is null, the input doesn't need to be spilled out
    // e.g., the output columns do not contain the input table
    RowContainer<List<Object>> rc = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors[pos], alias, joinCacheSize, spillTableDesc, conf, !hasFilter(pos), reporter);
    storage[pos] = rc;
    pos++;
  }
  forwardCache = new Object[totalSz];
  aliasFilterTags = new short[numAliases];
  Arrays.fill(aliasFilterTags, (byte) 0xff);
  filterTags = new short[numAliases];
  skipVectors = new boolean[numAliases][];
  for (int i = 0; i < skipVectors.length; i++) {
    skipVectors[i] = new boolean[i + 1];
  }
  intermediate = new List[numAliases];
  offsets = new int[numAliases + 1];
  int sum = 0;
  for (int i = 0; i < numAliases; i++) {
    offsets[i] = sum;
    sum += joinValues[order[i]].size();
  }
  offsets[numAliases] = sum;
  outputObjInspector = getJoinOutputObjectInspector(order, joinValuesStandardObjectInspectors, conf);
  for (int i = 0; i < condn.length; i++) {
    if (condn[i].getType() == JoinDesc.LEFT_SEMI_JOIN) {
      hasLeftSemiJoin = true;
    }
  }
  // Create post-filtering evaluators if needed
  if (conf.getResidualFilterExprs() != null) {
    // filter straight away.
    assert !noOuterJoin;
    residualJoinFilters = new ArrayList<>(conf.getResidualFilterExprs().size());
    residualJoinFiltersOIs = new ArrayList<>(conf.getResidualFilterExprs().size());
    for (int i = 0; i < conf.getResidualFilterExprs().size(); i++) {
      ExprNodeDesc expr = conf.getResidualFilterExprs().get(i);
      residualJoinFilters.add(ExprNodeEvaluatorFactory.get(expr));
      residualJoinFiltersOIs.add(residualJoinFilters.get(i).initialize(outputObjInspector));
    }
    needsPostEvaluation = true;
    // We need to disable the join emit interval, since for outer joins with post conditions
    // we need the full view of the matching rows on the right to know whether we need
    // to produce a row with NULL values or not
    joinEmitInterval = -1;
  }
  if (isLogInfoEnabled) {
    LOG.info("JOIN " + outputObjInspector.getTypeName() + " totalsz = " + totalSz);
  }
}
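A small illustrative sketch (column values made up, two value columns assumed) of what a single stored row looks like under the two branches above; the trailing ShortWritable is why rcOIs gets writableShortObjectInspector appended when outer joins are present.

// Inner-join-only plan: the row container holds just the join values.
List<Object> innerRow = Arrays.asList(new Text("v0"), new IntWritable(1));
// Plan with an outer join: the per-row filter tag travels as a trailing ShortWritable,
// matching what getFilteredValue() appends.
List<Object> outerRow = Arrays.asList(new Text("v0"), new IntWritable(1), new ShortWritable((short) 0));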
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
The class TestMapJoinRowContainer, method testSerialization.
@Test
public void testSerialization() throws Exception {
  MapJoinRowContainer container1 = new MapJoinEagerRowContainer();
  container1.addRow(new Object[] { new Text("f0"), null, new ShortWritable((short) 0xf) });
  container1.addRow(Arrays.asList(new Object[] { null, new Text("f1"), new ShortWritable((short) 0xf) }));
  container1.addRow(new Object[] { null, null, new ShortWritable((short) 0xf) });
  container1.addRow(Arrays.asList(new Object[] { new Text("f0"), new Text("f1"), new ShortWritable((short) 0x1) }));
  MapJoinRowContainer container2 = Utilities.serde(container1, "f0,f1,filter", "string,string,smallint");
  Utilities.testEquality(container1, container2);
  Assert.assertEquals(4, container1.rowCount());
  Assert.assertEquals(1, container2.getAliasFilter());
}
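Hand-checking the last assertion, on the reading that the trailing ShortWritable of each row is its filter tag and the alias filter is the running AND of those tags (an interpretation consistent with getFilteredValue above, not verified against every Hive version):

// Tag values taken from the four rows added in the test.
int aliasFilter = 0xff & 0xf & 0xf & 0xf & 0x1;   // == 0x1, matching assertEquals(1, container2.getAliasFilter())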
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
The class TypedBytesRecordReader, method next.
public int next(Writable data) throws IOException {
  int pos = 0;
  barrStr.reset();
  while (true) {
    Type type = tbIn.readTypeCode();
    // it was an empty stream
    if (type == null) {
      return -1;
    }
    if (type == Type.ENDOFRECORD) {
      tbOut.writeEndOfRecord();
      if (barrStr.getLength() > 0) {
        ((BytesWritable) data).set(barrStr.getData(), 0, barrStr.getLength());
      }
      return barrStr.getLength();
    }
    if (pos >= row.size()) {
      Writable wrt = allocateWritable(type);
      assert pos == row.size();
      assert pos == rowTypeName.size();
      row.add(wrt);
      rowTypeName.add(type.name());
      String typeName = typedBytesToTypeName.get(type);
      PrimitiveTypeInfo srcTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(typeName);
      srcOIns.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(srcTypeInfo));
      converters.add(ObjectInspectorConverters.getConverter(srcOIns.get(pos), dstOIns.get(pos)));
    } else {
      if (!rowTypeName.get(pos).equals(type.name())) {
        throw new RuntimeException("datatype of row changed from " + rowTypeName.get(pos) + " to " + type.name());
      }
    }
    Writable w = row.get(pos);
    switch (type) {
      case BYTE:
        tbIn.readByte((ByteWritable) w);
        break;
      case BOOL:
        tbIn.readBoolean((BooleanWritable) w);
        break;
      case INT:
        tbIn.readInt((IntWritable) w);
        break;
      case SHORT:
        tbIn.readShort((ShortWritable) w);
        break;
      case LONG:
        tbIn.readLong((LongWritable) w);
        break;
      case FLOAT:
        tbIn.readFloat((FloatWritable) w);
        break;
      case DOUBLE:
        tbIn.readDouble((DoubleWritable) w);
        break;
      case STRING:
        tbIn.readText((Text) w);
        break;
      default:
        // should never come here
        assert false;
    }
    write(pos, w);
    pos++;
  }
}
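For the SHORT branch above, the converter stored in converters.get(pos) bridges the typed-bytes source type and the table's destination type. A minimal sketch of that conversion path, with the destination type assumed to be string purely for illustration (the real dstOIns entries come from the configured output schema):

// Assumed destination type (string); source side mirrors the SHORT case above.
ObjectInspector srcOI = PrimitiveObjectInspectorFactory.writableShortObjectInspector;
ObjectInspector dstOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
ObjectInspectorConverters.Converter conv = ObjectInspectorConverters.getConverter(srcOI, dstOI);
Object converted = conv.convert(new ShortWritable((short) 7));  // expected: a Text containing "7"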