Example 56 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class MaskTransformer method getIntArg.

// Returns the constant value of the argument at the given index as an int,
// or defaultValue if the argument is absent or has no usable constant value.
int getIntArg(ObjectInspector[] arguments, int index, int defaultValue) {
    int ret = defaultValue;
    ObjectInspector arg = (arguments != null && arguments.length > index) ? arguments[index] : null;
    if (arg != null) {
        if (arg instanceof WritableConstantIntObjectInspector) {
            IntWritable value = ((WritableConstantIntObjectInspector) arg).getWritableConstantValue();
            if (value != null) {
                ret = value.get();
            }
        } else if (arg instanceof WritableConstantLongObjectInspector) {
            LongWritable value = ((WritableConstantLongObjectInspector) arg).getWritableConstantValue();
            if (value != null) {
                ret = (int) value.get();
            }
        } else if (arg instanceof WritableConstantShortObjectInspector) {
            ShortWritable value = ((WritableConstantShortObjectInspector) arg).getWritableConstantValue();
            if (value != null) {
                ret = value.get();
            }
        } else if (arg instanceof ConstantObjectInspector) {
            Object value = ((ConstantObjectInspector) arg).getWritableConstantValue();
            if (value != null) {
                String strValue = value.toString();
                if (strValue != null && !strValue.isEmpty()) {
                    ret = Integer.parseInt(strValue);
                }
            }
        }
    }
    return ret;
}
Also used : ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongWritable(org.apache.hadoop.io.LongWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable)
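
For context, a minimal standalone sketch of the conversion the ShortWritable branch above performs; the class and method names here are hypothetical, and only the ShortWritable API comes from the example:

import org.apache.hadoop.hive.serde2.io.ShortWritable;

public class ShortConstantSketch {
    // Mirrors the ShortWritable branch of getIntArg: unwrap the constant
    // and let Java widen the short to an int.
    static int shortConstantToInt(ShortWritable value, int defaultValue) {
        return (value != null) ? value.get() : defaultValue;
    }

    public static void main(String[] args) {
        System.out.println(shortConstantToInt(new ShortWritable((short) 4), -1)); // 4
        System.out.println(shortConstantToInt(null, -1)); // -1
    }
}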

Example 57 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class CommonJoinOperator method getFilteredValue.

// all evaluation should be processed here to keep aliasFilterTags valid
//
// for MapJoin, the filter tag is pre-calculated in MapredLocalTask and stored with the value.
// when reading the hashtable, MapJoinObjectValue calculates the alias filter and provides it to the join
protected List<Object> getFilteredValue(byte alias, Object row) throws HiveException {
    boolean hasFilter = hasFilter(alias);
    List<Object> nr = JoinUtil.computeValues(row, joinValues[alias], joinValuesObjectInspectors[alias], hasFilter);
    if (hasFilter) {
        short filterTag = JoinUtil.isFiltered(row, joinFilters[alias], joinFilterObjectInspectors[alias], filterMaps[alias]);
        nr.add(new ShortWritable(filterTag));
        aliasFilterTags[alias] &= filterTag;
    }
    return nr;
}
Also used : ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable)
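
The line aliasFilterTags[alias] &= filterTag is the interesting part: the tag is a bitmask, and AND-ing it across rows means a bit survives only if it was set for every row of that alias. A minimal sketch of that accumulation pattern, with made-up tag values (what each bit means is decided by JoinUtil.isFiltered, not shown here):

public class FilterTagSketch {
    public static void main(String[] args) {
        short aliasFilterTag = (short) 0xffff; // initial all-ones state (see Arrays.fill in Example 58)
        short[] rowFilterTags = { 0x03, 0x01, 0x05 }; // hypothetical per-row filter tags
        for (short tag : rowFilterTags) {
            aliasFilterTag &= tag; // a bit survives only if it is set in every row
        }
        System.out.printf("0x%02x%n", aliasFilterTag); // prints 0x01: only bit 0 was set in every row
    }
}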

Example 58 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class CommonJoinOperator method initializeOp.

@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    closeOpCalled = false;
    this.handleSkewJoin = conf.getHandleSkewJoin();
    this.hconf = hconf;
    heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
    countAfterReport = 0;
    totalSz = 0;
    int tagLen = conf.getTagLength();
    // Map that contains the rows for each alias
    storage = new AbstractRowContainer[tagLen];
    numAliases = conf.getExprs().size();
    joinValues = new List[tagLen];
    joinFilters = new List[tagLen];
    order = conf.getTagOrder();
    condn = conf.getConds();
    nullsafes = conf.getNullSafes();
    noOuterJoin = conf.isNoOuterJoin();
    totalSz = JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), order, NOTSKIPBIGTABLE, hconf);
    // process join filters (the joinFilters array was already allocated above)
    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), order, NOTSKIPBIGTABLE, hconf);
    joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, NOTSKIPBIGTABLE, tagLen);
    joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, NOTSKIPBIGTABLE, tagLen);
    joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(joinValuesObjectInspectors, NOTSKIPBIGTABLE, tagLen);
    filterMaps = conf.getFilterMap();
    if (noOuterJoin) {
        rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors;
    } else {
        List<ObjectInspector>[] rowContainerObjectInspectors = new List[tagLen];
        for (Byte alias : order) {
            ArrayList<ObjectInspector> rcOIs = new ArrayList<ObjectInspector>();
            rcOIs.addAll(joinValuesObjectInspectors[alias]);
            // for each alias, add object inspector for short as the last element
            rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
            rowContainerObjectInspectors[alias] = rcOIs;
        }
        rowContainerStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors, NOTSKIPBIGTABLE, tagLen);
    }
    dummyObj = new ArrayList[numAliases];
    dummyObjVectors = new RowContainer[numAliases];
    joinEmitInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINEMITINTERVAL);
    joinCacheSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINCACHESIZE);
    // construct dummy null row (indicating empty table) and
    // construct spill table serde which is used if input is too
    // large to fit into main memory.
    byte pos = 0;
    for (Byte alias : order) {
        int sz = conf.getExprs().get(alias).size();
        ArrayList<Object> nr = new ArrayList<Object>(sz);
        for (int j = 0; j < sz; j++) {
            nr.add(null);
        }
        if (!noOuterJoin) {
            // add whether the row is filtered or not
            // this value does not matter for the dummyObj
            // because the join values are already null
            nr.add(new ShortWritable());
        }
        dummyObj[pos] = nr;
        // there should be only 1 dummy object in the RowContainer
        RowContainer<List<Object>> values = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors[pos], alias, 1, spillTableDesc, conf, !hasFilter(pos), reporter);
        values.addRow(dummyObj[pos]);
        dummyObjVectors[pos] = values;
        // if serde is null, the input doesn't need to be spilled out
        // e.g., when the output does not contain columns from the input table
        RowContainer<List<Object>> rc = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors[pos], alias, joinCacheSize, spillTableDesc, conf, !hasFilter(pos), reporter);
        storage[pos] = rc;
        pos++;
    }
    forwardCache = new Object[totalSz];
    aliasFilterTags = new short[numAliases];
    // (byte) 0xff widens to -1, i.e. a short with all bits set, so every filter-tag bit starts on
    Arrays.fill(aliasFilterTags, (byte) 0xff);
    filterTags = new short[numAliases];
    skipVectors = new boolean[numAliases][];
    for (int i = 0; i < skipVectors.length; i++) {
        skipVectors[i] = new boolean[i + 1];
    }
    intermediate = new List[numAliases];
    offsets = new int[numAliases + 1];
    int sum = 0;
    for (int i = 0; i < numAliases; i++) {
        offsets[i] = sum;
        sum += joinValues[order[i]].size();
    }
    offsets[numAliases] = sum;
    outputObjInspector = getJoinOutputObjectInspector(order, joinValuesStandardObjectInspectors, conf);
    for (int i = 0; i < condn.length; i++) {
        if (condn[i].getType() == JoinDesc.LEFT_SEMI_JOIN) {
            hasLeftSemiJoin = true;
        }
    }
    // Create post-filtering evaluators if needed
    if (conf.getResidualFilterExprs() != null) {
        // residual filters are only generated for outer joins; for inner joins,
        // the conditions can filter rows straight away
        assert !noOuterJoin;
        residualJoinFilters = new ArrayList<>(conf.getResidualFilterExprs().size());
        residualJoinFiltersOIs = new ArrayList<>(conf.getResidualFilterExprs().size());
        for (int i = 0; i < conf.getResidualFilterExprs().size(); i++) {
            ExprNodeDesc expr = conf.getResidualFilterExprs().get(i);
            residualJoinFilters.add(ExprNodeEvaluatorFactory.get(expr));
            residualJoinFiltersOIs.add(residualJoinFilters.get(i).initialize(outputObjInspector));
        }
        needsPostEvaluation = true;
        // We need to disable join emit interval, since for outer joins with post conditions
        // we need to have the full view on the right matching rows to know whether we need
        // to produce a row with NULL values or not
        joinEmitInterval = -1;
    }
    if (isLogInfoEnabled) {
        LOG.info("JOIN " + outputObjInspector.getTypeName() + " totalsz = " + totalSz);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
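
The dummy-row construction in the loop above is easy to miss: for outer joins, every row carries a trailing ShortWritable filter tag, so the dummy (all-null) row needs one too, or the row widths would disagree. A minimal sketch of just that shape, with hypothetical names:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.io.ShortWritable;

public class DummyRowSketch {
    // One null per join value, plus a trailing filter tag for outer joins.
    static List<Object> dummyRow(int valueCount, boolean outerJoin) {
        List<Object> nr = new ArrayList<>(valueCount + (outerJoin ? 1 : 0));
        for (int j = 0; j < valueCount; j++) {
            nr.add(null);
        }
        if (outerJoin) {
            nr.add(new ShortWritable()); // the value is irrelevant: the join values are already null
        }
        return nr;
    }

    public static void main(String[] args) {
        System.out.println(dummyRow(3, true).size()); // 4: three nulls plus the filter tag
    }
}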

Example 59 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class TestMapJoinRowContainer method testSerialization.

@Test
public void testSerialization() throws Exception {
    MapJoinRowContainer container1 = new MapJoinEagerRowContainer();
    container1.addRow(new Object[] { new Text("f0"), null, new ShortWritable((short) 0xf) });
    container1.addRow(Arrays.asList(new Object[] { null, new Text("f1"), new ShortWritable((short) 0xf) }));
    container1.addRow(new Object[] { null, null, new ShortWritable((short) 0xf) });
    container1.addRow(Arrays.asList(new Object[] { new Text("f0"), new Text("f1"), new ShortWritable((short) 0x1) }));
    MapJoinRowContainer container2 = Utilities.serde(container1, "f0,f1,filter", "string,string,smallint");
    Utilities.testEquality(container1, container2);
    Assert.assertEquals(4, container1.rowCount());
    Assert.assertEquals(1, container2.getAliasFilter());
}
Also used : Text(org.apache.hadoop.io.Text) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Test(org.junit.Test)
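
The expected alias filter of 1 is consistent with the accumulation pattern from Example 57: AND-ing the trailing filter tags gives 0xf & 0xf & 0xf & 0x1 == 0x1. At the level of a single value, what the test exercises is plain Hadoop Writable I/O; a minimal round-trip sketch for ShortWritable alone:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.hive.serde2.io.ShortWritable;

public class ShortWritableRoundTrip {
    public static void main(String[] args) throws Exception {
        ShortWritable in = new ShortWritable((short) 0xf);
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bytes)); // Writable serialization

        ShortWritable out = new ShortWritable();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(out.get() == in.get()); // true
    }
}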

Example 60 with ShortWritable

use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.

the class TypedBytesRecordReader method next.

public int next(Writable data) throws IOException {
    int pos = 0;
    barrStr.reset();
    while (true) {
        Type type = tbIn.readTypeCode();
        // it was an empty stream
        if (type == null) {
            return -1;
        }
        if (type == Type.ENDOFRECORD) {
            tbOut.writeEndOfRecord();
            if (barrStr.getLength() > 0) {
                ((BytesWritable) data).set(barrStr.getData(), 0, barrStr.getLength());
            }
            return barrStr.getLength();
        }
        if (pos >= row.size()) {
            Writable wrt = allocateWritable(type);
            assert pos == row.size();
            assert pos == rowTypeName.size();
            row.add(wrt);
            rowTypeName.add(type.name());
            String typeName = typedBytesToTypeName.get(type);
            PrimitiveTypeInfo srcTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(typeName);
            srcOIns.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(srcTypeInfo));
            converters.add(ObjectInspectorConverters.getConverter(srcOIns.get(pos), dstOIns.get(pos)));
        } else {
            if (!rowTypeName.get(pos).equals(type.name())) {
                throw new RuntimeException("datatype of row changed from " + rowTypeName.get(pos) + " to " + type.name());
            }
        }
        Writable w = row.get(pos);
        switch(type) {
            case BYTE:
                tbIn.readByte((ByteWritable) w);
                break;
            case BOOL:
                tbIn.readBoolean((BooleanWritable) w);
                break;
            case INT:
                tbIn.readInt((IntWritable) w);
                break;
            case SHORT:
                tbIn.readShort((ShortWritable) w);
                break;
            case LONG:
                tbIn.readLong((LongWritable) w);
                break;
            case FLOAT:
                tbIn.readFloat((FloatWritable) w);
                break;
            case DOUBLE:
                tbIn.readDouble((DoubleWritable) w);
                break;
            case STRING:
                tbIn.readText((Text) w);
                break;
            default:
                // should never come here
                assert false;
        }
        write(pos, w);
        pos++;
    }
}
Also used : ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
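
The allocateWritable helper called above is not shown; here is a plausible reconstruction based on the switch that follows it, assuming the Type enum from Hive's typed-bytes package (the exact signature and import path are assumptions, not taken from the example):

import org.apache.hadoop.hive.contrib.util.typedbytes.Type;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// One reusable Writable per typed-bytes type code; the reader fills it
// in place on each call to next().
static Writable allocateWritable(Type type) {
    switch (type) {
        case BYTE:   return new ByteWritable();
        case BOOL:   return new BooleanWritable();
        case INT:    return new IntWritable();
        case SHORT:  return new ShortWritable();
        case LONG:   return new LongWritable();
        case FLOAT:  return new FloatWritable();
        case DOUBLE: return new DoubleWritable();
        case STRING: return new Text();
        default:     throw new IllegalArgumentException("unsupported type: " + type);
    }
}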

Aggregations

ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 69 uses
IntWritable (org.apache.hadoop.io.IntWritable): 44 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 41 uses
LongWritable (org.apache.hadoop.io.LongWritable): 40 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 36 uses
Text (org.apache.hadoop.io.Text): 32 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 30 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 26 uses
Test (org.junit.Test): 26 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 20 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 19 uses
ArrayList (java.util.ArrayList): 16 uses
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 16 uses
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 15 uses
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 14 uses
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 13 uses
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 13 uses
Configuration (org.apache.hadoop.conf.Configuration): 11 uses
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 11 uses
Properties (java.util.Properties): 10 uses