Search in sources:

Example 1 with MapJoinRowContainer

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer in the project hive by apache.

From the class MapJoinOperator, method process:

/**
 * Processes one big-table row: deserializes its join key once, probes every
 * small-table hash map with it, stages the matching rows (or outer-join
 * dummies) in {@code storage}, and generates the joined output records.
 *
 * @param row the incoming big-table row
 * @param tag the operator tag of the input; cast to a byte alias
 * @throws HiveException wrapping any failure during key setup, probing,
 *         spilling, or output generation
 */
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        alias = (byte) tag;
        // Lazily create one hash-map getter per small table on the first row;
        // all getters are built against a single shared reference key.
        if (hashMapRowGetters == null) {
            hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
            MapJoinKey refKey = getRefKey(alias);
            for (byte pos = 0; pos < order.length; pos++) {
                if (pos != alias) {
                    hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey);
                }
            }
        }
        // coming from the spilled matchfile. We need to recreate hashMapRowGetter against new hashtables
        if (hybridMapJoinLeftover) {
            MapJoinKey refKey = getRefKey(alias);
            for (byte pos = 0; pos < order.length; pos++) {
                if (pos != alias && spilledMapJoinTables[pos] != null) {
                    hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey);
                }
            }
        }
        // compute keys and values as StandardObjects
        ReusableGetAdaptor firstSetKey = null;
        int fieldCount = joinKeys[alias].size();
        boolean joinNeeded = false;
        boolean bigTableRowSpilled = false;
        for (byte pos = 0; pos < order.length; pos++) {
            if (pos != alias) {
                JoinUtil.JoinResult joinResult;
                ReusableGetAdaptor adaptor;
                if (firstSetKey == null) {
                    // First small table probed: deserialize the key from the row.
                    adaptor = firstSetKey = hashMapRowGetters[pos];
                    joinResult = setMapJoinKey(firstSetKey, row, alias);
                } else {
                    // Keys for all tables are the same, so only the first has to deserialize them.
                    adaptor = hashMapRowGetters[pos];
                    joinResult = adaptor.setFromOther(firstSetKey);
                }
                MapJoinRowContainer rowContainer = adaptor.getCurrentRows();
                if (joinResult != JoinUtil.JoinResult.MATCH) {
                    assert (rowContainer == null || !rowContainer.hasRows()) : "Expecting an empty result set for no match";
                }
                // NOTE(review): setInternal presumably rewraps the container around
                // the current key for lazy deserialization — confirm against
                // UnwrapRowContainer before relying on this.
                if (rowContainer != null && unwrapContainer[pos] != null) {
                    Object[] currentKey = firstSetKey.getCurrentKey();
                    rowContainer = unwrapContainer[pos].setInternal(rowContainer, currentKey);
                }
                // there is no join-value or join-key has all null elements
                if (rowContainer == null || firstSetKey.hasAnyNulls(fieldCount, nullsafes)) {
                    if (!noOuterJoin) {
                        // we only keep the LEFT side if the row is not spilled
                        if (!conf.isHybridHashJoin() || hybridMapJoinLeftover || (joinResult != JoinUtil.JoinResult.SPILL && !bigTableRowSpilled)) {
                            joinNeeded = true;
                            storage[pos] = dummyObjVectors[pos];
                        } else {
                            joinNeeded = false;
                        }
                    } else {
                        // Inner join with no match: this position contributes nothing.
                        storage[pos] = emptyList;
                    }
                } else {
                    // Match found: stage a copy of the matching rows plus their alias filter.
                    joinNeeded = true;
                    storage[pos] = rowContainer.copy();
                    aliasFilterTags[pos] = rowContainer.getAliasFilter();
                }
                // postpone the join processing for this pair by also spilling this big table row.
                if (joinResult == JoinUtil.JoinResult.SPILL && !bigTableRowSpilled) {
                    // For n-way join, only spill big table rows once
                    spillBigTableRow(mapJoinTables[pos], row);
                    bigTableRowSpilled = true;
                }
            }
        }
        if (joinNeeded) {
            List<Object> value = getFilteredValue(alias, row);
            // Add the value to the ArrayList
            storage[alias].addRow(value);
            // generate the output records
            checkAndGenObject();
        }
        // done with the row
        storage[tag].clearRows();
        for (byte pos = 0; pos < order.length; pos++) {
            if (pos != tag) {
                storage[pos] = null;
            }
        }
    } catch (Exception e) {
        String msg = "Unexpected exception from " + this.getClass().getSimpleName() + " : " + e.getMessage();
        LOG.error(msg, e);
        throw new HiveException(msg, e);
    }
}
Also used : MapJoinKey(org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KryoException(com.esotericsoftware.kryo.KryoException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) MapJoinRowContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer) ReusableGetAdaptor(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor)

Example 2 with MapJoinRowContainer

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer in the project hive by apache.

From the class HashTableSinkOperator, method process:

/**
 * Loads one small-table row into the in-memory hash table: evaluates the
 * join-key and join-value expressions for the row, then appends the value to
 * the row container stored under that key (creating the container on first
 * sight of the key).
 *
 * @param row the incoming small-table row
 * @param tag the operator tag of the input; cast to a byte alias
 * @throws HiveException on failure evaluating expressions or on memory exhaustion
 */
@Override
public void process(Object row, int tag) throws HiveException {
    byte pos = (byte) tag;
    // Evaluate each join-key expression against the row. Use the
    // non-optimized (MR) key representation.
    int keyCount = joinKeys[pos].size();
    Object[] keyValues = new Object[keyCount];
    for (int i = 0; i < keyCount; i++) {
        keyValues[i] = joinKeys[pos].get(i).evaluate(row);
    }
    MapJoinKeyObject mapJoinKey = new MapJoinKeyObject();
    mapJoinKey.readFromRow(keyValues, joinKeysObjectInspectors[pos]);
    // Compute the value side only when there are value columns or filters to carry.
    boolean hasValueOrFilter = (hasFilter(pos) && filterMaps[pos].length > 0) || joinValues[pos].size() > 0;
    Object[] rowValues = hasValueOrFilter
        ? JoinUtil.computeMapJoinValues(row, joinValues[pos], joinValuesObjectInspectors[pos],
            joinFilters[pos], joinFilterObjectInspectors[pos],
            filterMaps == null ? null : filterMaps[pos])
        : emptyObjectArray;
    MapJoinPersistableTableContainer table = mapJoinTables[pos];
    MapJoinRowContainer rows = table.get(mapJoinKey);
    if (rows == null) {
        // First row for this key: use the shared empty-container sentinel when
        // there is no value, otherwise a fresh eager container.
        if (rowValues.length != 0) {
            rows = new MapJoinEagerRowContainer();
            rows.addRow(rowValues);
        } else {
            rows = emptyRowContainer;
        }
        rowNumber++;
        // Periodically check memory pressure as the table grows.
        if (rowNumber > hashTableScale && rowNumber % hashTableScale == 0) {
            memoryExhaustionHandler.checkMemoryStatus(table.size(), rowNumber);
        }
        table.put(mapJoinKey, rows);
    } else if (rows == emptyRowContainer) {
        // Key was seen before with an empty value: replace the shared sentinel
        // with a private copy before appending a real value.
        rows = rows.copy();
        rows.addRow(rowValues);
        table.put(mapJoinKey, rows);
    } else {
        rows.addRow(rowValues);
    }
}
Also used : MapJoinKeyObject(org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject) MapJoinPersistableTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinPersistableTableContainer) MapJoinEagerRowContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinEagerRowContainer) MapJoinKeyObject(org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject) MapJoinRowContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer)

Aggregations

MapJoinRowContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer)2 KryoException (com.esotericsoftware.kryo.KryoException)1 IOException (java.io.IOException)1 MapJoinEagerRowContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinEagerRowContainer)1 MapJoinKey (org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey)1 MapJoinKeyObject (org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject)1 MapJoinPersistableTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinPersistableTableContainer)1 ReusableGetAdaptor (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)1