
Example 1 with KeyValueReader

Use of org.apache.tez.runtime.library.api.KeyValueReader in project hive by apache.

Class VectorMapJoinFastHashTableLoader, method load:

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            Long keyCountObj = parentKeyCounts.get(pos);
            long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
            VectorMapJoinFastTableContainer vectorMapJoinFastTableContainer = new VectorMapJoinFastTableContainer(desc, hconf, keyCount);
            // No SerDes here.
            vectorMapJoinFastTableContainer.setSerde(null, null);
            while (kvReader.next()) {
                vectorMapJoinFastTableContainer.putRow((BytesWritable) kvReader.getCurrentKey(), (BytesWritable) kvReader.getCurrentValue());
            }
            vectorMapJoinFastTableContainer.seal();
            mapJoinTables[pos] = (MapJoinTableContainer) vectorMapJoinFastTableContainer;
        } catch (IOException e) {
            throw new HiveException(e);
        } catch (SerDeException e) {
            throw new HiveException(e);
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used: HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) LogicalInput(org.apache.tez.runtime.api.LogicalInput)
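
The pattern above recurs throughout these examples: start the input, wait for it to become ready, then drain its KeyValueReader. A minimal sketch of the drain step, assuming only the public Tez KeyValueReader contract; the RowHandler callback is a hypothetical stand-in for the table-container putRow call:

import java.io.IOException;

import org.apache.tez.runtime.library.api.KeyValueReader;

final class ReaderDrain {

    /** Hypothetical callback standing in for table-container population (putRow above). */
    interface RowHandler {
        void handleRow(Object key, Object value) throws IOException;
    }

    /** Drain the reader; the key/value accessors are only valid after next() returns true. */
    static long drain(KeyValueReader reader, RowHandler handler) throws IOException {
        long rows = 0;
        while (reader.next()) {
            handler.handleRow(reader.getCurrentKey(), reader.getCurrentValue());
            rows++;
        }
        return rows;
    }
}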

Example 2 with KeyValueReader

Use of org.apache.tez.runtime.library.api.KeyValueReader in project hive by apache.

Class MapRecordProcessor, method initializeMapRecordSources:

private void initializeMapRecordSources() throws Exception {
    // the +1 is for the main map operator itself
    int size = mergeMapOpList.size() + 1;
    sources = new MapRecordSource[size];
    position = mapOp.getConf().getTag();
    sources[position] = new MapRecordSource();
    KeyValueReader reader = null;
    if (mainWorkMultiMRInput != null) {
        reader = getKeyValueReader(mainWorkMultiMRInput.getKeyValueReaders(), mapOp);
    } else {
        reader = legacyMRInput.getReader();
    }
    sources[position].init(jconf, mapOp, reader);
    for (AbstractMapOperator mapOp : mergeMapOpList) {
        int tag = mapOp.getConf().getTag();
        sources[tag] = new MapRecordSource();
        String inputName = mapOp.getConf().getName();
        MultiMRInput multiMRInput = multiMRInputMap.get(inputName);
        Collection<KeyValueReader> kvReaders = multiMRInput.getKeyValueReaders();
        l4j.debug("There are " + kvReaders.size() + " key-value readers for input " + inputName);
        if (kvReaders.size() > 0) {
            reader = getKeyValueReader(kvReaders, mapOp);
            sources[tag].init(jconf, mapOp, reader);
        }
    }
    ((TezContext) MapredContext.get()).setRecordSources(sources);
}
Also used: AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) MultiMRInput(org.apache.tez.mapreduce.input.MultiMRInput)
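
This method appears to rely on an invariant worth stating: each map operator's tag must be a unique, in-range index into the sources array (sized mergeMapOpList.size() + 1). A small illustrative check, with hypothetical names, makes that assumption explicit:

import java.util.List;

final class TagCheck {

    /** Illustrative only: verify that operator tags form unique, in-range array indices. */
    static void checkTags(int size, List<Integer> tags) {
        boolean[] seen = new boolean[size];
        for (int tag : tags) {
            if (tag < 0 || tag >= size || seen[tag]) {
                throw new IllegalStateException("bad or duplicate operator tag: " + tag);
            }
            seen[tag] = true;
        }
    }
}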

Example 3 with KeyValueReader

Use of org.apache.tez.runtime.library.api.KeyValueReader in project hive by apache.

Class DynamicValueRegistryTez, method init:

@Override
public void init(RegistryConf conf) throws Exception {
    RegistryConfTez rct = (RegistryConfTez) conf;
    for (String inputSourceName : rct.baseWork.getInputSourceToRuntimeValuesInfo().keySet()) {
        LOG.info("Runtime value source: " + inputSourceName);
        LogicalInput runtimeValueInput = rct.inputs.get(inputSourceName);
        RuntimeValuesInfo runtimeValuesInfo = rct.baseWork.getInputSourceToRuntimeValuesInfo().get(inputSourceName);
        // Setup deserializer/obj inspectors for the incoming data source
        Deserializer deserializer = ReflectionUtils.newInstance(runtimeValuesInfo.getTableDesc().getDeserializerClass(), null);
        deserializer.initialize(rct.conf, runtimeValuesInfo.getTableDesc().getProperties());
        ObjectInspector inspector = deserializer.getObjectInspector();
        // Set up col expressions for the dynamic values using this input
        List<ExprNodeEvaluator> colExprEvaluators = new ArrayList<ExprNodeEvaluator>();
        for (ExprNodeDesc expr : runtimeValuesInfo.getColExprs()) {
            ExprNodeEvaluator exprEval = ExprNodeEvaluatorFactory.get(expr, null);
            exprEval.initialize(inspector);
            colExprEvaluators.add(exprEval);
        }
        runtimeValueInput.start();
        List<Input> inputList = new ArrayList<Input>();
        inputList.add(runtimeValueInput);
        rct.processorContext.waitForAllInputsReady(inputList);
        KeyValueReader kvReader = (KeyValueReader) runtimeValueInput.getReader();
        long rowCount = 0;
        while (kvReader.next()) {
            Object row = deserializer.deserialize((Writable) kvReader.getCurrentValue());
            rowCount++;
            for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
                // Read each expression and save it to the value registry
                ExprNodeEvaluator eval = colExprEvaluators.get(colIdx);
                Object val = eval.evaluate(row);
                setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), val);
            }
        }
        // For now, expecting a single row (min/max, aggregated bloom filter), or no rows
        if (rowCount == 0) {
            LOG.debug("No input rows from " + inputSourceName + ", filling dynamic values with nulls");
            for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
                ExprNodeEvaluator eval = colExprEvaluators.get(colIdx);
                setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), null);
            }
        } else if (rowCount > 1) {
            throw new IllegalStateException("Expected 0 or 1 rows from " + inputSourceName + ", got " + rowCount);
        }
    }
}
Also used: ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) ArrayList(java.util.ArrayList) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo) LogicalInput(org.apache.tez.runtime.api.LogicalInput) Input(org.apache.tez.runtime.api.Input) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
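
The start/wait/read handshake at the heart of this method is the same one used by the hash table loaders. A hedged sketch isolating it, assuming the input sits on an edge whose reader really is a KeyValueReader (otherwise the cast fails) and that a ProcessorContext such as rct.processorContext is available:

import java.util.Collections;

import org.apache.tez.runtime.api.Input;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueReader;

final class InputHandshake {

    /** Start the input, block until its data is fetchable, then hand back its reader. */
    static KeyValueReader openReader(LogicalInput input, ProcessorContext context)
            throws Exception {
        input.start();
        context.waitForAllInputsReady(Collections.<Input>singletonList(input));
        // The cast is safe only for inputs that actually produce a KeyValueReader.
        return (KeyValueReader) input.getReader();
    }
}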

Example 4 with KeyValueReader

Use of org.apache.tez.runtime.library.api.KeyValueReader in project hive by apache.

Class HashTableLoader, method load:

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    boolean isCrossProduct = false;
    List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
    if (joinExprs.size() == 0) {
        isCrossProduct = true;
    }
    boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
    boolean isFirstKey = true;
    // Get the total available memory from memory manager
    long totalMapJoinMemory = desc.getMemoryNeeded();
    LOG.info("Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
    if (totalMapJoinMemory <= 0) {
        totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    }
    long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    if (totalMapJoinMemory > processMaxMemory) {
        float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
        LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory + " is greater than the max memory size of " + processMaxMemory);
        // Don't want to attempt to grab more memory than we have available .. percentage is a bit arbitrary
        totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
    }
    // Only applicable to n-way Hybrid Grace Hash Join
    HybridHashTableConf nwayConf = null;
    long totalSize = 0;
    // position of the biggest small table
    int biggest = 0;
    Map<Integer, Long> tableMemorySizes = null;
    if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
        // Create a Conf for n-way HybridHashTableContainers
        nwayConf = new HybridHashTableConf();
        LOG.info("N-way join: " + (mapJoinTables.length - 1) + " small tables.");
        // Find the biggest small table; also calculate total data size of all small tables
        // the size of the biggest small table
        long maxSize = Long.MIN_VALUE;
        for (int pos = 0; pos < mapJoinTables.length; pos++) {
            if (pos == desc.getPosBigTable()) {
                continue;
            }
            long smallTableSize = desc.getParentDataSizes().get(pos);
            totalSize += smallTableSize;
            if (maxSize < smallTableSize) {
                maxSize = smallTableSize;
                biggest = pos;
            }
        }
        tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
        // Using biggest small table, calculate number of partitions to create for each small table
        long memory = tableMemorySizes.get(biggest);
        int numPartitions = 0;
        try {
            numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
        } catch (IOException e) {
            throw new HiveException(e);
        }
        nwayConf.setNumberOfPartitions(numPartitions);
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext(), valCtx = mapJoinTableSerdes[pos].getValueContext();
            if (useOptimizedTables) {
                ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
                if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
                    if (isFirstKey) {
                        useOptimizedTables = false;
                        LOG.info(describeOi("Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    } else {
                        throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    }
                }
            }
            isFirstKey = false;
            Long keyCountObj = parentKeyCounts.get(pos);
            long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
            long memory = 0;
            if (useHybridGraceHashJoin) {
                if (mapJoinTables.length > 2) {
                    memory = tableMemorySizes.get(pos);
                } else {
                    // binary join
                    memory = totalMapJoinMemory;
                }
            }
            MapJoinTableContainer tableContainer;
            if (useOptimizedTables) {
                if (!useHybridGraceHashJoin || isCrossProduct) {
                    tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
                } else {
                    tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
                }
            } else {
                tableContainer = new HashMapWrapper(hconf, keyCount);
            }
            LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());
            tableContainer.setSerde(keyCtx, valCtx);
            while (kvReader.next()) {
                tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
            }
            tableContainer.seal();
            LOG.info("Finished loading hashtable using " + tableContainer.getClass() + ". Small table position: " + pos);
            mapJoinTables[pos] = tableContainer;
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used: PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) IOException(java.io.IOException) HashMapWrapper(org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper) HybridHashTableConf(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) LogicalInput(org.apache.tez.runtime.api.LogicalInput) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) HybridHashTableContainer(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)
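
The memory-budget arithmetic near the top of this loader is easy to miss inside the larger method. A hedged sketch of just that clamp, with illustrative parameter names standing in for the HiveConf lookups:

import java.lang.management.ManagementFactory;

final class JoinMemoryBudget {

    /**
     * Mirrors the sizing logic above: fall back to a configured threshold when the
     * planner supplied no estimate, then cap the budget at a fraction of the max heap.
     */
    static long clamp(long planned, long configuredFallback, float maxHeapFraction) {
        long budget = planned > 0 ? planned : configuredFallback;
        long maxHeap = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
        if (budget > maxHeap) {
            budget = (long) (maxHeap * maxHeapFraction);
        }
        return budget;
    }
}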

Example 5 with KeyValueReader

Use of org.apache.tez.runtime.library.api.KeyValueReader in project hive by apache.

Class MapRecordProcessor, method getKeyValueReader:

@SuppressWarnings("deprecation")
private KeyValueReader getKeyValueReader(Collection<KeyValueReader> keyValueReaders, AbstractMapOperator mapOp) throws Exception {
    List<KeyValueReader> kvReaderList = new ArrayList<KeyValueReader>(keyValueReaders);
    // this sets up the map operator contexts correctly
    mapOp.initializeContexts();
    Deserializer deserializer = mapOp.getCurrentDeserializer();
    // deserializer is null in case of VectorMapOperator
    KeyValueReader reader = new KeyValueInputMerger(kvReaderList, deserializer, new ObjectInspector[] { deserializer == null ? null : deserializer.getObjectInspector() }, mapOp.getConf().getSortCols());
    return reader;
}
Also used: Deserializer(org.apache.hadoop.hive.serde2.Deserializer) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) ArrayList(java.util.ArrayList) KeyValueInputMerger(org.apache.hadoop.hive.ql.exec.tez.tools.KeyValueInputMerger)
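
Note that the merger is returned typed as a plain KeyValueReader, so callers consume the merged stream with the same next()/getCurrentKey()/getCurrentValue() loop shown in Example 1; the sort columns passed to KeyValueInputMerger presumably drive the order in which the underlying readers are interleaved.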

Aggregations

KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader): 7
ArrayList (java.util.ArrayList): 4
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 3
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 3
LogicalInput (org.apache.tez.runtime.api.LogicalInput): 3
IOException (java.io.IOException): 2
AbstractMapOperator (org.apache.hadoop.hive.ql.exec.AbstractMapOperator): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 2
Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 2
Configuration (org.apache.hadoop.conf.Configuration): 1
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 1
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 1
ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator): 1
HashTableDummyOperator (org.apache.hadoop.hive.ql.exec.HashTableDummyOperator): 1
MapOperator (org.apache.hadoop.hive.ql.exec.MapOperator): 1
Operator (org.apache.hadoop.hive.ql.exec.Operator): 1
TezDummyStoreOperator (org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator): 1
ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext): 1
HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper): 1