
Example 56 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class HashTableSinkOperator, the method initializeOp:

@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
    console = new LogHelper(LOG, isSilent);
    memoryExhaustionHandler = new MapJoinMemoryExhaustionHandler(console, conf.getHashtableMemoryUsage());
    emptyRowContainer.addRow(emptyObjectArray);
    // for small tables only; so get the big table position first
    posBigTableAlias = conf.getPosBigTable();
    order = conf.getTagOrder();
    // initialize some variables, which used to be initialized in CommonJoinOperator
    this.hconf = hconf;
    filterMaps = conf.getFilterMap();
    int tagLen = conf.getTagLength();
    // process join keys
    joinKeys = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
    joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);
    // process join values
    joinValues = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
    joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);
    // process join filters
    joinFilters = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
    joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);
    if (!conf.isNoOuterJoin()) {
        for (Byte alias : order) {
            if (alias == posBigTableAlias || joinValues[alias] == null) {
                continue;
            }
            List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
            if (filterMaps != null && filterMaps[alias] != null) {
                // for each alias, add object inspector for filter tag as the last element
                rcOIs = new ArrayList<ObjectInspector>(rcOIs);
                rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
            }
        }
    }
    mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
    hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
    if (hashTableScale <= 0) {
        hashTableScale = 1;
    }
    try {
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerde = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(keySerde, null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerde, false);
        for (Byte pos : order) {
            if (pos == posBigTableAlias) {
                continue;
            }
            mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
            TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used: ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper), AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe), MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), HashMapWrapper(org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper), MapJoinMemoryExhaustionHandler(org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler), MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
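
The reusable TableDesc pattern in this example is small: look up the descriptor's deserializer class, instantiate it reflectively, and initialize it from the descriptor's properties. A minimal sketch of that pattern follows, assuming Hadoop's org.apache.hadoop.util.ReflectionUtils (the snippet leaves ReflectionUtils unqualified) and a null Configuration, as initializeOp itself passes; the helper name is hypothetical.

import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.util.ReflectionUtils;

public final class TableDescSerDeSketch {

    // Hypothetical helper: build an initialized SerDe from a TableDesc taken from the plan,
    // e.g. conf.getKeyTblDesc() above. Mirrors the reflective pattern in initializeOp.
    static AbstractSerDe serDeFor(TableDesc desc) throws SerDeException {
        AbstractSerDe serde =
            (AbstractSerDe) ReflectionUtils.newInstance(desc.getDeserializerClass(), null);
        // null Configuration and null partition properties, as in the operator code above
        SerDeUtils.initializeSerDe(serde, null, desc.getProperties(), null);
        return serde;
    }
}

HashTableSinkOperator applies this twice: once for the key descriptor and once per small-table alias for the value descriptor, wrapping the results in MapJoinObjectSerDeContext instances.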

Example 57 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class FetchOperator, the method setupOutputObjectInspector:

private StructObjectInspector setupOutputObjectInspector() throws HiveException {
    TableDesc tableDesc = work.getTblDesc();
    try {
        tableSerDe = tableDesc.getDeserializer(job, true);
        tableOI = (StructObjectInspector) tableSerDe.getObjectInspector();
        if (!isPartitioned) {
            return getTableRowOI(tableOI);
        }
        partKeyOI = getPartitionKeyOI(tableDesc);
        PartitionDesc partDesc = new PartitionDesc(tableDesc, null);
        List<PartitionDesc> listParts = work.getPartDesc();
        // if there are no partitions, or no schema conversion is needed, use the table's own schema to get the ObjectInspector.
        if (listParts == null || listParts.isEmpty() || !needConversion(tableDesc, listParts)) {
            return getPartitionedRowOI(tableOI);
        }
        convertedOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(tableOI, tableOI, null, false);
        return getPartitionedRowOI(convertedOI);
    } catch (Exception e) {
        throw new HiveException("Failed with exception " + e.getMessage() + StringUtils.stringifyException(e));
    }
}
Also used: HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), IOException(java.io.IOException)
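
When only the table-level row ObjectInspector is needed, the first two statements of the method are the whole story: TableDesc.getDeserializer(conf, true), the call used above, hands back an already initialized deserializer, and its top-level ObjectInspector is a struct over the table's columns. A minimal sketch under that assumption (the helper name and the broad throws clause are illustrative; the method above wraps the same calls in a try/catch that rethrows as HiveException):

import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.mapred.JobConf;

final class TableRowInspectorSketch {

    // Hypothetical helper: obtain the table's row ObjectInspector, as setupOutputObjectInspector
    // does for the non-partitioned case before returning getTableRowOI(tableOI).
    static StructObjectInspector tableRowOI(TableDesc tableDesc, JobConf job) throws Exception {
        Deserializer serde = tableDesc.getDeserializer(job, true);
        // The deserializer's top-level ObjectInspector describes one row of the table.
        return (StructObjectInspector) serde.getObjectInspector();
    }
}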

Example 58 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class Vectorizer, the method canSpecializeReduceSink:

private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException {
    // Allocate a VectorReduceSinkDesc up front, initially with key type NONE, so EXPLAIN can report
    // that this operator was vectorized but not native, along with the gating conditions recorded below.
    VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
    desc.setVectorDesc(vectorDesc);
    boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty();
    boolean hasTopN = desc.getTopN() >= 0;
    boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
    boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0;
    TableDesc keyTableDesc = desc.getKeySerializeInfo();
    Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
    boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
    TableDesc valueTableDesc = desc.getValueSerializeInfo();
    Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
    boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
    // Remember the condition variables for EXPLAIN regardless.
    vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setHasBuckets(hasBuckets);
    vectorDesc.setHasTopN(hasTopN);
    vectorDesc.setUseUniformHash(useUniformHash);
    vectorDesc.setHasDistinctColumns(hasDistinctColumns);
    vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
    vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
    // Many restrictions.
    if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || hasBuckets || hasTopN || !useUniformHash || hasDistinctColumns || !isKeyBinarySortable || !isValueLazyBinary) {
        return false;
    }
    // We are doing work here we'd normally do in VectorGroupByCommonOperator's constructor.
    // So if we later decide not to specialize, we'll just waste any scratch columns allocated...
    List<ExprNodeDesc> keysDescs = desc.getKeyCols();
    VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
    TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
    Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
    ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] reduceSinkKeyExpressions;
    for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
        VectorExpression ve = allKeyExpressions[i];
        reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
        reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
        reduceSinkKeyColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
        if (!IdentityExpression.isColumnOnly(ve)) {
            groupByKeyExpressionsList.add(ve);
        }
    }
    if (groupByKeyExpressionsList.size() == 0) {
        reduceSinkKeyExpressions = null;
    } else {
        reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
    VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
    int[] reduceSinkValueColumnMap = new int[valueDescs.size()];
    TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[valueDescs.size()];
    Type[] reduceSinkValueColumnVectorTypes = new Type[valueDescs.size()];
    ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] reduceSinkValueExpressions;
    for (int i = 0; i < valueDescs.size(); ++i) {
        VectorExpression ve = allValueExpressions[i];
        reduceSinkValueColumnMap[i] = ve.getOutputColumn();
        reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
        reduceSinkValueColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
        if (!IdentityExpression.isColumnOnly(ve)) {
            reduceSinkValueExpressionsList.add(ve);
        }
    }
    if (reduceSinkValueExpressionsList.size() == 0) {
        reduceSinkValueExpressions = null;
    } else {
        reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
    vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
    vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
    vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
    vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
    vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
    vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
    vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
    return true;
}
Also used: ArrayList(java.util.ArrayList), VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc), LazyBinarySerDe(org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe), UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString), StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), InConstantType(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType), HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType), HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType), Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type), VectorDeserializeType(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType), OperatorType(org.apache.hadoop.hive.ql.plan.api.OperatorType), VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
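
Stripped of the ReduceSinkDesc and VectorReduceSinkDesc bookkeeping, the TableDesc-specific part of the gate above is a pair of class identity checks on the key and value serialization descriptors. A minimal sketch, with the comparisons copied from the method and a hypothetical helper name:

import org.apache.hadoop.hive.ql.plan.TableDesc;

final class ReduceSinkSerDeCheckSketch {

    // Native ReduceSink vectorization requires BinarySortableSerDe keys and LazyBinarySerDe values.
    static boolean serdesAllowNativeReduceSink(TableDesc keyTableDesc, TableDesc valueTableDesc) {
        boolean isKeyBinarySortable = keyTableDesc.getDeserializerClass()
            == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class;
        boolean isValueLazyBinary = valueTableDesc.getDeserializerClass()
            == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class;
        return isKeyBinarySortable && isValueLazyBinary;
    }
}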

Example 59 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class TestAccumuloStorageHandler, the method testTableJobPropertiesCallsInputAndOutputMethods:

@Test
public void testTableJobPropertiesCallsInputAndOutputMethods() {
    AccumuloStorageHandler mockStorageHandler = Mockito.mock(AccumuloStorageHandler.class);
    TableDesc tableDesc = Mockito.mock(TableDesc.class);
    Map<String, String> jobProperties = new HashMap<String, String>();
    Mockito.doCallRealMethod().when(mockStorageHandler).configureTableJobProperties(tableDesc, jobProperties);
    // configureTableJobProperties shouldn't be getting called by Hive, but, if it somehow does,
    // we should just set all of the configurations for input and output.
    mockStorageHandler.configureTableJobProperties(tableDesc, jobProperties);
    Mockito.verify(mockStorageHandler).configureInputJobProperties(tableDesc, jobProperties);
    Mockito.verify(mockStorageHandler).configureOutputJobProperties(tableDesc, jobProperties);
}
Also used: HashMap(java.util.HashMap), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), Test(org.junit.Test)

Example 60 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class TestAccumuloStorageHandler, the method testEmptyIteratorPushdownValue:

@Test(expected = IllegalArgumentException.class)
public void testEmptyIteratorPushdownValue() {
    TableDesc tableDesc = Mockito.mock(TableDesc.class);
    Properties props = new Properties();
    Map<String, String> jobProperties = new HashMap<String, String>();
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq1,cf:cq2,cf:cq3");
    props.setProperty(AccumuloSerDeParameters.TABLE_NAME, "table");
    props.setProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, "");
    Mockito.when(tableDesc.getProperties()).thenReturn(props);
    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
}
Also used: HashMap(java.util.HashMap), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), Properties(java.util.Properties), Test(org.junit.Test)

Aggregations

TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 93
ArrayList (java.util.ArrayList): 47
Path (org.apache.hadoop.fs.Path): 34
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 29
HashMap (java.util.HashMap): 26
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 26
LinkedHashMap (java.util.LinkedHashMap): 23
Properties (java.util.Properties): 19
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 19
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 18
Operator (org.apache.hadoop.hive.ql.exec.Operator): 16
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 16
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 16
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 16
JobConf (org.apache.hadoop.mapred.JobConf): 15
List (java.util.List): 14
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 14
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 14
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 11
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 11