Search in sources :

Example 21 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class Utilities method createEmptyBuckets.

/**
 * Check the existence of buckets according to bucket specification. Create empty buckets if
 * needed.
 *
 * <p>For every path in {@code paths} a record writer is obtained and closed immediately
 * without writing any row.
 *
 * @param hconf The configuration that the working {@link JobConf} is copied from and that is
 *        used to look up the Hive output format
 * @param paths A list of empty buckets to create
 * @param conf The definition of the FileSink.
 * @param reporter The mapreduce reporter object
 * @throws HiveException if the table's serializer cannot be instantiated or initialized, or
 *         if propagated from one of the file-format helper calls
 * @throws IOException if propagated from the file-format / record-writer calls
 */
private static void createEmptyBuckets(Configuration hconf, List<Path> paths, FileSinkDesc conf, Reporter reporter) throws HiveException, IOException {
    // Always work on a private JobConf copy. The former instanceof check had two identical
    // branches, so it never selected a different code path.
    JobConf jc = new JobConf(hconf);
    HiveOutputFormat<?, ?> hiveOutputFormat = null;
    Class<? extends Writable> outputClass = null;
    boolean isCompressed = conf.getCompressed();
    TableDesc tableInfo = conf.getTableInfo();
    try {
        // The class returned by getDeserializerClass() is instantiated and used as a Serializer.
        Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance();
        serializer.initialize(null, tableInfo.getProperties());
        outputClass = serializer.getSerializedClass();
        hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, tableInfo);
    } catch (SerDeException e) {
        throw new HiveException(e);
    } catch (InstantiationException e) {
        throw new HiveException(e);
    } catch (IllegalAccessException e) {
        throw new HiveException(e);
    }
    for (Path path : paths) {
        // Open and close right away: nothing is written through the writer.
        RecordWriter writer = HiveFileFormatUtils.getRecordWriter(jc, hiveOutputFormat, outputClass, isCompressed, tableInfo.getProperties(), path, reporter);
        writer.close(false);
        LOG.info("created empty bucket for enforcing bucketing at " + path);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JobConf(org.apache.hadoop.mapred.JobConf) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 22 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class ReduceSinkOperator method initializeOp.

/**
 * Initializes this operator after delegating to the superclass.
 *
 * <p>Resets the row counters, registers the intermediate-records counter, builds the
 * expression evaluators for the key, value, partition and (if present) bucket columns,
 * instantiates and initializes the key and value serializers from their table descriptors,
 * and sets up the top-N hash when a non-negative limit and a positive memory usage are
 * configured.
 *
 * @param hconf configuration passed to the superclass and read for logging/counter settings
 * @throws HiveException declared by the signature; the call to the superclass happens outside
 *         the try block, so anything it throws propagates unchanged
 * @throws RuntimeException wrapping any exception raised by the rest of the initialization
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    try {
        numRows = 0;
        cntr = 1;
        logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
        statsMap.put(getCounterName(Counter.RECORDS_OUT_INTERMEDIATE, hconf), recordCounter);

        // --- key columns ---
        List<ExprNodeDesc> keyCols = conf.getKeyCols();
        if (isLogDebugEnabled) {
            LOG.debug("keys size is " + keyCols.size());
            for (ExprNodeDesc keyCol : keyCols) {
                LOG.debug("Key exprNodeDesc " + keyCol.getExprString());
            }
        }
        keyEval = new ExprNodeEvaluator[keyCols.size()];
        int keyPos = 0;
        for (ExprNodeDesc keyCol : keyCols) {
            // Remember the position of a constant key whose value equals BUCKET_NUMBER_COL_NAME.
            boolean isBucketNumberConstant = keyCol instanceof ExprNodeConstantDesc
                && (BUCKET_NUMBER_COL_NAME).equals(((ExprNodeConstantDesc) keyCol).getValue());
            if (isBucketNumberConstant) {
                buckColIdxInKeyForSdpo = keyPos;
            }
            keyEval[keyPos] = ExprNodeEvaluatorFactory.get(keyCol);
            keyPos++;
        }
        numDistributionKeys = conf.getNumDistributionKeys();
        distinctColIndices = conf.getDistinctColumnIndices();
        numDistinctExprs = distinctColIndices.size();

        // --- value columns ---
        valueEval = new ExprNodeEvaluator[conf.getValueCols().size()];
        int valuePos = 0;
        for (ExprNodeDesc valueCol : conf.getValueCols()) {
            valueEval[valuePos] = ExprNodeEvaluatorFactory.get(valueCol);
            valuePos++;
        }

        // --- partition columns: reuse the key evaluator when the expression is also a key ---
        partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
        int partitionPos = 0;
        for (ExprNodeDesc partitionCol : conf.getPartitionCols()) {
            int keyIndex = ExprNodeDescUtils.indexOf(partitionCol, keyCols);
            if (keyIndex < 0) {
                partitionEval[partitionPos] = ExprNodeEvaluatorFactory.get(partitionCol);
            } else {
                partitionEval[partitionPos] = keyEval[keyIndex];
            }
            partitionPos++;
        }

        // --- bucket columns (optional), same reuse rule as partition columns ---
        if (conf.getBucketCols() != null && !conf.getBucketCols().isEmpty()) {
            bucketEval = new ExprNodeEvaluator[conf.getBucketCols().size()];
            int bucketPos = 0;
            for (ExprNodeDesc bucketCol : conf.getBucketCols()) {
                int keyIndex = ExprNodeDescUtils.indexOf(bucketCol, keyCols);
                if (keyIndex < 0) {
                    bucketEval[bucketPos] = ExprNodeEvaluatorFactory.get(bucketCol);
                } else {
                    bucketEval[bucketPos] = keyEval[keyIndex];
                }
                bucketPos++;
            }
            buckColIdxInKey = conf.getPartitionCols().size();
        }

        // --- tag ---
        tag = conf.getTag();
        tagByte[0] = (byte) tag;
        skipTag = conf.getSkipTag();
        if (isLogInfoEnabled) {
            LOG.info("Using tag = " + tag);
        }

        // --- serializers: the class from getDeserializerClass() is used as a Serializer ---
        TableDesc keyTableDesc = conf.getKeySerializeInfo();
        keySerializer = (Serializer) keyTableDesc.getDeserializerClass().newInstance();
        keySerializer.initialize(null, keyTableDesc.getProperties());
        keyIsText = keySerializer.getSerializedClass().equals(Text.class);

        TableDesc valueTableDesc = conf.getValueSerializeInfo();
        valueSerializer = (Serializer) valueTableDesc.getDeserializerClass().newInstance();
        valueSerializer.initialize(null, valueTableDesc.getProperties());

        // --- top-N hash, only with a non-negative limit and positive memory usage ---
        int limit = conf.getTopN();
        float memUsage = conf.getTopNMemoryUsage();
        if (limit >= 0 && memUsage > 0) {
            if (conf.isPTFReduceSink()) {
                reducerHash = new PTFTopNHash();
            } else {
                reducerHash = new TopNHash();
            }
            reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this, conf, hconf);
        }

        useUniformHash = conf.getReducerTraits().contains(UNIFORM);
        firstRow = true;
    } catch (Exception e) {
        // Log with context, then rethrow unchecked with the original exception as the cause.
        String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
        LOG.error(msg, e);
        throw new RuntimeException(e);
    }
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) Text(org.apache.hadoop.io.Text) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 23 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class SkewJoinHandler method initiliaze.

/**
 * Sets up the per-alias state of this handler from the join operator's descriptor.
 *
 * <p>For each alias position it builds a struct object inspector that unions the value
 * columns with the key columns, and instantiates and initializes a serde from the skew-key
 * table descriptor. If a serde fails to initialize, skew-join handling is switched off on the
 * join operator and the first loop stops early. Afterwards every non-null row container in
 * the join operator's storage gets its serde, object inspector and table descriptor reset.
 *
 * <p>Note that serializers and object inspectors are stored under the loop position (cast to
 * {@code byte}), whereas table descriptors are looked up by the alias taken from the tag order.
 *
 * @param hconf configuration stored on this handler and used to derive the task id
 */
public void initiliaze(Configuration hconf) {
    this.hconf = hconf;
    JoinDesc desc = joinOp.getConf();
    skewKeyDefinition = desc.getSkewKeyDefinition();
    skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(numAliases);
    tblDesc = desc.getSkewKeysValuesTables();
    tblSerializers = new HashMap<Byte, AbstractSerDe>(numAliases);
    bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
    taskId = Utilities.getTaskId(hconf);
    int[][] filterMap = desc.getFilterMap();

    for (int pos = 0; pos < numAliases; pos++) {
        Byte alias = conf.getTagOrder()[pos];

        // Collect the object inspectors of the KEY struct's fields for this alias.
        List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
        StructObjectInspector inputOI = (StructObjectInspector) joinOp.inputObjInspectors[alias];
        StructField keyStructField = inputOI.getStructFieldRef(Utilities.ReduceField.KEY.toString());
        StructObjectInspector keyStructOI = (StructObjectInspector) keyStructField.getFieldObjectInspector();
        List<? extends StructField> keyFields = keyStructOI.getAllStructFieldRefs();
        for (StructField keyField : keyFields) {
            skewTableKeyInspectors.add(keyField.getFieldObjectInspector());
        }

        TableDesc joinKeyDesc = desc.getKeyTableDesc();
        List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc.getProperties());
        StructObjectInspector structTblKeyInpector = ObjectInspectorFactory.getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);

        try {
            AbstractSerDe serde = (AbstractSerDe) ReflectionUtils.newInstance(tblDesc.get(alias).getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(serde, null, tblDesc.get(alias).getProperties(), null);
            tblSerializers.put((byte) pos, serde);
        } catch (SerDeException e) {
            // Give up on skew join entirely; remaining aliases are not processed by this loop.
            LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
            joinOp.handleSkewJoin = false;
            break;
        }

        boolean hasFilter = filterMap != null && filterMap[pos] != null;
        TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias, joinOp.spillTableDesc, conf, !hasFilter);
        List<String> valColNames;
        if (valTblDesc == null) {
            // No spill table descriptor: use an empty column-name list.
            valColNames = new ArrayList<String>();
        } else {
            valColNames = Utilities.getColumnNames(valTblDesc.getProperties());
        }
        StructObjectInspector structTblValInpector = ObjectInspectorFactory.getStandardStructObjectInspector(valColNames, joinOp.joinValuesStandardObjectInspectors[pos]);

        // Values first, then keys.
        StructObjectInspector structTblInpector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays.asList(structTblValInpector, structTblKeyInpector));
        skewKeysTableObjectInspector.put((byte) pos, structTblInpector);
    }

    // reset rowcontainer's serde, objectinspector, and tableDesc.
    for (int pos = 0; pos < numAliases; pos++) {
        Byte alias = conf.getTagOrder()[pos];
        RowContainer<ArrayList<Object>> container = (RowContainer) joinOp.storage[pos];
        if (container == null) {
            continue;
        }
        container.setSerDe(tblSerializers.get((byte) pos), skewKeysTableObjectInspector.get((byte) pos));
        container.setTableDesc(tblDesc.get(alias));
    }
}
Also used : RowContainer(org.apache.hadoop.hive.ql.exec.persistence.RowContainer) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 24 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class MapOperator method setChildren.

/**
 * Wires the operators reachable from the configured path-to-alias mapping as children of
 * this operator.
 *
 * <p>For every (path, alias) pair the operator registered for the alias gets a
 * {@code MapOpCtx} stored in {@code opCtxMap} under the path's string form, unless that
 * operator already has a context for the same path. Each distinct operator is added to the
 * child list once, with this operator set as its only parent.
 *
 * @param hconf base configuration from which the per-table configurations are derived
 * @throws Exception propagated from the helper calls
 */
public void setChildren(Configuration hconf) throws Exception {
    List<Operator<? extends OperatorDesc>> childOps = new ArrayList<Operator<? extends OperatorDesc>>();
    Map<String, Configuration> confByTableName = cloneConfsForNestedColPruning(hconf);
    Map<TableDesc, StructObjectInspector> convertedOI = getConvertedOI(confByTableName);

    for (Map.Entry<Path, ArrayList<String>> pathEntry : conf.getPathToAliases().entrySet()) {
        Path onefile = pathEntry.getKey();
        List<String> aliases = pathEntry.getValue();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(onefile);
        TableDesc tableDesc = partDesc.getTableDesc();
        Configuration newConf = confByTableName.get(tableDesc.getTableName());

        for (String alias : aliases) {
            Operator<? extends OperatorDesc> op = conf.getAliasToWork().get(alias);
            if (isLogDebugEnabled) {
                LOG.debug("Adding alias " + alias + " to work list for file " + onefile);
            }

            // Fetch, or lazily create, the operator-context map for this path.
            String pathKey = onefile.toString();
            Map<Operator<?>, MapOpCtx> contexts = opCtxMap.get(pathKey);
            if (contexts == null) {
                contexts = new LinkedHashMap<Operator<?>, MapOpCtx>();
                opCtxMap.put(pathKey, contexts);
            }

            // Only operators without a context for this path need any work.
            if (!contexts.containsKey(op)) {
                MapOpCtx context = new MapOpCtx(alias, op, partDesc);
                StructObjectInspector tableRowOI = convertedOI.get(partDesc.getTableDesc());
                contexts.put(op, initObjectInspector(newConf, context, tableRowOI));
                if (!childOps.contains(op)) {
                    op.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(1));
                    op.getParentOperators().add(this);
                    childOps.add(op);
                }
            }
        }
    }

    initOperatorContext(childOps);
    // we found all the operators that we are supposed to process.
    setChildOperators(childOps);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 25 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class DynamicPartitionPruner method initialize.

/**
 * Clears this pruner and rebuilds its per-source bookkeeping from {@code work}.
 *
 * <p>Every event source is registered as waiting for events and given fresh expected/seen
 * counters. For each table descriptor of a source, a {@code SourceInfo} is created from the
 * matching column name, column type and partition-key expression and appended to the
 * source's list in {@code sourceInfoMap}.
 *
 * @throws SerDeException declared by the signature; presumably raised while creating a
 *         {@code SourceInfo} (not visible in this method)
 */
private void initialize() throws SerDeException {
    this.clear();
    // Most recently created SourceInfo per column name, across all sources.
    Map<String, SourceInfo> latestInfoByColumn = new HashMap<String, SourceInfo>();
    // sources represent vertex names
    Set<String> sources = work.getEventSourceTableDescMap().keySet();
    sourcesWaitingForEvents.addAll(sources);

    for (String vertexName : sources) {
        // Starts at 0 and is decremented once per column handled below; according to the
        // original note this is later used to derive the number of expected events
        // (#columns X #tasks).
        numExpectedEventsPerSource.put(vertexName, new MutableInt(0));
        numEventsSeenPerSource.put(vertexName, new MutableInt(0));

        // The four lists below are parallel: entry k of each describes the same column.
        List<TableDesc> tables = work.getEventSourceTableDescMap().get(vertexName);
        List<String> columnNames = work.getEventSourceColumnNameMap().get(vertexName);
        List<String> columnTypes = work.getEventSourceColumnTypeMap().get(vertexName);
        List<ExprNodeDesc> partKeyExprs = work.getEventSourcePartKeyExprMap().get(vertexName);

        Iterator<String> nameIter = columnNames.iterator();
        Iterator<String> typeIter = columnTypes.iterator();
        Iterator<ExprNodeDesc> exprIter = partKeyExprs.iterator();

        // A single source can process multiple columns, and will send an event for each of them.
        for (TableDesc table : tables) {
            numExpectedEventsPerSource.get(vertexName).decrement();
            ++sourceInfoCount;

            String columnName = nameIter.next();
            String columnType = typeIter.next();
            ExprNodeDesc partKeyExpr = exprIter.next();
            SourceInfo info = createSourceInfo(table, partKeyExpr, columnName, columnType, jobConf);

            if (!sourceInfoMap.containsKey(vertexName)) {
                sourceInfoMap.put(vertexName, new ArrayList<SourceInfo>());
            }
            sourceInfoMap.get(vertexName).add(info);

            // When an earlier SourceInfo already exists for this column name, the new one takes
            // over its values object and skipPruning setting, so events for the same column end
            // up in the same collection.
            if (latestInfoByColumn.containsKey(columnName)) {
                SourceInfo earlier = latestInfoByColumn.get(columnName);
                info.values = earlier.values;
                info.skipPruning = earlier.skipPruning;
            }
            latestInfoByColumn.put(columnName, info);
        }
    }
}
Also used : HashMap(java.util.HashMap) MutableInt(org.apache.commons.lang3.mutable.MutableInt) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)80 ArrayList (java.util.ArrayList)40 Path (org.apache.hadoop.fs.Path)33 PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc)27 HashMap (java.util.HashMap)24 LinkedHashMap (java.util.LinkedHashMap)21 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)21 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)19 Properties (java.util.Properties)16 Operator (org.apache.hadoop.hive.ql.exec.Operator)16 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)16 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)16 LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)14 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)14 List (java.util.List)13 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)13 JobConf (org.apache.hadoop.mapred.JobConf)13 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)11 IOException (java.io.IOException)10 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)10