Example 96 with LinkedHashMap

use of java.util.LinkedHashMap in project hive by apache.

the class Utilities method getFullDPSpecs.

/**
   * Construct a list of full partition specs from the dynamic partition context and the
   * directory names corresponding to these dynamic partitions.
   */
public static List<LinkedHashMap<String, String>> getFullDPSpecs(Configuration conf, DynamicPartitionCtx dpCtx) throws HiveException {
    try {
        Path loadPath = dpCtx.getRootPath();
        FileSystem fs = loadPath.getFileSystem(conf);
        int numDPCols = dpCtx.getNumDPCols();
        FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(loadPath, numDPCols, fs);
        if (status.length == 0) {
            LOG.warn("No partition is generated by dynamic partitioning");
            return null;
        }
        // partial partition specification
        Map<String, String> partSpec = dpCtx.getPartSpec();
        // list of full partition specification
        List<LinkedHashMap<String, String>> fullPartSpecs = new ArrayList<LinkedHashMap<String, String>>();
        // for each dynamically created DP directory, construct a full partition spec
        // and load the partition based on that
        for (int i = 0; i < status.length; ++i) {
            // get the dynamically created directory
            Path partPath = status[i].getPath();
            assert status[i].isDirectory() : "partition " + partPath + " is not a directory!";
            // generate a full partition specification
            LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>(partSpec);
            Warehouse.makeSpecFromName(fullPartSpec, partPath);
            fullPartSpecs.add(fullPartSpec);
        }
        return fullPartSpecs;
    } catch (IOException e) {
        throw new HiveException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) LinkedHashMap(java.util.LinkedHashMap) FileSystem(org.apache.hadoop.fs.FileSystem)
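
A LinkedHashMap is the right container here because a partition spec is ordered: partition columns must be emitted in the order they were declared, with the static (partial) spec inserted first and the dynamic columns parsed from the directory name after it. A minimal, self-contained sketch of that property (the column names are hypothetical, not taken from the Hive code above):

import java.util.LinkedHashMap;
import java.util.Map;

public class PartSpecOrderDemo {
    public static void main(String[] args) {
        Map<String, String> spec = new LinkedHashMap<String, String>();
        spec.put("ds", "2017-01-01");   // static partition column, inserted first
        spec.put("country", "us");      // dynamic, as if parsed from .../country=us
        spec.put("state", "ca");        // dynamic, as if parsed from .../state=ca
        // Iteration yields ds, country, state in exactly that order;
        // a plain HashMap gives no such guarantee.
        System.out.println(spec);       // {ds=2017-01-01, country=us, state=ca}
    }
}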

Example 97 with LinkedHashMap

use of java.util.LinkedHashMap in project hive by apache.

the class MapOperator method setChildren.

public void setChildren(Configuration hconf) throws Exception {
    List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
    Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
    Map<TableDesc, StructObjectInspector> convertedOI = getConvertedOI(tableNameToConf);
    for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
        Path onefile = entry.getKey();
        List<String> aliases = entry.getValue();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(onefile);
        TableDesc tableDesc = partDesc.getTableDesc();
        Configuration newConf = tableNameToConf.get(tableDesc.getTableName());
        for (String alias : aliases) {
            Operator<? extends OperatorDesc> op = conf.getAliasToWork().get(alias);
            if (isLogDebugEnabled) {
                LOG.debug("Adding alias " + alias + " to work list for file " + onefile);
            }
            Map<Operator<?>, MapOpCtx> contexts = opCtxMap.get(onefile.toString());
            if (contexts == null) {
                opCtxMap.put(onefile.toString(), contexts = new LinkedHashMap<Operator<?>, MapOpCtx>());
            }
            if (contexts.containsKey(op)) {
                continue;
            }
            MapOpCtx context = new MapOpCtx(alias, op, partDesc);
            StructObjectInspector tableRowOI = convertedOI.get(partDesc.getTableDesc());
            contexts.put(op, initObjectInspector(newConf, context, tableRowOI));
            if (!children.contains(op)) {
                op.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(1));
                op.getParentOperators().add(this);
                children.add(op);
            }
        }
    }
    initOperatorContext(children);
    // we found all the operators that we are supposed to process.
    setChildOperators(children);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
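
The null-check-then-put pattern used for opCtxMap above is the classic pre-Java-8 get-or-create idiom. On Java 8+ the same step collapses into computeIfAbsent; a standalone sketch with stand-in types (Operator and MapOpCtx here are empty placeholders, not the Hive classes):

import java.util.LinkedHashMap;
import java.util.Map;

public class GetOrCreateDemo {
    static class Operator {}   // placeholder for the Hive Operator
    static class MapOpCtx {}   // placeholder for MapOperator.MapOpCtx

    public static void main(String[] args) {
        Map<String, Map<Operator, MapOpCtx>> opCtxMap =
                new LinkedHashMap<String, Map<Operator, MapOpCtx>>();
        // One call replaces the get / null-check / put sequence:
        Map<Operator, MapOpCtx> contexts = opCtxMap.computeIfAbsent(
                "/warehouse/t/part=1", k -> new LinkedHashMap<Operator, MapOpCtx>());
        contexts.put(new Operator(), new MapOpCtx());
        System.out.println(opCtxMap.size());   // 1
    }
}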

Example 98 with LinkedHashMap

use of java.util.LinkedHashMap in project hive by apache.

the class SparkMetricsUtils method collectMetrics.

static Map<String, Long> collectMetrics(Metrics allMetrics) {
    Map<String, Long> results = new LinkedHashMap<String, Long>();
    results.put(EXECUTOR_DESERIALIZE_TIME, allMetrics.executorDeserializeTime);
    results.put(EXECUTOR_RUN_TIME, allMetrics.executorRunTime);
    results.put(RESULT_SIZE, allMetrics.resultSize);
    results.put(JVM_GC_TIME, allMetrics.jvmGCTime);
    results.put(RESULT_SERIALIZATION_TIME, allMetrics.resultSerializationTime);
    results.put(MEMORY_BYTES_SPILLED, allMetrics.memoryBytesSpilled);
    results.put(DISK_BYTES_SPILLED, allMetrics.diskBytesSpilled);
    if (allMetrics.inputMetrics != null) {
        results.put(BYTES_READ, allMetrics.inputMetrics.bytesRead);
    }
    if (allMetrics.shuffleReadMetrics != null) {
        ShuffleReadMetrics shuffleReadMetrics = allMetrics.shuffleReadMetrics;
        long rbf = shuffleReadMetrics.remoteBlocksFetched;
        long lbf = shuffleReadMetrics.localBlocksFetched;
        results.put(REMOTE_BLOCKS_FETCHED, rbf);
        results.put(LOCAL_BLOCKS_FETCHED, lbf);
        results.put(TOTAL_BLOCKS_FETCHED, rbf + lbf);
        results.put(FETCH_WAIT_TIME, shuffleReadMetrics.fetchWaitTime);
        results.put(REMOTE_BYTES_READ, shuffleReadMetrics.remoteBytesRead);
    }
    if (allMetrics.shuffleWriteMetrics != null) {
        results.put(SHUFFLE_BYTES_WRITTEN, allMetrics.shuffleWriteMetrics.shuffleBytesWritten);
        results.put(SHUFFLE_WRITE_TIME, allMetrics.shuffleWriteMetrics.shuffleWriteTime);
    }
    return results;
}
Also used : ShuffleReadMetrics(org.apache.hive.spark.client.metrics.ShuffleReadMetrics) LinkedHashMap(java.util.LinkedHashMap)
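
Because collectMetrics fills a LinkedHashMap, whatever code later iterates the returned map (for logging or display) sees the metrics in the fixed order they were put: deserialize time first, shuffle writes last. A small sketch of that behavior with made-up values:

import java.util.LinkedHashMap;
import java.util.Map;

public class MetricsOrderDemo {
    public static void main(String[] args) {
        Map<String, Long> results = new LinkedHashMap<String, Long>();
        results.put("ExecutorDeserializeTime", 12L);
        results.put("ExecutorRunTime", 340L);
        results.put("ResultSize", 2048L);
        // Insertion order becomes display order, so reports are stable.
        for (Map.Entry<String, Long> e : results.entrySet()) {
            System.out.println(e.getKey() + "=" + e.getValue());
        }
    }
}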

Example 99 with LinkedHashMap

use of java.util.LinkedHashMap in project hive by apache.

the class AbstractBucketJoinProc method convertMapJoinToBucketMapJoin.

/*
   * Convert mapjoin to a bucketed mapjoin.
   * The operator tree is not changed, but the mapjoin descriptor in the big table is
   * enhanced to keep the big table bucket -> small table buckets mapping.
   */
protected void convertMapJoinToBucketMapJoin(MapJoinOperator mapJoinOp, BucketJoinProcCtx context) throws SemanticException {
    MapJoinDesc desc = mapJoinOp.getConf();
    Map<String, Map<String, List<String>>> aliasBucketFileNameMapping = new LinkedHashMap<String, Map<String, List<String>>>();
    Map<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition = context.getTblAliasToNumberOfBucketsInEachPartition();
    Map<String, List<List<String>>> tblAliasToBucketedFilePathsInEachPartition = context.getTblAliasToBucketedFilePathsInEachPartition();
    Map<Partition, List<String>> bigTblPartsToBucketFileNames = context.getBigTblPartsToBucketFileNames();
    Map<Partition, Integer> bigTblPartsToBucketNumber = context.getBigTblPartsToBucketNumber();
    List<String> joinAliases = context.getJoinAliases();
    String baseBigAlias = context.getBaseBigAlias();
    // sort bucket names for the big table
    for (List<String> partBucketNames : bigTblPartsToBucketFileNames.values()) {
        Collections.sort(partBucketNames);
    }
    // go through all small tables and get the mapping from bucket file names
    // in the big table to bucket file names in small tables.
    for (int j = 0; j < joinAliases.size(); j++) {
        String alias = joinAliases.get(j);
        if (alias.equals(baseBigAlias)) {
            continue;
        }
        for (List<String> names : tblAliasToBucketedFilePathsInEachPartition.get(alias)) {
            Collections.sort(names);
        }
        List<Integer> smallTblBucketNums = tblAliasToNumberOfBucketsInEachPartition.get(alias);
        List<List<String>> smallTblFilesList = tblAliasToBucketedFilePathsInEachPartition.get(alias);
        Map<String, List<String>> mappingBigTableBucketFileNameToSmallTableBucketFileNames = new LinkedHashMap<String, List<String>>();
        aliasBucketFileNameMapping.put(alias, mappingBigTableBucketFileNameToSmallTableBucketFileNames);
        // for each bucket file in big table, get the corresponding bucket file
        // name in the small table.
        // if there is more than 1 partition in the big table, do the mapping for each partition
        Iterator<Entry<Partition, List<String>>> bigTblPartToBucketNames = bigTblPartsToBucketFileNames.entrySet().iterator();
        Iterator<Entry<Partition, Integer>> bigTblPartToBucketNum = bigTblPartsToBucketNumber.entrySet().iterator();
        while (bigTblPartToBucketNames.hasNext()) {
            assert bigTblPartToBucketNum.hasNext();
            int bigTblBucketNum = bigTblPartToBucketNum.next().getValue();
            List<String> bigTblBucketNameList = bigTblPartToBucketNames.next().getValue();
            fillMappingBigTableBucketFileNameToSmallTableBucketFileNames(smallTblBucketNums, smallTblFilesList, mappingBigTableBucketFileNameToSmallTableBucketFileNames, bigTblBucketNum, bigTblBucketNameList, desc.getBigTableBucketNumMapping());
        }
    }
    desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
    desc.setBigTableAlias(baseBigAlias);
    boolean bigTablePartitioned = context.isBigTablePartitioned();
    if (bigTablePartitioned) {
        desc.setBigTablePartSpecToFileMapping(convert(bigTblPartsToBucketFileNames));
    }
    Map<Integer, Set<String>> posToAliasMap = mapJoinOp.getPosToAliasMap();
    Map<String, String> aliasToNewAliasMap = context.getAliasToNewAliasMap();
    if (aliasToNewAliasMap != null && posToAliasMap != null) {
        for (Map.Entry<String, String> entry : aliasToNewAliasMap.entrySet()) {
            for (Set<String> aliases : posToAliasMap.values()) {
                if (aliases.remove(entry.getKey())) {
                    aliases.add(entry.getValue());
                }
            }
        }
    }
    // successfully convert to bucket map join
    desc.setBucketMapJoin(true);
}
Also used : Set(java.util.Set) LinkedHashMap(java.util.LinkedHashMap) Entry(java.util.Map.Entry) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) Partition(org.apache.hadoop.hive.ql.metadata.Partition) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
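
Both levels of aliasBucketFileNameMapping are LinkedHashMaps, so the alias order and the per-alias bucket-file order survive into the serialized plan, which keeps repeated compilations of the same query deterministic. A toy sketch of the nested shape (all file names hypothetical):

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class BucketMappingDemo {
    public static void main(String[] args) {
        // alias -> (big-table bucket file -> small-table bucket files)
        Map<String, Map<String, List<String>>> mapping =
                new LinkedHashMap<String, Map<String, List<String>>>();
        Map<String, List<String>> perBucket = new LinkedHashMap<String, List<String>>();
        perBucket.put("big_000000_0", Arrays.asList("small_000000_0"));
        perBucket.put("big_000001_0", Arrays.asList("small_000001_0"));
        mapping.put("b", perBucket);
        System.out.println(mapping);
    }
}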

Example 100 with LinkedHashMap

use of java.util.LinkedHashMap in project hive by apache.

the class AnnotateReduceSinkOutputOperator method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    // 1. We apply the transformation
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", "(" + ReduceSinkOperator.getOperatorName() + "%)"), new ReduceSinkOutputOperatorAnnotator());
    GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null));
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) LinkedHashMap(java.util.LinkedHashMap)
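
Registering the rules in a LinkedHashMap gives the graph walker a deterministic iteration order over Rule entries, so if two patterns ever match equally well, the tie is broken the same way on every run. A sketch of the ordering property with plain strings standing in for Rule and NodeProcessor (not the Hive dispatcher itself):

import java.util.LinkedHashMap;
import java.util.Map;

public class RuleOrderDemo {
    public static void main(String[] args) {
        Map<String, String> opRules = new LinkedHashMap<String, String>();
        opRules.put("R1: RS%", "ReduceSinkOutputOperatorAnnotator");
        opRules.put("R2: .*", "defaultProcessor");
        // Entries are always examined in registration order.
        for (Map.Entry<String, String> rule : opRules.entrySet()) {
            System.out.println("try " + rule.getKey() + " -> " + rule.getValue());
        }
    }
}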

Aggregations

LinkedHashMap (java.util.LinkedHashMap): 1944
ArrayList (java.util.ArrayList): 575
Map (java.util.Map): 561
HashMap (java.util.HashMap): 373
Test (org.junit.Test): 275
List (java.util.List): 255
IOException (java.io.IOException): 122
HashSet (java.util.HashSet): 91
Set (java.util.Set): 79
File (java.io.File): 76
LinkedHashSet (java.util.LinkedHashSet): 68
TreeMap (java.util.TreeMap): 68
Node (org.apache.hadoop.hive.ql.lib.Node): 59
NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor): 58
Rule (org.apache.hadoop.hive.ql.lib.Rule): 58
Date (java.util.Date): 57
GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker): 56
Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher): 55
Iterator (java.util.Iterator): 54
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 54