Use of java.util.LinkedHashMap in the Apache Hive project.
From the class Utilities, method getFullDPSpecs:
/**
 * Construct a list of full partition specs from the Dynamic Partition Context and the directory names
 * corresponding to these dynamic partitions.
 */
public static List<LinkedHashMap<String, String>> getFullDPSpecs(Configuration conf, DynamicPartitionCtx dpCtx) throws HiveException {
  try {
    Path loadPath = dpCtx.getRootPath();
    FileSystem fs = loadPath.getFileSystem(conf);
    int numDPCols = dpCtx.getNumDPCols();
    FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(loadPath, numDPCols, fs);
    if (status.length == 0) {
      LOG.warn("No partition is generated by dynamic partitioning");
      return null;
    }
    // partial partition specification
    Map<String, String> partSpec = dpCtx.getPartSpec();
    // list of full partition specifications
    List<LinkedHashMap<String, String>> fullPartSpecs =
        new ArrayList<LinkedHashMap<String, String>>();
    // for each dynamically created directory, construct a full partition spec
    // and load the partition based on that
    for (int i = 0; i < status.length; ++i) {
      // get the dynamically created directory
      Path partPath = status[i].getPath();
      assert fs.getFileStatus(partPath).isDir() : "partitions " + partPath + " is not a directory !";
      // generate a full partition specification
      LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>(partSpec);
      Warehouse.makeSpecFromName(fullPartSpec, partPath);
      fullPartSpecs.add(fullPartSpec);
    }
    return fullPartSpecs;
  } catch (IOException e) {
    throw new HiveException(e);
  }
}
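
The LinkedHashMap matters here because the partition columns of a spec must keep their declared order when the spec is later rendered as a path or a PARTITION (...) clause. Below is a minimal, self-contained sketch of that ordering property; it is not Hive code, and the class name and partition columns (country, dt, hr) are invented for illustration.

import java.util.LinkedHashMap;
import java.util.Map;

public class PartitionSpecOrderDemo {
  public static void main(String[] args) {
    // partial spec (static partition columns), in declaration order
    Map<String, String> partSpec = new LinkedHashMap<>();
    partSpec.put("country", "us");

    // copy the partial spec, then append the dynamic columns parsed from a directory name
    LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<>(partSpec);
    fullPartSpec.put("dt", "2024-01-01");
    fullPartSpec.put("hr", "08");

    // iteration order matches insertion order: country, dt, hr
    StringBuilder path = new StringBuilder();
    for (Map.Entry<String, String> e : fullPartSpec.entrySet()) {
      path.append('/').append(e.getKey()).append('=').append(e.getValue());
    }
    System.out.println(path); // prints /country=us/dt=2024-01-01/hr=08
  }
}

A plain HashMap would give no guarantee about the key order, so the rendered partition path could come out with the columns shuffled.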
Use of java.util.LinkedHashMap in the Apache Hive project.
From the class MapOperator, method setChildren:
public void setChildren(Configuration hconf) throws Exception {
  List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
  Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
  Map<TableDesc, StructObjectInspector> convertedOI = getConvertedOI(tableNameToConf);
  for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
    Path onefile = entry.getKey();
    List<String> aliases = entry.getValue();
    PartitionDesc partDesc = conf.getPathToPartitionInfo().get(onefile);
    TableDesc tableDesc = partDesc.getTableDesc();
    Configuration newConf = tableNameToConf.get(tableDesc.getTableName());
    for (String alias : aliases) {
      Operator<? extends OperatorDesc> op = conf.getAliasToWork().get(alias);
      if (isLogDebugEnabled) {
        LOG.debug("Adding alias " + alias + " to work list for file " + onefile);
      }
      Map<Operator<?>, MapOpCtx> contexts = opCtxMap.get(onefile.toString());
      if (contexts == null) {
        opCtxMap.put(onefile.toString(), contexts = new LinkedHashMap<Operator<?>, MapOpCtx>());
      }
      if (contexts.containsKey(op)) {
        continue;
      }
      MapOpCtx context = new MapOpCtx(alias, op, partDesc);
      StructObjectInspector tableRowOI = convertedOI.get(partDesc.getTableDesc());
      contexts.put(op, initObjectInspector(newConf, context, tableRowOI));
      if (children.contains(op) == false) {
        op.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(1));
        op.getParentOperators().add(this);
        children.add(op);
      }
    }
  }
  initOperatorContext(children);
  // we found all the operators that we are supposed to process.
  setChildOperators(children);
}
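
setChildren keeps a per-path LinkedHashMap so that, for each input path, the operator contexts stay in the order the aliases were registered. The explicit get / null-check / put on opCtxMap above is the get-or-create pattern; on Java 8+ it can be written with computeIfAbsent. The sketch below is a standalone illustration using plain String keys and values instead of Hive's Operator and MapOpCtx types, with invented paths and aliases.

import java.util.LinkedHashMap;
import java.util.Map;

public class NestedMapDemo {
  public static void main(String[] args) {
    // outer map: input path -> (alias -> context); the inner LinkedHashMap keeps
    // the order in which operators were registered for that path
    Map<String, Map<String, String>> opCtxMap = new LinkedHashMap<>();

    register(opCtxMap, "/warehouse/t1", "a", "ctx-a");
    register(opCtxMap, "/warehouse/t1", "b", "ctx-b");
    register(opCtxMap, "/warehouse/t2", "c", "ctx-c");

    // prints {/warehouse/t1={a=ctx-a, b=ctx-b}, /warehouse/t2={c=ctx-c}}
    System.out.println(opCtxMap);
  }

  static void register(Map<String, Map<String, String>> opCtxMap,
      String path, String alias, String ctx) {
    // Java 8 equivalent of the explicit get / null-check / put in setChildren
    opCtxMap.computeIfAbsent(path, k -> new LinkedHashMap<>()).put(alias, ctx);
  }
}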
Use of java.util.LinkedHashMap in the Apache Hive project.
From the class SparkMetricsUtils, method collectMetrics:
static Map<String, Long> collectMetrics(Metrics allMetrics) {
  Map<String, Long> results = new LinkedHashMap<String, Long>();
  results.put(EXECUTOR_DESERIALIZE_TIME, allMetrics.executorDeserializeTime);
  results.put(EXECUTOR_RUN_TIME, allMetrics.executorRunTime);
  results.put(RESULT_SIZE, allMetrics.resultSize);
  results.put(JVM_GC_TIME, allMetrics.jvmGCTime);
  results.put(RESULT_SERIALIZATION_TIME, allMetrics.resultSerializationTime);
  results.put(MEMORY_BYTES_SPLIED, allMetrics.memoryBytesSpilled);
  results.put(DISK_BYTES_SPILLED, allMetrics.diskBytesSpilled);
  if (allMetrics.inputMetrics != null) {
    results.put(BYTES_READ, allMetrics.inputMetrics.bytesRead);
  }
  if (allMetrics.shuffleReadMetrics != null) {
    ShuffleReadMetrics shuffleReadMetrics = allMetrics.shuffleReadMetrics;
    long rbf = shuffleReadMetrics.remoteBlocksFetched;
    long lbf = shuffleReadMetrics.localBlocksFetched;
    results.put(REMOTE_BLOCKS_FETCHED, rbf);
    results.put(LOCAL_BLOCKS_FETCHED, lbf);
    results.put(TOTAL_BLOCKS_FETCHED, rbf + lbf);
    results.put(FETCH_WAIT_TIME, shuffleReadMetrics.fetchWaitTime);
    results.put(REMOTE_BYTES_READ, shuffleReadMetrics.remoteBytesRead);
  }
  if (allMetrics.shuffleWriteMetrics != null) {
    results.put(SHUFFLE_BYTES_WRITTEN, allMetrics.shuffleWriteMetrics.shuffleBytesWritten);
    results.put(SHUFFLE_WRITE_TIME, allMetrics.shuffleWriteMetrics.shuffleWriteTime);
  }
  return results;
}
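
collectMetrics relies on LinkedHashMap purely for presentation: the metrics come back in the same order they were inserted, so logs and status tables stay stable across runs. The standalone sketch below shows only that ordering behavior; the metric names and values are invented for the example and are not taken from Spark.

import java.util.LinkedHashMap;
import java.util.Map;

public class MetricsOrderDemo {
  public static void main(String[] args) {
    // a plain HashMap would iterate these keys in an arbitrary order;
    // a LinkedHashMap reports them in the order they were collected
    Map<String, Long> results = new LinkedHashMap<>();
    results.put("ExecutorDeserializeTime", 12L);
    results.put("ExecutorRunTime", 4031L);
    results.put("ResultSize", 2048L);
    results.put("JvmGCTime", 87L);

    for (Map.Entry<String, Long> e : results.entrySet()) {
      System.out.println(e.getKey() + " = " + e.getValue());
    }
  }
}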
Use of java.util.LinkedHashMap in the Apache Hive project.
From the class AbstractBucketJoinProc, method convertMapJoinToBucketMapJoin:
/*
 * Convert mapjoin to a bucketed mapjoin.
 * The operator tree is not changed, but the mapjoin descriptor in the big table is
 * enhanced to keep the big table bucket -> small table buckets mapping.
 */
protected void convertMapJoinToBucketMapJoin(MapJoinOperator mapJoinOp, BucketJoinProcCtx context) throws SemanticException {
  MapJoinDesc desc = mapJoinOp.getConf();
  Map<String, Map<String, List<String>>> aliasBucketFileNameMapping =
      new LinkedHashMap<String, Map<String, List<String>>>();
  Map<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition =
      context.getTblAliasToNumberOfBucketsInEachPartition();
  Map<String, List<List<String>>> tblAliasToBucketedFilePathsInEachPartition =
      context.getTblAliasToBucketedFilePathsInEachPartition();
  Map<Partition, List<String>> bigTblPartsToBucketFileNames = context.getBigTblPartsToBucketFileNames();
  Map<Partition, Integer> bigTblPartsToBucketNumber = context.getBigTblPartsToBucketNumber();
  List<String> joinAliases = context.getJoinAliases();
  String baseBigAlias = context.getBaseBigAlias();
  // sort bucket names for the big table
  for (List<String> partBucketNames : bigTblPartsToBucketFileNames.values()) {
    Collections.sort(partBucketNames);
  }
  // go through all small tables and get the mapping from bucket file names
  // in the big table to bucket file names in small tables.
  for (int j = 0; j < joinAliases.size(); j++) {
    String alias = joinAliases.get(j);
    if (alias.equals(baseBigAlias)) {
      continue;
    }
    for (List<String> names : tblAliasToBucketedFilePathsInEachPartition.get(alias)) {
      Collections.sort(names);
    }
    List<Integer> smallTblBucketNums = tblAliasToNumberOfBucketsInEachPartition.get(alias);
    List<List<String>> smallTblFilesList = tblAliasToBucketedFilePathsInEachPartition.get(alias);
    Map<String, List<String>> mappingBigTableBucketFileNameToSmallTableBucketFileNames =
        new LinkedHashMap<String, List<String>>();
    aliasBucketFileNameMapping.put(alias, mappingBigTableBucketFileNameToSmallTableBucketFileNames);
    // for each bucket file in big table, get the corresponding bucket file
    // name in the small table.
    // more than 1 partition in the big table, do the mapping for each partition
    Iterator<Entry<Partition, List<String>>> bigTblPartToBucketNames =
        bigTblPartsToBucketFileNames.entrySet().iterator();
    Iterator<Entry<Partition, Integer>> bigTblPartToBucketNum =
        bigTblPartsToBucketNumber.entrySet().iterator();
    while (bigTblPartToBucketNames.hasNext()) {
      assert bigTblPartToBucketNum.hasNext();
      int bigTblBucketNum = bigTblPartToBucketNum.next().getValue();
      List<String> bigTblBucketNameList = bigTblPartToBucketNames.next().getValue();
      fillMappingBigTableBucketFileNameToSmallTableBucketFileNames(smallTblBucketNums, smallTblFilesList,
          mappingBigTableBucketFileNameToSmallTableBucketFileNames, bigTblBucketNum, bigTblBucketNameList,
          desc.getBigTableBucketNumMapping());
    }
  }
  desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
  desc.setBigTableAlias(baseBigAlias);
  boolean bigTablePartitioned = context.isBigTablePartitioned();
  if (bigTablePartitioned) {
    desc.setBigTablePartSpecToFileMapping(convert(bigTblPartsToBucketFileNames));
  }
  Map<Integer, Set<String>> posToAliasMap = mapJoinOp.getPosToAliasMap();
  Map<String, String> aliasToNewAliasMap = context.getAliasToNewAliasMap();
  if (aliasToNewAliasMap != null && posToAliasMap != null) {
    for (Map.Entry<String, String> entry : aliasToNewAliasMap.entrySet()) {
      for (Set<String> aliases : posToAliasMap.values()) {
        if (aliases.remove(entry.getKey())) {
          aliases.add(entry.getValue());
        }
      }
    }
  }
  // successfully convert to bucket map join
  desc.setBucketMapJoin(true);
}
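
The result is a two-level LinkedHashMap keyed by small-table alias, mapping each big-table bucket file to the small-table bucket files it joins with. The sketch below builds such a structure by hand for one small table, assuming the usual bucket map join rule that big-table bucket i pairs with small-table bucket i modulo the smaller bucket count; the file names are invented, and Hive's helper fillMappingBigTableBucketFileNameToSmallTableBucketFileNames is not reproduced.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class BucketMappingDemo {
  public static void main(String[] args) {
    // alias -> (big-table bucket file -> small-table bucket files); both levels are
    // LinkedHashMaps so the mapping lines up with the sorted bucket file lists
    Map<String, Map<String, List<String>>> aliasBucketFileNameMapping = new LinkedHashMap<>();

    // big table has 4 buckets, small table has 2: big bucket i joins small bucket i % 2
    Map<String, List<String>> smallTblMapping = new LinkedHashMap<>();
    smallTblMapping.put("big_bucket_0", Arrays.asList("small_bucket_0"));
    smallTblMapping.put("big_bucket_1", Arrays.asList("small_bucket_1"));
    smallTblMapping.put("big_bucket_2", Arrays.asList("small_bucket_0"));
    smallTblMapping.put("big_bucket_3", Arrays.asList("small_bucket_1"));

    aliasBucketFileNameMapping.put("s", smallTblMapping);
    System.out.println(aliasBucketFileNameMapping);
  }
}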
Use of java.util.LinkedHashMap in the Apache Hive project.
From the class AnnotateReduceSinkOutputOperator, method transform:
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  // 1. We apply the transformation
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", "(" + ReduceSinkOperator.getOperatorName() + "%)"),
      new ReduceSinkOutputOperatorAnnotator());
  GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null));
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
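
Here the LinkedHashMap keeps the rule table in a deterministic order; the real DefaultRuleDispatcher selects the best matching rule for each node, but a predictable iteration order over the registered rules keeps plan annotation reproducible. The standalone sketch below shows only that ordering property, using UnaryOperator<String> as a stand-in for Hive's Rule and NodeProcessor, with an invented node label.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.UnaryOperator;

public class OrderedRulesDemo {
  public static void main(String[] args) {
    // rules registered in a LinkedHashMap are visited in insertion order
    Map<String, UnaryOperator<String>> opRules = new LinkedHashMap<>();
    opRules.put("R1", s -> s.replace("RS", "ReduceSink"));
    opRules.put("R2", s -> s + " (annotated)");

    String node = "RS[3]";
    for (Map.Entry<String, UnaryOperator<String>> rule : opRules.entrySet()) {
      node = rule.getValue().apply(node);
      System.out.println(rule.getKey() + " -> " + node);
    }
  }
}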