Use of org.apache.hadoop.fs.Path in project hive by apache.
The class GenMRTableScan1, method handlePartialScanCommand.
/**
 * Handles the partial scan command. The generated plan is a PartialScanTask followed by a StatsTask.
 * @param op the table scan operator being processed
 * @param ctx the GenMR processing context
 * @param parseCtx the parse context
 * @param currTask the current root task, which the partial scan task replaces
 * @param statsWork the stats work to mark as a partial scan analyze command
 * @param statsTask the stats task to run after the partial scan
 * @throws SemanticException
 */
private void handlePartialScanCommand(TableScanOperator op, GenMRProcContext ctx, ParseContext parseCtx,
    Task<? extends Serializable> currTask, StatsWork statsWork, Task<StatsWork> statsTask) throws SemanticException {
  String aggregationKey = op.getConf().getStatsAggPrefix();
  StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
  List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(op, aggregationKeyBuffer);
  aggregationKey = aggregationKeyBuffer.toString();
  // scan work
  PartialScanWork scanWork = new PartialScanWork(inputPaths);
  scanWork.setMapperCannotSpanPartns(true);
  scanWork.setAggKey(aggregationKey);
  scanWork.setStatsTmpDir(op.getConf().getTmpStatsDir(), parseCtx.getConf());
  // stats work
  statsWork.setPartialScanAnalyzeCommand(true);
  // partial scan task
  DriverContext driverCxt = new DriverContext();
  Task<PartialScanWork> psTask = TaskFactory.get(scanWork, parseCtx.getConf());
  psTask.initialize(parseCtx.getQueryState(), null, driverCxt, op.getCompilationOpContext());
  psTask.setWork(scanWork);
  // task dependency
  ctx.getRootTasks().remove(currTask);
  ctx.getRootTasks().add(psTask);
  psTask.addDependentTask(statsTask);
  List<Task<? extends Serializable>> parentTasks = new ArrayList<Task<? extends Serializable>>();
  parentTasks.add(psTask);
  statsTask.setParentTasks(parentTasks);
}
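The wiring at the end is the general pattern for splicing a new root into a task DAG: the new task replaces the old root in the root list, and the downstream task records it as its only parent. A minimal sketch of that pattern, using a hypothetical Node class rather than Hive's Task hierarchy:

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for Hive's Task: each node tracks children and parents.
class Node {
  final String name;
  final List<Node> children = new ArrayList<>();
  final List<Node> parents = new ArrayList<>();
  Node(String name) { this.name = name; }

  // Mirrors Task.addDependentTask: wires both directions of the edge.
  void addDependent(Node child) {
    children.add(child);
    child.parents.add(this);
  }
}

public class TaskDagSketch {
  public static void main(String[] args) {
    List<Node> rootTasks = new ArrayList<>();
    Node currTask = new Node("currTask");
    rootTasks.add(currTask);

    // Splice in a new root and hang the stats node off it,
    // analogous to psTask/statsTask in handlePartialScanCommand.
    Node psTask = new Node("psTask");
    Node statsTask = new Node("statsTask");
    rootTasks.remove(currTask);
    rootTasks.add(psTask);
    psTask.addDependent(statsTask);

    System.out.println(rootTasks.get(0).name + " -> " + psTask.children.get(0).name);
  }
}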
Use of org.apache.hadoop.fs.Path in project hive by apache.
The class HiveMetaStoreChecker, method checkPartitionDirs.
private void checkPartitionDirs(final ExecutorService executor, final Path basePath, final Set<Path> result,
    final FileSystem fs, final int maxDepth) throws HiveException {
  try {
    Queue<Future<Path>> futures = new LinkedList<Future<Path>>();
    ConcurrentLinkedQueue<PathDepthInfo> nextLevel = new ConcurrentLinkedQueue<>();
    nextLevel.add(new PathDepthInfo(basePath, 0));
    // walk the tree one level at a time until no new sub-directories are discovered
    while (!nextLevel.isEmpty()) {
      ConcurrentLinkedQueue<PathDepthInfo> tempQueue = new ConcurrentLinkedQueue<>();
      // process each directory of the current level in parallel
      while (!nextLevel.isEmpty()) {
        futures.add(executor.submit(new PathDepthInfoCallable(nextLevel.poll(), maxDepth, fs, tempQueue)));
      }
      // collect the results; a non-null Path is recorded in the result set
      while (!futures.isEmpty()) {
        Path p = futures.poll().get();
        if (p != null) {
          result.add(p);
        }
      }
      // update nextLevel with the sub-directories newly discovered above
      nextLevel = tempQueue;
    }
  } catch (InterruptedException | ExecutionException e) {
    LOG.error(e.getMessage());
    executor.shutdownNow();
    throw new HiveException(e.getCause());
  }
}
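The pattern here (process one tree level at a time, fan each directory out to the executor, then swap in the queue of newly discovered sub-directories) also works outside Hadoop. A minimal sketch of the same walk over java.nio.file; the thread count and starting directory are illustrative, and the leaf criterion (no sub-directories) is a simplification of Hive's depth check:

import java.nio.file.*;
import java.util.*;
import java.util.concurrent.*;
import java.util.stream.*;

public class ParallelLevelWalk {
  public static void main(String[] args) throws Exception {
    ExecutorService executor = Executors.newFixedThreadPool(4);
    Set<Path> leaves = ConcurrentHashMap.newKeySet();
    Queue<Path> level = new ConcurrentLinkedQueue<>(List.of(Paths.get(".")));

    while (!level.isEmpty()) {
      ConcurrentLinkedQueue<Path> next = new ConcurrentLinkedQueue<>();
      List<Future<?>> futures = new ArrayList<>();
      // fan out: one task per directory in the current level
      for (Path dir; (dir = level.poll()) != null; ) {
        final Path d = dir;
        futures.add(executor.submit(() -> {
          try (Stream<Path> children = Files.list(d)) {
            List<Path> subDirs = children.filter(Files::isDirectory).collect(Collectors.toList());
            if (subDirs.isEmpty()) {
              leaves.add(d);          // no sub-directories: report as a leaf
            } else {
              next.addAll(subDirs);   // otherwise descend on the next round
            }
          } catch (Exception e) {
            throw new RuntimeException(e);
          }
        }));
      }
      // barrier: wait for the whole level before moving down
      for (Future<?> f : futures) {
        f.get();
      }
      level = next;
    }
    executor.shutdown();
    System.out.println("leaf directories: " + leaves);
  }
}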
Use of org.apache.hadoop.fs.Path in project hive by apache.
The class JsonMetaDataFormatter, method makeTableStatusLocations.
private List<Path> makeTableStatusLocations(Table tbl, Hive db, Partition par) throws HiveException {
  // output file system information
  Path tblPath = tbl.getPath();
  List<Path> locations = new ArrayList<Path>();
  if (tbl.isPartitioned()) {
    if (par == null) {
      for (Partition curPart : db.getPartitions(tbl)) {
        if (curPart.getLocation() != null) {
          locations.add(new Path(curPart.getLocation()));
        }
      }
    } else {
      if (par.getLocation() != null) {
        locations.add(new Path(par.getLocation()));
      }
    }
  } else {
    if (tblPath != null) {
      locations.add(tblPath);
    }
  }
  return locations;
}
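The only Path-specific move here is new Path(String) over a partition's stored location. A quick standalone illustration of what that constructor preserves; the URI below is a made-up example, not a real metastore entry:

import org.apache.hadoop.fs.Path;

public class PathFromLocation {
  public static void main(String[] args) {
    // A hypothetical partition location, as Hive would store it in the metastore.
    Path p = new Path("hdfs://nn:8020/warehouse/sales/ds=2017-01-01");
    System.out.println(p.toUri().getScheme()); // hdfs
    System.out.println(p.getParent());         // hdfs://nn:8020/warehouse/sales
    System.out.println(p.getName());           // ds=2017-01-01
  }
}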
Use of org.apache.hadoop.fs.Path in project hive by apache.
The class SortMergeJoinTaskDispatcher, method genSMBJoinWork.
// Convert the work in the SMB plan to a regular join.
// Note that the operator tree itself is not rewritten; only the path/alias
// mappings in the plan are adjusted. The tree will still contain the SMBJoinOperator.
private void genSMBJoinWork(MapWork currWork, SMBMapJoinOperator smbJoinOp) {
  // Remove the paths which are not part of aliasToPartitionInfo
  Map<String, PartitionDesc> aliasToPartitionInfo = currWork.getAliasToPartnInfo();
  List<Path> removePaths = new ArrayList<>();
  for (Map.Entry<Path, ArrayList<String>> entry : currWork.getPathToAliases().entrySet()) {
    boolean keepPath = false;
    for (String alias : entry.getValue()) {
      if (aliasToPartitionInfo.containsKey(alias)) {
        keepPath = true;
        break;
      }
    }
    // Remove the path if none of its aliases are present
    if (!keepPath) {
      removePaths.add(entry.getKey());
    }
  }
  List<String> removeAliases = new ArrayList<String>();
  for (Path removePath : removePaths) {
    removeAliases.addAll(currWork.getPathToAliases().get(removePath));
    currWork.removePathToAlias(removePath);
    currWork.removePathToPartitionInfo(removePath);
  }
  for (String alias : removeAliases) {
    currWork.getAliasToPartnInfo().remove(alias);
    currWork.getAliasToWork().remove(alias);
  }
  // Get the MapredLocalWork
  MapredLocalWork localWork = smbJoinOp.getConf().getLocalWork();
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : localWork.getAliasToWork().entrySet()) {
    String alias = entry.getKey();
    Operator<? extends OperatorDesc> op = entry.getValue();
    FetchWork fetchWork = localWork.getAliasToFetchWork().get(alias);
    // Add the entry in mapredwork
    currWork.getAliasToWork().put(alias, op);
    PartitionDesc partitionInfo = currWork.getAliasToPartnInfo().get(alias);
    if (fetchWork.getTblDir() != null) {
      currWork.mergeAliasedInput(alias, fetchWork.getTblDir(), partitionInfo);
    } else {
      for (Path pathDir : fetchWork.getPartDir()) {
        currWork.mergeAliasedInput(alias, pathDir, partitionInfo);
      }
    }
  }
  // Remove the dummy store operator from the tree by splicing its
  // grandparent directly above the SMB join
  for (Operator<? extends OperatorDesc> parentOp : smbJoinOp.getParentOperators()) {
    if (parentOp instanceof DummyStoreOperator) {
      Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
      smbJoinOp.replaceParent(parentOp, grandParentOp);
      grandParentOp.setChildOperators(parentOp.getChildOperators());
      parentOp.setParentOperators(null);
    }
  }
}
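Splicing a node out of an operator DAG (point the child at the grandparent, hand the removed node's children to the grandparent, then null out the stale links) is a reusable pattern. A minimal sketch with a hypothetical Op class, not Hive's Operator API:

import java.util.ArrayList;
import java.util.List;

// Hypothetical operator node; Hive's Operator keeps the same two lists.
class Op {
  final String name;
  List<Op> parents = new ArrayList<>();
  List<Op> children = new ArrayList<>();
  Op(String name) { this.name = name; }

  // Splice `victim` out from between its single parent and this node.
  void removeParent(Op victim) {
    Op grandParent = victim.parents.get(0);
    parents.set(parents.indexOf(victim), grandParent); // replaceParent
    grandParent.children = victim.children;            // adopt victim's children
    victim.parents = null;                             // detach the removed node
  }
}

public class SpliceSketch {
  public static void main(String[] args) {
    Op scan = new Op("scan");
    Op dummy = new Op("dummyStore");
    Op join = new Op("join");
    scan.children.add(dummy);
    dummy.parents.add(scan);
    dummy.children.add(join);
    join.parents.add(dummy);

    join.removeParent(dummy);
    System.out.println(join.parents.get(0).name);  // scan
    System.out.println(scan.children.get(0).name); // join
  }
}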
Use of org.apache.hadoop.fs.Path in project hive by apache.
The class SemanticAnalyzer, method fetchFilesNotInLocalFilesystem.
private String fetchFilesNotInLocalFilesystem(String cmd) {
  SessionState ss = SessionState.get();
  String progName = getScriptProgName(cmd);
  if (!ResourceDownloader.isFileUri(progName)) {
    String filePath = ss.add_resource(ResourceType.FILE, progName, true);
    Path p = new Path(filePath);
    String fileName = p.getName();
    String scriptArgs = getScriptArgs(cmd);
    String finalCmd = fileName + scriptArgs;
    return finalCmd;
  }
  return cmd;
}
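The effect is easiest to see on a concrete command: once the script has been localized via add_resource, its URI is swapped for the bare file name. A small illustration with a hypothetical command and inline stand-ins for the helpers, using only Path.getName from the snippet above:

import org.apache.hadoop.fs.Path;

public class ScriptCmdRewrite {
  public static void main(String[] args) {
    // Hypothetical TRANSFORM command whose script lives on HDFS.
    String cmd = "hdfs://nn:8020/scripts/clean.py -d ,";
    String progName = cmd.split(" ", 2)[0];               // stand-in for getScriptProgName
    String scriptArgs = cmd.substring(progName.length()); // stand-in for getScriptArgs
    // After the resource has been downloaded, only its name is kept.
    String finalCmd = new Path(progName).getName() + scriptArgs;
    System.out.println(finalCmd); // clean.py -d ,
  }
}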