Example 76 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class GenMRTableScan1 method handlePartialScanCommand.

/**
   * Handle the partial scan command: a PartialScanTask followed by a StatsTask.
   * @param op
   * @param ctx
   * @param parseCtx
   * @param currTask
   * @param statsWork
   * @param statsTask
   * @throws SemanticException
   */
private void handlePartialScanCommand(TableScanOperator op, GenMRProcContext ctx, ParseContext parseCtx, Task<? extends Serializable> currTask, StatsWork statsWork, Task<StatsWork> statsTask) throws SemanticException {
    String aggregationKey = op.getConf().getStatsAggPrefix();
    StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(op, aggregationKeyBuffer);
    aggregationKey = aggregationKeyBuffer.toString();
    // scan work
    PartialScanWork scanWork = new PartialScanWork(inputPaths);
    scanWork.setMapperCannotSpanPartns(true);
    scanWork.setAggKey(aggregationKey);
    scanWork.setStatsTmpDir(op.getConf().getTmpStatsDir(), parseCtx.getConf());
    // stats work
    statsWork.setPartialScanAnalyzeCommand(true);
    // partial scan task
    DriverContext driverCxt = new DriverContext();
    Task<PartialScanWork> psTask = TaskFactory.get(scanWork, parseCtx.getConf());
    psTask.initialize(parseCtx.getQueryState(), null, driverCxt, op.getCompilationOpContext());
    psTask.setWork(scanWork);
    // task dependency
    ctx.getRootTasks().remove(currTask);
    ctx.getRootTasks().add(psTask);
    psTask.addDependentTask(statsTask);
    List<Task<? extends Serializable>> parentTasks = new ArrayList<Task<? extends Serializable>>();
    parentTasks.add(psTask);
    statsTask.setParentTasks(parentTasks);
}
Also used : Path(org.apache.hadoop.fs.Path) DriverContext(org.apache.hadoop.hive.ql.DriverContext) Task(org.apache.hadoop.hive.ql.exec.Task) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) Serializable(java.io.Serializable) PartialScanWork(org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork) ArrayList(java.util.ArrayList)
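
The interesting part of this example is the root-task rewiring: the current task is removed from the root task list, the partial scan task takes its place, and the stats task is chained behind it. Below is a minimal, self-contained sketch of that wiring pattern; SimpleTask is a hypothetical stand-in for Hive's Task hierarchy, and TaskFactory, DriverContext, and the work objects are deliberately omitted.

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for Hive's Task<? extends Serializable>; it models only
// the parent/child wiring that handlePartialScanCommand performs.
class SimpleTask {
    final String name;
    final List<SimpleTask> children = new ArrayList<>();
    final List<SimpleTask> parents = new ArrayList<>();

    SimpleTask(String name) {
        this.name = name;
    }

    void addDependentTask(SimpleTask child) {
        children.add(child);
        child.parents.add(this);
    }
}

public class RootTaskRewire {
    public static void main(String[] args) {
        List<SimpleTask> rootTasks = new ArrayList<>();
        SimpleTask currTask = new SimpleTask("map-red scan");
        SimpleTask statsTask = new SimpleTask("stats");
        rootTasks.add(currTask);

        // Replace the current root with the partial-scan task and make the
        // stats task depend on it, mirroring the wiring in the Hive method.
        SimpleTask psTask = new SimpleTask("partial scan");
        rootTasks.remove(currTask);
        rootTasks.add(psTask);
        psTask.addDependentTask(statsTask);

        System.out.println("root: " + rootTasks.get(0).name);  // root: partial scan
    }
}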

Example 77 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class HiveMetaStoreChecker method checkPartitionDirs.

private void checkPartitionDirs(final ExecutorService executor, final Path basePath, final Set<Path> result, final FileSystem fs, final int maxDepth) throws HiveException {
    try {
        Queue<Future<Path>> futures = new LinkedList<Future<Path>>();
        ConcurrentLinkedQueue<PathDepthInfo> nextLevel = new ConcurrentLinkedQueue<>();
        nextLevel.add(new PathDepthInfo(basePath, 0));
        // breadth-first walk: process one level of directories at a time
        // until no new sub-directories are discovered
        while (!nextLevel.isEmpty()) {
            ConcurrentLinkedQueue<PathDepthInfo> tempQueue = new ConcurrentLinkedQueue<>();
            //process each level in parallel
            while (!nextLevel.isEmpty()) {
                futures.add(executor.submit(new PathDepthInfoCallable(nextLevel.poll(), maxDepth, fs, tempQueue)));
            }
            while (!futures.isEmpty()) {
                Path p = futures.poll().get();
                if (p != null) {
                    result.add(p);
                }
            }
            // replace nextLevel with the sub-directories discovered at this level
            nextLevel = tempQueue;
        }
    } catch (InterruptedException | ExecutionException e) {
        LOG.error(e.getMessage());
        executor.shutdownNow();
        throw new HiveException(e.getCause());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Future(java.util.concurrent.Future) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) ExecutionException(java.util.concurrent.ExecutionException) LinkedList(java.util.LinkedList)
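
The method is a level-synchronized breadth-first walk: every directory at one depth is listed concurrently, and the sub-directories discovered become the next level. The following is a hedged, standalone sketch of the same idea against the standard FileSystem.listStatus API; the names ParallelDirWalk and listDirsBreadthFirst are ours, and the maxDepth cut-off of the original is left out for brevity.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ParallelDirWalk {

    // Breadth-first parallel listing: each level's directories are listed
    // concurrently on the executor, and their sub-directories form the next level.
    static List<Path> listDirsBreadthFirst(FileSystem fs, Path base,
            ExecutorService executor) throws Exception {
        List<Path> all = new ArrayList<>();
        List<Path> level = new ArrayList<>();
        level.add(base);
        while (!level.isEmpty()) {
            List<Future<List<Path>>> futures = new ArrayList<>();
            for (Path dir : level) {
                futures.add(executor.submit(() -> {
                    List<Path> subDirs = new ArrayList<>();
                    for (FileStatus status : fs.listStatus(dir)) {
                        if (status.isDirectory()) {
                            subDirs.add(status.getPath());
                        }
                    }
                    return subDirs;
                }));
            }
            List<Path> next = new ArrayList<>();
            for (Future<List<Path>> future : futures) {
                next.addAll(future.get());  // propagates listing failures to the caller
            }
            all.addAll(next);
            level = next;
        }
        return all;
    }

    public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(8);
        try {
            FileSystem fs = FileSystem.get(new Configuration());
            for (Path p : listDirsBreadthFirst(fs, new Path(args[0]), pool)) {
                System.out.println(p);
            }
        } finally {
            pool.shutdown();
        }
    }
}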

Example 78 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class JsonMetaDataFormatter method makeTableStatusLocations.

private List<Path> makeTableStatusLocations(Table tbl, Hive db, Partition par) throws HiveException {
    // output file system information
    Path tblPath = tbl.getPath();
    List<Path> locations = new ArrayList<Path>();
    if (tbl.isPartitioned()) {
        if (par == null) {
            for (Partition curPart : db.getPartitions(tbl)) {
                if (curPart.getLocation() != null) {
                    locations.add(new Path(curPart.getLocation()));
                }
            }
        } else {
            if (par.getLocation() != null) {
                locations.add(new Path(par.getLocation()));
            }
        }
    } else {
        if (tblPath != null) {
            locations.add(tblPath);
        }
    }
    return locations;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList)
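
A plausible next step for the returned list is to query each location through the standard FileSystem API. The sketch below is an assumption about how such locations might be consumed (the formatter's actual output code is not part of this example); exists and getContentSummary are real FileSystem calls.

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LocationReport {

    // For each location, print its total size and file count via
    // FileSystem.getContentSummary; locations that no longer exist are skipped.
    static void report(Configuration conf, List<Path> locations) throws IOException {
        for (Path location : locations) {
            // Resolve the file system per path, since locations may span HDFS, S3, etc.
            FileSystem fs = location.getFileSystem(conf);
            if (!fs.exists(location)) {
                continue;
            }
            ContentSummary summary = fs.getContentSummary(location);
            System.out.printf("%s: %d bytes in %d files%n",
                    location, summary.getLength(), summary.getFileCount());
        }
    }
}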

Example 79 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class SortMergeJoinTaskDispatcher method genSMBJoinWork.

// Convert the work in the SMB plan to a regular join.
// Note that the operator tree is not converted here; only the path/alias mappings
// in the plan are fixed up, so the tree will still contain the SMBJoinOperator.
private void genSMBJoinWork(MapWork currWork, SMBMapJoinOperator smbJoinOp) {
    // Remove the paths which are not part of aliasToPartitionInfo
    Map<String, PartitionDesc> aliasToPartitionInfo = currWork.getAliasToPartnInfo();
    List<Path> removePaths = new ArrayList<>();
    for (Map.Entry<Path, ArrayList<String>> entry : currWork.getPathToAliases().entrySet()) {
        boolean keepPath = false;
        for (String alias : entry.getValue()) {
            if (aliasToPartitionInfo.containsKey(alias)) {
                keepPath = true;
                break;
            }
        }
        // Mark the path for removal if none of its aliases survives
        if (!keepPath) {
            removePaths.add(entry.getKey());
        }
    }
    List<String> removeAliases = new ArrayList<String>();
    for (Path removePath : removePaths) {
        removeAliases.addAll(currWork.getPathToAliases().get(removePath));
        currWork.removePathToAlias(removePath);
        currWork.removePathToPartitionInfo(removePath);
    }
    for (String alias : removeAliases) {
        currWork.getAliasToPartnInfo().remove(alias);
        currWork.getAliasToWork().remove(alias);
    }
    // Get the MapredLocalWork
    MapredLocalWork localWork = smbJoinOp.getConf().getLocalWork();
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : localWork.getAliasToWork().entrySet()) {
        String alias = entry.getKey();
        Operator<? extends OperatorDesc> op = entry.getValue();
        FetchWork fetchWork = localWork.getAliasToFetchWork().get(alias);
        // Add the entry in mapredwork
        currWork.getAliasToWork().put(alias, op);
        PartitionDesc partitionInfo = currWork.getAliasToPartnInfo().get(alias);
        if (fetchWork.getTblDir() != null) {
            currWork.mergeAliasedInput(alias, fetchWork.getTblDir(), partitionInfo);
        } else {
            for (Path pathDir : fetchWork.getPartDir()) {
                currWork.mergeAliasedInput(alias, pathDir, partitionInfo);
            }
        }
    }
    // Remove the dummy store operator from the tree
    for (Operator<? extends OperatorDesc> parentOp : smbJoinOp.getParentOperators()) {
        if (parentOp instanceof DummyStoreOperator) {
            Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
            smbJoinOp.replaceParent(parentOp, grandParentOp);
            grandParentOp.setChildOperators(parentOp.getChildOperators());
            parentOp.setParentOperators(null);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) ArrayList(java.util.ArrayList) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
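
The pruning at the top of genSMBJoinWork is a two-phase cleanup over two maps: first collect every path whose aliases are all missing from aliasToPartitionInfo, then remove those paths and their aliases. A reduced sketch with plain Java maps (the names and the String keys are ours, standing in for Path and PartitionDesc) isolates that logic.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class PathAliasPrune {

    // Drop every path whose aliases are all absent from aliasToPartitionInfo,
    // then drop those aliases from the alias-keyed map as well -- the same
    // two-phase cleanup genSMBJoinWork performs on MapWork.
    static void prune(Map<String, List<String>> pathToAliases,
                      Map<String, Object> aliasToPartitionInfo) {
        List<String> removePaths = new ArrayList<>();
        for (Map.Entry<String, List<String>> entry : pathToAliases.entrySet()) {
            boolean keepPath = false;
            for (String alias : entry.getValue()) {
                if (aliasToPartitionInfo.containsKey(alias)) {
                    keepPath = true;
                    break;
                }
            }
            if (!keepPath) {
                removePaths.add(entry.getKey());
            }
        }
        for (String path : removePaths) {
            for (String alias : pathToAliases.remove(path)) {
                aliasToPartitionInfo.remove(alias);
            }
        }
    }
}

Collecting the doomed keys first and deleting afterwards avoids a ConcurrentModificationException while iterating the entry set; the original method uses the same two-phase shape.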

Example 80 with Path

use of org.apache.hadoop.fs.Path in project hive by apache.

the class SemanticAnalyzer method fetchFilesNotInLocalFilesystem.

private String fetchFilesNotInLocalFilesystem(String cmd) {
    SessionState ss = SessionState.get();
    String progName = getScriptProgName(cmd);
    if (!ResourceDownloader.isFileUri(progName)) {
        String filePath = ss.add_resource(ResourceType.FILE, progName, true);
        Path p = new Path(filePath);
        String fileName = p.getName();
        String scriptArgs = getScriptArgs(cmd);
        String finalCmd = fileName + scriptArgs;
        return finalCmd;
    }
    return cmd;
}
Also used : Path(org.apache.hadoop.fs.Path) SessionState(org.apache.hadoop.hive.ql.session.SessionState)
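
The pivotal call here is Path.getName(), which keeps only the final component of a URI-style path, so a resource downloaded from a remote location is referenced by its bare file name in the rewritten command. A tiny demonstration (the URI below is made up):

import org.apache.hadoop.fs.Path;

public class PathNameDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://nn:8020/tmp/resources/my_script.py");
        System.out.println(p.getName());    // my_script.py
        System.out.println(p.getParent());  // hdfs://nn:8020/tmp/resources
    }
}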

Aggregations

Path (org.apache.hadoop.fs.Path)11752 Test (org.junit.Test)4193 FileSystem (org.apache.hadoop.fs.FileSystem)3587 IOException (java.io.IOException)2631 Configuration (org.apache.hadoop.conf.Configuration)2621 FileStatus (org.apache.hadoop.fs.FileStatus)1568 ArrayList (java.util.ArrayList)1145 File (java.io.File)987 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)924 HashMap (java.util.HashMap)570 Job (org.apache.hadoop.mapreduce.Job)492 JobConf (org.apache.hadoop.mapred.JobConf)477 URI (java.net.URI)465 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)462 FileNotFoundException (java.io.FileNotFoundException)441 FsPermission (org.apache.hadoop.fs.permission.FsPermission)375 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)362 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)355 Map (java.util.Map)326 List (java.util.List)316