Use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
The class SnapshotManifestV2, method loadRegionManifests.
static List<SnapshotRegionManifest> loadRegionManifests(final Configuration conf,
    final Executor executor, final FileSystem fs, final Path snapshotDir,
    final SnapshotDescription desc, final int manifestSizeLimit) throws IOException {
  // Collect only the files whose names carry the region-manifest prefix.
  FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir, new PathFilter() {
    @Override
    public boolean accept(Path path) {
      return path.getName().startsWith(SNAPSHOT_MANIFEST_PREFIX);
    }
  });
  if (manifestFiles == null || manifestFiles.length == 0) return null;

  // Parse each region manifest on the supplied executor.
  final ExecutorCompletionService<SnapshotRegionManifest> completionService =
      new ExecutorCompletionService<>(executor);
  for (final FileStatus st : manifestFiles) {
    completionService.submit(new Callable<SnapshotRegionManifest>() {
      @Override
      public SnapshotRegionManifest call() throws IOException {
        FSDataInputStream stream = fs.open(st.getPath());
        CodedInputStream cin = CodedInputStream.newInstance(stream);
        cin.setSizeLimit(manifestSizeLimit);
        try {
          return SnapshotRegionManifest.parseFrom(cin);
        } finally {
          stream.close();
        }
      }
    });
  }

  // Gather the results, unwrapping protobuf parse failures.
  ArrayList<SnapshotRegionManifest> regionsManifest = new ArrayList<>(manifestFiles.length);
  try {
    for (int i = 0; i < manifestFiles.length; ++i) {
      regionsManifest.add(completionService.take().get());
    }
  } catch (InterruptedException e) {
    throw new InterruptedIOException(e.getMessage());
  } catch (ExecutionException e) {
    Throwable t = e.getCause();
    if (t instanceof InvalidProtocolBufferException) {
      throw (InvalidProtocolBufferException) t;
    } else {
      IOException ex = new IOException("ExecutionException");
      ex.initCause(e.getCause());
      throw ex;
    }
  }
  return regionsManifest;
}
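Since PathFilter declares a single method, boolean accept(Path path), the anonymous class above can also be written as a lambda on Java 8+. Below is a minimal, self-contained sketch of the same prefix-filtering idea; the MANIFEST_PREFIX constant and the directory argument are illustrative assumptions, not taken from SnapshotManifestV2.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PrefixFilterSketch {
  // Hypothetical prefix; the real constant lives in SnapshotManifestV2.
  private static final String MANIFEST_PREFIX = "region-manifest.";

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path dir = new Path(args[0]);
    // PathFilter has a single abstract method, so a lambda works here.
    PathFilter prefixFilter = path -> path.getName().startsWith(MANIFEST_PREFIX);
    FileStatus[] matches = fs.listStatus(dir, prefixFilter);
    for (FileStatus st : matches) {
      System.out.println(st.getPath());
    }
  }
}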
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
The class FSUtils, method getRegionLocalityMappingFromFS.
/**
 * Scans the root path of the file system to get either the mapping between
 * each region name and its best-locality region server, or the degree of
 * locality of each region on every server holding at least one block of that
 * region. Both output map parameters are optional.
 *
 * @param conf the configuration to use
 * @param desiredTable the table to scan locality for
 * @param threadPoolSize the thread pool size to use
 * @param regionToBestLocalityRSMapping the map into which to put the best
 *          locality mapping, or null
 * @param regionDegreeLocalityMapping the map into which to put the locality
 *          degree mapping, or null; must be a thread-safe implementation
 * @throws IOException in case of file system errors or interrupts
 */
private static void getRegionLocalityMappingFromFS(final Configuration conf,
    final String desiredTable, int threadPoolSize,
    Map<String, String> regionToBestLocalityRSMapping,
    Map<String, Map<String, Float>> regionDegreeLocalityMapping) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path rootPath = FSUtils.getRootDir(conf);
  long startTime = EnvironmentEdgeManager.currentTime();
  Path queryPath;
  // The table files are in ${hbase.rootdir}/data/<namespace>/<table>/*
  if (null == desiredTable) {
    queryPath = new Path(new Path(rootPath, HConstants.BASE_NAMESPACE_DIR).toString() + "/*/*/*/");
  } else {
    queryPath = new Path(FSUtils.getTableDir(rootPath, TableName.valueOf(desiredTable)).toString() + "/*/");
  }
  // Reject all paths that are not region directories.
  PathFilter pathFilter = new PathFilter() {
    @Override
    public boolean accept(Path path) {
      // The last path component is the region name; the glob may match noise.
      if (null == path) {
        return false;
      }
      // Reject paths without a parent.
      Path parent = path.getParent();
      if (null == parent) {
        return false;
      }
      String regionName = path.getName();
      if (null == regionName) {
        return false;
      }
      // Encoded region names are hex strings.
      if (!regionName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
        return false;
      }
      return true;
    }
  };
  FileStatus[] statusList = fs.globStatus(queryPath, pathFilter);
  if (null == statusList) {
    return;
  }
  LOG.debug("Query Path: " + queryPath + " ; # list of files: " + statusList.length);
  // Lower the number of threads in case we have very few expected regions.
  threadPoolSize = Math.min(threadPoolSize, statusList.length);
  // Scan the regions in multiple threads.
  ThreadPoolExecutor tpe = new ThreadPoolExecutor(threadPoolSize, threadPoolSize,
      60, TimeUnit.SECONDS, new ArrayBlockingQueue<>(statusList.length));
  try {
    // Ignore all file status items that are not of interest.
    for (FileStatus regionStatus : statusList) {
      if (null == regionStatus) {
        continue;
      }
      if (!regionStatus.isDirectory()) {
        continue;
      }
      Path regionPath = regionStatus.getPath();
      if (null == regionPath) {
        continue;
      }
      tpe.execute(new FSRegionScanner(fs, regionPath,
          regionToBestLocalityRSMapping, regionDegreeLocalityMapping));
    }
  } finally {
    tpe.shutdown();
    int threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 60 * 1000);
    try {
      // Wait for the worker threads, logging progress periodically.
      while (!tpe.awaitTermination(threadWakeFrequency, TimeUnit.MILLISECONDS)) {
        // Print a rough estimate, so as to not introduce an AtomicInteger.
        LOG.info("Locality checking is underway: { Scanned Regions : "
            + tpe.getCompletedTaskCount() + "/" + tpe.getTaskCount() + " }");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
  }
  long overhead = EnvironmentEdgeManager.currentTime() - startTime;
  LOG.info("Scan DFS for locality info takes " + overhead + " ms");
}
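The interesting pattern here is the combination of a glob and a PathFilter: fs.globStatus expands the wildcard path, and the filter then discards anything whose name is not a hex-encoded region name. A minimal sketch of the same combination, assuming an illustrative /hbase/data/<namespace>/<table>/<region> layout (the glob string below is an assumption, not HBase's actual query path):

import java.io.IOException;
import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class GlobWithFilterSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // Illustrative glob: every region directory under every table.
    Path glob = new Path("/hbase/data/*/*/*");
    // Keep only names that look like encoded region names (hex digits).
    PathFilter hexNames = path ->
        path.getName().toLowerCase(Locale.ROOT).matches("[0-9a-f]+");
    FileStatus[] regions = fs.globStatus(glob, hexNames);
    if (regions == null) {
      return; // the glob matched nothing
    }
    for (FileStatus region : regions) {
      System.out.println(region.getPath());
    }
  }
}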
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
The class AbstractTestWALReplay, method testSequentialEditLogSeqNum.
// The following test is for HBASE-6065.
@Test
public void testSequentialEditLogSeqNum() throws IOException {
  final TableName tableName = TableName.valueOf(currentTest.getMethodName());
  final HRegionInfo hri = createBasic3FamilyHRegionInfo(tableName);
  final Path basedir = FSUtils.getTableDir(this.hbaseRootDir, tableName);
  deleteDir(basedir);
  final byte[] rowName = tableName.getName();
  final int countPerFamily = 10;
  final HTableDescriptor htd = createBasic1FamilyHTD(tableName);
  // Mock the WAL.
  MockWAL wal = createMockWAL();
  HRegion region = HRegion.openHRegion(this.conf, this.fs, hbaseRootDir, hri, htd, wal);
  for (HColumnDescriptor hcd : htd.getFamilies()) {
    addRegionEdits(rowName, hcd.getName(), countPerFamily, this.ee, region, "x");
  }
  // Flush the region, but do not let the cache flush complete yet.
  region.flush(true);
  for (HColumnDescriptor hcd : htd.getFamilies()) {
    addRegionEdits(rowName, hcd.getName(), 5, this.ee, region, "x");
  }
  // Get the current sequence number.
  long latestSeqNumber = region.getReadPoint(null);
  // Allow the cache flush to complete with the sequence number obtained
  // after the first set of edits.
  wal.doCompleteCacheFlush = true;
  wal.completeCacheFlush(hri.getEncodedNameAsBytes());
  wal.shutdown();
  FileStatus[] listStatus = wal.getFiles();
  assertNotNull(listStatus);
  assertTrue(listStatus.length > 0);
  WALSplitter.splitLogFile(hbaseRootDir, listStatus[0], this.fs, this.conf,
      null, null, null, mode, wals);
  // List the recovered.edits files, skipping sequence-id marker files.
  FileStatus[] listStatus1 = this.fs.listStatus(
      new Path(FSUtils.getTableDir(hbaseRootDir, tableName),
          new Path(hri.getEncodedName(), "recovered.edits")),
      new PathFilter() {
        @Override
        public boolean accept(Path p) {
          return !WALSplitter.isSequenceIdFile(p);
        }
      });
  int editCount = 0;
  for (FileStatus fileStatus : listStatus1) {
    // A recovered.edits file is named after its last edit sequence number.
    editCount = Integer.parseInt(fileStatus.getPath().getName());
  }
  // The sequence numbers should match.
  assertEquals("The sequence number of the recovered.edits file and the current edit seq should be same",
      latestSeqNumber, editCount);
}
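This filter inverts a predicate: it accepts everything except sequence-id marker files. A minimal sketch of the same exclusion pattern, with a hypothetical ".seqid" suffix standing in for the real WALSplitter.isSequenceIdFile(p) check:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ExclusionFilterSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path editsDir = new Path(args[0]);
    // Hypothetical suffix check; the real test is WALSplitter.isSequenceIdFile(p).
    PathFilter noSeqIdFiles = p -> !p.getName().endsWith(".seqid");
    FileStatus[] edits = fs.listStatus(editsDir, noSeqIdFiles);
    for (FileStatus edit : edits) {
      System.out.println(edit.getPath());
    }
  }
}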
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
The class HFile, method getStoreFiles.
/**
 * Returns all HFiles belonging to the given region directory. May return an
 * empty list.
 *
 * @param fs The file system reference.
 * @param regionDir The region directory to scan.
 * @return The list of files found.
 * @throws IOException When scanning the files fails.
 */
static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException {
  List<Path> regionHFiles = new ArrayList<>();
  PathFilter dirFilter = new FSUtils.DirFilter(fs);
  // Each subdirectory of a region directory is a column family directory.
  FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
  for (FileStatus dir : familyDirs) {
    FileStatus[] files = fs.listStatus(dir.getPath());
    for (FileStatus file : files) {
      // Skip directories, old WAL directories and recovered-edits files.
      if (!file.isDirectory()
          && (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME))
          && (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
        regionHFiles.add(file.getPath());
      }
    }
  }
  return regionHFiles;
}
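Here the filtering is delegated to FSUtils.DirFilter, which accepts only directories. A minimal equivalent written inline (a sketch under that assumption, not the HBase implementation) shows how such a filter must swallow the checked IOException, since PathFilter.accept returns a plain boolean:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class DirOnlySketch {
  public static void main(String[] args) throws IOException {
    final FileSystem fs = FileSystem.get(new Configuration());
    Path regionDir = new Path(args[0]);
    // Accept a path only if it resolves to a directory.
    PathFilter dirsOnly = path -> {
      try {
        return fs.getFileStatus(path).isDirectory();
      } catch (IOException e) {
        return false; // treat unreadable paths as non-directories
      }
    };
    List<Path> files = new ArrayList<>();
    for (FileStatus family : fs.listStatus(regionDir, dirsOnly)) {
      for (FileStatus file : fs.listStatus(family.getPath())) {
        if (!file.isDirectory()) {
          files.add(file.getPath());
        }
      }
    }
    files.forEach(p -> System.out.println(p));
  }
}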
Use of org.apache.hadoop.fs.PathFilter in project hive by apache.
The class MapReduceCompiler, method decideExecMode.
@Override
protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
    GlobalLimitCtx globalLimitCtx) throws SemanticException {
  // Bypass for explain queries for now.
  if (ctx.isExplainSkipExecution()) {
    return;
  }
  // The user has told us to run in local mode or doesn't want auto-local mode.
  if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
    return;
  }
  final Context lCtx = ctx;
  // Exclude the query's own intermediate (MR tmp) files from the input summary.
  PathFilter p = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return !lCtx.isMRTmpFileURI(file.toUri().getPath());
    }
  };
  List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
  // Map-reduce jobs will be run locally based on data size.
  // First find out if any of the jobs needs to run non-locally.
  boolean hasNonLocalJob = false;
  for (ExecDriver mrtask : mrtasks) {
    try {
      ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
      int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
      long estimatedInput;
      if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
        // If the global limit optimization is triggered, estimate the input
        // data actually needed based on the limit rows:
        // estimated input = (offset + limit) * max_size_per_row * (estimated_maps + 1)
        long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
        estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
        long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
        long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
        estimatedInput = estimatedInput * (estimatedNumMap + 1);
      } else {
        estimatedInput = inputSummary.getLength();
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength()
            + "," + inputSummary.getFileCount() + "," + numReducers
            + ", estimated Input: " + estimatedInput);
      }
      if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
          inputSummary.getFileCount()) != null) {
        hasNonLocalJob = true;
        break;
      } else {
        mrtask.setLocalMode(true);
      }
    } catch (IOException e) {
      throw new SemanticException(e);
    }
  }
  if (!hasNonLocalJob) {
    // The entire query can be run locally.
    // Save the current tracker value and restore it when done.
    ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
    ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
    console.printInfo("Automatically selecting local only mode for query");
  }
}
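In this Hive example the PathFilter keeps the query's own scratch files from inflating the input-size estimate that drives the local-vs-cluster decision. A minimal sketch of the same idea, using the common Hadoop convention that underscore- and dot-prefixed files are not data (the directory argument and the size-summing loop are illustrative, not Hive's Utilities.getInputSummary):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class DataFileSizeSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path inputDir = new Path(args[0]);
    // Skip files that are, by Hadoop convention, not data (_SUCCESS, .staging, ...).
    PathFilter dataFilesOnly = p ->
        !p.getName().startsWith("_") && !p.getName().startsWith(".");
    long totalBytes = 0L;
    for (FileStatus st : fs.listStatus(inputDir, dataFilesOnly)) {
      if (!st.isDirectory()) {
        totalBytes += st.getLen();
      }
    }
    // A size estimate like this is what feeds a local-vs-cluster decision.
    System.out.println("Estimated input bytes: " + totalBytes);
  }
}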