Use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
The class SnapshotManifestV2, method loadRegionManifests.
static List<SnapshotRegionManifest> loadRegionManifests(final Configuration conf,
    final Executor executor, final FileSystem fs, final Path snapshotDir,
    final SnapshotDescription desc, final int manifestSizeLimit) throws IOException {
  // Collect only the files whose names carry the region-manifest prefix.
  FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir, new PathFilter() {
    @Override
    public boolean accept(Path path) {
      return path.getName().startsWith(SNAPSHOT_MANIFEST_PREFIX);
    }
  });
  if (manifestFiles == null || manifestFiles.length == 0) return null;

  // Parse each region manifest on the supplied executor.
  final ExecutorCompletionService<SnapshotRegionManifest> completionService =
      new ExecutorCompletionService<>(executor);
  for (final FileStatus st : manifestFiles) {
    completionService.submit(new Callable<SnapshotRegionManifest>() {
      @Override
      public SnapshotRegionManifest call() throws IOException {
        FSDataInputStream stream = fs.open(st.getPath());
        CodedInputStream cin = CodedInputStream.newInstance(stream);
        cin.setSizeLimit(manifestSizeLimit);
        try {
          return SnapshotRegionManifest.parseFrom(cin);
        } finally {
          stream.close();
        }
      }
    });
  }

  // Gather the results, unwrapping protobuf parse failures.
  ArrayList<SnapshotRegionManifest> regionsManifest = new ArrayList<>(manifestFiles.length);
  try {
    for (int i = 0; i < manifestFiles.length; ++i) {
      regionsManifest.add(completionService.take().get());
    }
  } catch (InterruptedException e) {
    throw new InterruptedIOException(e.getMessage());
  } catch (ExecutionException e) {
    Throwable t = e.getCause();
    if (t instanceof InvalidProtocolBufferException) {
      throw (InvalidProtocolBufferException) t;
    } else {
      IOException ex = new IOException("ExecutionException");
      ex.initCause(e.getCause());
      throw ex;
    }
  }
  return regionsManifest;
}
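Since PathFilter declares a single method, boolean accept(Path path), the anonymous class above can also be written as a lambda on Java 8+. Below is a minimal, self-contained sketch of the same prefix-filtering idea; the MANIFEST_PREFIX constant and the directory argument are illustrative assumptions, not taken from SnapshotManifestV2.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PrefixFilterSketch {
  // Hypothetical prefix; the real constant lives in SnapshotManifestV2.
  private static final String MANIFEST_PREFIX = "region-manifest.";

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path dir = new Path(args[0]);
    // PathFilter has a single abstract method, so a lambda works here.
    PathFilter prefixFilter = path -> path.getName().startsWith(MANIFEST_PREFIX);
    FileStatus[] matches = fs.listStatus(dir, prefixFilter);
    for (FileStatus st : matches) {
      System.out.println(st.getPath());
    }
  }
}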
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
The class FSUtils, method getRegionLocalityMappingFromFS.
/**
 * Scans the root path of the file system to get either the mapping between
 * each region name and its best-locality region server, or the degree of
 * locality of each region on every server holding at least one block of that
 * region. Both output map parameters are optional.
 *
 * @param conf the configuration to use
 * @param desiredTable the table to scan locality for
 * @param threadPoolSize the thread pool size to use
 * @param regionToBestLocalityRSMapping the map into which to put the best
 *          locality mapping, or null
 * @param regionDegreeLocalityMapping the map into which to put the locality
 *          degree mapping, or null; must be a thread-safe implementation
 * @throws IOException in case of file system errors or interrupts
 */
private static void getRegionLocalityMappingFromFS(final Configuration conf,
    final String desiredTable, int threadPoolSize,
    Map<String, String> regionToBestLocalityRSMapping,
    Map<String, Map<String, Float>> regionDegreeLocalityMapping) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path rootPath = FSUtils.getRootDir(conf);
  long startTime = EnvironmentEdgeManager.currentTime();
  Path queryPath;
  // The table files are in ${hbase.rootdir}/data/<namespace>/<table>/*
  if (null == desiredTable) {
    queryPath = new Path(new Path(rootPath, HConstants.BASE_NAMESPACE_DIR).toString() + "/*/*/*/");
  } else {
    queryPath = new Path(FSUtils.getTableDir(rootPath, TableName.valueOf(desiredTable)).toString() + "/*/");
  }
  // Reject all paths that are not region directories.
  PathFilter pathFilter = new PathFilter() {
    @Override
    public boolean accept(Path path) {
      // The last path component is the region name; the glob may match noise.
      if (null == path) {
        return false;
      }
      // Reject paths without a parent.
      Path parent = path.getParent();
      if (null == parent) {
        return false;
      }
      String regionName = path.getName();
      if (null == regionName) {
        return false;
      }
      // Encoded region names are hex strings.
      if (!regionName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
        return false;
      }
      return true;
    }
  };
  FileStatus[] statusList = fs.globStatus(queryPath, pathFilter);
  if (null == statusList) {
    return;
  }
  LOG.debug("Query Path: " + queryPath + " ; # list of files: " + statusList.length);
  // Lower the number of threads in case we have very few expected regions.
  threadPoolSize = Math.min(threadPoolSize, statusList.length);
  // Scan the regions in multiple threads.
  ThreadPoolExecutor tpe = new ThreadPoolExecutor(threadPoolSize, threadPoolSize,
      60, TimeUnit.SECONDS, new ArrayBlockingQueue<>(statusList.length));
  try {
    // Ignore all file status items that are not of interest.
    for (FileStatus regionStatus : statusList) {
      if (null == regionStatus) {
        continue;
      }
      if (!regionStatus.isDirectory()) {
        continue;
      }
      Path regionPath = regionStatus.getPath();
      if (null == regionPath) {
        continue;
      }
      tpe.execute(new FSRegionScanner(fs, regionPath,
          regionToBestLocalityRSMapping, regionDegreeLocalityMapping));
    }
  } finally {
    tpe.shutdown();
    int threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 60 * 1000);
    try {
      // Wait for the worker threads, logging progress periodically.
      while (!tpe.awaitTermination(threadWakeFrequency, TimeUnit.MILLISECONDS)) {
        // Print a rough estimate, so as to not introduce an AtomicInteger.
        LOG.info("Locality checking is underway: { Scanned Regions : "
            + tpe.getCompletedTaskCount() + "/" + tpe.getTaskCount() + " }");
      }
    } catch (InterruptedException e) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
  }
  long overhead = EnvironmentEdgeManager.currentTime() - startTime;
  LOG.info("Scan DFS for locality info takes " + overhead + " ms");
}
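The interesting pattern here is the combination of a glob and a PathFilter: fs.globStatus expands the wildcard path, and the filter then discards anything whose name is not a hex-encoded region name. A minimal sketch of the same combination, assuming an illustrative /hbase/data/<namespace>/<table>/<region> layout (the glob string below is an assumption, not HBase's actual query path):

import java.io.IOException;
import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class GlobWithFilterSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // Illustrative glob: every region directory under every table.
    Path glob = new Path("/hbase/data/*/*/*");
    // Keep only names that look like encoded region names (hex digits).
    PathFilter hexNames = path ->
        path.getName().toLowerCase(Locale.ROOT).matches("[0-9a-f]+");
    FileStatus[] regions = fs.globStatus(glob, hexNames);
    if (regions == null) {
      return; // the glob matched nothing
    }
    for (FileStatus region : regions) {
      System.out.println(region.getPath());
    }
  }
}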
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
The class AbstractTestWALReplay, method testSequentialEditLogSeqNum.
// The following test is for HBASE-6065.
@Test
public void testSequentialEditLogSeqNum() throws IOException {
  final TableName tableName = TableName.valueOf(currentTest.getMethodName());
  final HRegionInfo hri = createBasic3FamilyHRegionInfo(tableName);
  final Path basedir = FSUtils.getTableDir(this.hbaseRootDir, tableName);
  deleteDir(basedir);
  final byte[] rowName = tableName.getName();
  final int countPerFamily = 10;
  final HTableDescriptor htd = createBasic1FamilyHTD(tableName);
  // Mock the WAL.
  MockWAL wal = createMockWAL();
  HRegion region = HRegion.openHRegion(this.conf, this.fs, hbaseRootDir, hri, htd, wal);
  for (HColumnDescriptor hcd : htd.getFamilies()) {
    addRegionEdits(rowName, hcd.getName(), countPerFamily, this.ee, region, "x");
  }
  // Flush the region, but do not let the cache flush complete yet.
  region.flush(true);
  for (HColumnDescriptor hcd : htd.getFamilies()) {
    addRegionEdits(rowName, hcd.getName(), 5, this.ee, region, "x");
  }
  // Get the current sequence number.
  long latestSeqNumber = region.getReadPoint(null);
  // Allow the cache flush to complete with the sequence number obtained
  // after the first set of edits.
  wal.doCompleteCacheFlush = true;
  wal.completeCacheFlush(hri.getEncodedNameAsBytes());
  wal.shutdown();
  FileStatus[] listStatus = wal.getFiles();
  assertNotNull(listStatus);
  assertTrue(listStatus.length > 0);
  WALSplitter.splitLogFile(hbaseRootDir, listStatus[0], this.fs, this.conf,
      null, null, null, mode, wals);
  // List the recovered.edits files, skipping sequence-id marker files.
  FileStatus[] listStatus1 = this.fs.listStatus(
      new Path(FSUtils.getTableDir(hbaseRootDir, tableName),
          new Path(hri.getEncodedName(), "recovered.edits")),
      new PathFilter() {
        @Override
        public boolean accept(Path p) {
          return !WALSplitter.isSequenceIdFile(p);
        }
      });
  int editCount = 0;
  for (FileStatus fileStatus : listStatus1) {
    // A recovered.edits file is named after its last edit sequence number.
    editCount = Integer.parseInt(fileStatus.getPath().getName());
  }
  // The sequence numbers should match.
  assertEquals("The sequence number of the recovered.edits file and the current edit seq should be same",
      latestSeqNumber, editCount);
}
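This filter inverts a predicate: it accepts everything except sequence-id marker files. A minimal sketch of the same exclusion pattern, with a hypothetical ".seqid" suffix standing in for the real WALSplitter.isSequenceIdFile(p) check:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ExclusionFilterSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path editsDir = new Path(args[0]);
    // Hypothetical suffix check; the real test is WALSplitter.isSequenceIdFile(p).
    PathFilter noSeqIdFiles = p -> !p.getName().endsWith(".seqid");
    FileStatus[] edits = fs.listStatus(editsDir, noSeqIdFiles);
    for (FileStatus edit : edits) {
      System.out.println(edit.getPath());
    }
  }
}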
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
The class HFile, method getStoreFiles.
/**
 * Returns all HFiles belonging to the given region directory. May return an
 * empty list.
 *
 * @param fs The file system reference.
 * @param regionDir The region directory to scan.
 * @return The list of files found.
 * @throws IOException When scanning the files fails.
 */
static List<Path> getStoreFiles(FileSystem fs, Path regionDir) throws IOException {
  List<Path> regionHFiles = new ArrayList<>();
  PathFilter dirFilter = new FSUtils.DirFilter(fs);
  // Each subdirectory of a region directory is a column family directory.
  FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
  for (FileStatus dir : familyDirs) {
    FileStatus[] files = fs.listStatus(dir.getPath());
    for (FileStatus file : files) {
      // Skip directories, old WAL directories and recovered-edits files.
      if (!file.isDirectory()
          && (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME))
          && (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
        regionHFiles.add(file.getPath());
      }
    }
  }
  return regionHFiles;
}
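Here the filtering is delegated to FSUtils.DirFilter, which accepts only directories. A minimal equivalent written inline (a sketch under that assumption, not the HBase implementation) shows how such a filter must swallow the checked IOException, since PathFilter.accept returns a plain boolean:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class DirOnlySketch {
  public static void main(String[] args) throws IOException {
    final FileSystem fs = FileSystem.get(new Configuration());
    Path regionDir = new Path(args[0]);
    // Accept a path only if it resolves to a directory.
    PathFilter dirsOnly = path -> {
      try {
        return fs.getFileStatus(path).isDirectory();
      } catch (IOException e) {
        return false; // treat unreadable paths as non-directories
      }
    };
    List<Path> files = new ArrayList<>();
    for (FileStatus family : fs.listStatus(regionDir, dirsOnly)) {
      for (FileStatus file : fs.listStatus(family.getPath())) {
        if (!file.isDirectory()) {
          files.add(file.getPath());
        }
      }
    }
    files.forEach(p -> System.out.println(p));
  }
}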
Use of org.apache.hadoop.fs.PathFilter in project hive by apache.
The class MapReduceCompiler, method decideExecMode.
@Override
protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
    GlobalLimitCtx globalLimitCtx) throws SemanticException {
  // Bypass for explain queries for now.
  if (ctx.isExplainSkipExecution()) {
    return;
  }
  // The user has told us to run in local mode or doesn't want auto-local mode.
  if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
    return;
  }
  final Context lCtx = ctx;
  // Exclude the query's own intermediate (MR tmp) files from the input summary.
  PathFilter p = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return !lCtx.isMRTmpFileURI(file.toUri().getPath());
    }
  };
  List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
  // Map-reduce jobs will be run locally based on data size.
  // First find out if any of the jobs needs to run non-locally.
  boolean hasNonLocalJob = false;
  for (ExecDriver mrtask : mrtasks) {
    try {
      ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
      int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
      long estimatedInput;
      if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
        // If the global limit optimization is triggered, estimate the input
        // data actually needed based on the limit rows:
        // estimated input = (offset + limit) * max_size_per_row * (estimated_maps + 1)
        long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
        estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
        long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
        long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
        estimatedInput = estimatedInput * (estimatedNumMap + 1);
      } else {
        estimatedInput = inputSummary.getLength();
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength()
            + "," + inputSummary.getFileCount() + "," + numReducers
            + ", estimated Input: " + estimatedInput);
      }
      if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
          inputSummary.getFileCount()) != null) {
        hasNonLocalJob = true;
        break;
      } else {
        mrtask.setLocalMode(true);
      }
    } catch (IOException e) {
      throw new SemanticException(e);
    }
  }
  if (!hasNonLocalJob) {
    // The entire query can be run locally.
    // Save the current tracker value and restore it when done.
    ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
    ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
    console.printInfo("Automatically selecting local only mode for query");
  }
}
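In this Hive example the PathFilter keeps the query's own scratch files from inflating the input-size estimate that drives the local-vs-cluster decision. A minimal sketch of the same idea, using the common Hadoop convention that underscore- and dot-prefixed files are not data (the directory argument and the size-summing loop are illustrative, not Hive's Utilities.getInputSummary):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class DataFileSizeSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path inputDir = new Path(args[0]);
    // Skip files that are, by Hadoop convention, not data (_SUCCESS, .staging, ...).
    PathFilter dataFilesOnly = p ->
        !p.getName().startsWith("_") && !p.getName().startsWith(".");
    long totalBytes = 0L;
    for (FileStatus st : fs.listStatus(inputDir, dataFilesOnly)) {
      if (!st.isDirectory()) {
        totalBytes += st.getLen();
      }
    }
    // A size estimate like this is what feeds a local-vs-cluster decision.
    System.out.println("Estimated input bytes: " + totalBytes);
  }
}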