Use of org.apache.hadoop.fs.PathFilter in the project druid (by druid-io):
the method mostRecentInDir of the class HdfsFileTimestampVersionFinder.
/**
 * Scans {@code dir} (non-recursively) and returns the URI of the most recently
 * modified regular file whose name matches {@code pattern}.
 *
 * @param dir     the directory to list
 * @param pattern optional file-name pattern; a null pattern accepts every file
 * @return the URI of the newest matching file, or null if no file matched
 * @throws IOException if the filesystem cannot be reached or listed
 */
private URI mostRecentInDir(final Path dir, final Pattern pattern) throws IOException {
  // Accept everything when no pattern was supplied; otherwise match on the file name only.
  final PathFilter nameFilter = candidate ->
      pattern == null || pattern.matcher(candidate.getName()).matches();
  final FileSystem fs = dir.getFileSystem(config);
  URI newestUri = null;
  long newestTime = Long.MIN_VALUE;
  for (FileStatus entry : fs.listStatus(dir, nameFilter)) {
    if (!entry.isFile()) {
      continue;
    }
    final long entryTime = entry.getModificationTime();
    // '>=' keeps the later-listed file on modification-time ties, matching prior behavior.
    if (entryTime >= newestTime) {
      newestTime = entryTime;
      newestUri = entry.getPath().toUri();
    }
  }
  return newestUri;
}
Use of org.apache.hadoop.fs.PathFilter in the project hive (by apache):
the method testMmCompactionDb of the class TestMmCompactorOnTez.
/**
 * Make sure db is specified in compaction queries.
 *
 * @param compactionType MAJOR or MINOR compaction to run
 * @param resultDirName  the base/delta directory name expected after compaction
 * @throws Exception if table setup, compaction, or verification fails
 */
private void testMmCompactionDb(CompactionType compactionType, String resultDirName) throws Exception {
  String dbName = "myDb";
  String tableName = "testMmCompactionDb";
  // Create test table
  TestDataProvider dataProvider = new TestDataProvider();
  dataProvider.createDb(dbName);
  dataProvider.createMmTable(dbName, tableName, false, false, "orc");
  // Find the location of the table
  IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
  Table table = metaStoreClient.getTable(dbName, tableName);
  FileSystem fs = FileSystem.get(conf);
  // Insert test data into test table
  dataProvider.insertMmTestData(dbName, tableName);
  // Get all data before compaction is run
  List<String> expectedData = dataProvider.getAllData(dbName, tableName, false);
  Collections.sort(expectedData);
  // Run a compaction
  CompactorTestUtil.runCompaction(conf, dbName, tableName, compactionType, true);
  CompactorTestUtil.runCleaner(conf);
  verifySuccessulTxn(1);
  // Verify directories after compaction: MAJOR produces a base dir, MINOR a delta dir.
  PathFilter pathFilter = compactionType == CompactionType.MAJOR ? AcidUtils.baseFileFilter : AcidUtils.deltaFileFilter;
  // Message aligned with the sibling test in TestCrudCompactorOnTez ("Result directory does not match").
  Assert.assertEquals("Result directory does not match after " + compactionType.name() + " compaction",
      Collections.singletonList(resultDirName),
      CompactorTestUtil.getBaseOrDeltaNames(fs, pathFilter, table, null));
  // Verify all contents survived compaction; both sides sorted so the check is order-insensitive.
  List<String> actualData = dataProvider.getAllData(dbName, tableName, false);
  Collections.sort(actualData);
  Assert.assertEquals(expectedData, actualData);
}
Use of org.apache.hadoop.fs.PathFilter in the project hive (by apache):
the method testCompactionDb of the class TestCrudCompactorOnTez.
/**
 * Make sure db is specified in compaction queries.
 *
 * @param compactionType MAJOR or MINOR compaction to run
 * @param resultDirName  the base/delta directory name expected after compaction
 * @throws Exception if table setup, compaction, or verification fails
 */
private void testCompactionDb(CompactionType compactionType, String resultDirName) throws Exception {
  String dbName = "myDb";
  String tableName = "testCompactionDb";
  // Create test table
  TestDataProvider dataProvider = new TestDataProvider();
  dataProvider.createDb(dbName);
  dataProvider.createFullAcidTable(dbName, tableName, false, false);
  // Find the location of the table
  IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
  Table table = metaStoreClient.getTable(dbName, tableName);
  FileSystem fs = FileSystem.get(conf);
  // Insert test data into test table
  dataProvider.insertTestData(dbName, tableName);
  // Get all data before compaction is run
  List<String> expectedData = dataProvider.getAllData(dbName, tableName, false);
  Collections.sort(expectedData);
  // Run a compaction
  CompactorTestUtil.runCompaction(conf, dbName, tableName, compactionType, true);
  CompactorTestUtil.runCleaner(conf);
  verifySuccessfulCompaction(1);
  // Verify directories after compaction: MAJOR produces a base dir, MINOR a delta dir.
  PathFilter pathFilter = compactionType == CompactionType.MAJOR ? AcidUtils.baseFileFilter : AcidUtils.deltaFileFilter;
  Assert.assertEquals("Result directory does not match after " + compactionType.name() + " compaction",
      Collections.singletonList(resultDirName),
      CompactorTestUtil.getBaseOrDeltaNames(fs, pathFilter, table, null));
  // Verify all contents. Sort actualData too — expectedData was sorted above, and
  // getAllData gives no ordering guarantee, so comparing unsorted output is flaky
  // (the sibling test in TestMmCompactorOnTez sorts both sides).
  List<String> actualData = dataProvider.getAllData(dbName, tableName, false);
  Collections.sort(actualData);
  Assert.assertEquals(expectedData, actualData);
}
Use of org.apache.hadoop.fs.PathFilter in the project hive (by apache):
the method connect of the class FSStatsAggregator.
/**
 * Connects to the stats temp directory and loads every serialized stats file
 * (those named with {@link StatsSetupConst#STATS_FILE_PREFIX}) into
 * {@code statsList}, deserializing the files in parallel.
 *
 * @param scc context holding the Hive conf and the single stats temp dir
 * @return true when all stats files were read successfully, false otherwise
 */
@Override
public boolean connect(StatsCollectionContext scc) {
  List<String> statsDirs = scc.getStatsTmpDirs();
  assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs;
  Path statsDir = new Path(statsDirs.get(0));
  Utilities.FILE_OP_LOGGER.trace("About to read stats from {}", statsDir);
  int poolSize = HiveConf.getIntVar(scc.getHiveConf(), HiveConf.ConfVars.HIVE_MOVE_FILES_THREAD_COUNT);
  // In case thread count is set to 0, use single thread.
  poolSize = Math.max(poolSize, 1);
  // NOTE: stray empty statement (';') after this declaration removed.
  final ExecutorService pool = Executors.newFixedThreadPool(poolSize,
      new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
  final List<Future<Map<String, Map<String, String>>>> futureList = new LinkedList<>();
  try {
    fs = statsDir.getFileSystem(scc.getHiveConf());
    statsList = new ArrayList<>();
    // Only pick up the serialized stats files, skipping any other temp files in the dir.
    FileStatus[] status = fs.listStatus(statsDir, new PathFilter() {
      @Override
      public boolean accept(Path file) {
        return file.getName().startsWith(StatsSetupConst.STATS_FILE_PREFIX);
      }
    });
    // Used only as a runtime type token for Kryo deserialization below; never populated.
    Map<String, Map<String, String>> statsMap = new HashMap<>();
    for (final FileStatus file : status) {
      futureList.add(pool.submit(() -> {
        Kryo kryo = null;
        try (Input in = new Input(fs.open(file.getPath()))) {
          kryo = SerializationUtilities.borrowKryo();
          Map<String, Map<String, String>> stats = kryo.readObject(in, statsMap.getClass());
          Utilities.FILE_OP_LOGGER.trace("Read stats {}", stats);
          return stats;
        } finally {
          // releaseKryo tolerates null (if borrowKryo itself threw).
          SerializationUtilities.releaseKryo(kryo);
        }
      }));
    }
    for (Future<Map<String, Map<String, String>>> future : futureList) {
      Map<String, Map<String, String>> stats = future.get();
      if (stats != null) {
        statsList.add(stats);
      }
    }
    return true;
  } catch (IOException | ExecutionException e) {
    Utilities.FILE_OP_LOGGER.error("Failed to read stats from filesystem ", e);
    cancelRunningTasks(futureList);
    return false;
  } catch (InterruptedException e) {
    cancelRunningTasks(futureList);
    // reset interrupt state
    Thread.currentThread().interrupt();
  } finally {
    pool.shutdownNow();
  }
  return false;
}
Use of org.apache.hadoop.fs.PathFilter in the project hive (by apache):
the method parsedDelta of the class AcidUtils.
/**
 * Builds a {@code ParsedDelta} for the given delta directory, reusing an
 * existing directory snapshot (when one is supplied) so the file listing
 * does not have to be fetched again from the filesystem.
 *
 * @param deltaDir    the delta directory to parse
 * @param fs          filesystem used for the raw-format probe
 * @param dirSnapshot cached listing of the directory, or null if unavailable
 * @return the parsed delta, with its file list pre-populated when a snapshot exists
 * @throws IOException if the raw-format check needs to touch the filesystem and fails
 */
private static ParsedDelta parsedDelta(Path deltaDir, FileSystem fs, HdfsDirSnapshot dirSnapshot) throws IOException {
  ParsedDeltaLight deltaLight = ParsedDeltaLight.parse(deltaDir);
  // Small optimization: a delete delta can never be in raw format, so skip the probe.
  boolean isRawFormat = !deltaLight.isDeleteDelta && MetaDataFile.isRawFormat(deltaDir, fs, dirSnapshot);
  List<HdfsFileStatusWithId> files = null;
  if (dirSnapshot != null) {
    // Raw-format deltas carry original-style bucket names; ACID deltas carry bucket files.
    final PathFilter bucketFilter =
        !isRawFormat ? AcidUtils.bucketFileFilter : AcidUtils.originalBucketFilter;
    // The snapshot already knows the files, so capture them now for later reuse.
    files = dirSnapshot.getFiles()
        .stream()
        .filter(snapshotEntry -> bucketFilter.accept(snapshotEntry.getPath()))
        .map(HdfsFileStatusWithoutId::new)
        .collect(Collectors.toList());
  }
  return new ParsedDelta(deltaLight, isRawFormat, files);
}
End of aggregated usage examples for org.apache.hadoop.fs.PathFilter.