
Example 56 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project druid by druid-io.

From the class HdfsFileTimestampVersionFinder, the method mostRecentInDir:

private URI mostRecentInDir(final Path dir, final Pattern pattern) throws IOException {
    // Accept every path when no pattern is given; otherwise match on the file name.
    final PathFilter filter = new PathFilter() {

        @Override
        public boolean accept(Path path) {
            return pattern == null || pattern.matcher(path.getName()).matches();
        }
    };
    // Track the most recently modified file seen so far.
    long modifiedTime = Long.MIN_VALUE;
    URI mostRecentURI = null;
    final FileSystem fs = dir.getFileSystem(config);
    for (FileStatus status : fs.listStatus(dir, filter)) {
        if (status.isFile()) {
            final long thisModifiedTime = status.getModificationTime();
            if (thisModifiedTime >= modifiedTime) {
                modifiedTime = thisModifiedTime;
                mostRecentURI = status.getPath().toUri();
            }
        }
    }
    return mostRecentURI;
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) URI(java.net.URI)
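
Since PathFilter declares a single abstract method, the anonymous class above collapses to a lambda on Java 8+. A minimal self-contained sketch of the same newest-file scan (the class and method names here are illustrative, not from the Druid source):

import java.io.IOException;
import java.net.URI;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class NewestFileFinder {

    // Returns the URI of the newest regular file in dir whose name matches pattern.
    public static URI mostRecentInDir(Path dir, Pattern pattern, Configuration conf) throws IOException {
        // Lambda form of the anonymous PathFilter shown above.
        PathFilter filter = path -> pattern == null || pattern.matcher(path.getName()).matches();
        FileSystem fs = dir.getFileSystem(conf);
        long modifiedTime = Long.MIN_VALUE;
        URI mostRecentURI = null;
        for (FileStatus status : fs.listStatus(dir, filter)) {
            final long thisModifiedTime = status.getModificationTime();
            if (status.isFile() && thisModifiedTime >= modifiedTime) {
                modifiedTime = thisModifiedTime;
                mostRecentURI = status.getPath().toUri();
            }
        }
        return mostRecentURI;
    }
}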

Example 57 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project hive by apache.

From the class TestMmCompactorOnTez, the method testMmCompactionDb:

/**
 * Make sure db is specified in compaction queries.
 */
private void testMmCompactionDb(CompactionType compactionType, String resultDirName) throws Exception {
    String dbName = "myDb";
    String tableName = "testMmCompactionDb";
    // Create test table
    TestDataProvider dataProvider = new TestDataProvider();
    dataProvider.createDb(dbName);
    dataProvider.createMmTable(dbName, tableName, false, false, "orc");
    // Find the location of the table
    IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
    Table table = metaStoreClient.getTable(dbName, tableName);
    FileSystem fs = FileSystem.get(conf);
    // Insert test data into test table
    dataProvider.insertMmTestData(dbName, tableName);
    // Get all data before compaction is run
    List<String> expectedData = dataProvider.getAllData(dbName, tableName, false);
    Collections.sort(expectedData);
    // Run a compaction
    CompactorTestUtil.runCompaction(conf, dbName, tableName, compactionType, true);
    CompactorTestUtil.runCleaner(conf);
    verifySuccessulTxn(1);
    // Verify directories after compaction
    PathFilter pathFilter = compactionType == CompactionType.MAJOR ? AcidUtils.baseFileFilter : AcidUtils.deltaFileFilter;
    Assert.assertEquals("Result directories do not match after " + compactionType.name() + " compaction", Collections.singletonList(resultDirName), CompactorTestUtil.getBaseOrDeltaNames(fs, pathFilter, table, null));
    List<String> actualData = dataProvider.getAllData(dbName, tableName, false);
    Collections.sort(actualData);
    Assert.assertEquals(expectedData, actualData);
}
Also used : PathFilter(org.apache.hadoop.fs.PathFilter) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileSystem(org.apache.hadoop.fs.FileSystem) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient)
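
AcidUtils.baseFileFilter and AcidUtils.deltaFileFilter used above are, by the ACID directory naming scheme (base_<writeId>, delta_<minWriteId>_<maxWriteId>), prefix filters over directory names. A hypothetical stand-in written from that naming convention, not copied from Hive:

import org.apache.hadoop.fs.PathFilter;

public final class AcidLikeFilters {

    // Hypothetical equivalents of AcidUtils.baseFileFilter / deltaFileFilter,
    // assuming the standard ACID naming scheme for compaction output directories.
    public static final PathFilter BASE_FILTER = path -> path.getName().startsWith("base_");

    public static final PathFilter DELTA_FILTER = path -> path.getName().startsWith("delta_");
}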

Example 58 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project hive by apache.

From the class TestCrudCompactorOnTez, the method testCompactionDb:

/**
 * Make sure db is specified in compaction queries.
 */
private void testCompactionDb(CompactionType compactionType, String resultDirName) throws Exception {
    String dbName = "myDb";
    String tableName = "testCompactionDb";
    // Create test table
    TestDataProvider dataProvider = new TestDataProvider();
    dataProvider.createDb(dbName);
    dataProvider.createFullAcidTable(dbName, tableName, false, false);
    // Find the location of the table
    IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
    Table table = metaStoreClient.getTable(dbName, tableName);
    FileSystem fs = FileSystem.get(conf);
    // Insert test data into test table
    dataProvider.insertTestData(dbName, tableName);
    // Get all data before compaction is run
    List<String> expectedData = dataProvider.getAllData(dbName, tableName, false);
    Collections.sort(expectedData);
    // Run a compaction
    CompactorTestUtil.runCompaction(conf, dbName, tableName, compactionType, true);
    CompactorTestUtil.runCleaner(conf);
    verifySuccessfulCompaction(1);
    // Verify directories after compaction
    PathFilter pathFilter = compactionType == CompactionType.MAJOR ? AcidUtils.baseFileFilter : AcidUtils.deltaFileFilter;
    Assert.assertEquals("Result directory does not match after " + compactionType.name() + " compaction", Collections.singletonList(resultDirName), CompactorTestUtil.getBaseOrDeltaNames(fs, pathFilter, table, null));
    // Verify all contents
    List<String> actualData = dataProvider.getAllData(dbName, tableName, false);
    Assert.assertEquals(expectedData, actualData);
}
Also used : PathFilter(org.apache.hadoop.fs.PathFilter) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileSystem(org.apache.hadoop.fs.FileSystem) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient)
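
CompactorTestUtil.getBaseOrDeltaNames is not shown in these excerpts; under the assumption that it lists the table location and keeps whatever the supplied PathFilter accepts, a plausible sketch looks like this (the helper below is hypothetical, and takes a plain Path where the real utility takes a metastore Table):

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

final class CompactionDirListing {

    // Hypothetical stand-in for CompactorTestUtil.getBaseOrDeltaNames: list the
    // table location and return the sorted names of entries the filter accepts.
    static List<String> baseOrDeltaNames(FileSystem fs, PathFilter filter, Path tableLocation) throws IOException {
        List<String> names = new ArrayList<>();
        for (FileStatus status : fs.listStatus(tableLocation, filter)) {
            names.add(status.getPath().getName());
        }
        Collections.sort(names);
        return names;
    }
}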

Example 59 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project hive by apache.

From the class FSStatsAggregator, the method connect:

@Override
public boolean connect(StatsCollectionContext scc) {
    List<String> statsDirs = scc.getStatsTmpDirs();
    assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs;
    Path statsDir = new Path(statsDirs.get(0));
    Utilities.FILE_OP_LOGGER.trace("About to read stats from {}", statsDir);
    int poolSize = HiveConf.getIntVar(scc.getHiveConf(), HiveConf.ConfVars.HIVE_MOVE_FILES_THREAD_COUNT);
    // In case thread count is set to 0, use single thread.
    poolSize = Math.max(poolSize, 1);
    final ExecutorService pool = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
    final List<Future<Map<String, Map<String, String>>>> futureList = new LinkedList<>();
    try {
        fs = statsDir.getFileSystem(scc.getHiveConf());
        statsList = new ArrayList<>();
        FileStatus[] status = fs.listStatus(statsDir, new PathFilter() {

            @Override
            public boolean accept(Path file) {
                return file.getName().startsWith(StatsSetupConst.STATS_FILE_PREFIX);
            }
        });
        Map<String, Map<String, String>> statsMap = new HashMap<>();
        for (final FileStatus file : status) {
            futureList.add(pool.submit(() -> {
                Kryo kryo = null;
                try (Input in = new Input(fs.open(file.getPath()))) {
                    kryo = SerializationUtilities.borrowKryo();
                    Map<String, Map<String, String>> stats = kryo.readObject(in, statsMap.getClass());
                    Utilities.FILE_OP_LOGGER.trace("Read stats {}", stats);
                    return stats;
                } finally {
                    SerializationUtilities.releaseKryo(kryo);
                }
            }));
        }
        for (Future<Map<String, Map<String, String>>> future : futureList) {
            Map<String, Map<String, String>> stats = future.get();
            if (stats != null) {
                statsList.add(stats);
            }
        }
        return true;
    } catch (IOException | ExecutionException e) {
        Utilities.FILE_OP_LOGGER.error("Failed to read stats from filesystem ", e);
        cancelRunningTasks(futureList);
        return false;
    } catch (InterruptedException e) {
        cancelRunningTasks(futureList);
        // reset interrupt state
        Thread.currentThread().interrupt();
    } finally {
        pool.shutdownNow();
    }
    return false;
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) IOException(java.io.IOException) LinkedList(java.util.LinkedList) Input(com.esotericsoftware.kryo.io.Input) ExecutorService(java.util.concurrent.ExecutorService) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) Future(java.util.concurrent.Future) ExecutionException(java.util.concurrent.ExecutionException) HashMap(java.util.HashMap) Map(java.util.Map) Kryo(com.esotericsoftware.kryo.Kryo)
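
The list-with-filter-then-fan-out shape above generalizes beyond stats files. A compact sketch of the same pattern, with the Hive-internal Kryo plumbing replaced by a plain byte read (the class and method names below are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

final class PrefixedFileReader {

    // List files whose names start with prefix, then read each one on a bounded pool.
    static List<byte[]> readAll(FileSystem fs, Path dir, String prefix, int poolSize) throws Exception {
        PathFilter byPrefix = path -> path.getName().startsWith(prefix);
        ExecutorService pool = Executors.newFixedThreadPool(Math.max(poolSize, 1));
        try {
            List<Future<byte[]>> futures = new ArrayList<>();
            for (FileStatus file : fs.listStatus(dir, byPrefix)) {
                futures.add(pool.submit(() -> {
                    // Placeholder for the per-file deserialization done by FSStatsAggregator;
                    // the int cast assumes stats files stay small.
                    try (FSDataInputStream in = fs.open(file.getPath())) {
                        byte[] buf = new byte[(int) file.getLen()];
                        in.readFully(buf);
                        return buf;
                    }
                }));
            }
            List<byte[]> results = new ArrayList<>();
            for (Future<byte[]> future : futures) {
                results.add(future.get());
            }
            return results;
        } finally {
            pool.shutdownNow();
        }
    }
}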

Example 60 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in project hive by apache.

From the class AcidUtils, the method parsedDelta:

private static ParsedDelta parsedDelta(Path deltaDir, FileSystem fs, HdfsDirSnapshot dirSnapshot) throws IOException {
    ParsedDeltaLight deltaLight = ParsedDeltaLight.parse(deltaDir);
    // small optimization - delete delta can't be in raw format
    boolean isRawFormat = !deltaLight.isDeleteDelta && MetaDataFile.isRawFormat(deltaDir, fs, dirSnapshot);
    List<HdfsFileStatusWithId> files = null;
    if (dirSnapshot != null) {
        final PathFilter filter = isRawFormat ? AcidUtils.originalBucketFilter : AcidUtils.bucketFileFilter;
        // If we already know the files, store it for future use
        files = dirSnapshot.getFiles().stream().filter(fileStatus -> filter.accept(fileStatus.getPath())).map(HdfsFileStatusWithoutId::new).collect(Collectors.toList());
    }
    return new ParsedDelta(deltaLight, isRawFormat, files);
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) List(java.util.List) Collectors(java.util.stream.Collectors) HdfsFileStatusWithId(org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId) HdfsFileStatusWithoutId(org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId)
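
Note that parsedDelta applies the PathFilter to an already-materialized directory snapshot instead of going back through listStatus, so the filter composes directly with the streams API. A minimal illustration of that usage (plain FileStatus stands in for the Hive-internal snapshot types):

import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.PathFilter;

final class SnapshotFiltering {

    // Apply a PathFilter to an in-memory listing rather than re-querying the NameNode.
    static List<FileStatus> keepMatching(List<FileStatus> snapshot, PathFilter filter) {
        return snapshot.stream()
                .filter(status -> filter.accept(status.getPath()))
                .collect(Collectors.toList());
    }
}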

Aggregations

PathFilter (org.apache.hadoop.fs.PathFilter) 123
Path (org.apache.hadoop.fs.Path) 114
FileStatus (org.apache.hadoop.fs.FileStatus) 96
Test (org.junit.Test) 47
IOException (java.io.IOException) 42
FileSystem (org.apache.hadoop.fs.FileSystem) 39
ArrayList (java.util.ArrayList) 22
List (java.util.List) 19
Configuration (org.apache.hadoop.conf.Configuration) 18
Collections (java.util.Collections) 11
BufferedReader (java.io.BufferedReader) 9
InputStreamReader (java.io.InputStreamReader) 9
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 9
Assert.assertEquals (org.junit.Assert.assertEquals) 9
Assert.assertTrue (org.junit.Assert.assertTrue) 9
URI (java.net.URI) 8
Test (org.testng.annotations.Test) 8
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 7
IGNORED (com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED) 6
RECURSE (com.facebook.presto.hive.NestedDirectoryPolicy.RECURSE) 6