Example 6 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class StripeCompactionPolicy method allFilesExpired.

private boolean allFilesExpired(final List<HStoreFile> storeFiles) {
    if (storeFiles == null || storeFiles.isEmpty()) {
        return false;
    }
    long cfTtl = this.storeConfigInfo.getStoreFileTtl();
    if (cfTtl == Long.MAX_VALUE) {
        // minversion might be set, cannot delete old files
        return false;
    }
    long timestampCutoff = EnvironmentEdgeManager.currentTime() - cfTtl;
    for (HStoreFile storeFile : storeFiles) {
        // A non-empty store file with data newer than the cutoff has not expired
        if (storeFile.getReader().getMaxTimestamp() >= timestampCutoff && storeFile.getReader().getEntries() != 0) {
            return false;
        }
    }
    return true;
}
Also used : HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile)
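
The check above reduces to a pure function of (maxTimestamp, entries, TTL, now), which makes it easy to reason about in isolation. Below is a minimal, self-contained sketch of the same rule; the FileInfo record is a hypothetical stand-in for the values HStoreFile's reader exposes, not HBase API.

import java.util.List;

// Hypothetical stand-in for the (maxTimestamp, entries) pair that
// HStoreFile's reader exposes; used only to make the rule testable.
record FileInfo(long maxTimestamp, long entries) {}

class ExpiryCheckSketch {

    static boolean allFilesExpired(List<FileInfo> files, long ttlMillis, long nowMillis) {
        if (files == null || files.isEmpty()) {
            // Nothing to prove expired; be conservative.
            return false;
        }
        if (ttlMillis == Long.MAX_VALUE) {
            // TTL effectively unset; never expire by age.
            return false;
        }
        long cutoff = nowMillis - ttlMillis;
        for (FileInfo f : files) {
            // One non-empty file with data newer than the cutoff means
            // the set is not entirely expired.
            if (f.maxTimestamp() >= cutoff && f.entries() != 0) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        long ttl = 60_000L; // one minute
        // Old, non-empty file: the whole set counts as expired.
        System.out.println(allFilesExpired(List.of(new FileInfo(now - 120_000L, 10)), ttl, now)); // true
        // Fresh file: not expired.
        System.out.println(allFilesExpired(List.of(new FileInfo(now, 10)), ttl, now)); // false
    }
}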

Example 7 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class MobFileCleanerChore method cleanupObsoleteMobFiles.

/**
 * Performs housekeeping file cleaning (called by MOB Cleaner chore)
 * @param conf configuration
 * @param table table name
 * @throws IOException if an I/O error occurs while scanning the table's store files
 */
public void cleanupObsoleteMobFiles(Configuration conf, TableName table) throws IOException {
    long minAgeToArchive = conf.getLong(MobConstants.MIN_AGE_TO_ARCHIVE_KEY, MobConstants.DEFAULT_MIN_AGE_TO_ARCHIVE);
    // We only check MOB files whose creation time is less than
    // maxCreationTimeToArchive, i.e. the current time minus minAgeToArchive
    // (1 hour by default). The gap gives us confidence that all
    // corresponding store files will exist at the time the cleaning
    // procedure begins and will be examined. MOB files created after
    // maxCreationTimeToArchive are skipped and will not be archived.
    long maxCreationTimeToArchive = EnvironmentEdgeManager.currentTime() - minAgeToArchive;
    try (final Connection conn = ConnectionFactory.createConnection(conf);
        final Admin admin = conn.getAdmin()) {
        TableDescriptor htd = admin.getDescriptor(table);
        List<ColumnFamilyDescriptor> list = MobUtils.getMobColumnFamilies(htd);
        if (list.size() == 0) {
            LOG.info("Skipping non-MOB table [{}]", table);
            return;
        } else {
            LOG.info("Only MOB files whose creation time older than {} will be archived, table={}", maxCreationTimeToArchive, table);
        }
        Path rootDir = CommonFSUtils.getRootDir(conf);
        Path tableDir = CommonFSUtils.getTableDir(rootDir, table);
        // How safe is this call?
        List<Path> regionDirs = FSUtils.getRegionDirs(FileSystem.get(conf), tableDir);
        Set<String> allActiveMobFileName = new HashSet<String>();
        FileSystem fs = FileSystem.get(conf);
        for (Path regionPath : regionDirs) {
            for (ColumnFamilyDescriptor hcd : list) {
                String family = hcd.getNameAsString();
                Path storePath = new Path(regionPath, family);
                boolean succeed = false;
                Set<String> regionMobs = new HashSet<String>();
                while (!succeed) {
                    if (!fs.exists(storePath)) {
                        String errMsg = String.format("Directory %s was deleted during MOB file cleaner chore" + " execution, aborting MOB file cleaner chore.", storePath);
                        throw new IOException(errMsg);
                    }
                    RemoteIterator<LocatedFileStatus> rit = fs.listLocatedStatus(storePath);
                    List<Path> storeFiles = new ArrayList<Path>();
                    // Load list of store files first
                    while (rit.hasNext()) {
                        Path p = rit.next().getPath();
                        if (fs.isFile(p)) {
                            storeFiles.add(p);
                        }
                    }
                    LOG.info("Found {} store files in: {}", storeFiles.size(), storePath);
                    Path currentPath = null;
                    try {
                        for (Path pp : storeFiles) {
                            currentPath = pp;
                            LOG.trace("Store file: {}", pp);
                            HStoreFile sf = new HStoreFile(fs, pp, conf, CacheConfig.DISABLED, BloomType.NONE, true);
                            sf.initReader();
                            byte[] mobRefData = sf.getMetadataValue(HStoreFile.MOB_FILE_REFS);
                            byte[] bulkloadMarkerData = sf.getMetadataValue(HStoreFile.BULKLOAD_TASK_KEY);
                            // close store file to avoid memory leaks
                            sf.closeStoreFile(true);
                            if (mobRefData == null) {
                                if (bulkloadMarkerData == null) {
                                    LOG.warn("Found old store file with no MOB_FILE_REFS: {} - " + "can not proceed until all old files will be MOB-compacted.", pp);
                                    return;
                                } else {
                                    LOG.debug("Skipping file without MOB references (bulkloaded file):{}", pp);
                                    continue;
                                }
                            }
                            // mob compaction code.
                            try {
                                SetMultimap<TableName, String> mobs = MobUtils.deserializeMobFileRefs(mobRefData).build();
                                LOG.debug("Found {} mob references for store={}", mobs.size(), sf);
                                LOG.trace("Specific mob references found for store={} : {}", sf, mobs);
                                regionMobs.addAll(mobs.values());
                            } catch (RuntimeException exception) {
                                throw new IOException("failure getting mob references for hfile " + sf, exception);
                            }
                        }
                    } catch (FileNotFoundException e) {
                        LOG.warn("Missing file:{} Starting MOB cleaning cycle from the beginning" + " due to error", currentPath, e);
                        regionMobs.clear();
                        continue;
                    }
                    succeed = true;
                }
                // Add MOB references for current region/family
                allActiveMobFileName.addAll(regionMobs);
            }
        // END column families
        }
        // Check if the number of MOB files is too big (over 1M)
        if (allActiveMobFileName.size() > 1000000) {
            LOG.warn("Found too many active MOB files: {}, table={}, " + "this may result in high memory pressure.", allActiveMobFileName.size(), table);
        }
        LOG.debug("Found: {} active mob refs for table={}", allActiveMobFileName.size(), table);
        allActiveMobFileName.forEach(LOG::trace);
        // Now scan MOB directories and find MOB files with no references to them
        for (ColumnFamilyDescriptor hcd : list) {
            List<Path> toArchive = new ArrayList<Path>();
            String family = hcd.getNameAsString();
            Path dir = MobUtils.getMobFamilyPath(conf, table, family);
            RemoteIterator<LocatedFileStatus> rit = fs.listLocatedStatus(dir);
            while (rit.hasNext()) {
                LocatedFileStatus lfs = rit.next();
                Path p = lfs.getPath();
                if (!allActiveMobFileName.contains(p.getName())) {
                    // MOB is not in a list of active references, but it can be too
                    // fresh, skip it in this case
                    long creationTime = fs.getFileStatus(p).getModificationTime();
                    if (creationTime < maxCreationTimeToArchive) {
                        LOG.trace("Archiving MOB file {} creation time={}", p, creationTime);
                        toArchive.add(p);
                    } else {
                        LOG.trace("Skipping fresh file: {}. Creation time={}", p, creationTime);
                    }
                } else {
                    LOG.trace("Keeping active MOB file: {}", p);
                }
            }
            LOG.info(" MOB Cleaner found {} files to archive for table={} family={}", toArchive.size(), table, family);
            archiveMobFiles(conf, table, family.getBytes(), toArchive);
            LOG.info(" MOB Cleaner archived {} files, table={} family={}", toArchive.size(), table, family);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Connection(org.apache.hadoop.hbase.client.Connection) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) Admin(org.apache.hadoop.hbase.client.Admin) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor) TableName(org.apache.hadoop.hbase.TableName) FileSystem(org.apache.hadoop.fs.FileSystem) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) HashSet(java.util.HashSet)
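
The subtle part of this method is the while (!succeed) loop: a concurrent compaction can remove a store file between the directory listing and the metadata read, so the chore discards partial results and rescans from the top. A stripped-down sketch of that pattern, with a hypothetical lister/RefReader pair standing in for the FileSystem and HStoreFile calls:

import java.io.FileNotFoundException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Supplier;

class RetryScanSketch {

    // Hypothetical reader: may throw if the file vanished after listing.
    interface RefReader {
        Set<String> readRefs(String file) throws FileNotFoundException;
    }

    static Set<String> scanUntilStable(Supplier<List<String>> lister, RefReader reader) {
        while (true) {
            Set<String> refs = new HashSet<>();
            try {
                for (String file : lister.get()) {      // fresh listing on every attempt
                    refs.addAll(reader.readRefs(file)); // may throw mid-loop
                }
                return refs;                            // one full clean pass: done
            } catch (FileNotFoundException e) {
                // A file was removed while we were reading; the partial
                // reference set may be stale, so drop it and rescan. The
                // next listing will no longer contain the missing file.
            }
        }
    }
}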

Example 8 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class MobUtils method cleanExpiredMobFiles.

/**
 * Cleans the expired MOB files: removes files whose creation date is older than (current -
 * columnFamily.ttl), provided the minVersions of that column family is 0.
 * @param fs The current file system.
 * @param conf The current configuration.
 * @param tableName The current table name.
 * @param columnDescriptor The descriptor of the current column family.
 * @param cacheConfig The cacheConfig that disables the block cache.
 * @param current The current time.
 */
public static void cleanExpiredMobFiles(FileSystem fs, Configuration conf, TableName tableName, ColumnFamilyDescriptor columnDescriptor, CacheConfig cacheConfig, long current) throws IOException {
    long timeToLive = columnDescriptor.getTimeToLive();
    if (Integer.MAX_VALUE == timeToLive) {
        // no need to clean, because the TTL is not set.
        return;
    }
    Calendar calendar = Calendar.getInstance();
    calendar.setTimeInMillis(current - timeToLive * 1000);
    calendar.set(Calendar.HOUR_OF_DAY, 0);
    calendar.set(Calendar.MINUTE, 0);
    calendar.set(Calendar.SECOND, 0);
    Date expireDate = calendar.getTime();
    LOG.info("MOB HFiles older than " + expireDate.toGMTString() + " will be deleted!");
    FileStatus[] stats = null;
    Path mobTableDir = CommonFSUtils.getTableDir(getMobHome(conf), tableName);
    Path path = getMobFamilyPath(conf, tableName, columnDescriptor.getNameAsString());
    try {
        stats = fs.listStatus(path);
    } catch (FileNotFoundException e) {
        LOG.warn("Failed to find the mob file " + path, e);
    }
    if (null == stats) {
        // no file found
        return;
    }
    List<HStoreFile> filesToClean = new ArrayList<>();
    int deletedFileCount = 0;
    for (FileStatus file : stats) {
        String fileName = file.getPath().getName();
        try {
            if (HFileLink.isHFileLink(file.getPath())) {
                HFileLink hfileLink = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
                fileName = hfileLink.getOriginPath().getName();
            }
            Date fileDate = parseDate(MobFileName.getDateFromName(fileName));
            if (LOG.isDebugEnabled()) {
                LOG.debug("Checking file {}", fileName);
            }
            if (fileDate.getTime() < expireDate.getTime()) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("{} is an expired file", fileName);
                }
                filesToClean.add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true));
            }
        } catch (Exception e) {
            LOG.error("Cannot parse the fileName " + fileName, e);
        }
    }
    if (!filesToClean.isEmpty()) {
        try {
            removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(), filesToClean);
            deletedFileCount = filesToClean.size();
        } catch (IOException e) {
            LOG.error("Failed to delete the mob files " + filesToClean, e);
        }
    }
    LOG.info("{} expired mob files are deleted", deletedFileCount);
}
Also used : Path(org.apache.hadoop.fs.Path) HFileLink(org.apache.hadoop.hbase.io.HFileLink) FileStatus(org.apache.hadoop.fs.FileStatus) Calendar(java.util.Calendar) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Date(java.util.Date) ParseException(java.text.ParseException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile)
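
The Calendar arithmetic deserves a note: the TTL is in seconds while timestamps are in milliseconds, and MOB file names encode only a date, so the cutoff is truncated to a day boundary. A standalone sketch of just that computation (it additionally resets MILLISECOND, which the code above leaves untouched, for a clean boundary):

import java.util.Calendar;
import java.util.Date;

class MobExpiryDateSketch {

    // Cutoff date: (now - ttlSeconds), truncated to the start of that day.
    static Date expireDate(long currentMillis, long ttlSeconds) {
        Calendar calendar = Calendar.getInstance();
        calendar.setTimeInMillis(currentMillis - ttlSeconds * 1000L);
        calendar.set(Calendar.HOUR_OF_DAY, 0);
        calendar.set(Calendar.MINUTE, 0);
        calendar.set(Calendar.SECOND, 0);
        calendar.set(Calendar.MILLISECOND, 0); // not done in the original; added here for a clean boundary
        return calendar.getTime();
    }

    public static void main(String[] args) {
        // With a 3-day TTL, MOB files dated before (today - 3 days) expire.
        long threeDaysSeconds = 3L * 24 * 3600;
        System.out.println(expireDate(System.currentTimeMillis(), threeDaysSeconds));
    }
}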

Example 9 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class HFileArchiver method deleteStoreFilesWithoutArchiving.

/**
 * Just do a simple delete of the given store files
 * <p>
 * A best effort is made to delete each of the files, rather than bailing on the first failure.
 * <p>
 * @param compactedFiles store files to delete from the file system.
 * @throws IOException if a file cannot be deleted. Deletion is attempted for all files
 *           before the exception is thrown, rather than failing at the first file.
 */
private static void deleteStoreFilesWithoutArchiving(Collection<HStoreFile> compactedFiles) throws IOException {
    LOG.debug("Deleting files without archiving.");
    List<IOException> errors = new ArrayList<>(0);
    for (HStoreFile hsf : compactedFiles) {
        try {
            hsf.deleteStoreFile();
        } catch (IOException e) {
            LOG.error("Failed to delete {}", hsf.getPath());
            errors.add(e);
        }
    }
    if (errors.size() > 0) {
        throw MultipleIOException.createIOException(errors);
    }
}
Also used : ArrayList(java.util.ArrayList) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MultipleIOException(org.apache.hadoop.io.MultipleIOException)
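
The attempt-all-then-aggregate pattern here is worth keeping on hand. Below is a self-contained sketch using plain java.nio in place of the HBase store-file handles; since java.nio has no MultipleIOException, the collected failures are chained as suppressed exceptions instead:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

class BestEffortDeleteSketch {

    static void deleteAll(Collection<Path> paths) throws IOException {
        List<IOException> errors = new ArrayList<>();
        for (Path p : paths) {
            try {
                Files.delete(p); // attempt every file regardless of earlier failures
            } catch (IOException e) {
                errors.add(e);   // collect instead of bailing out
            }
        }
        if (!errors.isEmpty()) {
            // Surface all failures at once rather than only the first.
            IOException combined =
                new IOException(errors.size() + " of " + paths.size() + " deletes failed");
            errors.forEach(combined::addSuppressed);
            throw combined;
        }
    }
}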

Example 10 with HStoreFile

use of org.apache.hadoop.hbase.regionserver.HStoreFile in project hbase by apache.

the class TestStripeCompactionPolicy method testSingleStripeCompaction.

@Test
public void testSingleStripeCompaction() throws Exception {
    // Create a special policy that only compacts single stripes, using standard methods.
    Configuration conf = HBaseConfiguration.create();
    // The test depends on this property not being set; the default value breaks the test. TODO: revisit.
    conf.unset("hbase.hstore.compaction.min.size");
    conf.setFloat(CompactionConfiguration.HBASE_HSTORE_COMPACTION_RATIO_KEY, 1.0F);
    conf.setInt(StripeStoreConfig.MIN_FILES_KEY, 3);
    conf.setInt(StripeStoreConfig.MAX_FILES_KEY, 4);
    // make sure there are no splits
    conf.setLong(StripeStoreConfig.SIZE_TO_SPLIT_KEY, 1000);
    StoreConfigInformation sci = mock(StoreConfigInformation.class);
    when(sci.getRegionInfo()).thenReturn(RegionInfoBuilder.FIRST_META_REGIONINFO);
    StripeStoreConfig ssc = new StripeStoreConfig(conf, sci);
    StripeCompactionPolicy policy = new StripeCompactionPolicy(conf, sci, ssc) {

        @Override
        public StripeCompactionRequest selectCompaction(StripeInformationProvider si, List<HStoreFile> filesCompacting, boolean isOffpeak) throws IOException {
            if (!filesCompacting.isEmpty()) {
                return null;
            }
            return selectSingleStripeCompaction(si, false, false, isOffpeak);
        }

        @Override
        public boolean needsCompactions(StripeInformationProvider si, List<HStoreFile> filesCompacting) {
            if (!filesCompacting.isEmpty()) {
                return false;
            }
            return needsSingleStripeCompaction(si);
        }
    };
    // No compaction due to min files or ratio
    StripeInformationProvider si = createStripesWithSizes(0, 0, new Long[] { 2L }, new Long[] { 3L, 3L }, new Long[] { 5L, 1L });
    verifyNoCompaction(policy, si);
    // No compaction due to min files or ratio - will report needed, but not do any.
    si = createStripesWithSizes(0, 0, new Long[] { 2L }, new Long[] { 3L, 3L }, new Long[] { 5L, 1L, 1L });
    assertNull(policy.selectCompaction(si, al(), false));
    assertTrue(policy.needsCompactions(si, al()));
    // One stripe has possible compaction
    si = createStripesWithSizes(0, 0, new Long[] { 2L }, new Long[] { 3L, 3L }, new Long[] { 5L, 4L, 3L });
    verifySingleStripeCompaction(policy, si, 2, null);
    // Several stripes have possible compactions; choose best quality (removes most files)
    si = createStripesWithSizes(0, 0, new Long[] { 3L, 2L, 2L }, new Long[] { 2L, 2L, 1L }, new Long[] { 3L, 2L, 2L, 1L });
    verifySingleStripeCompaction(policy, si, 2, null);
    si = createStripesWithSizes(0, 0, new Long[] { 5L }, new Long[] { 3L, 2L, 2L, 1L }, new Long[] { 3L, 2L, 2L });
    verifySingleStripeCompaction(policy, si, 1, null);
    // Or with smallest files, if the count is the same
    si = createStripesWithSizes(0, 0, new Long[] { 3L, 3L, 3L }, new Long[] { 3L, 1L, 2L }, new Long[] { 3L, 2L, 2L });
    verifySingleStripeCompaction(policy, si, 1, null);
    // Verify max count is respected.
    si = createStripesWithSizes(0, 0, new Long[] { 5L }, new Long[] { 5L, 4L, 4L, 4L, 4L });
    List<HStoreFile> sfs = si.getStripes().get(1).subList(1, 5);
    verifyCompaction(policy, si, sfs, null, 1, null, si.getStartRow(1), si.getEndRow(1), true);
    // Verify ratio is applied.
    si = createStripesWithSizes(0, 0, new Long[] { 5L }, new Long[] { 50L, 4L, 4L, 4L, 4L });
    sfs = si.getStripes().get(1).subList(1, 5);
    verifyCompaction(policy, si, sfs, null, 1, null, si.getStartRow(1), si.getEndRow(1), true);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) ArgumentMatchers.anyLong(org.mockito.ArgumentMatchers.anyLong) OptionalLong(java.util.OptionalLong) StripeInformationProvider(org.apache.hadoop.hbase.regionserver.compactions.StripeCompactionPolicy.StripeInformationProvider) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList) StripeStoreConfig(org.apache.hadoop.hbase.regionserver.StripeStoreConfig) StoreConfigInformation(org.apache.hadoop.hbase.regionserver.StoreConfigInformation) Test(org.junit.Test)
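
The final two scenarios exercise the size-ratio rule. As a rough sketch (a simplification for illustration, not the exact StripeCompactionPolicy selection code): a file can join a compaction only if its size is at most ratio times the combined size of the other candidates, which is why the 50-sized file is excluded while the four 4-sized files compact.

import java.util.List;

class RatioRuleSketch {

    // Simplified eligibility rule: fileSize <= ratio * sum(other candidates).
    static boolean eligible(long fileSize, List<Long> others, double ratio) {
        long sum = others.stream().mapToLong(Long::longValue).sum();
        return fileSize <= ratio * sum;
    }

    public static void main(String[] args) {
        // Mirrors the last scenario above: sizes {50, 4, 4, 4, 4}, ratio 1.0.
        System.out.println(eligible(50, List.of(4L, 4L, 4L, 4L), 1.0)); // false: 50 > 16, excluded
        System.out.println(eligible(4, List.of(4L, 4L, 4L), 1.0));      // true: the four 4s compact
    }
}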

Aggregations

HStoreFile (org.apache.hadoop.hbase.regionserver.HStoreFile): 44
ArrayList (java.util.ArrayList): 18
Test (org.junit.Test): 16
Path (org.apache.hadoop.fs.Path): 11
Configuration (org.apache.hadoop.conf.Configuration): 8
HStore (org.apache.hadoop.hbase.regionserver.HStore): 8
StripeInformationProvider (org.apache.hadoop.hbase.regionserver.compactions.StripeCompactionPolicy.StripeInformationProvider): 8
IOException (java.io.IOException): 6
OptionalLong (java.util.OptionalLong): 6
TableName (org.apache.hadoop.hbase.TableName): 5
Put (org.apache.hadoop.hbase.client.Put): 5
TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 4
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 4
StoreFileReader (org.apache.hadoop.hbase.regionserver.StoreFileReader): 4
ImmutableList (org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList): 4
InterruptedIOException (java.io.InterruptedIOException): 3
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor): 3
ManualEnvironmentEdge (org.apache.hadoop.hbase.util.ManualEnvironmentEdge): 3
FileNotFoundException (java.io.FileNotFoundException): 2