Example 11 with ColumnFamilyDescriptor

use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.

the class TestHFileOutputFormat2 method testColumnFamilySettings.

/**
 * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
 * bloom filter settings from the column family descriptor
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");
    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    TableDescriptorBuilder tableDescriptorBuilder = TableDescriptorBuilder.newBuilder(TABLE_NAMES[0]);
    for (ColumnFamilyDescriptor hcd : HBaseTestingUtil.generateColumnDescriptors()) {
        tableDescriptorBuilder.setColumnFamily(hcd);
    }
    // Mock the descriptor only after all families are added, so the writer
    // actually sees the generated compression and bloom filter settings.
    Mockito.doReturn(tableDescriptorBuilder.build()).when(table).getDescriptor();
    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);
    try {
        // partial map red setup to get an operational writer for testing
        // We turn off the sequence file compression, because DefaultCodec
        // pollutes the GZip codec pool with an incompatible compressor.
        conf.set("io.seqfile.compression.type", "NONE");
        conf.set("hbase.fs.tmp.dir", dir.toString());
        // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
        conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
        Job job = Job.getInstance(conf, "testLocalMRIncrementalLoad");
        job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
        setupRandomGeneratorMapper(job, false);
        HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        // write out random rows
        writeRandomKeyValues(writer, context, tableDescriptorBuilder.build().getColumnFamilyNames(), ROWSPERSPLIT);
        writer.close(context);
        // Make sure that a directory was created for every CF
        FileSystem fs = dir.getFileSystem(conf);
        // commit so that the filesystem has one directory per column family
        hof.getOutputCommitter(context).commitTask(context);
        hof.getOutputCommitter(context).commitJob(context);
        FileStatus[] families = CommonFSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
        assertEquals(tableDescriptorBuilder.build().getColumnFamilies().length, families.length);
        for (FileStatus f : families) {
            String familyStr = f.getPath().getName();
            ColumnFamilyDescriptor hcd = tableDescriptorBuilder.build().getColumnFamily(Bytes.toBytes(familyStr));
            // verify that the compression on this file matches the configured
            // compression
            Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
            Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), true, conf);
            Map<byte[], byte[]> fileInfo = reader.getHFileInfo();
            byte[] bloomFilter = fileInfo.get(BLOOM_FILTER_TYPE_KEY);
            if (bloomFilter == null) {
                bloomFilter = Bytes.toBytes("NONE");
            }
            assertEquals("Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
            assertEquals("Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
        }
    } finally {
        dir.getFileSystem(conf).delete(dir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) TableDescriptorBuilder(org.apache.hadoop.hbase.client.TableDescriptorBuilder) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) TestHRegionFileSystem(org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Job(org.apache.hadoop.mapreduce.Job) Cell(org.apache.hadoop.hbase.Cell) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) CommonFSUtils(org.apache.hadoop.hbase.util.CommonFSUtils) FSUtils(org.apache.hadoop.hbase.util.FSUtils) Ignore(org.junit.Ignore) Test(org.junit.Test)
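
For reference, the per-family settings this test asserts on are configured through ColumnFamilyDescriptorBuilder. A minimal sketch (the family name "cf" and the GZ/ROW choices are illustrative, standing in for what generateColumnDescriptors() produces):

import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;

// Build a family with explicit compression and bloom filter settings; these are
// the per-family properties the RecordWriter is expected to pick up.
ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("cf"))
    .setCompressionType(Compression.Algorithm.GZ)
    .setBloomFilterType(BloomType.ROW)
    .build();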

Example 12 with ColumnFamilyDescriptor

use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.

the class TestCopyTable method createTable.

private Table createTable(TableName tableName, byte[] family, boolean isMob) throws IOException {
    if (isMob) {
        ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(family).setMobEnabled(true).setMobThreshold(1).build();
        TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(cfd).build();
        return TEST_UTIL.createTable(desc, null);
    } else {
        return TEST_UTIL.createTable(tableName, family);
    }
}
Also used : ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor)
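
The MOB threshold set above controls when a value is stored out-of-line. A hedged usage sketch, assuming it runs next to createTable inside the test class (table name, row, qualifier, and value are placeholders): with setMobThreshold(1), any cell value larger than one byte is written to a MOB file and only a reference is kept in the store file.

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

byte[] family = Bytes.toBytes("mob");
Table table = createTable(TableName.valueOf("testCopyTableMob"), family, true);
// This value exceeds the 1-byte MOB threshold, so it is stored as a MOB.
Put put = new Put(Bytes.toBytes("row1"));
put.addColumn(family, Bytes.toBytes("q"), Bytes.toBytes("mob-sized value"));
table.put(put);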

Example 13 with ColumnFamilyDescriptor

use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.

the class ExpiredMobFileCleaner method run.

@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION", justification = "Intentional")
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return 1;
    }
    String tableName = args[0];
    String familyName = args[1];
    TableName tn = TableName.valueOf(tableName);
    Connection connection = ConnectionFactory.createConnection(getConf());
    Admin admin = connection.getAdmin();
    try {
        TableDescriptor htd = admin.getDescriptor(tn);
        ColumnFamilyDescriptor family = htd.getColumnFamily(Bytes.toBytes(familyName));
        if (family == null || !family.isMobEnabled()) {
            throw new IOException("Column family " + familyName + " is not a MOB column family");
        }
        if (family.getMinVersions() > 0) {
            throw new IOException("The minVersions of the column family is not 0, could not be handled by this cleaner");
        }
        cleanExpiredMobFiles(tableName, family);
        return 0;
    } finally {
        admin.close();
        try {
            connection.close();
        } catch (IOException e) {
            LOG.error("Failed to close the connection.", e);
        }
    }
}
Also used : TableName(org.apache.hadoop.hbase.TableName) Connection(org.apache.hadoop.hbase.client.Connection) IOException(java.io.IOException) Admin(org.apache.hadoop.hbase.client.Admin) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor)
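
Since ExpiredMobFileCleaner is a Hadoop Tool, run() above is normally reached via ToolRunner. A minimal invocation sketch (table and family names are placeholders):

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

// args[0] = table name, args[1] = MOB-enabled column family, as parsed in run().
int exitCode = ToolRunner.run(HBaseConfiguration.create(),
    new ExpiredMobFileCleaner(), new String[] { "myTable", "myMobFamily" });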

Example 14 with ColumnFamilyDescriptor

use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.

the class MobFileCleanerChore method chore.

@Override
protected void chore() {
    TableDescriptors htds = master.getTableDescriptors();
    Map<String, TableDescriptor> map = null;
    try {
        map = htds.getAll();
    } catch (IOException e) {
        LOG.error("MobFileCleanerChore failed", e);
        return;
    }
    for (TableDescriptor htd : map.values()) {
        for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
            if (hcd.isMobEnabled() && hcd.getMinVersions() == 0) {
                try {
                    cleaner.cleanExpiredMobFiles(htd.getTableName().getNameAsString(), hcd);
                } catch (IOException e) {
                    LOG.error("Failed to clean the expired mob files table={} family={}", htd.getTableName().getNameAsString(), hcd.getNameAsString(), e);
                }
            }
        }
        try {
            // Now clean obsolete files for a table
            LOG.info("Cleaning obsolete MOB files from table={}", htd.getTableName());
            cleanupObsoleteMobFiles(master.getConfiguration(), htd.getTableName());
            LOG.info("Cleaning obsolete MOB files finished for table={}", htd.getTableName());
        } catch (IOException e) {
            LOG.error("Failed to clean the obsolete mob files for table={}", htd.getTableName(), e);
        }
    }
}
Also used : TableDescriptors(org.apache.hadoop.hbase.TableDescriptors) IOException(java.io.IOException) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor)
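
The chore's eligibility rule can be read off the nested loops above: a family qualifies for TTL cleaning only when MOB is enabled and minVersions is 0. The same check, pulled out as a standalone helper for clarity (a sketch, not part of the original class):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;

// Families the chore will pass to cleanExpiredMobFiles(): MOB-enabled and
// without a minimum-versions guarantee that TTL cleaning could violate.
static List<ColumnFamilyDescriptor> cleanableFamilies(TableDescriptor htd) {
    return Arrays.stream(htd.getColumnFamilies())
        .filter(cfd -> cfd.isMobEnabled() && cfd.getMinVersions() == 0)
        .collect(Collectors.toList());
}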

Example 15 with ColumnFamilyDescriptor

use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.

the class MobFileCleanerChore method cleanupObsoleteMobFiles.

/**
 * Performs housekeeping file cleaning (called by MOB Cleaner chore)
 * @param conf configuration
 * @param table table name
 * @throws IOException exception
 */
public void cleanupObsoleteMobFiles(Configuration conf, TableName table) throws IOException {
    long minAgeToArchive = conf.getLong(MobConstants.MIN_AGE_TO_ARCHIVE_KEY, MobConstants.DEFAULT_MIN_AGE_TO_ARCHIVE);
    // We check only MOB files whose creation time is less than
    // maxCreationTimeToArchive, i.e. the current time minus 1h. The one-hour
    // gap gives us full confidence that all corresponding store files will
    // exist when the cleaning procedure begins and will be examined.
    // MOB files created after maxCreationTimeToArchive are skipped in this
    // pass and are not archived.
    long maxCreationTimeToArchive = EnvironmentEdgeManager.currentTime() - minAgeToArchive;
    try (final Connection conn = ConnectionFactory.createConnection(conf);
        final Admin admin = conn.getAdmin()) {
        TableDescriptor htd = admin.getDescriptor(table);
        List<ColumnFamilyDescriptor> list = MobUtils.getMobColumnFamilies(htd);
        if (list.size() == 0) {
            LOG.info("Skipping non-MOB table [{}]", table);
            return;
        } else {
            LOG.info("Only MOB files whose creation time older than {} will be archived, table={}", maxCreationTimeToArchive, table);
        }
        Path rootDir = CommonFSUtils.getRootDir(conf);
        Path tableDir = CommonFSUtils.getTableDir(rootDir, table);
        // How safe is this call?
        List<Path> regionDirs = FSUtils.getRegionDirs(FileSystem.get(conf), tableDir);
        Set<String> allActiveMobFileName = new HashSet<String>();
        FileSystem fs = FileSystem.get(conf);
        for (Path regionPath : regionDirs) {
            for (ColumnFamilyDescriptor hcd : list) {
                String family = hcd.getNameAsString();
                Path storePath = new Path(regionPath, family);
                boolean succeed = false;
                Set<String> regionMobs = new HashSet<String>();
                while (!succeed) {
                    if (!fs.exists(storePath)) {
                        String errMsg = String.format("Directory %s was deleted during MOB file cleaner chore" + " execution, aborting MOB file cleaner chore.", storePath);
                        throw new IOException(errMsg);
                    }
                    RemoteIterator<LocatedFileStatus> rit = fs.listLocatedStatus(storePath);
                    List<Path> storeFiles = new ArrayList<Path>();
                    // Load list of store files first
                    while (rit.hasNext()) {
                        Path p = rit.next().getPath();
                        if (fs.isFile(p)) {
                            storeFiles.add(p);
                        }
                    }
                    LOG.info("Found {} store files in: {}", storeFiles.size(), storePath);
                    Path currentPath = null;
                    try {
                        for (Path pp : storeFiles) {
                            currentPath = pp;
                            LOG.trace("Store file: {}", pp);
                            HStoreFile sf = new HStoreFile(fs, pp, conf, CacheConfig.DISABLED, BloomType.NONE, true);
                            sf.initReader();
                            byte[] mobRefData = sf.getMetadataValue(HStoreFile.MOB_FILE_REFS);
                            byte[] bulkloadMarkerData = sf.getMetadataValue(HStoreFile.BULKLOAD_TASK_KEY);
                            // close store file to avoid memory leaks
                            sf.closeStoreFile(true);
                            if (mobRefData == null) {
                                if (bulkloadMarkerData == null) {
                                    LOG.warn("Found old store file with no MOB_FILE_REFS: {} - " + "can not proceed until all old files will be MOB-compacted.", pp);
                                    return;
                                } else {
                                    LOG.debug("Skipping file without MOB references (bulkloaded file):{}", pp);
                                    continue;
                                }
                            }
                            // Deserialize the MOB references that the MOB
                            // compaction code recorded in the store file metadata.
                            try {
                                SetMultimap<TableName, String> mobs = MobUtils.deserializeMobFileRefs(mobRefData).build();
                                LOG.debug("Found {} mob references for store={}", mobs.size(), sf);
                                LOG.trace("Specific mob references found for store={} : {}", sf, mobs);
                                regionMobs.addAll(mobs.values());
                            } catch (RuntimeException exception) {
                                throw new IOException("failure getting mob references for hfile " + sf, exception);
                            }
                        }
                    } catch (FileNotFoundException e) {
                        LOG.warn("Missing file:{} Starting MOB cleaning cycle from the beginning" + " due to error", currentPath, e);
                        regionMobs.clear();
                        continue;
                    }
                    succeed = true;
                }
                // Add MOB references for current region/family
                allActiveMobFileName.addAll(regionMobs);
            }
        // END column families
        }
        // Warn if the number of active MOB files is very large (over 1M),
        // since the whole reference set is held in memory.
        if (allActiveMobFileName.size() > 1000000) {
            LOG.warn("Found too many active MOB files: {}, table={}, " + "this may result in high memory pressure.", allActiveMobFileName.size(), table);
        }
        LOG.debug("Found: {} active mob refs for table={}", allActiveMobFileName.size(), table);
        allActiveMobFileName.stream().forEach(LOG::trace);
        // Now scan MOB directories and find MOB files with no references to them
        for (ColumnFamilyDescriptor hcd : list) {
            List<Path> toArchive = new ArrayList<Path>();
            String family = hcd.getNameAsString();
            Path dir = MobUtils.getMobFamilyPath(conf, table, family);
            RemoteIterator<LocatedFileStatus> rit = fs.listLocatedStatus(dir);
            while (rit.hasNext()) {
                LocatedFileStatus lfs = rit.next();
                Path p = lfs.getPath();
                if (!allActiveMobFileName.contains(p.getName())) {
                    // MOB is not in a list of active references, but it can be too
                    // fresh, skip it in this case
                    long creationTime = lfs.getModificationTime();
                    if (creationTime < maxCreationTimeToArchive) {
                        LOG.trace("Archiving MOB file {} creation time={}", p, creationTime);
                        toArchive.add(p);
                    } else {
                        LOG.trace("Skipping fresh file: {}. Creation time={}", p, creationTime);
                    }
                } else {
                    LOG.trace("Keeping active MOB file: {}", p);
                }
            }
            LOG.info(" MOB Cleaner found {} files to archive for table={} family={}", toArchive.size(), table, family);
            archiveMobFiles(conf, table, Bytes.toBytes(family), toArchive);
            LOG.info(" MOB Cleaner archived {} files, table={} family={}", toArchive.size(), table, family);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Connection(org.apache.hadoop.hbase.client.Connection) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) Admin(org.apache.hadoop.hbase.client.Admin) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor) TableName(org.apache.hadoop.hbase.TableName) FileSystem(org.apache.hadoop.fs.FileSystem) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) HashSet(java.util.HashSet)
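
The minAgeToArchive window read at the top of this method is tunable. A configuration sketch (the two-hour value is illustrative; the default corresponds to the one-hour gap described in the comments above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mob.MobConstants;

// Widen the safety window before an unreferenced MOB file may be archived.
Configuration conf = HBaseConfiguration.create();
conf.setLong(MobConstants.MIN_AGE_TO_ARCHIVE_KEY, 2 * 60 * 60 * 1000L); // 2 hours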

Aggregations

ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor): 199
TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor): 95
Test (org.junit.Test): 92
TableDescriptorBuilder (org.apache.hadoop.hbase.client.TableDescriptorBuilder): 78
IOException (java.io.IOException): 44
TableName (org.apache.hadoop.hbase.TableName): 44
RegionInfo (org.apache.hadoop.hbase.client.RegionInfo): 42
Path (org.apache.hadoop.fs.Path): 41
Admin (org.apache.hadoop.hbase.client.Admin): 36
Configuration (org.apache.hadoop.conf.Configuration): 34
ArrayList (java.util.ArrayList): 32
Put (org.apache.hadoop.hbase.client.Put): 32
FileSystem (org.apache.hadoop.fs.FileSystem): 28
HRegion (org.apache.hadoop.hbase.regionserver.HRegion): 24
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 22
Get (org.apache.hadoop.hbase.client.Get): 20
Result (org.apache.hadoop.hbase.client.Result): 19
ColumnFamilyDescriptorBuilder (org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder): 17
Scan (org.apache.hadoop.hbase.client.Scan): 17
Table (org.apache.hadoop.hbase.client.Table): 17