use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
the class TestHFileOutputFormat2 method testColumnFamilySettings.
/**
 * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
 * bloom filter settings from the column family descriptor.
 * <p>
 * The test writes random rows through the record writer, commits the task and the job, and then
 * opens the first file listed in every column family directory to compare its bloom filter type
 * and compression with the values declared on the matching {@link ColumnFamilyDescriptor}.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");

  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  TableDescriptorBuilder tableDescriptorBuilder = TableDescriptorBuilder.newBuilder(TABLE_NAMES[0]);
  for (ColumnFamilyDescriptor hcd : HBaseTestingUtil.generateColumnDescriptors()) {
    tableDescriptorBuilder.setColumnFamily(hcd);
  }
  // Stub the mock only AFTER every family has been added to the builder: build() is evaluated
  // eagerly here, so stubbing before the loop would hand configureIncrementalLoad a descriptor
  // without any column families.
  Mockito.doReturn(tableDescriptorBuilder.build()).when(table).getDescriptor();

  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);

  try {
    // partial map red setup to get an operational writer for testing
    // We turn off the sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    conf.set("hbase.fs.tmp.dir", dir.toString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job, false);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);

    // write out random rows
    writeRandomKeyValues(writer, context, tableDescriptorBuilder.build().getColumnFamilyNames(),
      ROWSPERSPLIT);
    writer.close(context);

    FileSystem fs = dir.getFileSystem(conf);

    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);

    // Make sure that a directory was created for every CF
    FileStatus[] families = CommonFSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(tableDescriptorBuilder.build().getColumnFamilies().length, families.length);

    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      ColumnFamilyDescriptor hcd =
        tableDescriptorBuilder.build().getColumnFamily(Bytes.toBytes(familyStr));

      // verify that the bloom filter and compression on this file match the configured values
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      // close the reader once the checks are done so the test does not leak open files
      try (Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), true, conf)) {
        Map<byte[], byte[]> fileInfo = reader.getHFileInfo();

        byte[] bloomFilter = fileInfo.get(BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) {
          // no bloom filter entry in the file info is treated as BloomType "NONE"
          bloomFilter = Bytes.toBytes("NONE");
        }
        assertEquals("Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals("Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
      }
    }
  } finally {
    // always remove the test output directory, even when an assertion failed
    dir.getFileSystem(conf).delete(dir, true);
  }
}
use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
the class TestCopyTable method createTable.
/**
 * Creates a table with a single column family through {@code TEST_UTIL}.
 *
 * @param tableName name of the table to create
 * @param family name of the only column family
 * @param isMob when {@code true} the family is created MOB-enabled with a MOB threshold of 1
 * @return the table handle returned by {@code TEST_UTIL.createTable}
 * @throws IOException if table creation fails
 */
private Table createTable(TableName tableName, byte[] family, boolean isMob) throws IOException {
  // Plain (non-MOB) tables need no custom descriptor.
  if (!isMob) {
    return TEST_UTIL.createTable(tableName, family);
  }

  // MOB table: build the family descriptor first, then wrap it in a table descriptor.
  ColumnFamilyDescriptor mobFamily = ColumnFamilyDescriptorBuilder.newBuilder(family)
    .setMobEnabled(true)
    .setMobThreshold(1)
    .build();
  TableDescriptor mobTable = TableDescriptorBuilder.newBuilder(tableName)
    .setColumnFamily(mobFamily)
    .build();
  return TEST_UTIL.createTable(mobTable, null);
}
use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
the class ExpiredMobFileCleaner method run.
/**
 * Command-line entry point: cleans the expired MOB files of one column family.
 *
 * @param args exactly two elements: the table name followed by the column family name
 * @return 1 (after printing the usage) when the argument count is not two, 0 after the cleaner
 *         ran
 * @throws Exception an {@link IOException} when the family does not exist, is not MOB-enabled,
 *           or has a minVersions greater than 0; any failure of the cleaning itself is
 *           propagated as well
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION", justification = "Intentional")
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    printUsage();
    return 1;
  }
  String tableName = args[0];
  String familyName = args[1];
  TableName tn = TableName.valueOf(tableName);
  Connection connection = ConnectionFactory.createConnection(getConf());
  // The Admin is a try-with-resources resource, so it is closed before the finally block runs.
  // The finally block also covers a failure inside getAdmin(), so the connection is released on
  // every path, including when closing the Admin itself throws.
  try (Admin admin = connection.getAdmin()) {
    TableDescriptor htd = admin.getDescriptor(tn);
    ColumnFamilyDescriptor family = htd.getColumnFamily(Bytes.toBytes(familyName));
    if (family == null || !family.isMobEnabled()) {
      throw new IOException("Column family " + familyName + " is not a MOB column family");
    }
    if (family.getMinVersions() > 0) {
      throw new IOException("The minVersions of the column family is not 0, could not be handled by this cleaner");
    }
    cleanExpiredMobFiles(tableName, family);
    return 0;
  } finally {
    try {
      connection.close();
    } catch (IOException e) {
      // best effort: a failed close must not hide the outcome of the cleaning itself
      LOG.error("Failed to close the connection.", e);
    }
  }
}
use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
the class MobFileCleanerChore method chore.
/**
 * One run of the chore: for every table known to the master, cleans the expired MOB files of
 * each MOB-enabled family whose minVersions is 0, then cleans that table's obsolete MOB files.
 * An {@link IOException} for one family or one table is logged and does not stop the run; a
 * failure to load the table descriptors ends the run immediately.
 */
@Override
protected void chore() {
  TableDescriptors descriptorSource = master.getTableDescriptors();
  Map<String, TableDescriptor> allDescriptors = null;
  try {
    allDescriptors = descriptorSource.getAll();
  } catch (IOException e) {
    // without the descriptors there is nothing to iterate over
    LOG.error("MobFileCleanerChore failed", e);
    return;
  }

  for (TableDescriptor tableDescriptor : allDescriptors.values()) {
    // Step 1: expired MOB files, family by family
    for (ColumnFamilyDescriptor family : tableDescriptor.getColumnFamilies()) {
      if (!family.isMobEnabled() || family.getMinVersions() != 0) {
        // only MOB families with minVersions == 0 are handled by the expiry cleaner
        continue;
      }
      try {
        cleaner.cleanExpiredMobFiles(tableDescriptor.getTableName().getNameAsString(), family);
      } catch (IOException e) {
        LOG.error("Failed to clean the expired mob files table={} family={}", tableDescriptor.getTableName().getNameAsString(), family.getNameAsString(), e);
      }
    }

    // Step 2: obsolete MOB files for the whole table
    try {
      LOG.info("Cleaning obsolete MOB files from table={}", tableDescriptor.getTableName());
      cleanupObsoleteMobFiles(master.getConfiguration(), tableDescriptor.getTableName());
      LOG.info("Cleaning obsolete MOB files finished for table={}", tableDescriptor.getTableName());
    } catch (IOException e) {
      LOG.error("Failed to clean the obsolete mob files for table={}", tableDescriptor.getTableName(), e);
    }
  }
}
use of org.apache.hadoop.hbase.client.ColumnFamilyDescriptor in project hbase by apache.
the class MobFileCleanerChore method cleanupObsoleteMobFiles.
/**
 * Performs housekeeping file cleaning (called by MOB Cleaner chore).
 * <p>
 * The method works in two phases:
 * <ol>
 * <li>For every region directory and every MOB column family of the table, it opens each store
 * file and collects the MOB file names recorded in its {@code MOB_FILE_REFS} metadata.</li>
 * <li>For every MOB column family, it lists the MOB family directory and archives each file
 * that is not in the collected set and whose modification time is older than
 * {@code currentTime - minAgeToArchive}.</li>
 * </ol>
 * Nothing is archived (the method returns early) when the table has no MOB column family, or
 * when a store file is found that has neither MOB references nor a bulk load marker.
 *
 * @param conf configuration
 * @param table table name
 * @throws IOException if a store directory disappears while it is being scanned, if the MOB
 *           references of a store file cannot be deserialized, or on any other file system or
 *           connection failure
 */
public void cleanupObsoleteMobFiles(Configuration conf, TableName table) throws IOException {
  long minAgeToArchive = conf.getLong(MobConstants.MIN_AGE_TO_ARCHIVE_KEY, MobConstants.DEFAULT_MIN_AGE_TO_ARCHIVE);
  // Only MOB files whose creation time is earlier than maxCreationTimeToArchive are examined,
  // i.e. the current time minus the configured minimum age (the original note describes this
  // gap as one hour - presumably the default; confirm against MobConstants). The gap gives
  // confidence that all corresponding store files exist when the cleaning procedure begins
  // and will be examined. A MOB file created after maxCreationTimeToArchive is skipped and
  // is not archived.
  long maxCreationTimeToArchive = EnvironmentEdgeManager.currentTime() - minAgeToArchive;
  try (final Connection conn = ConnectionFactory.createConnection(conf);
    final Admin admin = conn.getAdmin()) {
    TableDescriptor htd = admin.getDescriptor(table);
    List<ColumnFamilyDescriptor> list = MobUtils.getMobColumnFamilies(htd);
    if (list.isEmpty()) {
      LOG.info("Skipping non-MOB table [{}]", table);
      return;
    } else {
      LOG.info("Only MOB files whose creation time older than {} will be archived, table={}", maxCreationTimeToArchive, table);
    }

    Path rootDir = CommonFSUtils.getRootDir(conf);
    Path tableDir = CommonFSUtils.getTableDir(rootDir, table);
    // one FileSystem handle is enough for both the region listing and the scans below
    FileSystem fs = FileSystem.get(conf);
    // How safe is this call?
    List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);

    Set<String> allActiveMobFileName = new HashSet<>();
    for (Path regionPath : regionDirs) {
      for (ColumnFamilyDescriptor hcd : list) {
        String family = hcd.getNameAsString();
        Path storePath = new Path(regionPath, family);
        boolean succeed = false;
        Set<String> regionMobs = new HashSet<>();

        // Retry the scan of this store from scratch whenever one of its files vanishes while
        // being read; give up only when the store directory itself is gone.
        while (!succeed) {
          if (!fs.exists(storePath)) {
            String errMsg = String.format("Directory %s was deleted during MOB file cleaner chore" + " execution, aborting MOB file cleaner chore.", storePath);
            throw new IOException(errMsg);
          }
          RemoteIterator<LocatedFileStatus> rit = fs.listLocatedStatus(storePath);
          List<Path> storeFiles = new ArrayList<>();
          // Load list of store files first
          while (rit.hasNext()) {
            Path p = rit.next().getPath();
            if (fs.isFile(p)) {
              storeFiles.add(p);
            }
          }
          LOG.info("Found {} store files in: {}", storeFiles.size(), storePath);
          // remembered only so that the FileNotFoundException handler can name the file
          Path currentPath = null;
          try {
            for (Path pp : storeFiles) {
              currentPath = pp;
              LOG.trace("Store file: {}", pp);
              HStoreFile sf = new HStoreFile(fs, pp, conf, CacheConfig.DISABLED, BloomType.NONE, true);
              sf.initReader();
              byte[] mobRefData = sf.getMetadataValue(HStoreFile.MOB_FILE_REFS);
              byte[] bulkloadMarkerData = sf.getMetadataValue(HStoreFile.BULKLOAD_TASK_KEY);
              // close store file to avoid memory leaks
              sf.closeStoreFile(true);
              if (mobRefData == null) {
                if (bulkloadMarkerData == null) {
                  // A file with neither MOB references nor a bulk load marker: stop the whole
                  // cleanup for this table without archiving anything.
                  LOG.warn("Found old store file with no MOB_FILE_REFS: {} - " + "can not proceed until all old files will be MOB-compacted.", pp);
                  return;
                } else {
                  LOG.debug("Skipping file without MOB references (bulkloaded file):{}", pp);
                  continue;
                }
              }
              // The file carries MOB_FILE_REFS metadata: decode it and remember every
              // referenced MOB file name.
              try {
                SetMultimap<TableName, String> mobs = MobUtils.deserializeMobFileRefs(mobRefData).build();
                LOG.debug("Found {} mob references for store={}", mobs.size(), sf);
                LOG.trace("Specific mob references found for store={} : {}", sf, mobs);
                regionMobs.addAll(mobs.values());
              } catch (RuntimeException exception) {
                throw new IOException("failure getting mob references for hfile " + sf, exception);
              }
            }
          } catch (FileNotFoundException e) {
            LOG.warn("Missing file:{} Starting MOB cleaning cycle from the beginning" + " due to error", currentPath, e);
            // drop the partial result and rescan this store
            regionMobs.clear();
            continue;
          }
          succeed = true;
        }
        // Add MOB references for current region/family
        allActiveMobFileName.addAll(regionMobs);
      }
      // END column families
    }

    // Check if number of MOB files too big (over 1M)
    if (allActiveMobFileName.size() > 1000000) {
      LOG.warn("Found too many active MOB files: {}, table={}, " + "this may result in high memory pressure.", allActiveMobFileName.size(), table);
    }
    LOG.debug("Found: {} active mob refs for table={}", allActiveMobFileName.size(), table);
    // do not walk a potentially huge set when trace output is disabled
    if (LOG.isTraceEnabled()) {
      allActiveMobFileName.forEach(LOG::trace);
    }

    // Now scan MOB directories and find MOB files with no references to them
    for (ColumnFamilyDescriptor hcd : list) {
      List<Path> toArchive = new ArrayList<>();
      String family = hcd.getNameAsString();
      Path dir = MobUtils.getMobFamilyPath(conf, table, family);
      RemoteIterator<LocatedFileStatus> rit = fs.listLocatedStatus(dir);
      while (rit.hasNext()) {
        Path p = rit.next().getPath();
        if (!allActiveMobFileName.contains(p.getName())) {
          // MOB is not in a list of active references, but it can be too
          // fresh, skip it in this case
          long creationTime = fs.getFileStatus(p).getModificationTime();
          // creationTime is reused in the log calls: logger arguments are evaluated even
          // when trace is off, so re-reading the file status there costs a file system call
          if (creationTime < maxCreationTimeToArchive) {
            LOG.trace("Archiving MOB file {} creation time={}", p, creationTime);
            toArchive.add(p);
          } else {
            LOG.trace("Skipping fresh file: {}. Creation time={}", p, creationTime);
          }
        } else {
          LOG.trace("Keeping active MOB file: {}", p);
        }
      }
      LOG.info(" MOB Cleaner found {} files to archive for table={} family={}", toArchive.size(), table, family);
      // pass the family's own name bytes; String.getBytes() would depend on the platform charset
      archiveMobFiles(conf, table, hcd.getName(), toArchive);
      LOG.info(" MOB Cleaner archived {} files, table={} family={}", toArchive.size(), table, family);
    }
  }
}
Aggregations