Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
Class TestHFileOutputFormat2, method testExcludeAllFromMinorCompaction.
/**
 * This test covers the scenario reported in HBASE-6901: all files are bulk
 * loaded and excluded from minor compaction. Without the fix for HBASE-6901,
 * an ArrayIndexOutOfBoundsException is thrown.
 */
@Ignore("Flakey: See HBASE-9051")
@Test
public void testExcludeAllFromMinorCompaction() throws Exception {
  Configuration conf = util.getConfiguration();
  conf.setInt("hbase.hstore.compaction.min", 2);
  generateRandomStartKeys(5);
  util.startMiniCluster();
  try (Connection conn = ConnectionFactory.createConnection();
      Admin admin = conn.getAdmin();
      Table table = util.createTable(TABLE_NAME, FAMILIES);
      RegionLocator locator = conn.getRegionLocator(TABLE_NAME)) {
    final FileSystem fs = util.getDFSCluster().getFileSystem();
    assertEquals("Should start with empty table", 0, util.countRows(table));

    // deep inspection: get the StoreFile dir
    final Path storePath = new Path(
        FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
        new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
            Bytes.toString(FAMILIES[0])));
    assertEquals(0, fs.listStatus(storePath).length);

    // Generate two bulk load files
    conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
    for (int i = 0; i < 2; i++) {
      Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
      runIncrementalPELoad(conf, table.getTableDescriptor(),
          conn.getRegionLocator(TABLE_NAME), testDir, false);
      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
    }

    // Ensure data shows up
    int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table",
        expectedRows, util.countRows(table));

    // should have a second StoreFile now
    assertEquals(2, fs.listStatus(storePath).length);

    // minor compactions shouldn't get rid of the file
    admin.compact(TABLE_NAME);
    try {
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
          for (HRegion region : regions) {
            for (Store store : region.getStores()) {
              store.closeAndArchiveCompactedFiles();
            }
          }
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);
      throw new IOException("SF# = " + fs.listStatus(storePath).length);
    } catch (AssertionError ae) {
      // this is expected behavior
    }

    // a major compaction should work though
    admin.majorCompact(TABLE_NAME);
    quickPoll(new Callable<Boolean>() {
      @Override
      public Boolean call() throws Exception {
        List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
        for (HRegion region : regions) {
          for (Store store : region.getStores()) {
            store.closeAndArchiveCompactedFiles();
          }
        }
        return fs.listStatus(storePath).length == 1;
      }
    }, 5000);
  } finally {
    util.shutdownMiniCluster();
  }
}
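The quickPoll helper this test relies on is defined elsewhere in TestHFileOutputFormat2 and is not reproduced in the snippet. A minimal sketch of the polling pattern, under the assumption that it retries the Callable until the timeout elapses and then fails with a JUnit AssertionError, which is why the catch block above treats AssertionError as the expected outcome:

// Sketch of a quickPoll-style helper (assumption; the real helper lives
// elsewhere in TestHFileOutputFormat2). It polls the condition every few
// milliseconds and fails with an AssertionError once waitMs has elapsed,
// which is what the try/catch around the minor-compaction check relies on.
private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
  int sleepMs = 10;
  int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
  while (retries-- > 0) {
    if (c.call().booleanValue()) {
      return; // condition met before the deadline
    }
    Thread.sleep(sleepMs);
  }
  fail(); // org.junit.Assert.fail throws an AssertionError
}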
Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
Class TestHFileOutputFormat2, method testColumnFamilySettings.
/**
 * Test that the {@link HFileOutputFormat2} RecordWriter uses compression and
 * bloom filter settings from the column family descriptor.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");

  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  Mockito.doReturn(htd).when(table).getTableDescriptor();
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);
  }

  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);
  try {
    // partial map-reduce setup to get an operational writer for testing.
    // We turn off sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    conf.set("hbase.fs.tmp.dir", dir.toString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job, false);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);

    // write out random rows
    writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
    writer.close(context);

    // Make sure that a directory was created for every CF
    FileSystem fs = dir.getFileSystem(conf);

    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);
    FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(htd.getFamilies().size(), families.length);
    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
      // verify that the compression on this file matches the configured compression
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
      byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      if (bloomFilter == null) {
        bloomFilter = Bytes.toBytes("NONE");
      }
      assertEquals("Incorrect bloom filter used for column family " + familyStr
          + " (reader: " + reader + ")",
          hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
      assertEquals("Incorrect compression used for column family " + familyStr
          + " (reader: " + reader + ")",
          hcd.getCompressionType(), reader.getFileContext().getCompression());
    }
  } finally {
    dir.getFileSystem(conf).delete(dir, true);
  }
}
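The setupMockStartKeys helper called above stubs the mocked RegionLocator but is not shown in the snippet. A minimal sketch, assuming it simply returns a fixed set of start keys, which is all configureIncrementalLoad needs to compute its reducer partitions without a live cluster:

// Sketch of a setupMockStartKeys-style helper (assumption; the real helper
// is defined elsewhere in the test class). Stubbing getStartKeys() lets
// HFileOutputFormat2.configureIncrementalLoad derive region boundaries
// from the mock instead of a running cluster.
private void setupMockStartKeys(RegionLocator regionLocator) throws IOException {
  byte[][] mockKeys = new byte[][] {
    HConstants.EMPTY_BYTE_ARRAY, // first region starts at the empty key
    Bytes.toBytes("aaa"),
    Bytes.toBytes("ggg"),
    Bytes.toBytes("zzz")
  };
  Mockito.doReturn(mockKeys).when(regionLocator).getStartKeys();
}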
Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
Class TestHFileOutputFormat2, method doIncrementalLoadTest.
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
    boolean putSortReducer, String tableStr) throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
  int hostCount = 1;
  int regionNum = 5;
  if (shouldKeepLocality) {
    // Raise the host count above the HDFS replica count once MiniHBaseCluster
    // supports an explicit hostnames parameter, as MiniDFSCluster already does.
    hostCount = 3;
    regionNum = 20;
  }

  byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
  String[] hostnames = new String[hostCount];
  for (int i = 0; i < hostCount; ++i) {
    hostnames[i] = "datanode_" + i;
  }
  util.startMiniCluster(1, hostCount, hostnames);

  TableName tableName = TableName.valueOf(tableStr);
  Table table = util.createTable(tableName, FAMILIES, splitKeys);
  Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
  FileSystem fs = testDir.getFileSystem(conf);
  try (RegionLocator r = util.getConnection().getRegionLocator(tableName);
      Admin admin = util.getConnection().getAdmin()) {
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = r.getStartKeys().length;
    assertEquals("Should make " + regionNum + " regions", regionNum, numRegions);

    // Generate the bulk load files
    runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir, putSortReducer);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      util.waitUntilNoRegionsInTransition();
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(tableName, FAMILIES, newSplitKeys);
      while (util.getConnection().getRegionLocator(tableName).getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

    // Ensure data shows up
    int expectedRows = 0;
    if (putSortReducer) {
      // no rows should be extracted
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
    } else {
      expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
    }

    String tableDigestBefore = util.checksumRows(table);
    // Check region locality
    HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
    for (HRegion region : util.getHBaseCluster().getRegions(tableName)) {
      hbd.add(region.getHDFSBlocksDistribution());
    }
    for (String hostname : hostnames) {
      float locality = hbd.getBlockLocalityIndex(hostname);
      LOG.info("locality of [" + hostname + "]: " + locality);
      assertEquals(100, (int) (locality * 100));
    }

    // Cause regions to reopen
    admin.disableTable(tableName);
    while (!admin.isTableDisabled(tableName)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(tableName);
    util.waitTableAvailable(tableName);
    assertEquals("Data should remain after reopening of regions",
        tableDigestBefore, util.checksumRows(table));
  } finally {
    testDir.getFileSystem(conf).delete(testDir, true);
    util.deleteTable(tableName);
    util.shutdownMiniCluster();
  }
}
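The runIncrementalPELoad helper used here and in the compaction tests is not included in the snippet. A hedged sketch, reconstructed from the job-setup calls visible in testColumnFamilySettings above (the "runIncrementalPELoad" working-directory name is an assumption):

// Sketch of a runIncrementalPELoad-style helper (assumption; reconstructed
// from the job-setup sequence in testColumnFamilySettings). It wires a
// random-data mapper into a job, lets HFileOutputFormat2 configure the
// incremental load against the RegionLocator, and runs the job to completion.
private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir, boolean putSortReducer) throws Exception {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  setupRandomGeneratorMapper(job, putSortReducer);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);
  // one reducer per region, so the HFiles line up with region boundaries
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  assertTrue(job.waitForCompletion(true));
}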
Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
Class TestHFileOutputFormat2, method testExcludeMinorCompaction.
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testExcludeMinorCompaction() throws Exception {
Configuration conf = util.getConfiguration();
conf.setInt("hbase.hstore.compaction.min", 2);
generateRandomStartKeys(5);
util.startMiniCluster();
try (Connection conn = ConnectionFactory.createConnection(conf);
Admin admin = conn.getAdmin()) {
Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
final FileSystem fs = util.getDFSCluster().getFileSystem();
Table table = util.createTable(TABLE_NAME, FAMILIES);
assertEquals("Should start with empty table", 0, util.countRows(table));
// deep inspection: get the StoreFile dir
final Path storePath = new Path(FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME), new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(), Bytes.toString(FAMILIES[0])));
assertEquals(0, fs.listStatus(storePath).length);
// put some data in it and flush to create a storefile
Put p = new Put(Bytes.toBytes("test"));
p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
table.put(p);
admin.flush(TABLE_NAME);
assertEquals(1, util.countRows(table));
quickPoll(new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
return fs.listStatus(storePath).length == 1;
}
}, 5000);
// Generate a bulk load file with more rows
conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAME);
runIncrementalPELoad(conf, table.getTableDescriptor(), regionLocator, testDir, false);
// Perform the actual load
new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);
// Ensure data shows up
int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows + 1, util.countRows(table));
// should have a second StoreFile now
assertEquals(2, fs.listStatus(storePath).length);
// minor compactions shouldn't get rid of the file
admin.compact(TABLE_NAME);
try {
quickPoll(new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
return fs.listStatus(storePath).length == 1;
}
}, 5000);
throw new IOException("SF# = " + fs.listStatus(storePath).length);
} catch (AssertionError ae) {
// this is expected behavior
}
// a major compaction should work though
admin.majorCompact(TABLE_NAME);
quickPoll(new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
return fs.listStatus(storePath).length == 1;
}
}, 5000);
} finally {
util.shutdownMiniCluster();
}
}
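Setting hbase.mapreduce.hfileoutputformat.compaction.exclude makes HFileOutputFormat2 tag the HFiles it writes so compactions can skip them. As a hedged illustration of what that tagging looks like on disk, the fragment below could be dropped into the test right after the bulk load; the file-info key constant StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY is an assumption about where the marker lands:

// Hedged sketch: inspect the file-info of a bulk-loaded HFile to confirm the
// exclusion marker. StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY is assumed to
// be the key HFileOutputFormat2 writes when the config flag above is set.
FileStatus[] storeFiles = fs.listStatus(storePath);
HFile.Reader reader =
    HFile.createReader(fs, storeFiles[0].getPath(), new CacheConfig(conf), conf);
try {
  Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
  byte[] excluded = fileInfo.get(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY);
  assertTrue("bulk-loaded file should carry the exclusion marker",
      excluded != null && Bytes.toBoolean(excluded));
} finally {
  reader.close();
}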
Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.
Class TestLoadIncrementalHFilesSplitRecovery, method populateTable.
/**
 * Populate table with known values.
 */
private void populateTable(final Connection connection, TableName table, int value)
    throws Exception {
  // create HFiles for different column families
  LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
  Path bulk1 = buildBulkFiles(table, value);
  try (Table t = connection.getTable(table);
      RegionLocator locator = connection.getRegionLocator(table);
      Admin admin = connection.getAdmin()) {
    lih.doBulkLoad(bulk1, admin, t, locator);
  }
}
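The buildBulkFiles helper is another member of TestLoadIncrementalHFilesSplitRecovery not reproduced here. A minimal sketch, assuming it stages a per-table bulk-load directory on the test filesystem; buildHFiles is a hypothetical lower-level helper that actually writes the HFiles:

// Sketch of a buildBulkFiles-style helper (assumption; the real helper is
// defined elsewhere in TestLoadIncrementalHFilesSplitRecovery). It stages a
// bulk-load directory on the test filesystem and fills it with HFiles whose
// cells all carry the given value.
private Path buildBulkFiles(TableName table, int value) throws Exception {
  Path dir = util.getDataTestDirOnTestFS(table.getNameAsString());
  Path bulk = new Path(dir, table.getNameAsString() + value);
  FileSystem fs = util.getTestFileSystem();
  buildHFiles(fs, bulk, value); // hypothetical lower-level writer helper
  return bulk;
}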