Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.
The class TestHFileOutputFormat2, method writeRandomKeyValues.
/**
 * Write random values to the writer assuming a table created using
 * {@link #FAMILIES} as column family descriptors
 */
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
  int valLength = 10;
  byte[] valBytes = new byte[valLength];
  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
  final byte[] qualifier = Bytes.toBytes("data");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {
    Bytes.putInt(keyBytes, 0, i);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
    for (byte[] family : families) {
      Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
      writer.write(key, kv);
    }
  }
}
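Worth noting for this helper: new ImmutableBytesWritable(keyBytes) wraps the row buffer rather than copying it, so every key handed to the writer shares the array that the next Bytes.putInt call overwrites. A minimal sketch of the wrap-versus-copy difference (the buf variable below is illustrative only, not part of the test):
// Illustrative only: ImmutableBytesWritable wraps the supplied array without copying,
// while copyBytes() returns an independent snapshot.
byte[] buf = new byte[Bytes.SIZEOF_INT];
Bytes.putInt(buf, 0, 1);
ImmutableBytesWritable wrapped = new ImmutableBytesWritable(buf);
byte[] snapshot = wrapped.copyBytes();
Bytes.putInt(buf, 0, 2);
// wrapped.get() now exposes the bytes for 2; snapshot still holds the bytes for 1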
Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.
The class TestHFileOutputFormat2, method testColumnFamilySettings.
/**
 * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
 * bloom filter settings from the column family descriptor
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");
  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  Mockito.doReturn(htd).when(table).getTableDescriptor();
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);
  }
  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);
  try {
    // partial map red setup to get an operational writer for testing
    // We turn off the sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    conf.set("hbase.fs.tmp.dir", dir.toString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job, false);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    // write out random rows
    writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
    writer.close(context);
    // Make sure that a directory was created for every CF
    FileSystem fs = dir.getFileSystem(conf);
    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);
    FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(htd.getFamilies().size(), families.length);
    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
      // verify that the compression on this file matches the configured
      // compression
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
      byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      if (bloomFilter == null) {
        bloomFilter = Bytes.toBytes("NONE");
      }
      assertEquals("Incorrect bloom filter used for column family " + familyStr
          + "(reader: " + reader + ")", hcd.getBloomFilterType(),
          BloomType.valueOf(Bytes.toString(bloomFilter)));
      assertEquals("Incorrect compression used for column family " + familyStr
          + "(reader: " + reader + ")", hcd.getCompressionType(),
          reader.getFileContext().getCompression());
    }
  } finally {
    dir.getFileSystem(conf).delete(dir, true);
  }
}
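Outside this mocked test, the same incremental-load configuration is usually driven from a live Connection rather than Mockito stubs. A hedged sketch, where "myTable" and the output path "/tmp/hfiles" are placeholders rather than names from the test:
// Hedged sketch; "myTable" and "/tmp/hfiles" are placeholder names.
try (Connection connection = ConnectionFactory.createConnection(conf);
    Table table = connection.getTable(TableName.valueOf("myTable"));
    RegionLocator locator = connection.getRegionLocator(TableName.valueOf("myTable"))) {
  Job job = Job.getInstance(conf, "prepare-hfiles");
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), locator);
  FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles"));
  // submit the job, then bulk-load the generated HFiles into the table
}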
Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.
The class TestHFileOutputFormat2, method test_LATEST_TIMESTAMP_isReplaced.
/**
 * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
 * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
 * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
  try {
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    final byte[] b = Bytes.toBytes("b");
    // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
    // changed by call to write. Check all in kv is same but ts.
    KeyValue kv = new KeyValue(b, b, b);
    KeyValue original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertFalse(original.equals(kv));
    assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
    assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
    assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
    assertNotSame(original.getTimestamp(), kv.getTimestamp());
    assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
    // Test 2. Now test passing a kv that has explicit ts. It should not be
    // changed by call to record write.
    kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
    original = kv.clone();
    writer.write(new ImmutableBytesWritable(), kv);
    assertTrue(original.equals(kv));
  } finally {
    if (writer != null && context != null)
      writer.close(context);
    dir.getFileSystem(conf).delete(dir, true);
  }
}
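The first assertion block relies on the three-argument KeyValue constructor leaving the timestamp at HConstants.LATEST_TIMESTAMP until the RecordWriter rewrites it; a minimal sketch of that assumption:
// Minimal sketch: a KeyValue built without an explicit timestamp carries LATEST_TIMESTAMP.
byte[] b = Bytes.toBytes("b");
KeyValue kv = new KeyValue(b, b, b);
assertEquals(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());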
Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.
The class TestHRegionPartitioner, method testHRegionPartitioner.
/**
 * Test HRegionPartitioner
 */
@Test(timeout = 300000)
public void testHRegionPartitioner() throws Exception {
  byte[][] families = { Bytes.toBytes("familyA"), Bytes.toBytes("familyB") };
  UTIL.createTable(TableName.valueOf(name.getMethodName()), families, 1,
      Bytes.toBytes("aa"), Bytes.toBytes("cc"), 3);
  HRegionPartitioner<Long, Long> partitioner = new HRegionPartitioner<>();
  Configuration configuration = UTIL.getConfiguration();
  configuration.set(TableOutputFormat.OUTPUT_TABLE, name.getMethodName());
  partitioner.setConf(configuration);
  ImmutableBytesWritable writable = new ImmutableBytesWritable(Bytes.toBytes("bb"));
  assertEquals(1, partitioner.getPartition(writable, 10L, 3));
  assertEquals(0, partitioner.getPartition(writable, 10L, 1));
}
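In a real write job, HRegionPartitioner is normally wired in when the reducer is configured rather than instantiated by hand as the test does. A hedged sketch, where MyReducer (a TableReducer) and the table name "myTable" are placeholders:
// Hedged sketch; MyReducer and "myTable" are placeholders.
Job job = Job.getInstance(UTIL.getConfiguration(), "write-to-hbase");
TableMapReduceUtil.initTableReducerJob("myTable", MyReducer.class, job,
    HRegionPartitioner.class);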
Use of org.apache.hadoop.hbase.io.ImmutableBytesWritable in project hbase by apache.
The class TestHashTable, method testHashTable.
@Test
public void testHashTable() throws Exception {
  final TableName tableName = TableName.valueOf(name.getMethodName());
  final byte[] family = Bytes.toBytes("family");
  final byte[] column1 = Bytes.toBytes("c1");
  final byte[] column2 = Bytes.toBytes("c2");
  final byte[] column3 = Bytes.toBytes("c3");
  int numRows = 100;
  int numRegions = 10;
  int numHashFiles = 3;
  byte[][] splitRows = new byte[numRegions - 1][];
  for (int i = 1; i < numRegions; i++) {
    splitRows[i - 1] = Bytes.toBytes(numRows * i / numRegions);
  }
  long timestamp = 1430764183454L;
  // put rows into the first table
  Table t1 = TEST_UTIL.createTable(tableName, family, splitRows);
  for (int i = 0; i < numRows; i++) {
    Put p = new Put(Bytes.toBytes(i), timestamp);
    p.addColumn(family, column1, column1);
    p.addColumn(family, column2, column2);
    p.addColumn(family, column3, column3);
    t1.put(p);
  }
  t1.close();
  HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
  Path testDir = TEST_UTIL.getDataTestDirOnTestFS(tableName.getNameAsString());
  long batchSize = 300;
  int code = hashTable.run(new String[] { "--batchsize=" + batchSize,
      "--numhashfiles=" + numHashFiles, "--scanbatch=2",
      tableName.getNameAsString(), testDir.toString() });
  assertEquals("test job failed", 0, code);
  FileSystem fs = TEST_UTIL.getTestFileSystem();
  HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
  assertEquals(tableName.getNameAsString(), tableHash.tableName);
  assertEquals(batchSize, tableHash.batchSize);
  assertEquals(numHashFiles, tableHash.numHashFiles);
  assertEquals(numHashFiles - 1, tableHash.partitions.size());
  for (ImmutableBytesWritable bytes : tableHash.partitions) {
    LOG.debug("partition: " + Bytes.toInt(bytes.get()));
  }
  ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes =
      ImmutableMap.<Integer, ImmutableBytesWritable>builder()
          .put(-1, new ImmutableBytesWritable(Bytes.fromHex("714cb10a9e3b5569852980edd8c6ca2f")))
          .put(5, new ImmutableBytesWritable(Bytes.fromHex("28d961d9252ce8f8d44a07b38d3e1d96")))
          .put(10, new ImmutableBytesWritable(Bytes.fromHex("f6bbc4a224d8fd929b783a92599eaffa")))
          .put(15, new ImmutableBytesWritable(Bytes.fromHex("522deb5d97f73a414ecc11457be46881")))
          .put(20, new ImmutableBytesWritable(Bytes.fromHex("b026f2611aaa46f7110116d807545352")))
          .put(25, new ImmutableBytesWritable(Bytes.fromHex("39ffc1a3094aa12a2e90ffd9cef2ce93")))
          .put(30, new ImmutableBytesWritable(Bytes.fromHex("f6b4d75727ce9a30ac29e4f08f601666")))
          .put(35, new ImmutableBytesWritable(Bytes.fromHex("422e2d2f1eb79a8f02171a705a42c090")))
          .put(40, new ImmutableBytesWritable(Bytes.fromHex("559ad61c900fffefea0a15abf8a97bc3")))
          .put(45, new ImmutableBytesWritable(Bytes.fromHex("23019084513eca41cee436b2a29611cb")))
          .put(50, new ImmutableBytesWritable(Bytes.fromHex("b40467d222ddb4949b142fe145ee9edc")))
          .put(55, new ImmutableBytesWritable(Bytes.fromHex("372bf89fcd8ca4b7ab3c1add9d07f7e4")))
          .put(60, new ImmutableBytesWritable(Bytes.fromHex("69ae0585e6255de27dce974e332b8f8b")))
          .put(65, new ImmutableBytesWritable(Bytes.fromHex("8029610044297aad0abdbecd485d8e59")))
          .put(70, new ImmutableBytesWritable(Bytes.fromHex("de5f784f7f78987b6e57ecfd81c8646f")))
          .put(75, new ImmutableBytesWritable(Bytes.fromHex("1cd757cc4e1715c8c3b1c24447a1ec56")))
          .put(80, new ImmutableBytesWritable(Bytes.fromHex("f9a53aacfeb6142b08066615e7038095")))
          .put(85, new ImmutableBytesWritable(Bytes.fromHex("89b872b7e639df32d3276b33928c0c91")))
          .put(90, new ImmutableBytesWritable(Bytes.fromHex("45eeac0646d46a474ea0484175faed38")))
          .put(95, new ImmutableBytesWritable(Bytes.fromHex("f57c447e32a08f4bf1abb2892839ac56")))
          .build();
  Map<Integer, ImmutableBytesWritable> actualHashes = new HashMap<>();
  Path dataDir = new Path(testDir, HashTable.HASH_DATA_DIR);
  for (int i = 0; i < numHashFiles; i++) {
    Path hashPath = new Path(dataDir, HashTable.TableHash.getDataFileName(i));
    MapFile.Reader reader = new MapFile.Reader(hashPath, fs.getConf());
    ImmutableBytesWritable key = new ImmutableBytesWritable();
    ImmutableBytesWritable hash = new ImmutableBytesWritable();
    while (reader.next(key, hash)) {
      String keyString = Bytes.toHex(key.get(), key.getOffset(), key.getLength());
      LOG.debug("Key: " + (keyString.isEmpty() ? "-1" : Integer.parseInt(keyString, 16))
          + " Hash: " + Bytes.toHex(hash.get(), hash.getOffset(), hash.getLength()));
      int intKey = -1;
      if (key.getLength() > 0) {
        intKey = Bytes.toInt(key.get(), key.getOffset(), key.getLength());
      }
      if (actualHashes.containsKey(intKey)) {
        Assert.fail("duplicate key in data files: " + intKey);
      }
      actualHashes.put(intKey, new ImmutableBytesWritable(hash.copyBytes()));
    }
    reader.close();
  }
  FileStatus[] files = fs.listStatus(testDir);
  for (FileStatus file : files) {
    LOG.debug("Output file: " + file.getPath());
  }
  files = fs.listStatus(dataDir);
  for (FileStatus file : files) {
    LOG.debug("Data file: " + file.getPath());
  }
  if (!expectedHashes.equals(actualHashes)) {
    LOG.error("Diff: " + Maps.difference(expectedHashes, actualHashes));
  }
  Assert.assertEquals(expectedHashes, actualHashes);
  TEST_UTIL.deleteTable(tableName);
  TEST_UTIL.cleanupDataTestDirOnTestFS();
}
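The hashTable.run(...) call above takes the same argument array the command-line tool accepts, so an equivalent invocation through ToolRunner looks like the following hedged sketch (the table name and output directory are placeholders):
// Hedged sketch; "myTable" and "/hashes/myTable" are placeholders.
Configuration conf = TEST_UTIL.getConfiguration();
int exitCode = ToolRunner.run(conf, new HashTable(conf),
    new String[] { "--batchsize=300", "--numhashfiles=3", "myTable", "/hashes/myTable" });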