Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.
The class TestHFile, method readStoreFile.
private void readStoreFile(Path storeFilePath) throws Exception {
  // Open the file reader with block cache disabled.
  HFile.Reader reader = HFile.createReader(fs, storeFilePath, conf);
  long offset = 0;
  // Walk every on-disk block up to the load-on-open section recorded in the trailer.
  while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
    HFileBlock block = reader.readBlock(offset, -1, false, true, false, true, null, null);
    offset += block.getOnDiskSizeWithHeader();
  }
}
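readStoreFile only scans a file that already exists, so a caller has to produce one first. Below is a minimal sketch, not taken from the HBase tests, of how such a file could be written with the same writer factory used in the other TestHFile snippets on this page; it assumes the TestHFile fixtures fs, conf, cacheConf, ROOT_DIR, minBlockSize and the createFSOutput helper, and the helper name writeAndReadStoreFile is made up for illustration.
// Hypothetical helper: write a one-cell HFile, then scan its blocks with readStoreFile above.
private void writeAndReadStoreFile() throws Exception {
  Path storeFilePath = new Path(ROOT_DIR, "readstore.hfile");
  FSDataOutputStream fout = createFSOutput(storeFilePath);
  HFileContext fileContext = new HFileContextBuilder().withBlockSize(minBlockSize).build();
  Writer writer = HFile.getWriterFactory(conf, cacheConf)
      .withOutputStream(fout)
      .withFileContext(fileContext)
      .create();
  // One cell is enough to give the file at least one data block before the load-on-open section.
  writer.append(new KeyValue("row".getBytes(), "f1".getBytes(), null, "value".getBytes()));
  writer.close();
  fout.close();
  readStoreFile(storeFilePath);
}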
Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.
The class TestHFile, method metablocks.
private void metablocks(final String compress) throws Exception {
  if (cacheConf == null)
    cacheConf = new CacheConfig(conf);
  Path mFile = new Path(ROOT_DIR, "meta.hfile");
  FSDataOutputStream fout = createFSOutput(mFile);
  HFileContext meta = new HFileContextBuilder()
      .withCompression(HFileWriterImpl.compressionByName(compress))
      .withBlockSize(minBlockSize)
      .build();
  Writer writer = HFile.getWriterFactory(conf, cacheConf)
      .withOutputStream(fout)
      .withFileContext(meta)
      .create();
  someTestingWithMetaBlock(writer);
  writer.close();
  fout.close();
  FSDataInputStream fin = fs.open(mFile);
  Reader reader = HFile.createReaderFromStream(mFile, fs.open(mFile),
      this.fs.getFileStatus(mFile).getLen(), cacheConf, conf);
  reader.loadFileInfo();
  // No data -- this should return false.
  assertFalse(reader.getScanner(false, false).seekTo());
  someReadingWithMetaBlock(reader);
  fs.delete(mFile, true);
  reader.close();
  fin.close();
}
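someTestingWithMetaBlock and someReadingWithMetaBlock are TestHFile helpers that this page does not show. A rough sketch of a plausible shape for them follows; it is not the actual test code. It assumes only HFile.Writer.appendMetaBlock(String, Writable) on the write side and the reader.getMetaBlock(String, boolean) call already used in testNullMetaBlocks below, plus org.apache.hadoop.io.Text as an arbitrary Writable payload and the usual JUnit static imports.
// Illustrative only: append a few named meta blocks with an arbitrary Writable payload.
private void someTestingWithMetaBlock(Writer writer) throws IOException {
  for (int i = 0; i < 3; i++) {
    writer.appendMetaBlock("HFileMeta" + i, new Text("meta payload " + i));
  }
}
// Illustrative only: the return type of getMetaBlock differs across HBase versions,
// so this sketch checks existence rather than contents.
private void someReadingWithMetaBlock(Reader reader) throws IOException {
  for (int i = 0; i < 3; i++) {
    assertNotNull(reader.getMetaBlock("HFileMeta" + i, false));
  }
}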
Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.
The class TestHFile, method testNullMetaBlocks.
@Test
public void testNullMetaBlocks() throws Exception {
  if (cacheConf == null)
    cacheConf = new CacheConfig(conf);
  for (Compression.Algorithm compressAlgo : HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
    Path mFile = new Path(ROOT_DIR, "nometa_" + compressAlgo + ".hfile");
    FSDataOutputStream fout = createFSOutput(mFile);
    HFileContext meta = new HFileContextBuilder()
        .withCompression(compressAlgo)
        .withBlockSize(minBlockSize)
        .build();
    Writer writer = HFile.getWriterFactory(conf, cacheConf)
        .withOutputStream(fout)
        .withFileContext(meta)
        .create();
    KeyValue kv = new KeyValue("foo".getBytes(), "f1".getBytes(), null, "value".getBytes());
    writer.append(kv);
    writer.close();
    fout.close();
    Reader reader = HFile.createReader(fs, mFile, cacheConf, conf);
    reader.loadFileInfo();
    assertNull(reader.getMetaBlock("non-existant", false));
  }
}
Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.
The class TestHFileOutputFormat2, method testColumnFamilySettings.
/**
* Test that {@link HFileOutputFormat2} RecordWriter uses compression and
* bloom filter settings from the column family descriptor
*/
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");
  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  Mockito.doReturn(htd).when(table).getTableDescriptor();
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);
  }
  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);
  try {
    // partial map red setup to get an operational writer for testing
    // We turn off the sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    conf.set("hbase.fs.tmp.dir", dir.toString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job, false);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    // write out random rows
    writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
    writer.close(context);
    // Make sure that a directory was created for every CF
    FileSystem fs = dir.getFileSystem(conf);
    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);
    FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(htd.getFamilies().size(), families.length);
    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
      // verify that the compression on this file matches the configured compression
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
      byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      if (bloomFilter == null)
        bloomFilter = Bytes.toBytes("NONE");
      assertEquals("Incorrect bloom filter used for column family " + familyStr
          + "(reader: " + reader + ")", hcd.getBloomFilterType(),
          BloomType.valueOf(Bytes.toString(bloomFilter)));
      assertEquals("Incorrect compression used for column family " + familyStr
          + "(reader: " + reader + ")", hcd.getCompressionType(),
          reader.getFileContext().getCompression());
    }
  } finally {
    dir.getFileSystem(conf).delete(dir, true);
  }
}
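The families driving these assertions come from HBaseTestingUtility.generateColumnDescriptors(), which builds families covering the available compression, encoding and bloom filter combinations. The checks only pass because both settings live on the HColumnDescriptor itself; a hand-built family would exercise the same path. A small sketch with arbitrary example values (the family name and choices below are illustrative, not what the utility generates):
// Example only: a family whose descriptor carries both settings the assertions above check.
HColumnDescriptor exampleFamily = new HColumnDescriptor("f_example");
exampleFamily.setCompressionType(Compression.Algorithm.GZ); // surfaces via reader.getFileContext().getCompression()
exampleFamily.setBloomFilterType(BloomType.ROW);            // surfaces via BLOOM_FILTER_TYPE_KEY in the file info
// Adding this family to the table descriptor (htd.addFamily(exampleFamily)) would make the
// HFileOutputFormat2 RecordWriter apply these settings to that family's HFiles.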
Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.
The class PartitionedMobCompactor, method select.
/**
 * Selects the mob/del files to compact.
 * Iterates the candidates to find all the del files and small mob files.
 * @param candidates All the candidates.
 * @param allFiles Whether to add all mob files into the compaction.
 * @return A compaction request.
 * @throws IOException if an IO failure is encountered
 */
protected PartitionedMobCompactionRequest select(List<FileStatus> candidates, boolean allFiles) throws IOException {
  final Map<CompactionPartitionId, CompactionPartition> filesToCompact = new HashMap<>();
  final CompactionPartitionId id = new CompactionPartitionId();
  final NavigableMap<CompactionDelPartitionId, CompactionDelPartition> delFilesToCompact = new TreeMap<>();
  final CompactionDelPartitionId delId = new CompactionDelPartitionId();
  final ArrayList<CompactionDelPartition> allDelPartitions = new ArrayList<>();
  int selectedFileCount = 0;
  int irrelevantFileCount = 0;
  int totalDelFiles = 0;
  MobCompactPartitionPolicy policy = column.getMobCompactPartitionPolicy();
  Calendar calendar = Calendar.getInstance();
  Date currentDate = new Date();
  Date firstDayOfCurrentMonth = null;
  Date firstDayOfCurrentWeek = null;
  if (policy == MobCompactPartitionPolicy.MONTHLY) {
    firstDayOfCurrentMonth = MobUtils.getFirstDayOfMonth(calendar, currentDate);
    firstDayOfCurrentWeek = MobUtils.getFirstDayOfWeek(calendar, currentDate);
  } else if (policy == MobCompactPartitionPolicy.WEEKLY) {
    firstDayOfCurrentWeek = MobUtils.getFirstDayOfWeek(calendar, currentDate);
  }
  // Check whether there are any del files so the rest of the selection can be optimized.
  // If there are del files, each partition needs to read its startKey and endKey from the
  // files; if there are none, that read can be skipped entirely.
  boolean withDelFiles = false;
  for (FileStatus file : candidates) {
    if (!file.isFile()) {
      continue;
    }
    // Resolve HFileLink indirection so a del file behind a link is still detected.
    FileStatus linkedFile = file;
    if (HFileLink.isHFileLink(file.getPath())) {
      HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
      linkedFile = getLinkedFileStatus(link);
      if (linkedFile == null) {
        continue;
      }
    }
    if (StoreFileInfo.isDelFile(linkedFile.getPath())) {
      withDelFiles = true;
      break;
    }
  }
  for (FileStatus file : candidates) {
    if (!file.isFile()) {
      irrelevantFileCount++;
      continue;
    }
    // group the del files and small files.
    FileStatus linkedFile = file;
    if (HFileLink.isHFileLink(file.getPath())) {
      HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
      linkedFile = getLinkedFileStatus(link);
      if (linkedFile == null) {
        // If the linked file cannot be found, regard it as an irrelevant file
        irrelevantFileCount++;
        continue;
      }
    }
    if (withDelFiles && StoreFileInfo.isDelFile(linkedFile.getPath())) {
      // File in the Del Partition List
      // Get delId from the file
      Reader reader = HFile.createReader(fs, linkedFile.getPath(), CacheConfig.DISABLED, conf);
      try {
        delId.setStartKey(reader.getFirstRowKey());
        delId.setEndKey(reader.getLastRowKey());
      } finally {
        reader.close();
      }
      CompactionDelPartition delPartition = delFilesToCompact.get(delId);
      if (delPartition == null) {
        CompactionDelPartitionId newDelId =
            new CompactionDelPartitionId(delId.getStartKey(), delId.getEndKey());
        delPartition = new CompactionDelPartition(newDelId);
        delFilesToCompact.put(newDelId, delPartition);
      }
      delPartition.addDelFile(file);
      totalDelFiles++;
    } else {
      String fileName = linkedFile.getPath().getName();
      String date = MobFileName.getDateFromName(fileName);
      boolean skipCompaction = MobUtils.fillPartitionId(id, firstDayOfCurrentMonth,
          firstDayOfCurrentWeek, date, policy, calendar, mergeableSize);
      if (allFiles || (!skipCompaction && (linkedFile.getLen() < id.getThreshold()))) {
        // add all files if allFiles is true,
        // otherwise add the small files to the merge pool
        // filter out files which are not supposed to be compacted with the
        // current policy
        id.setStartKey(MobFileName.getStartKeyFromName(fileName));
        CompactionPartition compactionPartition = filesToCompact.get(id);
        if (compactionPartition == null) {
          CompactionPartitionId newId = new CompactionPartitionId(id.getStartKey(), id.getDate());
          compactionPartition = new CompactionPartition(newId);
          compactionPartition.addFile(file);
          filesToCompact.put(newId, compactionPartition);
          newId.updateLatestDate(date);
        } else {
          compactionPartition.addFile(file);
          compactionPartition.getPartitionId().updateLatestDate(date);
        }
        if (withDelFiles) {
          // get startKey and endKey from the file and update partition
          // TODO: is it possible to skip read of most hfiles?
          Reader reader = HFile.createReader(fs, linkedFile.getPath(), CacheConfig.DISABLED, conf);
          try {
            compactionPartition.setStartKey(reader.getFirstRowKey());
            compactionPartition.setEndKey(reader.getLastRowKey());
          } finally {
            reader.close();
          }
        }
        selectedFileCount++;
      }
    }
  }
  /*
   * Merge del files so there are only non-overlapped del file lists
   */
  for (Map.Entry<CompactionDelPartitionId, CompactionDelPartition> entry : delFilesToCompact.entrySet()) {
    if (allDelPartitions.size() > 0) {
      // check if the current key range overlaps the previous one
      CompactionDelPartition prev = allDelPartitions.get(allDelPartitions.size() - 1);
      if (Bytes.compareTo(prev.getId().getEndKey(), entry.getKey().getStartKey()) >= 0) {
        // merge them together
        prev.getId().setEndKey(entry.getValue().getId().getEndKey());
        prev.addDelFileList(entry.getValue().listDelFiles());
      } else {
        allDelPartitions.add(entry.getValue());
      }
    } else {
      allDelPartitions.add(entry.getValue());
    }
  }
  PartitionedMobCompactionRequest request =
      new PartitionedMobCompactionRequest(filesToCompact.values(), allDelPartitions);
  if (candidates.size() == (totalDelFiles + selectedFileCount + irrelevantFileCount)) {
    // all the files are selected
    request.setCompactionType(CompactionType.ALL_FILES);
  }
  LOG.info("The compaction type is " + request.getCompactionType() + ", the request has "
      + totalDelFiles + " del files, " + selectedFileCount + " selected files, and "
      + irrelevantFileCount + " irrelevant files");
  return request;
}
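The merge loop near the end iterates delFilesToCompact, a TreeMap, so the del partitions are visited in key order; the current partition is folded into the previously accumulated one exactly when the previous end key is greater than or equal to the current start key. A standalone sketch of that comparison, with made-up keys, in the spirit of the loop above:
// Example only: the overlap test used when folding del partitions together.
byte[] prevEndKey = Bytes.toBytes("row-500");
byte[] nextStartKey = Bytes.toBytes("row-400");
if (Bytes.compareTo(prevEndKey, nextStartKey) >= 0) {
  // The key ranges touch or overlap, so their del files are folded into one partition
  // and, as in the loop above, the merged partition takes the current partition's end key.
}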