Search in sources:

Example 1 with Reader

Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.

The class TestHFile, method readStoreFile (a scanner usage sketch follows this example).

private void readStoreFile(Path storeFilePath) throws Exception {
    // Open the file reader with block cache disabled.
    HFile.Reader reader = HFile.createReader(fs, storeFilePath, conf);
    long offset = 0;
    // Walk every on-disk block up to the start of the load-on-open section,
    // reading each block without caching it (cacheBlock=false, pread=true).
    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
        HFileBlock block = reader.readBlock(offset, -1, false, true, false, true, null, null);
        offset += block.getOnDiskSizeWithHeader();
    }
    reader.close();
}
Also used : Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader)
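
A minimal, self-contained sketch (not taken from the HBase sources) of the more common way to consume an HFile.Reader: open it, load the file info, and iterate every cell with an HFileScanner. The class name HFileCellDump is invented, and HFileScanner#getCell is assumed to be available; some versions expose getKeyValue instead.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;

public class HFileCellDump {

    // Open a reader and print every cell in the file.
    public static void dump(FileSystem fs, Path path, Configuration conf) throws Exception {
        HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), conf);
        try {
            reader.loadFileInfo();
            // cacheBlocks=false, pread=false: a one-off sequential scan.
            HFileScanner scanner = reader.getScanner(false, false);
            if (scanner.seekTo()) {
                do {
                    Cell cell = scanner.getCell();
                    System.out.println(cell);
                } while (scanner.next());
            }
        } finally {
            reader.close();
        }
    }
}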

Example 2 with Reader

Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.

The class TestHFile, method metablocks (a sketch of the meta block helpers follows this example).

private void metablocks(final String compress) throws Exception {
    if (cacheConf == null)
        cacheConf = new CacheConfig(conf);
    Path mFile = new Path(ROOT_DIR, "meta.hfile");
    FSDataOutputStream fout = createFSOutput(mFile);
    HFileContext meta = new HFileContextBuilder().withCompression(HFileWriterImpl.compressionByName(compress)).withBlockSize(minBlockSize).build();
    Writer writer = HFile.getWriterFactory(conf, cacheConf).withOutputStream(fout).withFileContext(meta).create();
    someTestingWithMetaBlock(writer);
    writer.close();
    fout.close();
    FSDataInputStream fin = fs.open(mFile);
    // Reuse the stream that was just opened instead of opening the file a second time.
    Reader reader = HFile.createReaderFromStream(mFile, fin, this.fs.getFileStatus(mFile).getLen(), cacheConf, conf);
    reader.loadFileInfo();
    // No data -- this should return false.
    assertFalse(reader.getScanner(false, false).seekTo());
    someReadingWithMetaBlock(reader);
    // Close the reader and stream before deleting the backing file.
    reader.close();
    fin.close();
    fs.delete(mFile, true);
}
Also used : Path(org.apache.hadoop.fs.Path) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) Writer(org.apache.hadoop.hbase.io.hfile.HFile.Writer)
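
The helpers someTestingWithMetaBlock and someReadingWithMetaBlock are defined elsewhere in TestHFile and are not shown here. As a rough illustration only, stand-ins could look like the sketch below; it assumes HFile.Writer#appendMetaBlock accepts a Writable payload and that HFile.Reader#getMetaBlock returns a ByteBuffer (null when the named block is absent). The block names and payloads are invented.

import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.io.Text;

public class MetaBlockHelperSketch {

    // Hypothetical stand-in for someTestingWithMetaBlock: write two named meta blocks.
    static void writeMetaBlocks(HFile.Writer writer) {
        writer.appendMetaBlock("TestMeta1", new Text("sample payload one"));
        writer.appendMetaBlock("TestMeta2", new Text("sample payload two"));
    }

    // Hypothetical stand-in for someReadingWithMetaBlock: read one block back by name.
    static void readMetaBlocks(HFile.Reader reader) throws Exception {
        // cacheBlock=false: a one-off read of the meta block.
        ByteBuffer buf = reader.getMetaBlock("TestMeta1", false);
        if (buf != null) {
            // The buffer holds the serialized Writable payload written above.
            System.out.println("TestMeta1 holds " + buf.remaining() + " bytes");
        }
    }
}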

Example 3 with Reader

Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.

The class TestHFile, method testNullMetaBlocks.

@Test
public void testNullMetaBlocks() throws Exception {
    if (cacheConf == null)
        cacheConf = new CacheConfig(conf);
    for (Compression.Algorithm compressAlgo : HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
        Path mFile = new Path(ROOT_DIR, "nometa_" + compressAlgo + ".hfile");
        FSDataOutputStream fout = createFSOutput(mFile);
        HFileContext meta = new HFileContextBuilder().withCompression(compressAlgo).withBlockSize(minBlockSize).build();
        Writer writer = HFile.getWriterFactory(conf, cacheConf).withOutputStream(fout).withFileContext(meta).create();
        KeyValue kv = new KeyValue("foo".getBytes(), "f1".getBytes(), null, "value".getBytes());
        writer.append(kv);
        writer.close();
        fout.close();
        Reader reader = HFile.createReader(fs, mFile, cacheConf, conf);
        reader.loadFileInfo();
        // Requesting a meta block that was never written should return null.
        assertNull(reader.getMetaBlock("non-existent", false));
        reader.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Compression(org.apache.hadoop.hbase.io.compress.Compression) KeyValue(org.apache.hadoop.hbase.KeyValue) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) Writer(org.apache.hadoop.hbase.io.hfile.HFile.Writer) Test(org.junit.Test)

Example 4 with Reader

Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.

The class TestHFileOutputFormat2, method testColumnFamilySettings (a column family configuration sketch follows this example).

/**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
   * bloom filter settings from the column family descriptor
   */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");
    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
        htd.addFamily(hcd);
    }
    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);
    try {
        // partial map red setup to get an operational writer for testing
        // We turn off the sequence file compression, because DefaultCodec
        // pollutes the GZip codec pool with an incompatible compressor.
        conf.set("io.seqfile.compression.type", "NONE");
        conf.set("hbase.fs.tmp.dir", dir.toString());
        // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
        conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
        Job job = new Job(conf, "testLocalMRIncrementalLoad");
        job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
        setupRandomGeneratorMapper(job, false);
        HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        // write out random rows
        writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
        writer.close(context);
        FileSystem fs = dir.getFileSystem(conf);
        // Commit so that the filesystem has one directory per column family,
        // then verify below that a directory was created for every family.
        hof.getOutputCommitter(context).commitTask(context);
        hof.getOutputCommitter(context).commitJob(context);
        FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
        assertEquals(htd.getFamilies().size(), families.length);
        for (FileStatus f : families) {
            String familyStr = f.getPath().getName();
            HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
            // verify that the compression on this file matches the configured
            // compression
            Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
            Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
            Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
            byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
            if (bloomFilter == null)
                bloomFilter = Bytes.toBytes("NONE");
            assertEquals("Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
            assertEquals("Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
        }
    } finally {
        dir.getFileSystem(conf).delete(dir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Job(org.apache.hadoop.mapreduce.Job) Cell(org.apache.hadoop.hbase.Cell) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) FSUtils(org.apache.hadoop.hbase.util.FSUtils) Ignore(org.junit.Ignore) Test(org.junit.Test)
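
The settings verified above come from the HColumnDescriptor, and HFileOutputFormat2.configureIncrementalLoad is what carries them into the job configuration. Below is a minimal sketch of declaring such per-family settings; the table and family names are hypothetical.

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.regionserver.BloomType;

public class FamilySettingsSketch {

    // Build a descriptor whose per-family compression and bloom filter settings
    // configureIncrementalLoad serializes into the MapReduce job configuration.
    static HTableDescriptor exampleDescriptor() {
        HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("example_table"));
        HColumnDescriptor hcd = new HColumnDescriptor("f1");
        hcd.setCompressionType(Compression.Algorithm.GZ);
        hcd.setBloomFilterType(BloomType.ROWCOL);
        htd.addFamily(hcd);
        return htd;
    }
}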

Example 5 with Reader

Use of org.apache.hadoop.hbase.io.hfile.HFile.Reader in project hbase by apache.

The class PartitionedMobCompactor, method select (a sketch of the del-partition merge step follows this example).

/**
   * Selects the compacted mob/del files.
   * Iterates the candidates to find out all the del files and small mob files.
   * @param candidates All the candidates.
   * @param allFiles Whether to add all mob files into the compaction.
   * @return A compaction request.
   * @throws IOException if IO failure is encountered
   */
protected PartitionedMobCompactionRequest select(List<FileStatus> candidates, boolean allFiles) throws IOException {
    final Map<CompactionPartitionId, CompactionPartition> filesToCompact = new HashMap<>();
    final CompactionPartitionId id = new CompactionPartitionId();
    final NavigableMap<CompactionDelPartitionId, CompactionDelPartition> delFilesToCompact = new TreeMap<>();
    final CompactionDelPartitionId delId = new CompactionDelPartitionId();
    final ArrayList<CompactionDelPartition> allDelPartitions = new ArrayList<>();
    int selectedFileCount = 0;
    int irrelevantFileCount = 0;
    int totalDelFiles = 0;
    MobCompactPartitionPolicy policy = column.getMobCompactPartitionPolicy();
    Calendar calendar = Calendar.getInstance();
    Date currentDate = new Date();
    Date firstDayOfCurrentMonth = null;
    Date firstDayOfCurrentWeek = null;
    if (policy == MobCompactPartitionPolicy.MONTHLY) {
        firstDayOfCurrentMonth = MobUtils.getFirstDayOfMonth(calendar, currentDate);
        firstDayOfCurrentWeek = MobUtils.getFirstDayOfWeek(calendar, currentDate);
    } else if (policy == MobCompactPartitionPolicy.WEEKLY) {
        firstDayOfCurrentWeek = MobUtils.getFirstDayOfWeek(calendar, currentDate);
    }
    // First pass: check whether there are any del files. Only when del files exist does each
    // partition need its startKey and endKey read from the HFiles; skipping that read when
    // there are no del files is an optimization.
    boolean withDelFiles = false;
    for (FileStatus file : candidates) {
        if (!file.isFile()) {
            continue;
        }
        // group the del files and small files.
        FileStatus linkedFile = file;
        if (HFileLink.isHFileLink(file.getPath())) {
            HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
            linkedFile = getLinkedFileStatus(link);
            if (linkedFile == null) {
                continue;
            }
        }
        if (StoreFileInfo.isDelFile(linkedFile.getPath())) {
            withDelFiles = true;
            break;
        }
    }
    for (FileStatus file : candidates) {
        if (!file.isFile()) {
            irrelevantFileCount++;
            continue;
        }
        // group the del files and small files.
        FileStatus linkedFile = file;
        if (HFileLink.isHFileLink(file.getPath())) {
            HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
            linkedFile = getLinkedFileStatus(link);
            if (linkedFile == null) {
                // If the linked file cannot be found, count it as an irrelevant file.
                irrelevantFileCount++;
                continue;
            }
        }
        if (withDelFiles && StoreFileInfo.isDelFile(linkedFile.getPath())) {
            // File in the Del Partition List
            // Get delId from the file
            Reader reader = HFile.createReader(fs, linkedFile.getPath(), CacheConfig.DISABLED, conf);
            try {
                delId.setStartKey(reader.getFirstRowKey());
                delId.setEndKey(reader.getLastRowKey());
            } finally {
                reader.close();
            }
            CompactionDelPartition delPartition = delFilesToCompact.get(delId);
            if (delPartition == null) {
                CompactionDelPartitionId newDelId = new CompactionDelPartitionId(delId.getStartKey(), delId.getEndKey());
                delPartition = new CompactionDelPartition(newDelId);
                delFilesToCompact.put(newDelId, delPartition);
            }
            delPartition.addDelFile(file);
            totalDelFiles++;
        } else {
            String fileName = linkedFile.getPath().getName();
            String date = MobFileName.getDateFromName(fileName);
            boolean skipCompaction = MobUtils.fillPartitionId(id, firstDayOfCurrentMonth, firstDayOfCurrentWeek, date, policy, calendar, mergeableSize);
            if (allFiles || (!skipCompaction && (linkedFile.getLen() < id.getThreshold()))) {
                // Add every file when allFiles is true; otherwise add only the small files,
                // filtering out files that the current policy excludes from compaction.
                id.setStartKey(MobFileName.getStartKeyFromName(fileName));
                CompactionPartition compactionPartition = filesToCompact.get(id);
                if (compactionPartition == null) {
                    CompactionPartitionId newId = new CompactionPartitionId(id.getStartKey(), id.getDate());
                    compactionPartition = new CompactionPartition(newId);
                    compactionPartition.addFile(file);
                    filesToCompact.put(newId, compactionPartition);
                    newId.updateLatestDate(date);
                } else {
                    compactionPartition.addFile(file);
                    compactionPartition.getPartitionId().updateLatestDate(date);
                }
                if (withDelFiles) {
                    // get startKey and endKey from the file and update partition
                    // TODO: is it possible to skip read of most hfiles?
                    Reader reader = HFile.createReader(fs, linkedFile.getPath(), CacheConfig.DISABLED, conf);
                    try {
                        compactionPartition.setStartKey(reader.getFirstRowKey());
                        compactionPartition.setEndKey(reader.getLastRowKey());
                    } finally {
                        reader.close();
                    }
                }
                selectedFileCount++;
            }
        }
    }
    /*
     * Merge del partitions so that only non-overlapping del file lists remain.
     */
    for (Map.Entry<CompactionDelPartitionId, CompactionDelPartition> entry : delFilesToCompact.entrySet()) {
        if (allDelPartitions.size() > 0) {
            // check if the current key range overlaps the previous one
            CompactionDelPartition prev = allDelPartitions.get(allDelPartitions.size() - 1);
            if (Bytes.compareTo(prev.getId().getEndKey(), entry.getKey().getStartKey()) >= 0) {
                // merge them together
                prev.getId().setEndKey(entry.getValue().getId().getEndKey());
                prev.addDelFileList(entry.getValue().listDelFiles());
            } else {
                allDelPartitions.add(entry.getValue());
            }
        } else {
            allDelPartitions.add(entry.getValue());
        }
    }
    PartitionedMobCompactionRequest request = new PartitionedMobCompactionRequest(filesToCompact.values(), allDelPartitions);
    if (candidates.size() == (totalDelFiles + selectedFileCount + irrelevantFileCount)) {
        // all the files are selected
        request.setCompactionType(CompactionType.ALL_FILES);
    }
    LOG.info("The compaction type is " + request.getCompactionType() + ", the request has " + totalDelFiles + " del files, " + selectedFileCount + " selected files, and " + irrelevantFileCount + " irrelevant files");
    return request;
}
Also used : CompactionDelPartitionId(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionDelPartitionId) HFileLink(org.apache.hadoop.hbase.io.HFileLink) CompactionPartition(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionPartition) FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) Calendar(java.util.Calendar) ArrayList(java.util.ArrayList) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) CompactionPartitionId(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionPartitionId) TreeMap(java.util.TreeMap) Date(java.util.Date) CompactionDelPartition(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionDelPartition) MobCompactPartitionPolicy(org.apache.hadoop.hbase.client.MobCompactPartitionPolicy) Map(java.util.Map) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)
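
The subtlest part of select() is the final loop that collapses overlapping del partitions. The sketch below isolates that merge step; it uses a hypothetical Range class in place of CompactionDelPartition and assumes, as select() does, that the input is sorted by start key.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.util.Bytes;

public class DelRangeMergeSketch {

    // Hypothetical stand-in for CompactionDelPartition: a [startKey, endKey] row-key range.
    static class Range {
        byte[] startKey;
        byte[] endKey;
        Range(byte[] startKey, byte[] endKey) {
            this.startKey = startKey;
            this.endKey = endKey;
        }
    }

    // Collapse ranges sorted by start key into non-overlapping ranges,
    // mirroring the loop over delFilesToCompact.entrySet() in select().
    static List<Range> merge(List<Range> sortedByStartKey) {
        List<Range> merged = new ArrayList<>();
        for (Range r : sortedByStartKey) {
            if (!merged.isEmpty()) {
                Range prev = merged.get(merged.size() - 1);
                if (Bytes.compareTo(prev.endKey, r.startKey) >= 0) {
                    // Overlap: extend the previous range, as select() does, instead of adding a new one.
                    prev.endKey = r.endKey;
                    continue;
                }
            }
            merged.add(r);
        }
        return merged;
    }
}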

Aggregations

Reader (org.apache.hadoop.hbase.io.hfile.HFile.Reader): 12
Path (org.apache.hadoop.fs.Path): 9
Test (org.junit.Test): 7
Writer (org.apache.hadoop.hbase.io.hfile.HFile.Writer): 5
StoreFileWriter (org.apache.hadoop.hbase.regionserver.StoreFileWriter): 5
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 4
Configuration (org.apache.hadoop.conf.Configuration): 3
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 3
FileStatus (org.apache.hadoop.fs.FileStatus): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 3
Cell (org.apache.hadoop.hbase.Cell): 3
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 3
KeyValue (org.apache.hadoop.hbase.KeyValue): 3
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 3
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 3
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 3
Job (org.apache.hadoop.mapreduce.Job): 3
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 3
ByteBuffer (java.nio.ByteBuffer): 2