
Example 6 with ReaderContext

Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in the Apache HBase project.

From the class TestRowPrefixBloomFilter, method testRowPrefixBloomFilterWithScan.

@Test
public void testRowPrefixBloomFilterWithScan() throws Exception {
    FileSystem fs = FileSystem.getLocal(conf);
    int expKeys = fixedLengthExpKeys;
    // write the file
    Path f = new Path(testDir, name.getMethodName());
    writeStoreFile(f, bt, expKeys);
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    HFileInfo fileInfo = new HFileInfo(context, conf);
    StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
    fileInfo.initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader);
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
    // Scan with a valid row style: startRow and stopRow share a common prefix,
    // and the common prefix is at least prefixLength long.
    // The prefix row is in the bloom filter.
    String prefixRow = String.format(prefixFormatter, prefixRowCount - 2);
    String startRow = generateRowWithSuffix(prefixRow, 0);
    String stopRow = generateRowWithSuffix(prefixRow, 1);
    Scan scan = new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
    boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
    assertTrue(exists);
    // prefix row not in bloom
    prefixRow = String.format(prefixFormatter, prefixRowCount - 1);
    startRow = generateRowWithSuffix(prefixRow, 0);
    stopRow = generateRowWithSuffix(prefixRow, 1);
    scan = new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
    exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
    assertFalse(exists);
    // There is no common prefix between startRow and stopRow.
    prefixRow = String.format(prefixFormatter, prefixRowCount - 2);
    startRow = generateRowWithSuffix(prefixRow, 0);
    scan = new Scan().withStartRow(Bytes.toBytes(startRow));
    exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
    assertTrue(exists);
    // startRow and stopRow have a common prefix.
    // But the length of the common prefix is less than prefixLength.
    String prefixStartRow = String.format(prefixFormatter, prefixRowCount - 2);
    String prefixStopRow = String.format(prefixFormatter, prefixRowCount - 1);
    startRow = generateRowWithSuffix(prefixStartRow, 0);
    stopRow = generateRowWithSuffix(prefixStopRow, 0);
    scan = new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
    exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
    assertTrue(exists);
    // evict because we are about to delete the file
    reader.close(true);
    fs.delete(f, true);
}
Also used: Path (org.apache.hadoop.fs.Path), HFileInfo (org.apache.hadoop.hbase.io.hfile.HFileInfo), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), FileSystem (org.apache.hadoop.fs.FileSystem), ReaderContext (org.apache.hadoop.hbase.io.hfile.ReaderContext), ReaderContextBuilder (org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder), Scan (org.apache.hadoop.hbase.client.Scan), Test (org.junit.Test)
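
The same open sequence recurs throughout these examples: build a ReaderContext with ReaderContextBuilder, wrap it in an HFileInfo, construct a StoreFileReader, then call initMetaAndIndex before loading file info and bloom filters. The following is a minimal sketch of that sequence on its own, assuming a Configuration, CacheConfig, and HFile path are already available; the class and method names are illustrative, and the StoreFileReader constructor used here may only be visible inside the org.apache.hadoop.hbase.regionserver package, as it is in the tests above.

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileInfo;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;

final class StoreFileReaderOpenSketch {

    /** Opens a StoreFileReader for the given HFile, mirroring the test setup above. */
    static StoreFileReader openStoreFileReader(Configuration conf, CacheConfig cacheConf, Path hfilePath)
            throws IOException {
        FileSystem fs = hfilePath.getFileSystem(conf);
        // Describe where and how the file is read.
        ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, hfilePath).build();
        // Holder for the file's trailer and meta information.
        HFileInfo fileInfo = new HFileInfo(context, conf);
        // The AtomicInteger is the shared reference count; the examples above pass a fresh one.
        StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
        // Meta blocks and indexes must be initialized before the reader is used.
        fileInfo.initMetaAndIndex(reader.getHFileReader());
        reader.loadFileInfo();
        reader.loadBloomfilter();
        return reader;
    }
}

When the caller is done, reader.close(true) evicts cached blocks, as the tests do before deleting the file.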

Example 7 with ReaderContext

Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in the Apache HBase project.

From the class BulkLoadHFilesTool, method copyHFileHalf.

/**
 * Copy half of an HFile into a new HFile.
 */
private static void copyHFileHalf(Configuration conf, Path inFile, Path outFile, Reference reference, ColumnFamilyDescriptor familyDescriptor) throws IOException {
    FileSystem fs = inFile.getFileSystem(conf);
    CacheConfig cacheConf = CacheConfig.DISABLED;
    HalfStoreFileReader halfReader = null;
    StoreFileWriter halfWriter = null;
    try {
        ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, inFile).build();
        HFileInfo hfile = new HFileInfo(context, conf);
        halfReader = new HalfStoreFileReader(context, hfile, cacheConf, reference, new AtomicInteger(0), conf);
        hfile.initMetaAndIndex(halfReader.getHFileReader());
        Map<byte[], byte[]> fileInfo = halfReader.loadFileInfo();
        int blocksize = familyDescriptor.getBlocksize();
        Algorithm compression = familyDescriptor.getCompressionType();
        BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
        HFileContext hFileContext = new HFileContextBuilder()
            .withCompression(compression)
            .withChecksumType(StoreUtils.getChecksumType(conf))
            .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf))
            .withBlockSize(blocksize)
            .withDataBlockEncoding(familyDescriptor.getDataBlockEncoding())
            .withIncludesTags(true)
            .build();
        halfWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
            .withFilePath(outFile)
            .withBloomType(bloomFilterType)
            .withFileContext(hFileContext)
            .build();
        HFileScanner scanner = halfReader.getScanner(false, false, false);
        scanner.seekTo();
        do {
            halfWriter.append(scanner.getCell());
        } while (scanner.next());
        for (Map.Entry<byte[], byte[]> entry : fileInfo.entrySet()) {
            if (shouldCopyHFileMetaKey(entry.getKey())) {
                halfWriter.appendFileInfo(entry.getKey(), entry.getValue());
            }
        }
    } finally {
        if (halfReader != null) {
            try {
                halfReader.close(cacheConf.shouldEvictOnClose());
            } catch (IOException e) {
                LOG.warn("failed to close hfile reader for " + inFile, e);
            }
        }
        if (halfWriter != null) {
            halfWriter.close();
        }
    }
}
Also used: StoreFileWriter (org.apache.hadoop.hbase.regionserver.StoreFileWriter), HalfStoreFileReader (org.apache.hadoop.hbase.io.HalfStoreFileReader), HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner), HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm), HFileInfo (org.apache.hadoop.hbase.io.hfile.HFileInfo), HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext), BloomType (org.apache.hadoop.hbase.regionserver.BloomType), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), FileSystem (org.apache.hadoop.fs.FileSystem), ReaderContext (org.apache.hadoop.hbase.io.hfile.ReaderContext), ReaderContextBuilder (org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder), CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig), Map (java.util.Map), SortedMap (java.util.SortedMap), HashMap (java.util.HashMap), TreeMap (java.util.TreeMap)
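
Because copyHFileHalf is private, callers inside BulkLoadHFilesTool supply a Reference that says which half of the parent file to copy. The snippet below is a hypothetical driver, not the tool's actual split code: it assumes the static factories Reference.createBottomReference(byte[]) and Reference.createTopReference(byte[]) from org.apache.hadoop.hbase.io, plus a split row chosen by the caller.

// Hypothetical driver for copyHFileHalf, assumed to sit in the same class as the method above.
private static void splitHFileAtRow(Configuration conf, Path inFile, Path bottomOut, Path topOut,
        byte[] splitRow, ColumnFamilyDescriptor familyDescriptor) throws IOException {
    // Cells sorting before the split row land in the "bottom" half, the rest in the "top" half.
    copyHFileHalf(conf, inFile, bottomOut, Reference.createBottomReference(splitRow), familyDescriptor);
    copyHFileHalf(conf, inFile, topOut, Reference.createTopReference(splitRow), familyDescriptor);
}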

Example 8 with ReaderContext

Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in the Apache HBase project.

From the class TestHStoreFile, method testReseek.

/**
 * Test for HBASE-8012
 */
@Test
public void testReseek() throws Exception {
    // write the file
    Path f = new Path(ROOT_DIR, name.getMethodName());
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f).withFileContext(meta).build();
    writeStoreFile(writer);
    writer.close();
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    HFileInfo fileInfo = new HFileInfo(context, conf);
    StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
    fileInfo.initMetaAndIndex(reader.getHFileReader());
    // Now do reseek with empty KV to position to the beginning of the file
    KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
    StoreFileScanner s = getStoreFileScanner(reader, false, false);
    s.reseek(k);
    assertNotNull("Intial reseek should position at the beginning of the file", s.peek());
}
Also used: Path (org.apache.hadoop.fs.Path), KeyValue (org.apache.hadoop.hbase.KeyValue), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), ReaderContext (org.apache.hadoop.hbase.io.hfile.ReaderContext), ReaderContextBuilder (org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder), HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder), HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext), HFileInfo (org.apache.hadoop.hbase.io.hfile.HFileInfo), Test (org.junit.Test)

Example 9 with ReaderContext

Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in the Apache HBase project.

From the class HStoreFile, method open.

/**
 * Opens the reader on this store file. Called by the constructor.
 * @see #closeStoreFile(boolean)
 */
private void open() throws IOException {
    fileInfo.initHDFSBlocksDistribution();
    long readahead = fileInfo.isNoReadahead() ? 0L : -1L;
    ReaderContext context = fileInfo.createReaderContext(false, readahead, ReaderType.PREAD);
    fileInfo.initHFileInfo(context);
    StoreFileReader reader = fileInfo.preStoreFileReaderOpen(context, cacheConf);
    if (reader == null) {
        reader = fileInfo.createReader(context, cacheConf);
        fileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    }
    this.initialReader = fileInfo.postStoreFileReaderOpen(context, cacheConf, reader);
    if (InputStreamBlockDistribution.isEnabled(fileInfo.getConf())) {
        boolean useHBaseChecksum = context.getInputStreamWrapper().shouldUseHBaseChecksum();
        FSDataInputStream stream = context.getInputStreamWrapper().getStream(useHBaseChecksum);
        this.initialReaderBlockDistribution = new InputStreamBlockDistribution(stream, fileInfo);
    }
    // Load up indices and fileinfo. This also loads Bloom filter type.
    metadataMap = Collections.unmodifiableMap(initialReader.loadFileInfo());
    // Read in our metadata.
    byte[] b = metadataMap.get(MAX_SEQ_ID_KEY);
    if (b != null) {
        // By convention, if this is a half hfile, the top half has a sequence number
        // greater than the bottom half. That's why we add one below. It's done in case
        // the two halves are ever merged back together (rare). Without it, on open of
        // the store, since store files are distinguished by sequence id, one half
        // would subsume the other.
        this.sequenceid = Bytes.toLong(b);
        if (fileInfo.isTopReference()) {
            this.sequenceid += 1;
        }
    }
    if (isBulkLoadResult()) {
        // generate the sequenceId from the fileName
        // fileName is of the form <randomName>_SeqId_<id-when-loaded>_
        String fileName = this.getPath().getName();
        // Use lastIndexOf() to get the last, most recent bulk load seqId.
        int startPos = fileName.lastIndexOf("SeqId_");
        if (startPos != -1) {
            this.sequenceid = Long.parseLong(fileName.substring(startPos + 6, fileName.indexOf('_', startPos + 6)));
            // Handle reference files as done above.
            if (fileInfo.isTopReference()) {
                this.sequenceid += 1;
            }
        }
        // SKIP_RESET_SEQ_ID only applies to bulk loaded files.
        // In mob compaction, the hfile whose cells contain the path of a new mob file is bulk
        // loaded into hbase; these cells have the same seqIds as the old ones. We do not want
        // to assign new seqIds to them, since that could mess up the visibility of cells that
        // have the same row key but different seqIds.
        boolean skipResetSeqId = isSkipResetSeqId(metadataMap.get(SKIP_RESET_SEQ_ID));
        if (skipResetSeqId) {
            // increase the seqId when it is a bulk loaded file from mob compaction.
            this.sequenceid += 1;
        }
        initialReader.setSkipResetSeqId(skipResetSeqId);
        initialReader.setBulkLoaded(true);
    }
    initialReader.setSequenceID(this.sequenceid);
    b = metadataMap.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
    if (b != null) {
        this.maxMemstoreTS = Bytes.toLong(b);
    }
    b = metadataMap.get(MAJOR_COMPACTION_KEY);
    if (b != null) {
        boolean mc = Bytes.toBoolean(b);
        if (this.majorCompaction == null) {
            this.majorCompaction = new AtomicBoolean(mc);
        } else {
            this.majorCompaction.set(mc);
        }
    } else {
        // Presume it is not major compacted if it doesn't explicitly say so.
        // HFileOutputFormat explicitly sets the major compacted key.
        this.majorCompaction = new AtomicBoolean(false);
    }
    b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY);
    this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b));
    BloomType hfileBloomType = initialReader.getBloomFilterType();
    if (cfBloomType != BloomType.NONE) {
        initialReader.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
        if (hfileBloomType != cfBloomType) {
            LOG.debug("HFile Bloom filter type for " + initialReader.getHFileReader().getName() + ": " + hfileBloomType + ", but " + cfBloomType + " specified in column family " + "configuration");
        }
    } else if (hfileBloomType != BloomType.NONE) {
        LOG.info("Bloom filter turned off by CF config for " + initialReader.getHFileReader().getName());
    }
    // load delete family bloom filter
    initialReader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
    try {
        byte[] data = metadataMap.get(TIMERANGE_KEY);
        initialReader.timeRange = data == null ? null : TimeRangeTracker.parseFrom(data).toTimeRange();
    } catch (IllegalArgumentException e) {
        LOG.error("Error reading timestamp range data from meta -- " + "proceeding without", e);
        this.initialReader.timeRange = null;
    }
    try {
        byte[] data = metadataMap.get(COMPACTION_EVENT_KEY);
        this.compactedStoreFiles.addAll(ProtobufUtil.toCompactedStoreFiles(data));
    } catch (IOException e) {
        LOG.error("Error reading compacted storefiles from meta data", e);
    }
    // Initialize these so we can reuse them after the reader is closed.
    firstKey = initialReader.getFirstKey();
    lastKey = initialReader.getLastKey();
    comparator = initialReader.getComparator();
}
Also used: IOException (java.io.IOException), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), ReaderContext (org.apache.hadoop.hbase.io.hfile.ReaderContext), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)
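
The bulk-load branch above recovers a sequence id from the file name, which by convention looks like <randomName>_SeqId_<id-when-loaded>_. Extracted from HStoreFile, that step is plain string handling; the helper name below is illustrative and assumes the trailing underscore is present, as the convention implies.

// Minimal sketch of the filename parsing done in the bulk-load branch of open().
// Returns the embedded sequence id, or -1 if the name carries no "SeqId_" marker.
static long parseBulkLoadSeqId(String fileName) {
    // lastIndexOf() picks the last, most recent bulk load seqId if several are present.
    int startPos = fileName.lastIndexOf("SeqId_");
    if (startPos == -1) {
        return -1L;
    }
    int idStart = startPos + "SeqId_".length();
    int idEnd = fileName.indexOf('_', idStart);
    return Long.parseLong(fileName.substring(idStart, idEnd));
}

For a name such as 3f2a_SeqId_42_, this returns 42.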

Example 10 with ReaderContext

Use of org.apache.hadoop.hbase.io.hfile.ReaderContext in the Apache HBase project.

From the class TestRowPrefixBloomFilter, method testRowPrefixBloomFilter.

@Test
public void testRowPrefixBloomFilter() throws Exception {
    FileSystem fs = FileSystem.getLocal(conf);
    float expErr = 2 * prefixRowCount * suffixRowCount * err;
    int expKeys = fixedLengthExpKeys;
    // write the file
    Path f = new Path(testDir, name.getMethodName());
    writeStoreFile(f, bt, expKeys);
    // read the file
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    HFileInfo fileInfo = new HFileInfo(context, conf);
    StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
    fileInfo.initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    // check basic param
    assertEquals(bt, reader.getBloomFilterType());
    assertEquals(prefixLength, reader.getPrefixLength());
    assertEquals(expKeys, reader.getGeneralBloomFilter().getKeyCount());
    StoreFileScanner scanner = getStoreFileScanner(reader);
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < prefixRowCount; i++) {
        // prefix rows
        String prefixRow = String.format(prefixFormatter, i);
        for (int j = 0; j < suffixRowCount; j++) {
            // suffix rows
            String startRow = generateRowWithSuffix(prefixRow, j);
            String stopRow = generateRowWithSuffix(prefixRow, j + 1);
            Scan scan = new Scan().withStartRow(Bytes.toBytes(startRow)).withStopRow(Bytes.toBytes(stopRow));
            boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
            boolean shouldPrefixRowExist = i % 2 == 0;
            if (shouldPrefixRowExist) {
                if (!exists) {
                    falseNeg++;
                }
            } else {
                if (exists) {
                    falsePos++;
                }
            }
        }
    }
    for (int i = prefixRowCount; i < prefixRowCount * 2; i++) {
        // rows using the invalid row format
        String row = String.format(invalidFormatter, i);
        Scan scan = new Scan(new Get(Bytes.toBytes(row)));
        boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
        boolean shouldPrefixRowExist = i % 2 == 0;
        if (shouldPrefixRowExist) {
            if (!exists) {
                falseNeg++;
            }
        } else {
            if (exists) {
                falsePos++;
            }
        }
    }
    // evict because we are about to delete the file
    reader.close(true);
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * expErr);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than " + maxFalsePos + ")", falsePos <= maxFalsePos);
}
Also used: Path (org.apache.hadoop.fs.Path), HFileInfo (org.apache.hadoop.hbase.io.hfile.HFileInfo), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), FileSystem (org.apache.hadoop.fs.FileSystem), ReaderContext (org.apache.hadoop.hbase.io.hfile.ReaderContext), Get (org.apache.hadoop.hbase.client.Get), ReaderContextBuilder (org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder), Scan (org.apache.hadoop.hbase.client.Scan), Test (org.junit.Test)
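
The tolerance at the end of the test is plain arithmetic on the configured error rate: expErr = 2 * prefixRowCount * suffixRowCount * err, and the assertion allows at most 2 * expErr false positives. With purely illustrative values (not the constants defined in the test class) of prefixRowCount = 50, suffixRowCount = 10 and err = 0.01, expErr = 2 * 50 * 10 * 0.01 = 10, so up to 20 false positives would still pass, while any false negative fails the test immediately.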

Aggregations

ReaderContext (org.apache.hadoop.hbase.io.hfile.ReaderContext): 15
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 11
HFileInfo (org.apache.hadoop.hbase.io.hfile.HFileInfo): 11
ReaderContextBuilder (org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder): 11
Path (org.apache.hadoop.fs.Path): 10
Test (org.junit.Test): 9
FileSystem (org.apache.hadoop.fs.FileSystem): 7
KeyValue (org.apache.hadoop.hbase.KeyValue): 6
Scan (org.apache.hadoop.hbase.client.Scan): 5
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 5
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 5
HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner): 3
FileNotFoundException (java.io.FileNotFoundException): 2
IOException (java.io.IOException): 2
TreeSet (java.util.TreeSet): 2
Cell (org.apache.hadoop.hbase.Cell): 2
Get (org.apache.hadoop.hbase.client.Get): 2
InterruptedIOException (java.io.InterruptedIOException): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1