
Example 16 with FSDataInputStreamWrapper

Use of org.apache.hadoop.hbase.io.FSDataInputStreamWrapper in the apache/hbase project.

In class TestHFileBlock, the method testConcurrentReadingInternals:

protected void testConcurrentReadingInternals() throws IOException, InterruptedException, ExecutionException {
    Configuration conf = TEST_UTIL.getConfiguration();
    for (Compression.Algorithm compressAlgo : COMPRESSION_ALGORITHMS) {
        Path path = new Path(TEST_UTIL.getDataTestDir(), "concurrent_reading");
        Random rand = defaultRandom();
        List<Long> offsets = new ArrayList<>();
        List<BlockType> types = new ArrayList<>();
        writeBlocks(TEST_UTIL.getConfiguration(), rand, compressAlgo, path, offsets, null, types, null);
        FSDataInputStream is = fs.open(path);
        long fileSize = fs.getFileStatus(path).getLen();
        HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true).withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTag).withCompression(compressAlgo).build();
        ReaderContext context = new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(is)).withFileSize(fileSize).withFilePath(path).withFileSystem(fs).build();
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, alloc, conf);
        Executor exec = Executors.newFixedThreadPool(NUM_READER_THREADS);
        ExecutorCompletionService<Boolean> ecs = new ExecutorCompletionService<>(exec);
        for (int i = 0; i < NUM_READER_THREADS; ++i) {
            ecs.submit(new BlockReaderThread("reader_" + (char) ('A' + i), hbr, offsets, types, fileSize));
        }
        for (int i = 0; i < NUM_READER_THREADS; ++i) {
            Future<Boolean> result = ecs.take();
            assertTrue(result.get());
            if (detailedLogging) {
                LOG.info(String.valueOf(i + 1) + " reader threads finished successfully (algo=" + compressAlgo + ")");
            }
        }
        is.close();
    }
}
Also used : Compression(org.apache.hadoop.hbase.io.compress.Compression) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Executor(java.util.concurrent.Executor) Random(java.util.Random) Path(org.apache.hadoop.fs.Path) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper)
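
The test above reduces to a single pattern: wrap the open stream, describe the file with a ReaderContext, and hand both to an HFileBlock.FSReader. Below is a minimal sketch of that single-stream setup, assuming it lives in the org.apache.hadoop.hbase.io.hfile package (FSReaderImpl is used here from test code in that package and may not be accessible elsewhere); the class and method names are illustrative, not HBase API.

package org.apache.hadoop.hbase.io.hfile;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;

public class HFileBlockReadSketch {

    /** Reads the block at offset 0 of the HFile-format file at {@code path}. Illustrative only. */
    static HFileBlock readFirstBlock(FileSystem fs, Path path, Configuration conf) throws IOException {
        long fileSize = fs.getFileStatus(path).getLen();
        FSDataInputStream is = fs.open(path);
        try {
            HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
            ReaderContext context = new ReaderContextBuilder()
                .withInputStreamWrapper(new FSDataInputStreamWrapper(is))
                .withFileSize(fileSize)
                .withFilePath(path)
                .withFileSystem(fs)
                .build();
            HFileBlock.FSReader reader = new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, conf);
            // Same argument shape as the test: offset 0, unknown on-disk size (-1),
            // positional read, then the two boolean flags used above.
            return reader.readBlockData(0, -1, true, false, true);
        } finally {
            is.close();
        }
    }
}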

Example 17 with FSDataInputStreamWrapper

Use of org.apache.hadoop.hbase.io.FSDataInputStreamWrapper in the apache/hbase project.

In class TestChecksum, the method testChecksumInternals:

protected void testChecksumInternals(boolean useTags) throws IOException {
    Compression.Algorithm algo = NONE;
    for (boolean pread : new boolean[] { false, true }) {
        for (int bytesPerChecksum : BYTES_PER_CHECKSUM) {
            Path path = new Path(TEST_UTIL.getDataTestDir(), "checksumChunk_" + algo + bytesPerChecksum);
            FSDataOutputStream os = fs.create(path);
            HFileContext meta = new HFileContextBuilder().withCompression(algo).withIncludesMvcc(true).withIncludesTags(useTags).withHBaseCheckSum(true).withBytesPerCheckSum(bytesPerChecksum).build();
            HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta);
            // write one block. The block has data
            // that is at least 6 times more than the checksum chunk size
            long dataSize = 0;
            DataOutputStream dos = hbw.startWriting(BlockType.DATA);
            for (; dataSize < 6 * bytesPerChecksum; ) {
                for (int i = 0; i < 1234; ++i) {
                    dos.writeInt(i);
                    dataSize += 4;
                }
            }
            hbw.writeHeaderAndData(os);
            long totalSize = hbw.getOnDiskSizeWithHeader();
            os.close();
            long expectedChunks = ChecksumUtil.numChunks(dataSize + HConstants.HFILEBLOCK_HEADER_SIZE, bytesPerChecksum);
            LOG.info("testChecksumChunks: pread={}, bytesPerChecksum={}, fileSize={}, " + "dataSize={}, expectedChunks={}, compression={}", pread, bytesPerChecksum, totalSize, dataSize, expectedChunks, algo.toString());
            // Verify hbase checksums.
            assertEquals(true, hfs.useHBaseChecksum());
            // Read data back from file.
            FSDataInputStream is = fs.open(path);
            FSDataInputStream nochecksum = hfs.getNoChecksumFs().open(path);
            meta = new HFileContextBuilder().withCompression(algo).withIncludesMvcc(true).withIncludesTags(useTags).withHBaseCheckSum(true).withBytesPerCheckSum(bytesPerChecksum).build();
            ReaderContext context = new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(is, nochecksum)).withFileSize(totalSize).withFileSystem(hfs).withFilePath(path).build();
            HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, TEST_UTIL.getConfiguration());
            HFileBlock b = hbr.readBlockData(0, -1, pread, false, true);
            assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
            is.close();
            b.sanityCheck();
            assertEquals(dataSize, b.getUncompressedSizeWithoutHeader());
            // verify that we have the expected number of checksum chunks
            assertEquals(totalSize, HConstants.HFILEBLOCK_HEADER_SIZE + dataSize + expectedChunks * HFileBlock.CHECKSUM_SIZE);
            // assert that we did not encounter hbase checksum verification failures
            assertEquals(0, HFile.getAndResetChecksumFailuresCount());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Compression(org.apache.hadoop.hbase.io.compress.Compression) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) SingleByteBuff(org.apache.hadoop.hbase.nio.SingleByteBuff) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper)
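
The key line in this example is the two-argument constructor: the test hands the wrapper both a stream opened through the checksum-verifying filesystem and one opened through hfs.getNoChecksumFs(), which is what allows HBase-level checksums to be verified by the block reader itself. A minimal hedged sketch of just that setup follows; the class and method names are assumptions, not HBase API.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;

public class DualStreamWrapperSketch {

    /** Wraps both the checksummed and the no-checksum stream for {@code path}. Illustrative only. */
    static FSDataInputStreamWrapper wrapBothStreams(HFileSystem hfs, Path path) throws IOException {
        // Stream that goes through the filesystem-level checksum machinery.
        FSDataInputStream withChecksum = hfs.open(path);
        // Stream from the no-checksum view, used when HBase verifies its own checksums.
        FSDataInputStream noChecksum = hfs.getNoChecksumFs().open(path);
        return new FSDataInputStreamWrapper(withChecksum, noChecksum);
    }
}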

Example 18 with FSDataInputStreamWrapper

Use of org.apache.hadoop.hbase.io.FSDataInputStreamWrapper in the apache/hbase project.

In class TestChecksum, the method testVerifyCheckSum:

@Test
public void testVerifyCheckSum() throws IOException {
    int intCount = 10000;
    for (ChecksumType ckt : ChecksumType.values()) {
        Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + ckt.getName());
        FSDataOutputStream os = fs.create(path);
        HFileContext meta = new HFileContextBuilder().withChecksumType(ckt).build();
        HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta);
        DataOutputStream dos = hbw.startWriting(BlockType.DATA);
        for (int i = 0; i < intCount; ++i) {
            dos.writeInt(i);
        }
        hbw.writeHeaderAndData(os);
        int totalSize = hbw.getOnDiskSizeWithHeader();
        os.close();
        // Use hbase checksums.
        assertEquals(true, hfs.useHBaseChecksum());
        FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path);
        meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
        ReaderContext context = new ReaderContextBuilder().withInputStreamWrapper(is).withFileSize(totalSize).withFileSystem((HFileSystem) fs).withFilePath(path).build();
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, TEST_UTIL.getConfiguration());
        HFileBlock b = hbr.readBlockData(0, -1, false, false, true);
        assertTrue(!b.isSharedMem());
        // verify SingleByteBuff checksum.
        verifySBBCheckSum(b.getBufferReadOnly());
        // verify MultiByteBuff checksum.
        verifyMBBCheckSum(b.getBufferReadOnly());
        ByteBuff data = b.getBufferWithoutHeader();
        for (int i = 0; i < intCount; i++) {
            assertEquals(i, data.getInt());
        }
        try {
            data.getInt();
            fail();
        } catch (BufferUnderflowException e) {
        // expected failure
        }
        assertEquals(0, HFile.getAndResetChecksumFailuresCount());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) ChecksumType(org.apache.hadoop.hbase.util.ChecksumType) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper) MultiByteBuff(org.apache.hadoop.hbase.nio.MultiByteBuff) SingleByteBuff(org.apache.hadoop.hbase.nio.SingleByteBuff) ByteBuff(org.apache.hadoop.hbase.nio.ByteBuff) BufferUnderflowException(java.nio.BufferUnderflowException) Test(org.junit.Test)

Example 19 with FSDataInputStreamWrapper

Use of org.apache.hadoop.hbase.io.FSDataInputStreamWrapper in the apache/hbase project.

In class TestChecksum, the method testNewBlocksHaveDefaultChecksum:

@Test
public void testNewBlocksHaveDefaultChecksum() throws IOException {
    Path path = new Path(TEST_UTIL.getDataTestDir(), "default_checksum");
    FSDataOutputStream os = fs.create(path);
    HFileContext meta = new HFileContextBuilder().build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta);
    DataOutputStream dos = hbw.startWriting(BlockType.DATA);
    for (int i = 0; i < 1000; ++i) dos.writeInt(i);
    hbw.writeHeaderAndData(os);
    int totalSize = hbw.getOnDiskSizeWithHeader();
    os.close();
    // Use hbase checksums.
    assertEquals(true, hfs.useHBaseChecksum());
    FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path);
    meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
    ReaderContext context = new ReaderContextBuilder().withInputStreamWrapper(is).withFileSize(totalSize).withFileSystem((HFileSystem) fs).withFilePath(path).build();
    HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, TEST_UTIL.getConfiguration());
    HFileBlock b = hbr.readBlockData(0, -1, false, false, true);
    assertTrue(!b.isSharedMem());
    assertEquals(b.getChecksumType(), ChecksumType.getDefaultChecksumType().getCode());
}
Also used : Path(org.apache.hadoop.fs.Path) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper) Test(org.junit.Test)
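
Examples 18 and 19 both build the wrapper directly from a filesystem and a path rather than from an already-open stream. The snippet below is a hedged sketch that factors that recurring ReaderContext setup into a small helper; the class and method names are assumptions, not HBase API. Note that the checksum tests above additionally cast the filesystem to HFileSystem when building the context so that HBase checksums are in play.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;

public class ReaderContextSketch {

    /** Builds a ReaderContext for {@code path}, letting the wrapper open the stream itself. */
    static ReaderContext contextFor(FileSystem fs, Path path) throws IOException {
        FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fs, path);
        return new ReaderContextBuilder()
            .withInputStreamWrapper(wrapper)
            .withFileSize(fs.getFileStatus(path).getLen())
            .withFileSystem(fs)
            .withFilePath(path)
            .build();
    }
}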

Example 20 with FSDataInputStreamWrapper

Use of org.apache.hadoop.hbase.io.FSDataInputStreamWrapper in the apache/hbase project.

In class TestHStoreFile, the method testBloomTypes:

@Test
public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    int rowCount = 50;
    int colCount = 10;
    int versions = 2;
    // run once using columns and once using rows
    BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
    int[] expKeys = { rowCount * colCount, rowCount };
    // below line deserves commentary. it is expected bloom false positives
    // column = rowCount*2*colCount inserts
    // row-level = only rowCount*2 inserts, but failures will be magnified by
    // 2nd for loop for every column (2*colCount)
    float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };
    for (int x : new int[] { 0, 1 }) {
        // write the file
        Path f = new Path(ROOT_DIR, name.getMethodName() + x);
        HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
        // Make a store file and write data to it.
        StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f).withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();
        long now = EnvironmentEdgeManager.currentTime();
        for (int i = 0; i < rowCount * 2; i += 2) {
            // rows
            for (int j = 0; j < colCount * 2; j += 2) {
                // column qualifiers
                String row = String.format(localFormatter, i);
                String col = String.format(localFormatter, j);
                for (int k = 0; k < versions; ++k) {
                    // versions
                    KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
                    writer.append(kv);
                }
            }
        }
        writer.close();
        ReaderContext context = new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen()).withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
        HFileInfo fileInfo = new HFileInfo(context, conf);
        StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
        fileInfo.initMetaAndIndex(reader.getHFileReader());
        reader.loadFileInfo();
        reader.loadBloomfilter();
        StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
        assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());
        HStore store = mock(HStore.class);
        when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
        // check false positives rate
        int falsePos = 0;
        int falseNeg = 0;
        for (int i = 0; i < rowCount * 2; ++i) {
            // rows
            for (int j = 0; j < colCount * 2; ++j) {
                // column qualifiers
                String row = String.format(localFormatter, i);
                String col = String.format(localFormatter, j);
                TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
                columns.add(Bytes.toBytes("col" + col));
                Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
                scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col)));
                boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
                boolean shouldRowExist = i % 2 == 0;
                boolean shouldColExist = j % 2 == 0;
                shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
                if (shouldRowExist && shouldColExist) {
                    if (!exists) {
                        falseNeg++;
                    }
                } else {
                    if (exists) {
                        falsePos++;
                    }
                }
            }
        }
        // evict because we are about to delete the file
        reader.close(true);
        fs.delete(f, true);
        System.out.println(bt[x].toString());
        System.out.println("  False negatives: " + falseNeg);
        System.out.println("  False positives: " + falsePos);
        assertEquals(0, falseNeg);
        assertTrue(falsePos < 2 * expErr[x]);
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) HFileInfo(org.apache.hadoop.hbase.io.hfile.HFileInfo) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) ReaderContext(org.apache.hadoop.hbase.io.hfile.ReaderContext) Path(org.apache.hadoop.fs.Path) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ReaderContextBuilder(org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper) Scan(org.apache.hadoop.hbase.client.Scan) Test(org.junit.Test)
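
The read side of this test boils down to: build a ReaderContext over the store file with an FSDataInputStreamWrapper, create the HFileInfo and StoreFileReader, and initialize metadata, file info, and the bloom filter in that order. The sketch below condenses that sequence; it assumes it sits in the org.apache.hadoop.hbase.regionserver package next to the test (some of these constructors may not be public elsewhere), and the class and method names are illustrative.

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileInfo;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;

public class StoreFileReaderSketch {

    /** Opens a StoreFileReader for {@code f} with its bloom filter loaded. Illustrative only. */
    static StoreFileReader openWithBloom(FileSystem fs, Path f, CacheConfig cacheConf, Configuration conf) throws IOException {
        ReaderContext context = new ReaderContextBuilder()
            .withFilePath(f)
            .withFileSize(fs.getFileStatus(f).getLen())
            .withFileSystem(fs)
            .withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f))
            .build();
        HFileInfo fileInfo = new HFileInfo(context, conf);
        StoreFileReader reader = new StoreFileReader(context, fileInfo, cacheConf, new AtomicInteger(0), conf);
        // Same initialization order as the test: block index and meta first,
        // then file info, then the bloom filter.
        fileInfo.initMetaAndIndex(reader.getHFileReader());
        reader.loadFileInfo();
        reader.loadBloomfilter();
        return reader;
    }
}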

Aggregations

FSDataInputStreamWrapper (org.apache.hadoop.hbase.io.FSDataInputStreamWrapper): 21
Path (org.apache.hadoop.fs.Path): 16
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 9
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 8
ArrayList (java.util.ArrayList): 7
Compression (org.apache.hadoop.hbase.io.compress.Compression): 7
ByteBuff (org.apache.hadoop.hbase.nio.ByteBuff): 7
DataOutputStream (java.io.DataOutputStream): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
SingleByteBuff (org.apache.hadoop.hbase.nio.SingleByteBuff): 5
Test (org.junit.Test): 5
Random (java.util.Random): 4
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 4
Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm): 4
MultiByteBuff (org.apache.hadoop.hbase.nio.MultiByteBuff): 4
IOException (java.io.IOException): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3
KeyValue (org.apache.hadoop.hbase.KeyValue): 3
ByteArrayInputStream (java.io.ByteArrayInputStream): 2
DataInputStream (java.io.DataInputStream): 2