
Example 26 with CacheConfig

Use of org.apache.hadoop.hbase.io.hfile.CacheConfig in project hbase by apache.

From the class LoadIncrementalHFiles, method copyHFileHalf.

/**
   * Copy half of an HFile into a new HFile.
   */
private static void copyHFileHalf(Configuration conf, Path inFile, Path outFile, Reference reference, HColumnDescriptor familyDescriptor) throws IOException {
    FileSystem fs = inFile.getFileSystem(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    HalfStoreFileReader halfReader = null;
    StoreFileWriter halfWriter = null;
    try {
        halfReader = new HalfStoreFileReader(fs, inFile, cacheConf, reference, conf);
        Map<byte[], byte[]> fileInfo = halfReader.loadFileInfo();
        int blocksize = familyDescriptor.getBlocksize();
        Algorithm compression = familyDescriptor.getCompressionType();
        BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
        HFileContext hFileContext = new HFileContextBuilder()
            .withCompression(compression)
            .withChecksumType(HStore.getChecksumType(conf))
            .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
            .withBlockSize(blocksize)
            .withDataBlockEncoding(familyDescriptor.getDataBlockEncoding())
            .withIncludesTags(true)
            .build();
        halfWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
            .withFilePath(outFile)
            .withBloomType(bloomFilterType)
            .withFileContext(hFileContext)
            .build();
        HFileScanner scanner = halfReader.getScanner(false, false, false);
        scanner.seekTo();
        do {
            halfWriter.append(scanner.getCell());
        } while (scanner.next());
        for (Map.Entry<byte[], byte[]> entry : fileInfo.entrySet()) {
            if (shouldCopyHFileMetaKey(entry.getKey())) {
                halfWriter.appendFileInfo(entry.getKey(), entry.getValue());
            }
        }
    } finally {
        if (halfWriter != null) {
            halfWriter.close();
        }
        if (halfReader != null) {
            halfReader.close(cacheConf.shouldEvictOnClose());
        }
    }
}
Also used : StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) HalfStoreFileReader(org.apache.hadoop.hbase.io.HalfStoreFileReader) HFileScanner(org.apache.hadoop.hbase.io.hfile.HFileScanner) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) FileSystem(org.apache.hadoop.fs.FileSystem) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)
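A hedged usage sketch for the helper above: the paths, split row, and column family name are hypothetical, and copyHFileHalf is private to LoadIncrementalHFiles, so this only shows the shape of the call. Reference.createTopReference and Reference.createBottomReference choose which half of the source file gets copied.

// Hypothetical driver for copyHFileHalf; every name below is made up for illustration.
Configuration conf = HBaseConfiguration.create();
Path inFile = new Path("/hbase/staging/f1/sourcefile");         // hypothetical input HFile
Path outFile = new Path("/hbase/staging/f1/sourcefile.top");    // hypothetical output half-file
// Rows >= "splitrow" land in the top half; use createBottomReference for the other half.
Reference topHalf = Reference.createTopReference(Bytes.toBytes("splitrow"));
HColumnDescriptor family = new HColumnDescriptor("f1");
copyHFileHalf(conf, inFile, outFile, topHalf, family);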

Example 27 with CacheConfig

Use of org.apache.hadoop.hbase.io.hfile.CacheConfig in project hbase by apache.

From the class HBaseFsck, method adoptHdfsOrphan.

/**
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
   * these orphans by creating a new region and moving the column families,
   * recovered edits, and WALs into the new region dir.  We determine the region's
   * start key and end key by looking at all of the hfiles inside its column
   * families to identify the min and max keys. The resulting region will likely
   * violate table integrity, but that is dealt with later by merging overlapping
   * regions.
   */
@SuppressWarnings("deprecation")
private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
        LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " + p + ". This dir could probably be deleted.");
        return;
    }
    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();
    // find min and max key values
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
        String cfName = cf.getPath().getName();
        // TODO Figure out what the special dirs are
        if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME))
            continue;
        FileStatus[] hfiles = fs.listStatus(cf.getPath());
        for (FileStatus hfile : hfiles) {
            byte[] start, end;
            HFile.Reader hf = null;
            try {
                CacheConfig cacheConf = new CacheConfig(getConf());
                hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
                hf.loadFileInfo();
                Cell startKv = hf.getFirstKey();
                start = CellUtil.cloneRow(startKv);
                Cell endKv = hf.getLastKey();
                end = CellUtil.cloneRow(endKv);
            } catch (IOException ioe) {
                LOG.warn("Problem reading orphan file " + hfile + ", skipping");
                continue;
            } catch (NullPointerException ioe) {
                LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
                continue;
            } finally {
                if (hf != null) {
                    hf.close();
                }
            }
            // expand the range to include the range of all hfiles
            if (orphanRegionRange == null) {
                // first range
                orphanRegionRange = new Pair<>(start, end);
            } else {
                // expand range only if the hfile is wider.
                if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
                    orphanRegionRange.setFirst(start);
                }
                if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
                    orphanRegionRange.setSecond(end);
                }
            }
        }
    }
    if (orphanRegionRange == null) {
        LOG.warn("No data in dir " + p + ", sidelining data");
        fixes++;
        sidelineRegionDir(fs, hi);
        return;
    }
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " + Bytes.toString(orphanRegionRange.getSecond()) + ")");
    // create new region on hdfs. move data into place.
    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), Bytes.add(orphanRegionRange.getSecond(), new byte[1]));
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();
    // rename all the data to new region
    mergeRegionDirs(target, hi);
    fixes++;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TableName(org.apache.hadoop.hbase.TableName) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) FileSystem(org.apache.hadoop.fs.FileSystem) MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) HRegionFileSystem(org.apache.hadoop.hbase.regionserver.HRegionFileSystem) HFile(org.apache.hadoop.hbase.io.hfile.HFile) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Cell(org.apache.hadoop.hbase.Cell)
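The try/finally in adoptHdfsOrphan boils down to a small recipe: open the HFile with a CacheConfig, read its first and last row keys, then close the reader. A minimal standalone sketch, assuming the same HBase APIs as above and a made-up path:

// Minimal sketch (hypothetical path): peek at an HFile's row-key range.
Configuration conf = HBaseConfiguration.create();
FileSystem fs = FileSystem.get(conf);
Path hfilePath = new Path("/hbase/data/default/t1/abc123/f1/somefile");  // hypothetical
CacheConfig cacheConf = new CacheConfig(conf);
HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, conf);
try {
    reader.loadFileInfo();
    // getFirstKey()/getLastKey() can be null for an empty file; the code above guards
    // against that by catching the resulting exception and skipping the file.
    byte[] firstRow = CellUtil.cloneRow(reader.getFirstKey());
    byte[] lastRow = CellUtil.cloneRow(reader.getLastKey());
    System.out.println("[" + Bytes.toStringBinary(firstRow) + ", " + Bytes.toStringBinary(lastRow) + "]");
} finally {
    reader.close();
}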

Example 28 with CacheConfig

Use of org.apache.hadoop.hbase.io.hfile.CacheConfig in project hbase by apache.

From the class HBaseFsck, method checkRegionBoundaries.

public void checkRegionBoundaries() {
    try {
        ByteArrayComparator comparator = new ByteArrayComparator();
        List<HRegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
        final RegionBoundariesInformation currentRegionBoundariesInformation = new RegionBoundariesInformation();
        Path hbaseRoot = FSUtils.getRootDir(getConf());
        for (HRegionInfo regionInfo : regions) {
            Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
            currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
            // For each region, get the start and stop key from the META and compare them to the
            // same information from the Stores.
            Path path = new Path(tableDir, regionInfo.getEncodedName());
            FileSystem fs = path.getFileSystem(getConf());
            FileStatus[] files = fs.listStatus(path);
            // For all the column families in this region...
            byte[] storeFirstKey = null;
            byte[] storeLastKey = null;
            for (FileStatus file : files) {
                String fileName = file.getPath().toString();
                fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
                if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
                    FileStatus[] storeFiles = fs.listStatus(file.getPath());
                    // For all the stores in this column family.
                    for (FileStatus storeFile : storeFiles) {
                        HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(getConf()), getConf());
                        if ((reader.getFirstKey() != null) && ((storeFirstKey == null) || (comparator.compare(storeFirstKey, ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey()) > 0))) {
                            storeFirstKey = ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey();
                        }
                        if ((reader.getLastKey() != null) && ((storeLastKey == null) || (comparator.compare(storeLastKey, ((KeyValue.KeyOnlyKeyValue) reader.getLastKey()).getKey())) < 0)) {
                            storeLastKey = ((KeyValue.KeyOnlyKeyValue) reader.getLastKey()).getKey();
                        }
                        reader.close();
                    }
                }
            }
            currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
            currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
            currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
            currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
            if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
                currentRegionBoundariesInformation.metaFirstKey = null;
            if (currentRegionBoundariesInformation.metaLastKey.length == 0)
                currentRegionBoundariesInformation.metaLastKey = null;
            // For a region to be correct, we need the META start key to be smaller than or equal
            // to the smallest start key from all the stores, and the start key from the next META
            // entry to be bigger than the last key from all the current stores. The first region's
            // start key is null, the last region's end key is null, and some regions can be empty
            // with no stores at all.
            boolean valid = true;
            // Checking start key.
            if ((currentRegionBoundariesInformation.storesFirstKey != null) && (currentRegionBoundariesInformation.metaFirstKey != null)) {
                valid = valid && comparator.compare(currentRegionBoundariesInformation.storesFirstKey, currentRegionBoundariesInformation.metaFirstKey) >= 0;
            }
            // Checking stop key.
            if ((currentRegionBoundariesInformation.storesLastKey != null) && (currentRegionBoundariesInformation.metaLastKey != null)) {
                valid = valid && comparator.compare(currentRegionBoundariesInformation.storesLastKey, currentRegionBoundariesInformation.metaLastKey) < 0;
            }
            if (!valid) {
                errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries", tablesInfo.get(regionInfo.getTable()));
                LOG.warn("Region's boundaries not aligned between stores and META for:");
                LOG.warn(currentRegionBoundariesInformation);
            }
        }
    } catch (IOException e) {
        LOG.error(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) KeyValue(org.apache.hadoop.hbase.KeyValue) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) FileSystem(org.apache.hadoop.fs.FileSystem) MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) HRegionFileSystem(org.apache.hadoop.hbase.regionserver.HRegionFileSystem) HFile(org.apache.hadoop.hbase.io.hfile.HFile) ByteArrayComparator(org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig)
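The validity check buried in the loop above reads as a small predicate: META's start key must be less than or equal to the smallest store key, and the largest store key must sort strictly below META's end key, with null META keys standing for the unbounded first and last regions. A sketch of that rule as a standalone helper (the method name is made up):

// Hypothetical helper capturing the boundary rule used in checkRegionBoundaries.
static boolean boundariesValid(ByteArrayComparator comparator,
        byte[] metaFirstKey, byte[] metaLastKey,
        byte[] storesFirstKey, byte[] storesLastKey) {
    boolean valid = true;
    if (storesFirstKey != null && metaFirstKey != null) {
        // the smallest store key must not precede the region's META start key
        valid = valid && comparator.compare(storesFirstKey, metaFirstKey) >= 0;
    }
    if (storesLastKey != null && metaLastKey != null) {
        // the largest store key must sort strictly before the region's META end key
        valid = valid && comparator.compare(storesLastKey, metaLastKey) < 0;
    }
    return valid;
}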

Example 29 with CacheConfig

Use of org.apache.hadoop.hbase.io.hfile.CacheConfig in project hbase by apache.

From the class TestAvoidCellReferencesIntoShippedBlocks, method testHBase16372InCompactionWritePath.

@Test
public void testHBase16372InCompactionWritePath() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    // Create a table with a block size of 1024
    final Table table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, CompactorRegionObserver.class.getName());
    try {
        // get the block cache and region
        RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName);
        String regionName = locator.getAllRegionLocations().get(0).getRegionInfo().getEncodedName();
        Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(regionName);
        Store store = region.getStores().iterator().next();
        CacheConfig cacheConf = store.getCacheConfig();
        cacheConf.setCacheDataOnWrite(true);
        cacheConf.setEvictOnClose(true);
        final BlockCache cache = cacheConf.getBlockCache();
        // insert data: rows ROW through ROW5 (6 rows) are added
        Put put = new Put(ROW);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW1);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        // data was in memstore so don't expect any changes
        region.flush(true);
        put = new Put(ROW1);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW2);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW2);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        // data was in memstore so don't expect any changes
        region.flush(true);
        put = new Put(ROW3);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW3);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW4);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        // data was in memstore so don't expect any changes
        region.flush(true);
        put = new Put(ROW4);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW5);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW5);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        // data was in memstore so don't expect any changes
        region.flush(true);
        // Load cache
        Scan s = new Scan();
        s.setMaxResultSize(1000);
        ResultScanner scanner = table.getScanner(s);
        int count = 0;
        for (Result result : scanner) {
            count++;
        }
        assertEquals("Count all the rows ", count, 6);
        // all the cache is loaded
        // trigger a major compaction
        ScannerThread scannerThread = new ScannerThread(table, cache);
        scannerThread.start();
        region.compact(true);
        s = new Scan();
        s.setMaxResultSize(1000);
        scanner = table.getScanner(s);
        count = 0;
        for (Result result : scanner) {
            count++;
        }
        assertEquals("Count all the rows ", count, 6);
    } finally {
        table.close();
    }
}
Also used : Store(org.apache.hadoop.hbase.regionserver.Store) MultiRowMutationEndpoint(org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint) TableName(org.apache.hadoop.hbase.TableName) BlockCache(org.apache.hadoop.hbase.io.hfile.BlockCache) Region(org.apache.hadoop.hbase.regionserver.Region) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Test(org.junit.Test)
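The two CacheConfig toggles are what make this test observable. A fragment sketch, continuing from the store obtained in the test above and assuming the pre-2.0 CacheConfig, which is mutable and exposes the block cache directly:

// Fragment: `store` as obtained from the region in the test above.
CacheConfig cacheConf = store.getCacheConfig();
cacheConf.setCacheDataOnWrite(true);   // blocks written by flushes/compactions go straight into the cache
cacheConf.setEvictOnClose(true);       // closing a store file evicts its blocks from the cache
BlockCache cache = cacheConf.getBlockCache();
System.out.println("blocks currently cached: " + cache.getBlockCount());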

Example 30 with CacheConfig

Use of org.apache.hadoop.hbase.io.hfile.CacheConfig in project hbase by apache.

From the class TestAvoidCellReferencesIntoShippedBlocks, method testHBASE16372InReadPath.

@Test
public void testHBASE16372InReadPath() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    // Create a table with a block size of 1024
    final Table table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, null);
    try {
        // get the block cache and region
        RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName);
        String regionName = locator.getAllRegionLocations().get(0).getRegionInfo().getEncodedName();
        Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(regionName);
        Store store = region.getStores().iterator().next();
        CacheConfig cacheConf = store.getCacheConfig();
        cacheConf.setCacheDataOnWrite(true);
        cacheConf.setEvictOnClose(true);
        final BlockCache cache = cacheConf.getBlockCache();
        // insert data: rows ROW through ROW5 (6 rows) are added
        Put put = new Put(ROW);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW1);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW1);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW2);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW2);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW3);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW3);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW4);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW4);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        put = new Put(ROW5);
        put.addColumn(FAMILY, QUALIFIER, data);
        table.put(put);
        put = new Put(ROW5);
        put.addColumn(FAMILY, QUALIFIER1, data);
        table.put(put);
        // data was in memstore so don't expect any changes
        region.flush(true);
        // Load cache
        Scan s = new Scan();
        s.setMaxResultSize(1000);
        ResultScanner scanner = table.getScanner(s);
        int count = 0;
        for (Result result : scanner) {
            count++;
        }
        assertEquals("Count all the rows ", count, 6);
        // Scan from cache
        s = new Scan();
        // Start the scan from ROW1
        s.setCaching(1);
        s.setStartRow(ROW1);
        // allow partial results so that a row's cells may be returned across multiple Results
        s.setAllowPartialResults(true);
        s.setMaxResultSize(1000);
        scanner = table.getScanner(s);
        Thread evictorThread = new Thread() {

            @Override
            public void run() {
                List<BlockCacheKey> cacheList = new ArrayList<>();
                Iterator<CachedBlock> iterator = cache.iterator();
                // evict all the blocks
                while (iterator.hasNext()) {
                    CachedBlock next = iterator.next();
                    BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
                    cacheList.add(cacheKey);
                    cache.evictBlock(cacheKey);
                }
                try {
                    Thread.sleep(1);
                } catch (InterruptedException e1) {
                }
                iterator = cache.iterator();
                int refBlockCount = 0;
                while (iterator.hasNext()) {
                    iterator.next();
                    refBlockCount++;
                }
                assertEquals("One block should be there ", refBlockCount, 1);
                // Rescan to repopulate the block cache with these rows.
                Scan s1 = new Scan();
                // This scan runs from ROW3 up to (but not including) ROW5 and
                // repopulates the cache with those rows.
                s1.setStartRow(ROW3);
                s1.setStopRow(ROW5);
                s1.setCaching(1);
                ResultScanner scanner;
                try {
                    scanner = table.getScanner(s1);
                    int count = 0;
                    for (Result result : scanner) {
                        count++;
                    }
                    assertEquals("Count the rows", count, 2);
                    iterator = cache.iterator();
                    List<BlockCacheKey> newCacheList = new ArrayList<>();
                    while (iterator.hasNext()) {
                        CachedBlock next = iterator.next();
                        BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
                        newCacheList.add(cacheKey);
                    }
                    int newBlockRefCount = 0;
                    for (BlockCacheKey key : cacheList) {
                        if (newCacheList.contains(key)) {
                            newBlockRefCount++;
                        }
                    }
                    assertEquals("old blocks should still be found ", newBlockRefCount, 6);
                    latch.countDown();
                } catch (IOException e) {
                }
            }
        };
        count = 0;
        for (Result result : scanner) {
            count++;
            if (count == 2) {
                evictorThread.start();
                latch.await();
            }
        }
        assertEquals("Count should give all rows ", count, 10);
    } finally {
        table.close();
    }
}
Also used : CachedBlock(org.apache.hadoop.hbase.io.hfile.CachedBlock) ArrayList(java.util.ArrayList) Store(org.apache.hadoop.hbase.regionserver.Store) IOException(java.io.IOException) BlockCacheKey(org.apache.hadoop.hbase.io.hfile.BlockCacheKey) MultiRowMutationEndpoint(org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint) TableName(org.apache.hadoop.hbase.TableName) BlockCache(org.apache.hadoop.hbase.io.hfile.BlockCache) Region(org.apache.hadoop.hbase.regionserver.Region) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Test(org.junit.Test)
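The evictor thread's core idea, stripped down: evict every cached block by key, then count what remains; blocks whose cells are still referenced by the outstanding scanner are expected to survive. A fragment sketch reusing the cache handle from the test above:

// Fragment: `cache` as obtained in the test above.
Iterator<CachedBlock> it = cache.iterator();
int evicted = 0;
while (it.hasNext()) {
    CachedBlock b = it.next();
    cache.evictBlock(new BlockCacheKey(b.getFilename(), b.getOffset()));
    evicted++;
}
int surviving = 0;
it = cache.iterator();
while (it.hasNext()) {
    it.next();
    surviving++;
}
System.out.println("tried to evict " + evicted + " blocks; " + surviving + " still cached (scanner-referenced)");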

Aggregations

CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 63 usages
Path (org.apache.hadoop.fs.Path): 28
Test (org.junit.Test): 26
Configuration (org.apache.hadoop.conf.Configuration): 21
HFile (org.apache.hadoop.hbase.io.hfile.HFile): 21
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 20
BlockCache (org.apache.hadoop.hbase.io.hfile.BlockCache): 15
KeyValue (org.apache.hadoop.hbase.KeyValue): 14
TableName (org.apache.hadoop.hbase.TableName): 14
Region (org.apache.hadoop.hbase.regionserver.Region): 13
Store (org.apache.hadoop.hbase.regionserver.Store): 13
Cell (org.apache.hadoop.hbase.Cell): 10
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 10
CombinedBlockCache (org.apache.hadoop.hbase.io.hfile.CombinedBlockCache): 10
IOException (java.io.IOException): 9
CountDownLatch (java.util.concurrent.CountDownLatch): 8
FileStatus (org.apache.hadoop.fs.FileStatus): 8
HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner): 8