Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project hbase by apache.
The class TestStoreFile, method testBasicHalfMapFile.
/**
 * Write a file and then assert that we can read from top and bottom halves
 * using two HalfMapFiles.
 * @throws Exception
 */
@Test
public void testBasicHalfMapFile() throws Exception {
  final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testBasicHalfMapFileTb"));
  HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);
  HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName())
      .withFileContext(meta)
      .build();
  writeStoreFile(writer);
  Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
  StoreFile sf = new StoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE);
  checkHalfHFile(regionFs, sf);
}
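The usages above and below all follow the same pattern: HFileContextBuilder is a fluent builder that produces an HFileContext, which is then handed to a store file writer via withFileContext(...). The following is a minimal sketch, not taken from either project, that combines only builder options appearing on this page; the wrapping class, method name, and concrete values are illustrative placeholders.

import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.util.ChecksumType;

public class HFileContextExample {
  // Builds an HFileContext with explicit block, compression, encoding and checksum settings.
  // All values below are placeholders, not recommendations.
  static HFileContext buildContext() {
    return new HFileContextBuilder()
        .withBlockSize(64 * 1024)                       // HFile data block size in bytes
        .withCompression(Compression.Algorithm.NONE)    // block compression algorithm
        .withDataBlockEncoding(DataBlockEncoding.NONE)  // data block encoding
        .withChecksumType(ChecksumType.CRC32C)          // checksum algorithm
        .withBytesPerCheckSum(16 * 1024)                // bytes covered by each checksum
        .build();
  }
}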
Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project hbase by apache.
The class TestStoreFile, method testBloomFilter.
@Test
public void testBloomFilter() throws Exception {
  FileSystem fs = FileSystem.getLocal(conf);
  conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
  conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
  // write the file
  Path f = new Path(ROOT_DIR, getName());
  HFileContext meta = new HFileContextBuilder()
      .withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES)
      .build();
  // Make a store file and write data to it.
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(f)
      .withBloomType(BloomType.ROW)
      .withMaxKeyCount(2000)
      .withFileContext(meta)
      .build();
  bloomWriteRead(writer, fs);
}
Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project hbase by apache.
The class TestStoreFile, method testBloomTypes.
@Test
public void testBloomTypes() throws Exception {
  float err = (float) 0.01;
  FileSystem fs = FileSystem.getLocal(conf);
  conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
  conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
  int rowCount = 50;
  int colCount = 10;
  int versions = 2;
  // run once using columns and once using rows
  BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
  int[] expKeys = { rowCount * colCount, rowCount };
  // The line below deserves commentary: it is the expected number of bloom false positives.
  // Column-level (ROWCOL): rowCount*2*colCount inserts.
  // Row-level (ROW): only rowCount*2 inserts, but failures will be magnified by the
  // 2nd for loop over every column (2*colCount).
  float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };
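  // Worked numbers (added for clarity, not in the original test): with rowCount = 50,
  // colCount = 10 and err = 0.01, expErr is { 2*50*10*0.01, 2*50*2*10*0.01 } = { 10, 20 },
  // so the assertion at the end of the loop tolerates fewer than 20 (ROWCOL) or
  // 40 (ROW) false positives among the keys probed below that were never inserted.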
  for (int x : new int[] { 0, 1 }) {
    // write the file
    Path f = new Path(ROOT_DIR, getName() + x);
    HFileContext meta = new HFileContextBuilder()
        .withBlockSize(BLOCKSIZE_SMALL)
        .withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES)
        .build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
        .withFilePath(f)
        .withBloomType(bt[x])
        .withMaxKeyCount(expKeys[x])
        .withFileContext(meta)
        .build();
    long now = System.currentTimeMillis();
    for (int i = 0; i < rowCount * 2; i += 2) {
      // rows
      for (int j = 0; j < colCount * 2; j += 2) {
        // column qualifiers
        String row = String.format(localFormatter, i);
        String col = String.format(localFormatter, j);
        for (int k = 0; k < versions; ++k) {
          // versions
          KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
              ("col" + col).getBytes(), now - k, Bytes.toBytes((long) -1));
          writer.append(kv);
        }
      }
    }
    writer.close();
    StoreFileReader reader = new StoreFileReader(fs, f, cacheConf, conf);
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    assertEquals(expKeys[x], reader.generalBloomFilter.getKeyCount());
    Store store = mock(Store.class);
    HColumnDescriptor hcd = mock(HColumnDescriptor.class);
    when(hcd.getName()).thenReturn(Bytes.toBytes("family"));
    when(store.getFamily()).thenReturn(hcd);
    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < rowCount * 2; ++i) {
      // rows
      for (int j = 0; j < colCount * 2; ++j) {
        // column qualifiers
        String row = String.format(localFormatter, i);
        String col = String.format(localFormatter, j);
        TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
        columns.add(("col" + col).getBytes());
        Scan scan = new Scan(row.getBytes(), row.getBytes());
        scan.addColumn("family".getBytes(), ("col" + col).getBytes());
        boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
        boolean shouldRowExist = i % 2 == 0;
        boolean shouldColExist = j % 2 == 0;
        shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
        if (shouldRowExist && shouldColExist) {
          if (!exists)
            falseNeg++;
        } else {
          if (exists)
            falsePos++;
        }
      }
    }
    // evict because we are about to delete the file
    reader.close(true);
    fs.delete(f, true);
    System.out.println(bt[x].toString());
    System.out.println(" False negatives: " + falseNeg);
    System.out.println(" False positives: " + falsePos);
    assertEquals(0, falseNeg);
    assertTrue(falsePos < 2 * expErr[x]);
  }
}
Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project phoenix by apache.
The class MultiHfileOutputFormat, method createRecordWriter.
/**
 * Creates a RecordWriter that writes each incoming cell to a per-table, per-column-family HFile
 * under the task's work directory.
 * @param context the task attempt context
 * @return the record writer
 * @throws IOException
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context) throws IOException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
  final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
  return new RecordWriter<TableRowkeyPair, V>() {

    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());
    private boolean rollRequested = false;
    @Override
    public void write(TableRowkeyPair row, V cell) throws IOException {
      KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }
      // phoenix-2216: start : extract table name from the rowkey
      String tableName = row.getTableName();
      byte[] rowKey = row.getRowkey().get();
      long length = kv.getLength();
      byte[] family = CellUtil.cloneFamily(kv);
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      WriterLength wl = this.writers.get(tableAndFamily);
      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        // phoenix-2216: start : create a directory for table and family within the output dir
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
        fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
        // phoenix-2216: end
      }
      // If the writer for this family has reached maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }
      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }
      // create a new store file writer, if necessary
      if (wl == null || wl.writer == null) {
        // phoenix-2216: start : pass the table name as well
        wl = getNewWriter(tableName, family, conf);
        // phoenix-2216: end
      }
      // we now have the proper store file writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;
      // Copy the row so we know when a row transition happens.
      this.previousRow = rowKey;
    }
    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }
    /* Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
    private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf) throws IOException {
      WriterLength wl = new WriterLength();
      Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
      Path familydir = new Path(tableOutputPath, Bytes.toString(family));
      // phoenix-2216: start : fetch the configuration properties that were set on the table and
      // create a map from column family to the compression algorithm for the table.
      final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
      final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
      final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
      // phoenix-2216: end
      String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
      final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf, tableName);
      final DataBlockEncoding overriddenEncoding;
      if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
      } else {
        overriddenEncoding = null;
      }
      Algorithm compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      BloomType bloomType = bloomTypeMap.get(family);
      bloomType = bloomType == null ? BloomType.NONE : bloomType;
      Integer blockSize = blockSizeMap.get(family);
      blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
      DataBlockEncoding encoding = overriddenEncoding;
      encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
      encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      HFileContextBuilder contextBuilder = new HFileContextBuilder()
          .withCompression(compression)
          .withChecksumType(HStore.getChecksumType(conf))
          .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
          .withBlockSize(blockSize);
      contextBuilder.withDataBlockEncoding(encoding);
      HFileContext hFileContext = contextBuilder.build();
      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
          .withOutputDir(familydir)
          .withBloomType(bloomType)
          .withComparator(KeyValue.COMPARATOR)
          .withFileContext(hFileContext)
          .build();
      // phoenix-2216: start : the writers map is keyed by a join byte array of table name and family,
      // so join them and put the new writer in the map.
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      this.writers.put(tableAndFamily, wl);
      // phoenix-2216: end
      return wl;
    }
    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    @Override
    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
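The snippet relies on a WriterLength holder and a join(...) helper that are not reproduced on this page. Based only on how they are used above (a mutable writer/bytes-written pair, and a byte[] map key combining table name and column family), a hedged sketch of what they might look like follows; the actual Phoenix implementations may differ, for example in how the key is delimited.

import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical reconstruction from usage; not copied from MultiHfileOutputFormat.
class WriterLength {
  long written = 0;               // bytes written through this writer so far
  StoreFile.Writer writer = null; // the current store file writer, null until first use
}

// Builds the writers-map key by concatenating table name and family name bytes.
// The real helper may use a separator or a different encoding.
static byte[] join(String tableName, String family) {
  return Bytes.add(Bytes.toBytes(tableName), Bytes.toBytes(family));
}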
Use of org.apache.hadoop.hbase.io.hfile.HFileContextBuilder in project hbase by apache.
The class MobUtils, method createWriter.
/**
 * Creates a writer for the mob file in temp directory.
 * @param conf The current configuration.
 * @param fs The current file system.
 * @param family The descriptor of the current column family.
 * @param path The path for a temp directory.
 * @param maxKeyCount The key count.
 * @param compression The compression algorithm.
 * @param cacheConfig The current cache config.
 * @param cryptoContext The encryption context.
 * @param checksumType The checksum type.
 * @param bytesPerChecksum The bytes per checksum.
 * @param blocksize The HFile block size.
 * @param bloomType The bloom filter type.
 * @param isCompaction If the writer is used in compaction.
 * @return The writer for the mob file.
 */
public static StoreFileWriter createWriter(Configuration conf, FileSystem fs, ColumnFamilyDescriptor family,
    Path path, long maxKeyCount, Compression.Algorithm compression, CacheConfig cacheConfig,
    Encryption.Context cryptoContext, ChecksumType checksumType, int bytesPerChecksum, int blocksize,
    BloomType bloomType, boolean isCompaction) throws IOException {
  if (compression == null) {
    compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
  }
  final CacheConfig writerCacheConf;
  if (isCompaction) {
    writerCacheConf = new CacheConfig(cacheConfig);
    writerCacheConf.setCacheDataOnWrite(false);
  } else {
    writerCacheConf = cacheConfig;
  }
  HFileContext hFileContext = new HFileContextBuilder()
      .withCompression(compression)
      .withIncludesMvcc(true)
      .withIncludesTags(true)
      .withCompressTags(family.isCompressTags())
      .withChecksumType(checksumType)
      .withBytesPerCheckSum(bytesPerChecksum)
      .withBlockSize(blocksize)
      .withHBaseCheckSum(true)
      .withDataBlockEncoding(family.getDataBlockEncoding())
      .withEncryptionContext(cryptoContext)
      .withCreateTime(EnvironmentEdgeManager.currentTime())
      .build();
  StoreFileWriter w = new StoreFileWriter.Builder(conf, writerCacheConf, fs)
      .withFilePath(path)
      .withBloomType(bloomType)
      .withMaxKeyCount(maxKeyCount)
      .withFileContext(hFileContext)
      .build();
  return w;
}
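A hedged example of calling this method follows; it is not taken from the HBase code base. The variables family, tempPath, cacheConfig and cryptoContext are assumed to exist in the caller's scope, and every concrete value is a placeholder chosen to match the parameter types in the signature above, not a recommended configuration.

// Sketch only: argument values are illustrative placeholders.
StoreFileWriter mobWriter = MobUtils.createWriter(
    conf,                                   // current configuration
    fs,                                     // current file system
    family,                                 // ColumnFamilyDescriptor of the MOB-enabled family
    tempPath,                               // path under the temp directory
    10000L,                                 // max key count, used to size the bloom filter
    Compression.Algorithm.NONE,             // compression (null falls back to the HFile default)
    cacheConfig,                            // current cache config
    cryptoContext,                          // encryption context
    ChecksumType.CRC32C,                    // checksum type
    HConstants.DEFAULT_BYTES_PER_CHECKSUM,  // bytes per checksum
    HConstants.DEFAULT_BLOCKSIZE,           // HFile block size
    BloomType.NONE,                         // bloom filter type
    false);                                 // not a compaction writer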