
Example 6 with BloomType

use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.

the class ThriftUtilities method colDescFromThrift.

/**
 * This utility method creates a new HBase ColumnFamilyDescriptor based on a
 * Thrift ColumnDescriptor "struct".
 *
 * @param in Thrift ColumnDescriptor object
 * @return a ColumnFamilyDescriptor built from the Thrift fields
 * @throws IllegalArgument if the column name is empty
 */
public static ColumnFamilyDescriptor colDescFromThrift(ColumnDescriptor in) throws IllegalArgument {
    Compression.Algorithm comp = Compression.getCompressionAlgorithmByName(in.compression.toLowerCase(Locale.ROOT));
    BloomType bt = BloomType.valueOf(in.bloomFilterType);
    if (in.name == null || !in.name.hasRemaining()) {
        throw new IllegalArgument("column name is empty");
    }
    byte[] parsedName = CellUtil.parseColumn(Bytes.getBytes(in.name))[0];
    return ColumnFamilyDescriptorBuilder.newBuilder(parsedName)
        .setMaxVersions(in.maxVersions)
        .setCompressionType(comp)
        .setInMemory(in.inMemory)
        .setBlockCacheEnabled(in.blockCacheEnabled)
        .setTimeToLive(in.timeToLive > 0 ? in.timeToLive : Integer.MAX_VALUE)
        .setBloomFilterType(bt)
        .build();
}
Also used : Compression(org.apache.hadoop.hbase.io.compress.Compression) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) IllegalArgument(org.apache.hadoop.hbase.thrift.generated.IllegalArgument)
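
For orientation, a minimal sketch of how this conversion might be driven end to end. The field defaults (maxVersions = 3, compression = "NONE", bloomFilterType = "NONE") come from the Hbase.thrift struct definition; the wrapper method and the chosen values here are hypothetical.

public static ColumnFamilyDescriptor exampleColDescFromThrift() throws IllegalArgument {
    org.apache.hadoop.hbase.thrift.generated.ColumnDescriptor thriftCol =
        new org.apache.hadoop.hbase.thrift.generated.ColumnDescriptor();
    // name is a ByteBuffer; parseColumn(...)[0] in colDescFromThrift keeps only the family part
    thriftCol.name = java.nio.ByteBuffer.wrap(Bytes.toBytes("cf"));
    thriftCol.maxVersions = 1;
    // matched case-insensitively via Compression.getCompressionAlgorithmByName
    thriftCol.compression = "GZ";
    // must name a BloomType enum constant (NONE, ROW, ROWCOL, ROWPREFIX_FIXED_LENGTH)
    thriftCol.bloomFilterType = "ROWCOL";
    return colDescFromThrift(thriftCol);
}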

Example 7 with BloomType

use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.

the class BulkLoadHFilesTool method copyHFileHalf.

/**
 * Copy half of an HFile into a new HFile.
 */
private static void copyHFileHalf(Configuration conf, Path inFile, Path outFile, Reference reference, ColumnFamilyDescriptor familyDescriptor) throws IOException {
    FileSystem fs = inFile.getFileSystem(conf);
    CacheConfig cacheConf = CacheConfig.DISABLED;
    HalfStoreFileReader halfReader = null;
    StoreFileWriter halfWriter = null;
    try {
        ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, inFile).build();
        HFileInfo hfile = new HFileInfo(context, conf);
        halfReader = new HalfStoreFileReader(context, hfile, cacheConf, reference, new AtomicInteger(0), conf);
        hfile.initMetaAndIndex(halfReader.getHFileReader());
        Map<byte[], byte[]> fileInfo = halfReader.loadFileInfo();
        int blocksize = familyDescriptor.getBlocksize();
        Algorithm compression = familyDescriptor.getCompressionType();
        BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
        HFileContext hFileContext = new HFileContextBuilder()
            .withCompression(compression)
            .withChecksumType(StoreUtils.getChecksumType(conf))
            .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf))
            .withBlockSize(blocksize)
            .withDataBlockEncoding(familyDescriptor.getDataBlockEncoding())
            .withIncludesTags(true)
            .build();
        halfWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
            .withFilePath(outFile)
            .withBloomType(bloomFilterType)
            .withFileContext(hFileContext)
            .build();
        HFileScanner scanner = halfReader.getScanner(false, false, false);
        scanner.seekTo();
        do {
            halfWriter.append(scanner.getCell());
        } while (scanner.next());
        for (Map.Entry<byte[], byte[]> entry : fileInfo.entrySet()) {
            if (shouldCopyHFileMetaKey(entry.getKey())) {
                halfWriter.appendFileInfo(entry.getKey(), entry.getValue());
            }
        }
    } finally {
        if (halfReader != null) {
            try {
                halfReader.close(cacheConf.shouldEvictOnClose());
            } catch (IOException e) {
                LOG.warn("failed to close hfile reader for " + inFile, e);
            }
        }
        if (halfWriter != null) {
            halfWriter.close();
        }
    }
}
Also used : StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) HalfStoreFileReader(org.apache.hadoop.hbase.io.HalfStoreFileReader) HFileScanner(org.apache.hadoop.hbase.io.hfile.HFileScanner) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) HFileInfo(org.apache.hadoop.hbase.io.hfile.HFileInfo) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FileSystem(org.apache.hadoop.fs.FileSystem) ReaderContext(org.apache.hadoop.hbase.io.hfile.ReaderContext) ReaderContextBuilder(org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)
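
For context, copyHFileHalf is called once per half when an input HFile straddles a region boundary. A rough sketch of that call pattern, with hypothetical output paths and split key (the real driver in BulkLoadHFilesTool derives both from the target region layout):

private static void splitIntoHalves(Configuration conf, Path inFile, byte[] splitKey,
    Path bottomOut, Path topOut, ColumnFamilyDescriptor familyDesc) throws IOException {
    // A Reference (org.apache.hadoop.hbase.io.Reference) tells the half-file reader
    // which side of the split key the source file should expose.
    Reference bottom = Reference.createBottomReference(splitKey); // keys before splitKey
    Reference top = Reference.createTopReference(splitKey); // keys at or after splitKey
    copyHFileHalf(conf, inFile, bottomOut, bottom, familyDesc);
    copyHFileHalf(conf, inFile, topOut, top, familyDesc);
}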

Example 8 with BloomType

use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.

the class HBaseTestingUtility method generateColumnDescriptors.

/**
 * Create a set of column descriptors covering every combination of the
 * available compression, data block encoding, and bloom filter types.
 * @param prefix family names prefix
 * @return the list of column descriptors
 */
public static List<ColumnFamilyDescriptor> generateColumnDescriptors(final String prefix) {
    List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>();
    long familyId = 0;
    for (Compression.Algorithm compressionType : getSupportedCompressionAlgorithms()) {
        for (DataBlockEncoding encodingType : DataBlockEncoding.values()) {
            for (BloomType bloomType : BloomType.values()) {
                String name = String.format("%s-cf-!@#&-%d!@#", prefix, familyId);
                ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(name));
                columnFamilyDescriptorBuilder.setCompressionType(compressionType);
                columnFamilyDescriptorBuilder.setDataBlockEncoding(encodingType);
                columnFamilyDescriptorBuilder.setBloomFilterType(bloomType);
                columnFamilyDescriptors.add(columnFamilyDescriptorBuilder.build());
                familyId++;
            }
        }
    }
    return columnFamilyDescriptors;
}
Also used : DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) Compression(org.apache.hadoop.hbase.io.compress.Compression) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) ArrayList(java.util.ArrayList) ColumnFamilyDescriptorBuilder(org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor)
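
A short sketch of how the generated descriptors might be consumed, for example to build one test table covering the whole codec matrix; the table name and the TableDescriptorBuilder usage here are illustrative:

public static TableDescriptor exampleAllCodecsTable() {
    // Every combination of compression x encoding x bloom type becomes one column family.
    List<ColumnFamilyDescriptor> families = generateColumnDescriptors("codec");
    return TableDescriptorBuilder.newBuilder(TableName.valueOf("allCodecs"))
        .setColumnFamilies(families)
        .build();
}

Creating such a table through Admin.createTable is a quick way to surface compression or encoding codecs that a particular cluster does not support.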

Example 9 with BloomType

use of org.apache.hadoop.hbase.regionserver.BloomType in project phoenix by apache.

the class MultiHfileOutputFormat method createRecordWriter.

/**
 * Creates a RecordWriter that writes HFiles into per-table, per-column-family
 * directories under the task attempt's work path.
 * @param context the task attempt context
 * @return a RecordWriter keyed by TableRowkeyPair
 * @throws IOException if output directories or writers cannot be created
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context) throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
    return new RecordWriter<TableRowkeyPair, V>() {

        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);

        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;

        private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());

        private boolean rollRequested = false;

        @Override
        public void write(TableRowkeyPair row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            // phoenix-2216: start : extract table name from the rowkey
            String tableName = row.getTableName();
            byte[] rowKey = row.getRowkey().get();
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            byte[] tableAndFamily = join(tableName, Bytes.toString(family));
            WriterLength wl = this.writers.get(tableAndFamily);
            // If this is a new column family, make sure its output directory exists
            if (wl == null) {
                // phoenix-2216: start : create a directory for table and family within the output dir
                Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
                fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
            // phoenix-2216: end
            }
            // If any of the HFiles for the column families has reached maxsize,
            // we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            // create a new HFile writer, if necessary
            if (wl == null || wl.writer == null) {
                // phoenix-2216: start : pass the table name through as well
                wl = getNewWriter(tableName, family, conf);
                // phoenix-2216: end
            }
            // we now have the proper HFile writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            // Remember the row so we can detect a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /**
         * Create a new StoreFile.Writer.
         * @param tableName table the writer is for
         * @param family column family
         * @param conf job configuration
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
        private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
            Path familydir = new Path(tableOutputPath, Bytes.toString(family));
            // phoenix-2216: start : fetching the configuration properties that were set to the table.
            // create a map from column family to the compression algorithm for the table.
            final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
            final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
            final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
            // phoenix-2216: end
            String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
            final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf, tableName);
            final DataBlockEncoding overriddenEncoding;
            if (dataBlockEncodingStr != null) {
                overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
            } else {
                overriddenEncoding = null;
            }
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder()
                .withCompression(compression)
                .withChecksumType(HStore.getChecksumType(conf))
                .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
                .withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                .withOutputDir(familydir)
                .withBloomType(bloomType)
                .withComparator(KeyValue.COMPARATOR)
                .withFileContext(hFileContext)
                .build();
            // phoenix-2216: start : the writers map is keyed by a joined byte
            // array of table name and family
            byte[] tableAndFamily = join(tableName, Bytes.toString(family));
            this.writers.put(tableAndFamily, wl);
            // phoenix-2216: end
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
Also used : DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TableRowkeyPair(org.apache.phoenix.mapreduce.bulkload.TableRowkeyPair) FileSystem(org.apache.hadoop.fs.FileSystem) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Path(org.apache.hadoop.fs.Path) FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) BloomType(org.apache.hadoop.hbase.regionserver.BloomType) Map(java.util.Map) TreeMap(java.util.TreeMap) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) AbstractHFileWriter(org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter)
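
The createFamilyBloomTypeMap helper referenced above is not shown here; conceptually it decodes a per-family mapping for one table out of the job configuration. Below is a hypothetical decoder in that spirit, modeled on the URL-encoded "family=value&family=value" scheme that HBase's HFileOutputFormat2 uses for the same purpose; the config key and the wire format are assumptions, not Phoenix's actual implementation.

static Map<byte[], BloomType> decodeFamilyBloomTypeMap(Configuration conf, String confKey)
    throws java.io.UnsupportedEncodingException {
    // Assumed wire format: URL-encoded "family=BLOOMTYPE" pairs joined with '&'.
    Map<byte[], BloomType> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    for (String pair : conf.get(confKey, "").split("&")) {
        String[] kv = pair.split("=", 2);
        if (kv.length != 2) {
            continue;
        }
        byte[] family = Bytes.toBytes(java.net.URLDecoder.decode(kv[0], "UTF-8"));
        map.put(family, BloomType.valueOf(java.net.URLDecoder.decode(kv[1], "UTF-8")));
    }
    return map;
}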

Example 10 with BloomType

use of org.apache.hadoop.hbase.regionserver.BloomType in project hbase by apache.

the class ChangeBloomFilterAction method perform.

@Override
public void perform() throws Exception {
    final Random random = new Random();
    final BloomType[] bloomArray = BloomType.values();
    final int bloomArraySize = bloomArray.length;
    getLogger().info("Performing action: Change bloom filter on all columns of table " + tableName);
    modifyAllTableColumns(tableName, (columnName, columnBuilder) -> {
        BloomType bloomType = bloomArray[random.nextInt(bloomArraySize)];
        getLogger().debug("Performing action: About to set bloom filter type to " + bloomType + " on column " + columnName + " of table " + tableName);
        columnBuilder.setBloomFilterType(bloomType);
        if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) {
            columnBuilder.setConfiguration(BloomFilterUtil.PREFIX_LENGTH_KEY, "10");
        }
    });
    getLogger().debug("Performing action: Just set bloom filter types on table " + tableName);
}
Also used : Random(java.util.Random) BloomType(org.apache.hadoop.hbase.regionserver.BloomType)
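
Outside a chaos-testing context, the same change can be made directly through Admin. A minimal sketch, assuming an open Admin connection; the method name and the prefix length of 10 are illustrative:

void setPrefixBloomFilter(Admin admin, TableName tableName, byte[] family) throws IOException {
    ColumnFamilyDescriptor current = admin.getDescriptor(tableName).getColumnFamily(family);
    ColumnFamilyDescriptor updated = ColumnFamilyDescriptorBuilder.newBuilder(current)
        .setBloomFilterType(BloomType.ROWPREFIX_FIXED_LENGTH)
        // ROWPREFIX_FIXED_LENGTH additionally requires a fixed prefix length
        .setConfiguration(BloomFilterUtil.PREFIX_LENGTH_KEY, "10")
        .build();
    admin.modifyColumnFamily(tableName, updated);
}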

Aggregations

BloomType (org.apache.hadoop.hbase.regionserver.BloomType): 13 usages
Map (java.util.Map): 6 usages
TreeMap (java.util.TreeMap): 6 usages
Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm): 6 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 4 usages
DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding): 4 usages
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 4 usages
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 4 usages
StoreFileWriter (org.apache.hadoop.hbase.regionserver.StoreFileWriter): 4 usages
Configuration (org.apache.hadoop.conf.Configuration): 3 usages
Path (org.apache.hadoop.fs.Path): 3 usages
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor): 3 usages
ColumnFamilyDescriptorBuilder (org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder): 3 usages
Compression (org.apache.hadoop.hbase.io.compress.Compression): 3 usages
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 3 usages
ArrayList (java.util.ArrayList): 2 usages
HashMap (java.util.HashMap): 2 usages
Cell (org.apache.hadoop.hbase.Cell): 2 usages
KeyValue (org.apache.hadoop.hbase.KeyValue): 2 usages
HalfStoreFileReader (org.apache.hadoop.hbase.io.HalfStoreFileReader): 2 usages