Search in sources :

Example 26 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class MetaPurge method purgeTSMeta.

/**
 * Scans the meta table and issues deletes for every TSMeta column and TSMeta
 * counter column found.
 * <p>
 * Rows are handled one scanner batch at a time: the next batch is requested
 * only after all deletes issued for the current batch have completed. If the
 * scanner, a delete or one of the callbacks fails, the returned deferred is
 * called back with the exception so that callers waiting on it do not block
 * forever.
 * @return A deferred that resolves to the value of the {@code columns}
 * counter (incremented once per column scheduled for deletion) after the
 * scanner has been exhausted, or to an exception on failure
 */
public Deferred<Long> purgeTSMeta() {
    // pending deletes for the current batch of rows, kept so we don't fetch
    // more rows (or finish) before they've completed
    final ArrayList<Deferred<Object>> delete_calls = new ArrayList<Deferred<Object>>();
    final Deferred<Long> result = new Deferred<Long>();

    /**
     * Error handler attached to every asynchronous chain started below. It
     * logs the failure and hands the exception to {@code result}. The scan
     * stops on the first failure because no further batch is requested.
     */
    final class ErrorCB implements Callback<Object, Exception> {

        @Override
        public Object call(final Exception e) throws Exception {
            LOG.error("[" + thread_id + "] Unexpected exception while purging TSMeta", e);
            result.callback(e);
            return null;
        }
    }

    /**
     * Scanner callback that will recursively call itself and loop through the
     * rows of the meta table, issuing one delete request per row for all of
     * the columns that match a TSMeta qualifier.
     */
    final class MetaScanner implements Callback<Deferred<Long>, ArrayList<ArrayList<KeyValue>>> {

        final Scanner scanner;

        public MetaScanner() {
            scanner = getScanner(tsdb.metaTable());
        }

        /**
         * Fetches the next group of rows from the scanner and sets this class
         * as the callback.
         * @return A deferred that completes once the fetched batch has been
         * handed to {@link #call}
         */
        public Deferred<Long> scan() {
            return scanner.nextRows().addCallbackDeferring(this);
        }

        @Override
        public Deferred<Long> call(ArrayList<ArrayList<KeyValue>> rows) throws Exception {
            if (rows == null) {
                // scanner exhausted, report the total to the caller
                result.callback(columns);
                return null;
            }
            for (final ArrayList<KeyValue> row : rows) {
                // one delete request per row. We'll almost always delete the whole
                // row, so preallocate some ram.
                final ArrayList<byte[]> qualifiers = new ArrayList<byte[]>(row.size());
                for (final KeyValue column : row) {
                    final byte[] qualifier = column.qualifier();
                    if (Bytes.equals(TSMeta.META_QUALIFIER(), qualifier)
                            || Bytes.equals(TSMeta.COUNTER_QUALIFIER(), qualifier)) {
                        qualifiers.add(qualifier);
                    }
                }
                if (!qualifiers.isEmpty()) {
                    columns += qualifiers.size();
                    final DeleteRequest delete = new DeleteRequest(tsdb.metaTable(), row.get(0).key(), NAME_FAMILY, qualifiers.toArray(new byte[qualifiers.size()][]));
                    delete_calls.add(tsdb.getClient().delete(delete));
                }
            }
            /**
             * Buffer callback used to wait on all of the delete calls for the
             * last set of rows returned from the scanner so we don't fill up
             * the deferreds array and OOM out.
             */
            final class ContinueCB implements Callback<Deferred<Long>, ArrayList<Object>> {

                @Override
                public Deferred<Long> call(ArrayList<Object> deletes) throws Exception {
                    LOG.debug("[" + thread_id + "] Processed [" + deletes.size() + "] delete calls");
                    delete_calls.clear();
                    return scan();
                }
            }
            // fetch the next set of rows after waiting for current set of delete
            // requests to complete. This chain is detached from the deferred
            // returned to our own caller, so it needs its own error handler.
            Deferred.group(delete_calls).addCallbackDeferring(new ContinueCB()).addErrback(new ErrorCB());
            return null;
        }
    }
    // start the scan; failures of the very first batch are reported here
    new MetaScanner().scan().addErrback(new ErrorCB());
    return result;
}
Also used : Scanner(org.hbase.async.Scanner) KeyValue(org.hbase.async.KeyValue) Deferred(com.stumbleupon.async.Deferred) ArrayList(java.util.ArrayList) Callback(com.stumbleupon.async.Callback) DeleteRequest(org.hbase.async.DeleteRequest)

Example 27 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class TreeSync method run.

/**
 * Performs a tree synchronization: loads all tree definitions, then walks the
 * rows returned by the scanner from {@code getScanner()}, parses a TSMeta from
 * the first column of every row and runs it through a {@link TreeBuilder} for
 * each enabled tree.
 * <p>
 * The method blocks until the scan has finished. It returns early, without
 * scanning, when no tree definitions exist or none of them is enabled. An
 * unexpected failure in any scanner batch (not only the first one) is logged
 * and ends the run instead of leaving the thread blocked.
 * @throws RuntimeException if loading the trees or waiting on the scan fails
 */
public void run() {
    final Scanner scanner = getScanner();
    // start the process by loading all of the trees in the system
    final List<Tree> trees;
    try {
        trees = Tree.fetchAllTrees(tsdb).joinUninterruptibly();
        LOG.info("[" + thread_id + "] Complete");
    } catch (Exception e) {
        LOG.error("[" + thread_id + "] Unexpected Exception", e);
        throw new RuntimeException("[" + thread_id + "] Unexpected exception", e);
    }
    if (trees == null) {
        LOG.warn("No tree definitions were found");
        return;
    }
    boolean has_enabled_tree = false;
    for (Tree tree : trees) {
        if (tree.getEnabled()) {
            has_enabled_tree = true;
            break;
        }
    }
    if (!has_enabled_tree) {
        LOG.warn("No enabled trees were found");
        return;
    }
    LOG.info("Found [" + trees.size() + "] trees");
    // setup an array for storing the tree processing calls so we can block 
    // until each call has completed
    final ArrayList<Deferred<Boolean>> tree_calls = new ArrayList<Deferred<Boolean>>();
    final Deferred<Boolean> completed = new Deferred<Boolean>();
    /**
     * Used to capture unhandled exceptions from the scanner callbacks and 
     * exit the thread properly. Declared ahead of the scanner callback so it
     * can be attached to every batch continuation as well as the first scan.
     */
    final class ScannerErrBack implements Callback<Deferred<Boolean>, Exception> {

        @Override
        public Deferred<Boolean> call(Exception e) throws Exception {
            LOG.error("Unexpected exception", e);
            completed.callback(false);
            return Deferred.fromResult(false);
        }
    }
    /**
     * Scanner callback that loops through the scanned table recursively until 
     * the scanner returns a null row set.
     */
    final class TsuidScanner implements Callback<Deferred<Boolean>, ArrayList<ArrayList<KeyValue>>> {

        /**
         * Fetches the next set of rows from the scanner, adding this class as a 
         * callback
         * @return A meaningless deferred used to wait on until processing has
         * completed
         */
        public Deferred<Boolean> scan() {
            return scanner.nextRows().addCallbackDeferring(this);
        }

        @Override
        public Deferred<Boolean> call(ArrayList<ArrayList<KeyValue>> rows) throws Exception {
            if (rows == null) {
                // scanner exhausted, release the thread blocked in run()
                completed.callback(true);
                return null;
            }
            for (final ArrayList<KeyValue> row : rows) {
                // convert to a string one time
                final String tsuid = UniqueId.uidToString(row.get(0).key());
                /**
                 * A throttling callback used to wait for the current TSMeta to 
                 * complete processing through the trees before continuing on with 
                 * the next set.
                 */
                final class TreeBuilderBufferCB implements Callback<Boolean, ArrayList<ArrayList<Boolean>>> {

                    @Override
                    public Boolean call(ArrayList<ArrayList<Boolean>> builder_calls) throws Exception {
                        return true;
                    }
                }
                /**
                 * Executed after parsing a TSMeta object and loading all of the
                 * associated UIDMetas. Once the meta has been loaded, this callback
                 * runs it through each of the configured TreeBuilder objects and
                 * stores the resulting deferred in an array. Once processing of all
                 * of the rules has completed, we group the deferreds and call
                 * TreeBuilderBufferCB to wait for their completion.
                 */
                final class ParseCB implements Callback<Deferred<Boolean>, TSMeta> {

                    final ArrayList<Deferred<ArrayList<Boolean>>> builder_calls = new ArrayList<Deferred<ArrayList<Boolean>>>();

                    @Override
                    public Deferred<Boolean> call(TSMeta meta) throws Exception {
                        if (meta == null) {
                            return Deferred.fromResult(false);
                        }
                        // only pay for the JSON serialization when it will be logged
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Processing TSMeta: " + meta + " w value: " + JSON.serializeToString(meta));
                        }
                        // copy the trees into a tree builder object and iterate through
                        // each builder. We need to do this as a builder is not thread
                        // safe and cannot be used asynchronously.
                        final ArrayList<TreeBuilder> tree_builders = new ArrayList<TreeBuilder>(trees.size());
                        for (Tree tree : trees) {
                            if (!tree.getEnabled()) {
                                continue;
                            }
                            final TreeBuilder builder = new TreeBuilder(tsdb, tree);
                            tree_builders.add(builder);
                        }
                        for (TreeBuilder builder : tree_builders) {
                            builder_calls.add(builder.processTimeseriesMeta(meta));
                        }
                        return Deferred.group(builder_calls).addCallback(new TreeBuilderBufferCB());
                    }
                }
                /**
                 * An error handler used to catch issues when loading the TSMeta such
                 * as a missing UID name. In these situations we want to log that the 
                 * TSMeta had an issue and continue on.
                 */
                final class ErrBack implements Callback<Deferred<Boolean>, Exception> {

                    @Override
                    public Deferred<Boolean> call(Exception e) throws Exception {
                        final Class<?> type = e.getClass();
                        if (type.equals(IllegalStateException.class) || type.equals(IllegalArgumentException.class)) {
                            LOG.error("Invalid data when processing TSUID [" + tsuid + "]", e);
                        } else if (type.equals(NoSuchUniqueId.class)) {
                            LOG.warn("Timeseries [" + tsuid + "] includes a non-existant UID: " + e.getMessage());
                        } else {
                            LOG.error("[" + thread_id + "] Exception while processing TSUID [" + tsuid + "]", e);
                        }
                        return Deferred.fromResult(false);
                    }
                }
                // request a parsing of the row's first column as a TSMeta and
                // loading of associated UIDMeta objects, then pass it off to
                // callbacks for parsing through the trees.
                final Deferred<Boolean> process_tsmeta = TSMeta.parseFromColumn(tsdb, row.get(0), true).addCallbackDeferring(new ParseCB());
                process_tsmeta.addErrback(new ErrBack());
                tree_calls.add(process_tsmeta);
            }
            /**
             * Another buffer callback that waits for the current set of TSMetas to
             * complete their tree calls before we fetch another set of rows from
             * the scanner. This necessary to avoid OOM issues.
             */
            final class ContinueCB implements Callback<Deferred<Boolean>, ArrayList<Boolean>> {

                @Override
                public Deferred<Boolean> call(ArrayList<Boolean> tsuids) throws Exception {
                    LOG.debug("Processed [" + tsuids.size() + "] tree_calls, continuing");
                    tree_calls.clear();
                    return scan();
                }
            }
            // request the next set of rows from the scanner, but wait until the
            // current set of TSMetas has been processed so we don't slaughter our
            // host. This chain is detached from the deferred we return, so it
            // carries its own errback; otherwise a failure in a later batch would
            // never complete "completed" and run() would block forever.
            Deferred.group(tree_calls).addCallbackDeferring(new ContinueCB()).addErrback(new ScannerErrBack());
            return Deferred.fromResult(null);
        }
    }
    final TsuidScanner tree_scanner = new TsuidScanner();
    tree_scanner.scan().addErrback(new ScannerErrBack());
    try {
        completed.joinUninterruptibly();
        LOG.info("[" + thread_id + "] Complete");
    } catch (Exception e) {
        LOG.error("[" + thread_id + "] Scanner Exception", e);
        throw new RuntimeException("[" + thread_id + "] Scanner exception", e);
    }
}
Also used : Scanner(org.hbase.async.Scanner) KeyValue(org.hbase.async.KeyValue) Deferred(com.stumbleupon.async.Deferred) ArrayList(java.util.ArrayList) Tree(net.opentsdb.tree.Tree) TSMeta(net.opentsdb.meta.TSMeta) HBaseException(org.hbase.async.HBaseException) TreeBuilder(net.opentsdb.tree.TreeBuilder) Callback(com.stumbleupon.async.Callback)

Example 28 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class UidManager method metaSync.

/**
 * Drives a meta data synchronization over the data table using a pool of
 * {@code MetaSync} worker threads.
 * <p>
 * The steps performed here are:
 * <ul><li>Ask {@code CliUtils.getDataTableScanners} for a list of scanners,
 * requesting twice the number of available processors</li>
 * <li>Start one {@code MetaSync} thread per scanner. All workers share the
 * same set of processed TSUID hashes and the same metric, tagk and tagv
 * maps</li>
 * <li>Wait for every worker to finish</li>
 * <li>Flush the TSDB so buffered data reaches storage before returning</li>
 * </ul>
 * The per-row work (creating or updating UID and timeseries meta entries) is
 * done inside the {@code MetaSync} workers and is not visible in this method.
 * @param tsdb The tsdb to use for processing
 * @return 0 when all workers have finished and the flush has completed
 * @throws Exception if joining a worker thread or flushing fails
 */
private static int metaSync(final TSDB tsdb) throws Exception {
    final long start_time = System.currentTimeMillis() / 1000;
    // two workers per available processor
    final int workers = Runtime.getRuntime().availableProcessors() * 2;

    // state shared between all of the workers
    final Set<Integer> processed_tsuids = Collections.synchronizedSet(new HashSet<Integer>());
    final ConcurrentHashMap<String, Long> metric_uids = new ConcurrentHashMap<String, Long>();
    final ConcurrentHashMap<String, Long> tagk_uids = new ConcurrentHashMap<String, Long>();
    final ConcurrentHashMap<String, Long> tagv_uids = new ConcurrentHashMap<String, Long>();

    final List<Scanner> scanners = CliUtils.getDataTableScanners(tsdb, workers);
    LOG.info("Spooling up [" + scanners.size() + "] worker threads");

    final List<Thread> threads = new ArrayList<Thread>(scanners.size());
    int started = 0;
    for (final Scanner scanner : scanners) {
        // worker ids start at zero while the thread names start at one
        final int worker_id = started;
        started++;
        final MetaSync worker = new MetaSync(tsdb, scanner, processed_tsuids, metric_uids, tagk_uids, tagv_uids, worker_id);
        worker.setName("Sync #" + started);
        worker.start();
        threads.add(worker);
    }

    // block until every worker is done
    for (final Thread worker_thread : threads) {
        worker_thread.join();
        LOG.info("Thread [" + worker_thread + "] Finished");
    }
    LOG.info("All metasync threads have completed");

    // make sure buffered data is flushed to storage before exiting
    tsdb.flush().joinUninterruptibly();

    final long end_time = System.currentTimeMillis() / 1000;
    final long duration = end_time - start_time;
    LOG.info("Completed meta data synchronization in [" + duration + "] seconds");
    return 0;
}
Also used : Scanner(org.hbase.async.Scanner) ArrayList(java.util.ArrayList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 29 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class Branch method fetchBranch.

/**
 * Attempts to fetch the branch, its leaves and all child branches.
 * The UID names for each leaf may also be loaded if configured.
 * <p>
 * The returned deferred is only called back after the scanner has been
 * exhausted AND every leaf load started during the scan has completed, so
 * the branch handed to the caller is no longer being modified by pending
 * leaf callbacks. A failure of the scanner, of the deserialization or of a
 * leaf load (other than a missing UID, which is logged and skipped) is
 * passed to the returned deferred as an exception.
 * @param tsdb The TSDB to use for storage access
 * @param branch_id ID of the branch to retrieve
 * @param load_leaf_uids Whether or not to load UID names for each leaf
 * @return A deferred resolving to the branch if found, null if it did not
 * exist
 * @throws JSONException if the object could not be deserialized
 */
public static Deferred<Branch> fetchBranch(final TSDB tsdb, final byte[] branch_id, final boolean load_leaf_uids) {
    final Deferred<Branch> result = new Deferred<Branch>();
    final Scanner scanner = setupBranchScanner(tsdb, branch_id);
    // This is the branch that will be loaded with data from the scanner and
    // returned at the end of the process.
    final Branch branch = new Branch();
    // A list of deferreds to wait on for child leaf processing
    final ArrayList<Deferred<Object>> leaf_group = new ArrayList<Deferred<Object>>();
    /**
     * Exception handler to catch leaves with an invalid UID name due to a 
     * possible deletion. This will allow the scanner to keep loading valid
     * leaves and ignore problems. The fsck tool can be used to clean up
     * orphaned leaves. If we catch something other than an NSU, it will
     * re-throw the exception
     */
    final class LeafErrBack implements Callback<Object, Exception> {

        final byte[] qualifier;

        public LeafErrBack(final byte[] qualifier) {
            this.qualifier = qualifier;
        }

        @Override
        public Object call(final Exception e) throws Exception {
            // unwrap group exceptions, stopping if one has no cause to unwrap
            Throwable ex = e;
            while (ex.getClass().equals(DeferredGroupException.class) && ex.getCause() != null) {
                ex = ex.getCause();
            }
            if (ex.getClass().equals(NoSuchUniqueId.class)) {
                LOG.debug("Invalid UID for leaf: " + idToString(qualifier) + " in branch: " + idToString(branch_id), ex);
                return null;
            }
            if (ex instanceof Exception) {
                throw (Exception) ex;
            }
            // the root cause is not an Exception, so re-throw what we were given
            throw e;
        }
    }
    /**
     * Called after a leaf has been loaded successfully and adds the leaf
     * to the branch's leaf set. Also lazily initializes the leaf set if it 
     * hasn't been.
     */
    final class LeafCB implements Callback<Object, Leaf> {

        @Override
        public Object call(final Leaf leaf) throws Exception {
            if (leaf != null) {
                if (branch.leaves == null) {
                    branch.leaves = new HashMap<Integer, Leaf>();
                }
                branch.leaves.put(leaf.hashCode(), leaf);
            }
            return null;
        }
    }
    /**
     * Final callback executed once the scanner is exhausted and all of the
     * leaf loads have completed. Passes the branch to the caller, or null if
     * no definition for the requested branch was found.
     */
    final class CompleteCB implements Callback<Object, ArrayList<Object>> {

        @Override
        public Object call(final ArrayList<Object> leaves) throws Exception {
            if (branch.tree_id < 1 || branch.path == null) {
                result.callback(null);
            } else {
                result.callback(branch);
            }
            return null;
        }
    }
    /**
     * Passes a scanner, parsing or leaf loading failure on to the caller so
     * the returned deferred is always called back.
     */
    final class ErrorCB implements Callback<Object, Exception> {

        @Override
        public Object call(final Exception e) throws Exception {
            result.callback(e);
            return null;
        }
    }
    /**
     * Scanner callback executed recursively each time we get a set of data
     * from storage. This is responsible for determining what columns are 
     * returned and issuing requests to load leaf objects.
     * When the scanner returns a null set of rows, the method initiates the
     * final callback.
     */
    final class FetchBranchCB implements Callback<Object, ArrayList<ArrayList<KeyValue>>> {

        /**
         * Starts the scanner and is called recursively to fetch the next set of
         * rows from the scanner.
         * @return A deferred that completes when this and all following scanner
         * batches have been processed, or fails with the first exception
         */
        public Deferred<Object> fetchBranch() {
            return scanner.nextRows().addCallback(this);
        }

        /**
         * Loops through each row of the scanner results and parses out branch
         * definitions and child leaves.
         * @return The deferred of the next scanner batch, or null once the
         * scanner has returned a null set of rows
         */
        @Override
        public Object call(final ArrayList<ArrayList<KeyValue>> rows) throws Exception {
            if (rows == null) {
                // done scanning; wait for the outstanding leaf loads before
                // releasing the branch to the caller
                Deferred.group(leaf_group).addCallbacks(new CompleteCB(), new ErrorCB());
                return null;
            }
            for (final ArrayList<KeyValue> row : rows) {
                for (KeyValue column : row) {
                    // matched a branch column
                    if (Bytes.equals(BRANCH_QUALIFIER, column.qualifier())) {
                        if (Bytes.equals(branch_id, column.key())) {
                            // it's *this* branch. We deserialize to a new object and copy
                            // since the columns could be in any order and we may get a 
                            // leaf before the branch
                            final Branch local_branch = JSON.parseToObject(column.value(), Branch.class);
                            branch.path = local_branch.path;
                            branch.display_name = local_branch.display_name;
                            branch.tree_id = Tree.bytesToId(column.key());
                        } else {
                            // it's a child branch
                            final Branch child = JSON.parseToObject(column.value(), Branch.class);
                            child.tree_id = Tree.bytesToId(column.key());
                            branch.addChild(child);
                        }
                    // parse out a leaf
                    } else if (Bytes.memcmp(Leaf.LEAF_PREFIX(), column.qualifier(), 0, Leaf.LEAF_PREFIX().length) == 0) {
                        if (Bytes.equals(branch_id, column.key())) {
                            // process a leaf and skip if the UIDs for the TSUID can't be 
                            // found. Add an errback to catch NoSuchUniqueId exceptions
                            leaf_group.add(Leaf.parseFromStorage(tsdb, column, load_leaf_uids).addCallbacks(new LeafCB(), new LeafErrBack(column.qualifier())));
                        } else {
                        // TODO - figure out an efficient way to increment a counter in 
                        // the child branch with the # of leaves it has
                        }
                    }
                }
            }
            // recursively call ourself to fetch more results from the scanner
            return fetchBranch();
        }
    }
    // start scanning. Each batch returns the deferred of the next one, so a
    // failure anywhere in the scan surfaces on this outermost deferred.
    new FetchBranchCB().fetchBranch().addErrback(new ErrorCB());
    return result;
}
Also used : Scanner(org.hbase.async.Scanner) KeyValue(org.hbase.async.KeyValue) Deferred(com.stumbleupon.async.Deferred) ArrayList(java.util.ArrayList) DeferredGroupException(com.stumbleupon.async.DeferredGroupException) IOException(java.io.IOException) HBaseException(org.hbase.async.HBaseException) JSONException(net.opentsdb.utils.JSONException) Callback(com.stumbleupon.async.Callback)

Example 30 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class TimeSeriesLookup method getScanner.

/**
 * Builds the scanner used for the lookup. The meta table is scanned when the
 * query asks for it, otherwise the data table. The row keys are prefixed with
 * the salt bytes only when scanning the data table with salting enabled.
 * <p>
 * When a metric UID is set, the scan starts at that metric and stops at the
 * metric UID plus one, unless the incremented UID is not below the maximum
 * value for the metric width, in which case no stop key is set. When a row
 * key regex is set it is applied to the scanner as well.
 * @param salt An ID for the salt bucket
 * @return A scanner to send to HBase.
 */
private Scanner getScanner(final int salt) {
    final Scanner lookup_scanner = tsdb.getClient().newScanner(
        query.useMeta() ? tsdb.metaTable() : tsdb.dataTable());
    lookup_scanner.setFamily(query.useMeta() ? TSMeta.FAMILY : TSDB.FAMILY());

    if (metric_uid != null) {
        // start key: the metric UID, salted when applicable
        final byte[] start_key;
        if (!query.useMeta() && Const.SALT_WIDTH() >= 1) {
            start_key = new byte[Const.SALT_WIDTH() + TSDB.metrics_width()];
            System.arraycopy(RowKey.getSaltBytes(salt), 0, start_key, 0, Const.SALT_WIDTH());
            System.arraycopy(metric_uid, 0, start_key, Const.SALT_WIDTH(), metric_uid.length);
        } else {
            start_key = metric_uid;
        }
        lookup_scanner.setStartKey(start_key);

        // stop key: the next metric UID. If incrementing takes us to (or past)
        // the maximum value for the UID width we leave the stop key as null.
        final long next_uid = UniqueId.uidToLong(metric_uid, TSDB.metrics_width()) + 1;
        if (next_uid < Internal.getMaxUnsignedValueOnBytes(TSDB.metrics_width())) {
            final byte[] stop_key;
            if (!query.useMeta() && Const.SALT_WIDTH() >= 1) {
                stop_key = new byte[Const.SALT_WIDTH() + TSDB.metrics_width()];
                System.arraycopy(RowKey.getSaltBytes(salt), 0, stop_key, 0, Const.SALT_WIDTH());
                System.arraycopy(UniqueId.longToUID(next_uid, TSDB.metrics_width()), 0, stop_key, Const.SALT_WIDTH(), metric_uid.length);
            } else {
                stop_key = UniqueId.longToUID(next_uid, TSDB.metrics_width());
            }
            lookup_scanner.setStopKey(stop_key);
        }
    }

    if (rowkey_regex != null) {
        lookup_scanner.setKeyRegexp(rowkey_regex, CHARSET);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Scanner regex: " + QueryUtil.byteRegexToString(rowkey_regex));
        }
    }
    return lookup_scanner;
}
Also used : Scanner(org.hbase.async.Scanner)

Aggregations

Scanner (org.hbase.async.Scanner)35 ArrayList (java.util.ArrayList)24 KeyValue (org.hbase.async.KeyValue)19 Callback (com.stumbleupon.async.Callback)12 Deferred (com.stumbleupon.async.Deferred)11 DeleteRequest (org.hbase.async.DeleteRequest)7 HBaseException (org.hbase.async.HBaseException)6 List (java.util.List)5 Map (java.util.Map)3 DeferredGroupException (com.stumbleupon.async.DeferredGroupException)2 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 Query (net.opentsdb.core.Query)2 ByteMap (org.hbase.async.Bytes.ByteMap)2 Test (org.junit.Test)2 Matchers.anyString (org.mockito.Matchers.anyString)2 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)2 ByteArrayByteIterator (com.yahoo.ycsb.ByteArrayByteIterator)1 ByteIterator (com.yahoo.ycsb.ByteIterator)1