Search in sources :

Example 26 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class MetaPurge method purgeTSMeta.

  /**
   * Scans the entire meta table and removes any TSMeta objects (and their
   * counter columns) found.
   * @return The total number of columns deleted
   */
// Walks tsdb.metaTable() with an asynchronous scanner and prepares one
// DeleteRequest per row for the columns whose qualifier equals
// TSMeta.META_QUALIFIER() or TSMeta.COUNTER_QUALIFIER(). The caller is handed
// the `result` deferred, which is typed to carry the deleted column count.
// NOTE(review): this excerpt has lost its closing braces and some statements
// (nothing visible fills `qualifiers`, sends the DeleteRequest or completes
// `result`) - confirm against the full source before relying on it.
public Deferred<Long> purgeTSMeta() {
    // a list to store all pending deletes so we don't exit before they've 
    // completed
    final ArrayList<Deferred<Object>> delete_calls = new ArrayList<Deferred<Object>>();
    final Deferred<Long> result = new Deferred<Long>();
     * Scanner callback that will recursively call itself and loop through the
     * rows of the UID table, issuing delete requests for all of the columns in
     * a row that match a meta qualifier.
    final class MetaScanner implements Callback<Deferred<Long>, ArrayList<ArrayList<KeyValue>>> {

        final Scanner scanner;

        public MetaScanner() {
            // the scanner is opened on the meta table, not the UID table
            scanner = getScanner(tsdb.metaTable());

       * Fetches the next group of rows from the scanner and sets this class as
       * a callback
       * @return The total number of columns deleted after completion
        public Deferred<Long> scan() {
            // registering `this` means call() below receives each batch of rows
            return scanner.nextRows().addCallbackDeferring(this);

        public Deferred<Long> call(ArrayList<ArrayList<KeyValue>> rows) throws Exception {
            // presumably a null row set means the scanner is exhausted - TODO confirm
            if (rows == null) {
                return null;
            for (final ArrayList<KeyValue> row : rows) {
                // one delete request per row. We'll almost always delete the whole
                // row, so preallocate some ram.
                ArrayList<byte[]> qualifiers = new ArrayList<byte[]>(row.size());
                for (KeyValue column : row) {
                    // both the meta column and its counter column are candidates
                    if (Bytes.equals(TSMeta.META_QUALIFIER(), column.qualifier())) {
                    } else if (Bytes.equals(TSMeta.COUNTER_QUALIFIER(), column.qualifier())) {
                // rows without a matching column produce no delete request
                if (qualifiers.size() > 0) {
                    // NOTE(review): `columns` is not declared in this excerpt;
                    // presumably a running total kept by the enclosing class
                    columns += qualifiers.size();
                    // the row key is taken from the first cell of the row
                    final DeleteRequest delete = new DeleteRequest(tsdb.metaTable(), row.get(0).key(), NAME_FAMILY, qualifiers.toArray(new byte[qualifiers.size()][]));
         * Buffer callback used to wait on all of the delete calls for the
         * last set of rows returned from the scanner so we don't fill up the
         * deferreds array and OOM out.
            final class ContinueCB implements Callback<Deferred<Long>, ArrayList<Object>> {

                public Deferred<Long> call(ArrayList<Object> deletes) throws Exception {
                    // NOTE(review): `thread_id` is not declared in this excerpt
                    LOG.debug("[" + thread_id + "] Processed [" + deletes.size() + "] delete calls");
                    // loop: ask the scanner for the next batch of rows
                    return scan();
            // fetch the next set of rows after waiting for current set of delete
            // requests to complete
            return null;
    // start the scan
    // the deferred returned by scan() is dropped here; only `result` is returned
    new MetaScanner().scan();
    return result;
Also used : Scanner(org.hbase.async.Scanner) KeyValue(org.hbase.async.KeyValue) Deferred(com.stumbleupon.async.Deferred) ArrayList(java.util.ArrayList) Callback(com.stumbleupon.async.Callback) DeleteRequest(org.hbase.async.DeleteRequest)

Example 27 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class TreeSync method run.

  /**
   * Performs a tree synchronization using a table scanner across the UID table
   * @throws RuntimeException if loading the trees or running the scanner fails
   */
// Loads every tree definition, then scans rows and pushes each parsed TSMeta
// through a TreeBuilder per tree, blocking on `completed` until the scan is
// done. Failures while loading trees or scanning are rethrown as
// RuntimeException.
// NOTE(review): this excerpt has lost its closing braces, several statements
// and the call prefix of some log lines (see the dangling string fragments) -
// confirm against the full source before relying on it.
public void run() {
    final Scanner scanner = getScanner();
    // start the process by loading all of the trees in the system
    final List<Tree> trees;
    try {
        // blocks the calling thread until the tree list has been fetched
        trees = Tree.fetchAllTrees(tsdb).joinUninterruptibly();"[" + thread_id + "] Complete");
    } catch (Exception e) {
        LOG.error("[" + thread_id + "] Unexpected Exception", e);
        throw new RuntimeException("[" + thread_id + "] Unexpected exception", e);
    if (trees == null) {
        LOG.warn("No tree definitions were found");
    } else {
        // look for at least one enabled tree; a warning is logged when none is
        boolean has_enabled_tree = false;
        for (Tree tree : trees) {
            if (tree.getEnabled()) {
                has_enabled_tree = true;
        if (!has_enabled_tree) {
            LOG.warn("No enabled trees were found");
        }"Found [" + trees.size() + "] trees");
    // setup an array for storing the tree processing calls so we can block 
    // until each call has completed
    final ArrayList<Deferred<Boolean>> tree_calls = new ArrayList<Deferred<Boolean>>();
    // run() waits on this deferred at the bottom of the method
    final Deferred<Boolean> completed = new Deferred<Boolean>();
     * Scanner callback that loops through the UID table recursively until 
     * the scanner returns a null row set.
    final class TsuidScanner implements Callback<Deferred<Boolean>, ArrayList<ArrayList<KeyValue>>> {

       * Fetches the next set of rows from the scanner, adding this class as a 
       * callback
       * @return A meaningless deferred used to wait on until processing has
       * completed
        public Deferred<Boolean> scan() {
            return scanner.nextRows().addCallbackDeferring(this);

        public Deferred<Boolean> call(ArrayList<ArrayList<KeyValue>> rows) throws Exception {
            // a null row set ends the recursion (see the class description)
            if (rows == null) {
                return null;
            for (final ArrayList<KeyValue> row : rows) {
                // convert to a string one time
                final String tsuid = UniqueId.uidToString(row.get(0).key());
           * A throttling callback used to wait for the current TSMeta to 
           * complete processing through the trees before continuing on with 
           * the next set.
                final class TreeBuilderBufferCB implements Callback<Boolean, ArrayList<ArrayList<Boolean>>> {

                    public Boolean call(ArrayList<ArrayList<Boolean>> builder_calls) throws Exception {
                        //LOG.debug("Processed [" + builder_calls.size() + "] tree_calls");
                        // the individual builder results are ignored; always reports true
                        return true;
           * Executed after parsing a TSMeta object and loading all of the
           * associated UIDMetas. Once the meta has been loaded, this callback
           * runs it through each of the configured TreeBuilder objects and
           * stores the resulting deferred in an array. Once processing of all
           * of the rules has completed, we group the deferreds and call
           * BufferCB() to wait for their completion.
                final class ParseCB implements Callback<Deferred<Boolean>, TSMeta> {

                    final ArrayList<Deferred<ArrayList<Boolean>>> builder_calls = new ArrayList<Deferred<ArrayList<Boolean>>>();

                    public Deferred<Boolean> call(TSMeta meta) throws Exception {
                        if (meta != null) {
                            LOG.debug("Processing TSMeta: " + meta + " w value: " + JSON.serializeToString(meta));
                            // copy the trees into a tree builder object and iterate through
                            // each builder. We need to do this as a builder is not thread
                            // safe and cannot be used asynchronously.
                            final ArrayList<TreeBuilder> tree_builders = new ArrayList<TreeBuilder>(trees.size());
                            for (Tree tree : trees) {
                                if (!tree.getEnabled()) {
                                final TreeBuilder builder = new TreeBuilder(tsdb, tree);
                            for (TreeBuilder builder : tree_builders) {
                            return TreeBuilderBufferCB());
                        } else {
                            // no TSMeta was parsed from the column; report false
                            return Deferred.fromResult(false);
           * An error handler used to catch issues when loading the TSMeta such
           * as a missing UID name. In these situations we want to log that the 
           * TSMeta had an issue and continue on.
                final class ErrBack implements Callback<Deferred<Boolean>, Exception> {

                    public Deferred<Boolean> call(Exception e) throws Exception {
                        // exact class comparison: subclasses of these exception
                        // types fall through to the generic branch at the end
                        if (e.getClass().equals(IllegalStateException.class)) {
                            LOG.error("Invalid data when processing TSUID [" + tsuid + "]", e);
                        } else if (e.getClass().equals(IllegalArgumentException.class)) {
                            LOG.error("Invalid data when processing TSUID [" + tsuid + "]", e);
                        } else if (e.getClass().equals(NoSuchUniqueId.class)) {
                            // logged at warn level with the message only, no stack trace
                            LOG.warn("Timeseries [" + tsuid + "] includes a non-existant UID: " + e.getMessage());
                        } else {
                            LOG.error("[" + thread_id + "] Exception while processing TSUID [" + tsuid + "]", e);
                        // the error is converted into a false result instead of being rethrown
                        return Deferred.fromResult(false);
                // matched a TSMeta column, so request a parsing and loading of
                // associated UIDMeta objects, then pass it off to callbacks for 
                // parsing through the trees.
                final Deferred<Boolean> process_tsmeta = TSMeta.parseFromColumn(tsdb, row.get(0), true).addCallbackDeferring(new ParseCB());
                process_tsmeta.addErrback(new ErrBack());
         * Another buffer callback that waits for the current set of TSMetas to
         * complete their tree calls before we fetch another set of rows from
         * the scanner. This necessary to avoid OOM issues.
            final class ContinueCB implements Callback<Deferred<Boolean>, ArrayList<Boolean>> {

                public Deferred<Boolean> call(ArrayList<Boolean> tsuids) throws Exception {
                    LOG.debug("Processed [" + tsuids.size() + "] tree_calls, continuing");
                    // loop: ask the scanner for the next batch of rows
                    return scan();
            // request the next set of rows from the scanner, but wait until the
            // current set of TSMetas has been processed so we don't slaughter our
            // host
            return Deferred.fromResult(null);
     * Used to capture unhandled exceptions from the scanner callbacks and 
     * exit the thread properly
    final class ErrBack implements Callback<Deferred<Boolean>, Exception> {

        public Deferred<Boolean> call(Exception e) throws Exception {
            LOG.error("Unexpected exception", e);
            return Deferred.fromResult(false);
    // kick off the scan; the method-level ErrBack receives scanner failures
    final TsuidScanner tree_scanner = new TsuidScanner();
    tree_scanner.scan().addErrback(new ErrBack());
    try {
        // block this thread until `completed` is fulfilled
        completed.joinUninterruptibly();"[" + thread_id + "] Complete");
    } catch (Exception e) {
        LOG.error("[" + thread_id + "] Scanner Exception", e);
        throw new RuntimeException("[" + thread_id + "] Scanner exception", e);
Also used : Scanner(org.hbase.async.Scanner) KeyValue(org.hbase.async.KeyValue) Deferred(com.stumbleupon.async.Deferred) ArrayList(java.util.ArrayList) Tree(net.opentsdb.tree.Tree) TSMeta(net.opentsdb.meta.TSMeta) HBaseException(org.hbase.async.HBaseException) TreeBuilder(net.opentsdb.tree.TreeBuilder) Callback(com.stumbleupon.async.Callback)

Example 28 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class UidManager method metaSync.

  /**
   * Runs through the entire data table and creates TSMeta objects for unique
   * timeseries and/or updates {@code created} timestamps
   * The process is as follows:
   * <ul><li>Fetch the max number of Metric UIDs as we'll use those to match
   * on the data rows</li>
   * <li>Split the # of UIDs amongst worker threads</li>
   * <li>Setup a scanner in each thread for the range it will be working on and
   * start iterating</li>
   * <li>Fetch the TSUID from the row key</li>
   * <li>For each unprocessed TSUID:
   * <ul><li>Check if the metric UID mapping is present, if not, log an error
   * and continue</li>
   * <li>See if the meta for the metric UID exists, if not, create it</li>
   * <li>See if the row timestamp is less than the metric UID meta's created
   * time. This means we have a record of the UID being used earlier than the
   * meta data indicates. Update it.</li>
   * <li>Repeat the previous three steps for each of the TAGK and TAGV tags</li>
   * <li>Check to see if meta data exists for the timeseries</li>
   * <li>If not, create the counter column if it's missing, and create the meta
   * column</li>
   * <li>If it did exist, check the {@code created} timestamp and if the row's 
   * time is less, update the meta data</li></ul></li>
   * <li>Continue on to the next unprocessed timeseries data row</li></ul>
   * <b>Note:</b> Updates or new entries will also be sent to the search plugin
   * if configured.
   * @param tsdb The tsdb to use for processing, including a search plugin
   * @return 0 if completed successfully, something else if it dies
   */
private static int metaSync(final TSDB tsdb) throws Exception {
    final long start_time = System.currentTimeMillis() / 1000;
    // now figure out how many IDs to divy up between the workers
    final int workers = Runtime.getRuntime().availableProcessors() * 2;
    final Set<Integer> processed_tsuids = Collections.synchronizedSet(new HashSet<Integer>());
    final ConcurrentHashMap<String, Long> metric_uids = new ConcurrentHashMap<String, Long>();
    final ConcurrentHashMap<String, Long> tagk_uids = new ConcurrentHashMap<String, Long>();
    final ConcurrentHashMap<String, Long> tagv_uids = new ConcurrentHashMap<String, Long>();
    final List<Scanner> scanners = CliUtils.getDataTableScanners(tsdb, workers);"Spooling up [" + scanners.size() + "] worker threads");
    final List<Thread> threads = new ArrayList<Thread>(scanners.size());
    int i = 0;
    for (final Scanner scanner : scanners) {
        final MetaSync worker = new MetaSync(tsdb, scanner, processed_tsuids, metric_uids, tagk_uids, tagv_uids, i++);
        worker.setName("Sync #" + i);
    for (final Thread thread : threads) {
        thread.join();"Thread [" + thread + "] Finished");
    }"All metasync threads have completed");
    // make sure buffered data is flushed to storage before exiting
    final long duration = (System.currentTimeMillis() / 1000) - start_time;"Completed meta data synchronization in [" + duration + "] seconds");
    return 0;
Also used : Scanner(org.hbase.async.Scanner) ArrayList(java.util.ArrayList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 29 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class Branch method fetchBranch.

  /**
   * Attempts to fetch the branch, its leaves and all child branches.
   * The UID names for each leaf may also be loaded if configured.
   * @param tsdb The TSDB to use for storage access
   * @param branch_id ID of the branch to retrieve
   * @param load_leaf_uids Whether or not to load UID names for each leaf
   * @return A branch if found, null if it did not exist
   * @throws JSONException if the object could not be deserialized
   */
// Scans the rows selected by setupBranchScanner(tsdb, branch_id) and fills a
// fresh Branch from the branch column, handing leaf columns to
// Leaf.parseFromStorage. The caller receives the `result` deferred.
// NOTE(review): this excerpt has lost its closing braces and some statements
// (nothing visible completes `result` or stores a child branch) - confirm
// against the full source before relying on it.
public static Deferred<Branch> fetchBranch(final TSDB tsdb, final byte[] branch_id, final boolean load_leaf_uids) {
    final Deferred<Branch> result = new Deferred<Branch>();
    final Scanner scanner = setupBranchScanner(tsdb, branch_id);
    // This is the branch that will be loaded with data from the scanner and
    // returned at the end of the process.
    final Branch branch = new Branch();
    // A list of deferreds to wait on for child leaf processing
    final ArrayList<Deferred<Object>> leaf_group = new ArrayList<Deferred<Object>>();
     * Exception handler to catch leaves with an invalid UID name due to a 
     * possible deletion. This will allow the scanner to keep loading valid
     * leaves and ignore problems. The fsck tool can be used to clean up
     * orphaned leaves. If we catch something other than an NSU, it will
     * re-throw the exception
    final class LeafErrBack implements Callback<Object, Exception> {

        // qualifier of the leaf column being loaded, kept for the log message
        final byte[] qualifier;

        public LeafErrBack(final byte[] qualifier) {
            this.qualifier = qualifier;

        public Object call(final Exception e) throws Exception {
            // unwrap nested DeferredGroupExceptions until the underlying cause is reached
            // NOTE(review): a group exception without a cause would make
            // ex.getClass() throw a NullPointerException here - confirm
            Throwable ex = e;
            while (ex.getClass().equals(DeferredGroupException.class)) {
                ex = ex.getCause();
            if (ex.getClass().equals(NoSuchUniqueId.class)) {
                LOG.debug("Invalid UID for leaf: " + idToString(qualifier) + " in branch: " + idToString(branch_id), ex);
            } else {
                // NOTE(review): the cast fails with ClassCastException if the
                // unwrapped cause is a Throwable that is not an Exception
                throw (Exception) ex;
            return null;
     * Called after a leaf has been loaded successfully and adds the leaf
     * to the branch's leaf set. Also lazily initializes the leaf set if it 
     * hasn't been.
    final class LeafCB implements Callback<Object, Leaf> {

        public Object call(final Leaf leaf) throws Exception {
            if (leaf != null) {
                if (branch.leaves == null) {
                    branch.leaves = new HashMap<Integer, Leaf>();
                // leaves are keyed by their hash code, so two leaves with the
                // same hash code overwrite each other
                branch.leaves.put(leaf.hashCode(), leaf);
            return null;
     * Scanner callback executed recursively each time we get a set of data
     * from storage. This is responsible for determining what columns are 
     * returned and issuing requests to load leaf objects.
     * When the scanner returns a null set of rows, the method initiates the
     * final callback.
    final class FetchBranchCB implements Callback<Object, ArrayList<ArrayList<KeyValue>>> {

       * Starts the scanner and is called recursively to fetch the next set of
       * rows from the scanner.
       * @return The branch if loaded successfully, null if the branch was not
       * found.
        public Object fetchBranch() {
            return scanner.nextRows().addCallback(this);

       * Loops through each row of the scanner results and parses out branch
       * definitions and child leaves.
       * @return The final branch callback if the scanner returns a null set
        public Object call(final ArrayList<ArrayList<KeyValue>> rows) throws Exception {
            if (rows == null) {
                // scan finished: a tree_id below 1 or a missing path means the
                // branch definition column was never seen
                if (branch.tree_id < 1 || branch.path == null) {
                } else {
                return null;
            for (final ArrayList<KeyValue> row : rows) {
                for (KeyValue column : row) {
                    // matched a branch column
                    if (Bytes.equals(BRANCH_QUALIFIER, column.qualifier())) {
                        // the row key tells this branch apart from its children
                        if (Bytes.equals(branch_id, column.key())) {
                            // it's *this* branch. We deserialize to a new object and copy
                            // since the columns could be in any order and we may get a 
                            // leaf before the branch
                            final Branch local_branch = JSON.parseToObject(column.value(), Branch.class);
                            branch.path = local_branch.path;
                            branch.display_name = local_branch.display_name;
                            branch.tree_id = Tree.bytesToId(column.key());
                        } else {
                            // it's a child branch
                            final Branch child = JSON.parseToObject(column.value(), Branch.class);
                            child.tree_id = Tree.bytesToId(column.key());
                    // parse out a leaf
                    // only the first Leaf.LEAF_PREFIX().length bytes of the qualifier are compared
                    } else if (Bytes.memcmp(Leaf.LEAF_PREFIX(), column.qualifier(), 0, Leaf.LEAF_PREFIX().length) == 0) {
                        if (Bytes.equals(branch_id, column.key())) {
                            // process a leaf and skip if the UIDs for the TSUID can't be 
                            // found. Add an errback to catch NoSuchUniqueId exceptions
                            leaf_group.add(Leaf.parseFromStorage(tsdb, column, load_leaf_uids).addCallbacks(new LeafCB(), new LeafErrBack(column.qualifier())));
                        } else {
                        // TODO - figure out an efficient way to increment a counter in 
                        // the child branch with the # of leaves it has
            // recursively call ourself to fetch more results from the scanner
            return fetchBranch();
    // start scanning
    // the value returned by fetchBranch() is dropped; only `result` is returned
    new FetchBranchCB().fetchBranch();
    return result;
Also used : Scanner(org.hbase.async.Scanner) KeyValue(org.hbase.async.KeyValue) Deferred(com.stumbleupon.async.Deferred) ArrayList(java.util.ArrayList) DeferredGroupException(com.stumbleupon.async.DeferredGroupException) IOException(java.io.IOException) HBaseException(org.hbase.async.HBaseException) JSONException(net.opentsdb.utils.JSONException) Callback(com.stumbleupon.async.Callback)

Example 30 with Scanner

use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.

the class TimeSeriesLookup method getScanner.

/** Compiles a scanner with the given salt ID if salting is enabled AND we're
   * not scanning the meta table.
   * @param salt An ID for the salt bucket
   * @return A scanner to send to HBase.
private Scanner getScanner(final int salt) {
    final Scanner scanner = tsdb.getClient().newScanner(query.useMeta() ? tsdb.metaTable() : tsdb.dataTable());
    scanner.setFamily(query.useMeta() ? TSMeta.FAMILY : TSDB.FAMILY());
    if (metric_uid != null) {
        byte[] key;
        if (query.useMeta() || Const.SALT_WIDTH() < 1) {
            key = metric_uid;
        } else {
            key = new byte[Const.SALT_WIDTH() + TSDB.metrics_width()];
            System.arraycopy(RowKey.getSaltBytes(salt), 0, key, 0, Const.SALT_WIDTH());
            System.arraycopy(metric_uid, 0, key, Const.SALT_WIDTH(), metric_uid.length);
        long uid = UniqueId.uidToLong(metric_uid, TSDB.metrics_width());
        if (uid < Internal.getMaxUnsignedValueOnBytes(TSDB.metrics_width())) {
            // value. If so, we need to leave the stop key as null
            if (query.useMeta() || Const.SALT_WIDTH() < 1) {
                key = UniqueId.longToUID(uid, TSDB.metrics_width());
            } else {
                key = new byte[Const.SALT_WIDTH() + TSDB.metrics_width()];
                System.arraycopy(RowKey.getSaltBytes(salt), 0, key, 0, Const.SALT_WIDTH());
                System.arraycopy(UniqueId.longToUID(uid, TSDB.metrics_width()), 0, key, Const.SALT_WIDTH(), metric_uid.length);
    if (rowkey_regex != null) {
        scanner.setKeyRegexp(rowkey_regex, CHARSET);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Scanner regex: " + QueryUtil.byteRegexToString(rowkey_regex));
    return scanner;
Also used : Scanner(org.hbase.async.Scanner)


Scanner (org.hbase.async.Scanner)35 ArrayList (java.util.ArrayList)24 KeyValue (org.hbase.async.KeyValue)19 Callback (com.stumbleupon.async.Callback)12 Deferred (com.stumbleupon.async.Deferred)11 DeleteRequest (org.hbase.async.DeleteRequest)7 HBaseException (org.hbase.async.HBaseException)6 List (java.util.List)5 Map (java.util.Map)3 DeferredGroupException (com.stumbleupon.async.DeferredGroupException)2 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 Query (net.opentsdb.core.Query)2 ByteMap (org.hbase.async.Bytes.ByteMap)2 Test (org.junit.Test)2 Matchers.anyString (org.mockito.Matchers.anyString)2 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)2 ByteArrayByteIterator ( ByteIterator (