use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.
the class MetaPurge method purgeTSMeta.
/**
 * Scans the meta table and deletes every TSMeta column and TSMeta counter
 * column it finds.
 * <p>
 * Rows are handled one scanner batch at a time: a single delete request is
 * issued for each row that has matching columns, and the next batch is only
 * requested after all deletes of the current batch have completed.
 * @return A deferred that resolves to the value of {@code columns} once the
 * scanner returns no more rows. If the scanner or any delete request fails,
 * the deferred is failed with that exception rather than left unresolved.
 */
public Deferred<Long> purgeTSMeta() {
  // deletes issued for the batch currently being processed; emptied before
  // the next batch is requested so the list cannot grow without bound
  final ArrayList<Deferred<Object>> delete_calls = new ArrayList<Deferred<Object>>();
  final Deferred<Long> result = new Deferred<Long>();

  /**
   * Hands a scanner or delete failure to the caller through the returned
   * deferred. Without it a failure would leave {@code result} pending and
   * anything joined on it would wait forever.
   */
  final class PurgeErrBack implements Callback<Object, Exception> {
    @Override
    public Object call(final Exception e) throws Exception {
      result.callback(e);
      return null;
    }
  }

  /**
   * Scanner callback that re-arms itself after each batch and loops through
   * the rows of the meta table, issuing a delete request for the columns in
   * each row that match a TSMeta qualifier.
   */
  final class MetaScanner implements Callback<Deferred<Long>, ArrayList<ArrayList<KeyValue>>> {
    final Scanner scanner;

    public MetaScanner() {
      scanner = getScanner(tsdb.metaTable());
    }

    /**
     * Fetches the next group of rows from the scanner and sets this class as
     * the callback.
     * @return The deferred of the scanner call with this callback attached
     */
    public Deferred<Long> scan() {
      return scanner.nextRows().addCallbackDeferring(this);
    }

    @Override
    public Deferred<Long> call(ArrayList<ArrayList<KeyValue>> rows) throws Exception {
      // a null row set means the scanner is exhausted
      if (rows == null) {
        result.callback(columns);
        return null;
      }

      for (final ArrayList<KeyValue> row : rows) {
        // one delete request per row. We'll almost always delete the whole
        // row, so preallocate some ram.
        final ArrayList<byte[]> qualifiers = new ArrayList<byte[]>(row.size());
        for (final KeyValue column : row) {
          if (Bytes.equals(TSMeta.META_QUALIFIER(), column.qualifier())
              || Bytes.equals(TSMeta.COUNTER_QUALIFIER(), column.qualifier())) {
            qualifiers.add(column.qualifier());
          }
        }
        if (!qualifiers.isEmpty()) {
          columns += qualifiers.size();
          final DeleteRequest delete = new DeleteRequest(tsdb.metaTable(),
              row.get(0).key(), NAME_FAMILY,
              qualifiers.toArray(new byte[qualifiers.size()][]));
          delete_calls.add(tsdb.getClient().delete(delete));
        }
      }

      /**
       * Buffer callback used to wait on all of the delete calls for the
       * last set of rows returned from the scanner so we don't fill up the
       * deferreds array and OOM out.
       */
      final class ContinueCB implements Callback<Deferred<Long>, ArrayList<Object>> {
        @Override
        public Deferred<Long> call(ArrayList<Object> deletes) throws Exception {
          LOG.debug("[" + thread_id + "] Processed [" + deletes.size() + "] delete calls");
          delete_calls.clear();
          return scan();
        }
      }

      // fetch the next set of rows after waiting for the current set of
      // delete requests to complete. This chain is detached from the one
      // that invoked us, so it needs its own errback: it sees both failed
      // deletes and failures of the follow-up scan.
      Deferred.group(delete_calls)
          .addCallbackDeferring(new ContinueCB())
          .addErrback(new PurgeErrBack());
      return null;
    }
  }

  // start the scan; the errback covers the first batch, later batches are
  // covered by the errback attached to each continuation
  new MetaScanner().scan().addErrback(new PurgeErrBack());
  return result;
}
use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.
the class TreeSync method run.
/**
 * Performs a tree synchronization: loads all tree definitions, then walks
 * the rows returned by this thread's scanner and runs the TSMeta parsed from
 * each row through a {@code TreeBuilder} for every enabled tree.
 * <p>
 * The call blocks until the scanner is exhausted or a scanner-level failure
 * has been reported. It returns early, after logging a warning, when there
 * are no tree definitions or none of them is enabled. Problems with a single
 * timeseries are logged and that timeseries is skipped.
 * @throws RuntimeException if the tree definitions cannot be loaded or the
 * wait for completion fails
 */
public void run() {
  final Scanner scanner = getScanner();

  // start the process by loading all of the trees in the system
  final List<Tree> trees;
  try {
    trees = Tree.fetchAllTrees(tsdb).joinUninterruptibly();
    // NOTE(review): this is logged once the tree definitions are loaded, not
    // when the synchronization itself has finished
    LOG.info("[" + thread_id + "] Complete");
  } catch (Exception e) {
    LOG.error("[" + thread_id + "] Unexpected Exception", e);
    throw new RuntimeException("[" + thread_id + "] Unexpected exception", e);
  }

  if (trees == null) {
    LOG.warn("No tree definitions were found");
    return;
  }
  boolean has_enabled_tree = false;
  for (final Tree tree : trees) {
    if (tree.getEnabled()) {
      has_enabled_tree = true;
      break;
    }
  }
  if (!has_enabled_tree) {
    LOG.warn("No enabled trees were found");
    return;
  }
  LOG.info("Found [" + trees.size() + "] trees");

  // setup an array for storing the tree processing calls so we can block
  // until each call has completed
  final ArrayList<Deferred<Boolean>> tree_calls = new ArrayList<Deferred<Boolean>>();
  final Deferred<Boolean> completed = new Deferred<Boolean>();

  /**
   * Used to capture unhandled exceptions from the scanner callbacks and
   * release the waiting thread. It is declared ahead of the scanner callback
   * so that it can be attached to every batch continuation, not only to the
   * first scan.
   */
  final class ScannerErrBack implements Callback<Deferred<Boolean>, Exception> {
    @Override
    public Deferred<Boolean> call(Exception e) throws Exception {
      LOG.error("Unexpected exception", e);
      completed.callback(false);
      return Deferred.fromResult(false);
    }
  }

  /**
   * A throttling callback used to wait for the current TSMeta to complete
   * processing through the trees before continuing on with the next set.
   */
  final class TreeBuilderBufferCB implements Callback<Boolean, ArrayList<ArrayList<Boolean>>> {
    @Override
    public Boolean call(ArrayList<ArrayList<Boolean>> builder_calls) throws Exception {
      return true;
    }
  }

  /**
   * Executed after a TSMeta object has been parsed. Runs the meta through a
   * fresh TreeBuilder for each enabled tree, groups the resulting deferreds
   * and waits on them via TreeBuilderBufferCB. Resolves to false when no
   * meta was parsed.
   */
  final class ParseCB implements Callback<Deferred<Boolean>, TSMeta> {
    final ArrayList<Deferred<ArrayList<Boolean>>> builder_calls =
        new ArrayList<Deferred<ArrayList<Boolean>>>();

    @Override
    public Deferred<Boolean> call(TSMeta meta) throws Exception {
      if (meta == null) {
        return Deferred.fromResult(false);
      }
      // serializing the meta is wasted work unless debug output is enabled
      if (LOG.isDebugEnabled()) {
        LOG.debug("Processing TSMeta: " + meta + " w value: " + JSON.serializeToString(meta));
      }
      // copy the trees into a tree builder object and iterate through
      // each builder. We need to do this as a builder is not thread
      // safe and cannot be used asynchronously.
      final ArrayList<TreeBuilder> tree_builders = new ArrayList<TreeBuilder>(trees.size());
      for (final Tree tree : trees) {
        if (tree.getEnabled()) {
          tree_builders.add(new TreeBuilder(tsdb, tree));
        }
      }
      for (final TreeBuilder builder : tree_builders) {
        builder_calls.add(builder.processTimeseriesMeta(meta));
      }
      return Deferred.group(builder_calls).addCallback(new TreeBuilderBufferCB());
    }
  }

  /**
   * An error handler used to catch issues when loading or processing a
   * single TSMeta, such as a missing UID name. The problem is logged and the
   * timeseries resolves to false so the scan can continue.
   */
  final class TsuidErrBack implements Callback<Deferred<Boolean>, Exception> {
    final String tsuid;

    TsuidErrBack(final String tsuid) {
      this.tsuid = tsuid;
    }

    @Override
    public Deferred<Boolean> call(Exception e) throws Exception {
      if (e.getClass().equals(IllegalStateException.class)) {
        LOG.error("Invalid data when processing TSUID [" + tsuid + "]", e);
      } else if (e.getClass().equals(IllegalArgumentException.class)) {
        LOG.error("Invalid data when processing TSUID [" + tsuid + "]", e);
      } else if (e.getClass().equals(NoSuchUniqueId.class)) {
        LOG.warn("Timeseries [" + tsuid + "] includes a non-existant UID: " + e.getMessage());
      } else {
        LOG.error("[" + thread_id + "] Exception while processing TSUID [" + tsuid + "]", e);
      }
      return Deferred.fromResult(false);
    }
  }

  /**
   * Scanner callback that loops through the table until the scanner returns
   * a null row set.
   */
  final class TsuidScanner implements Callback<Deferred<Boolean>, ArrayList<ArrayList<KeyValue>>> {

    /**
     * Fetches the next set of rows from the scanner, adding this class as a
     * callback.
     * @return A deferred for the processing of that set of rows
     */
    public Deferred<Boolean> scan() {
      return scanner.nextRows().addCallbackDeferring(this);
    }

    @Override
    public Deferred<Boolean> call(ArrayList<ArrayList<KeyValue>> rows) throws Exception {
      // a null row set means the scanner is exhausted
      if (rows == null) {
        completed.callback(true);
        return null;
      }

      for (final ArrayList<KeyValue> row : rows) {
        // convert to a string one time
        final String tsuid = UniqueId.uidToString(row.get(0).key());

        // parse the first column of the row as a TSMeta (loading the
        // associated UIDMeta objects), then pass it off to the callbacks
        // for processing through the trees
        final Deferred<Boolean> process_tsmeta =
            TSMeta.parseFromColumn(tsdb, row.get(0), true)
                .addCallbackDeferring(new ParseCB());
        process_tsmeta.addErrback(new TsuidErrBack(tsuid));
        tree_calls.add(process_tsmeta);
      }

      /**
       * Another buffer callback that waits for the current set of TSMetas to
       * complete their tree calls before we fetch another set of rows from
       * the scanner. This is necessary to avoid OOM issues.
       */
      final class ContinueCB implements Callback<Deferred<Boolean>, ArrayList<Boolean>> {
        @Override
        public Deferred<Boolean> call(ArrayList<Boolean> tsuids) throws Exception {
          LOG.debug("Processed [" + tsuids.size() + "] tree_calls, continuing");
          tree_calls.clear();
          return scan();
        }
      }

      // request the next set of rows from the scanner, but wait until the
      // current set of TSMetas has been processed so we don't slaughter our
      // host. This chain is detached from the deferred returned below, so a
      // failure of the follow-up scan must be caught here or the thread
      // waiting on "completed" would never be released.
      Deferred.group(tree_calls)
          .addCallbackDeferring(new ContinueCB())
          .addErrback(new ScannerErrBack());
      return Deferred.fromResult(null);
    }
  }

  // the errback here covers the first batch only; every later batch is
  // covered by the errback attached to its continuation
  new TsuidScanner().scan().addErrback(new ScannerErrBack());
  try {
    completed.joinUninterruptibly();
    LOG.info("[" + thread_id + "] Complete");
  } catch (Exception e) {
    LOG.error("[" + thread_id + "] Scanner Exception", e);
    throw new RuntimeException("[" + thread_id + "] Scanner exception", e);
  }
}
use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.
the class UidManager method metaSync.
/**
 * Runs through the entire data table and creates TSMeta objects for unique
 * timeseries and/or updates {@code created} timestamps.
 * <p>
 * This method only orchestrates the work:
 * <ul><li>It asks for data table scanners for twice as many workers as there
 * are available processors</li>
 * <li>It starts one {@code MetaSync} worker thread per scanner, all sharing
 * one synchronized set of processed TSUIDs and one concurrent map each for
 * metric, tagk and tagv UIDs</li>
 * <li>It waits for every worker to finish, flushes the TSDB and logs the
 * elapsed time in seconds</li></ul>
 * The per-row processing is done by the workers. As documented for this
 * tool, for each unprocessed TSUID a worker:
 * <ul><li>Checks that the metric UID mapping is present, logging an error
 * and continuing if not</li>
 * <li>Creates the metric UID meta if it is missing, or lowers its
 * {@code created} time when the row timestamp is earlier</li>
 * <li>Repeats those steps for each of the TAGK and TAGV tags</li>
 * <li>Creates the timeseries counter and meta columns if the meta is
 * missing, or lowers its {@code created} time when the row timestamp is
 * earlier</li></ul>
 * <b>Note:</b> Updates or new entries will also be sent to the search plugin
 * if configured.
 * @param tsdb The tsdb to use for processing, including a search plugin
 * @return Always 0; failures surface as exceptions
 * @throws Exception if waiting on a worker or flushing the TSDB fails
 */
private static int metaSync(final TSDB tsdb) throws Exception {
  final long start_time = System.currentTimeMillis() / 1000;

  // two workers for every available processor
  final int workers = Runtime.getRuntime().availableProcessors() * 2;

  // state shared by all of the workers
  final Set<Integer> seen_tsuids = Collections.synchronizedSet(new HashSet<Integer>());
  final ConcurrentHashMap<String, Long> metrics = new ConcurrentHashMap<String, Long>();
  final ConcurrentHashMap<String, Long> tagks = new ConcurrentHashMap<String, Long>();
  final ConcurrentHashMap<String, Long> tagvs = new ConcurrentHashMap<String, Long>();

  final List<Scanner> scanners = CliUtils.getDataTableScanners(tsdb, workers);
  LOG.info("Spooling up [" + scanners.size() + "] worker threads");

  // one worker per scanner; the worker receives a zero based ID while the
  // thread name is one based
  final List<Thread> threads = new ArrayList<Thread>(scanners.size());
  for (int idx = 0; idx < scanners.size(); idx++) {
    final MetaSync sync = new MetaSync(tsdb, scanners.get(idx), seen_tsuids,
        metrics, tagks, tagvs, idx);
    sync.setName("Sync #" + (idx + 1));
    sync.start();
    threads.add(sync);
  }

  // block until every worker is done
  for (final Thread worker : threads) {
    worker.join();
    LOG.info("Thread [" + worker + "] Finished");
  }
  LOG.info("All metasync threads have completed");

  // make sure buffered data is flushed to storage before exiting
  tsdb.flush().joinUninterruptibly();

  final long end_time = System.currentTimeMillis() / 1000;
  final long duration = end_time - start_time;
  LOG.info("Completed meta data synchronization in [" + duration + "] seconds");
  return 0;
}
use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.
the class Branch method fetchBranch.
/**
 * Attempts to fetch the branch, its leaves and all child branches.
 * The UID names for each leaf may also be loaded if configured.
 * <p>
 * The returned deferred is only resolved after the scanner is exhausted AND
 * every pending leaf load has finished, so the branch handed to the caller
 * is no longer being modified by leaf callbacks. Leaves that fail with a
 * {@code NoSuchUniqueId} are logged and skipped.
 * @param tsdb The TSDB to use for storage access
 * @param branch_id ID of the branch to retrieve
 * @param load_leaf_uids Whether or not to load UID names for each leaf
 * @return A deferred resolving to the branch if found, or to null if it did
 * not exist. The deferred fails with the underlying exception if scanning,
 * deserialization (e.g. a JSONException) or a leaf load fails for a reason
 * other than a missing UID.
 */
public static Deferred<Branch> fetchBranch(final TSDB tsdb, final byte[] branch_id, final boolean load_leaf_uids) {
  final Deferred<Branch> result = new Deferred<Branch>();
  final Scanner scanner = setupBranchScanner(tsdb, branch_id);

  // This is the branch that will be loaded with data from the scanner and
  // returned at the end of the process.
  final Branch branch = new Branch();

  // A list of deferreds to wait on for child leaf processing
  final ArrayList<Deferred<Object>> leaf_group = new ArrayList<Deferred<Object>>();

  /**
   * Exception handler to catch leaves with an invalid UID name due to a
   * possible deletion. This will allow the scanner to keep loading valid
   * leaves and ignore problems. The fsck tool can be used to clean up
   * orphaned leaves. If we catch something other than an NSU, it will
   * re-throw the exception.
   */
  final class LeafErrBack implements Callback<Object, Exception> {
    final byte[] qualifier;

    public LeafErrBack(final byte[] qualifier) {
      this.qualifier = qualifier;
    }

    @Override
    public Object call(final Exception e) throws Exception {
      // unwrap group exceptions to find the real cause, stopping if a
      // wrapper carries no cause at all
      Throwable ex = e;
      while (ex.getClass().equals(DeferredGroupException.class) && ex.getCause() != null) {
        ex = ex.getCause();
      }
      if (ex.getClass().equals(NoSuchUniqueId.class)) {
        LOG.debug("Invalid UID for leaf: " + idToString(qualifier) + " in branch: " + idToString(branch_id), ex);
        return null;
      }
      if (ex instanceof Exception) {
        throw (Exception) ex;
      }
      // the cause is not an Exception (e.g. an Error) and cannot be thrown
      // from here as-is, so re-throw the exception we were given
      throw e;
    }
  }

  /**
   * Called after a leaf has been loaded successfully and adds the leaf
   * to the branch's leaf set. Also lazily initializes the leaf set if it
   * hasn't been.
   */
  final class LeafCB implements Callback<Object, Leaf> {
    @Override
    public Object call(final Leaf leaf) throws Exception {
      if (leaf != null) {
        if (branch.leaves == null) {
          branch.leaves = new HashMap<Integer, Leaf>();
        }
        branch.leaves.put(leaf.hashCode(), leaf);
      }
      return null;
    }
  }

  /**
   * Fails the returned deferred so that a scanner, parsing or leaf failure
   * reaches the caller instead of leaving the deferred unresolved.
   */
  final class FailureCB implements Callback<Object, Exception> {
    @Override
    public Object call(final Exception e) throws Exception {
      result.callback(e);
      return null;
    }
  }

  /**
   * Runs once all leaf loads have finished and resolves the returned
   * deferred with the branch, or with null when no definition for the
   * requested branch was seen.
   */
  final class LeavesLoadedCB implements Callback<Object, ArrayList<Object>> {
    @Override
    public Object call(final ArrayList<Object> leaves) throws Exception {
      if (branch.tree_id < 1 || branch.path == null) {
        result.callback(null);
      } else {
        result.callback(branch);
      }
      return null;
    }
  }

  /**
   * Scanner callback executed recursively each time we get a set of data
   * from storage. This is responsible for determining what columns are
   * returned and issuing requests to load leaf objects.
   * When the scanner returns a null set of rows, it waits for the pending
   * leaf loads and then initiates the final callback.
   */
  final class FetchBranchCB implements Callback<Object, ArrayList<ArrayList<KeyValue>>> {

    /**
     * Starts the scanner and is called recursively to fetch the next set of
     * rows from the scanner.
     * @return The deferred of the scanner call with this callback attached
     */
    public Deferred<Object> fetchBranch() {
      return scanner.nextRows().addCallback(this);
    }

    /**
     * Loops through each row of the scanner results and parses out branch
     * definitions and child leaves.
     * @return null once the scanner is exhausted, otherwise the deferred
     * for the next set of rows
     */
    @Override
    public Object call(final ArrayList<ArrayList<KeyValue>> rows) throws Exception {
      if (rows == null) {
        // the scanner is done but leaf loads may still be in flight; only
        // hand the branch over after all of them have finished
        Deferred.group(leaf_group).addCallbacks(new LeavesLoadedCB(), new FailureCB());
        return null;
      }

      for (final ArrayList<KeyValue> row : rows) {
        for (final KeyValue column : row) {
          if (Bytes.equals(BRANCH_QUALIFIER, column.qualifier())) {
            // matched a branch column
            if (Bytes.equals(branch_id, column.key())) {
              // it's *this* branch. We deserialize to a new object and copy
              // since the columns could be in any order and we may get a
              // leaf before the branch
              final Branch local_branch = JSON.parseToObject(column.value(), Branch.class);
              branch.path = local_branch.path;
              branch.display_name = local_branch.display_name;
              branch.tree_id = Tree.bytesToId(column.key());
            } else {
              // it's a child branch
              final Branch child = JSON.parseToObject(column.value(), Branch.class);
              child.tree_id = Tree.bytesToId(column.key());
              branch.addChild(child);
            }
          } else if (Bytes.memcmp(Leaf.LEAF_PREFIX(), column.qualifier(), 0, Leaf.LEAF_PREFIX().length) == 0) {
            // parse out a leaf
            if (Bytes.equals(branch_id, column.key())) {
              // process a leaf and skip if the UIDs for the TSUID can't be
              // found. Add an errback to catch NoSuchUniqueId exceptions
              leaf_group.add(Leaf.parseFromStorage(tsdb, column, load_leaf_uids)
                  .addCallbacks(new LeafCB(), new LeafErrBack(column.qualifier())));
            } else {
              // TODO - figure out an efficient way to increment a counter in
              // the child branch with the # of leaves it has
            }
          }
        }
      }

      // recursively call ourself to fetch more results from the scanner
      return fetchBranch();
    }
  }

  // start scanning. Each recursive fetch is returned from the callback and
  // therefore chained onto this first deferred, so one errback here sees a
  // failure from any batch.
  new FetchBranchCB().fetchBranch().addErrback(new FailureCB());
  return result;
}
use of org.hbase.async.Scanner in project opentsdb by OpenTSDB.
the class TimeSeriesLookup method getScanner.
/**
 * Compiles a scanner over the meta table or the data table, depending on
 * whether the query uses meta. The salt ID is only applied to the row keys
 * if salting is enabled AND we're not scanning the meta table.
 * <p>
 * When a metric UID is set the scan starts at that metric and, where
 * possible, stops at the next metric UID. When a row key regex is set it is
 * applied to the scanner as well.
 * @param salt An ID for the salt bucket
 * @return A scanner to send to HBase.
 */
private Scanner getScanner(final int salt) {
  final boolean meta_scan = query.useMeta();
  final Scanner scanner = tsdb.getClient().newScanner(
      meta_scan ? tsdb.metaTable() : tsdb.dataTable());
  scanner.setFamily(meta_scan ? TSMeta.FAMILY : TSDB.FAMILY());

  if (metric_uid != null) {
    final int salt_width = Const.SALT_WIDTH();
    // keys carry no salt prefix on the meta table or when salting is off
    final boolean unsalted = meta_scan || salt_width < 1;

    byte[] start_key = metric_uid;
    if (!unsalted) {
      start_key = new byte[salt_width + TSDB.metrics_width()];
      System.arraycopy(RowKey.getSaltBytes(salt), 0, start_key, 0, salt_width);
      System.arraycopy(metric_uid, 0, start_key, salt_width, metric_uid.length);
    }
    scanner.setStartKey(start_key);

    // the scan ends at the next metric UID, but only when that UID is still
    // below the largest value that fits on the metric width. If it is not,
    // we need to leave the stop key as null.
    final long next_uid = UniqueId.uidToLong(metric_uid, TSDB.metrics_width()) + 1;
    if (next_uid < Internal.getMaxUnsignedValueOnBytes(TSDB.metrics_width())) {
      final byte[] stop_key;
      if (unsalted) {
        stop_key = UniqueId.longToUID(next_uid, TSDB.metrics_width());
      } else {
        stop_key = new byte[salt_width + TSDB.metrics_width()];
        System.arraycopy(RowKey.getSaltBytes(salt), 0, stop_key, 0, salt_width);
        System.arraycopy(UniqueId.longToUID(next_uid, TSDB.metrics_width()), 0,
            stop_key, salt_width, metric_uid.length);
      }
      scanner.setStopKey(stop_key);
    }
  }

  if (rowkey_regex != null) {
    scanner.setKeyRegexp(rowkey_regex, CHARSET);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Scanner regex: " + QueryUtil.byteRegexToString(rowkey_regex));
    }
  }
  return scanner;
}
Aggregations