
Example 41 with KeyValue

Use of org.hbase.async.KeyValue in project opentsdb by OpenTSDB.

The class Internal, method extractDataPoints.

/**
   * Breaks down all the values in a row into individual {@link Cell}s sorted on
   * the qualifier. Columns with non data-point data will be discarded.
   * <b>Note:</b> This method does not account for duplicate timestamps in
   * qualifiers.
   * @param row An array of data row columns to parse
   * @param estimated_nvalues Estimate of the number of values to compact.
   * Used to pre-allocate a collection of the right size, so it's better to
   * overshoot a bit to avoid re-allocations.
   * @return An array list of data point {@link Cell} objects. The list may be 
   * empty if the row did not contain a data point.
   * @throws IllegalDataException if one of the cells cannot be read because
   * it's corrupted or in a format we don't understand.
   * @since 2.0
   */
public static ArrayList<Cell> extractDataPoints(final ArrayList<KeyValue> row, final int estimated_nvalues) {
    final ArrayList<Cell> cells = new ArrayList<Cell>(estimated_nvalues);
    for (final KeyValue kv : row) {
        final byte[] qual = kv.qualifier();
        final int len = qual.length;
        final byte[] val = kv.value();
        if (len % 2 != 0) {
            // skip a non data point column
            continue;
        } else if (len == 2) {
            // Single-value cell.
            // Maybe we need to fix the flags in the qualifier.
            final byte[] actual_val = fixFloatingPointValue(qual[1], val);
            final byte q = fixQualifierFlags(qual[1], actual_val.length);
            final byte[] actual_qual;
            if (q != qual[1]) {
                // We need to fix the qualifier.
                // So make a copy.
                actual_qual = new byte[] { qual[0], q };
            } else {
                // Otherwise use the one we already have.
                actual_qual = qual;
            }
            final Cell cell = new Cell(actual_qual, actual_val);
            cells.add(cell);
            continue;
        } else if (len == 4 && inMilliseconds(qual[0])) {
            // since ms support is new, there's nothing to fix
            final Cell cell = new Cell(qual, val);
            cells.add(cell);
            continue;
        }
        // Now break it down into Cells.
        int val_idx = 0;
        try {
            for (int i = 0; i < len; i += 2) {
                final byte[] q = extractQualifier(qual, i);
                final int vlen = getValueLengthFromQualifier(qual, i);
                if (inMilliseconds(qual[i])) {
                    i += 2;
                }
                final byte[] v = new byte[vlen];
                System.arraycopy(val, val_idx, v, 0, vlen);
                val_idx += vlen;
                final Cell cell = new Cell(q, v);
                cells.add(cell);
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            throw new IllegalDataException("Corrupted value: couldn't break down" + " into individual values (consumed " + val_idx + " bytes, but was" + " expecting to consume " + (val.length - 1) + "): " + kv + ", cells so far: " + cells);
        }
        // Check that we consumed all the bytes of the value. The last byte
        // is metadata, so it's normal that we didn't consume it.
        if (val_idx != val.length - 1) {
            throw new IllegalDataException("Corrupted value: couldn't break down" + " into individual values (consumed " + val_idx + " bytes, but was" + " expecting to consume " + (val.length - 1) + "): " + kv + ", cells so far: " + cells);
        }
    }
    Collections.sort(cells);
    return cells;
}
Also used: KeyValue (org.hbase.async.KeyValue), ArrayList (java.util.ArrayList)
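
A minimal usage sketch (not from the indexed project; the row key, column family, and value below are made up, and the statements are assumed to sit inside a test method). The qualifier 0x0007 encodes a time delta of 0 seconds with flag bits for an 8-byte long value:

import java.util.ArrayList;
import net.opentsdb.core.Internal;
import org.hbase.async.Bytes;
import org.hbase.async.KeyValue;

// Hypothetical 13-byte row key: 3-byte metric UID, 4-byte base time, one tag pair.
final byte[] key = new byte[] { 0, 0, 1, 0x52, 0x1A, 0x4E, 0x60, 0, 0, 1, 0, 0, 2 };
final byte[] family = "t".getBytes();               // OpenTSDB's data column family
final byte[] qualifier = new byte[] { 0x00, 0x07 }; // delta 0, flags for an 8-byte long
final byte[] value = Bytes.fromLong(42L);
final ArrayList<KeyValue> row = new ArrayList<KeyValue>(1);
row.add(new KeyValue(key, family, qualifier, value));
// One Cell per (qualifier, value) pair, sorted on the qualifier.
final ArrayList<Internal.Cell> cells = Internal.extractDataPoints(row, 1);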

Example 42 with KeyValue

Use of org.hbase.async.KeyValue in project opentsdb by OpenTSDB.

The class TsdbQuery, method findSpans.

/**
   * Finds all the {@link Span}s that match this query.
   * This is what actually scans the HBase table and loads the data into
   * {@link Span}s.
   * @return A map from HBase row key to the {@link Span} for that row key.
   * Since a {@link Span} actually contains multiple HBase rows, the row key
   * stored in the map has its timestamp zero'ed out.
   * @throws HBaseException if there was a problem communicating with HBase to
   * perform the search.
   * @throws IllegalArgumentException if bad data was retrieved from HBase.
   */
private Deferred<TreeMap<byte[], Span>> findSpans() throws HBaseException {
    final short metric_width = tsdb.metrics.width();
    // The key is a row key from HBase.
    final TreeMap<byte[], Span> spans = new TreeMap<byte[], Span>(new SpanCmp((short) (Const.SALT_WIDTH() + metric_width)));
    // Copy only the filters that should trigger a tag resolution. If this list
    // is empty due to literals or a wildcard star, then we'll save a TON of
    // UID lookups
    final List<TagVFilter> scanner_filters;
    if (filters != null) {
        scanner_filters = new ArrayList<TagVFilter>(filters.size());
        for (final TagVFilter filter : filters) {
            if (filter.postScan()) {
                scanner_filters.add(filter);
            }
        }
    } else {
        scanner_filters = null;
    }
    if (Const.SALT_WIDTH() > 0) {
        final List<Scanner> scanners = new ArrayList<Scanner>(Const.SALT_BUCKETS());
        for (int i = 0; i < Const.SALT_BUCKETS(); i++) {
            scanners.add(getScanner(i));
        }
        scan_start_time = DateTime.nanoTime();
        return new SaltScanner(tsdb, metric, scanners, spans, scanner_filters, delete, query_stats, query_index).scan();
    }
    scan_start_time = DateTime.nanoTime();
    final Scanner scanner = getScanner();
    if (query_stats != null) {
        query_stats.addScannerId(query_index, 0, scanner.toString());
    }
    final Deferred<TreeMap<byte[], Span>> results = new Deferred<TreeMap<byte[], Span>>();
    /**
    * Scanner callback executed recursively each time we get a set of data
    * from storage. This is responsible for determining what columns are
    * returned and issuing requests to load leaf objects.
    * When the scanner returns a null set of rows, the method initiates the
    * final callback.
    */
    final class ScannerCB implements Callback<Object, ArrayList<ArrayList<KeyValue>>> {

        int nrows = 0;

        boolean seenAnnotation = false;

        long scanner_start = DateTime.nanoTime();

        long timeout = tsdb.getConfig().getLong("tsd.query.timeout");

        private final Set<String> skips = new HashSet<String>();

        private final Set<String> keepers = new HashSet<String>();

        // only used for salted scanners
        private final int index = 0;

        /** nanosecond timestamps */
        // reset each time we send an RPC to HBase
        private long fetch_start = 0;

        // cumulation of time waiting on HBase
        private long fetch_time = 0;

        // cumulation of time resolving UIDs
        private long uid_resolve_time = 0;

        private long uids_resolved = 0;

        // cumulation of time compacting
        private long compaction_time = 0;

        private long dps_pre_filter = 0;

        private long rows_pre_filter = 0;

        private long dps_post_filter = 0;

        private long rows_post_filter = 0;

        /** Error callback that will capture an exception from AsyncHBase and store
       * it so we can bubble it up to the caller.
       */
        class ErrorCB implements Callback<Object, Exception> {

            @Override
            public Object call(final Exception e) throws Exception {
                LOG.error("Scanner " + scanner + " threw an exception", e);
                close(e);
                return null;
            }
        }

        /**
      * Starts the scanner and is called recursively to fetch the next set of
      * rows from the scanner.
      * @return The map of spans if loaded successfully, null if no data was
      * found
      */
        public Object scan() {
            fetch_start = DateTime.nanoTime();
            return scanner.nextRows().addCallback(this).addErrback(new ErrorCB());
        }

        /**
      * Loops through each row of the scanner results and parses out data
      * points and optional meta data
      * @return null if no rows were found, otherwise the TreeMap with spans
      */
        @Override
        public Object call(final ArrayList<ArrayList<KeyValue>> rows) throws Exception {
            fetch_time += DateTime.nanoTime() - fetch_start;
            try {
                if (rows == null) {
                    scanlatency.add((int) DateTime.msFromNano(fetch_time));
                    LOG.info(TsdbQuery.this + " matched " + nrows + " rows in " + spans.size() + " spans in " + DateTime.msFromNano(fetch_time) + "ms");
                    close(null);
                    return null;
                }
                if (timeout > 0 && DateTime.msFromNanoDiff(DateTime.nanoTime(), scanner_start) > timeout) {
                    throw new InterruptedException("Query timeout exceeded!");
                }
                rows_pre_filter += rows.size();
                // used for UID resolution if a filter is involved
                final List<Deferred<Object>> lookups = filters != null && !filters.isEmpty() ? new ArrayList<Deferred<Object>>(rows.size()) : null;
                for (final ArrayList<KeyValue> row : rows) {
                    final byte[] key = row.get(0).key();
                    if (Bytes.memcmp(metric, key, 0, metric_width) != 0) {
                        scanner.close();
                        throw new IllegalDataException("HBase returned a row that doesn't match" + " our scanner (" + scanner + ")! " + row + " does not start" + " with " + Arrays.toString(metric));
                    }
                    // Estimate the data point count without deserializing the
                    // byte arrays: a rough count over (possibly compacted) columns.
                    for (final KeyValue kv : row) {
                        if (kv.qualifier().length % 2 == 0) {
                            if (kv.qualifier().length == 2 || kv.qualifier().length == 4) {
                                ++dps_pre_filter;
                            } else {
                                // Assume all points in a compacted column share the
                                // same precision. This is likely incorrect.
                                if (Internal.inMilliseconds(kv.qualifier())) {
                                    dps_pre_filter += (kv.qualifier().length / 4);
                                } else {
                                    dps_pre_filter += (kv.qualifier().length / 2);
                                }
                            }
                        } else if (kv.qualifier()[0] == AppendDataPoints.APPEND_COLUMN_PREFIX) {
                            // with appends we don't have a good rough estimate as the length
                            // can vary widely with the value length variability. Therefore we
                            // have to iterate.
                            int idx = 0;
                            int qlength = 0;
                            while (idx < kv.value().length) {
                                qlength = Internal.getQualifierLength(kv.value(), idx);
                                idx += qlength + Internal.getValueLengthFromQualifier(kv.value(), idx);
                                ++dps_pre_filter;
                            }
                        }
                    }
                    // TODO - byte set instead of a string for the uid may be faster
                    if (scanner_filters != null && !scanner_filters.isEmpty()) {
                        lookups.clear();
                        final String tsuid = UniqueId.uidToString(UniqueId.getTSUIDFromKey(key, TSDB.metrics_width(), Const.TIMESTAMP_BYTES));
                        if (skips.contains(tsuid)) {
                            continue;
                        }
                        if (!keepers.contains(tsuid)) {
                            final long uid_start = DateTime.nanoTime();
                            /** CB called after all of the UIDs have been resolved */
                            class MatchCB implements Callback<Object, ArrayList<Boolean>> {

                                @Override
                                public Object call(final ArrayList<Boolean> matches) throws Exception {
                                    for (final boolean matched : matches) {
                                        if (!matched) {
                                            skips.add(tsuid);
                                            return null;
                                        }
                                    }
                                    // matched all, good data
                                    keepers.add(tsuid);
                                    processRow(key, row);
                                    return null;
                                }
                            }
                            /** Resolves all of the row key UIDs to their strings for filtering */
                            class GetTagsCB implements Callback<Deferred<ArrayList<Boolean>>, Map<String, String>> {

                                @Override
                                public Deferred<ArrayList<Boolean>> call(final Map<String, String> tags) throws Exception {
                                    uid_resolve_time += (DateTime.nanoTime() - uid_start);
                                    uids_resolved += tags.size();
                                    final List<Deferred<Boolean>> matches = new ArrayList<Deferred<Boolean>>(scanner_filters.size());
                                    for (final TagVFilter filter : scanner_filters) {
                                        matches.add(filter.match(tags));
                                    }
                                    return Deferred.group(matches);
                                }
                            }
                            lookups.add(Tags.getTagsAsync(tsdb, key).addCallbackDeferring(new GetTagsCB()).addBoth(new MatchCB()));
                        } else {
                            processRow(key, row);
                        }
                    } else {
                        processRow(key, row);
                    }
                }
                // Either wait on the UID resolutions to finish, or scan the next
                // batch right away if we don't have filters.
                if (lookups != null && lookups.size() > 0) {
                    class GroupCB implements Callback<Object, ArrayList<Object>> {

                        @Override
                        public Object call(final ArrayList<Object> group) throws Exception {
                            return scan();
                        }
                    }
                    return Deferred.group(lookups).addCallback(new GroupCB());
                } else {
                    return scan();
                }
            } catch (Exception e) {
                close(e);
                return null;
            }
        }

        /**
        * Finds or creates the span for this row, compacts it and stores it.
        * @param key The row key to use for fetching the span
        * @param row The row to add
        */
        void processRow(final byte[] key, final ArrayList<KeyValue> row) {
            ++rows_post_filter;
            if (delete) {
                final DeleteRequest del = new DeleteRequest(tsdb.dataTable(), key);
                tsdb.getClient().delete(del);
            }
            // Estimate the data point count without deserializing the
            // byte arrays: a rough count over (possibly compacted) columns.
            for (final KeyValue kv : row) {
                if (kv.qualifier().length % 2 == 0) {
                    if (kv.qualifier().length == 2 || kv.qualifier().length == 4) {
                        ++dps_post_filter;
                    } else {
                        // Assume all points in a compacted column share the
                        // same precision. This is likely incorrect.
                        if (Internal.inMilliseconds(kv.qualifier())) {
                            dps_post_filter += (kv.qualifier().length / 4);
                        } else {
                            dps_post_filter += (kv.qualifier().length / 2);
                        }
                    }
                } else if (kv.qualifier()[0] == AppendDataPoints.APPEND_COLUMN_PREFIX) {
                    // with appends we don't have a good rough estimate as the length
                    // can vary widely with the value length variability. Therefore we
                    // have to iterate.
                    int idx = 0;
                    int qlength = 0;
                    while (idx < kv.value().length) {
                        qlength = Internal.getQualifierLength(kv.value(), idx);
                        idx += qlength + Internal.getValueLengthFromQualifier(kv.value(), idx);
                        ++dps_post_filter;
                    }
                }
            }
            Span datapoints = spans.get(key);
            if (datapoints == null) {
                datapoints = new Span(tsdb);
                spans.put(key, datapoints);
            }
            final long compaction_start = DateTime.nanoTime();
            final KeyValue compacted = tsdb.compact(row, datapoints.getAnnotations());
            compaction_time += (DateTime.nanoTime() - compaction_start);
            seenAnnotation |= !datapoints.getAnnotations().isEmpty();
            if (compacted != null) {
                // Can be null if we ignored all KVs.
                datapoints.addRow(compacted);
                ++nrows;
            }
        }

        void close(final Exception e) {
            scanner.close();
            if (query_stats != null) {
                query_stats.addScannerStat(query_index, index, QueryStat.SCANNER_TIME, DateTime.nanoTime() - scan_start_time);
                // Scanner Stats
                /* Uncomment when AsyncHBase has this feature:
           query_stats.addScannerStat(query_index, index, 
               QueryStat.ROWS_FROM_STORAGE, scanner.getRowsFetched());
           query_stats.addScannerStat(query_index, index, 
               QueryStat.COLUMNS_FROM_STORAGE, scanner.getColumnsFetched());
           query_stats.addScannerStat(query_index, index, 
               QueryStat.BYTES_FROM_STORAGE, scanner.getBytesFetched()); */
                query_stats.addScannerStat(query_index, index, QueryStat.HBASE_TIME, fetch_time);
                query_stats.addScannerStat(query_index, index, QueryStat.SUCCESSFUL_SCAN, e == null ? 1 : 0);
                // Post Scan stats
                query_stats.addScannerStat(query_index, index, QueryStat.ROWS_PRE_FILTER, rows_pre_filter);
                query_stats.addScannerStat(query_index, index, QueryStat.DPS_PRE_FILTER, dps_pre_filter);
                query_stats.addScannerStat(query_index, index, QueryStat.ROWS_POST_FILTER, rows_post_filter);
                query_stats.addScannerStat(query_index, index, QueryStat.DPS_POST_FILTER, dps_post_filter);
                query_stats.addScannerStat(query_index, index, QueryStat.SCANNER_UID_TO_STRING_TIME, uid_resolve_time);
                query_stats.addScannerStat(query_index, index, QueryStat.UID_PAIRS_RESOLVED, uids_resolved);
                query_stats.addScannerStat(query_index, index, QueryStat.COMPACTION_TIME, compaction_time);
            }
            if (e != null) {
                results.callback(e);
            } else if (nrows < 1 && !seenAnnotation) {
                results.callback(null);
            } else {
                results.callback(spans);
            }
        }
    }
    new ScannerCB().scan();
    return results;
}
Also used: Scanner (org.hbase.async.Scanner), HashSet (java.util.HashSet), Set (java.util.Set), KeyValue (org.hbase.async.KeyValue), Deferred (com.stumbleupon.async.Deferred), ArrayList (java.util.ArrayList), TagVFilter (net.opentsdb.query.filter.TagVFilter), TreeMap (java.util.TreeMap), DeferredGroupException (com.stumbleupon.async.DeferredGroupException), HBaseException (org.hbase.async.HBaseException), Callback (com.stumbleupon.async.Callback), Map (java.util.Map), ByteMap (org.hbase.async.Bytes.ByteMap), DeleteRequest (org.hbase.async.DeleteRequest)
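
The core of findSpans is the scanner callback that re-arms itself on each batch until nextRows() returns null. A stripped-down sketch of that pattern (a hypothetical standalone class, not part of OpenTSDB) that just counts rows:

import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;
import java.util.ArrayList;
import org.hbase.async.KeyValue;
import org.hbase.async.Scanner;

final class CountingScan implements Callback<Object, ArrayList<ArrayList<KeyValue>>> {
    private final Scanner scanner;
    private final Deferred<Long> done = new Deferred<Long>();
    private long rows = 0;

    CountingScan(final Scanner scanner) {
        this.scanner = scanner;
    }

    /** Kicks off the scan; the returned deferred fires with the row count. */
    Deferred<Long> start() {
        scanner.nextRows().addCallback(this);
        return done;
    }

    @Override
    public Object call(final ArrayList<ArrayList<KeyValue>> batch) {
        if (batch == null) {
            // Scanner exhausted: clean up and fire the final result.
            scanner.close();
            done.callback(rows);
            return null;
        }
        rows += batch.size(); // process the batch here
        // Re-arm: ask for the next batch and route it back to this callback.
        return scanner.nextRows().addCallback(this);
    }
}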

Example 43 with KeyValue

Use of org.hbase.async.KeyValue in project opentsdb by OpenTSDB.

The class Annotation, method deleteRange.

/**
   * Deletes global or TSUID-associated annotations for the given time range.
   * @param tsdb The TSDB object to use for storage access
   * @param tsuid An optional TSUID. If set to null, then global annotations for
   * the given range will be deleted
   * @param start_time A start timestamp in milliseconds
   * @param end_time An end timestamp in milliseconds
   * @return The number of annotations deleted
   * @throws IllegalArgumentException if the timestamps are invalid
   * @since 2.1
   */
public static Deferred<Integer> deleteRange(final TSDB tsdb, final byte[] tsuid, final long start_time, final long end_time) {
    if (end_time < 1) {
        throw new IllegalArgumentException("The end timestamp has not been set");
    }
    if (end_time < start_time) {
        throw new IllegalArgumentException("The end timestamp cannot be less than the start timestamp");
    }
    final List<Deferred<Object>> delete_requests = new ArrayList<Deferred<Object>>();
    int width = tsuid != null ? Const.SALT_WIDTH() + tsuid.length + Const.TIMESTAMP_BYTES : Const.SALT_WIDTH() + TSDB.metrics_width() + Const.TIMESTAMP_BYTES;
    final byte[] start_row = new byte[width];
    final byte[] end_row = new byte[width];
    // downsample to seconds for the row keys
    final long start = start_time / 1000;
    final long end = end_time / 1000;
    final long normalized_start = (start - (start % Const.MAX_TIMESPAN));
    final long normalized_end = (end - (end % Const.MAX_TIMESPAN) + Const.MAX_TIMESPAN);
    Bytes.setInt(start_row, (int) normalized_start, Const.SALT_WIDTH() + TSDB.metrics_width());
    Bytes.setInt(end_row, (int) normalized_end, Const.SALT_WIDTH() + TSDB.metrics_width());
    if (tsuid != null) {
        // first copy the metric UID then the tags
        System.arraycopy(tsuid, 0, start_row, Const.SALT_WIDTH(), TSDB.metrics_width());
        System.arraycopy(tsuid, 0, end_row, Const.SALT_WIDTH(), TSDB.metrics_width());
        width = Const.SALT_WIDTH() + TSDB.metrics_width() + Const.TIMESTAMP_BYTES;
        final int remainder = tsuid.length - TSDB.metrics_width();
        System.arraycopy(tsuid, TSDB.metrics_width(), start_row, width, remainder);
        System.arraycopy(tsuid, TSDB.metrics_width(), end_row, width, remainder);
    }
    /**
     * Iterates through the scanner results in an asynchronous manner, returning
     * once the scanner returns a null result set.
     */
    final class ScannerCB implements Callback<Deferred<List<Deferred<Object>>>, ArrayList<ArrayList<KeyValue>>> {

        final Scanner scanner;

        public ScannerCB() {
            scanner = tsdb.getClient().newScanner(tsdb.dataTable());
            scanner.setStartKey(start_row);
            scanner.setStopKey(end_row);
            scanner.setFamily(FAMILY);
            if (tsuid != null) {
                final List<String> tsuids = new ArrayList<String>(1);
                tsuids.add(UniqueId.uidToString(tsuid));
                Internal.createAndSetTSUIDFilter(scanner, tsuids);
            }
        }

        public Deferred<List<Deferred<Object>>> scan() {
            return scanner.nextRows().addCallbackDeferring(this);
        }

        @Override
        public Deferred<List<Deferred<Object>>> call(final ArrayList<ArrayList<KeyValue>> rows) throws Exception {
            if (rows == null || rows.isEmpty()) {
                return Deferred.fromResult(delete_requests);
            }
            for (final ArrayList<KeyValue> row : rows) {
                final long base_time = Internal.baseTime(tsdb, row.get(0).key());
                for (KeyValue column : row) {
                    if ((column.qualifier().length == 3 || column.qualifier().length == 5) && column.qualifier()[0] == PREFIX()) {
                        final long timestamp = timeFromQualifier(column.qualifier(), base_time);
                        if (timestamp < start_time || timestamp > end_time) {
                            continue;
                        }
                        final DeleteRequest delete = new DeleteRequest(tsdb.dataTable(), column.key(), FAMILY, column.qualifier());
                        delete_requests.add(tsdb.getClient().delete(delete));
                    }
                }
            }
            return scan();
        }
    }
    /** Called when the scanner is done. Delete requests may still be pending */
    final class ScannerDoneCB implements Callback<Deferred<ArrayList<Object>>, List<Deferred<Object>>> {

        @Override
        public Deferred<ArrayList<Object>> call(final List<Deferred<Object>> deletes) throws Exception {
            return Deferred.group(delete_requests);
        }
    }
    /** Waits on the group of deferreds to complete before returning the count */
    final class GroupCB implements Callback<Deferred<Integer>, ArrayList<Object>> {

        @Override
        public Deferred<Integer> call(final ArrayList<Object> deletes) throws Exception {
            return Deferred.fromResult(deletes.size());
        }
    }
    Deferred<ArrayList<Object>> scanner_done = new ScannerCB().scan().addCallbackDeferring(new ScannerDoneCB());
    return scanner_done.addCallbackDeferring(new GroupCB());
}
Also used: Scanner (org.hbase.async.Scanner), KeyValue (org.hbase.async.KeyValue), Deferred (com.stumbleupon.async.Deferred), ArrayList (java.util.ArrayList), Callback (com.stumbleupon.async.Callback), List (java.util.List), DeleteRequest (org.hbase.async.DeleteRequest)
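
A hypothetical caller (the tsdb handle and timestamps are assumptions for illustration, and joinUninterruptibly can rethrow the deferred's exception, so the enclosing method must declare or handle it): delete all global annotations in a one-hour window and block for the count.

// Passing a null TSUID deletes global annotations in the range.
final long start_ms = 1356998400000L;          // made-up window start
final long end_ms = start_ms + 3600 * 1000L;   // one hour later
final int deleted = Annotation.deleteRange(tsdb, null, start_ms, end_ms)
        .joinUninterruptibly();
System.out.println("Deleted " + deleted + " annotations");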

Example 44 with KeyValue

Use of org.hbase.async.KeyValue in project opentsdb by OpenTSDB.

The class UidManager, method fsck.

/**
   * Implements the {@code fsck} subcommand.
   * @param client The HBase client to use.
   * @param table The name of the HBase table to use.
   * @param fix Whether or not to attempt to fix errors while running.
   * @param fix_unknowns Whether or not to delete columns with unknown qualifiers.
   * @return The exit status of the command (0 means success).
   */
private static int fsck(final HBaseClient client, final byte[] table, final boolean fix, final boolean fix_unknowns) {
    if (fix) {
        LOG.info("----------------------------------");
        LOG.info("-    Running fsck in FIX mode    -");
        LOG.info("-      Remove Unknowns: " + fix_unknowns + "     -");
        LOG.info("----------------------------------");
    } else {
        LOG.info("Running in log only mode");
    }
    final class Uids {

        int errors;

        long maxid;

        long max_found_id;

        short width;

        final HashMap<String, String> id2name = new HashMap<String, String>();

        final HashMap<String, String> name2id = new HashMap<String, String>();

        void error(final KeyValue kv, final String msg) {
            error(msg + ".  kv=" + kv);
        }

        void error(final String msg) {
            LOG.error(msg);
            errors++;
        }

        /*
       * Replaces or creates the reverse map in storage and in the local map
       */
        void restoreReverseMap(final String kind, final String name, final String uid) {
            final PutRequest put = new PutRequest(table, UniqueId.stringToUid(uid), CliUtils.NAME_FAMILY, CliUtils.toBytes(kind), CliUtils.toBytes(name));
            client.put(put);
            id2name.put(uid, name);
            LOG.info("FIX: Restoring " + kind + " reverse mapping: " + uid + " -> " + name);
        }

        /*
       * Removes the reverse map from storage only
       */
        void removeReverseMap(final String kind, final String name, final String uid) {
            // clean up meta data too
            final byte[][] qualifiers = new byte[2][];
            qualifiers[0] = CliUtils.toBytes(kind);
            if (Bytes.equals(CliUtils.METRICS, qualifiers[0])) {
                qualifiers[1] = CliUtils.METRICS_META;
            } else if (Bytes.equals(CliUtils.TAGK, qualifiers[0])) {
                qualifiers[1] = CliUtils.TAGK_META;
            } else if (Bytes.equals(CliUtils.TAGV, qualifiers[0])) {
                qualifiers[1] = CliUtils.TAGV_META;
            }
            final DeleteRequest delete = new DeleteRequest(table, UniqueId.stringToUid(uid), CliUtils.NAME_FAMILY, qualifiers);
            client.delete(delete);
            // can't remove from the id2name map as this will be called while looping
            LOG.info("FIX: Removed " + kind + " reverse mapping: " + uid + " -> " + name);
        }
    }
    final long start_time = System.nanoTime();
    final HashMap<String, Uids> name2uids = new HashMap<String, Uids>();
    final Scanner scanner = client.newScanner(table);
    scanner.setMaxNumRows(1024);
    int kvcount = 0;
    try {
        ArrayList<ArrayList<KeyValue>> rows;
        while ((rows = scanner.nextRows().joinUninterruptibly()) != null) {
            for (final ArrayList<KeyValue> row : rows) {
                for (final KeyValue kv : row) {
                    kvcount++;
                    final byte[] qualifier = kv.qualifier();
                    // TODO - validate meta data in the future, for now skip it
                    if (Bytes.equals(qualifier, TSMeta.META_QUALIFIER()) || Bytes.equals(qualifier, TSMeta.COUNTER_QUALIFIER()) || Bytes.equals(qualifier, CliUtils.METRICS_META) || Bytes.equals(qualifier, CliUtils.TAGK_META) || Bytes.equals(qualifier, CliUtils.TAGV_META)) {
                        continue;
                    }
                    if (!Bytes.equals(qualifier, CliUtils.METRICS) && !Bytes.equals(qualifier, CliUtils.TAGK) && !Bytes.equals(qualifier, CliUtils.TAGV)) {
                        LOG.warn("Unknown qualifier " + UniqueId.uidToString(qualifier) + " in row " + UniqueId.uidToString(kv.key()));
                        if (fix && fix_unknowns) {
                            final DeleteRequest delete = new DeleteRequest(table, kv.key(), kv.family(), qualifier);
                            client.delete(delete);
                            LOG.info("FIX: Removed unknown qualifier " + UniqueId.uidToString(qualifier) + " in row " + UniqueId.uidToString(kv.key()));
                        }
                        continue;
                    }
                    final String kind = CliUtils.fromBytes(kv.qualifier());
                    Uids uids = name2uids.get(kind);
                    if (uids == null) {
                        uids = new Uids();
                        name2uids.put(kind, uids);
                    }
                    final byte[] key = kv.key();
                    final byte[] family = kv.family();
                    final byte[] value = kv.value();
                    if (Bytes.equals(key, CliUtils.MAXID_ROW)) {
                        if (value.length != 8) {
                            uids.error(kv, "Invalid maximum ID for " + kind + ": should be on 8 bytes: ");
                        // TODO - a fix would be to find the max used ID for the type 
                        // and store that in the max row.
                        } else {
                            uids.maxid = Bytes.getLong(value);
                            LOG.info("Maximum ID for " + kind + ": " + uids.maxid);
                        }
                    } else {
                        short idwidth = 0;
                        if (Bytes.equals(family, CliUtils.ID_FAMILY)) {
                            idwidth = (short) value.length;
                            final String skey = CliUtils.fromBytes(key);
                            final String svalue = UniqueId.uidToString(value);
                            final long max_found_id;
                            if (Bytes.equals(qualifier, CliUtils.METRICS)) {
                                max_found_id = UniqueId.uidToLong(value, TSDB.metrics_width());
                            } else if (Bytes.equals(qualifier, CliUtils.TAGK)) {
                                max_found_id = UniqueId.uidToLong(value, TSDB.tagk_width());
                            } else {
                                max_found_id = UniqueId.uidToLong(value, TSDB.tagv_width());
                            }
                            if (uids.max_found_id < max_found_id) {
                                uids.max_found_id = max_found_id;
                            }
                            final String id = uids.name2id.put(skey, svalue);
                            if (id != null) {
                                uids.error(kv, "Duplicate forward " + kind + " mapping: " + skey + " -> " + id + " and " + skey + " -> " + svalue);
                            }
                        } else if (Bytes.equals(family, CliUtils.NAME_FAMILY)) {
                            final String skey = UniqueId.uidToString(key);
                            final String svalue = CliUtils.fromBytes(value);
                            idwidth = (short) key.length;
                            final String name = uids.id2name.put(skey, svalue);
                            if (name != null) {
                                uids.error(kv, "Duplicate reverse " + kind + "  mapping: " + svalue + " -> " + name + " and " + svalue + " -> " + skey);
                            }
                        }
                        if (uids.width == 0) {
                            uids.width = idwidth;
                        } else if (uids.width != idwidth) {
                            uids.error(kv, "Invalid " + kind + " ID of length " + idwidth + " (expected: " + uids.width + ')');
                        }
                    }
                }
            }
        }
    } catch (HBaseException e) {
        LOG.error("Error while scanning HBase, scanner=" + scanner, e);
        throw e;
    } catch (Exception e) {
        LOG.error("WTF?  Unexpected exception type, scanner=" + scanner, e);
        throw new AssertionError("Should never happen");
    }
    // Match up all forward mappings with their reverse mappings and vice
    // versa and make sure they agree.
    int errors = 0;
    for (final Map.Entry<String, Uids> entry : name2uids.entrySet()) {
        final String kind = entry.getKey();
        final Uids uids = entry.getValue();
        // This will be used in the event that we run into an inconsistent forward
        // mapping that could mean a single UID was assigned to different names.
        // It SHOULD NEVER HAPPEN, but it could.
        HashMap<String, TreeSet<String>> uid_collisions = null;
        // These are harmful and shouldn't exist.
        for (final Map.Entry<String, String> nameid : uids.name2id.entrySet()) {
            final String name = nameid.getKey();
            final String id = nameid.getValue();
            final String found = uids.id2name.get(id);
            if (found == null) {
                uids.error("Forward " + kind + " mapping is missing reverse" + " mapping: " + name + " -> " + id);
                if (fix) {
                    uids.restoreReverseMap(kind, name, id);
                }
            } else if (!found.equals(name)) {
                uids.error("Forward " + kind + " mapping " + name + " -> " + id + " is different than reverse mapping: " + id + " -> " + found);
                final String id2 = uids.name2id.get(found);
                if (id2 != null) {
                    uids.error("Inconsistent forward " + kind + " mapping " + name + " -> " + id + " vs " + name + " -> " + found + " / " + found + " -> " + id2);
                    // Both forward mappings point at the same reverse mapping, so a
                    // single UID may have been assigned to different names,
                    // corrupting the affected time series.
                    if (fix) {
                        // Build the UID collision map only
                        // once, as needed, since it's expensive.
                        if (uid_collisions == null) {
                            uid_collisions = new HashMap<String, TreeSet<String>>(uids.name2id.size());
                            for (final Map.Entry<String, String> row : uids.name2id.entrySet()) {
                                TreeSet<String> names = uid_collisions.get(row.getValue());
                                if (names == null) {
                                    names = new TreeSet<String>();
                                    uid_collisions.put(row.getValue(), names);
                                }
                                names.add(row.getKey());
                            }
                        }
                        // If the UID maps back to only one name, the time
                        // series *should* be OK and we can just fix the reverse map.
                        if (uid_collisions.containsKey(id) && uid_collisions.get(id).size() <= 1) {
                            uids.restoreReverseMap(kind, name, id);
                        }
                    }
                } else {
                    uids.error("Duplicate forward " + kind + " mapping " + name + " -> " + id + " and " + id2 + " -> " + found);
                    if (fix) {
                        uids.restoreReverseMap(kind, name, id);
                    }
                }
            }
        }
        // Scan through the UID collisions map and fix the screw ups
        if (uid_collisions != null) {
            for (Map.Entry<String, TreeSet<String>> collision : uid_collisions.entrySet()) {
                if (collision.getValue().size() <= 1) {
                    continue;
                }
                // The data in any time series with the errant UID is 
                // a mashup of all of the names. The best thing to do is
                // start over. We'll rename the old time series so the user can
                // still see it if they want to, but delete the forward mappings
                // so that UIDs can be reassigned and clean series started.
                // - concatenate all of the names into 
                //   "fsck.<name1>.<name2>[...<nameN>]"
                // - delete the forward mappings for all of the names
                // - create a mapping with the fsck'd name pointing to the id
                final StringBuilder fsck_builder = new StringBuilder("fsck");
                final String id = collision.getKey();
                // compile the new fsck'd name and remove each of the duplicate keys
                for (String name : collision.getValue()) {
                    fsck_builder.append(".").append(name);
                    final DeleteRequest delete = new DeleteRequest(table, CliUtils.toBytes(name), CliUtils.ID_FAMILY, CliUtils.toBytes(kind));
                    client.delete(delete);
                    uids.name2id.remove(name);
                    LOG.info("FIX: Removed forward " + kind + " mapping for " + name + " -> " + id);
                }
                // write the new forward map
                final String fsck_name = fsck_builder.toString();
                final PutRequest put = new PutRequest(table, CliUtils.toBytes(fsck_name), CliUtils.ID_FAMILY, CliUtils.toBytes(kind), UniqueId.stringToUid(id));
                client.put(put);
                LOG.info("FIX: Created forward " + kind + " mapping for fsck'd UID " + fsck_name + " -> " + collision.getKey());
                // we still need to fix the uids map for the reverse run through below
                uids.name2id.put(fsck_name, collision.getKey());
                uids.restoreReverseMap(kind, fsck_name, id);
                LOG.error("----------------------------------");
                LOG.error("-     UID COLLISION DETECTED     -");
                LOG.error("Corrupted UID [" + collision.getKey() + "] renamed to [" + fsck_name + "]");
                LOG.error("----------------------------------");
            }
        }
        // These are harmless but shouldn't frequently occur.
        for (final Map.Entry<String, String> idname : uids.id2name.entrySet()) {
            final String name = idname.getValue();
            final String id = idname.getKey();
            final String found = uids.name2id.get(name);
            if (found == null) {
                LOG.warn("Reverse " + kind + " mapping is missing forward" + " mapping: " + name + " -> " + id);
                if (fix) {
                    uids.removeReverseMap(kind, name, id);
                }
            } else if (!found.equals(id)) {
                final String name2 = uids.id2name.get(found);
                if (name2 != null) {
                    uids.error("Inconsistent reverse " + kind + " mapping " + id + " -> " + name + " vs " + found + " -> " + name + " / " + name2 + " -> " + found);
                    if (fix) {
                        uids.removeReverseMap(kind, name, id);
                    }
                } else {
                    uids.error("Duplicate reverse " + kind + " mapping " + id + " -> " + name + " and " + found + " -> " + name2);
                    if (fix) {
                        uids.removeReverseMap(kind, name, id);
                    }
                }
            }
        }
        final int maxsize = Math.max(uids.id2name.size(), uids.name2id.size());
        if (uids.maxid > maxsize) {
            LOG.warn("Max ID for " + kind + " is " + uids.maxid + " but only " + maxsize + " entries were found.  Maybe " + (uids.maxid - maxsize) + " IDs were deleted?");
        } else if (uids.maxid < uids.max_found_id) {
            uids.error("We found an ID of " + uids.max_found_id + " for " + kind + " but the max ID is only " + uids.maxid + "!  Future IDs may be double-assigned!");
            if (fix) {
                // It's better to over-run
                // IDs than to under-run, so bump the max ID up to the largest found.
                if (uids.max_found_id == Long.MAX_VALUE) {
                    LOG.error("Ran out of UIDs for " + kind + ". Unable to fix max ID");
                } else {
                    final long diff = uids.max_found_id - uids.maxid;
                    final AtomicIncrementRequest air = new AtomicIncrementRequest(table, CliUtils.MAXID_ROW, CliUtils.ID_FAMILY, CliUtils.toBytes(kind), diff);
                    client.atomicIncrement(air);
                    LOG.info("FIX: Updated max ID for " + kind + " to " + uids.max_found_id);
                }
            }
        }
        if (uids.errors > 0) {
            LOG.error(kind + ": Found " + uids.errors + " errors.");
            errors += uids.errors;
        }
    }
    final long timing = (System.nanoTime() - start_time) / 1000000;
    LOG.info(kvcount + " KVs analyzed in " + timing + "ms (~" + (kvcount * 1000 / Math.max(timing, 1)) + " KV/s)");
    if (errors == 0) {
        LOG.info("No errors found.");
        return 0;
    }
    LOG.warn(errors + " errors found.");
    return errors;
}
Also used: Scanner (org.hbase.async.Scanner), KeyValue (org.hbase.async.KeyValue), HashMap (java.util.HashMap), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), ArrayList (java.util.ArrayList), TreeSet (java.util.TreeSet), PutRequest (org.hbase.async.PutRequest), AtomicIncrementRequest (org.hbase.async.AtomicIncrementRequest), HBaseException (org.hbase.async.HBaseException), DeleteRequest (org.hbase.async.DeleteRequest), Map (java.util.Map)
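
The consistency rule the fsck pass enforces is that the forward (name to ID) and reverse (ID to name) maps mirror each other. A toy sketch of that check with plain maps and made-up data (not the OpenTSDB API):

import java.util.HashMap;
import java.util.Map;

final Map<String, String> name2id = new HashMap<String, String>();
final Map<String, String> id2name = new HashMap<String, String>();
name2id.put("sys.cpu.user", "000001");
id2name.put("000001", "sys.cpu.nice"); // deliberately inconsistent

for (final Map.Entry<String, String> entry : name2id.entrySet()) {
    final String back = id2name.get(entry.getValue());
    if (back == null) {
        System.out.println("Missing reverse mapping: " + entry.getKey()
            + " -> " + entry.getValue());
    } else if (!back.equals(entry.getKey())) {
        System.out.println("Inconsistent mapping: " + entry.getKey()
            + " -> " + entry.getValue() + " -> " + back);
    }
}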

Example 45 with KeyValue

Use of org.hbase.async.KeyValue in project opentsdb by OpenTSDB.

The class UidManager, method grep.

/**
   * Implements the {@code grep} subcommand.
   * @param client The HBase client to use.
   * @param table The name of the HBase table to use.
   * @param ignorecase Whether or not to ignore the case while grepping.
   * @param args Command line arguments ({@code [kind] RE}).
   * @return The exit status of the command (0 means at least 1 match).
   */
private static int grep(final HBaseClient client, final byte[] table, final boolean ignorecase, final String[] args) {
    final Scanner scanner = client.newScanner(table);
    scanner.setMaxNumRows(1024);
    String regexp;
    scanner.setFamily(CliUtils.ID_FAMILY);
    if (args.length == 3) {
        scanner.setQualifier(CliUtils.toBytes(args[1]));
        regexp = args[2];
    } else {
        regexp = args[1];
    }
    if (ignorecase) {
        regexp = "(?i)" + regexp;
    }
    scanner.setKeyRegexp(regexp, CliUtils.CHARSET);
    boolean found = false;
    try {
        ArrayList<ArrayList<KeyValue>> rows;
        while ((rows = scanner.nextRows().joinUninterruptibly()) != null) {
            for (final ArrayList<KeyValue> row : rows) {
                found |= printResult(row, CliUtils.ID_FAMILY, true);
            }
        }
    } catch (HBaseException e) {
        LOG.error("Error while scanning HBase, scanner=" + scanner, e);
        throw e;
    } catch (Exception e) {
        LOG.error("WTF?  Unexpected exception type, scanner=" + scanner, e);
        throw new AssertionError("Should never happen");
    }
    return found ? 0 : 1;
}
Also used: Scanner (org.hbase.async.Scanner), KeyValue (org.hbase.async.KeyValue), ArrayList (java.util.ArrayList), HBaseException (org.hbase.async.HBaseException)
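
A condensed sketch of the scanner setup grep relies on; the quorum spec, table, family, and regex below are assumptions for illustration. setKeyRegexp pushes the filter server-side so only matching rows come back:

import java.nio.charset.Charset;
import org.hbase.async.HBaseClient;
import org.hbase.async.Scanner;

final HBaseClient client = new HBaseClient("localhost");
final Scanner scanner = client.newScanner("tsdb-uid".getBytes());
scanner.setFamily("id".getBytes());
scanner.setMaxNumRows(1024);
// Case-insensitive match on the row key, i.e. the UID name.
scanner.setKeyRegexp("(?i)cpu", Charset.forName("ISO-8859-1"));
// scanner.nextRows() now only returns rows whose keys match.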

Aggregations

KeyValue (org.hbase.async.KeyValue): 171
Test (org.junit.Test): 127
PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest): 121
ArrayList (java.util.ArrayList): 101
Annotation (net.opentsdb.meta.Annotation): 50
Callback (com.stumbleupon.async.Callback): 30
GetRequest (org.hbase.async.GetRequest): 21
Scanner (org.hbase.async.Scanner): 19
Deferred (com.stumbleupon.async.Deferred): 14
HBaseException (org.hbase.async.HBaseException): 13
TSDB (net.opentsdb.core.TSDB): 12
Matchers.anyString (org.mockito.Matchers.anyString): 11
Config (net.opentsdb.utils.Config): 10
UniqueIdType (net.opentsdb.uid.UniqueId.UniqueIdType): 9
DeleteRequest (org.hbase.async.DeleteRequest): 8
DeferredGroupException (com.stumbleupon.async.DeferredGroupException): 7
Map (java.util.Map): 7
HashMap (java.util.HashMap): 6
PutRequest (org.hbase.async.PutRequest): 6
List (java.util.List): 5