Search in sources :

Example 1 with ColumnTracker

use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in project phoenix by apache.

the class NonTxIndexBuilder method cleanupIndexStateFromBatchOnward.

/**
 * Cleans up the index entries produced by the codec, starting from the given batch timestamp and repeating for each
 * subsequent timestamp reported by the columns tracked on the table state.
 * <p>
 * Every pass positions {@code state} at the timestamp being processed, adds the resulting delete updates to
 * {@code updateMap}, and then picks the smallest timestamp among the tracked columns as the next one to process.
 * The walk ends once {@link ColumnTracker#isNewestTime(long)} accepts that smallest timestamp.
 *
 * @param updateMap
 *            updated with the pending index updates from the codec
 * @param batchTs
 *            timestamp from which we should cleanup
 * @param state
 *            current state of the primary table. Should already be set up to the correct state from which we want
 *            to cleanup.
 * @param indexMetaData
 *            index metadata, handed unchanged to the delete-update helper on every pass
 * @throws IOException
 *             propagated from the calls made while collecting the delete updates
 */
private void cleanupIndexStateFromBatchOnward(IndexUpdateManager updateMap, long batchTs, LocalTableState state, IndexMetaData indexMetaData) throws IOException {
    long currentTs = batchTs;
    while (true) {
        // collect the cleanup for the state as of the timestamp being processed
        state.setCurrentTimestamp(currentTs);
        addDeleteUpdatesToMap(updateMap, state, currentTs, indexMetaData);

        // find the earliest timestamp any tracked column still has to be revisited at
        Set<ColumnTracker> tracked = state.getTrackedColumns();
        long earliestTs = ColumnTracker.NO_NEWER_PRIMARY_TABLE_ENTRY_TIMESTAMP;
        for (ColumnTracker columnTracker : tracked) {
            earliestTs = Math.min(earliestTs, columnTracker.getTS());
        }
        state.resetTrackedColumns();

        if (ColumnTracker.isNewestTime(earliestTs)) {
            // nothing further to revisit
            return;
        }
        // carry the tracked columns forward as hints and continue from the next timestamp
        state.setHints(Lists.newArrayList(tracked));
        currentTs = earliestTs;
    }
}
Also used : ColumnTracker(org.apache.phoenix.hbase.index.covered.update.ColumnTracker)

Example 2 with ColumnTracker

use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in project phoenix by apache.

the class NonTxIndexBuilder method addCurrentStateMutationsForBatch.

/**
 * Add the necessary mutations for the pending batch on the local state. Handles rolling up through history to
 * determine the index changes after applying the batch (for the case where the batch is back in time).
 * <p>
 * For every {@link IndexUpdate} returned by the codec, the valid ones are added to {@code updateMap}. When the
 * update's {@link ColumnTracker} reports newer timestamps, a {@link Delete} for the same row, stamped with the
 * tracker's timestamp, is added as well.
 *
 * @param updateMap
 *            to update with index mutations
 * @param state
 *            current state of the table; its tracked columns are reset after the upserts are requested
 * @param indexMetaData
 *            index metadata passed through to the codec when requesting the upserts
 * @return the minimum timestamp across all index columns requested. If {@link ColumnTracker#isNewestTime(long)}
 *         returns <tt>true</tt> on the returned timestamp, we know that this <i>was not a back-in-time update</i>.
 * @throws IOException
 *             propagated from the codec when the upserts cannot be produced
 */
private long addCurrentStateMutationsForBatch(IndexUpdateManager updateMap, LocalTableState state, IndexMetaData indexMetaData) throws IOException {
    // get the index updates for this current batch
    Iterable<IndexUpdate> upserts = codec.getIndexUpserts(state, indexMetaData);
    state.resetTrackedColumns();
    /*
     * go through all the pending updates. If we are sure that all the entries are the latest timestamp, we can just
     * add the index updates and move on. However, if there are columns that we skip past (based on the timestamp of
     * the batch), we need to roll back up the history. Regardless of whether or not they are the latest timestamp,
     * the entries here are going to be correct for the current batch timestamp, so we add them to the updates. The
     * only thing we really care about is if we need to roll up the history and fix it as we go.
     */
    // timestamp of the next update we need to track
    long minTs = ColumnTracker.NO_NEWER_PRIMARY_TABLE_ENTRY_TIMESTAMP;
    for (IndexUpdate update : upserts) {
        // this is the one bit where we check the timestamps
        final ColumnTracker tracker = update.getIndexedColumns();
        final long trackerTs = tracker.getTS();
        // update the next min TS we need to track
        if (trackerTs < minTs) {
            minTs = trackerTs;
        }
        // an update whose columns have newer timestamps is not the latest, so it also needs to be
        // cleaned up at the next timestamp
        final boolean needsCleanup = tracker.hasNewerTimestamps();
        // only make the put if the index update has been setup
        if (!update.isValid()) {
            continue;
        }
        byte[] table = update.getTableName();
        Mutation mutation = update.getUpdate();
        updateMap.addIndexUpdate(table, mutation);
        // only make the cleanup if we made a put and need cleanup
        if (needsCleanup) {
            // there is a TS for the interested columns that is greater than the columns in the
            // put. Therefore, we need to issue a delete at the same timestamp
            Delete d = new Delete(mutation.getRow());
            d.setTimestamp(tracker.getTS());
            updateMap.addIndexUpdate(table, d);
        }
    }
    return minTs;
}
Also used : Delete(org.apache.hadoop.hbase.client.Delete) ArrayList(java.util.ArrayList) ColumnTracker(org.apache.phoenix.hbase.index.covered.update.ColumnTracker) Mutation(org.apache.hadoop.hbase.client.Mutation) IndexedColumnGroup(org.apache.phoenix.hbase.index.covered.update.IndexedColumnGroup)

Example 3 with ColumnTracker

use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in project phoenix by apache.

the class TestColumnTracker method testHasNewerTimestamps.

@Test
public void testHasNewerTimestamps() throws Exception {
    // a tracker over an empty column set, with no timestamp recorded yet
    final Collection<ColumnReference> noColumns = new ArrayList<ColumnReference>();
    final ColumnTracker underTest = new ColumnTracker(noColumns);

    boolean newerBeforeSet = underTest.hasNewerTimestamps();
    assertFalse("Tracker has newer timestamps when no ts set", newerBeforeSet);

    // once a timestamp has been recorded the tracker must report newer timestamps
    underTest.setTs(10);
    boolean newerAfterSet = underTest.hasNewerTimestamps();
    assertTrue("Tracker doesn't have newer timetamps with set ts", newerAfterSet);
}
Also used : ArrayList(java.util.ArrayList) ColumnTracker(org.apache.phoenix.hbase.index.covered.update.ColumnTracker) ColumnReference(org.apache.phoenix.hbase.index.covered.update.ColumnReference) Test(org.junit.Test)

Example 4 with ColumnTracker

use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in project phoenix by apache.

the class TestColumnTracker method testOnlyKeepsOlderTimestamps.

@Test
public void testOnlyKeepsOlderTimestamps() {
    final Collection<ColumnReference> noColumns = new ArrayList<ColumnReference>();
    final ColumnTracker underTest = new ColumnTracker(noColumns);

    // the first timestamp is taken as-is
    underTest.setTs(10);
    long afterFirstSet = underTest.getTS();
    assertEquals("Column tracker didn't set original TS", 10, afterFirstSet);

    // a larger timestamp must not replace the stored one
    underTest.setTs(12);
    long afterLargerSet = underTest.getTS();
    assertEquals("Column tracker allowed newer timestamp to be set.", 10, afterLargerSet);

    // a smaller timestamp replaces the stored one
    underTest.setTs(9);
    long afterSmallerSet = underTest.getTS();
    assertEquals("Column tracker didn't decrease set timestamp for smaller value", 9, afterSmallerSet);
}
Also used : ArrayList(java.util.ArrayList) ColumnTracker(org.apache.phoenix.hbase.index.covered.update.ColumnTracker) ColumnReference(org.apache.phoenix.hbase.index.covered.update.ColumnReference) Test(org.junit.Test)

Example 5 with ColumnTracker

use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in project phoenix by apache.

the class LocalTableState method getIndexedColumnsTableState.

/**
 * Get a scanner on the columns that are needed by the index, paired with the {@link IndexUpdate} to fill in.
 * <p>
 * The returned scanner is already pre-seeked to the first {@link KeyValue} that matches the given
 * columns with a timestamp earlier than the timestamp to which the table is currently set (the
 * current state of the table for which we need to build an update).
 * <p>
 * If none of the passed columns matches any of the columns in the pending update (as determined
 * by {@link ColumnReference#matchesFamily(byte[])} and
 * {@link ColumnReference#matchesQualifier(byte[])}), then an empty scanner will be returned. This
 * is because it doesn't make sense to build index updates when there is no change in the table
 * state for any of the columns you are indexing.
 * <p>
 * <i>NOTE:</i> This method should <b>not</b> be used during
 * {@link IndexCodec#getIndexDeletes(TableState, BatchState)} as the pending update will not yet have been
 * applied - you are merely attempting to cleanup the current state and therefore do <i>not</i>
 * need to track the indexed columns.
 * <p>
 * As a side-effect, a {@link ColumnTracker} for the requested columns is registered with this state
 * (unless an equal one is already registered) and is updated with the next-most-recent timestamp for
 * the columns you request - you will never see a column with the timestamp we are tracking, but the
 * next oldest timestamp for that column.
 * @param indexedColumns the columns that will be indexed
 * @param ignoreNewerMutations whether newer mutations are ignored when determining current state;
 *        forwarded to the local-state initialization. Useful when replaying mutation state for
 *        partial index rebuild where writes succeeded to the data table, but not to the index table.
 * @param returnNullScannerIfRowNotFound forwarded to the scanner builder; presumably requests a
 *        null scanner when the row is not found - confirm against the scanner builder
 * @param indexMetaData index metadata, forwarded to the local-state initialization
 * @return a pair of the scanner over the columns and the {@link IndexUpdate} that should be passed
 *         back to the builder. Even if no update is necessary for the requested columns, you still
 *         need to return the {@link IndexUpdate}, just don't set the update for the
 *         {@link IndexUpdate}.
 * @throws IOException propagated from initializing the local state or building the scanner
 */
public Pair<Scanner, IndexUpdate> getIndexedColumnsTableState(Collection<? extends ColumnReference> indexedColumns, boolean ignoreNewerMutations, boolean returnNullScannerIfRowNotFound, IndexMetaData indexMetaData) throws IOException {
    ensureLocalStateInitialized(indexedColumns, ignoreNewerMutations, indexMetaData);

    // the tracker records which column references are filtered out for having a newer timestamp
    final ColumnTracker columnTracker = new ColumnTracker(indexedColumns);
    synchronized (this.trackedColumns) {
        // register the tracker only when this set of columns has not been seen before
        final boolean alreadyTracked = this.trackedColumns.contains(columnTracker);
        if (!alreadyTracked) {
            this.trackedColumns.add(columnTracker);
        }
    }

    final Scanner columnScanner = this.scannerBuilder.buildIndexedColumnScanner(
            indexedColumns, columnTracker, ts, returnNullScannerIfRowNotFound);
    final IndexUpdate pendingUpdate = new IndexUpdate(columnTracker);
    return new Pair<Scanner, IndexUpdate>(columnScanner, pendingUpdate);
}
Also used : Scanner(org.apache.phoenix.hbase.index.scanner.Scanner) ColumnTracker(org.apache.phoenix.hbase.index.covered.update.ColumnTracker) Pair(org.apache.hadoop.hbase.util.Pair)

Aggregations

ColumnTracker (org.apache.phoenix.hbase.index.covered.update.ColumnTracker)5 ArrayList (java.util.ArrayList)3 ColumnReference (org.apache.phoenix.hbase.index.covered.update.ColumnReference)2 Test (org.junit.Test)2 Delete (org.apache.hadoop.hbase.client.Delete)1 Mutation (org.apache.hadoop.hbase.client.Mutation)1 Pair (org.apache.hadoop.hbase.util.Pair)1 IndexedColumnGroup (org.apache.phoenix.hbase.index.covered.update.IndexedColumnGroup)1 Scanner (org.apache.phoenix.hbase.index.scanner.Scanner)1