Use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in the Apache Phoenix project.
Class NonTxIndexBuilder, method cleanupIndexStateFromBatchOnward.
/**
 * Queues index cleanup updates for the table state at the given timestamp and then keeps stepping
 * to the next timestamp reported by the tracked columns until no further timestamp remains.
 * <p>
 * On every step the state is pointed at the timestamp being cleaned, the delete updates for that
 * timestamp are added to {@code updateMap}, and the smallest timestamp reported by the state's
 * column trackers is taken as the next timestamp to clean. The walk ends once
 * {@link ColumnTracker#isNewestTime(long)} is true for that smallest timestamp.
 *
 * @param updateMap
 *            receives the pending index updates
 * @param batchTs
 *            timestamp at which the cleanup starts
 * @param state
 *            current state of the primary table; it should already be set up to the state from which
 *            the cleanup is meant to start
 * @param indexMetaData
 *            index metadata, passed through unchanged to the delete-update step
 * @throws IOException
 *             propagated from the calls made while building the delete updates
 */
private void cleanupIndexStateFromBatchOnward(IndexUpdateManager updateMap, long batchTs, LocalTableState state, IndexMetaData indexMetaData) throws IOException {
    long cleanupTs = batchTs;
    while (true) {
        // clean up whatever the index holds for the state at this timestamp
        state.setCurrentTimestamp(cleanupTs);
        addDeleteUpdatesToMap(updateMap, state, cleanupTs, indexMetaData);

        // the smallest tracker timestamp is the next point that needs cleaning
        Set<ColumnTracker> tracked = state.getTrackedColumns();
        long nextTs = ColumnTracker.NO_NEWER_PRIMARY_TABLE_ENTRY_TIMESTAMP;
        for (ColumnTracker columnTracker : tracked) {
            long candidate = columnTracker.getTS();
            if (candidate < nextTs) {
                nextTs = candidate;
            }
        }

        state.resetTrackedColumns();
        if (ColumnTracker.isNewestTime(nextTs)) {
            // nothing left to clean up
            return;
        }

        // NOTE(review): 'tracked' is read again after resetTrackedColumns(). If getTrackedColumns()
        // hands out the live set rather than a copy, these hints may be empty - confirm.
        state.setHints(Lists.newArrayList(tracked));
        cleanupTs = nextTs;
    }
}
Use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in the Apache Phoenix project.
Class NonTxIndexBuilder, method addCurrentStateMutationsForBatch.
/**
 * Add the necessary mutations for the pending batch on the local state. Also reports the timestamp
 * from which history has to be rolled up when the batch turns out to be back in time.
 * <p>
 * Every index update returned by the codec is correct for the current batch timestamp, so each valid
 * one is added to {@code updateMap}. When the tracker of a valid update reports newer timestamps, a
 * {@link Delete} for the same row is queued as well, stamped with the tracker's timestamp.
 *
 * @param updateMap
 *            to update with index mutations
 * @param state
 *            current state of the table; its tracked columns are reset after the upserts are fetched
 * @param indexMetaData
 *            index metadata, passed through unchanged to the codec
 * @return the minimum timestamp across all index columns requested. If {@link ColumnTracker#isNewestTime(long)}
 *         returns <tt>true</tt> on the returned timestamp, we know that this <i>was not a back-in-time update</i>.
 * @throws IOException
 *             propagated from the codec
 */
private long addCurrentStateMutationsForBatch(IndexUpdateManager updateMap, LocalTableState state, IndexMetaData indexMetaData) throws IOException {
    // get the index updates for this current batch
    Iterable<IndexUpdate> upserts = codec.getIndexUpserts(state, indexMetaData);
    state.resetTrackedColumns();

    // timestamp of the next update we need to track
    long minTs = ColumnTracker.NO_NEWER_PRIMARY_TABLE_ENTRY_TIMESTAMP;
    for (IndexUpdate update : upserts) {
        // this is the one bit where we check the timestamps
        final ColumnTracker tracker = update.getIndexedColumns();
        final long trackerTs = tracker.getTS();
        if (trackerTs < minTs) {
            minTs = trackerTs;
        }

        // a tracker with newer timestamps means this update is not the latest and has to be
        // cleaned up at the tracker's timestamp
        final boolean needsCleanup = tracker.hasNewerTimestamps();

        // only make the put if the index update has been set up
        if (!update.isValid()) {
            continue;
        }
        byte[] table = update.getTableName();
        Mutation mutation = update.getUpdate();
        updateMap.addIndexUpdate(table, mutation);

        // only make the cleanup if we made a put and need cleanup
        if (needsCleanup) {
            // there is a TS for the interested columns that is greater than the columns in the
            // put, so issue a delete for the same row at that timestamp
            Delete d = new Delete(mutation.getRow());
            d.setTimestamp(trackerTs);
            updateMap.addIndexUpdate(table, d);
        }
    }
    return minTs;
}
Use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in the Apache Phoenix project.
Class TestColumnTracker, method testHasNewerTimestamps.
/**
 * Checks that a tracker only reports newer timestamps after a timestamp has been set on it.
 */
@Test
public void testHasNewerTimestamps() throws Exception {
    // a tracker built over an empty column collection
    final Collection<ColumnReference> noColumns = new ArrayList<ColumnReference>();
    final ColumnTracker underTest = new ColumnTracker(noColumns);

    // nothing has been set yet
    final boolean beforeSet = underTest.hasNewerTimestamps();
    assertFalse("Tracker has newer timestamps when no ts set", beforeSet);

    // after a timestamp is set the tracker must report it
    underTest.setTs(10);
    final boolean afterSet = underTest.hasNewerTimestamps();
    assertTrue("Tracker doesn't have newer timetamps with set ts", afterSet);
}
Use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in the Apache Phoenix project.
Class TestColumnTracker, method testOnlyKeepsOlderTimestamps.
/**
 * Checks that setting a timestamp on a tracker only takes effect when the new value is smaller than
 * the one it already holds.
 */
@Test
public void testOnlyKeepsOlderTimestamps() {
    final Collection<ColumnReference> noColumns = new ArrayList<ColumnReference>();
    final ColumnTracker underTest = new ColumnTracker(noColumns);

    // the first value is taken as-is
    underTest.setTs(10);
    long observed = underTest.getTS();
    assertEquals("Column tracker didn't set original TS", 10, observed);

    // a larger value must be ignored
    underTest.setTs(12);
    observed = underTest.getTS();
    assertEquals("Column tracker allowed newer timestamp to be set.", 10, observed);

    // a smaller value replaces the stored one
    underTest.setTs(9);
    observed = underTest.getTS();
    assertEquals("Column tracker didn't decrease set timestamp for smaller value", 9, observed);
}
Use of org.apache.phoenix.hbase.index.covered.update.ColumnTracker in the Apache Phoenix project.
Class LocalTableState, method getIndexedColumnsTableState.
/**
 * Build a scanner over the columns the index needs, paired with the {@link IndexUpdate} that the
 * caller should fill in and hand back to the builder.
 * <p>
 * The returned scanner is already pre-seeked to the first {@link KeyValue} that matches the given
 * columns with a timestamp earlier than the timestamp to which the table is currently set (the
 * current state of the table for which we need to build an update).
 * <p>
 * If none of the passed columns matches any of the columns in the pending update (as determined
 * by {@link ColumnReference#matchesFamily(byte[])} and
 * {@link ColumnReference#matchesQualifier(byte[])}), then an empty scanner will be returned,
 * because it doesn't make sense to build index updates when there is no change in the table state
 * for any of the columns you are indexing.
 * <p>
 * <i>NOTE:</i> This method should <b>not</b> be used during
 * {@link IndexCodec#getIndexDeletes(TableState, BatchState)} as the pending update will not yet
 * have been applied - you are merely attempting to clean up the current state and therefore do
 * <i>not</i> need to track the indexed columns.
 * <p>
 * As a side-effect, a {@link ColumnTracker} for the requested columns is registered with this
 * state (unless an equal one is already registered) and is used to track the next-most-recent
 * timestamp for those columns - you will never see a column with the timestamp we are tracking,
 * but the next oldest timestamp for that column.
 *
 * @param indexedColumns the columns that will be indexed
 * @param ignoreNewerMutations ignore newer mutations when determining current state. Useful when
 *          replaying mutation state for partial index rebuild where writes succeeded to the data
 *          table, but not to the index table.
 * @param returnNullScannerIfRowNotFound passed through to the scanner builder; presumably makes
 *          it hand back a null scanner when the row does not exist - confirm against
 *          {@code buildIndexedColumnScanner}
 * @param indexMetaData index metadata, passed through to the local-state initialization
 * @return the scanner over the columns together with the {@link IndexUpdate} that should be passed
 *         back to the builder. Even if no update is necessary for the requested columns, you still
 *         need to return the {@link IndexUpdate}, just don't set the update for the
 *         {@link IndexUpdate}.
 * @throws IOException propagated from initializing the local state or building the scanner
 */
public Pair<Scanner, IndexUpdate> getIndexedColumnsTableState(Collection<? extends ColumnReference> indexedColumns, boolean ignoreNewerMutations, boolean returnNullScannerIfRowNotFound, IndexMetaData indexMetaData) throws IOException {
    ensureLocalStateInitialized(indexedColumns, ignoreNewerMutations, indexMetaData);

    // the tracker filters out things with a newer timestamp and remembers which column
    // references that applies to
    final ColumnTracker columnTracker = new ColumnTracker(indexedColumns);
    synchronized (this.trackedColumns) {
        // register the tracker only when this set of columns has not been seen before; the
        // freshly created tracker is used for the scanner either way
        boolean alreadyTracked = this.trackedColumns.contains(columnTracker);
        if (!alreadyTracked) {
            this.trackedColumns.add(columnTracker);
        }
    }

    final Scanner columnScanner =
            this.scannerBuilder.buildIndexedColumnScanner(indexedColumns, columnTracker, ts, returnNullScannerIfRowNotFound);
    final IndexUpdate pendingUpdate = new IndexUpdate(columnTracker);
    return new Pair<Scanner, IndexUpdate>(columnScanner, pendingUpdate);
}
Aggregations