Use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.
The class IndexUpdateManager, method toString().
@Override
public String toString() {
StringBuffer sb = new StringBuffer("Pending Index Updates:\n");
for (Entry<ImmutableBytesPtr, Collection<Mutation>> entry : map.entrySet()) {
String tableName = Bytes.toStringBinary(entry.getKey().get());
sb.append(" Table: '" + tableName + "'\n");
for (Mutation m : entry.getValue()) {
sb.append("\t");
if (shouldBeRemoved(m)) {
sb.append("[REMOVED]");
}
sb.append(m.getClass().getSimpleName() + ":" + ((m instanceof Put) ? m.getTimeStamp() + " " : ""));
sb.append(" row=" + Bytes.toStringBinary(m.getRow()));
sb.append("\n");
if (m.getFamilyCellMap().isEmpty()) {
sb.append("\t\t=== EMPTY ===\n");
}
for (List<Cell> kvs : m.getFamilyCellMap().values()) {
for (Cell kv : kvs) {
sb.append("\t\t" + kv.toString() + "/value=" + Bytes.toStringBinary(kv.getValueArray(), kv.getValueOffset(), kv.getValueLength()));
sb.append("\n");
}
}
}
}
return sb.toString();
}
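A minimal usage sketch (not taken from the Phoenix sources) of how this debug output might be exercised. The index table name, row key, column, and value are hypothetical; the IndexUpdateManager is passed in because its constructor differs across Phoenix versions; and an HBase 1.x client (Put#addColumn) plus the org.apache.phoenix.hbase.index.covered.update package layout are assumed.
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.phoenix.hbase.index.covered.update.IndexUpdateManager;

class PendingUpdateDumpSketch {
    // Queue a single hypothetical index Put and print the pending-update report.
    static void dumpPendingUpdates(IndexUpdateManager manager) {
        byte[] indexTable = Bytes.toBytes("MY_INDEX_TABLE"); // hypothetical index table
        Put put = new Put(Bytes.toBytes("row-1"));           // hypothetical row key
        put.addColumn(Bytes.toBytes("0"), Bytes.toBytes("Q"), 10L, Bytes.toBytes("v"));
        manager.addIndexUpdate(indexTable, put);
        // toString() groups the pending mutations by target table and flags entries
        // that have been canceled out with [REMOVED]
        System.out.println(manager);
    }
}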
Use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.
The class IndexUpdateManager, method fixUpCurrentUpdates().
/**
* Fix up the current updates, given the pending mutation.
* @param updates current updates
* @param pendingMutation pending mutation to reconcile against the current updates
*/
protected void fixUpCurrentUpdates(Collection<Mutation> updates, Mutation pendingMutation) {
// need to check for each entry to see if we have a duplicate
Mutation toRemove = null;
Delete pendingDelete = pendingMutation instanceof Delete ? (Delete) pendingMutation : null;
boolean sawRowMatch = false;
for (Mutation stored : updates) {
int compare = pendingMutation.compareTo(stored);
// skip to the right row
if (compare < 0) {
continue;
} else if (compare > 0) {
if (sawRowMatch) {
break;
}
continue;
}
// set that we saw a row match, so any greater row will necessarily be the wrong row
sawRowMatch = true;
// skip until we hit the right timestamp
if (stored.getTimeStamp() < pendingMutation.getTimeStamp()) {
continue;
}
if (stored instanceof Delete) {
// we already have a delete for this row, so we are done.
if (pendingDelete != null) {
return;
}
// pending update must be a Put, so we ignore the Put.
// add a marker in this delete that it has been canceled out already. We need to keep
// the delete around though so we can figure out if other Puts would also be canceled out.
markMutationForRemoval(stored);
return;
}
// otherwise, the stored mutation is a Put. Either way, we want to remove it. If the pending
// update is a delete, we need to remove the entry (no longer applies - covered by the
delete), or it's an older version of the row, so we cover it with the newer one.
toRemove = stored;
if (pendingDelete != null) {
// the pending delete still replaces the stored Put, but we need to mark the pending mutation for removal later
markMutationForRemoval(pendingMutation);
break;
}
}
if (toRemove != null) {
updates.remove(toRemove);
}
if (pendingMutation != null) {
updates.add(pendingMutation);
}
}
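A small illustration (not from the Phoenix sources) of the reconciliation performed above, assuming addIndexUpdate(byte[], Mutation) routes new mutations through fixUpCurrentUpdates as in the Phoenix code base. Table name, row key, and timestamps are hypothetical, and an HBase 1.x client API is assumed.
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.phoenix.hbase.index.covered.update.IndexUpdateManager;

class FixUpIllustration {
    static void reconcile(IndexUpdateManager manager) {
        byte[] indexTable = Bytes.toBytes("MY_INDEX_TABLE"); // hypothetical
        byte[] row = Bytes.toBytes("row-1");                 // hypothetical

        // an index Put at timestamp 10 ...
        Put put = new Put(row);
        put.addColumn(Bytes.toBytes("0"), Bytes.toBytes("Q"), 10L, Bytes.toBytes("v"));
        manager.addIndexUpdate(indexTable, put);

        // ... followed by a Delete of the same row at timestamp 10: the stored Put is
        // dropped (covered by the Delete) and the Delete is kept but marked for removal,
        // so it can still cancel other Puts at this timestamp without being written.
        Delete delete = new Delete(row);
        delete.setTimestamp(10L);
        manager.addIndexUpdate(indexTable, delete);

        System.out.println(manager); // canceled entries show up as [REMOVED]
    }
}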
Use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.
The class RecoveryIndexWriter, method resolveTableReferences().
/**
* Convert the passed index updates to {@link HTableInterfaceReference}s.
*
* @param indexUpdates
* from the index builder
* @return pairs that can then be written by a {@link RecoveryIndexWriter}.
*/
@Override
protected Multimap<HTableInterfaceReference, Mutation> resolveTableReferences(Collection<Pair<Mutation, byte[]>> indexUpdates) {
Multimap<HTableInterfaceReference, Mutation> updates = ArrayListMultimap.<HTableInterfaceReference, Mutation>create();
// simple map to make lookups easy while we build the map of tables to create
Map<ImmutableBytesPtr, HTableInterfaceReference> tables = new HashMap<ImmutableBytesPtr, HTableInterfaceReference>(updates.size());
for (Pair<Mutation, byte[]> entry : indexUpdates) {
byte[] tableName = entry.getSecond();
ImmutableBytesPtr ptr = new ImmutableBytesPtr(tableName);
HTableInterfaceReference table = tables.get(ptr);
if (nonExistingTablesList.contains(table)) {
LOG.debug("Edits found for non existing table: " + table.getTableName() + " so skipping it!!");
continue;
}
if (table == null) {
table = new HTableInterfaceReference(ptr);
tables.put(ptr, table);
}
updates.put(table, entry.getFirst());
}
return updates;
}
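A minimal sketch (not from the Phoenix sources) of the input shape this method consumes: each pair carries an index mutation and the raw bytes of its target index table name. The table name, row key, and column are hypothetical, and an HBase 1.x client API is assumed.
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;

class IndexUpdatePairsSketch {
    // Builds the (mutation, table name) pairs that the index builder hands to
    // resolveTableReferences, which then groups them by HTableInterfaceReference.
    static List<Pair<Mutation, byte[]>> buildSampleUpdates() {
        List<Pair<Mutation, byte[]>> indexUpdates = new ArrayList<Pair<Mutation, byte[]>>();
        Put put = new Put(Bytes.toBytes("row-1")); // hypothetical index row key
        put.addColumn(Bytes.toBytes("0"), Bytes.toBytes("Q"), Bytes.toBytes("v"));
        indexUpdates.add(new Pair<Mutation, byte[]>(put, Bytes.toBytes("MY_INDEX_TABLE")));
        return indexUpdates;
    }
}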
Use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.
The class TrackingParallelWriterIndexCommitter, method write().
@Override
public void write(Multimap<HTableInterfaceReference, Mutation> toWrite, final boolean allowLocalUpdates) throws MultiIndexWriteFailureException {
Set<Entry<HTableInterfaceReference, Collection<Mutation>>> entries = toWrite.asMap().entrySet();
TaskBatch<Boolean> tasks = new TaskBatch<Boolean>(entries.size());
List<HTableInterfaceReference> tables = new ArrayList<HTableInterfaceReference>(entries.size());
for (Entry<HTableInterfaceReference, Collection<Mutation>> entry : entries) {
// get the mutations for each table. We leak the implementation here a little bit to save
// doing a complete copy over of all the index updates for each table.
final List<Mutation> mutations = (List<Mutation>) entry.getValue();
// track each reference so we can get at it easily later, when determining failures
final HTableInterfaceReference tableReference = entry.getKey();
final RegionCoprocessorEnvironment env = this.env;
if (env != null && !allowLocalUpdates && tableReference.getTableName().equals(env.getRegion().getTableDesc().getNameAsString())) {
continue;
}
tables.add(tableReference);
/*
* Write a batch of index updates to an index table. This operation stops (is cancelable) via two
* mechanisms: (1) setting aborted or stopped on the IndexWriter or, (2) interrupting the running thread.
* The former will only work if we are not in the midst of writing the current batch to the table, though we
* do check these status variables before starting and before writing the batch. The latter usage,
* interrupting the thread, will work in the previous situations as well as at some points while writing the
* batch, depending on the underlying writer implementation (HTableInterface#batch is blocking, but doesn't
* elaborate on when it supports an interrupt).
*/
tasks.add(new Task<Boolean>() {
/**
* Do the actual write to the index table. We don't need to worry about closing the table because that
* is handled by the {@link CachingHTableFactory}.
*/
@SuppressWarnings("deprecation")
@Override
public Boolean call() throws Exception {
HTableInterface table = null;
try {
// this may have been queued, but there was an abort/stop so we try to early exit
throwFailureIfDone();
if (allowLocalUpdates && env != null && tableReference.getTableName().equals(env.getRegion().getTableDesc().getNameAsString())) {
try {
throwFailureIfDone();
IndexUtil.writeLocalUpdates(env.getRegion(), mutations, true);
return Boolean.TRUE;
} catch (IOException ignored) {
// when the local write fails we fall back to the standard & slow way
if (LOG.isTraceEnabled()) {
LOG.trace("indexRegion.batchMutate failed, falling back to HTable.batch(). Got error=" + ignored);
}
}
}
if (LOG.isTraceEnabled()) {
LOG.trace("Writing index update:" + mutations + " to table: " + tableReference);
}
table = factory.getTable(tableReference.get());
throwFailureIfDone();
table.batch(mutations);
} catch (InterruptedException e) {
// reset the interrupt status on the thread
Thread.currentThread().interrupt();
throw e;
} catch (Exception e) {
throw e;
} finally {
if (table != null) {
table.close();
}
}
return Boolean.TRUE;
}
private void throwFailureIfDone() throws SingleIndexWriteFailureException {
if (stopped.isStopped() || abortable.isAborted() || Thread.currentThread().isInterrupted()) {
throw new SingleIndexWriteFailureException("Pool closed, not attempting to write to the index!", null);
}
}
});
}
List<Boolean> results = null;
try {
LOG.debug("Waiting on index update tasks to complete...");
results = this.pool.submitUninterruptible(tasks);
} catch (ExecutionException e) {
throw new RuntimeException("Should not fail on the results while using a WaitForCompletionTaskRunner", e);
} catch (EarlyExitFailure e) {
throw new RuntimeException("Stopped while waiting for batch, quiting!", e);
}
// track the failures. We only ever access this on return from our calls, so no extra
// synchronization is needed. We could update all the failures as we find them, but that adds a
// lot of locking overhead, and just doing the copy later is about as efficient.
List<HTableInterfaceReference> failures = new ArrayList<HTableInterfaceReference>();
int index = 0;
for (Boolean result : results) {
// there was a failure
if (result == null) {
// we know which table failed by the index of the result
failures.add(tables.get(index));
}
index++;
}
// if any of the tasks failed, then we need to propagate the failure
if (failures.size() > 0) {
// make the list unmodifiable to avoid any more synchronization concerns
throw new MultiIndexWriteFailureException(Collections.unmodifiableList(failures));
}
return;
}
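A usage sketch (not from the Phoenix sources) of handing a batch to this committer. It assumes the committer has already been set up with its pool, table factory, and environment, that the two-argument write shown above is the one in use, and that the Phoenix package layout below matches the version at hand; the table name, row key, and column are hypothetical.
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.phoenix.hbase.index.exception.MultiIndexWriteFailureException;
import org.apache.phoenix.hbase.index.table.HTableInterfaceReference;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
import org.apache.phoenix.hbase.index.write.recovery.TrackingParallelWriterIndexCommitter;

class CommitterSketch {
    // Batch one hypothetical index Put for one table and hand it to the committer.
    static void writeUpdates(TrackingParallelWriterIndexCommitter committer) throws Exception {
        Multimap<HTableInterfaceReference, Mutation> toWrite = ArrayListMultimap.create();
        HTableInterfaceReference table =
                new HTableInterfaceReference(new ImmutableBytesPtr(Bytes.toBytes("MY_INDEX_TABLE")));
        Put put = new Put(Bytes.toBytes("row-1"));
        put.addColumn(Bytes.toBytes("0"), Bytes.toBytes("Q"), Bytes.toBytes("v"));
        toWrite.put(table, put);
        try {
            committer.write(toWrite, false); // disallow local (same-region) updates in this sketch
        } catch (MultiIndexWriteFailureException e) {
            // the exception identifies the table references whose batches failed
            System.err.println("Index write failed: " + e.getMessage());
        }
    }
}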
Use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.
The class NonTxIndexBuilder, method addCurrentStateMutationsForBatch().
/**
* Add the necessary mutations for the pending batch on the local state. Handles rolling up through history to
* determine the index changes after applying the batch (for the case where the batch is back in time).
*
* @param updateMap
* to update with index mutations
* @param state
* current state of the table
* @param indexMetaData TODO
* @param batch
* to apply to the current state
* @return the minimum timestamp across all index columns requested. If {@link ColumnTracker#isNewestTime(long)}
* returns <tt>true</tt> on the returned timestamp, we know that this <i>was not a back-in-time update</i>.
* @throws IOException
*/
private long addCurrentStateMutationsForBatch(IndexUpdateManager updateMap, LocalTableState state, IndexMetaData indexMetaData) throws IOException {
// get the index updates for this current batch
Iterable<IndexUpdate> upserts = codec.getIndexUpserts(state, indexMetaData);
state.resetTrackedColumns();
/*
* go through all the pending updates. If we are sure that all the entries are the latest timestamp, we can just
* add the index updates and move on. However, if there are columns that we skip past (based on the timestamp of
* the batch), we need to roll back up the history. Regardless of whether or not they are the latest timestamp,
* the entries here are going to be correct for the current batch timestamp, so we add them to the updates. The
* only thing we really care about is whether we need to roll up the history and fix it as we go.
*/
// timestamp of the next update we need to track
long minTs = ColumnTracker.NO_NEWER_PRIMARY_TABLE_ENTRY_TIMESTAMP;
List<IndexedColumnGroup> columnHints = new ArrayList<IndexedColumnGroup>();
for (IndexUpdate update : upserts) {
// this is the one bit where we check the timestamps
final ColumnTracker tracker = update.getIndexedColumns();
long trackerTs = tracker.getTS();
// update the next min TS we need to track
if (trackerTs < minTs) {
minTs = tracker.getTS();
}
// track index hints for the next round. Hint if we need an update for that column for the
// next timestamp. These columns clearly won't need to update as we go through time as they
// already match the most recent possible thing.
boolean needsCleanup = false;
if (tracker.hasNewerTimestamps()) {
columnHints.add(tracker);
// this update also needs to be cleaned up at the next timestamp because it is not the latest.
needsCleanup = true;
}
// only make the put if the index update has been setup
if (update.isValid()) {
byte[] table = update.getTableName();
Mutation mutation = update.getUpdate();
updateMap.addIndexUpdate(table, mutation);
// only make the cleanup if we made a put and need cleanup
if (needsCleanup) {
// there is a TS for the interested columns that is greater than the columns in the
// put. Therefore, we need to issue a delete at the same timestamp
Delete d = new Delete(mutation.getRow());
d.setTimestamp(tracker.getTS());
updateMap.addIndexUpdate(table, d);
}
}
}
return minTs;
}
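A condensed sketch (not a drop-in replacement for the method above) of the cleanup pairing made when needsCleanup is true: the Delete mirrors the index Put's row and is pinned to the tracker's timestamp so the entry is shadowed once newer data applies.
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;

class BackInTimeCleanupSketch {
    // Build the Delete that accompanies a back-in-time index Put: same row,
    // timestamp taken from the ColumnTracker that observed newer versions.
    static Delete cleanupFor(Put indexPut, long trackerTs) {
        Delete d = new Delete(indexPut.getRow());
        d.setTimestamp(trackerTs);
        return d;
    }
}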