
Example 41 with Mutation

use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.

the class IndexUpdateManager method toString.

@Override
public String toString() {
    StringBuffer sb = new StringBuffer("Pending Index Updates:\n");
    for (Entry<ImmutableBytesPtr, Collection<Mutation>> entry : map.entrySet()) {
        String tableName = Bytes.toStringBinary(entry.getKey().get());
        sb.append("   Table: '" + tableName + "'\n");
        for (Mutation m : entry.getValue()) {
            sb.append("\t");
            if (shouldBeRemoved(m)) {
                sb.append("[REMOVED]");
            }
            sb.append(m.getClass().getSimpleName() + ":" + ((m instanceof Put) ? m.getTimeStamp() + " " : ""));
            sb.append(" row=" + Bytes.toStringBinary(m.getRow()));
            sb.append("\n");
            if (m.getFamilyCellMap().isEmpty()) {
                sb.append("\t\t=== EMPTY ===\n");
            }
            for (List<Cell> kvs : m.getFamilyCellMap().values()) {
                for (Cell kv : kvs) {
                    sb.append("\t\t" + kv.toString() + "/value=" + Bytes.toStringBinary(kv.getValueArray(), kv.getValueOffset(), kv.getValueLength()));
                    sb.append("\n");
                }
            }
        }
    }
    return sb.toString();
}
Also used : ImmutableBytesPtr(org.apache.phoenix.hbase.index.util.ImmutableBytesPtr) Collection(java.util.Collection) Mutation(org.apache.hadoop.hbase.client.Mutation) Cell(org.apache.hadoop.hbase.Cell) Put(org.apache.hadoop.hbase.client.Put)
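
A minimal usage sketch (hypothetical names): the dump produced by this toString() is useful for tracing which index updates are queued before they are flushed. The 'updateManager' and 'LOG' variables are assumptions and not part of the example above.

if (LOG.isDebugEnabled()) {
    // dump every pending index mutation, including the ones already marked [REMOVED]
    LOG.debug(updateManager.toString());
}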

Example 42 with Mutation

use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.

the class IndexUpdateManager method fixUpCurrentUpdates.

/**
   * Fix up the current updates, given the pending mutation.
   * @param updates current updates
   * @param pendingMutation the new update to merge into the current updates
   */
protected void fixUpCurrentUpdates(Collection<Mutation> updates, Mutation pendingMutation) {
    // need to check for each entry to see if we have a duplicate
    Mutation toRemove = null;
    Delete pendingDelete = pendingMutation instanceof Delete ? (Delete) pendingMutation : null;
    boolean sawRowMatch = false;
    for (Mutation stored : updates) {
        int compare = pendingMutation.compareTo(stored);
        // skip to the right row
        if (compare < 0) {
            continue;
        } else if (compare > 0) {
            if (sawRowMatch) {
                break;
            }
            continue;
        }
        // set that we saw a row match, so any greater row will necessarily be the wrong row
        sawRowMatch = true;
        // skip until we hit the right timestamp
        if (stored.getTimeStamp() < pendingMutation.getTimeStamp()) {
            continue;
        }
        if (stored instanceof Delete) {
            // we already have a delete for this row, so we are done.
            if (pendingDelete != null) {
                return;
            }
            // pending update must be a Put, so we ignore the Put.
            // add a marker in this delete that it has been canceled out already. We need to keep
            // the delete around though so we can figure out if other Puts would also be canceled out.
            markMutationForRemoval(stored);
            return;
        }
        // otherwise, the stored mutation is a Put. Either way, we want to remove it. If the pending
        // update is a delete, we need to remove the entry (no longer applies - covered by the
        // delete), or it's an older version of the row, so we cover it with the newer one.
        toRemove = stored;
        if (pendingDelete != null) {
            // the pending delete covers the stored Put; we still add the delete below, but we need
            // to mark it for removal later
            markMutationForRemoval(pendingMutation);
            break;
        }
    }
    if (toRemove != null) {
        updates.remove(toRemove);
    }
    if (pendingMutation != null) {
        updates.add(pendingMutation);
    }
}
Also used : Delete(org.apache.hadoop.hbase.client.Delete) Mutation(org.apache.hadoop.hbase.client.Mutation)
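
A small illustrative sketch of the behaviour, assuming a caller with access to the protected method (for example a test in the same package) and the HBase 1.x client API; the row, family and qualifier bytes are made up. A pending Put for the same row at the same timestamp is expected to replace the stored Put:

List<Mutation> updates = new ArrayList<Mutation>();
Put stored = new Put(Bytes.toBytes("row1"), 10L);
stored.addColumn(Bytes.toBytes("0"), Bytes.toBytes("q"), Bytes.toBytes("old"));
updates.add(stored);

Put pending = new Put(Bytes.toBytes("row1"), 10L);
pending.addColumn(Bytes.toBytes("0"), Bytes.toBytes("q"), Bytes.toBytes("new"));

// same row, same timestamp: the stored Put is removed and the pending Put added
fixUpCurrentUpdates(updates, pending);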

Example 43 with Mutation

use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.

the class RecoveryIndexWriter method resolveTableReferences.

/**
     * Convert the passed index updates to {@link HTableInterfaceReference}s.
     * 
     * @param indexUpdates
     *            from the index builder
     * @return mutations grouped by index table, ready to be written by a {@link RecoveryIndexWriter}.
     */
@Override
protected Multimap<HTableInterfaceReference, Mutation> resolveTableReferences(Collection<Pair<Mutation, byte[]>> indexUpdates) {
    Multimap<HTableInterfaceReference, Mutation> updates = ArrayListMultimap.<HTableInterfaceReference, Mutation>create();
    // simple map to make lookups easy while we build the map of tables to create
    Map<ImmutableBytesPtr, HTableInterfaceReference> tables = new HashMap<ImmutableBytesPtr, HTableInterfaceReference>(updates.size());
    for (Pair<Mutation, byte[]> entry : indexUpdates) {
        byte[] tableName = entry.getSecond();
        ImmutableBytesPtr ptr = new ImmutableBytesPtr(tableName);
        HTableInterfaceReference table = tables.get(ptr);
        if (nonExistingTablesList.contains(table)) {
            LOG.debug("Edits found for non existing table: " + table.getTableName() + " so skipping it!!");
            continue;
        }
        if (table == null) {
            table = new HTableInterfaceReference(ptr);
            tables.put(ptr, table);
        }
        updates.put(table, entry.getFirst());
    }
    return updates;
}
Also used : HashMap(java.util.HashMap) HTableInterfaceReference(org.apache.phoenix.hbase.index.table.HTableInterfaceReference) ImmutableBytesPtr(org.apache.phoenix.hbase.index.util.ImmutableBytesPtr) Mutation(org.apache.hadoop.hbase.client.Mutation)
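
A hedged sketch of how the input might be assembled and the method invoked. The table name, 'writer' variable, and column bytes are illustrative only, and since the method is protected the call would come from a subclass or a same-package test:

List<Pair<Mutation, byte[]>> indexUpdates = new ArrayList<Pair<Mutation, byte[]>>();
Put idxPut = new Put(Bytes.toBytes("idx-row"));
idxPut.addColumn(Bytes.toBytes("0"), Bytes.toBytes("q"), Bytes.toBytes("v"));
// pair every index mutation with the raw bytes of its target index table name
indexUpdates.add(new Pair<Mutation, byte[]>(idxPut, Bytes.toBytes("MY_INDEX_TABLE")));

// group the mutations by index table; edits for known non-existing tables are skipped
Multimap<HTableInterfaceReference, Mutation> grouped = writer.resolveTableReferences(indexUpdates);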

Example 44 with Mutation

use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.

the class TrackingParallelWriterIndexCommitter method write.

@Override
public void write(Multimap<HTableInterfaceReference, Mutation> toWrite, final boolean allowLocalUpdates) throws MultiIndexWriteFailureException {
    Set<Entry<HTableInterfaceReference, Collection<Mutation>>> entries = toWrite.asMap().entrySet();
    TaskBatch<Boolean> tasks = new TaskBatch<Boolean>(entries.size());
    List<HTableInterfaceReference> tables = new ArrayList<HTableInterfaceReference>(entries.size());
    for (Entry<HTableInterfaceReference, Collection<Mutation>> entry : entries) {
        // get the mutations for each table. We leak the implementation here a little bit to save
        // doing a complete copy over of all the index updates for each table.
        final List<Mutation> mutations = (List<Mutation>) entry.getValue();
        // track each reference so we can get at it easily later, when determining failures
        final HTableInterfaceReference tableReference = entry.getKey();
        final RegionCoprocessorEnvironment env = this.env;
        if (env != null && !allowLocalUpdates && tableReference.getTableName().equals(env.getRegion().getTableDesc().getNameAsString())) {
            continue;
        }
        tables.add(tableReference);
        /*
             * Write a batch of index updates to an index table. This operation stops (is cancelable) via two
             * mechanisms: (1) setting aborted or stopped on the IndexWriter or, (2) interrupting the running thread.
             * The former will only work if we are not in the midst of writing the current batch to the table, though we
             * do check these status variables before starting and before writing the batch. The latter usage,
             * interrupting the thread, will work in the previous situations as well as while the batch write itself is
             * in progress, depending on the underlying writer implementation (HTableInterface#batch is blocking, but
             * doesn't elaborate on when it supports an interrupt).
             */
        tasks.add(new Task<Boolean>() {

            /**
                 * Do the actual write to the primary table. We don't need to worry about closing the table because that
                 * is handled by the {@link CachingHTableFactory}.
                 */
            @SuppressWarnings("deprecation")
            @Override
            public Boolean call() throws Exception {
                HTableInterface table = null;
                try {
                    // this may have been queued, but there was an abort/stop so we try to early exit
                    throwFailureIfDone();
                    if (allowLocalUpdates && env != null && tableReference.getTableName().equals(env.getRegion().getTableDesc().getNameAsString())) {
                        try {
                            throwFailureIfDone();
                            IndexUtil.writeLocalUpdates(env.getRegion(), mutations, true);
                            return Boolean.TRUE;
                        } catch (IOException ignord) {
                            // when it fails, we fall back to the standard & slow way
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("indexRegion.batchMutate failed and fall back to HTable.batch(). Got error=" + ignord);
                            }
                        }
                    }
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Writing index update:" + mutations + " to table: " + tableReference);
                    }
                    table = factory.getTable(tableReference.get());
                    throwFailureIfDone();
                    table.batch(mutations);
                } catch (InterruptedException e) {
                    // reset the interrupt status on the thread
                    Thread.currentThread().interrupt();
                    throw e;
                } catch (Exception e) {
                    throw e;
                } finally {
                    if (table != null) {
                        table.close();
                    }
                }
                return Boolean.TRUE;
            }

            private void throwFailureIfDone() throws SingleIndexWriteFailureException {
                if (stopped.isStopped() || abortable.isAborted() || Thread.currentThread().isInterrupted()) {
                    throw new SingleIndexWriteFailureException("Pool closed, not attempting to write to the index!", null);
                }
            }
        });
    }
    List<Boolean> results = null;
    try {
        LOG.debug("Waiting on index update tasks to complete...");
        results = this.pool.submitUninterruptible(tasks);
    } catch (ExecutionException e) {
        throw new RuntimeException("Should not fail on the results while using a WaitForCompletionTaskRunner", e);
    } catch (EarlyExitFailure e) {
        throw new RuntimeException("Stopped while waiting for batch, quiting!", e);
    }
    // track the failures. We only ever access this on return from our calls, so no extra
    // synchronization is needed. We could update all the failures as we find them, but that adds a
    // lot of locking overhead, and just doing the copy later is about as efficient.
    List<HTableInterfaceReference> failures = new ArrayList<HTableInterfaceReference>();
    int index = 0;
    for (Boolean result : results) {
        // there was a failure
        if (result == null) {
            // we know which table failed by the index of the result
            failures.add(tables.get(index));
        }
        index++;
    }
    // if any of the tasks failed, then we need to propagate the failure
    if (failures.size() > 0) {
        // make the list unmodifiable to avoid any more synchronization concerns
        throw new MultiIndexWriteFailureException(Collections.unmodifiableList(failures));
    }
    return;
}
Also used : ArrayList(java.util.ArrayList) TaskBatch(org.apache.phoenix.hbase.index.parallel.TaskBatch) HTableInterface(org.apache.hadoop.hbase.client.HTableInterface) Entry(java.util.Map.Entry) RegionCoprocessorEnvironment(org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionException(java.util.concurrent.ExecutionException) EarlyExitFailure(org.apache.phoenix.hbase.index.parallel.EarlyExitFailure) IOException(java.io.IOException) MultiIndexWriteFailureException(org.apache.phoenix.hbase.index.exception.MultiIndexWriteFailureException) MultiIndexWriteFailureException(org.apache.phoenix.hbase.index.exception.MultiIndexWriteFailureException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SingleIndexWriteFailureException(org.apache.phoenix.hbase.index.exception.SingleIndexWriteFailureException) SingleIndexWriteFailureException(org.apache.phoenix.hbase.index.exception.SingleIndexWriteFailureException) HTableInterfaceReference(org.apache.phoenix.hbase.index.table.HTableInterfaceReference) Collection(java.util.Collection) Mutation(org.apache.hadoop.hbase.client.Mutation)
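
A sketch of how a caller might hand updates to this committer, assuming an already set-up 'committer' instance, a prepared 'indexPut', and a 'LOG' object; all of these names are assumptions:

Multimap<HTableInterfaceReference, Mutation> toWrite =
        ArrayListMultimap.<HTableInterfaceReference, Mutation>create();
toWrite.put(new HTableInterfaceReference(new ImmutableBytesPtr(Bytes.toBytes("MY_INDEX_TABLE"))), indexPut);
try {
    // allowLocalUpdates=false: updates targeting the hosting region's own table are skipped
    committer.write(toWrite, false);
} catch (MultiIndexWriteFailureException e) {
    // the exception identifies the index tables whose batches failed
    LOG.error("Index update batch failed", e);
}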

Example 45 with Mutation

use of org.apache.hadoop.hbase.client.Mutation in project phoenix by apache.

the class NonTxIndexBuilder method addCurrentStateMutationsForBatch.

/**
     * Add the necessary mutations for the pending batch on the local state. Handles rolling up through history to
     * determine the index changes after applying the batch (for the case where the batch is back in time).
     * 
     * @param updateMap
     *            to update with index mutations
     * @param state
     *            current state of the table
     * @param indexMetaData TODO
     * @param batch
     *            to apply to the current state
     * @return the minimum timestamp across all index columns requested. If {@link ColumnTracker#isNewestTime(long)}
     *         returns <tt>true</tt> on the returned timestamp, we know that this <i>was not a back-in-time update</i>.
     * @throws IOException
     */
private long addCurrentStateMutationsForBatch(IndexUpdateManager updateMap, LocalTableState state, IndexMetaData indexMetaData) throws IOException {
    // get the index updates for this current batch
    Iterable<IndexUpdate> upserts = codec.getIndexUpserts(state, indexMetaData);
    state.resetTrackedColumns();
    /*
         * go through all the pending updates. If we are sure that all the entries are the latest timestamp, we can just
         * add the index updates and move on. However, if there are columns that we skip past (based on the timestamp of
         * the batch), we need to roll back up the history. Regardless of whether or not they are the latest timestamp,
         * the entries here are going to be correct for the current batch timestamp, so we add them to the updates. The
         * only thing we really care about is whether we need to roll up the history and fix it as we go.
         */
    // timestamp of the next update we need to track
    long minTs = ColumnTracker.NO_NEWER_PRIMARY_TABLE_ENTRY_TIMESTAMP;
    List<IndexedColumnGroup> columnHints = new ArrayList<IndexedColumnGroup>();
    for (IndexUpdate update : upserts) {
        // this is the one bit where we check the timestamps
        final ColumnTracker tracker = update.getIndexedColumns();
        long trackerTs = tracker.getTS();
        // update the next min TS we need to track
        if (trackerTs < minTs) {
            minTs = tracker.getTS();
        }
        // track index hints for the next round. Hint if we need an update for that column for the
        // next timestamp. These columns clearly won't need to update as we go through time as they
        // already match the most recent possible thing.
        boolean needsCleanup = false;
        if (tracker.hasNewerTimestamps()) {
            columnHints.add(tracker);
            // this update also needs to be cleaned up at the next timestamp because it is not the latest.
            needsCleanup = true;
        }
        // only make the put if the index update has been setup
        if (update.isValid()) {
            byte[] table = update.getTableName();
            Mutation mutation = update.getUpdate();
            updateMap.addIndexUpdate(table, mutation);
            // only make the cleanup if we made a put and need cleanup
            if (needsCleanup) {
                // there is a TS for the interested columns that is greater than the columns in the
                // put. Therefore, we need to issue a delete at the same timestamp
                Delete d = new Delete(mutation.getRow());
                d.setTimestamp(tracker.getTS());
                updateMap.addIndexUpdate(table, d);
            }
        }
    }
    return minTs;
}
Also used : Delete(org.apache.hadoop.hbase.client.Delete) ArrayList(java.util.ArrayList) ColumnTracker(org.apache.phoenix.hbase.index.covered.update.ColumnTracker) Mutation(org.apache.hadoop.hbase.client.Mutation) IndexedColumnGroup(org.apache.phoenix.hbase.index.covered.update.IndexedColumnGroup)
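
A short sketch of how the return value might be interpreted by a caller, consistent with the javadoc above (the variable names are assumptions):

long minTs = addCurrentStateMutationsForBatch(updateMap, state, indexMetaData);
if (!ColumnTracker.isNewestTime(minTs)) {
    // the batch landed back in time: newer index entries exist above minTs, so the
    // history from minTs forward still needs to be rolled up and corrected
}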

Aggregations

Mutation (org.apache.hadoop.hbase.client.Mutation) 139
Put (org.apache.hadoop.hbase.client.Put) 53
ArrayList (java.util.ArrayList) 46
IOException (java.io.IOException) 35
Delete (org.apache.hadoop.hbase.client.Delete) 32
ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr) 31
List (java.util.List) 28
Cell (org.apache.hadoop.hbase.Cell) 25
Pair (org.apache.hadoop.hbase.util.Pair) 23
MetaDataMutationResult (org.apache.phoenix.coprocessor.MetaDataProtocol.MetaDataMutationResult) 23
HashMap (java.util.HashMap) 19
PTable (org.apache.phoenix.schema.PTable) 18
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException) 17
MetaDataResponse (org.apache.phoenix.coprocessor.generated.MetaDataProtos.MetaDataResponse) 15
Region (org.apache.hadoop.hbase.regionserver.Region) 14
RowLock (org.apache.hadoop.hbase.regionserver.Region.RowLock) 14
Test (org.junit.Test) 14
MutationCode (org.apache.phoenix.coprocessor.MetaDataProtocol.MutationCode) 13
HTableInterface (org.apache.hadoop.hbase.client.HTableInterface) 12
MutationProto (org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto) 12