Search in sources:

Example 1 with Span

use of org.apache.htrace.Span in project hbase by apache.

From the class AsyncFSWAL, method appendAndSync.

private void appendAndSync() {
    final AsyncWriter writer = this.writer;
    // a sync request may not have been queued when we issued a sync, so check here to see
    // whether we can finish some.
    finishSync(false);
    long newHighestProcessedAppendTxid = -1L;
    for (Iterator<FSWALEntry> iter = toWriteAppends.iterator(); iter.hasNext(); ) {
        FSWALEntry entry = iter.next();
        boolean appended;
        Span span = entry.detachSpan();
        // the span may be null if this is a retry after rolling.
        if (span != null) {
            TraceScope scope = Trace.continueSpan(span);
            try {
                appended = append(writer, entry);
            } catch (IOException e) {
                throw new AssertionError("should not happen", e);
            } finally {
                assert scope == NullScope.INSTANCE || !scope.isDetached();
                // append scope is complete
                scope.close();
            }
        } else {
            try {
                appended = append(writer, entry);
            } catch (IOException e) {
                throw new AssertionError("should not happen", e);
            }
        }
        newHighestProcessedAppendTxid = entry.getTxid();
        iter.remove();
        if (appended) {
            unackedAppends.addLast(entry);
            if (writer.getLength() - fileLengthAtLastSync >= batchSize) {
                break;
            }
        }
    }
    // if we appended anything, advance the highest processed append txid; otherwise, fall back
    // to the previous transaction id.
    if (newHighestProcessedAppendTxid > 0) {
        highestProcessedAppendTxid = newHighestProcessedAppendTxid;
    } else {
        newHighestProcessedAppendTxid = highestProcessedAppendTxid;
    }
    if (writer.getLength() - fileLengthAtLastSync >= batchSize) {
        // sync because we have reached the buffer size limit.
        sync(writer);
        return;
    }
    if (writer.getLength() == fileLengthAtLastSync) {
        // no new data has been written since the last sync; we may only have stamped some
        // region sequence ids.
        if (unackedAppends.isEmpty()) {
            highestSyncedTxid.set(highestProcessedAppendTxid);
            finishSync(false);
            trySetReadyForRolling();
        }
        return;
    }
    // we have some unsynced data but haven't reached the batch size yet
    if (!syncFutures.isEmpty() && syncFutures.last().getTxid() > highestProcessedAppendTxidAtLastSync) {
        // we have at least one sync request
        sync(writer);
    }
}
Also used: AsyncWriter (org.apache.hadoop.hbase.wal.WALProvider.AsyncWriter), TraceScope (org.apache.htrace.TraceScope), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), Span (org.apache.htrace.Span)

Example 2 with Span

use of org.apache.htrace.Span in project hbase by apache.

From the class FSWALEntry, method detachSpan.

Span detachSpan() {
    Span span = this.span;
    this.span = null;
    return span;
}
Also used: Span (org.apache.htrace.Span)
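
Examples 1 and 2 are the two halves of a hand-off: the producer thread stores its current span on the WAL entry, and the consumer thread detaches it and continues it around the actual append. The snippet below is a minimal, self-contained sketch of the same pattern against the plain HTrace 3 API; the class and method names are hypothetical and not part of HBase.

import org.apache.htrace.Span;
import org.apache.htrace.Trace;
import org.apache.htrace.TraceScope;

// Hypothetical work item illustrating the detach/continue pattern from Examples 1 and 2.
final class TracedWorkItem {

    // captured on the producer thread; null when tracing is off, like the span on FSWALEntry
    private Span span = Trace.currentSpan();

    // same idea as FSWALEntry.detachSpan(): hand the span over exactly once
    Span detachSpan() {
        Span s = this.span;
        this.span = null;
        return s;
    }

    // run the work on the consumer thread under the producer's span, if there is one
    void process(Runnable work) {
        Span detached = detachSpan();
        if (detached != null) {
            try (TraceScope scope = Trace.continueSpan(detached)) {
                work.run();
            }
        } else {
            work.run();
        }
    }
}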

Example 3 with Span

use of org.apache.htrace.Span in project phoenix by apache.

From the class BaseTracingTestIT, method createNewSpan.

protected Span createNewSpan(long traceid, long parentid, long spanid, String description, long startTime, long endTime, String processid, String... tags) {
    Span span = new MilliSpan.Builder().description(description).traceId(traceid).parents(new long[] { parentid }).spanId(spanid).processId(processid).begin(startTime).end(endTime).build();
    int tagCount = 0;
    for (String annotation : tags) {
        span.addKVAnnotation((Integer.toString(tagCount++)).getBytes(), annotation.getBytes());
    }
    return span;
}
Also used: Span (org.apache.htrace.Span), MilliSpan (org.apache.htrace.impl.MilliSpan)
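
A possible call site in a test that extends BaseTracingTestIT is sketched below; every id, timestamp, and tag value is an arbitrary illustration rather than a value taken from Phoenix.

// Hypothetical usage inside a subclass of BaseTracingTestIT; the trailing varargs
// become key/value annotations keyed "0", "1", ... in the order they are passed.
long start = System.currentTimeMillis();
long end = start + 10;
Span span = createNewSpan(
    100L,                  // traceid
    1L,                    // parentid
    2L,                    // spanid
    "test span",           // description
    start, end,            // startTime, endTime
    "test-process",        // processid
    "tag-one", "tag-two"); // stored as annotations "0" -> "tag-one", "1" -> "tag-two"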

Example 4 with Span

use of org.apache.htrace.Span in project phoenix by apache.

From the class MutationState, method send.

@SuppressWarnings("deprecation")
private void send(Iterator<TableRef> tableRefIterator) throws SQLException {
    int i = 0;
    long[] serverTimeStamps = null;
    boolean sendAll = false;
    if (tableRefIterator == null) {
        serverTimeStamps = validateAll();
        tableRefIterator = mutations.keySet().iterator();
        sendAll = true;
    }
    Map<ImmutableBytesPtr, RowMutationState> valuesMap;
    List<TableRef> txTableRefs = Lists.newArrayListWithExpectedSize(mutations.size());
    Map<TableInfo, List<Mutation>> physicalTableMutationMap = Maps.newLinkedHashMap();
    // add tracing for this operation
    try (TraceScope trace = Tracing.startNewSpan(connection, "Committing mutations to tables")) {
        Span span = trace.getSpan();
        ImmutableBytesWritable indexMetaDataPtr = new ImmutableBytesWritable();
        boolean isTransactional;
        while (tableRefIterator.hasNext()) {
            // at this point we are going through mutations for each table
            final TableRef tableRef = tableRefIterator.next();
            valuesMap = mutations.get(tableRef);
            if (valuesMap == null || valuesMap.isEmpty()) {
                continue;
            }
            // Validate as we go if transactional since we can undo if a problem occurs (which is unlikely)
            long serverTimestamp = serverTimeStamps == null ? validate(tableRef, valuesMap) : serverTimeStamps[i++];
            final PTable table = tableRef.getTable();
            Iterator<Pair<PName, List<Mutation>>> mutationsIterator = addRowMutations(tableRef, valuesMap, serverTimestamp, false, sendAll);
            // build map from physical table to mutation list
            boolean isDataTable = true;
            while (mutationsIterator.hasNext()) {
                Pair<PName, List<Mutation>> pair = mutationsIterator.next();
                PName hTableName = pair.getFirst();
                List<Mutation> mutationList = pair.getSecond();
                TableInfo tableInfo = new TableInfo(isDataTable, hTableName, tableRef);
                List<Mutation> oldMutationList = physicalTableMutationMap.put(tableInfo, mutationList);
                if (oldMutationList != null)
                    mutationList.addAll(0, oldMutationList);
                isDataTable = false;
            }
            // For transactional tables, track the uncommitted statement indexes and keep the
            // mutations so the commit can be replayed in the event of a failure.
            if (table.isTransactional()) {
                addUncommittedStatementIndexes(valuesMap.values());
                if (txMutations.isEmpty()) {
                    txMutations = Maps.newHashMapWithExpectedSize(mutations.size());
                }
                // Keep all mutations we've encountered until a commit or rollback.
                // This is not ideal, but there's no good way to get the values back
                // in the event that we need to replay the commit.
                // Copy TableRef so we have the original PTable and know when the
                // indexes have changed.
                joinMutationState(new TableRef(tableRef), valuesMap, txMutations);
            }
        }
        long serverTimestamp = HConstants.LATEST_TIMESTAMP;
        Iterator<Entry<TableInfo, List<Mutation>>> mutationsIterator = physicalTableMutationMap.entrySet().iterator();
        while (mutationsIterator.hasNext()) {
            Entry<TableInfo, List<Mutation>> pair = mutationsIterator.next();
            TableInfo tableInfo = pair.getKey();
            byte[] htableName = tableInfo.getHTableName().getBytes();
            List<Mutation> mutationList = pair.getValue();
            // create a span per target table
            // TODO: maybe we can be smarter about the table-name-to-string conversion here?
            Span child = Tracing.child(span, "Writing mutation batch for table: " + Bytes.toString(htableName));
            int retryCount = 0;
            boolean shouldRetry = false;
            do {
                TableRef origTableRef = tableInfo.getOrigTableRef();
                PTable table = origTableRef.getTable();
                table.getIndexMaintainers(indexMetaDataPtr, connection);
                final ServerCache cache = tableInfo.isDataTable() ? setMetaDataOnMutations(origTableRef, mutationList, indexMetaDataPtr) : null;
                // If we haven't retried yet, retry for this case only, as it's possible that
                // a split will occur after we send the index metadata cache to all known
                // region servers.
                shouldRetry = cache != null;
                SQLException sqlE = null;
                HTableInterface hTable = connection.getQueryServices().getTable(htableName);
                try {
                    if (table.isTransactional()) {
                        // Track tables to which we've sent uncommitted data
                        txTableRefs.add(origTableRef);
                        addDMLFence(table);
                        uncommittedPhysicalNames.add(table.getPhysicalName().getString());
                        // if the table has indexes, wrap the HTable in a delegate that attaches
                        // the index metadata needed in the event of a rollback
                        if (!table.getIndexes().isEmpty()) {
                            hTable = new MetaDataAwareHTable(hTable, origTableRef);
                        }
                        TransactionAwareHTable txnAware = TransactionUtil.getTransactionAwareHTable(hTable, table.isImmutableRows());
                        // only the data table is registered as a transaction participant; index
                        // tables just have the transaction started on them (they do not participate
                        // during a commit), as we don't need conflict detection for these.
                        if (tableInfo.isDataTable()) {
                            // Even for immutable, we need to do this so that an abort has the state
                            // necessary to generate the rows to delete.
                            addTransactionParticipant(txnAware);
                        } else {
                            txnAware.startTx(getTransaction());
                        }
                        hTable = txnAware;
                    }
                    long numMutations = mutationList.size();
                    GLOBAL_MUTATION_BATCH_SIZE.update(numMutations);
                    long startTime = System.currentTimeMillis();
                    child.addTimelineAnnotation("Attempt " + retryCount);
                    List<List<Mutation>> mutationBatchList = getMutationBatchList(batchSize, batchSizeBytes, mutationList);
                    for (List<Mutation> mutationBatch : mutationBatchList) {
                        hTable.batch(mutationBatch);
                        batchCount++;
                    }
                    if (logger.isDebugEnabled())
                        logger.debug("Sent batch of " + numMutations + " for " + Bytes.toString(htableName));
                    child.stop();
                    shouldRetry = false;
                    long mutationCommitTime = System.currentTimeMillis() - startTime;
                    GLOBAL_MUTATION_COMMIT_TIME.update(mutationCommitTime);
                    long mutationSizeBytes = calculateMutationSize(mutationList);
                    MutationMetric mutationsMetric = new MutationMetric(numMutations, mutationSizeBytes, mutationCommitTime);
                    mutationMetricQueue.addMetricsForTable(Bytes.toString(htableName), mutationsMetric);
                    if (tableInfo.isDataTable()) {
                        numRows -= numMutations;
                    }
                    // Remove batches as we process them
                    mutations.remove(origTableRef);
                } catch (Exception e) {
                    serverTimestamp = ServerUtil.parseServerTimestamp(e);
                    SQLException inferredE = ServerUtil.parseServerExceptionOrNull(e);
                    if (inferredE != null) {
                        if (shouldRetry && retryCount == 0 && inferredE.getErrorCode() == SQLExceptionCode.INDEX_METADATA_NOT_FOUND.getErrorCode()) {
                            // Swallow this exception once, as it's possible that we split after sending the index metadata
                            // and one of the region servers doesn't have it; clearing the cache means it will get it on the next go around.
                            // If it fails again, we don't retry.
                            String msg = "Swallowing exception and retrying after clearing meta cache on connection. " + inferredE;
                            logger.warn(LogUtil.addCustomAnnotations(msg, connection));
                            connection.getQueryServices().clearTableRegionCache(htableName);
                            // add a new child span as this one failed
                            child.addTimelineAnnotation(msg);
                            child.stop();
                            child = Tracing.child(span, "Failed batch, attempting retry");
                            continue;
                        }
                        e = inferredE;
                    }
                    // Throw to client an exception that indicates the statements that
                    // were not committed successfully.
                    sqlE = new CommitException(e, getUncommittedStatementIndexes(), serverTimestamp);
                } finally {
                    try {
                        if (cache != null)
                            cache.close();
                    } finally {
                        try {
                            hTable.close();
                        } catch (IOException e) {
                            if (sqlE != null) {
                                sqlE.setNextException(ServerUtil.parseServerException(e));
                            } else {
                                sqlE = ServerUtil.parseServerException(e);
                            }
                        }
                        if (sqlE != null) {
                            throw sqlE;
                        }
                    }
                }
            } while (shouldRetry && retryCount++ < 1);
        }
    }
}
Also used: ServerCache (org.apache.phoenix.cache.ServerCacheClient.ServerCache), SQLException (java.sql.SQLException), MutationMetric (org.apache.phoenix.monitoring.MutationMetricQueue.MutationMetric), HTableInterface (org.apache.hadoop.hbase.client.HTableInterface), Span (org.apache.htrace.Span), PTable (org.apache.phoenix.schema.PTable), Entry (java.util.Map.Entry), List (java.util.List), Pair (org.apache.hadoop.hbase.util.Pair), TransactionAwareHTable (org.apache.tephra.hbase.TransactionAwareHTable), ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr), TraceScope (org.apache.htrace.TraceScope), IOException (java.io.IOException), TransactionFailureException (org.apache.tephra.TransactionFailureException), IllegalDataException (org.apache.phoenix.schema.IllegalDataException), TimeoutException (java.util.concurrent.TimeoutException), TransactionConflictException (org.apache.tephra.TransactionConflictException), TableNotFoundException (org.apache.phoenix.schema.TableNotFoundException), PName (org.apache.phoenix.schema.PName), Mutation (org.apache.hadoop.hbase.client.Mutation), PTableRef (org.apache.phoenix.schema.PTableRef), TableRef (org.apache.phoenix.schema.TableRef)
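
Tracing-wise, send() builds one parent span for the whole commit and one child span per target table, replacing the child with a fresh one when a batch is retried. The sketch below reproduces just that shape with the plain HTrace 3 API instead of Phoenix's Tracing helper; the class name, table names, and use of Sampler.ALWAYS are assumptions for illustration.

import org.apache.htrace.Sampler;
import org.apache.htrace.Span;
import org.apache.htrace.Trace;
import org.apache.htrace.TraceScope;

// Minimal sketch (not Phoenix code) of the parent/child span structure built by send().
public final class CommitSpanSketch {
    public static void main(String[] args) {
        // Sampler.ALWAYS forces a real span even when no sampling is configured.
        try (TraceScope commit = Trace.startSpan("Committing mutations to tables", Sampler.ALWAYS)) {
            Span parent = commit.getSpan();
            for (String table : new String[] { "TABLE_A", "TABLE_B" }) { // hypothetical table names
                Span child = parent.child("Writing mutation batch for table: " + table);
                child.addTimelineAnnotation("Attempt 0");
                // ... the mutation batch for this table would be issued here ...
                child.stop(); // each child span must be stopped exactly once
            }
        }
    }
}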

Example 5 with Span

use of org.apache.htrace.Span in project phoenix by apache.

From the class Indexer, method preBatchMutateWithExceptions.

public void preBatchMutateWithExceptions(ObserverContext<RegionCoprocessorEnvironment> c, MiniBatchOperationInProgress<Mutation> miniBatchOp) throws Throwable {
    // first group all the updates for a single row into a single update to be processed
    Map<ImmutableBytesPtr, MultiMutation> mutations = new HashMap<ImmutableBytesPtr, MultiMutation>();
    Durability defaultDurability = Durability.SYNC_WAL;
    if (c.getEnvironment().getRegion() != null) {
        defaultDurability = c.getEnvironment().getRegion().getTableDesc().getDurability();
        defaultDurability = (defaultDurability == Durability.USE_DEFAULT) ? Durability.SYNC_WAL : defaultDurability;
    }
    Durability durability = Durability.SKIP_WAL;
    for (int i = 0; i < miniBatchOp.size(); i++) {
        Mutation m = miniBatchOp.getOperation(i);
        if (this.builder.isAtomicOp(m)) {
            miniBatchOp.setOperationStatus(i, SUCCESS);
            continue;
        }
        // only process mutations for which indexing is enabled
        if (this.builder.isEnabled(m)) {
            Durability effectiveDurability = (m.getDurability() == Durability.USE_DEFAULT) ? defaultDurability : m.getDurability();
            if (effectiveDurability.ordinal() > durability.ordinal()) {
                durability = effectiveDurability;
            }
            // add the mutation to the batch set
            ImmutableBytesPtr row = new ImmutableBytesPtr(m.getRow());
            MultiMutation stored = mutations.get(row);
            // we haven't seen this row before, so add it
            if (stored == null) {
                stored = new MultiMutation(row);
                mutations.put(row, stored);
            }
            stored.addAll(m);
        }
    }
    // early exit if it turns out we don't have any edits
    if (mutations.isEmpty()) {
        return;
    }
    // dump all the index updates into a single WAL. They will get combined in the end anyways, so
    // don't worry which one we get
    WALEdit edit = miniBatchOp.getWalEdit(0);
    if (edit == null) {
        edit = new WALEdit();
        miniBatchOp.setWalEdit(0, edit);
    }
    // get the current span, or just use a null-span to avoid a bunch of if statements
    try (TraceScope scope = Trace.startSpan("Starting to build index updates")) {
        Span current = scope.getSpan();
        if (current == null) {
            current = NullSpan.INSTANCE;
        }
        // get the index updates for all elements in this batch
        Collection<Pair<Mutation, byte[]>> indexUpdates = this.builder.getIndexUpdate(miniBatchOp, mutations.values());
        current.addTimelineAnnotation("Built index updates, doing preStep");
        TracingUtils.addAnnotation(current, "index update count", indexUpdates.size());
        byte[] tableName = c.getEnvironment().getRegion().getTableDesc().getTableName().getName();
        Iterator<Pair<Mutation, byte[]>> indexUpdatesItr = indexUpdates.iterator();
        List<Mutation> localUpdates = new ArrayList<Mutation>(indexUpdates.size());
        while (indexUpdatesItr.hasNext()) {
            Pair<Mutation, byte[]> next = indexUpdatesItr.next();
            if (Bytes.compareTo(next.getSecond(), tableName) == 0) {
                localUpdates.add(next.getFirst());
                indexUpdatesItr.remove();
            }
        }
        if (!localUpdates.isEmpty()) {
            miniBatchOp.addOperationsFromCP(0, localUpdates.toArray(new Mutation[localUpdates.size()]));
        }
        // write them, either to WAL or the index tables
        doPre(indexUpdates, edit, durability);
    }
}
Also used: HashMap (java.util.HashMap), ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr), TraceScope (org.apache.htrace.TraceScope), ArrayList (java.util.ArrayList), Durability (org.apache.hadoop.hbase.client.Durability), Span (org.apache.htrace.Span), NullSpan (org.apache.phoenix.trace.util.NullSpan), WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit), Mutation (org.apache.hadoop.hbase.client.Mutation), Pair (org.apache.hadoop.hbase.util.Pair)
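
The null-span fallback at the top of the traced block is a reusable idiom: when sampling is off, scope.getSpan() returns null, and substituting NullSpan.INSTANCE lets the rest of the method call annotation methods without null checks. A condensed sketch of just that idiom follows; the class and method names are hypothetical.

import org.apache.htrace.Span;
import org.apache.htrace.Trace;
import org.apache.htrace.TraceScope;
import org.apache.phoenix.trace.util.NullSpan;

// Condensed, hypothetical sketch of the null-span fallback used in preBatchMutateWithExceptions.
public final class NullSpanSketch {
    static void runWithSpan(Runnable work) {
        try (TraceScope scope = Trace.startSpan("Starting to build index updates")) {
            Span current = scope.getSpan();
            if (current == null) {
                current = NullSpan.INSTANCE; // no-op span: annotation calls become no-ops
            }
            current.addTimelineAnnotation("starting work");
            work.run();
            current.addTimelineAnnotation("finished work");
        }
    }
}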

Aggregations

Span (org.apache.htrace.Span): 15 usages
TraceScope (org.apache.htrace.TraceScope): 7 usages
Test (org.junit.Test): 7 usages
Connection (java.sql.Connection): 4 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 4 usages
Pair (org.apache.hadoop.hbase.util.Pair): 4 usages
Mutation (org.apache.hadoop.hbase.client.Mutation): 3 usages
MilliSpan (org.apache.htrace.impl.MilliSpan): 3 usages
PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection): 3 usages
SpanInfo (org.apache.phoenix.trace.TraceReader.SpanInfo): 3 usages
NullSpan (org.apache.phoenix.trace.util.NullSpan): 3 usages
IOException (java.io.IOException): 2 usages
ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr): 2 usages
TraceHolder (org.apache.phoenix.trace.TraceReader.TraceHolder): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 1 usage
InterruptedIOException (java.io.InterruptedIOException): 1 usage
SQLException (java.sql.SQLException): 1 usage
ArrayList (java.util.ArrayList): 1 usage
HashMap (java.util.HashMap): 1 usage
List (java.util.List): 1 usage