Search in sources:

Example 1 with StopWatch

use of org.apache.tez.util.StopWatch in project tez by apache.

The class TezUtilsInternal, method compressBytes.

// 
// public static void addUserSpecifiedTezConfiguration(String baseDir, Configuration conf) throws
// IOException {
// FileInputStream confPBBinaryStream = null;
// ConfigurationProto.Builder confProtoBuilder = ConfigurationProto.newBuilder();
// try {
// confPBBinaryStream =
// new FileInputStream(new File(baseDir, TezConstants.TEZ_PB_BINARY_CONF_NAME));
// confProtoBuilder.mergeFrom(confPBBinaryStream);
// } finally {
// if (confPBBinaryStream != null) {
// confPBBinaryStream.close();
// }
// }
// 
// ConfigurationProto confProto = confProtoBuilder.build();
// 
// List<PlanKeyValuePair> kvPairList = confProto.getConfKeyValuesList();
// if (kvPairList != null && !kvPairList.isEmpty()) {
// for (PlanKeyValuePair kvPair : kvPairList) {
// conf.set(kvPair.getKey(), kvPair.getValue());
// }
// }
// }
/**
 * Compresses {@code inBytes} by delegating to {@code compressBytesInflateDeflate} and, when
 * debug logging is enabled, reports the input size, the output size and the elapsed time in
 * milliseconds.
 *
 * @param inBytes the raw bytes to compress; its length is read for the debug message
 * @return the byte array returned by {@code compressBytesInflateDeflate}
 * @throws IOException declared by the signature; presumably raised by the compression helper
 */
public static byte[] compressBytes(byte[] inBytes) throws IOException {
    final StopWatch timer = new StopWatch().start();
    final byte[] result = compressBytesInflateDeflate(inBytes);
    timer.stop();
    if (!LOG.isDebugEnabled()) {
        // Nothing to report; skip building the message.
        return result;
    }
    LOG.debug("UncompressedSize: " + inBytes.length + ", CompressedSize: " + result.length + ", CompressTime: " + timer.now(TimeUnit.MILLISECONDS));
    return result;
}
Also used : StopWatch(org.apache.tez.util.StopWatch)

Example 2 with StopWatch

use of org.apache.tez.util.StopWatch in project tez by apache.

The class PipelinedSorter, method sort.

/**
 * Hands the current span over for sorting and installs the span that the serializers write to
 * next.
 *
 * <p>If {@code span.next()} yields a follow-up span, the current span is submitted to
 * {@code sortmaster} as a {@code SortTask} and the follow-up span becomes current. Otherwise the
 * current span is sorted on the calling thread, {@code spill(true)} is invoked, the buffer index
 * is rewound to 0 and a fresh {@code SortSpan} is created over the first buffer. In both cases
 * the key and value serializers are re-opened on the new span's output.
 *
 * @throws IOException declared by the signature; presumably raised while spilling or opening
 *     the serializers
 */
public void sort() throws IOException {
    final SortSpan successor = span.next();
    if (successor != null) {
        // queue up the sort
        final SortTask sortTask = new SortTask(span, sorter);
        LOG.debug("Submitting span={} for sort", span.toString());
        final Future<SpanIterator> pending = sortmaster.submit(sortTask);
        merger.add(pending);
        span = successor;
    } else {
        // avoid sort/spill of empty span
        final StopWatch spillTimer = new StopWatch();
        spillTimer.start();
        // sort in the same thread, do not wait for the thread pool
        merger.add(span.sort(sorter));
        final boolean spilled = spill(true);
        spillTimer.stop();
        if (LOG.isDebugEnabled()) {
            LOG.debug(outputContext.getDestinationVertexName() + ": Time taken for spill " + (spillTimer.now(TimeUnit.MILLISECONDS)) + " ms");
        }
        if (pipelinedShuffle && spilled) {
            sendPipelinedShuffleEvents();
        }
        // safe to reset bufferIndex to 0;
        bufferIndex = 0;

        final int maxItems = 1024 * 1024;
        int items = maxItems;
        int perItem = 16;
        if (span.length() != 0) {
            items = span.length();
            perItem = span.kvbuffer.limit() / items;
            // our goal is to have 1M splits and sort early
            // NOTE(review): the bounded item count computed here is not read afterwards; the
            // new span below is always created with the fixed maximum.
            items = Math.min((int) ((span.capacity) / (METASIZE + perItem)), maxItems);
        }
        Preconditions.checkArgument(buffers.get(bufferIndex) != null, "block should not be empty");
        // TODO: fix per item being passed.
        span = new SortSpan((ByteBuffer) buffers.get(bufferIndex).clear(), maxItems, perItem, ConfigUtils.getIntermediateOutputKeyComparator(this.conf));
    }
    valSerializer.open(span.out);
    keySerializer.open(span.out);
}
Also used : ByteBuffer(java.nio.ByteBuffer) StopWatch(org.apache.tez.util.StopWatch)

Example 3 with StopWatch

use of org.apache.tez.util.StopWatch in project tez by apache.

The class MRInputAMSplitGenerator, method initialize.

/**
 * Generates the input splits in memory and converts them into the events returned to the
 * framework.
 *
 * <p>Steps, each timed with a {@code StopWatch} and reported at debug level:
 * <ol>
 *   <li>parse the user payload into an {@code MRInputUserPayloadProto};</li>
 *   <li>rebuild the {@code Configuration} from the payload and read whether split payloads
 *       should be sent serialized (stored in {@code sendSerializedEvents});</li>
 *   <li>derive the desired task count from total resource, per-task resource and the configured
 *       number of waves, then generate the splits via {@code MRInputHelpers}.</li>
 * </ol>
 *
 * @return a list whose first element is the {@code InputConfigureVertexTasksEvent}, followed by
 *     one {@code InputDataInformationEvent} per split, indexed from 0
 * @throws Exception declared by the signature; presumably raised by payload parsing, credential
 *     lookup or split generation
 */
@Override
public List<Event> initialize() throws Exception {
    final StopWatch timer = new StopWatch().start();
    final MRInputUserPayloadProto payload = MRInputHelpers.parseMRInputPayload(getContext().getInputUserPayload());
    timer.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time to parse MRInput payload into prot: " + timer.now(TimeUnit.MILLISECONDS));
    }

    timer.reset().start();
    final Configuration conf = TezUtils.createConfFromByteString(payload.getConfigurationBytes());
    sendSerializedEvents = conf.getBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
    timer.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Emitting serialized splits: " + sendSerializedEvents + " for input " + getContext().getInputName());
        LOG.debug("Time converting ByteString to configuration: " + timer.now(TimeUnit.MILLISECONDS));
    }

    timer.reset().start();
    final int totalResource = getContext().getTotalAvailableResource().getMemory();
    final int taskResource = getContext().getVertexTaskResource().getMemory();
    final float waves = conf.getFloat(TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES, TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);
    final int numTasks = (int) ((totalResource * waves) / taskResource);
    final boolean groupSplits = payload.getGroupingEnabled();
    final boolean sortSplits = payload.getSortSplitsEnabled();
    LOG.info("Input " + getContext().getInputName() + " asking for " + numTasks + " tasks. Headroom: " + totalResource + ". Task Resource: " + taskResource + ". waves: " + waves + ". groupingEnabled: " + groupSplits + ". SortSplitsEnabled: " + sortSplits);

    // Read all credentials into the credentials instance stored in JobConf.
    final JobConf jobConf = new JobConf(conf);
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    // The desired task count is only passed on when grouping is enabled; otherwise 0 is passed.
    final int desiredTasks = groupSplits ? numTasks : 0;
    final InputSplitInfoMem splitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, groupSplits, sortSplits, desiredTasks);
    timer.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time to create splits to mem: " + timer.now(TimeUnit.MILLISECONDS));
    }

    // One configure event plus one data-information event per task.
    final List<Event> events = Lists.newArrayListWithCapacity(splitInfo.getNumTasks() + 1);
    events.add(InputConfigureVertexTasksEvent.create(splitInfo.getNumTasks(), VertexLocationHint.create(splitInfo.getTaskLocationHints()), InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate()));

    // Index handed to each data-information event; every branch below starts from 0.
    int nextIndex = 0;
    if (sendSerializedEvents) {
        final MRSplitsProto splitsProto = splitInfo.getSplitsProto();
        for (MRSplitProto mrSplit : splitsProto.getSplitsList()) {
            events.add(InputDataInformationEvent.createWithSerializedPayload(nextIndex++, mrSplit.toByteString().asReadOnlyByteBuffer()));
        }
    } else if (splitInfo.holdsNewFormatSplits()) {
        for (org.apache.hadoop.mapreduce.InputSplit split : splitInfo.getNewFormatSplits()) {
            events.add(InputDataInformationEvent.createWithObjectPayload(nextIndex++, split));
        }
    } else {
        for (org.apache.hadoop.mapred.InputSplit split : splitInfo.getOldFormatSplits()) {
            events.add(InputDataInformationEvent.createWithObjectPayload(nextIndex++, split));
        }
    }
    return events;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) StopWatch(org.apache.tez.util.StopWatch) MRSplitsProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto) InputSplitInfoMem(org.apache.tez.mapreduce.hadoop.InputSplitInfoMem) MRInputUserPayloadProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) Event(org.apache.tez.runtime.api.Event) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) JobConf(org.apache.hadoop.mapred.JobConf) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 4 with StopWatch

use of org.apache.tez.util.StopWatch in project hive by apache.

The class TezJobMonitor, method monitorExecution.

/**
 * Polls {@code dagClient} for the DAG status until the DAG reaches a terminal state or status
 * retrieval has failed too often, printing progress to the console along the way.
 *
 * <p>While polling, the subscribed counters of the workload-management context (if any) are
 * refreshed from the DAG counters. When an exception occurs, the poll is retried until both
 * {@code MAX_RETRY_FAILURES} consecutive failures have happened and {@code MAX_RETRY_INTERVAL}
 * milliseconds have elapsed since the first of them; an interrupt aborts immediately. In either
 * abort case a best-effort attempt is made to kill the DAG.
 *
 * @return {@code 0} when the DAG succeeded, {@code 1} when it was killed or monitoring was
 *     aborted after an exception, {@code 2} when it ended in {@code FAILED} or {@code ERROR}
 */
public int monitorExecution() {
    boolean done = false;
    boolean success = false;
    int failedCounter = 0;
    // Measures how long status retrieval has been failing continuously.
    final StopWatch failureTimer = new StopWatch();
    int rc = 0;
    DAGStatus status = null;
    Map<String, Progress> vertexProgressMap = null;
    long monitorStartTime = System.currentTimeMillis();
    synchronized (shutdownList) {
        shutdownList.add(dagClient);
    }
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
    DAGStatus.State lastState = null;
    boolean running = false;
    long checkInterval = HiveConf.getTimeVar(hiveConf, HiveConf.ConfVars.TEZ_DAG_STATUS_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
    WmContext wmContext = null;
    while (true) {
        try {
            if (context != null) {
                context.checkHeartbeaterLockException();
            }
            // Guard the dereference: context is treated as optional by the check above, so a
            // null context must not raise a NullPointerException on every poll.
            wmContext = (context == null) ? null : context.getWmContext();
            EnumSet<StatusGetOpts> opts = null;
            if (wmContext != null) {
                Set<String> desiredCounters = wmContext.getSubscribedCounters();
                if (desiredCounters != null && !desiredCounters.isEmpty()) {
                    // Counters are only requested when something subscribed to them.
                    opts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
                }
            }
            status = dagClient.getDAGStatus(opts, checkInterval);
            vertexProgressMap = status.getVertexProgress();
            List<String> vertexNames = vertexProgressMap.keySet().stream().map(k -> k.replaceAll(" ", "_")).collect(Collectors.toList());
            if (wmContext != null) {
                Set<String> desiredCounters = wmContext.getSubscribedCounters();
                TezCounters dagCounters = status.getDAGCounters();
                // if initial counters exists, merge it with dag counters to get aggregated view
                TezCounters mergedCounters = counters == null ? dagCounters : Utils.mergeTezCounters(dagCounters, counters);
                if (mergedCounters != null && desiredCounters != null && !desiredCounters.isEmpty()) {
                    Map<String, Long> currentCounters = getCounterValues(mergedCounters, vertexNames, vertexProgressMap, desiredCounters, done);
                    LOG.debug("Requested DAG status. checkInterval: {}. currentCounters: {}", checkInterval, currentCounters);
                    wmContext.setCurrentCounters(currentCounters);
                }
            }
            DAGStatus.State state = status.getState();
            // AM is responsive again (recovery?)
            failedCounter = 0;
            failureTimer.reset();
            // RUNNING is re-processed on every poll so progress keeps being updated.
            if (state != lastState || state == RUNNING) {
                lastState = state;
                switch(state) {
                    case SUBMITTED:
                        console.printInfo("Status: Submitted");
                        break;
                    case INITING:
                        console.printInfo("Status: Initializing");
                        this.executionStartTime = System.currentTimeMillis();
                        break;
                    case RUNNING:
                        if (!running) {
                            // First time RUNNING is observed.
                            perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
                            console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
                            this.executionStartTime = System.currentTimeMillis();
                            running = true;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        break;
                    case SUCCEEDED:
                        if (!running) {
                            // RUNNING was never observed; fall back to the monitor start time.
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        success = true;
                        running = false;
                        done = true;
                        break;
                    case KILLED:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        console.printInfo("Status: Killed");
                        running = false;
                        done = true;
                        rc = 1;
                        break;
                    case FAILED:
                    case ERROR:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        console.printError("Status: Failed");
                        running = false;
                        done = true;
                        rc = 2;
                        break;
                }
            }
            if (wmContext != null && done) {
                wmContext.setQueryCompleted(true);
            }
        } catch (Exception e) {
            console.printInfo("Exception: " + e.getMessage());
            boolean isInterrupted = hasInterruptedException(e);
            if (failedCounter == 0) {
                // First failure of a streak: start measuring how long the failures last.
                failureTimer.reset();
                failureTimer.start();
            }
            // Give up on interrupt, or once both the failure count and the elapsed time
            // exceed their limits.
            if (isInterrupted || (++failedCounter >= MAX_RETRY_FAILURES && failureTimer.now(TimeUnit.MILLISECONDS) > MAX_RETRY_INTERVAL)) {
                try {
                    if (isInterrupted) {
                        console.printInfo("Killing DAG...");
                    } else {
                        console.printInfo(String.format("Killing DAG... after %d seconds", failureTimer.now(TimeUnit.SECONDS)));
                    }
                    dagClient.tryKillDAG();
                } catch (IOException | TezException tezException) {
                // best effort
                }
                console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e));
                rc = 1;
                done = true;
            } else {
                console.printInfo("Retrying...");
            }
            if (wmContext != null && done) {
                wmContext.setQueryCompleted(true);
            }
        } finally {
            if (done) {
                if (wmContext != null) {
                    wmContext.setQueryCompleted(true);
                }
                if (rc != 0 && status != null) {
                    // Surface the DAG diagnostics for any non-successful outcome.
                    for (String diag : status.getDiagnostics()) {
                        console.printError(diag);
                        diagnostics.append(diag);
                    }
                }
                synchronized (shutdownList) {
                    shutdownList.remove(dagClient);
                }
                // The only exit from the polling loop.
                break;
            }
        }
    }
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    printSummary(success, vertexProgressMap);
    return rc;
}
Also used : DAGClient(org.apache.tez.dag.api.client.DAGClient) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TezCounter(org.apache.tez.common.counters.TezCounter) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) InterruptedIOException(java.io.InterruptedIOException) TimeCounterLimit(org.apache.hadoop.hive.ql.wm.TimeCounterLimit) VertexCounterLimit(org.apache.hadoop.hive.ql.wm.VertexCounterLimit) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) ProgressMonitor(org.apache.hadoop.hive.common.log.ProgressMonitor) Map(java.util.Map) InPlaceUpdate(org.apache.hadoop.hive.common.log.InPlaceUpdate) Context(org.apache.hadoop.hive.ql.Context) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) CounterGroup(org.apache.tez.common.counters.CounterGroup) LinkedList(java.util.LinkedList) EnumSet(java.util.EnumSet) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) Progress(org.apache.tez.dag.api.client.Progress) Logger(org.slf4j.Logger) StringWriter(java.io.StringWriter) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Set(java.util.Set) StopWatch(org.apache.tez.util.StopWatch) IOException(java.io.IOException) DAG(org.apache.tez.dag.api.DAG) Collectors(java.util.stream.Collectors) SessionState(org.apache.hadoop.hive.ql.session.SessionState) RUNNING(org.apache.tez.dag.api.client.DAGStatus.State.RUNNING) ShutdownHookManager(org.apache.hive.common.util.ShutdownHookManager) LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) TezException(org.apache.tez.dag.api.TezException) TimeUnit(java.util.concurrent.TimeUnit) TezCounters(org.apache.tez.common.counters.TezCounters) List(java.util.List) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) Utils(org.apache.hadoop.hive.ql.exec.tez.Utils) Preconditions(com.google.common.base.Preconditions) TezSessionPoolManager(org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager) 
ExceptionUtils(org.apache.commons.lang3.exception.ExceptionUtils) Progress(org.apache.tez.dag.api.client.Progress) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) TezCounters(org.apache.tez.common.counters.TezCounters) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException) StopWatch(org.apache.tez.util.StopWatch) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) DAGStatus(org.apache.tez.dag.api.client.DAGStatus)

Example 5 with StopWatch

use of org.apache.tez.util.StopWatch in project tez by apache.

The class TestMemoryWithEvents, method testMemory.

/**
 * Runs {@code dag} to completion on a mock Tez client, asserts that it succeeded, invokes
 * {@code checkMemory} for it and prints the elapsed wall-clock time.
 *
 * <p>The client is stopped in a {@code finally} block so that a failed submission, a failed
 * assertion or a failing memory check does not leave the started client running.
 *
 * @param dag the DAG to submit
 * @param sendDMEvents not read by this method body
 * @throws Exception declared by the signature; presumably raised by the client, the DAG run or
 *     the memory check
 */
private void testMemory(DAG dag, boolean sendDMEvents) throws Exception {
    StopWatch stopwatch = new StopWatch();
    stopwatch.start();
    TezConfiguration tezconf = new TezConfiguration(defaultConf);
    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null, false, false, numThreads, 1000);
    tezClient.start();
    try {
        MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
        MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
        // Hold back scheduling until the DAG is submitted and its containers are launched.
        mockLauncher.startScheduling(false);
        mockApp.eventsDelegate = new TestMockDAGAppMaster.TestEventsDelegate();
        mockApp.doSleep = false;
        DAGClient dagClient = tezClient.submitDAG(dag);
        mockLauncher.waitTillContainersLaunched();
        mockLauncher.startScheduling(true);
        DAGStatus status = dagClient.waitForCompletion();
        Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
        checkMemory(dag.getName(), mockApp);
        stopwatch.stop();
        System.out.println("Time taken(ms): " + stopwatch.now(TimeUnit.MILLISECONDS));
    } finally {
        // Always release the client, including when the run or an assertion above fails.
        tezClient.stop();
    }
}
Also used : DAGClient(org.apache.tez.dag.api.client.DAGClient) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) MockContainerLauncher(org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher) StopWatch(org.apache.tez.util.StopWatch) TezConfiguration(org.apache.tez.dag.api.TezConfiguration)

Aggregations

StopWatch (org.apache.tez.util.StopWatch)7 Configuration (org.apache.hadoop.conf.Configuration)2 JobConf (org.apache.hadoop.mapred.JobConf)2 DAGClient (org.apache.tez.dag.api.client.DAGClient)2 Preconditions (com.google.common.base.Preconditions)1 IOException (java.io.IOException)1 InterruptedIOException (java.io.InterruptedIOException)1 StringWriter (java.io.StringWriter)1 ByteBuffer (java.nio.ByteBuffer)1 EnumSet (java.util.EnumSet)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 TimeUnit (java.util.concurrent.TimeUnit)1 Collectors (java.util.stream.Collectors)1 ExceptionUtils (org.apache.commons.lang3.exception.ExceptionUtils)1 InPlaceUpdate (org.apache.hadoop.hive.common.log.InPlaceUpdate)1 ProgressMonitor (org.apache.hadoop.hive.common.log.ProgressMonitor)1