Search in sources :

Example 1 with SimpleQueueSpecificationGenerator

use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.

the class FlowVerification method verify.

/**
   * Verifies a single {@link FlowSpecification} for a {@link co.cask.cdap.api.flow.Flow}.
   *
   * @param input to be verified
   * @return An instance of {@link VerifyResult} depending of status of verification.
   */
@Override
public VerifyResult verify(ApplicationId appId, final FlowSpecification input) {
    VerifyResult verifyResult = super.verify(appId, input);
    if (!verifyResult.isSuccess()) {
        return verifyResult;
    }
    String flowName = input.getName();
    // Check if there are no flowlets.
    if (input.getFlowlets().isEmpty()) {
        return VerifyResult.failure(Err.Flow.ATLEAST_ONE_FLOWLET, flowName);
    }
    // Check if there no connections.
    if (input.getConnections().isEmpty()) {
        return VerifyResult.failure(Err.Flow.ATLEAST_ONE_CONNECTION, flowName);
    }
    // We go through each Flowlet and verify the flowlets.
    // First collect all source flowlet names
    Set<String> sourceFlowletNames = Sets.newHashSet();
    for (FlowletConnection connection : input.getConnections()) {
        if (connection.getSourceType() == FlowletConnection.Type.FLOWLET) {
            sourceFlowletNames.add(connection.getSourceName());
        }
    }
    for (Map.Entry<String, FlowletDefinition> entry : input.getFlowlets().entrySet()) {
        FlowletDefinition defn = entry.getValue();
        String flowletName = defn.getFlowletSpec().getName();
        // Check if the Flowlet Name is an ID.
        if (!EntityId.isValidId(defn.getFlowletSpec().getName())) {
            return VerifyResult.failure(Err.NOT_AN_ID, flowName + ":" + flowletName);
        }
        // We check if all the dataset names used are ids
        for (String dataSet : defn.getDatasets()) {
            if (!EntityId.isValidDatasetId(dataSet)) {
                return VerifyResult.failure(Err.NOT_AN_ID, flowName + ":" + flowletName + ":" + dataSet);
            }
        }
        // Check if the flowlet has output, it must be appear as source flowlet in at least one connection
        if (entry.getValue().getOutputs().size() > 0 && !sourceFlowletNames.contains(flowletName)) {
            return VerifyResult.failure(Err.Flow.OUTPUT_NOT_CONNECTED, flowName, flowletName);
        }
    }
    // NOTE: We should unify the logic here and the queue spec generation, as they are doing the same thing.
    Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> queueSpecTable = new SimpleQueueSpecificationGenerator(appId).create(input);
    // For all connections, there should be an entry in the table.
    for (FlowletConnection connection : input.getConnections()) {
        QueueSpecificationGenerator.Node node;
        if (connection.getSourceType() == FlowletConnection.Type.FLOWLET) {
            node = new QueueSpecificationGenerator.Node(connection.getSourceType(), connection.getSourceName());
        } else {
            String sourceNamespace = connection.getSourceNamespace() == null ? appId.getNamespace() : connection.getSourceNamespace();
            node = new QueueSpecificationGenerator.Node(connection.getSourceType(), sourceNamespace, connection.getSourceName());
        }
        if (!queueSpecTable.contains(node, connection.getTargetName())) {
            return VerifyResult.failure(Err.Flow.NO_INPUT_FOR_OUTPUT, flowName, connection.getTargetName(), connection.getSourceType(), connection.getSourceName());
        }
    }
    // For each output entity, check for any unconnected output
    for (QueueSpecificationGenerator.Node node : queueSpecTable.rowKeySet()) {
        // For stream output, no need to check
        if (node.getType() == FlowletConnection.Type.STREAM) {
            continue;
        }
        // For all outputs of a flowlet, remove all the matched connected schema, if there is anything left,
        // then it's a incomplete flow connection (has output not connect to any input).
        Multimap<String, Schema> outputs = toMultimap(input.getFlowlets().get(node.getName()).getOutputs());
        for (Map.Entry<String, Set<QueueSpecification>> entry : queueSpecTable.row(node).entrySet()) {
            for (QueueSpecification queueSpec : entry.getValue()) {
                outputs.remove(queueSpec.getQueueName().getSimpleName(), queueSpec.getOutputSchema());
            }
        }
        if (!outputs.isEmpty()) {
            return VerifyResult.failure(Err.Flow.MORE_OUTPUT_NOT_ALLOWED, flowName, node.getType().toString().toLowerCase(), node.getName(), outputs);
        }
    }
    return VerifyResult.success();
}
Also used : Set(java.util.Set) FlowletConnection(co.cask.cdap.api.flow.FlowletConnection) Schema(co.cask.cdap.api.data.schema.Schema) SimpleQueueSpecificationGenerator(co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator) FlowletDefinition(co.cask.cdap.api.flow.FlowletDefinition) QueueSpecificationGenerator(co.cask.cdap.app.queue.QueueSpecificationGenerator) SimpleQueueSpecificationGenerator(co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator) QueueSpecification(co.cask.cdap.app.queue.QueueSpecification) VerifyResult(co.cask.cdap.app.verification.VerifyResult) Map(java.util.Map)

Example 2 with SimpleQueueSpecificationGenerator

use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.

the class DistributedFlowProgramRunner method getFlowletQueues.

/**
   * Gets the queue configuration of the Flow based on the connections in the given {@link FlowSpecification}.
   */
private Multimap<String, QueueName> getFlowletQueues(ApplicationId appId, FlowSpecification flowSpec) {
    // Generate all queues specifications
    Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> queueSpecs = new SimpleQueueSpecificationGenerator(appId).create(flowSpec);
    // For storing result from flowletId to queue.
    ImmutableSetMultimap.Builder<String, QueueName> resultBuilder = ImmutableSetMultimap.builder();
    // Loop through each flowlet
    for (Map.Entry<String, FlowletDefinition> entry : flowSpec.getFlowlets().entrySet()) {
        String flowletId = entry.getKey();
        // For each queue that the flowlet is a consumer, store the number of instances for this flowlet
        for (QueueSpecification queueSpec : Iterables.concat(queueSpecs.column(flowletId).values())) {
            resultBuilder.put(flowletId, queueSpec.getQueueName());
        }
    }
    return resultBuilder.build();
}
Also used : FlowletDefinition(co.cask.cdap.api.flow.FlowletDefinition) Set(java.util.Set) ImmutableSetMultimap(com.google.common.collect.ImmutableSetMultimap) QueueSpecification(co.cask.cdap.app.queue.QueueSpecification) QueueName(co.cask.cdap.common.queue.QueueName) Map(java.util.Map) SimpleQueueSpecificationGenerator(co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator)

Example 3 with SimpleQueueSpecificationGenerator

use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.

the class HBaseQueueDebugger method scanQueues.

private void scanQueues(List<NamespaceMeta> namespaceMetas) throws Exception {
    final QueueStatistics totalStats = new QueueStatistics();
    for (NamespaceMeta namespaceMeta : namespaceMetas) {
        final NamespaceId namespaceId = new NamespaceId(namespaceMeta.getName());
        final Collection<ApplicationSpecification> apps = store.getAllApplications(namespaceId);
        for (final ApplicationSpecification app : apps) {
            ApplicationId appId = new ApplicationId(namespaceMeta.getName(), app.getName(), app.getAppVersion());
            Collection<FlowSpecification> flows = app.getFlows().values();
            for (final FlowSpecification flow : flows) {
                final ProgramId flowId = appId.program(ProgramType.FLOW, flow.getName());
                impersonator.doAs(flowId, new Callable<Void>() {

                    @Override
                    public Void call() throws Exception {
                        SimpleQueueSpecificationGenerator queueSpecGenerator = new SimpleQueueSpecificationGenerator(flowId.getParent());
                        Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> table = queueSpecGenerator.create(flow);
                        for (Table.Cell<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> cell : table.cellSet()) {
                            if (cell.getRowKey().getType() == FlowletConnection.Type.FLOWLET) {
                                for (QueueSpecification queue : cell.getValue()) {
                                    QueueStatistics queueStats = scanQueue(queue.getQueueName(), null);
                                    totalStats.add(queueStats);
                                }
                            }
                        }
                        return null;
                    }
                });
            }
        }
    }
    System.out.printf("Total results for all queues: %s\n", totalStats.getReport(showTxTimestampOnly()));
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) HTable(org.apache.hadoop.hbase.client.HTable) Table(com.google.common.collect.Table) ProgramId(co.cask.cdap.proto.id.ProgramId) TransactionNotInProgressException(org.apache.tephra.TransactionNotInProgressException) TransactionFailureException(org.apache.tephra.TransactionFailureException) NotFoundException(co.cask.cdap.common.NotFoundException) SimpleQueueSpecificationGenerator(co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator) SimpleQueueSpecificationGenerator(co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator) QueueSpecificationGenerator(co.cask.cdap.app.queue.QueueSpecificationGenerator) FlowSpecification(co.cask.cdap.api.flow.FlowSpecification) NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) NamespaceId(co.cask.cdap.proto.id.NamespaceId) QueueSpecification(co.cask.cdap.app.queue.QueueSpecification) ApplicationId(co.cask.cdap.proto.id.ApplicationId)

Example 4 with SimpleQueueSpecificationGenerator

use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.

the class FlowQueuePendingCorrector method run.

/**
   * Corrects queue.pending metric for a flowlet.
   */
public void run(FlowId flowId, String producerFlowlet, String consumerFlowlet, String flowletQueue, FlowSpecification flow) throws Exception {
    System.out.println("Running queue.pending correction on flow '" + flowId + "' producerFlowlet '" + producerFlowlet + "' consumerFlowlet '" + consumerFlowlet + "' flowletQueue '" + flowletQueue + "'");
    Map<RunId, ProgramRuntimeService.RuntimeInfo> runtimeInfos = programRuntimeService.list(flowId);
    Preconditions.checkState(runtimeInfos.isEmpty(), "Cannot run tool when flow " + flowId + " is still running");
    SimpleQueueSpecificationGenerator queueSpecGenerator = new SimpleQueueSpecificationGenerator(flowId.getParent());
    Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> table = queueSpecGenerator.create(flow);
    Preconditions.checkArgument(table.contains(QueueSpecificationGenerator.Node.flowlet(producerFlowlet), consumerFlowlet), "Flowlet " + producerFlowlet + " is not emitting to " + consumerFlowlet);
    Set<QueueSpecification> queueSpecs = table.get(QueueSpecificationGenerator.Node.flowlet(producerFlowlet), consumerFlowlet);
    boolean validQueue = false;
    for (QueueSpecification queueSpec : queueSpecs) {
        if (queueSpec.getQueueName().getSimpleName().equals(flowletQueue)) {
            validQueue = true;
            break;
        }
    }
    Preconditions.checkArgument(validQueue, "Queue " + flowletQueue + " does not exist for the given flowlets");
    QueueName queueName = QueueName.fromFlowlet(flowId, producerFlowlet, flowletQueue);
    long consumerGroupId = FlowUtils.generateConsumerGroupId(flowId, consumerFlowlet);
    long correctQueuePendingValue;
    try {
        HBaseQueueDebugger.QueueStatistics stats = queueDebugger.scanQueue(queueName, consumerGroupId);
        correctQueuePendingValue = stats.getUnprocessed() + stats.getProcessedAndNotVisible();
    } catch (NotFoundException e) {
        // OK since flowlet queue exists, but actual queue doesn't exist
        // (e.g. when running upgrade tool from 2.8 to 3.0)
        correctQueuePendingValue = 0;
    }
    Map<String, String> tags = ImmutableMap.<String, String>builder().put(Constants.Metrics.Tag.NAMESPACE, flowId.getNamespace()).put(Constants.Metrics.Tag.APP, flowId.getApplication()).put(Constants.Metrics.Tag.FLOW, flowId.getProgram()).put(Constants.Metrics.Tag.CONSUMER, consumerFlowlet).put(Constants.Metrics.Tag.PRODUCER, producerFlowlet).put(Constants.Metrics.Tag.FLOWLET_QUEUE, flowletQueue).build();
    MetricDataQuery query = new MetricDataQuery(0, 0, Integer.MAX_VALUE, 1, ImmutableMap.of("system.queue.pending", AggregationFunction.SUM), tags, ImmutableList.<String>of(), null);
    Collection<MetricTimeSeries> results = metricStore.query(query);
    long queuePending;
    if (results.isEmpty()) {
        queuePending = 0;
    } else {
        System.out.println("Got results: " + GSON.toJson(results));
        Preconditions.checkState(results.size() == 1);
        List<TimeValue> timeValues = results.iterator().next().getTimeValues();
        Preconditions.checkState(timeValues.size() == 1);
        TimeValue timeValue = timeValues.get(0);
        queuePending = timeValue.getValue();
    }
    metricsCollectionService.startAndWait();
    MetricsContext collector = metricsCollectionService.getContext(tags);
    collector.gauge("queue.pending", correctQueuePendingValue);
    System.out.printf("Adjusted system.queue.pending metric from %d to %d (tags %s)\n", queuePending, correctQueuePendingValue, GSON.toJson(tags));
    // stop will flush the metrics
    metricsCollectionService.stopAndWait();
}
Also used : HBaseQueueDebugger(co.cask.cdap.data.tools.HBaseQueueDebugger) Set(java.util.Set) MetricsContext(co.cask.cdap.api.metrics.MetricsContext) NotFoundException(co.cask.cdap.common.NotFoundException) MetricTimeSeries(co.cask.cdap.api.metrics.MetricTimeSeries) SimpleQueueSpecificationGenerator(co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator) QueueSpecification(co.cask.cdap.app.queue.QueueSpecification) RunId(org.apache.twill.api.RunId) MetricDataQuery(co.cask.cdap.api.metrics.MetricDataQuery) QueueName(co.cask.cdap.common.queue.QueueName) TimeValue(co.cask.cdap.api.dataset.lib.cube.TimeValue)

Example 5 with SimpleQueueSpecificationGenerator

use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.

the class FlowletProgramRunner method run.

@SuppressWarnings("unchecked")
@Override
public ProgramController run(Program program, ProgramOptions options) {
    BasicFlowletContext flowletContext = null;
    try {
        // Extract and verify parameters
        String flowletName = options.getName();
        int instanceId = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1"));
        Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");
        int instanceCount = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0"));
        Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");
        RunId runId = ProgramRunners.getRunId(options);
        ApplicationSpecification appSpec = program.getApplicationSpecification();
        Preconditions.checkNotNull(appSpec, "Missing application specification.");
        ProgramType processorType = program.getType();
        Preconditions.checkNotNull(processorType, "Missing processor type.");
        Preconditions.checkArgument(processorType == ProgramType.FLOW, "Only FLOW process type is supported.");
        String processorName = program.getName();
        Preconditions.checkNotNull(processorName, "Missing processor name.");
        FlowSpecification flowSpec = appSpec.getFlows().get(processorName);
        FlowletDefinition flowletDef = flowSpec.getFlowlets().get(flowletName);
        Preconditions.checkNotNull(flowletDef, "Definition missing for flowlet \"%s\"", flowletName);
        Class<?> clz = Class.forName(flowletDef.getFlowletSpec().getClassName(), true, program.getClassLoader());
        Preconditions.checkArgument(Flowlet.class.isAssignableFrom(clz), "%s is not a Flowlet.", clz);
        // Setup dataset framework context, if required
        ProgramId programId = program.getId();
        FlowletId flowletId = programId.flowlet(flowletName);
        ProgramRunId run = programId.run(runId);
        ProgramContext programContext = new BasicProgramContext(run, flowletId);
        if (dsFramework instanceof ProgramContextAware) {
            ((ProgramContextAware) dsFramework).setContext(programContext);
        }
        Class<? extends Flowlet> flowletClass = (Class<? extends Flowlet>) clz;
        // Creates flowlet context
        flowletContext = new BasicFlowletContext(program, options, flowletId, instanceId, instanceCount, flowletDef.getDatasets(), flowletDef.getFlowletSpec(), metricsCollectionService, discoveryServiceClient, txClient, dsFramework, secureStore, secureStoreManager, messageService, cConf);
        // Creates tx related objects
        DataFabricFacade dataFabricFacade = dataFabricFacadeFactory.create(program, flowletContext.getDatasetCache());
        if (dataFabricFacade instanceof ProgramContextAware) {
            ((ProgramContextAware) dataFabricFacade).setContext(programContext);
        }
        // Creates QueueSpecification
        Table<Node, String, Set<QueueSpecification>> queueSpecs = new SimpleQueueSpecificationGenerator(new ApplicationId(program.getNamespaceId(), program.getApplicationId())).create(flowSpec);
        Flowlet flowlet = new InstantiatorFactory(false).get(TypeToken.of(flowletClass)).create();
        TypeToken<? extends Flowlet> flowletType = TypeToken.of(flowletClass);
        // Set the context classloader to the cdap classloader. It is needed for the DatumWriterFactory be able
        // to load cdap classes
        Thread.currentThread().setContextClassLoader(FlowletProgramRunner.class.getClassLoader());
        // Inject DataSet, OutputEmitter, Metric fields
        ImmutableList.Builder<ProducerSupplier> queueProducerSupplierBuilder = ImmutableList.builder();
        Reflections.visit(flowlet, flowlet.getClass(), new PropertyFieldSetter(flowletDef.getFlowletSpec().getProperties()), new DataSetFieldSetter(flowletContext), new MetricsFieldSetter(flowletContext.getMetrics()), new OutputEmitterFieldSetter(outputEmitterFactory(flowletContext, flowletName, dataFabricFacade, queueProducerSupplierBuilder, queueSpecs)));
        ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder = ImmutableList.builder();
        Collection<ProcessSpecification<?>> processSpecs = createProcessSpecification(flowletContext, flowletType, processMethodFactory(flowlet), processSpecificationFactory(flowletContext, dataFabricFacade, queueReaderFactory, flowletName, queueSpecs, queueConsumerSupplierBuilder, createSchemaCache(program)), Lists.<ProcessSpecification<?>>newLinkedList());
        List<ConsumerSupplier<?>> consumerSuppliers = queueConsumerSupplierBuilder.build();
        // Create the flowlet driver
        AtomicReference<FlowletProgramController> controllerRef = new AtomicReference<>();
        Service serviceHook = createServiceHook(flowletName, consumerSuppliers, controllerRef);
        FlowletRuntimeService driver = new FlowletRuntimeService(flowlet, flowletContext, processSpecs, createCallback(flowlet, flowletDef.getFlowletSpec()), dataFabricFacade, serviceHook);
        FlowletProgramController controller = new FlowletProgramController(program.getId(), flowletName, flowletContext, driver, queueProducerSupplierBuilder.build(), consumerSuppliers);
        controllerRef.set(controller);
        LOG.info("Starting flowlet: {}", flowletContext);
        driver.start();
        LOG.info("Flowlet started: {}", flowletContext);
        return controller;
    } catch (Exception e) {
        // of the flowlet context.
        if (flowletContext != null) {
            flowletContext.close();
        }
        throw Throwables.propagate(e);
    }
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) Set(java.util.Set) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList(com.google.common.collect.ImmutableList) Node(co.cask.cdap.app.queue.QueueSpecificationGenerator.Node) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) ProgramContext(co.cask.cdap.data.ProgramContext) InstantiatorFactory(co.cask.cdap.common.lang.InstantiatorFactory) FlowletDefinition(co.cask.cdap.api.flow.FlowletDefinition) FlowSpecification(co.cask.cdap.api.flow.FlowSpecification) MetricsFieldSetter(co.cask.cdap.internal.app.runtime.MetricsFieldSetter) ProgramType(co.cask.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Flowlet(co.cask.cdap.api.flow.flowlet.Flowlet) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) Service(com.google.common.util.concurrent.Service) MessagingService(co.cask.cdap.messaging.MessagingService) AbstractService(com.google.common.util.concurrent.AbstractService) AtomicReference(java.util.concurrent.atomic.AtomicReference) ProgramId(co.cask.cdap.proto.id.ProgramId) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) DataSetFieldSetter(co.cask.cdap.internal.app.runtime.DataSetFieldSetter) UnsupportedTypeException(co.cask.cdap.api.data.schema.UnsupportedTypeException) IOException(java.io.IOException) SimpleQueueSpecificationGenerator(co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator) PropertyFieldSetter(co.cask.cdap.common.lang.PropertyFieldSetter) FlowletId(co.cask.cdap.proto.id.FlowletId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) DataFabricFacade(co.cask.cdap.internal.app.runtime.DataFabricFacade) ApplicationId(co.cask.cdap.proto.id.ApplicationId) ProgramContextAware(co.cask.cdap.data.ProgramContextAware)

Aggregations

SimpleQueueSpecificationGenerator (co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator)7 QueueSpecification (co.cask.cdap.app.queue.QueueSpecification)6 Set (java.util.Set)6 FlowletDefinition (co.cask.cdap.api.flow.FlowletDefinition)4 QueueName (co.cask.cdap.common.queue.QueueName)3 ApplicationId (co.cask.cdap.proto.id.ApplicationId)3 Map (java.util.Map)3 ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification)2 FlowSpecification (co.cask.cdap.api.flow.FlowSpecification)2 QueueSpecificationGenerator (co.cask.cdap.app.queue.QueueSpecificationGenerator)2 NotFoundException (co.cask.cdap.common.NotFoundException)2 ProgramId (co.cask.cdap.proto.id.ProgramId)2 ImmutableSetMultimap (com.google.common.collect.ImmutableSetMultimap)2 Table (com.google.common.collect.Table)2 IOException (java.io.IOException)2 RunId (org.apache.twill.api.RunId)2 Schema (co.cask.cdap.api.data.schema.Schema)1 UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException)1 TimeValue (co.cask.cdap.api.dataset.lib.cube.TimeValue)1 FlowletConnection (co.cask.cdap.api.flow.FlowletConnection)1