use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.
the class FlowVerification method verify.
/**
 * Validates one {@link FlowSpecification} of a {@link co.cask.cdap.api.flow.Flow}.
 *
 * <p>The checks run in a fixed order and the first failure is returned: the parent verifier's result,
 * presence of flowlets and connections, validity of flowlet and dataset names, every flowlet with outputs
 * being the source of at least one connection, every connection having an entry in the generated queue
 * specification table, and no flowlet output being left without a matching queue.
 *
 * @param appId application id handed to the queue specification generator; its namespace is used for
 *              non-flowlet sources that do not declare one
 * @param input the flow specification to be verified
 * @return a {@link VerifyResult} that is either a success or describes the first failed check
 */
@Override
public VerifyResult verify(ApplicationId appId, final FlowSpecification input) {
  VerifyResult parentResult = super.verify(appId, input);
  if (!parentResult.isSuccess()) {
    return parentResult;
  }

  String flowName = input.getName();
  Map<String, FlowletDefinition> flowlets = input.getFlowlets();

  // A flow must declare at least one flowlet and at least one connection.
  if (flowlets.isEmpty()) {
    return VerifyResult.failure(Err.Flow.ATLEAST_ONE_FLOWLET, flowName);
  }
  if (input.getConnections().isEmpty()) {
    return VerifyResult.failure(Err.Flow.ATLEAST_ONE_CONNECTION, flowName);
  }

  // Names of all flowlets that act as the source of some connection.
  Set<String> connectedSources = Sets.newHashSet();
  for (FlowletConnection conn : input.getConnections()) {
    if (conn.getSourceType() == FlowletConnection.Type.FLOWLET) {
      connectedSources.add(conn.getSourceName());
    }
  }

  // Per-flowlet checks: names must be valid ids and any declared output must be wired up.
  for (FlowletDefinition definition : flowlets.values()) {
    String flowletName = definition.getFlowletSpec().getName();
    if (!EntityId.isValidId(flowletName)) {
      return VerifyResult.failure(Err.NOT_AN_ID, flowName + ":" + flowletName);
    }
    for (String datasetName : definition.getDatasets()) {
      if (!EntityId.isValidDatasetId(datasetName)) {
        return VerifyResult.failure(Err.NOT_AN_ID, flowName + ":" + flowletName + ":" + datasetName);
      }
    }
    // A flowlet that declares outputs has to show up as the source of at least one connection.
    boolean hasOutputs = !definition.getOutputs().isEmpty();
    if (hasOutputs && !connectedSources.contains(flowletName)) {
      return VerifyResult.failure(Err.Flow.OUTPUT_NOT_CONNECTED, flowName, flowletName);
    }
  }

  // NOTE: We should unify the logic here and the queue spec generation, as they are doing the same thing.
  Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> queueSpecTable =
    new SimpleQueueSpecificationGenerator(appId).create(input);

  // Every connection must be represented by a (source node, target name) cell in the table.
  for (FlowletConnection conn : input.getConnections()) {
    FlowletConnection.Type sourceType = conn.getSourceType();
    String sourceName = conn.getSourceName();
    String targetName = conn.getTargetName();

    QueueSpecificationGenerator.Node sourceNode;
    if (sourceType != FlowletConnection.Type.FLOWLET) {
      // Non-flowlet sources carry a namespace; fall back to the application's namespace when none is set.
      String declaredNamespace = conn.getSourceNamespace();
      String sourceNamespace = declaredNamespace != null ? declaredNamespace : appId.getNamespace();
      sourceNode = new QueueSpecificationGenerator.Node(sourceType, sourceNamespace, sourceName);
    } else {
      sourceNode = new QueueSpecificationGenerator.Node(sourceType, sourceName);
    }

    if (!queueSpecTable.contains(sourceNode, targetName)) {
      return VerifyResult.failure(Err.Flow.NO_INPUT_FOR_OUTPUT, flowName, targetName, sourceType, sourceName);
    }
  }

  // For each output entity, look for outputs that are not connected to any input.
  for (QueueSpecificationGenerator.Node node : queueSpecTable.rowKeySet()) {
    // Stream outputs are not checked.
    if (node.getType() == FlowletConnection.Type.STREAM) {
      continue;
    }
    // Start from all declared outputs of the flowlet and strike out each (queue name, schema) pair that
    // has a queue specification. Anything left over is an output that is not connected to any input.
    Multimap<String, Schema> unmatched = toMultimap(flowlets.get(node.getName()).getOutputs());
    for (Set<QueueSpecification> specs : queueSpecTable.row(node).values()) {
      for (QueueSpecification spec : specs) {
        unmatched.remove(spec.getQueueName().getSimpleName(), spec.getOutputSchema());
      }
    }
    if (!unmatched.isEmpty()) {
      return VerifyResult.failure(Err.Flow.MORE_OUTPUT_NOT_ALLOWED, flowName,
                                  node.getType().toString().toLowerCase(), node.getName(), unmatched);
    }
  }

  return VerifyResult.success();
}
use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.
the class DistributedFlowProgramRunner method getFlowletQueues.
/**
 * Computes, for every flowlet of the given {@link FlowSpecification}, the names of the queues that the
 * flowlet consumes from, as derived from the flow's connections.
 *
 * @param appId application id handed to the queue specification generator
 * @param flowSpec specification of the flow whose queues are resolved
 * @return an immutable multimap from flowlet id to the names of the queues it consumes
 */
private Multimap<String, QueueName> getFlowletQueues(ApplicationId appId, FlowSpecification flowSpec) {
  // Generate the queue specifications for the whole flow.
  Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> specTable =
    new SimpleQueueSpecificationGenerator(appId).create(flowSpec);

  ImmutableSetMultimap.Builder<String, QueueName> queuesByFlowlet = ImmutableSetMultimap.builder();
  for (String flowletId : flowSpec.getFlowlets().keySet()) {
    // The table column keyed by a flowlet id holds the queues for which that flowlet is the consumer.
    for (Set<QueueSpecification> specs : specTable.column(flowletId).values()) {
      for (QueueSpecification spec : specs) {
        queuesByFlowlet.put(flowletId, spec.getQueueName());
      }
    }
  }
  return queuesByFlowlet.build();
}
use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.
the class HBaseQueueDebugger method scanQueues.
/**
 * Scans the flowlet-sourced queues of every flow in the given namespaces and prints the combined
 * statistics to standard output.
 *
 * @param namespaceMetas namespaces whose applications and flows are walked
 * @throws Exception if any of the underlying store, impersonation or scan calls fails
 */
private void scanQueues(List<NamespaceMeta> namespaceMetas) throws Exception {
  final QueueStatistics totalStats = new QueueStatistics();

  for (NamespaceMeta namespaceMeta : namespaceMetas) {
    String namespace = namespaceMeta.getName();
    for (final ApplicationSpecification app : store.getAllApplications(new NamespaceId(namespace))) {
      ApplicationId appId = new ApplicationId(namespace, app.getName(), app.getAppVersion());
      for (final FlowSpecification flow : app.getFlows().values()) {
        final ProgramId flowId = appId.program(ProgramType.FLOW, flow.getName());
        // Each flow is scanned through the impersonator, keyed by the flow's program id.
        impersonator.doAs(flowId, new Callable<Void>() {
          @Override
          public Void call() throws Exception {
            Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> specTable =
              new SimpleQueueSpecificationGenerator(flowId.getParent()).create(flow);
            for (Table.Cell<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> cell
              : specTable.cellSet()) {
              // Only queues whose source node is a flowlet are scanned.
              if (cell.getRowKey().getType() != FlowletConnection.Type.FLOWLET) {
                continue;
              }
              for (QueueSpecification spec : cell.getValue()) {
                totalStats.add(scanQueue(spec.getQueueName(), null));
              }
            }
            return null;
          }
        });
      }
    }
  }

  System.out.printf("Total results for all queues: %s\n", totalStats.getReport(showTxTimestampOnly()));
}
use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.
the class FlowQueuePendingCorrector method run.
/**
 * Corrects the queue.pending metric for the queue between a producer flowlet and a consumer flowlet.
 *
 * <p>The flow must not be running. The method checks that the producer emits to the consumer through the
 * named queue, scans the queue to compute the pending count (unprocessed plus processed-but-not-visible
 * entries), reads the currently recorded metric value for reporting, and then emits the corrected value
 * as a gauge.
 *
 * @param flowId id of the flow that owns the flowlets
 * @param producerFlowlet name of the flowlet writing to the queue
 * @param consumerFlowlet name of the flowlet reading from the queue
 * @param flowletQueue simple name of the queue between the two flowlets
 * @param flow specification of the flow, used to generate the queue specifications
 * @throws IllegalStateException if the flow is still running or the metric query returns an unexpected shape
 * @throws IllegalArgumentException if the producer does not emit to the consumer or the queue does not exist
 * @throws Exception if any of the underlying services fails
 */
public void run(FlowId flowId, String producerFlowlet, String consumerFlowlet, String flowletQueue, FlowSpecification flow) throws Exception {
  System.out.println("Running queue.pending correction on flow '" + flowId + "' producerFlowlet '" + producerFlowlet + "' consumerFlowlet '" + consumerFlowlet + "' flowletQueue '" + flowletQueue + "'");
  Map<RunId, ProgramRuntimeService.RuntimeInfo> runtimeInfos = programRuntimeService.list(flowId);
  Preconditions.checkState(runtimeInfos.isEmpty(), "Cannot run tool when flow " + flowId + " is still running");

  SimpleQueueSpecificationGenerator queueSpecGenerator = new SimpleQueueSpecificationGenerator(flowId.getParent());
  Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> table = queueSpecGenerator.create(flow);
  Preconditions.checkArgument(table.contains(QueueSpecificationGenerator.Node.flowlet(producerFlowlet), consumerFlowlet), "Flowlet " + producerFlowlet + " is not emitting to " + consumerFlowlet);

  // Make sure the named queue is one of the queues between the producer and the consumer.
  Set<QueueSpecification> queueSpecs = table.get(QueueSpecificationGenerator.Node.flowlet(producerFlowlet), consumerFlowlet);
  boolean validQueue = false;
  for (QueueSpecification queueSpec : queueSpecs) {
    if (queueSpec.getQueueName().getSimpleName().equals(flowletQueue)) {
      validQueue = true;
      break;
    }
  }
  Preconditions.checkArgument(validQueue, "Queue " + flowletQueue + " does not exist for the given flowlets");

  QueueName queueName = QueueName.fromFlowlet(flowId, producerFlowlet, flowletQueue);
  long consumerGroupId = FlowUtils.generateConsumerGroupId(flowId, consumerFlowlet);
  long correctQueuePendingValue;
  try {
    HBaseQueueDebugger.QueueStatistics stats = queueDebugger.scanQueue(queueName, consumerGroupId);
    correctQueuePendingValue = stats.getUnprocessed() + stats.getProcessedAndNotVisible();
  } catch (NotFoundException e) {
    // OK since flowlet queue exists, but actual queue doesn't exist
    // (e.g. when running upgrade tool from 2.8 to 3.0)
    correctQueuePendingValue = 0;
  }

  Map<String, String> tags = ImmutableMap.<String, String>builder()
    .put(Constants.Metrics.Tag.NAMESPACE, flowId.getNamespace())
    .put(Constants.Metrics.Tag.APP, flowId.getApplication())
    .put(Constants.Metrics.Tag.FLOW, flowId.getProgram())
    .put(Constants.Metrics.Tag.CONSUMER, consumerFlowlet)
    .put(Constants.Metrics.Tag.PRODUCER, producerFlowlet)
    .put(Constants.Metrics.Tag.FLOWLET_QUEUE, flowletQueue)
    .build();

  // Look up the currently recorded value; it is only used for the report printed below.
  MetricDataQuery query = new MetricDataQuery(0, 0, Integer.MAX_VALUE, 1, ImmutableMap.of("system.queue.pending", AggregationFunction.SUM), tags, ImmutableList.<String>of(), null);
  Collection<MetricTimeSeries> results = metricStore.query(query);
  long queuePending;
  if (results.isEmpty()) {
    queuePending = 0;
  } else {
    System.out.println("Got results: " + GSON.toJson(results));
    Preconditions.checkState(results.size() == 1);
    List<TimeValue> timeValues = results.iterator().next().getTimeValues();
    Preconditions.checkState(timeValues.size() == 1);
    TimeValue timeValue = timeValues.get(0);
    queuePending = timeValue.getValue();
  }

  metricsCollectionService.startAndWait();
  try {
    MetricsContext collector = metricsCollectionService.getContext(tags);
    collector.gauge("queue.pending", correctQueuePendingValue);
    System.out.printf("Adjusted system.queue.pending metric from %d to %d (tags %s)\n", queuePending, correctQueuePendingValue, GSON.toJson(tags));
  } finally {
    // stop will flush the metrics; do it in finally so the started service is never left running
    // when emitting the gauge fails.
    metricsCollectionService.stopAndWait();
  }
}
use of co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator in project cdap by caskdata.
the class FlowletProgramRunner method run.
/**
 * Starts one instance of a flowlet of the given flow program.
 *
 * <p>The flowlet name, instance id and instance count are read from {@code options}. The method validates
 * them against the program's application specification, instantiates the flowlet class, injects its
 * fields, builds the process specifications, and starts the flowlet driver.
 *
 * @param program the flow program that contains the flowlet
 * @param options program options carrying the flowlet name and the instance arguments
 * @return the controller of the started flowlet
 * @throws RuntimeException wrapping (via {@link Throwables#propagate}) any exception raised during setup;
 *                          the flowlet context, if already created, is closed first
 */
@SuppressWarnings("unchecked")
@Override
public ProgramController run(Program program, ProgramOptions options) {
  BasicFlowletContext flowletContext = null;
  try {
    // Extract and verify parameters
    String flowletName = options.getName();
    int instanceId = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1"));
    Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");
    int instanceCount = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0"));
    Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");
    RunId runId = ProgramRunners.getRunId(options);
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.FLOW, "Only FLOW process type is supported.");
    String processorName = program.getName();
    Preconditions.checkNotNull(processorName, "Missing processor name.");
    FlowSpecification flowSpec = appSpec.getFlows().get(processorName);
    // Fail with a descriptive message instead of a bare NPE when the flow is not part of the application.
    Preconditions.checkNotNull(flowSpec, "Specification missing for flow \"%s\"", processorName);
    FlowletDefinition flowletDef = flowSpec.getFlowlets().get(flowletName);
    Preconditions.checkNotNull(flowletDef, "Definition missing for flowlet \"%s\"", flowletName);
    Class<?> clz = Class.forName(flowletDef.getFlowletSpec().getClassName(), true, program.getClassLoader());
    Preconditions.checkArgument(Flowlet.class.isAssignableFrom(clz), "%s is not a Flowlet.", clz);

    // Setup dataset framework context, if required
    ProgramId programId = program.getId();
    FlowletId flowletId = programId.flowlet(flowletName);
    ProgramRunId run = programId.run(runId);
    ProgramContext programContext = new BasicProgramContext(run, flowletId);
    if (dsFramework instanceof ProgramContextAware) {
      ((ProgramContextAware) dsFramework).setContext(programContext);
    }

    // Safe cast: clz was checked above to be assignable to Flowlet.
    Class<? extends Flowlet> flowletClass = (Class<? extends Flowlet>) clz;

    // Creates flowlet context
    flowletContext = new BasicFlowletContext(program, options, flowletId, instanceId, instanceCount, flowletDef.getDatasets(), flowletDef.getFlowletSpec(), metricsCollectionService, discoveryServiceClient, txClient, dsFramework, secureStore, secureStoreManager, messageService, cConf);

    // Creates tx related objects
    DataFabricFacade dataFabricFacade = dataFabricFacadeFactory.create(program, flowletContext.getDatasetCache());
    if (dataFabricFacade instanceof ProgramContextAware) {
      ((ProgramContextAware) dataFabricFacade).setContext(programContext);
    }

    // Creates QueueSpecification
    Table<Node, String, Set<QueueSpecification>> queueSpecs = new SimpleQueueSpecificationGenerator(new ApplicationId(program.getNamespaceId(), program.getApplicationId())).create(flowSpec);
    Flowlet flowlet = new InstantiatorFactory(false).get(TypeToken.of(flowletClass)).create();
    TypeToken<? extends Flowlet> flowletType = TypeToken.of(flowletClass);

    // Set the context classloader to the cdap classloader. It is needed for the DatumWriterFactory be able
    // to load cdap classes
    Thread.currentThread().setContextClassLoader(FlowletProgramRunner.class.getClassLoader());

    // Inject DataSet, OutputEmitter, Metric fields
    ImmutableList.Builder<ProducerSupplier> queueProducerSupplierBuilder = ImmutableList.builder();
    Reflections.visit(flowlet, flowlet.getClass(), new PropertyFieldSetter(flowletDef.getFlowletSpec().getProperties()), new DataSetFieldSetter(flowletContext), new MetricsFieldSetter(flowletContext.getMetrics()), new OutputEmitterFieldSetter(outputEmitterFactory(flowletContext, flowletName, dataFabricFacade, queueProducerSupplierBuilder, queueSpecs)));

    ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder = ImmutableList.builder();
    Collection<ProcessSpecification<?>> processSpecs = createProcessSpecification(flowletContext, flowletType, processMethodFactory(flowlet), processSpecificationFactory(flowletContext, dataFabricFacade, queueReaderFactory, flowletName, queueSpecs, queueConsumerSupplierBuilder, createSchemaCache(program)), Lists.<ProcessSpecification<?>>newLinkedList());
    List<ConsumerSupplier<?>> consumerSuppliers = queueConsumerSupplierBuilder.build();

    // Create the flowlet driver
    AtomicReference<FlowletProgramController> controllerRef = new AtomicReference<>();
    Service serviceHook = createServiceHook(flowletName, consumerSuppliers, controllerRef);
    FlowletRuntimeService driver = new FlowletRuntimeService(flowlet, flowletContext, processSpecs, createCallback(flowlet, flowletDef.getFlowletSpec()), dataFabricFacade, serviceHook);
    FlowletProgramController controller = new FlowletProgramController(program.getId(), flowletName, flowletContext, driver, queueProducerSupplierBuilder.build(), consumerSuppliers);
    controllerRef.set(controller);

    LOG.info("Starting flowlet: {}", flowletContext);
    driver.start();
    LOG.info("Flowlet started: {}", flowletContext);
    return controller;
  } catch (Exception e) {
    // Something failed before the controller could be handed back to the caller. Close the flowlet
    // context, if it was already created, before propagating the failure.
    if (flowletContext != null) {
      flowletContext.close();
    }
    throw Throwables.propagate(e);
  }
}
Aggregations