Use of org.apache.samza.operators.Scheduler in the Apache Beam project.
Class DoFnOp, method open.
/**
 * Prepares this operator for processing: resets the watermark bookkeeping, reads the pipeline
 * options from the application container context, and wires up the bundle manager, timer
 * internals factory, side-input handler, DoFn runner (portable or not, depending on
 * {@code isPortable}), pushback runner and DoFn invoker.
 *
 * @param config the job configuration; this method does not read it directly
 * @param context supplies the application container context and the task context
 * @param timerRegistry scheduler passed to the bundle manager and the timer internals factory
 * @param emitter emitter handed to the output manager factory for the DoFn runner's output
 */
@Override
@SuppressWarnings("unchecked")
public void open(Config config, Context context, Scheduler<KeyedTimerData<Void>> timerRegistry, OpEmitter<OutT> emitter) {
  // Input-side watermarks begin at the minimum timestamp; the pushback hold begins at the maximum.
  this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE;

  final DoFnSignature doFnSignature = DoFnSignatures.getSignature(doFn.getClass());
  final SamzaExecutionContext executionContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();
  this.samzaPipelineOptions = executionContext.getPipelineOptions();
  // A maximum bundle size of one (or less) means bundling is turned off.
  this.bundleDisabled = samzaPipelineOptions.getMaxBundleSize() <= 1;

  final String pardoStateId = "pardo-" + transformId;
  final SamzaStoreStateInternals.Factory<?> nonKeyedStateFactory =
      SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
          pardoStateId, context.getTaskContext(), samzaPipelineOptions);
  final FutureCollector<OutT> futureCollector = createFutureCollector();

  this.bundleManager =
      new BundleManager<>(
          createBundleProgressListener(),
          futureCollector,
          samzaPipelineOptions.getMaxBundleSize(),
          samzaPipelineOptions.getMaxBundleTimeMs(),
          timerRegistry,
          bundleCheckTimerId);
  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          keyCoder,
          (Scheduler) timerRegistry,
          getTimerStateId(doFnSignature),
          nonKeyedStateFactory,
          windowingStrategy,
          isBounded,
          samzaPipelineOptions);
  // Side inputs are backed by the non-keyed state internals (looked up with a null key).
  this.sideInputHandler =
      new SideInputHandler(sideInputs, nonKeyedStateFactory.stateInternalsForKey(null));

  if (isPortable) {
    // Portable execution: build the runner around the executable stage and its bundle factory.
    final ExecutableStage stage = ExecutableStage.fromPayload(stagePayload);
    stageContext = SamzaExecutableStageContextFactory.getInstance().get(jobInfo);
    stageBundleFactory = stageContext.getStageBundleFactory(stage);
    this.fnRunner =
        SamzaDoFnRunners.createPortable(
            transformId,
            bundleStateId,
            windowedValueCoder,
            stage,
            sideInputMapping,
            sideInputHandler,
            nonKeyedStateFactory,
            timerInternalsFactory,
            samzaPipelineOptions,
            outputManagerFactory.create(emitter, futureCollector),
            stageBundleFactory,
            mainOutputTag,
            idToTupleTagMap,
            context,
            transformFullName);
  } else {
    // Non-portable execution: run the DoFn directly.
    this.fnRunner =
        SamzaDoFnRunners.create(
            samzaPipelineOptions,
            doFn,
            windowingStrategy,
            transformFullName,
            pardoStateId,
            context,
            mainOutputTag,
            sideInputHandler,
            timerInternalsFactory,
            keyCoder,
            outputManagerFactory.create(emitter, futureCollector),
            inputCoder,
            sideOutputTags,
            outputCoders,
            doFnSchemaInformation,
            (Map<String, PCollectionView<?>>) sideInputMapping);
  }

  this.pushbackFnRunner =
      SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler);
  this.pushbackValues = new ArrayList<>();

  final Iterator<SamzaDoFnInvokerRegistrar> registrars =
      ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator();
  if (registrars.hasNext()) {
    // A registrar was discovered through ServiceLoader: exactly one is expected, and it
    // supplies the invoker.
    doFnInvoker =
        Iterators.getOnlyElement(registrars).invokerSetupFor(doFn, samzaPipelineOptions, context);
  } else {
    // No registrar available: fall back to the default invoker.
    doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, samzaPipelineOptions);
  }
}
Use of org.apache.samza.operators.Scheduler in the Apache Samza project.
Class OperatorImplGraph, method createAndRegisterOperatorImpl.
/**
 * Walks the {@link OperatorSpec} DAG depth-first from {@code operatorSpec}, building and
 * registering the matching {@link OperatorImpl} for every spec reached, and returns the
 * {@link OperatorImpl} that corresponds to {@code operatorSpec} itself.
 *
 * <p>If an impl is already registered under the spec's op id (and the spec is not a
 * {@link JoinOperatorSpec}), no new impl is created: the existing one only has the input
 * stream registered on it, and the traversal continues downstream.
 *
 * @param prevOperatorSpec the spec from which {@code operatorSpec} was reached in the traversal
 * @param operatorSpec the {@link OperatorSpec} whose {@link OperatorImpl} is wanted
 * @param inputStream the source input stream the traversal started from
 * @param context the {@link Context} used when instantiating operators
 * @return the operator implementation for {@code operatorSpec}
 */
private OperatorImpl createAndRegisterOperatorImpl(OperatorSpec prevOperatorSpec, OperatorSpec operatorSpec, SystemStream inputStream, Context context) {
  final boolean reuseExisting =
      operatorImpls.containsKey(operatorSpec.getOpId()) && !(operatorSpec instanceof JoinOperatorSpec);

  if (reuseExisting) {
    // An impl for this spec was instantiated and registered earlier in the traversal.
    OperatorImpl existingImpl = operatorImpls.get(operatorSpec.getOpId());
    existingImpl.registerInputStream(inputStream);
    // Keep descending so that downstream impls also learn about this input stream.
    Collection<OperatorSpec> downstreamSpecs = operatorSpec.getRegisteredOperatorSpecs();
    for (OperatorSpec downstreamSpec : downstreamSpecs) {
      createAndRegisterOperatorImpl(operatorSpec, downstreamSpec, inputStream, context);
    }
    return existingImpl;
  }

  // First visit of this spec, or a join spec (which needs two partial join impls):
  // create the impl, then initialize and register its sub-DAG.
  OperatorImpl newImpl = createOperatorImpl(prevOperatorSpec, operatorSpec, context);
  newImpl.init(this.internalTaskContext);
  newImpl.registerInputStream(inputStream);

  if (operatorSpec.getScheduledFn() != null) {
    final Scheduler operatorScheduler = newImpl.createOperatorScheduler();
    operatorSpec.getScheduledFn().schedule(operatorScheduler);
  }

  // The map key is the opImplId, which can differ from the opId for some impls
  // (e.g. PartialJoinOperatorImpl). That is acceptable today: a join cannot be part of a
  // cycle, so a partial join impl never has to be looked up again during traversal.
  operatorImpls.put(newImpl.getOpImplId(), newImpl);

  Collection<OperatorSpec> downstreamSpecs = operatorSpec.getRegisteredOperatorSpecs();
  for (OperatorSpec downstreamSpec : downstreamSpecs) {
    LOG.debug("Creating operator {} with opCode: {}", downstreamSpec.getOpId(), downstreamSpec.getOpCode());
    OperatorImpl downstreamImpl =
        createAndRegisterOperatorImpl(operatorSpec, downstreamSpec, inputStream, context);
    newImpl.registerNextOperator(downstreamImpl);
  }
  return newImpl;
}
Aggregations