use of org.apache.samza.context.ContainerContext in project samza by apache.
the class TestProjectTranslator method testTranslate.
/**
 * Exercises {@code ProjectTranslator.translate()} against a mocked {@code LogicalProject} and checks that:
 * the project node and its output message stream are registered with the translator context,
 * the registered stream is backed by a MAP operator, initializing the operator's transform function
 * populates the map function's internal state and the metrics registry, and applying the map function
 * yields the projected field and bumps the counters.
 */
@Test
public void testTranslate() throws IOException, ClassNotFoundException {
  // Set up the mocks consumed by ProjectTranslator
  LogicalProject mockProject = PowerMockito.mock(LogicalProject.class);
  Context mockContext = mock(Context.class);
  ContainerContext mockContainerContext = mock(ContainerContext.class);
  TranslatorContext mockTranslatorContext = mock(TranslatorContext.class);
  TestMetricsRegistryImpl testMetricsRegistryImpl = new TestMetricsRegistryImpl();
  RelNode mockInput = mock(RelNode.class);
  List<RelNode> inputs = new ArrayList<>();
  inputs.add(mockInput);
  when(mockInput.getId()).thenReturn(1);
  when(mockProject.getId()).thenReturn(2);
  when(mockProject.getInputs()).thenReturn(inputs);
  when(mockProject.getInput()).thenReturn(mockInput);
  RelDataType mockRowType = mock(RelDataType.class);
  when(mockRowType.getFieldCount()).thenReturn(1);
  when(mockProject.getRowType()).thenReturn(mockRowType);
  RexNode mockRexField = mock(RexNode.class);
  List<Pair<RexNode, String>> namedProjects = new ArrayList<>();
  namedProjects.add(Pair.of(mockRexField, "test_field"));
  when(mockProject.getNamedProjects()).thenReturn(namedProjects);
  StreamApplicationDescriptorImpl mockAppDesc = mock(StreamApplicationDescriptorImpl.class);
  OperatorSpec<Object, SamzaSqlRelMessage> mockInputOp = mock(OperatorSpec.class);
  MessageStream<SamzaSqlRelMessage> mockStream = new MessageStreamImpl<>(mockAppDesc, mockInputOp);
  when(mockTranslatorContext.getMessageStream(eq(1))).thenReturn(mockStream);
  doAnswer(this.getRegisterMessageStreamAnswer())
      .when(mockTranslatorContext)
      .registerMessageStream(eq(2), any(MessageStream.class));
  RexToJavaCompiler mockCompiler = mock(RexToJavaCompiler.class);
  when(mockTranslatorContext.getExpressionCompiler()).thenReturn(mockCompiler);
  Expression mockExpr = mock(Expression.class);
  when(mockCompiler.compile(any(), any())).thenReturn(mockExpr);
  when(mockContext.getContainerContext()).thenReturn(mockContainerContext);
  when(mockContainerContext.getContainerMetricsRegistry()).thenReturn(testMetricsRegistryImpl);

  // Apply translate() method to verify that we are getting the correct map operator constructed
  ProjectTranslator projectTranslator = new ProjectTranslator(1);
  projectTranslator.translate(mockProject, LOGICAL_OP_ID, mockTranslatorContext);

  // Make sure the LogicalProject node and its output message stream were registered with the context
  verify(mockTranslatorContext, times(1)).registerRelNode(2, mockProject);
  verify(mockTranslatorContext, times(1)).registerMessageStream(2, this.getRegisteredMessageStream(2));
  when(mockTranslatorContext.getRelNode(2)).thenReturn(mockProject);
  when(mockTranslatorContext.getMessageStream(2)).thenReturn(this.getRegisteredMessageStream(2));
  StreamOperatorSpec projectSpec =
      (StreamOperatorSpec) Whitebox.getInternalState(this.getRegisteredMessageStream(2), "operatorSpec");
  assertNotNull(projectSpec);
  // assertEquals takes (expected, actual); keep that order so failure messages read correctly
  assertEquals(OperatorSpec.OpCode.MAP, projectSpec.getOpCode());

  // Verify that init() on the transform function establishes the context for the map function
  Map<Integer, TranslatorContext> mockContexts = new HashMap<>();
  mockContexts.put(1, mockTranslatorContext);
  when(mockContext.getApplicationTaskContext()).thenReturn(new SamzaSqlApplicationContext(mockContexts));
  projectSpec.getTransformFn().init(mockContext);
  MapFunction mapFn = (MapFunction) Whitebox.getInternalState(projectSpec, "mapFn");
  assertNotNull(mapFn);
  assertEquals(mockTranslatorContext, Whitebox.getInternalState(mapFn, "translatorContext"));
  assertEquals(mockProject, Whitebox.getInternalState(mapFn, "project"));
  assertEquals(mockExpr, Whitebox.getInternalState(mapFn, "expr"));

  // Verify TestMetricsRegistryImpl works with Project
  assertEquals(1, testMetricsRegistryImpl.getGauges().size());
  assertEquals(2, testMetricsRegistryImpl.getGauges().get(LOGICAL_OP_ID).size());
  assertEquals(1, testMetricsRegistryImpl.getCounters().size());
  assertEquals(2, testMetricsRegistryImpl.getCounters().get(LOGICAL_OP_ID).size());
  assertEquals(0, testMetricsRegistryImpl.getCounters().get(LOGICAL_OP_ID).get(0).getCount());
  assertEquals(0, testMetricsRegistryImpl.getCounters().get(LOGICAL_OP_ID).get(1).getCount());

  // Calling mapFn.apply() to verify the project function is correctly applied to the input message
  SamzaSqlRelMessage mockInputMsg =
      new SamzaSqlRelMessage(new ArrayList<>(), new ArrayList<>(), new SamzaSqlRelMsgMetadata(0L, 0L));
  SamzaSqlExecutionContext executionContext = mock(SamzaSqlExecutionContext.class);
  DataContext dataContext = mock(DataContext.class);
  when(mockTranslatorContext.getExecutionContext()).thenReturn(executionContext);
  when(mockTranslatorContext.getDataContext()).thenReturn(dataContext);
  Object[] result = new Object[1];
  final Object mockFieldObj = new Object();
  // The stubbed expression writes the projected value into the output array passed as the fifth argument
  doAnswer(invocation -> {
    Object[] retValue = invocation.getArgumentAt(4, Object[].class);
    retValue[0] = mockFieldObj;
    return null;
  }).when(mockExpr).execute(
      eq(executionContext),
      eq(mockContext),
      eq(dataContext),
      eq(mockInputMsg.getSamzaSqlRelRecord().getFieldValues().toArray()),
      eq(result));
  SamzaSqlRelMessage retMsg = (SamzaSqlRelMessage) mapFn.apply(mockInputMsg);
  assertEquals(Collections.singletonList("test_field"), retMsg.getSamzaSqlRelRecord().getFieldNames());
  assertEquals(Collections.singletonList(mockFieldObj), retMsg.getSamzaSqlRelRecord().getFieldValues());

  // Verify mapFn.apply() updates the TestMetricsRegistryImpl metrics
  assertEquals(1, testMetricsRegistryImpl.getCounters().get(LOGICAL_OP_ID).get(0).getCount());
  assertEquals(1, testMetricsRegistryImpl.getCounters().get(LOGICAL_OP_ID).get(1).getCount());
}
use of org.apache.samza.context.ContainerContext in project samza by apache.
the class SamzaSqlApplication method describe.
/**
 * Describes the SQL application: reads the SQL statements from the descriptor's config, obtains the
 * Calcite plan roots, translates each root into Samza stream operators under its own query id, and
 * installs a task-context factory that exposes the per-query translator contexts.
 *
 * @param appDescriptor the descriptor the stream graph is built on
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  try {
    // TODO: Introduce an API to return a dsl string containing one or more sql statements.
    List<String> sqlStatements = SamzaSqlDslConverter.fetchSqlFromConfig(appDescriptor.getConfig());
    Map<Integer, TranslatorContext> contextsByQueryId = new HashMap<>();

    // 1. Get Calcite plan; the two stream lists are filled in by the call below
    List<String> inputStreams = new LinkedList<>();
    List<String> outputStreams = new LinkedList<>();
    Collection<RelRoot> planRoots = SamzaSqlApplicationConfig.populateSystemStreamsAndGetRelRoots(
        sqlStatements, appDescriptor.getConfig(), inputStreams, outputStreams);

    // 2. Populate configs
    SamzaSqlApplicationConfig sqlConfig =
        new SamzaSqlApplicationConfig(appDescriptor.getConfig(), inputStreams, outputStreams);

    // 3. Translate Calcite plan to Samza stream operators
    QueryTranslator translator = new QueryTranslator(appDescriptor, sqlConfig);
    SamzaSqlExecutionContext sqlExecutionContext = new SamzaSqlExecutionContext(sqlConfig);
    // The query id is the position of the query among the statements and always starts at 0.
    int queryId = 0;
    for (RelRoot root : planRoots) {
      LOG.info("Translating relRoot {} to samza stream graph with queryId {}", root, queryId);
      TranslatorContext queryContext = new TranslatorContext(appDescriptor, root, sqlExecutionContext);
      contextsByQueryId.put(queryId, queryContext);
      translator.translate(root, sqlConfig.getOutputSystemStreams().get(queryId), queryContext, queryId);
      queryId++;
    }

    // 4. Set all translator contexts
    /*
     * TODO When serialization of ApplicationDescriptor is actually needed, then something will need to be updated
     * here, since translatorContext is not Serializable. Currently, a new ApplicationDescriptor instance is created
     * in each container, so it does not need to be serialized. Therefore, the translatorContext is recreated in
     * each container and does not need to be serialized.
     */
    ApplicationTaskContextFactory<SamzaSqlApplicationContext> taskContextFactory =
        new ApplicationTaskContextFactory<SamzaSqlApplicationContext>() {
          @Override
          public SamzaSqlApplicationContext create(ExternalContext externalContext, JobContext jobContext,
              ContainerContext containerContext, TaskContext taskContext,
              ApplicationContainerContext applicationContainerContext) {
            return new SamzaSqlApplicationContext(contextsByQueryId);
          }
        };
    appDescriptor.withApplicationTaskContextFactory(taskContextFactory);
  } catch (RuntimeException e) {
    // Log for visibility, then let the failure propagate unchanged.
    LOG.error("SamzaSqlApplication threw exception.", e);
    throw e;
  }
}
use of org.apache.samza.context.ContainerContext in project samza by apache.
the class TranslatorInputMetricsMapFunction method init.
/**
 * Initializes this TranslatorInputMetricsMapFunction before any message is processed: fetches the
 * container metrics registry from the context, registers the input-events counter for this logical
 * operator, and clears it.
 *
 * @param context the {@link Context} for this task
 */
@Override
public void init(Context context) {
  metricsRegistry = context.getContainerContext().getContainerMetricsRegistry();
  inputEvents = metricsRegistry.newCounter(logicalOpId, TranslatorConstants.INPUT_EVENTS_NAME);
  // Clear the counter right after registering it.
  inputEvents.clear();
}
use of org.apache.samza.context.ContainerContext in project samza by apache.
the class TranslatorOutputMetricsMapFunction method init.
/**
 * Initializes this TranslatorOutputMetricsMapFunction before any message is processed: fetches the
 * container metrics registry from the context, creates the processing-time histogram, then registers
 * and clears the output-events counter for this logical operator.
 *
 * @param context the {@link Context} for this task
 */
@Override
public void init(Context context) {
  metricsRegistry = context.getContainerContext().getContainerMetricsRegistry();
  // Histogram first, counter second: keep the original registration order.
  processingTime = new SamzaHistogram(metricsRegistry, logicalOpId, TranslatorConstants.PROCESSING_TIME_NAME);
  outputEvents = metricsRegistry.newCounter(logicalOpId, TranslatorConstants.OUTPUT_EVENTS_NAME);
  outputEvents.clear();
}
use of org.apache.samza.context.ContainerContext in project samza by apache.
the class ContainerStorageManager method restoreStores.
/**
 * Restores all non-side-input stores, in parallel across tasks.
 *
 * <p>Steps, in order: read the last checkpoint of each active task, create and initialize the
 * per-task restore managers, start the store consumers, kick off one restore per task and restore
 * manager, wait for all of them, stop the store consumers, and finally (re)create the task stores and
 * merge in the in-memory and side-input stores.
 *
 * @throws InterruptedException if the calling thread is interrupted while waiting for a restore; the
 *     restore executor is shut down before the exception is rethrown
 * @throws SamzaException if any restore fails
 */
private void restoreStores() throws InterruptedException {
  LOG.info("Store Restore started");
  Set<TaskName> activeTasks = getTasks(containerModel, TaskMode.Active).keySet();
  // TODO HIGH dchen verify davinci lifecycle
  // Find all non-side input stores
  Set<String> nonSideInputStoreNames = storageEngineFactories.keySet().stream()
      .filter(storeName -> !sideInputStoreNames.contains(storeName))
      .collect(Collectors.toSet());

  // Obtain the checkpoints for each task
  Map<TaskName, Map<String, TaskRestoreManager>> taskRestoreManagers = new HashMap<>();
  Map<TaskName, Checkpoint> taskCheckpoints = new HashMap<>();
  containerModel.getTasks().forEach((taskName, taskModel) -> {
    Checkpoint taskCheckpoint = null;
    if (checkpointManager != null && activeTasks.contains(taskName)) {
      // only pass in checkpoints for active tasks
      taskCheckpoint = checkpointManager.readLastCheckpoint(taskName);
      LOG.info("Obtained checkpoint: {} for state restore for taskName: {}", taskCheckpoint, taskName);
    }
    // Tasks without a checkpoint are recorded with a null checkpoint
    taskCheckpoints.put(taskName, taskCheckpoint);
    Map<String, Set<String>> backendFactoryStoreNames =
        getBackendFactoryStoreNames(taskCheckpoint, nonSideInputStoreNames, new StorageConfig(config));
    Map<String, TaskRestoreManager> taskStoreRestoreManagers = createTaskRestoreManagers(
        restoreStateBackendFactories, backendFactoryStoreNames, clock, samzaContainerMetrics, taskName, taskModel);
    taskRestoreManagers.put(taskName, taskStoreRestoreManagers);
  });

  // Initialize each TaskRestoreManager with its task's checkpoint
  taskRestoreManagers.forEach((taskName, restoreManagers) ->
      restoreManagers.forEach((factoryName, taskRestoreManager) ->
          taskRestoreManager.init(taskCheckpoints.get(taskName))));

  // Start each store consumer once.
  // Note: These consumers are per system and only changelog system store consumers will be started.
  // Some TaskRestoreManagers may not require the consumer to be started, but due to the agnostic nature of
  // ContainerStorageManager we always start the changelog consumer here in case it is required
  this.storeConsumers.values().stream().distinct().forEach(SystemConsumer::start);

  List<Future<Void>> taskRestoreFutures = new ArrayList<>();
  // Submit restore callable for each taskInstance
  taskRestoreManagers.forEach((taskInstance, restoreManagersMap) -> {
    // Submit for each restore factory
    restoreManagersMap.forEach((factoryName, taskRestoreManager) -> {
      long startTime = System.currentTimeMillis();
      String taskName = taskInstance.getTaskName();
      LOG.info("Starting restore for state for task: {}", taskName);
      CompletableFuture<Void> restoreFuture = taskRestoreManager.restore().handle((res, ex) -> {
        // Close the restore manager once its restore has completed, whether it succeeded or failed.
        // NOTE(review): the original comment here was truncated; it appears to explain that some stores are
        // compacted on stop, so paralleling stop() also parallelizes their compaction (a time-intensive
        // operation) -- confirm against upstream.
        try {
          taskRestoreManager.close();
        } catch (Exception e) {
          LOG.error("Error closing restore manager for task: {} after {} restore",
              taskName, ex != null ? "unsuccessful" : "successful", e);
          // ignore exception from close. container may still be able to continue processing/backups
          // if restore manager close fails.
        }

        long timeToRestore = System.currentTimeMillis() - startTime;
        if (samzaContainerMetrics != null) {
          // Map.get already yields null for a missing key, so no explicit null default is needed
          Gauge taskGauge = samzaContainerMetrics.taskStoreRestorationMetrics().get(taskInstance);
          if (taskGauge != null) {
            taskGauge.set(timeToRestore);
          }
        }

        if (ex != null) {
          // log and rethrow exception to communicate restore failure
          String msg = String.format("Error restoring state for task: %s", taskName);
          LOG.error(msg, ex);
          // wrap in unchecked exception to throw from lambda
          throw new SamzaException(msg, ex);
        } else {
          return null;
        }
      });
      taskRestoreFutures.add(restoreFuture);
    });
  });

  // Wait for every restore to finish; any failure other than an interrupt is rethrown as a SamzaException
  for (Future<Void> future : taskRestoreFutures) {
    try {
      future.get();
    } catch (InterruptedException e) {
      LOG.warn("Received an interrupt during store restoration. Interrupting the restore executor to exit " + "prematurely without restoring full state.");
      restoreExecutor.shutdownNow();
      throw e;
    } catch (Exception e) {
      LOG.error("Exception when restoring state.", e);
      throw new SamzaException("Exception when restoring state.", e);
    }
  }

  // Stop each store consumer once
  this.storeConsumers.values().stream().distinct().forEach(SystemConsumer::stop);

  // Now create persistent non side input stores in read-write mode, leave non-persistent stores as-is
  this.taskStores = createTaskStores(nonSideInputStoreNames, this.containerModel, jobContext, containerContext,
      storageEngineFactories, serdes, taskInstanceMetrics, taskInstanceCollectors);

  // Add in memory stores
  this.inMemoryStores.forEach((taskName, stores) ->
      this.taskStores.computeIfAbsent(taskName, key -> new HashMap<>()).putAll(stores));

  // Add side input stores
  this.sideInputStores.forEach((taskName, stores) ->
      this.taskStores.computeIfAbsent(taskName, key -> new HashMap<>()).putAll(stores));

  LOG.info("Store Restore complete");
}
Aggregations