Use of co.cask.cdap.etl.common.TransformDetail in project cdap by caskdata.
The class ETLWorker, method initializeTransforms:
private void initializeTransforms(WorkerContext context, Map<String, TransformDetail> transformDetailMap,
                                  PipelinePhase pipeline) throws Exception {
  Set<StageInfo> transformInfos = pipeline.getStagesOfType(Transform.PLUGIN_TYPE);
  Preconditions.checkArgument(transformInfos != null);
  tranformIdToDatasetName = new HashMap<>(transformInfos.size());
  for (StageInfo transformInfo : transformInfos) {
    String transformName = transformInfo.getName();
    try {
      Transform<?, ?> transform = context.newPluginInstance(transformName);
      transform = new WrappedTransform<>(transform, Caller.DEFAULT);
      WorkerRealtimeContext transformContext =
        new WorkerRealtimeContext(context, metrics, new TxLookupProvider(context), transformInfo);
      LOG.debug("Transform Class : {}", transform.getClass().getName());
      transform.initialize(transformContext);
      StageMetrics stageMetrics = new DefaultStageMetrics(metrics, transformName);
      transformDetailMap.put(transformName, new TransformDetail(
        new TrackedTransform<>(transform, stageMetrics, context.getDataTracer(transformName)),
        pipeline.getStageOutputs(transformName)));
      if (transformInfo.getErrorDatasetName() != null) {
        tranformIdToDatasetName.put(transformName, transformInfo.getErrorDatasetName());
      }
    } catch (InstantiationException e) {
      LOG.error("Unable to instantiate Transform", e);
      Throwables.propagate(e);
    }
  }
}
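
Both put calls above build a TransformDetail from a Transformation (wrapped for tracking) and the set of stage names that consume the stage's output, obtained from pipeline.getStageOutputs. Inferred purely from these call sites, the holder has roughly the shape below; field and accessor names are assumptions for illustration, not the actual co.cask.cdap.etl.common.TransformDetail source.

import java.util.Set;

import co.cask.cdap.etl.api.Transformation;

// Sketch of the pairing implied by the call sites above; names are assumed.
public class TransformDetail {
  private final Transformation transformation; // the (tracked) transform to run for this stage
  private final Set<String> nextStages;        // names of the stages fed by this stage's output

  public TransformDetail(Transformation transformation, Set<String> nextStages) {
    this.transformation = transformation;
    this.nextStages = nextStages;
  }

  public Transformation getTransformation() {
    return transformation;
  }

  public Set<String> getNextStages() {
    return nextStages;
  }
}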
Use of co.cask.cdap.etl.common.TransformDetail in project cdap by caskdata.
The class ETLWorker, method initializeSinks:
@SuppressWarnings("unchecked")
private void initializeSinks(WorkerContext context, Map<String, TransformDetail> transformationMap,
                             PipelinePhase pipeline) throws Exception {
  Set<StageInfo> sinkInfos = pipeline.getStagesOfType(RealtimeSink.PLUGIN_TYPE);
  sinks = new HashMap<>(sinkInfos.size());
  for (StageInfo sinkInfo : sinkInfos) {
    String sinkName = sinkInfo.getName();
    RealtimeSink sink = context.newPluginInstance(sinkName);
    sink = new LoggedRealtimeSink(sinkName, sink);
    WorkerRealtimeContext sinkContext =
      new WorkerRealtimeContext(context, metrics, new TxLookupProvider(context), sinkInfo);
    LOG.debug("Sink Class : {}", sink.getClass().getName());
    sink.initialize(sinkContext);
    sink = new TrackedRealtimeSink(sink, new DefaultStageMetrics(metrics, sinkName));
    Transformation identityTransformation = new Transformation() {
      @Override
      public void transform(Object input, Emitter emitter) throws Exception {
        emitter.emit(input);
      }
    };
    // The identity transformation lets the pipeline execute sinks the same way as transforms
    // (similar to ETLMapReduce). Metrics should be emitted during the write to the sink, not
    // during this pass-through, so only the records-in count is tracked (the out metric is null).
    TrackedTransform trackedTransform =
      new TrackedTransform(identityTransformation, new DefaultStageMetrics(metrics, sinkName),
                           TrackedTransform.RECORDS_IN, null, context.getDataTracer(sinkName));
    transformationMap.put(sinkInfo.getName(), new TransformDetail(trackedTransform, new HashSet<String>()));
    sinks.put(sinkInfo.getName(), sink);
  }
}
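
The anonymous identity transformation is the key trick here: each sink is registered in the same TransformDetail map as the transforms, behind a pass-through Transformation with an empty next-stage set, so the executor can drive every stage uniformly while the actual write (and its metrics) stays in the sink. A typed variant of that pass-through, assuming the Transformation and Emitter interfaces from co.cask.cdap.etl.api, is a one-liner:

import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.Transformation;

// Generic form of the anonymous pass-through above: every record is forwarded unchanged.
public class IdentityTransformation<T> implements Transformation<T, T> {
  @Override
  public void transform(T input, Emitter<T> emitter) throws Exception {
    emitter.emit(input);
  }
}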
Use of co.cask.cdap.etl.common.TransformDetail in project cdap by caskdata.
The class ETLWorker, method initialize:
@Override
public void initialize(final WorkerContext context) throws Exception {
  if (Boolean.valueOf(context.getSpecification().getProperty(Constants.STAGE_LOGGING_ENABLED))) {
    LogStageInjector.start();
  }
  super.initialize(context);
  Map<String, String> properties = context.getSpecification().getProperties();
  appName = context.getApplicationSpecification().getName();
  Preconditions.checkArgument(properties.containsKey(Constants.PIPELINEID));
  Preconditions.checkArgument(properties.containsKey(UNIQUE_ID));
  String uniqueId = properties.get(UNIQUE_ID);
  // Each worker instance should have its own unique state. The final local shadows the appName
  // field so it can be captured by the anonymous TxRunnable below.
  final String appName = context.getApplicationSpecification().getName();
  stateStoreKey = String.format("%s%s%s%s%s", appName, SEPARATOR, uniqueId, SEPARATOR, context.getInstanceId());
  stateStoreKeyBytes = Bytes.toBytes(stateStoreKey);
  Transactionals.execute(getContext(), new TxRunnable() {
    @Override
    public void run(DatasetContext dsContext) throws Exception {
      KeyValueTable stateTable = dsContext.getDataset(ETLRealtimeApplication.STATE_TABLE);
      byte[] startKey = Bytes.toBytes(String.format("%s%s", appName, SEPARATOR));
      // Scan the rows carrying this application's prefix and delete any whose key doesn't match
      // the state key of this worker instance.
      try (CloseableIterator<KeyValue<byte[], byte[]>> rows =
             stateTable.scan(startKey, Bytes.stopKeyForPrefix(startKey))) {
        while (rows.hasNext()) {
          KeyValue<byte[], byte[]> row = rows.next();
          if (Bytes.compareTo(stateStoreKeyBytes, row.getKey()) != 0) {
            stateTable.delete(row.getKey());
          }
        }
      }
    }
  }, Exception.class);
  PipelinePhase pipeline = GSON.fromJson(properties.get(Constants.PIPELINEID), PipelinePhase.class);
  Map<String, TransformDetail> transformationMap = new HashMap<>();
  initializeSource(context, pipeline);
  initializeTransforms(context, transformationMap, pipeline);
  initializeSinks(context, transformationMap, pipeline);
  Set<String> startStages = new HashSet<>();
  startStages.addAll(pipeline.getStageOutputs(sourceStageName));
  transformExecutor = new TransformExecutor(transformationMap, startStages);
}
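
After initialization, transformationMap holds one TransformDetail per transform and per sink, and startStages names the stages directly downstream of the source. TransformExecutor receives both, so it can presumably push each incoming record from the start stages through the graph. The sketch below is a hypothetical rendering of such a traversal, using the accessors assumed in the TransformDetail sketch earlier; ListEmitter is illustration-only, and the real TransformExecutor in co.cask.cdap.etl.common may be structured quite differently.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.InvalidEntry;

public class GraphWalkSketch {

  // Minimal Emitter that collects emitted values into a list; the real interface also declares
  // emitError, implemented here as a no-op for brevity.
  static final class ListEmitter<T> implements Emitter<T> {
    final List<T> values = new ArrayList<>();

    @Override
    public void emit(T value) {
      values.add(value);
    }

    @Override
    public void emitError(InvalidEntry<T> invalidEntry) {
      // errors are dropped in this sketch
    }
  }

  @SuppressWarnings("unchecked")
  void process(Object record, String stageName, Map<String, TransformDetail> details) throws Exception {
    TransformDetail detail = details.get(stageName);
    ListEmitter<Object> emitter = new ListEmitter<>();
    detail.getTransformation().transform(record, emitter);
    // Fan each output out to every downstream stage; sinks were registered with an empty
    // next-stage set, so the recursion ends there.
    for (Object output : emitter.values) {
      for (String nextStage : detail.getNextStages()) {
        process(output, nextStage, details);
      }
    }
  }
}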