Search in sources :

Example 1 with TransformDetail

use of co.cask.cdap.etl.common.TransformDetail in project cdap by caskdata.

the class ETLWorker method initializeTransforms.

/**
 * Creates and initializes every transform stage of the given pipeline phase and registers each
 * one in {@code transformDetailMap} under its stage name.
 *
 * <p>For each stage this method also records the stage's error dataset name (when one is
 * configured) in the {@code tranformIdToDatasetName} field, which is re-created on every call.
 *
 * @param context            worker context used to instantiate plugins and obtain data tracers
 * @param transformDetailMap map that receives one {@link TransformDetail} per transform stage,
 *                           keyed by stage name
 * @param pipeline           pipeline phase that supplies the transform stages and their outputs
 * @throws IllegalArgumentException if the pipeline returns {@code null} for the transform stages
 * @throws RuntimeException         wrapping an {@link InstantiationException} raised while
 *                                  creating or initializing a transform
 * @throws Exception                any other failure raised while initializing a transform
 */
private void initializeTransforms(WorkerContext context, Map<String, TransformDetail> transformDetailMap, PipelinePhase pipeline) throws Exception {
    Set<StageInfo> transformInfos = pipeline.getStagesOfType(Transform.PLUGIN_TYPE);
    Preconditions.checkArgument(transformInfos != null, "Pipeline phase returned null for stages of type %s", Transform.PLUGIN_TYPE);
    // NOTE: the field name is misspelled ("tranform"); it is declared outside this method, so it is kept as-is.
    tranformIdToDatasetName = new HashMap<>(transformInfos.size());
    for (StageInfo transformInfo : transformInfos) {
        String transformName = transformInfo.getName();
        try {
            Transform<?, ?> transform = context.newPluginInstance(transformName);
            transform = new WrappedTransform<>(transform, Caller.DEFAULT);
            WorkerRealtimeContext transformContext = new WorkerRealtimeContext(context, metrics, new TxLookupProvider(context), transformInfo);
            // Logged after wrapping, so this reports the wrapper's class name.
            LOG.debug("Transform Class : {}", transform.getClass().getName());
            transform.initialize(transformContext);
            StageMetrics stageMetrics = new DefaultStageMetrics(metrics, transformName);
            transformDetailMap.put(transformName, new TransformDetail(new TrackedTransform<>(transform, stageMetrics, context.getDataTracer(transformName)), pipeline.getStageOutputs(transformName)));
            // Only stages that declare an error dataset get an entry.
            if (transformInfo.getErrorDatasetName() != null) {
                tranformIdToDatasetName.put(transformName, transformInfo.getErrorDatasetName());
            }
        } catch (InstantiationException e) {
            LOG.error("Unable to instantiate Transform", e);
            // propagate() always throws; "throw" makes that explicit to the compiler and the reader.
            throw Throwables.propagate(e);
        }
    }
}
Also used : TrackedTransform(co.cask.cdap.etl.common.TrackedTransform) StageInfo(co.cask.cdap.etl.planner.StageInfo) TxLookupProvider(co.cask.cdap.etl.common.TxLookupProvider) TransformDetail(co.cask.cdap.etl.common.TransformDetail) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)

Example 2 with TransformDetail

use of co.cask.cdap.etl.common.TransformDetail in project cdap by caskdata.

the class ETLWorker method initializeSinks.

/**
 * Creates and initializes every realtime sink stage of the given pipeline phase.
 *
 * <p>Each sink is stored in the {@code sinks} field under its stage name, and a pass-through
 * transformation for the same stage name is added to {@code transformationMap} with an empty
 * set of outputs.
 *
 * @param context           worker context used to instantiate plugins and obtain data tracers
 * @param transformationMap map that receives one {@link TransformDetail} per sink stage
 * @param pipeline          pipeline phase that supplies the sink stages
 * @throws Exception if a sink cannot be created or initialized
 */
@SuppressWarnings("unchecked")
private void initializeSinks(WorkerContext context, Map<String, TransformDetail> transformationMap, PipelinePhase pipeline) throws Exception {
    Set<StageInfo> sinkStages = pipeline.getStagesOfType(RealtimeSink.PLUGIN_TYPE);
    sinks = new HashMap<>(sinkStages.size());
    for (StageInfo stage : sinkStages) {
        String stageName = stage.getName();

        // Instantiate the plugin and wrap it before initializing it.
        RealtimeSink pluginSink = context.newPluginInstance(stageName);
        RealtimeSink loggedSink = new LoggedRealtimeSink(stageName, pluginSink);
        WorkerRealtimeContext stageContext = new WorkerRealtimeContext(context, metrics, new TxLookupProvider(context), stage);
        LOG.debug("Sink Class : {}", loggedSink.getClass().getName());
        loggedSink.initialize(stageContext);
        RealtimeSink trackedSink = new TrackedRealtimeSink(loggedSink, new DefaultStageMetrics(metrics, stageName));

        // A pass-through transformation lets sinks be handled like any other stage when the
        // pipeline's transformations are executed: it emits each input unchanged.
        Transformation passThrough = new Transformation() {

            @Override
            public void transform(Object input, Emitter emitter) throws Exception {
                emitter.emit(input);
            }
        };
        // The tracked wrapper is given a records-in metric name and no (null) second metric name.
        DefaultStageMetrics passThroughMetrics = new DefaultStageMetrics(metrics, stageName);
        TrackedTransform trackedPassThrough = new TrackedTransform(passThrough, passThroughMetrics, TrackedTransform.RECORDS_IN, null, context.getDataTracer(stageName));

        transformationMap.put(stageName, new TransformDetail(trackedPassThrough, new HashSet<String>()));
        sinks.put(stageName, trackedSink);
    }
}
Also used : TrackedTransform(co.cask.cdap.etl.common.TrackedTransform) Transformation(co.cask.cdap.etl.api.Transformation) Emitter(co.cask.cdap.etl.api.Emitter) DefaultEmitter(co.cask.cdap.etl.common.DefaultEmitter) TrackedEmitter(co.cask.cdap.etl.common.TrackedEmitter) StageInfo(co.cask.cdap.etl.planner.StageInfo) TxLookupProvider(co.cask.cdap.etl.common.TxLookupProvider) TransformDetail(co.cask.cdap.etl.common.TransformDetail) RealtimeSink(co.cask.cdap.etl.api.realtime.RealtimeSink) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) HashSet(java.util.HashSet)

Example 3 with TransformDetail

use of co.cask.cdap.etl.common.TransformDetail in project cdap by caskdata.

the class ETLWorker method initialize.

/**
 * Initializes this worker: optionally starts stage logging, removes stale state rows for this
 * application, deserializes the pipeline phase from the worker properties and builds the
 * source, transforms, sinks and the transform executor.
 *
 * @param context worker context supplying the specification, properties and instance id
 * @throws IllegalArgumentException if the pipeline or unique-id property is missing
 * @throws Exception                if state cleanup or any stage initialization fails
 */
@Override
public void initialize(final WorkerContext context) throws Exception {
    // parseBoolean yields a primitive directly and, like valueOf, treats null as false.
    if (Boolean.parseBoolean(context.getSpecification().getProperty(Constants.STAGE_LOGGING_ENABLED))) {
        LogStageInjector.start();
    }
    super.initialize(context);
    Map<String, String> properties = context.getSpecification().getProperties();
    // Looked up once; the final local is what the anonymous TxRunnable below captures.
    final String applicationName = context.getApplicationSpecification().getName();
    appName = applicationName;
    Preconditions.checkArgument(properties.containsKey(Constants.PIPELINEID), "Missing required worker property '%s'", Constants.PIPELINEID);
    Preconditions.checkArgument(properties.containsKey(UNIQUE_ID), "Missing required worker property '%s'", UNIQUE_ID);
    String uniqueId = properties.get(UNIQUE_ID);
    // Each worker instance should have its own unique state.
    stateStoreKey = String.format("%s%s%s%s%s", applicationName, SEPARATOR, uniqueId, SEPARATOR, context.getInstanceId());
    stateStoreKeyBytes = Bytes.toBytes(stateStoreKey);
    Transactionals.execute(getContext(), new TxRunnable() {

        @Override
        public void run(DatasetContext dsContext) throws Exception {
            KeyValueTable stateTable = dsContext.getDataset(ETLRealtimeApplication.STATE_TABLE);
            byte[] startKey = Bytes.toBytes(String.format("%s%s", applicationName, SEPARATOR));
            // Scan the rows prefixed with this application's name and delete every row whose key
            // is not exactly this worker instance's state key.
            try (CloseableIterator<KeyValue<byte[], byte[]>> rows = stateTable.scan(startKey, Bytes.stopKeyForPrefix(startKey))) {
                while (rows.hasNext()) {
                    KeyValue<byte[], byte[]> row = rows.next();
                    if (Bytes.compareTo(stateStoreKeyBytes, row.getKey()) != 0) {
                        stateTable.delete(row.getKey());
                    }
                }
            }
        }
    }, Exception.class);
    PipelinePhase pipeline = GSON.fromJson(properties.get(Constants.PIPELINEID), PipelinePhase.class);
    Map<String, TransformDetail> transformationMap = new HashMap<>();
    initializeSource(context, pipeline);
    initializeTransforms(context, transformationMap, pipeline);
    initializeSinks(context, transformationMap, pipeline);
    // Execution starts at the stages directly downstream of the source.
    Set<String> startStages = new HashSet<>(pipeline.getStageOutputs(sourceStageName));
    transformExecutor = new TransformExecutor(transformationMap, startStages);
}
Also used : CloseableIterator(co.cask.cdap.api.dataset.lib.CloseableIterator) KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) HashMap(java.util.HashMap) IOException(java.io.IOException) TxRunnable(co.cask.cdap.api.TxRunnable) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) TransformDetail(co.cask.cdap.etl.common.TransformDetail) TransformExecutor(co.cask.cdap.etl.common.TransformExecutor) DatasetContext(co.cask.cdap.api.data.DatasetContext) HashSet(java.util.HashSet)

Aggregations

TransformDetail (co.cask.cdap.etl.common.TransformDetail)3 DefaultStageMetrics (co.cask.cdap.etl.common.DefaultStageMetrics)2 TrackedTransform (co.cask.cdap.etl.common.TrackedTransform)2 TxLookupProvider (co.cask.cdap.etl.common.TxLookupProvider)2 StageInfo (co.cask.cdap.etl.planner.StageInfo)2 HashSet (java.util.HashSet)2 TxRunnable (co.cask.cdap.api.TxRunnable)1 DatasetContext (co.cask.cdap.api.data.DatasetContext)1 CloseableIterator (co.cask.cdap.api.dataset.lib.CloseableIterator)1 KeyValue (co.cask.cdap.api.dataset.lib.KeyValue)1 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)1 Emitter (co.cask.cdap.etl.api.Emitter)1 StageMetrics (co.cask.cdap.etl.api.StageMetrics)1 Transformation (co.cask.cdap.etl.api.Transformation)1 RealtimeSink (co.cask.cdap.etl.api.realtime.RealtimeSink)1 DefaultEmitter (co.cask.cdap.etl.common.DefaultEmitter)1 PipelinePhase (co.cask.cdap.etl.common.PipelinePhase)1 TrackedEmitter (co.cask.cdap.etl.common.TrackedEmitter)1 TransformExecutor (co.cask.cdap.etl.common.TransformExecutor)1 IOException (java.io.IOException)1