Search in sources:

Example 1 with TransformExecutor

Use of co.cask.cdap.etl.common.TransformExecutor in project cdap by caskdata.

The class ETLWorker, method initialize:

@Override
public void initialize(final WorkerContext context) throws Exception {
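    // Inject the pipeline stage name into log messages when stage logging is enabled in the spec.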
    if (Boolean.valueOf(context.getSpecification().getProperty(Constants.STAGE_LOGGING_ENABLED))) {
        LogStageInjector.start();
    }
    super.initialize(context);
    Map<String, String> properties = context.getSpecification().getProperties();
    appName = context.getApplicationSpecification().getName();
    Preconditions.checkArgument(properties.containsKey(Constants.PIPELINEID));
    Preconditions.checkArgument(properties.containsKey(UNIQUE_ID));
    String uniqueId = properties.get(UNIQUE_ID);
    // Each worker instance keeps its own unique state, so the state key includes the instance id.
    // The final local copy of the app name is captured by the anonymous TxRunnable below.
    final String appName = context.getApplicationSpecification().getName();
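    // Key shape: <appName><SEPARATOR><uniqueId><SEPARATOR><instanceId>; SEPARATOR is a class constant not shown here.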
    stateStoreKey = String.format("%s%s%s%s%s", appName, SEPARATOR, uniqueId, SEPARATOR, context.getInstanceId());
    stateStoreKeyBytes = Bytes.toBytes(stateStoreKey);
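    // In one transaction, purge state rows left over from earlier runs or other generations of this app.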
    Transactionals.execute(getContext(), new TxRunnable() {

        @Override
        public void run(DatasetContext dsContext) throws Exception {
            KeyValueTable stateTable = dsContext.getDataset(ETLRealtimeApplication.STATE_TABLE);
            byte[] startKey = Bytes.toBytes(String.format("%s%s", appName, SEPARATOR));
            // Scan the table for rows with this app's prefix and delete those whose keys don't match this worker instance's state key.
            try (CloseableIterator<KeyValue<byte[], byte[]>> rows = stateTable.scan(startKey, Bytes.stopKeyForPrefix(startKey))) {
                while (rows.hasNext()) {
                    KeyValue<byte[], byte[]> row = rows.next();
                    if (Bytes.compareTo(stateStoreKeyBytes, row.getKey()) != 0) {
                        stateTable.delete(row.getKey());
                    }
                }
            }
        }
    }, Exception.class);
    PipelinePhase pipeline = GSON.fromJson(properties.get(Constants.PIPELINEID), PipelinePhase.class);
    Map<String, TransformDetail> transformationMap = new HashMap<>();
    initializeSource(context, pipeline);
    initializeTransforms(context, transformationMap, pipeline);
    initializeSinks(context, transformationMap, pipeline);
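    // Processing begins at the stages directly downstream of the source; the source itself was wired up separately by initializeSource().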
    Set<String> startStages = new HashSet<>();
    startStages.addAll(pipeline.getStageOutputs(sourceStageName));
    transformExecutor = new TransformExecutor(transformationMap, startStages);
}
Also used: CloseableIterator (co.cask.cdap.api.dataset.lib.CloseableIterator), KeyValue (co.cask.cdap.api.dataset.lib.KeyValue), HashMap (java.util.HashMap), IOException (java.io.IOException), TxRunnable (co.cask.cdap.api.TxRunnable), KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable), PipelinePhase (co.cask.cdap.etl.common.PipelinePhase), TransformDetail (co.cask.cdap.etl.common.TransformDetail), TransformExecutor (co.cask.cdap.etl.common.TransformExecutor), DatasetContext (co.cask.cdap.api.data.DatasetContext), HashSet (java.util.HashSet)
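The stale-state cleanup inside the TxRunnable is the subtle part of this initializer. Below is a minimal sketch of the same prefix-scan-and-delete pattern, factored into a standalone helper for clarity; the method name cleanUpStaleState is hypothetical, but every API call is one already used in the listing above.

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.CloseableIterator;
import co.cask.cdap.api.dataset.lib.KeyValue;
import co.cask.cdap.api.dataset.lib.KeyValueTable;

// Hypothetical helper: delete every row under `prefix` except the row keyed by `keepKey`.
static void cleanUpStaleState(KeyValueTable stateTable, byte[] prefix, byte[] keepKey) {
    // stopKeyForPrefix computes the exclusive upper bound for a prefix scan.
    try (CloseableIterator<KeyValue<byte[], byte[]>> rows =
             stateTable.scan(prefix, Bytes.stopKeyForPrefix(prefix))) {
        while (rows.hasNext()) {
            KeyValue<byte[], byte[]> row = rows.next();
            if (Bytes.compareTo(keepKey, row.getKey()) != 0) {
                stateTable.delete(row.getKey());
            }
        }
    }
}

In initialize() above, prefix is Bytes.toBytes(appName + SEPARATOR) and keepKey is stateStoreKeyBytes, so state written by a previous generation of this worker (a different unique id or instance id) is purged before the pipeline starts.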
