
Example 1 with Put

Use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.

In class DatasetBasedStreamSizeScheduleStore, the method upgradeVersionKeys:

// Returns whether the upgrade process is complete, determined by whether this
// invocation upgraded zero rows.
private boolean upgradeVersionKeys(Table table, int maxNumberUpdateRows) {
    int numRowsUpgraded = 0;
    try (Scanner scan = getScannerWithPrefix(table, KEY_PREFIX)) {
        Row next;
        // Upgrade only N rows in one transaction to reduce the probability of conflicts with regular Store operations.
        while (((next = scan.next()) != null) && (numRowsUpgraded < maxNumberUpdateRows)) {
            if (isInvalidRow(next)) {
                LIMITED_LOG.debug("Stream Sized Schedule entry with Row key {} does not have all columns.", Bytes.toString(next.getRow()));
                continue;
            }
            byte[] oldRowKey = next.getRow();
            String oldRowKeyString = Bytes.toString(next.getRow());
            String[] splits = oldRowKeyString.split(":");
            // streamSizeSchedule:namespace:application:type:program:schedule
            if (splits.length != 6) {
                LIMITED_LOG.debug("Skip upgrading StreamSizeSchedule {}. Expected row key " + "format 'streamSizeSchedule:namespace:application:type:program:schedule'", oldRowKeyString);
                continue;
            }
            // append application version after application name
            byte[] newRowKey = Bytes.toBytes(ScheduleUpgradeUtil.getNameWithDefaultVersion(splits, 3));
            // If the newRowKey is already present, simply delete the oldRowKey and continue
            Row row = table.get(newRowKey);
            if (!row.isEmpty()) {
                table.delete(oldRowKey);
                numRowsUpgraded++;
                continue;
            }
            Put put = new Put(newRowKey);
            for (Map.Entry<byte[], byte[]> colValEntry : next.getColumns().entrySet()) {
                put.add(colValEntry.getKey(), colValEntry.getValue());
            }
            table.put(put);
            table.delete(oldRowKey);
            numRowsUpgraded++;
        }
    }
    // If no rows were upgraded, report that the upgrade process has completed.
    return (numRowsUpgraded == 0);
}
Also used: Scanner(co.cask.cdap.api.dataset.table.Scanner), Row(co.cask.cdap.api.dataset.table.Row), Map(java.util.Map), Put(co.cask.cdap.api.dataset.table.Put)
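
For reference, a minimal sketch of the basic Put write/read round trip that the examples on this page build on. The row key and column names are illustrative assumptions, not taken from the CDAP sources; 'table' is assumed to be a CDAP Table dataset accessed inside a transaction.

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.table.Put;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;

// Hypothetical helper, not part of the CDAP sources.
void putAndReadBack(Table table) {
    byte[] rowKey = Bytes.toBytes("example-row");    // illustrative row key
    Put put = new Put(rowKey);
    // Put.add(...) has overloads for String, primitive, and byte[] values
    put.add("name", "my-schedule");
    put.add("updated", System.currentTimeMillis());
    table.put(put);
    // Read the row back; the Row is empty (not null) when the key is absent
    Row row = table.get(rowKey);
    if (!row.isEmpty()) {
        String name = row.getString("name");
        Long updated = row.getLong("updated");
    }
}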

Example 2 with Put

Use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.

In class ProgramScheduleStoreDataset, the method addSchedules:

/**
   * Add one or more schedules to the store.
   *
   * @param schedules the schedules to add
   * @return the new schedules' last modified timestamp
   * @throws AlreadyExistsException if one of the schedules already exists
   */
public long addSchedules(Iterable<? extends ProgramSchedule> schedules) throws AlreadyExistsException {
    long currentTime = System.currentTimeMillis();
    for (ProgramSchedule schedule : schedules) {
        byte[] scheduleKey = rowKeyBytesForSchedule(schedule.getProgramId().getParent().schedule(schedule.getName()));
        if (!store.get(new Get(scheduleKey)).isEmpty()) {
            throw new AlreadyExistsException(schedule.getProgramId().getParent().schedule(schedule.getName()));
        }
        Put schedulePut = new Put(scheduleKey);
        schedulePut.add(SCHEDULE_COLUMN_BYTES, GSON.toJson(schedule));
        schedulePut.add(UPDATED_COLUMN_BYTES, currentTime);
        // initially suspended
        schedulePut.add(STATUS_COLUMN_BYTES, ProgramScheduleStatus.SUSPENDED.toString());
        store.put(schedulePut);
        int count = 0;
        for (String triggerKey : extractTriggerKeys(schedule)) {
            byte[] triggerRowKey = rowKeyBytesForTrigger(scheduleKey, count++);
            store.put(new Put(triggerRowKey, TRIGGER_KEY_COLUMN_BYTES, triggerKey));
        }
    }
    return currentTime;
}
Also used: AlreadyExistsException(co.cask.cdap.common.AlreadyExistsException), ProgramSchedule(co.cask.cdap.internal.app.runtime.schedule.ProgramSchedule), Get(co.cask.cdap.api.dataset.table.Get), Put(co.cask.cdap.api.dataset.table.Put), Constraint(co.cask.cdap.internal.schedule.constraint.Constraint)
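
A hypothetical call site for addSchedules; the 'store' and 'schedules' variables are assumptions for illustration, not part of the CDAP sources.

// Assumes 'store' is a ProgramScheduleStoreDataset and 'schedules' is an
// Iterable<ProgramSchedule> built elsewhere, used inside a transaction.
try {
    long updatedTime = store.addSchedules(schedules);
    // every schedule was stored, initially in SUSPENDED status
} catch (AlreadyExistsException e) {
    // a schedule with the same program and name already exists
}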

Example 3 with Put

Use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.

In class ETLWorker, the method run:

@Override
public void run() {
    final SourceState currentState = new SourceState();
    final SourceState nextState = new SourceState();
    final Map<String, List<Object>> dataToSink = new HashMap<>();
    boolean hasData = false;
    final Map<String, List<InvalidEntry>> transformIdToErrorRecords = intializeTransformIdToErrorsList();
    final WorkerContext context = getContext();
    Set<String> transformErrorsWithoutDataset = Sets.newHashSet();
    // Fetch SourceState from State Table.
    // Only required at the beginning since we persist the state if there is a change.
    Transactionals.execute(context, new TxRunnable() {

        @Override
        public void run(DatasetContext context) throws Exception {
            KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
            byte[] stateBytes = stateTable.read(stateStoreKeyBytes);
            if (stateBytes != null) {
                SourceState state = GSON.fromJson(Bytes.toString(stateBytes), SourceState.class);
                currentState.setState(state);
            }
        }
    });
    DefaultEmitter<Object> sourceEmitter = new DefaultEmitter<>();
    TrackedEmitter<Object> trackedSourceEmitter = new TrackedEmitter<>(sourceEmitter, new DefaultStageMetrics(metrics, sourceStageName), TrackedTransform.RECORDS_OUT, context.getDataTracer(sourceStageName));
    while (!stopped) {
        // Invoke poll method of the source to fetch data
        try {
            SourceState newState = source.poll(trackedSourceEmitter, new SourceState(currentState));
            if (newState != null) {
                nextState.setState(newState);
            }
        } catch (Exception e) {
            // The source threw an exception, so there is nothing to process and the state is unchanged.
            LOG.warn("Exception thrown during polling of Source for data", e);
            sourceEmitter.reset();
            continue;
        }
        // For each object emitted by the source, run the transforms and collect the data to be persisted in the sink.
        for (Object sourceData : sourceEmitter.getEntries()) {
            try {
                TransformResponse transformResponse = transformExecutor.runOneIteration(sourceData);
                for (Map.Entry<String, Collection<Object>> transformedValues : transformResponse.getSinksResults().entrySet()) {
                    dataToSink.put(transformedValues.getKey(), new ArrayList<>());
                    Iterator emitterIterator = transformedValues.getValue().iterator();
                    while (emitterIterator.hasNext()) {
                        if (!hasData) {
                            hasData = true;
                        }
                        dataToSink.get(transformedValues.getKey()).add(emitterIterator.next());
                    }
                }
                for (Map.Entry<String, Collection<InvalidEntry<Object>>> transformErrorsEntry : transformResponse.getMapTransformIdToErrorEmitter().entrySet()) {
                    if (!transformErrorsWithoutDataset.contains(transformErrorsEntry.getKey())) {
                        if (!tranformIdToDatasetName.containsKey(transformErrorsEntry.getKey()) && !transformErrorsEntry.getValue().isEmpty()) {
                            transformErrorsWithoutDataset.add(transformErrorsEntry.getKey());
                            LOG.warn("Error records were emitted in transform {}, " + "but error dataset is not configured for this transform", transformErrorsEntry.getKey());
                        }
                        if (tranformIdToDatasetName.containsKey(transformErrorsEntry.getKey()) && !transformErrorsEntry.getValue().isEmpty()) {
                            // add the errors
                            if (!hasData && transformErrorsEntry.getValue().size() > 0) {
                                hasData = true;
                            }
                            transformIdToErrorRecords.get(transformErrorsEntry.getKey()).addAll(transformErrorsEntry.getValue());
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("Exception thrown while processing data {}", sourceData, e);
            }
        }
        sourceEmitter.reset();
        // Start a Transaction if there is data to persist or if the Source state has changed.
        try {
            if (hasData || (!nextState.equals(currentState))) {
                getContext().execute(new TxRunnable() {

                    @Override
                    public void run(DatasetContext context) throws Exception {
                        // Invoke the sink's write method if there is any object to be written.
                        if (!dataToSink.isEmpty()) {
                            DefaultDataWriter defaultDataWriter = new DefaultDataWriter(getContext(), context);
                            for (Map.Entry<String, List<Object>> sinkEntry : dataToSink.entrySet()) {
                                sinks.get(sinkEntry.getKey()).write(sinkEntry.getValue(), defaultDataWriter);
                            }
                        }
                        for (Map.Entry<String, List<InvalidEntry>> errorRecordEntry : transformIdToErrorRecords.entrySet()) {
                            String transformId = errorRecordEntry.getKey();
                            final String datasetName = tranformIdToDatasetName.get(transformId);
                            Table errorTable = context.getDataset(datasetName);
                            long timeInMillis = System.currentTimeMillis();
                            byte[] currentTime = Bytes.toBytes(timeInMillis);
                            String transformIdentifier = appName + SEPARATOR + transformId;
                            for (InvalidEntry invalidEntry : errorRecordEntry.getValue()) {
                                // Use a random UUID so each record is written under a unique key;
                                // the UUID itself is not needed when scanning later.
                                byte[] rowKey = Bytes.concat(currentTime, Bytes.toBytes(transformIdentifier), Bytes.toBytes(UUID.randomUUID()));
                                Put errorPut = constructErrorPut(rowKey, invalidEntry, timeInMillis);
                                errorTable.write(rowKey, errorPut);
                            }
                        }
                        // Persist nextState if it is different from currentState
                        if (!nextState.equals(currentState)) {
                            KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
                            stateTable.write(stateStoreKey, GSON.toJson(nextState));
                        }
                        // After running one iteration and successfully writing to sinks and error datasets, reset the emitters.
                        transformExecutor.resetEmitter();
                    }
                });
                // Update the in-memory copy of the state only if the transaction succeeded.
                currentState.setState(nextState);
            }
        } catch (Exception e) {
            LOG.warn("Exception thrown during persisting of data", e);
        } finally {
            // Clear the buffered sink data (if the transaction failed, we will poll the source again with the old state)
            hasData = false;
            dataToSink.clear();
            for (List<InvalidEntry> invalidEntryList : transformIdToErrorRecords.values()) {
                invalidEntryList.clear();
            }
        }
    }
}
Also used: DefaultEmitter(co.cask.cdap.etl.common.DefaultEmitter), HashMap(java.util.HashMap), InvalidEntry(co.cask.cdap.etl.api.InvalidEntry), TxRunnable(co.cask.cdap.api.TxRunnable), TrackedEmitter(co.cask.cdap.etl.common.TrackedEmitter), CloseableIterator(co.cask.cdap.api.dataset.lib.CloseableIterator), Iterator(java.util.Iterator), List(java.util.List), ArrayList(java.util.ArrayList), DatasetContext(co.cask.cdap.api.data.DatasetContext), SourceState(co.cask.cdap.etl.api.realtime.SourceState), KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable), Table(co.cask.cdap.api.dataset.table.Table), IOException(java.io.IOException), Put(co.cask.cdap.api.dataset.table.Put), Collection(java.util.Collection), TransformResponse(co.cask.cdap.etl.common.TransformResponse), WorkerContext(co.cask.cdap.api.worker.WorkerContext), Map(java.util.Map), DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)
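
The state handling in run() boils down to a read-modify-write of the SourceState in a KeyValueTable. A condensed, hypothetical sketch of that pattern (the key name "state.key" is an assumption), assuming it executes inside one of the TxRunnable blocks above:

// Hypothetical condensed sketch of the state bookkeeping in run() above.
KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
byte[] stateBytes = stateTable.read("state.key");    // null on the first run
SourceState current = (stateBytes == null)
    ? new SourceState()
    : GSON.fromJson(Bytes.toString(stateBytes), SourceState.class);
// ... poll the source, run transforms, write to the sinks ...
stateTable.write("state.key", GSON.toJson(current)); // persist the new state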

Example 4 with Put

Use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.

In class ETLWorker, the method constructErrorPut:

private Put constructErrorPut(byte[] rowKey, InvalidEntry entry, long timeInMillis) throws IOException {
    Put errorPut = new Put(rowKey);
    errorPut.add(Constants.ErrorDataset.ERRCODE, entry.getErrorCode());
    errorPut.add(Constants.ErrorDataset.TIMESTAMP, timeInMillis);
    if (entry.getInvalidRecord() instanceof StructuredRecord) {
        StructuredRecord record = (StructuredRecord) entry.getInvalidRecord();
        errorPut.add(Constants.ErrorDataset.INVALIDENTRY, StructuredRecordStringConverter.toJsonString(record));
    } else {
        errorPut.add(Constants.ErrorDataset.INVALIDENTRY, String.format("Error entry is of type %s, only records of type " + "co.cask.cdap.api.data.format.StructuredRecord " + "are supported currently", entry.getInvalidRecord().getClass().getName()));
    }
    return errorPut;
}
Also used: Put(co.cask.cdap.api.dataset.table.Put), StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)
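
A hypothetical read-back of a row written by constructErrorPut, assuming 'errorTable' and 'rowKey' are the same Table and row key the record was written under:

// Hypothetical read-back; the column constants match the Put built above.
Row errorRow = errorTable.get(rowKey);
if (!errorRow.isEmpty()) {
    Integer errCode = errorRow.getInt(Constants.ErrorDataset.ERRCODE);
    Long timestamp = errorRow.getLong(Constants.ErrorDataset.TIMESTAMP);
    String invalidEntry = errorRow.getString(Constants.ErrorDataset.INVALIDENTRY);
}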

Example 5 with Put

Use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.

In class HBaseConsumerStateStore, the method configureInstances:

@Override
public void configureInstances(long groupId, int instances) {
    // Find the last barrier info to get the existing group config
    List<QueueBarrier> queueBarriers = scanBarriers(groupId, new AllCollector<QueueBarrier>()).finish(new ArrayList<QueueBarrier>());
    Preconditions.checkState(!queueBarriers.isEmpty(), "No queue configuration found for group %s", groupId);
    QueueBarrier queueBarrier = queueBarriers.get(queueBarriers.size() - 1);
    ConsumerGroupConfig oldGroupConfig = queueBarrier.getGroupConfig();
    ConsumerGroupConfig groupConfig = new ConsumerGroupConfig(groupId, instances, oldGroupConfig.getDequeueStrategy(), oldGroupConfig.getHashKey());
    byte[] startRow = QueueEntryRow.getQueueEntryRowKey(queueName, transaction.getWritePointer(), 0);
    Put put = new Put(Bytes.add(queueName.toBytes(), startRow));
    put.add(Bytes.toBytes(groupConfig.getGroupId()), GSON.toJson(groupConfig));
    table.put(put);
    // For instances that don't have a start row yet, set it to the barrier start row.
    // We fetch all instances here so that the barrier info can be cleaned up later.
    Map<Integer, byte[]> startRows = fetchStartRows(groupId, Integer.MAX_VALUE);
    for (int instanceId = 0; instanceId < instances; instanceId++) {
        if (!startRows.containsKey(instanceId)) {
            table.put(queueName.toBytes(), getConsumerStateColumn(groupId, instanceId), startRow);
        }
    }
    // Remove barrier info entries whose recorded start row all instances have passed
    Deque<byte[]> deletes = Lists.newLinkedList();
    for (QueueBarrier info : queueBarriers) {
        boolean allPassed = true;
        for (byte[] instanceStartRow : startRows.values()) {
            if (Bytes.compareTo(instanceStartRow, info.getStartRow()) <= 0) {
                allPassed = false;
                break;
            }
        }
        if (!allPassed) {
            break;
        }
        deletes.add(Bytes.add(queueName.toBytes(), info.getStartRow()));
    }
    // Retain the last barrier info
    if (deletes.size() > 1) {
        deletes.removeLast();
        byte[] column = Bytes.toBytes(groupId);
        for (byte[] delete : deletes) {
            table.delete(delete, column);
        }
    }
}
Also used: AllCollector(co.cask.cdap.common.collect.AllCollector), ConsumerGroupConfig(co.cask.cdap.data2.queue.ConsumerGroupConfig), Put(co.cask.cdap.api.dataset.table.Put)
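
A hypothetical read-back of the group configuration written above, assuming the same composite row key and group id:

// Hypothetical read-back of the barrier row written by configureInstances.
Row barrierRow = table.get(Bytes.add(queueName.toBytes(), startRow));
byte[] configBytes = barrierRow.get(Bytes.toBytes(groupId));
if (configBytes != null) {
    ConsumerGroupConfig stored = GSON.fromJson(Bytes.toString(configBytes), ConsumerGroupConfig.class);
}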

Aggregations

Put (co.cask.cdap.api.dataset.table.Put): 58
Table (co.cask.cdap.api.dataset.table.Table): 23
Test (org.junit.Test): 23
Row (co.cask.cdap.api.dataset.table.Row): 16
Get (co.cask.cdap.api.dataset.table.Get): 15
Transaction (org.apache.tephra.Transaction): 12
TransactionAware (org.apache.tephra.TransactionAware): 12
TransactionExecutor (org.apache.tephra.TransactionExecutor): 10
Schema (co.cask.cdap.api.data.schema.Schema): 9
IOException (java.io.IOException): 8
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 7
DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin): 7
Map (java.util.Map): 7
HBaseTable (co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 6
WriteOnly (co.cask.cdap.api.annotation.WriteOnly): 5
DataSetException (co.cask.cdap.api.dataset.DataSetException): 5
Scanner (co.cask.cdap.api.dataset.table.Scanner): 5
TxRunnable (co.cask.cdap.api.TxRunnable): 4
DatasetContext (co.cask.cdap.api.data.DatasetContext): 4
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 4