use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.
the class DatasetBasedStreamSizeScheduleStore method upgradeVersionKeys.
// Returns whether the upgrade process is complete, determined by checking that this invocation upgraded no rows.
private boolean upgradeVersionKeys(Table table, int maxNumberUpdateRows) {
  int numRowsUpgraded = 0;
  try (Scanner scan = getScannerWithPrefix(table, KEY_PREFIX)) {
    Row next;
    // Upgrade only N rows in one transaction to reduce the probability of conflicts with regular Store operations.
    while (((next = scan.next()) != null) && (numRowsUpgraded < maxNumberUpdateRows)) {
      if (isInvalidRow(next)) {
        LIMITED_LOG.debug("Stream Size Schedule entry with row key {} does not have all columns.",
                          Bytes.toString(next.getRow()));
        continue;
      }
      byte[] oldRowKey = next.getRow();
      String oldRowKeyString = Bytes.toString(next.getRow());
      String[] splits = oldRowKeyString.split(":");
      // streamSizeSchedule:namespace:application:type:program:schedule
      if (splits.length != 6) {
        LIMITED_LOG.debug("Skip upgrading StreamSizeSchedule {}. Expected row key format "
                            + "'streamSizeSchedule:namespace:application:type:program:schedule'", oldRowKeyString);
        continue;
      }
      // Append the application version after the application name.
      byte[] newRowKey = Bytes.toBytes(ScheduleUpgradeUtil.getNameWithDefaultVersion(splits, 3));
      // If the newRowKey is already present, simply delete the oldRowKey and continue.
      Row row = table.get(newRowKey);
      if (!row.isEmpty()) {
        table.delete(oldRowKey);
        numRowsUpgraded++;
        continue;
      }
      Put put = new Put(newRowKey);
      for (Map.Entry<byte[], byte[]> colValEntry : next.getColumns().entrySet()) {
        put.add(colValEntry.getKey(), colValEntry.getValue());
      }
      table.put(put);
      table.delete(oldRowKey);
      numRowsUpgraded++;
    }
  }
  // If no rows were upgraded, the upgrade process has completed.
  return (numRowsUpgraded == 0);
}
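The heart of this upgrade is a generic copy-then-delete move: read every column of the old row, write them under the new row key with a single Put, and delete the old row. Below is a minimal standalone sketch of that pattern against the CDAP Table API; the helper name moveRow is illustrative and not part of DatasetBasedStreamSizeScheduleStore.

import co.cask.cdap.api.dataset.table.Put;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;
import java.util.Map;

// Illustrative helper: copy all columns of oldRowKey to newRowKey in one Put, then delete the old row.
private void moveRow(Table table, byte[] oldRowKey, byte[] newRowKey) {
  Row oldRow = table.get(oldRowKey);
  if (oldRow.isEmpty()) {
    // Nothing to move.
    return;
  }
  Put put = new Put(newRowKey);
  for (Map.Entry<byte[], byte[]> column : oldRow.getColumns().entrySet()) {
    put.add(column.getKey(), column.getValue());
  }
  table.put(put);
  table.delete(oldRowKey);
}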
use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.
the class ProgramScheduleStoreDataset method addSchedules.
/**
 * Add one or more schedules to the store.
 *
 * @param schedules the schedules to add
 * @return the new schedules' last modified timestamp
 * @throws AlreadyExistsException if one of the schedules already exists
 */
public long addSchedules(Iterable<? extends ProgramSchedule> schedules) throws AlreadyExistsException {
  long currentTime = System.currentTimeMillis();
  for (ProgramSchedule schedule : schedules) {
    byte[] scheduleKey = rowKeyBytesForSchedule(schedule.getProgramId().getParent().schedule(schedule.getName()));
    if (!store.get(new Get(scheduleKey)).isEmpty()) {
      throw new AlreadyExistsException(schedule.getProgramId().getParent().schedule(schedule.getName()));
    }
    Put schedulePut = new Put(scheduleKey);
    schedulePut.add(SCHEDULE_COLUMN_BYTES, GSON.toJson(schedule));
    schedulePut.add(UPDATED_COLUMN_BYTES, currentTime);
    // initially suspended
    schedulePut.add(STATUS_COLUMN_BYTES, ProgramScheduleStatus.SUSPENDED.toString());
    store.put(schedulePut);
    int count = 0;
    for (String triggerKey : extractTriggerKeys(schedule)) {
      byte[] triggerRowKey = rowKeyBytesForTrigger(scheduleKey, count++);
      store.put(new Put(triggerRowKey, TRIGGER_KEY_COLUMN_BYTES, triggerKey));
    }
  }
  return currentTime;
}
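For symmetry, reading a schedule back is a single Get on the same row key followed by JSON deserialization of the schedule column. The method below is a hedged sketch, not part of the class as shown; it assumes only the fields already used above (store, GSON, SCHEDULE_COLUMN_BYTES).

// Hedged sketch (not shown in ProgramScheduleStoreDataset above): read a schedule row back
// by its key and deserialize the stored JSON column into a ProgramSchedule, or return null if absent.
private ProgramSchedule readSchedule(byte[] scheduleKey) {
  Row row = store.get(new Get(scheduleKey));
  if (row.isEmpty()) {
    return null;
  }
  return GSON.fromJson(row.getString(SCHEDULE_COLUMN_BYTES), ProgramSchedule.class);
}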
use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.
the class ETLWorker method run.
@Override
public void run() {
  final SourceState currentState = new SourceState();
  final SourceState nextState = new SourceState();
  final Map<String, List<Object>> dataToSink = new HashMap<>();
  boolean hasData = false;
  final Map<String, List<InvalidEntry>> transformIdToErrorRecords = intializeTransformIdToErrorsList();
  final WorkerContext context = getContext();
  Set<String> transformErrorsWithoutDataset = Sets.newHashSet();
  // Fetch the SourceState from the state table.
  // Only required at the beginning, since we persist the state whenever it changes.
  Transactionals.execute(context, new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
      byte[] stateBytes = stateTable.read(stateStoreKeyBytes);
      if (stateBytes != null) {
        SourceState state = GSON.fromJson(Bytes.toString(stateBytes), SourceState.class);
        currentState.setState(state);
      }
    }
  });
  DefaultEmitter<Object> sourceEmitter = new DefaultEmitter<>();
  TrackedEmitter<Object> trackedSourceEmitter =
    new TrackedEmitter<>(sourceEmitter, new DefaultStageMetrics(metrics, sourceStageName),
                         TrackedTransform.RECORDS_OUT, context.getDataTracer(sourceStageName));
  while (!stopped) {
    // Invoke the poll method of the source to fetch data.
    try {
      SourceState newState = source.poll(trackedSourceEmitter, new SourceState(currentState));
      if (newState != null) {
        nextState.setState(newState);
      }
    } catch (Exception e) {
      // Continue since the source threw an exception. There is no point in processing records,
      // and the state has not changed.
      LOG.warn("Exception thrown during polling of Source for data", e);
      sourceEmitter.reset();
      continue;
    }
    // Transform each record emitted by the source and collect the results to be persisted in the sinks.
    for (Object sourceData : sourceEmitter.getEntries()) {
      try {
        TransformResponse transformResponse = transformExecutor.runOneIteration(sourceData);
        for (Map.Entry<String, Collection<Object>> transformedValues : transformResponse.getSinksResults().entrySet()) {
          dataToSink.put(transformedValues.getKey(), new ArrayList<>());
          Iterator<Object> emitterIterator = transformedValues.getValue().iterator();
          while (emitterIterator.hasNext()) {
            if (!hasData) {
              hasData = true;
            }
            dataToSink.get(transformedValues.getKey()).add(emitterIterator.next());
          }
        }
        for (Map.Entry<String, Collection<InvalidEntry<Object>>> transformErrorsEntry :
          transformResponse.getMapTransformIdToErrorEmitter().entrySet()) {
          if (!transformErrorsWithoutDataset.contains(transformErrorsEntry.getKey())) {
            if (!tranformIdToDatasetName.containsKey(transformErrorsEntry.getKey())
              && !transformErrorsEntry.getValue().isEmpty()) {
              transformErrorsWithoutDataset.add(transformErrorsEntry.getKey());
              LOG.warn("Error records were emitted in transform {}, "
                         + "but no error dataset is configured for this transform", transformErrorsEntry.getKey());
            }
            if (tranformIdToDatasetName.containsKey(transformErrorsEntry.getKey())
              && !transformErrorsEntry.getValue().isEmpty()) {
              // Add the errors.
              if (!hasData && transformErrorsEntry.getValue().size() > 0) {
                hasData = true;
              }
              transformIdToErrorRecords.get(transformErrorsEntry.getKey()).addAll(transformErrorsEntry.getValue());
            }
          }
        }
      } catch (Exception e) {
        LOG.warn("Exception thrown while processing data {}", sourceData, e);
      }
    }
    sourceEmitter.reset();
    // Start a transaction if there is data to persist or if the source state has changed.
    try {
      if (hasData || (!nextState.equals(currentState))) {
        getContext().execute(new TxRunnable() {
          @Override
          public void run(DatasetContext context) throws Exception {
            // Invoke the sinks' write methods if there are any objects to be written.
            if (!dataToSink.isEmpty()) {
              DefaultDataWriter defaultDataWriter = new DefaultDataWriter(getContext(), context);
              for (Map.Entry<String, List<Object>> sinkEntry : dataToSink.entrySet()) {
                sinks.get(sinkEntry.getKey()).write(sinkEntry.getValue(), defaultDataWriter);
              }
            }
            for (Map.Entry<String, List<InvalidEntry>> errorRecordEntry : transformIdToErrorRecords.entrySet()) {
              String transformId = errorRecordEntry.getKey();
              final String datasetName = tranformIdToDatasetName.get(transformId);
              Table errorTable = context.getDataset(datasetName);
              long timeInMillis = System.currentTimeMillis();
              byte[] currentTime = Bytes.toBytes(timeInMillis);
              String transformIdentifier = appName + SEPARATOR + transformId;
              for (InvalidEntry invalidEntry : errorRecordEntry.getValue()) {
                // Use a random UUID so that each record is written under a unique row key;
                // the UUID itself is not needed when scanning later.
                byte[] rowKey = Bytes.concat(currentTime, Bytes.toBytes(transformIdentifier),
                                             Bytes.toBytes(UUID.randomUUID()));
                Put errorPut = constructErrorPut(rowKey, invalidEntry, timeInMillis);
                errorTable.write(rowKey, errorPut);
              }
            }
            // Persist nextState if it is different from currentState.
            if (!nextState.equals(currentState)) {
              KeyValueTable stateTable = context.getDataset(ETLRealtimeApplication.STATE_TABLE);
              stateTable.write(stateStoreKey, GSON.toJson(nextState));
            }
            // After running one iteration and successfully writing to the sinks and error datasets, reset the emitters.
            transformExecutor.resetEmitter();
          }
        });
        // Update the in-memory copy of the state only if the transaction succeeded.
        currentState.setState(nextState);
      }
    } catch (Exception e) {
      LOG.warn("Exception thrown during persisting of data", e);
    } finally {
      // Clear the buffered sink data; if the transaction failed, the source will be polled again with the old state.
      hasData = false;
      dataToSink.clear();
      for (List<InvalidEntry> invalidEntryList : transformIdToErrorRecords.values()) {
        invalidEntryList.clear();
      }
    }
  }
}
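The run loop assumes transformIdToErrorRecords already holds one list per transform that has an error dataset configured, so it can call addAll and clear without null checks. Below is a sketch of what that initializer could look like, assuming it is keyed off the tranformIdToDatasetName map used above; the actual intializeTransformIdToErrorsList in ETLWorker may differ.

// Assumed sketch: seed one reusable error list per transform stage that has an error dataset,
// so the run loop can append and clear entries without null checks.
private Map<String, List<InvalidEntry>> intializeTransformIdToErrorsList() {
  Map<String, List<InvalidEntry>> errorRecords = new HashMap<>();
  for (String transformId : tranformIdToDatasetName.keySet()) {
    errorRecords.put(transformId, new ArrayList<InvalidEntry>());
  }
  return errorRecords;
}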
use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.
the class ETLWorker method constructErrorPut.
private Put constructErrorPut(byte[] rowKey, InvalidEntry entry, long timeInMillis) throws IOException {
  Put errorPut = new Put(rowKey);
  errorPut.add(Constants.ErrorDataset.ERRCODE, entry.getErrorCode());
  errorPut.add(Constants.ErrorDataset.TIMESTAMP, timeInMillis);
  if (entry.getInvalidRecord() instanceof StructuredRecord) {
    StructuredRecord record = (StructuredRecord) entry.getInvalidRecord();
    errorPut.add(Constants.ErrorDataset.INVALIDENTRY, StructuredRecordStringConverter.toJsonString(record));
  } else {
    errorPut.add(Constants.ErrorDataset.INVALIDENTRY,
                 String.format("Error entry is of type %s; only records of type "
                                 + "co.cask.cdap.api.data.format.StructuredRecord are currently supported",
                               entry.getInvalidRecord().getClass().getName()));
  }
  return errorPut;
}
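Because each error row key begins with the write timestamp, the error dataset can later be scanned by time range without knowing the per-record UUID suffix. The helper below is a hedged read-side sketch, not part of ETLWorker, assuming the Constants.ErrorDataset columns written above and timestamp-prefixed row keys.

// Illustrative sketch: scan error rows whose timestamp prefix falls in [startMillis, endMillis)
// and log the stored invalid entries.
private void logInvalidEntries(Table errorTable, long startMillis, long endMillis) {
  try (Scanner scanner = errorTable.scan(Bytes.toBytes(startMillis), Bytes.toBytes(endMillis))) {
    Row row;
    while ((row = scanner.next()) != null) {
      LOG.debug("Invalid entry: {}", row.getString(Constants.ErrorDataset.INVALIDENTRY));
    }
  }
}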
use of co.cask.cdap.api.dataset.table.Put in project cdap by caskdata.
the class HBaseConsumerStateStore method configureInstances.
@Override
public void configureInstances(long groupId, int instances) {
  // Find the last barrier info to get the existing group config.
  List<QueueBarrier> queueBarriers =
    scanBarriers(groupId, new AllCollector<QueueBarrier>()).finish(new ArrayList<QueueBarrier>());
  Preconditions.checkState(!queueBarriers.isEmpty(), "No queue configuration found for group %s", groupId);
  QueueBarrier queueBarrier = queueBarriers.get(queueBarriers.size() - 1);
  ConsumerGroupConfig oldGroupConfig = queueBarrier.getGroupConfig();
  ConsumerGroupConfig groupConfig = new ConsumerGroupConfig(groupId, instances,
                                                            oldGroupConfig.getDequeueStrategy(),
                                                            oldGroupConfig.getHashKey());
  byte[] startRow = QueueEntryRow.getQueueEntryRowKey(queueName, transaction.getWritePointer(), 0);
  Put put = new Put(Bytes.add(queueName.toBytes(), startRow));
  put.add(Bytes.toBytes(groupConfig.getGroupId()), GSON.toJson(groupConfig));
  table.put(put);
  // For instances that don't have a start row yet, set it to the barrier start row.
  // We fetch all instances here so that the barrier info can be cleaned up later.
  Map<Integer, byte[]> startRows = fetchStartRows(groupId, Integer.MAX_VALUE);
  for (int instanceId = 0; instanceId < instances; instanceId++) {
    if (!startRows.containsKey(instanceId)) {
      table.put(queueName.toBytes(), getConsumerStateColumn(groupId, instanceId), startRow);
    }
  }
  // Remove barrier info entries whose recorded start row has been passed by all instances.
  Deque<byte[]> deletes = Lists.newLinkedList();
  for (QueueBarrier info : queueBarriers) {
    boolean allPassed = true;
    for (byte[] instanceStartRow : startRows.values()) {
      if (Bytes.compareTo(instanceStartRow, info.getStartRow()) <= 0) {
        allPassed = false;
        break;
      }
    }
    if (!allPassed) {
      break;
    }
    deletes.add(Bytes.add(queueName.toBytes(), info.getStartRow()));
  }
  // Retain the last barrier info.
  if (deletes.size() > 1) {
    deletes.removeLast();
    byte[] column = Bytes.toBytes(groupId);
    for (byte[] delete : deletes) {
      table.delete(delete, column);
    }
  }
}
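The per-consumer state cells written above are addressed by getConsumerStateColumn(groupId, instanceId). A plausible encoding for that column name is the group id followed by the instance id; this is a hypothetical sketch, and the actual HBaseConsumerStateStore helper may encode it differently.

// Hypothetical sketch: one state column per (groupId, instanceId) pair, encoded as the
// 8-byte group id followed by the 4-byte instance id.
private static byte[] getConsumerStateColumn(long groupId, int instanceId) {
  return Bytes.add(Bytes.toBytes(groupId), Bytes.toBytes(instanceId));
}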