use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class ConcurrentPartitionConsumer method abort.
/**
* Resets the process state of the given partition keys because they were not successfully processed, or discards
* a partition if it has already been attempted the configured maximum number of times.
*/
protected void abort(ConsumerWorkingSet workingSet, List<? extends PartitionKey> partitionKeys) {
  List<PartitionKey> discardedPartitions = new ArrayList<>();
  for (PartitionKey key : partitionKeys) {
    ConsumablePartition consumablePartition = workingSet.lookup(key);
    assertInProgress(consumablePartition);
    // either reset its processState, or remove it from the workingSet, depending on how many tries it already has
    if (consumablePartition.getNumFailures() < getConfiguration().getMaxRetries()) {
      consumablePartition.retry();
    } else {
      discardedPartitions.add(key);
      workingSet.lookup(key).discard();
    }
  }
  if (!discardedPartitions.isEmpty()) {
    LOG.warn("Discarded keys due to being retried {} times: {}",
             getConfiguration().getMaxRetries(), discardedPartitions);
  }
}
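This abort path is driven by a consumer that reports failed processing through onFinish, as the test further down shows. A minimal sketch of that flow, assuming a PartitionedFileSet instance named pfs and an application-provided statePersistor (both hypothetical here; only consumePartitions and onFinish come from the snippets on this page):

// Sketch of the consumer loop that exercises abort(); pfs, statePersistor and the
// numeric arguments are illustrative assumptions.
ConsumerConfiguration configuration = ConsumerConfiguration.builder()
  .setMaxRetries(3)
  .build();
PartitionConsumer consumer = new ConcurrentPartitionConsumer(pfs, statePersistor, configuration);

List<PartitionDetail> partitions = consumer.consumePartitions(10).getPartitions();
try {
  // process the partitions ...
  consumer.onFinish(partitions, true);
} catch (Exception e) {
  // onFinish with succeeded = false ends up in abort(), which retries each partition
  // until maxRetries is exceeded and then discards it
  consumer.onFinish(partitions, false);
}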
use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class MultiWriter method write.
@Override
public void write(K key, V value) throws IOException, InterruptedException {
  PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
  RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
  if (rw == null) {
    // if we don't have the record writer yet for the final path, create one and add it to the cache
    TaskAttemptContext taskAttemptContext = getKeySpecificContext(partitionKey);
    rw = getBaseRecordWriter(taskAttemptContext);
    this.recordWriters.put(partitionKey, rw);
    this.contexts.put(partitionKey, taskAttemptContext);
  }
  rw.write(key, value);
}
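The partition key here comes from a user-supplied DynamicPartitioner (io.cdap.cdap.api.dataset.lib.DynamicPartitioner). A minimal sketch of one, assuming NullWritable/Text records from org.apache.hadoop.io whose first comma-separated column is a date; the field name "date" and the record layout are assumptions for illustration:

// Hypothetical partitioner: the "date" field and the key/value types are not part of the
// MultiWriter code above, only getPartitionKey's role is.
public class DatePartitioner extends DynamicPartitioner<NullWritable, Text> {
  @Override
  public PartitionKey getPartitionKey(NullWritable key, Text value) {
    // derive the partition from the first (comma-separated) column of the record
    String date = value.toString().split(",")[0];
    return PartitionKey.builder().addStringField("date", date).build();
  }
}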
use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class MultiWriter method close.
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  try {
    Map<PartitionKey, RecordWriter<?, ?>> recordWriters = new HashMap<>();
    recordWriters.putAll(this.recordWriters);
    MultipleOutputs.closeRecordWriters(recordWriters, contexts);
    taskContext.flushOperations();
  } catch (Exception e) {
    throw new IOException(e);
  } finally {
    dynamicPartitioner.destroy();
  }
}
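For orientation, the two caches that write fills and close drains could be declared roughly as below; these declarations are inferred from the snippets on this page, not copied from MultiWriter itself:

// Inferred, not verbatim: one RecordWriter and one TaskAttemptContext cached per PartitionKey.
// close() drains both maps, flushes pending dataset operations, and destroys the partitioner.
private final Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();
private final Map<PartitionKey, TaskAttemptContext> contexts = new HashMap<>();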
use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class SingleWriter method write.
@Override
public void write(K key, V value) throws IOException, InterruptedException {
  PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
  if (!partitionKey.equals(currPartitionKey)) {
    // make sure we haven't written to this partition previously
    if (closedKeys.contains(partitionKey)) {
      throw new IllegalStateException(String.format(
        "Encountered a partition key for which the writer has already been closed: '%s'.", partitionKey));
    }
    // currPartitionKey can be null for the first key-value pair, in which case there's no writer to close
    if (currPartitionKey != null) {
      // close the existing RecordWriter and create a new one for the new PartitionKey
      currRecordWriter.close(currContext);
      closedKeys.add(currPartitionKey);
    }
    currPartitionKey = partitionKey;
    currContext = getKeySpecificContext(currPartitionKey);
    currRecordWriter = getBaseRecordWriter(currContext);
  }
  currRecordWriter.write(key, value);
}
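SingleWriter keeps only one RecordWriter open and throws once a previously closed key reappears, so it relies on records arriving grouped by partition key and on PartitionKey value equality. A small sketch of that equality assumption (the field names below are made up for illustration):

// PartitionKey compares by field values, which is what the currPartitionKey check above relies on.
PartitionKey first = PartitionKey.builder().addStringField("league", "nfl").addIntField("season", 2016).build();
PartitionKey second = PartitionKey.builder().addStringField("league", "nfl").addIntField("season", 2016).build();
// first.equals(second) is true, so consecutive records with these keys reuse the open RecordWriter;
// once a different key arrives, the writer for this key is closed and must not be reopened.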
use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class PartitionConsumerTest method testNumRetries.
@Test
public void testNumRetries() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;
  final int numRetries = 1;
  ConsumerConfiguration configuration = ConsumerConfiguration.builder().setMaxRetries(numRetries).build();
  final PartitionConsumer partitionConsumer =
    new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), configuration);
  final PartitionKey partitionKey = generateUniqueKey();
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.getPartitionOutput(partitionKey).addPartition();
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // consume and abort the partition numRetries + 1 times; the final abort removes it
      // from the working set
      for (int i = 0; i < numRetries + 1; i++) {
        List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions();
        Assert.assertEquals(1, partitionDetails.size());
        Assert.assertEquals(partitionKey, partitionDetails.get(0).getPartitionKey());
        // aborting the processing of the partition
        partitionConsumer.onFinish(partitionDetails, false);
      }
      // after the 2nd abort, the partition is discarded entirely, and so no partitions are available for consuming
      PartitionConsumerResult result = partitionConsumer.consumePartitions(1);
      Assert.assertEquals(0, result.getPartitions().size());
      Assert.assertEquals(1, result.getFailedPartitions().size());
      Assert.assertEquals(partitionKey, result.getFailedPartitions().get(0).getPartitionKey());
    }
  });
}
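For contrast with the failure path exercised by this test, a successful run reports its partitions with succeeded = true, which marks them processed instead of retrying or discarding them. A sketch, reusing the test's partitionConsumer:

// Success path, sketched against the same consumer as in the test above.
List<PartitionDetail> partitions = partitionConsumer.consumePartitions(1).getPartitions();
// ... process the partition contents ...
partitionConsumer.onFinish(partitions, true);
// a subsequent consumePartitions() call no longer returns these partitions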