Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
Class MultiWriter, method write.
public void write(K key, V value) throws IOException, InterruptedException {
  PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
  RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
  if (rw == null) {
    // if we don't have the record writer yet for the final path, create one and add it to the cache
    TaskAttemptContext taskAttemptContext = getKeySpecificContext(partitionKey);
    rw = getBaseRecordWriter(taskAttemptContext);
    this.recordWriters.put(partitionKey, rw);
    this.contexts.put(partitionKey, taskAttemptContext);
  }
  rw.write(key, value);
}
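The dynamicPartitioner consulted above is a user-supplied strategy object. For orientation, here is a minimal sketch of such a partitioner, assuming CDAP's DynamicPartitioner API with its single abstract getPartitionKey(K, V) method; the class name, the field name "category", and the comma-split logic are purely illustrative:

import co.cask.cdap.api.dataset.lib.DynamicPartitioner;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

// routes each record to a partition derived from the record itself; records that map
// to the same key are funneled to the same cached RecordWriter in MultiWriter.write
public class FirstTokenPartitioner extends DynamicPartitioner<NullWritable, Text> {
  @Override
  public PartitionKey getPartitionKey(NullWritable key, Text value) {
    // illustrative: partition on the first comma-separated token of the line
    String firstToken = value.toString().split(",", 2)[0];
    return PartitionKey.builder().addStringField("category", firstToken).build();
  }
}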
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
Class MultiWriter, method close.
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  try {
    // copy into a map with wildcard value types so it matches closeRecordWriters' signature
    Map<PartitionKey, RecordWriter<?, ?>> recordWriters = new HashMap<>();
    recordWriters.putAll(this.recordWriters);
    MultipleOutputs.closeRecordWriters(recordWriters, contexts);
    taskContext.flushOperations();
  } catch (Exception e) {
    throw new IOException(e);
  } finally {
    dynamicPartitioner.destroy();
  }
}
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
Class ConnectorSink, method prepareRun.
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  Map<String, String> arguments = new HashMap<>();
  PartitionKey outputPartition = PartitionKey.builder().addStringField("phase", phaseName).build();
  PartitionedFileSetArguments.setOutputPartitionKey(arguments, outputPartition);
  context.addOutput(datasetName, arguments);
}
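Since the arguments map is a plain string-to-string encoding, the key written here can be decoded again with the matching getter (the same one PartitionedFileSetDataset uses below). A round-trip sketch, assuming a Partitioning with a single string field "phase" (as the key above implies) and CDAP's Partitioning.builder() and PartitionKey.getField() APIs:

import java.util.HashMap;
import java.util.Map;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.PartitionedFileSetArguments;
import co.cask.cdap.api.dataset.lib.Partitioning;

public class OutputKeyRoundTrip {
  public static void main(String[] args) {
    Map<String, String> arguments = new HashMap<>();
    PartitionKey written = PartitionKey.builder().addStringField("phase", "phase-1").build();
    PartitionedFileSetArguments.setOutputPartitionKey(arguments, written);

    // decode the key back out of the same argument map
    Partitioning partitioning = Partitioning.builder().addStringField("phase").build();
    PartitionKey readBack = PartitionedFileSetArguments.getOutputPartitionKey(arguments, partitioning);
    System.out.println(readBack.getField("phase")); // expected: phase-1
  }
}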
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
Class TimePartitionedFileSetDataset, method partitionFiltersForTimeRange.
// returns a list of partition filters that cover the specified time range.
// this may return a list with a single null filter (in case the range is unbounded in both directions)
@VisibleForTesting
static List<PartitionFilter> partitionFiltersForTimeRange(long startTime, long endTime) {
  // unsatisfiable range
  if (startTime >= endTime) {
    return Collections.emptyList();
  }
  PartitionKey keyLower = startTime <= 0 ? null : partitionKeyForTime(startTime);
  PartitionKey keyUpper = endTime == Long.MAX_VALUE ? null : partitionKeyForTime(endTime);
  // no bounds -> no filter
  if (keyLower == null && keyUpper == null) {
    // no filter needed to select all time
    return Collections.singletonList(null);
  }
  List<PartitionFilter> filters = Lists.newArrayList();
  String[] allFields = PARTITIONING.getFields().keySet().toArray(new String[PARTITIONING.getFields().size()]);
  // if there is no lower bound, we only need the filters for the upper bound
  if (keyLower == null) {
    addUpperFilters(allFields, 0, keyUpper, filters, initialSupplier());
    return filters;
  }
  // if there is no upper bound, we only need the filters for the lower bound
  if (keyUpper == null) {
    addLowerFilters(allFields, 0, keyLower, filters, initialSupplier());
    return filters;
  }
  return filtersFor(allFields, 0, keyLower, keyUpper, filters, initialSupplier());
}
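The two early returns read directly off the code: an inverted range is unsatisfiable, and a range unbounded on both sides needs no filtering at all. A test-style sketch of those edge cases, assuming it lives in the same package as the @VisibleForTesting package-private method:

import java.util.List;
import co.cask.cdap.api.dataset.lib.PartitionFilter;

public class TimeRangeFilterSketch {
  static void edgeCases() {
    // unsatisfiable: startTime >= endTime yields no filters at all
    List<PartitionFilter> none = TimePartitionedFileSetDataset.partitionFiltersForTimeRange(100L, 100L);
    // none.isEmpty() == true

    // unbounded on both sides: a single null filter, meaning "select everything"
    List<PartitionFilter> all = TimePartitionedFileSetDataset.partitionFiltersForTimeRange(0L, Long.MAX_VALUE);
    // all.size() == 1 && all.get(0) == null
  }
}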
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
Class PartitionedFileSetDataset, method getOutputFormatConfiguration.
@Override
public Map<String, String> getOutputFormatConfiguration() {
  if (isExternal) {
    throw new UnsupportedOperationException(
      "Output is not supported for external partitioned file set '" + spec.getName() + "'");
  }
  // copy the output properties of the embedded file set to the output arguments
  Map<String, String> outputArgs = new HashMap<>(files.getOutputFormatConfiguration());
  // we set the file set's output path in the definition's getDataset(), so there is no need to configure it again.
  // here we just want to validate that an output partition key or dynamic partitioner was specified in the arguments.
  PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning());
  if (outputKey == null) {
    String dynamicPartitionerClassName = PartitionedFileSetArguments.getDynamicPartitioner(runtimeArguments);
    if (dynamicPartitionerClassName == null) {
      throw new DataSetException(
        "Either a partition key or a DynamicPartitioner class must be given as a runtime argument.");
    }
    // propagate output metadata into the OutputFormat configuration so that DynamicPartitionerOutputCommitter
    // can assign the metadata when it creates the partitions
    Map<String, String> outputMetadata = PartitionedFileSetArguments.getOutputPartitionMetadata(runtimeArguments);
    PartitionedFileSetArguments.setOutputPartitionMetadata(outputArgs, outputMetadata);
    PartitionedFileSetArguments.setDynamicPartitioner(outputArgs, dynamicPartitionerClassName);
    PartitionedFileSetArguments.setDynamicPartitionerConcurrency(
      outputArgs, PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(runtimeArguments));
    outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, files.getOutputFormatClassName());
    outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET, getName());
  }
  return ImmutableMap.copyOf(outputArgs);
}
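The validation above means a caller must supply one of two runtime arguments before this dataset can accept output. A minimal sketch of both options, using only the setters that appear in this class; the partitioner class name is illustrative:

import java.util.HashMap;
import java.util.Map;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.PartitionedFileSetArguments;

public class OutputArgumentsSketch {
  static Map<String, String> buildRuntimeArgs() {
    Map<String, String> runtimeArgs = new HashMap<>();

    // option 1: a fixed output partition key, known before the job runs
    PartitionedFileSetArguments.setOutputPartitionKey(
      runtimeArgs, PartitionKey.builder().addStringField("phase", "raw").build());

    // option 2 (alternative): a DynamicPartitioner derives the key per record at write time,
    // as in MultiWriter.write above
    // PartitionedFileSetArguments.setDynamicPartitioner(runtimeArgs, "com.example.FirstTokenPartitioner");

    return runtimeArgs;
  }
}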