use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class PartitionedFileSetDataset method getPartitions.
/**
 * Scans the partitions table from {@code startKey} to {@code endKey} and hands every accepted
 * partition to the given consumer.
 *
 * <p>Rows whose key cannot be parsed are logged at debug level and skipped. Rows rejected by the
 * filter are skipped as well. Every row that passes the filter counts toward {@code limit}, even
 * if it carries no relative path and is therefore not passed to the consumer.
 *
 * @param filter optional filter applied to each parsed partition key; {@code null} accepts all keys
 * @param consumer receives the key, the relative path and (optionally) the metadata of each partition
 * @param decodeMetadata whether to decode the row's metadata; if false, {@code null} is passed instead
 * @param startKey optional start row key handed to the table scan
 * @param endKey optional end row key handed to the table scan
 * @param limit upper bound on the number of rows counted before the scan stops
 */
private void getPartitions(@Nullable PartitionFilter filter, PartitionConsumer consumer, boolean decodeMetadata, @Nullable byte[] startKey, @Nullable byte[] endKey, long limit) {
  long accepted = 0L;
  try (Scanner scanner = partitionsTable.scan(startKey, endKey)) {
    Row current;
    // the limit is checked before the scanner is advanced, so no extra row is fetched once it is reached
    while (accepted < limit && (current = scanner.next()) != null) {
      PartitionKey partitionKey;
      try {
        partitionKey = parseRowKey(current.getRow(), partitioning);
      } catch (IllegalArgumentException e) {
        // an unparseable row key is not fatal: report it and move on to the next row
        LOG.debug(String.format("Failed to parse row key for partitioned file set '%s': %s", getName(), Bytes.toStringBinary(current.getRow())));
        continue;
      }
      if (filter == null || filter.match(partitionKey)) {
        byte[] relativePath = current.get(RELATIVE_PATH);
        if (relativePath != null) {
          String path = Bytes.toString(relativePath);
          consumer.consume(partitionKey, path, decodeMetadata ? metadataFromRow(current) : null);
        }
        // counted regardless of whether the row had a relative path
        accepted++;
      }
    }
    // nothing passed the filter: let the helper check whether the filter itself is the problem
    if (accepted == 0) {
      warnIfInvalidPartitionFilter(filter, partitioning);
    }
  }
}
use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class PartitionedFileSetDefinition method updateArgumentsIfNeeded.
/**
 * Ensures the arguments carry an output location when one can be derived.
 *
 * <p>If the arguments already contain an output path, the very same map is returned. Otherwise a
 * copy is made (the passed-in map is never modified) and, if an output partition key is present,
 * the output path generated from that key is set on the copy. If there is no key but a
 * DynamicPartitioner is configured, the base output path is set on the copy instead.
 *
 * @param arguments the runtime arguments to inspect
 * @param partitioning the partitioning used to read the output partition key and derive its path
 * @return the original map if it already has an output path, otherwise a (possibly updated) copy
 */
protected static Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments, Partitioning partitioning) {
  if (FileSetArguments.getOutputPath(arguments) != null) {
    // an explicit output path wins; nothing to derive
    return arguments;
  }
  PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(arguments, partitioning);
  // work on a copy so that the caller's map stays untouched
  Map<String, String> updated = Maps.newHashMap(arguments);
  if (outputKey != null) {
    FileSetArguments.setOutputPath(updated, PartitionedFileSetDataset.getOutputPath(outputKey, partitioning));
    return updated;
  }
  if (PartitionedFileSetArguments.getDynamicPartitioner(updated) != null) {
    // with a DynamicPartitioner, the base location of the file set serves as the output location
    FileSetArguments.setBaseOutputPath(updated);
  }
  return updated;
}
use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class PartitionedFileSetDataset method onSuccess.
/**
 * Registers the output partition after a successful write, if there is one to register.
 *
 * <p>Without an output path in the runtime arguments this method does nothing at all. With an
 * output path, a partition is added only when an output partition key is present; the embedded
 * file set's {@code onSuccess()} is invoked afterwards in either case.
 *
 * @throws DataSetException declared by this method's signature
 */
@Override
public void onSuccess() throws DataSetException {
  final String outputPath = FileSetArguments.getOutputPath(runtimeArguments);
  // without an output path there is nothing that could be registered
  if (outputPath != null) {
    // the key may legitimately be absent: with a DynamicPartitioner, the
    // DynamicPartitioningOutputFormat registers the partitions and their metadata itself
    PartitionKey partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning());
    if (partitionKey != null) {
      addPartition(partitionKey, outputPath, PartitionedFileSetArguments.getOutputPartitionMetadata(runtimeArguments), true, false);
    }
    // FileSetDataset#onSuccess is currently a no-op; it is still called in case that changes
    ((FileSetDataset) files).onSuccess();
  }
}
use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class PartitionedFileSetDataset method getOutputFormatConfiguration.
/**
 * Builds the output format configuration for this dataset.
 *
 * <p>The configuration starts as a copy of the embedded file set's output format configuration.
 * If an output partition key is given in the runtime arguments, it is only validated through
 * {@code assertNotExists}. Otherwise a DynamicPartitioner class must be configured; its
 * arguments, the file set's output format class name and this dataset's name are then added.
 *
 * @return an immutable copy of the resulting configuration
 * @throws DataSetException if neither a partition key nor a DynamicPartitioner class is given
 */
@Override
public Map<String, String> getOutputFormatConfiguration() {
  checkNotExternal();
  // start from the output properties of the embedded file set
  Map<String, String> config = new HashMap<>(files.getOutputFormatConfiguration());
  // the file set's output path was already set in the definition's getDataset(); here we only
  // validate that an output partition key or a dynamic partitioner was specified
  PartitionKey partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning());
  if (partitionKey != null) {
    assertNotExists(partitionKey, true);
    return ImmutableMap.copyOf(config);
  }
  if (PartitionedFileSetArguments.getDynamicPartitioner(runtimeArguments) == null) {
    throw new DataSetException("Either a Partition key or a DynamicPartitioner class must be given as a runtime argument.");
  }
  copyDynamicPartitionerArguments(runtimeArguments, config);
  config.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, files.getOutputFormatClassName());
  config.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET, getName());
  return ImmutableMap.copyOf(config);
}
use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
the class TimePartitionedFileSetDataset method getPartitionOutput.
/**
 * Creates a partition output for the partition key derived from the given time.
 *
 * <p>The key is checked with {@code assertNotExists} before the output object is created.
 *
 * @param time the time from which the partition key is derived
 * @return a new {@link BasicTimePartitionOutput} for the derived key and its output path
 * @throws UnsupportedOperationException if this dataset is external
 */
@Override
public TimePartitionOutput getPartitionOutput(long time) {
  if (isExternal) {
    String message = "Output is not supported for external time-partitioned file set '" + spec.getName() + "'";
    throw new UnsupportedOperationException(message);
  }
  final PartitionKey partitionKey = partitionKeyForTime(time);
  assertNotExists(partitionKey, true);
  return new BasicTimePartitionOutput(this, getOutputPath(partitionKey), partitionKey);
}
Aggregations