use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.
the class PartitionedFileSetDataset method validatePartitionKey.
/**
 * Validates the partition key against the partitioning.
 *
 * <p>The key must define exactly the set of field names declared by the partitioning, every
 * field must have a non-null value, and every value must pass {@code FieldType.validate} for
 * the type declared for its field.
 *
 * @param key the partition key to validate
 * @param partitioning the partitioning that the key must conform to
 * @throws IllegalArgumentException if the field names of the key differ from those of the
 *         partitioning, if the value of a field is missing, or if a value is rejected by the
 *         type of its field (in that case the rejecting exception is attached as the cause)
 */
private static void validatePartitionKey(PartitionKey key, Partitioning partitioning) {
  Map<String, FieldType> partitionFields = partitioning.getFields();
  if (!partitionFields.keySet().equals(key.getFields().keySet())) {
    throw new IllegalArgumentException(String.format("Partition key is invalid: It contains fields %s, but the partitioning requires %s", key.getFields().keySet(), partitionFields.keySet()));
  }
  for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
    String fieldName = entry.getKey();
    FieldType fieldType = entry.getValue();
    Comparable fieldValue = key.getField(fieldName);
    if (fieldValue == null) {
      throw new IllegalArgumentException(String.format("Incomplete partition key: value for field '%s' is missing", fieldName));
    }
    try {
      fieldType.validate(fieldValue);
    } catch (IllegalArgumentException e) {
      // chain the original exception so that its stack trace is not lost when rethrowing
      throw new IllegalArgumentException(String.format("Invalid partition key: Value for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
    }
  }
}
use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.
the class PartitionedFileSetArguments method getOutputPartitionKey.
/**
 * Builds the output partition key from the runtime arguments of a partitioned dataset.
 *
 * <p>Only the arguments carrying the output partition key prefix are considered. If there are
 * any, one field is added to the key for every field of the partitioning, with the string value
 * of the argument converted according to the type of that field.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param partitioning the declared partitioning for the dataset, needed for proper interpretation of values
 * @return the partition key of the output partition to be written; or null if no partition key was found
 */
@Nullable
public static PartitionKey getOutputPartitionKey(Map<String, String> arguments, Partitioning partitioning) {
  // narrow the arguments down to the ones that describe the output partition key
  Map<String, String> outputKeyArgs = FileSetProperties.propertiesWithPrefix(arguments, OUTPUT_PARTITION_KEY_PREFIX);
  if (outputKeyArgs.isEmpty()) {
    // no output partition key was given
    return null;
  }
  // a partition key was given; build it field by field, as dictated by the partitioning
  PartitionKey.Builder keyBuilder = PartitionKey.builder();
  for (Map.Entry<String, FieldType> field : partitioning.getFields().entrySet()) {
    String name = field.getKey();
    keyBuilder.addField(name, convertFieldValue("key", "value", name, field.getValue(), outputKeyArgs.get(name), false));
  }
  return keyBuilder.build();
}
use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.
the class PartitionedFileSetDataset method generateRowKey.
/**
 * Validates the partition key against the partitioning and generates the row key for that partition key.
 *
 * <p>The row key consists of the byte representations of the field values, in the iteration
 * order of the partitioning's fields, with a single {@code \0} byte between adjacent values.
 *
 * @param key the partition key to encode
 * @param partitioning the partitioning that determines the fields and their types
 * @return the row key for the partition key
 */
@VisibleForTesting
static byte[] generateRowKey(PartitionKey key, Partitioning partitioning) {
  validatePartitionKey(key, partitioning);
  Map<String, FieldType> fields = partitioning.getFields();
  // first pass: convert every field value to bytes and add up the lengths
  ArrayList<byte[]> encodedValues = Lists.newArrayListWithCapacity(fields.size());
  int valueBytes = 0;
  for (Map.Entry<String, FieldType> field : fields.entrySet()) {
    byte[] encoded = FieldTypes.toBytes(key.getField(field.getKey()), field.getValue());
    valueBytes += encoded.length;
    encodedValues.add(encoded);
  }
  // second pass: copy the values into the row key; there is one \0 between each of the fields
  byte[] rowKey = new byte[valueBytes + fields.size() - 1];
  int position = 0;
  for (byte[] encoded : encodedValues) {
    System.arraycopy(encoded, 0, rowKey, position, encoded.length);
    // step over one extra byte: it is still zero from the allocation and acts as the separator
    position += encoded.length + 1;
  }
  return rowKey;
}
use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.
the class PartitionedFileSetDataset method parseRowKey.
/**
 * Parses a row key into a partition key, according to the given partitioning.
 *
 * <p>The fields are read in the iteration order of the partitioning's fields. Every field except
 * the first must be preceded by a {@code \0} separator byte, and the row key must end exactly
 * after the last field.
 *
 * @param rowKey the row key to parse
 * @param partitioning the partitioning that determines the fields and their types
 * @return the partition key with one value for every field of the partitioning
 * @throws IllegalArgumentException if the row key ends before all fields are read, if a field
 *         separator is missing, if a field requires more bytes than remain, or if bytes are left
 *         over after the last field
 */
@VisibleForTesting
static PartitionKey parseRowKey(byte[] rowKey, Partitioning partitioning) {
  PartitionKey.Builder keyBuilder = PartitionKey.builder();
  int position = 0;
  // no separator precedes the first field; every later field must follow a \0 byte
  boolean expectSeparator = false;
  for (Map.Entry<String, FieldType> field : partitioning.getFields().entrySet()) {
    String name = field.getKey();
    FieldType type = field.getValue();
    if (expectSeparator) {
      if (position >= rowKey.length) {
        throw new IllegalArgumentException(String.format(
          "Invalid row key: Expecting field '%s' at offset %d but the end of the row key is reached.",
          name, position));
      }
      byte separator = rowKey[position];
      if (separator != 0) {
        throw new IllegalArgumentException(String.format(
          "Invalid row key: Expecting field separator \\0 before field '%s' at offset %d but found byte value %x.",
          name, position, separator));
      }
      position++;
    }
    expectSeparator = true;
    int length = FieldTypes.determineLengthInBytes(rowKey, position, type);
    if (length + position > rowKey.length) {
      throw new IllegalArgumentException(String.format(
        "Invalid row key: Expecting field '%s' of type %s, requiring %d bytes at offset %d, but only %d bytes remain.",
        name, type.name(), length, position, rowKey.length - position));
    }
    Comparable value = FieldTypes.fromBytes(rowKey, position, length, type);
    position += length;
    keyBuilder.addField(name, value);
  }
  // anything left after the last field means the row key does not match the partitioning
  if (position != rowKey.length) {
    throw new IllegalArgumentException(String.format(
      "Invalid row key: Read all fields at offset %d but %d extra bytes remain.",
      position, rowKey.length - position));
  }
  return keyBuilder.build();
}
use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.
the class PartitionedFileSetDataset method generateStartKey.
/**
 * Generates a start key from the lower bounds of the conditions of a partition filter.
 *
 * <p>The fields of the partitioning are visited in iteration order. The lower bound of each
 * field's condition is validated against the field type and converted to bytes, until a field is
 * reached that has no condition or whose condition has no lower bound. The start key is the
 * concatenation of the converted values, with a single {@code \0} byte between adjacent values.
 *
 * @param filter the partition filter; may be null
 * @return the start key; or null if the filter is null or if the first field of the partitioning
 *         has no condition with a lower bound
 * @throws IllegalArgumentException if a lower bound is rejected by the type of its field (the
 *         rejecting exception is attached as the cause)
 */
private byte[] generateStartKey(PartitionFilter filter) {
  if (null == filter) {
    return null;
  }
  // validate partition filter, convert values, and compute size of output
  Map<String, FieldType> partitionFields = partitioning.getFields();
  int totalSize = 0;
  ArrayList<byte[]> values = Lists.newArrayListWithCapacity(partitionFields.size());
  for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
    String fieldName = entry.getKey();
    FieldType fieldType = entry.getValue();
    PartitionFilter.Condition<? extends Comparable> condition = filter.getCondition(fieldName);
    if (condition == null) {
      // this field is not present; we can't include any more fields in the start key
      break;
    }
    Comparable lowerValue = condition.getLower();
    if (lowerValue == null) {
      // this field has no lower bound; we can't include any more fields in the start key
      break;
    }
    try {
      fieldType.validate(lowerValue);
    } catch (IllegalArgumentException e) {
      // chain the original exception so that its stack trace is not lost when rethrowing
      throw new IllegalArgumentException(String.format("Invalid partition filter: Lower bound for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
    }
    byte[] bytes = FieldTypes.toBytes(lowerValue, fieldType);
    totalSize += bytes.length;
    values.add(bytes);
  }
  if (values.isEmpty()) {
    // not even the first field contributes a lower bound, hence there is no start key
    return null;
  }
  // one \0 between each of the fields
  totalSize += values.size() - 1;
  byte[] startKey = new byte[totalSize];
  int offset = 0;
  for (byte[] bytes : values) {
    System.arraycopy(bytes, 0, startKey, offset, bytes.length);
    // this leaves a \0 byte after the value
    offset += bytes.length + 1;
  }
  return startKey;
}
Aggregations