Search in sources :

Example 1 with FieldType

use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.

the class PartitionedFileSetDataset method validatePartitionKey.

/**
   * Validates the partition key against the partitioning.
   *
   * <p>The key must contain exactly the field names declared by the partitioning, every field must
   * have a non-null value, and every value must be accepted by the {@link FieldType} declared for
   * its field.
   *
   * @param key the partition key to check
   * @param partitioning the partitioning that the key must conform to
   * @throws IllegalArgumentException if the field names differ, a value is missing, or a value is
   *         rejected by its field type; in the last case the field type's exception is attached
   *         as the cause
   */
private static void validatePartitionKey(PartitionKey key, Partitioning partitioning) {
    if (!partitioning.getFields().keySet().equals(key.getFields().keySet())) {
        throw new IllegalArgumentException(String.format("Partition key is invalid: It contains fields %s, but the partitioning requires %s", key.getFields().keySet(), partitioning.getFields().keySet()));
    }
    for (Map.Entry<String, FieldType> entry : partitioning.getFields().entrySet()) {
        String fieldName = entry.getKey();
        FieldType fieldType = entry.getValue();
        Comparable fieldValue = key.getField(fieldName);
        if (fieldValue == null) {
            throw new IllegalArgumentException(String.format("Incomplete partition key: value for field '%s' is missing", fieldName));
        }
        try {
            fieldType.validate(fieldValue);
        } catch (IllegalArgumentException e) {
            // re-throw with the field name for context, keeping the original exception as the cause
            // so that its stack trace is not lost
            throw new IllegalArgumentException(String.format("Invalid partition key: Value for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
        }
    }
}
Also used : Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) FieldType(co.cask.cdap.api.dataset.lib.Partitioning.FieldType)

Example 2 with FieldType

use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.

the class PartitionedFileSetArguments method getOutputPartitionKey.

/**
   * Builds the output partition key from the runtime arguments that carry the output partition
   * key prefix.
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param partitioning the declared partitioning for the dataset; its fields determine which
   *                     arguments are read and the type each string value is converted with
   * @return the partition key of the output partition to be written; or null if the arguments
   *         contain no entries for the output partition key
   */
@Nullable
public static PartitionKey getOutputPartitionKey(Map<String, String> arguments, Partitioning partitioning) {
    Map<String, String> keyArguments = FileSetProperties.propertiesWithPrefix(arguments, OUTPUT_PARTITION_KEY_PREFIX);
    if (!keyArguments.isEmpty()) {
        // a partition key was given: read one value for every field of the partitioning
        PartitionKey.Builder keyBuilder = PartitionKey.builder();
        for (Map.Entry<String, FieldType> field : partitioning.getFields().entrySet()) {
            String name = field.getKey();
            keyBuilder.addField(name, convertFieldValue("key", "value", name, field.getValue(), keyArguments.get(name), false));
        }
        return keyBuilder.build();
    }
    // no output partition key was specified in the arguments
    return null;
}
Also used : Map(java.util.Map) FieldType(co.cask.cdap.api.dataset.lib.Partitioning.FieldType) Nullable(javax.annotation.Nullable)

Example 3 with FieldType

use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.

the class PartitionedFileSetDataset method generateRowKey.

/**
   * Validates the partition key against the partitioning and generates the row key for that partition key.
   *
   * <p>The row key consists of the byte representation of each field value, in the iteration order
   * of the partitioning's fields, with a single zero byte between adjacent values.
   *
   * @param key the partition key to encode
   * @param partitioning the partitioning that determines the fields and their types
   * @return the row key for the partition key
   */
@VisibleForTesting
static byte[] generateRowKey(PartitionKey key, Partitioning partitioning) {
    validatePartitionKey(key, partitioning);
    Map<String, FieldType> fields = partitioning.getFields();
    // convert every field value to bytes and add up the lengths
    ArrayList<byte[]> encodedValues = Lists.newArrayListWithCapacity(fields.size());
    int valueBytes = 0;
    for (Map.Entry<String, FieldType> field : fields.entrySet()) {
        byte[] encoded = FieldTypes.toBytes(key.getField(field.getKey()), field.getValue());
        valueBytes += encoded.length;
        encodedValues.add(encoded);
    }
    // the values plus one zero byte between each pair of adjacent fields
    byte[] rowKey = new byte[valueBytes + fields.size() - 1];
    int position = 0;
    for (byte[] encoded : encodedValues) {
        System.arraycopy(encoded, 0, rowKey, position, encoded.length);
        // skipping one extra position leaves the array's initial zero byte as the separator
        position += encoded.length + 1;
    }
    return rowKey;
}
Also used : Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) FieldType(co.cask.cdap.api.dataset.lib.Partitioning.FieldType) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with FieldType

use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.

the class PartitionedFileSetDataset method parseRowKey.

/**
   * Parses a row key back into a partition key, reading one value per field of the partitioning
   * in the iteration order of its fields.
   *
   * <p>Every field except the first must be preceded by a single zero byte, and the row key must
   * end exactly after the last field.
   *
   * @param rowKey the row key bytes to parse
   * @param partitioning the partitioning that determines the fields and their types
   * @return the partition key built from the values found in the row key
   * @throws IllegalArgumentException if the row key ends too early, a separator is missing, or
   *         bytes remain after the last field
   */
@VisibleForTesting
static PartitionKey parseRowKey(byte[] rowKey, Partitioning partitioning) {
    PartitionKey.Builder keyBuilder = PartitionKey.builder();
    int position = 0;
    // false only for the very first field, which has no separator in front of it
    boolean expectSeparator = false;
    for (Map.Entry<String, FieldType> field : partitioning.getFields().entrySet()) {
        String name = field.getKey();
        FieldType type = field.getValue();
        if (expectSeparator) {
            if (position >= rowKey.length) {
                throw new IllegalArgumentException(String.format("Invalid row key: Expecting field '%s' at offset %d but the end of the row key is reached.", name, position));
            }
            if (rowKey[position] != 0) {
                throw new IllegalArgumentException(String.format("Invalid row key: Expecting field separator \\0 before field '%s' at offset %d but found byte value %x.", name, position, rowKey[position]));
            }
            // step over the separator
            position++;
        }
        expectSeparator = true;
        int length = FieldTypes.determineLengthInBytes(rowKey, position, type);
        if (length + position > rowKey.length) {
            throw new IllegalArgumentException(String.format("Invalid row key: Expecting field '%s' of type %s, requiring %d bytes at offset %d, but only %d bytes remain.", name, type.name(), length, position, rowKey.length - position));
        }
        Comparable value = FieldTypes.fromBytes(rowKey, position, length, type);
        position += length;
        keyBuilder.addField(name, value);
    }
    // all fields have been read; anything left over makes the row key invalid
    if (position != rowKey.length) {
        throw new IllegalArgumentException(String.format("Invalid row key: Read all fields at offset %d but %d extra bytes remain.", position, rowKey.length - position));
    }
    return keyBuilder.build();
}
Also used : PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) FieldType(co.cask.cdap.api.dataset.lib.Partitioning.FieldType) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 5 with FieldType

use of co.cask.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.

the class PartitionedFileSetDataset method generateStartKey.

/**
   * Generates the start key for a scan from the lower bounds of a partition filter.
   *
   * <p>Fields are visited in the iteration order of the partitioning's fields. The start key is
   * built from the lower bounds of the leading fields, and stops at the first field that has no
   * condition in the filter or whose condition has no lower bound. The included values are
   * separated by a single zero byte.
   *
   * @param filter the partition filter; may be null
   * @return the start key, or null if the filter is null or the first field contributes no lower bound
   * @throws IllegalArgumentException if a lower bound is rejected by the type of its field; the
   *         field type's exception is attached as the cause
   */
private byte[] generateStartKey(PartitionFilter filter) {
    if (null == filter) {
        return null;
    }
    // validate partition filter, convert values, and compute size of output
    Map<String, FieldType> partitionFields = partitioning.getFields();
    int totalSize = 0;
    ArrayList<byte[]> values = Lists.newArrayListWithCapacity(partitionFields.size());
    for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
        String fieldName = entry.getKey();
        FieldType fieldType = entry.getValue();
        PartitionFilter.Condition<? extends Comparable> condition = filter.getCondition(fieldName);
        if (condition == null) {
            // this field is not present; we can't include any more fields in the start key
            break;
        }
        Comparable lowerValue = condition.getLower();
        if (lowerValue == null) {
            // this field has no lower bound; we can't include any more fields in the start key
            break;
        }
        try {
            fieldType.validate(lowerValue);
        } catch (IllegalArgumentException e) {
            // re-throw with the field name for context, keeping the original exception as the cause
            // so that its stack trace is not lost
            throw new IllegalArgumentException(String.format("Invalid partition filter: Lower bound for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
        }
        byte[] bytes = FieldTypes.toBytes(lowerValue, fieldType);
        totalSize += bytes.length;
        values.add(bytes);
    }
    if (values.isEmpty()) {
        // not even the first field has a lower bound: there is no start key
        return null;
    }
    // one \0 between each of the fields
    totalSize += values.size() - 1;
    byte[] startKey = new byte[totalSize];
    int offset = 0;
    for (byte[] bytes : values) {
        System.arraycopy(bytes, 0, startKey, offset, bytes.length);
        // this leaves a \0 byte after the value
        offset += bytes.length + 1;
    }
    return startKey;
}
Also used : PartitionFilter(co.cask.cdap.api.dataset.lib.PartitionFilter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) FieldType(co.cask.cdap.api.dataset.lib.Partitioning.FieldType)

Aggregations

FieldType (co.cask.cdap.api.dataset.lib.Partitioning.FieldType): 6, Map (java.util.Map): 6, ImmutableMap (com.google.common.collect.ImmutableMap): 5, HashMap (java.util.HashMap): 5, PartitionFilter (co.cask.cdap.api.dataset.lib.PartitionFilter): 2, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2, PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 1, Nullable (javax.annotation.Nullable): 1