Search in sources :

Example 6 with FieldType

use of io.cdap.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by cdapio.

the class PartitionedFileSetDataset method parseRowKey.

/**
 * Decodes a row key into a partition key, reading one value per field of the partitioning
 * in the iteration order of {@code partitioning.getFields()}.
 *
 * Every field after the first must be preceded by a single zero byte. The whole row key must
 * be consumed by the declared fields.
 *
 * @param rowKey the raw row key bytes
 * @param partitioning the partitioning whose fields determine how the bytes are interpreted
 * @return the partition key built from the decoded field values
 * @throws IllegalArgumentException if a separator is missing, a field is truncated, or bytes are left over
 */
@VisibleForTesting
static PartitionKey parseRowKey(byte[] rowKey, Partitioning partitioning) {
    PartitionKey.Builder keyBuilder = PartitionKey.builder();
    int position = 0;
    // false only for the very first field; every later field is preceded by a \0 separator
    boolean separatorExpected = false;
    for (Map.Entry<String, FieldType> field : partitioning.getFields().entrySet()) {
        String name = field.getKey();
        FieldType type = field.getValue();
        if (separatorExpected) {
            if (position >= rowKey.length) {
                String message = String.format("Invalid row key: Expecting field '%s' at offset %d " + "but the end of the row key is reached.", name, position);
                throw new IllegalArgumentException(message);
            }
            byte separator = rowKey[position];
            if (separator != 0) {
                String message = String.format("Invalid row key: Expecting field separator \\0 before field '%s' at offset %d " + "but found byte value %x.", name, position, separator);
                throw new IllegalArgumentException(message);
            }
            position++;
        }
        separatorExpected = true;

        // the length of the value depends on the field type and, possibly, on the bytes themselves
        int length = FieldTypes.determineLengthInBytes(rowKey, position, type);
        if (length + position > rowKey.length) {
            int remaining = rowKey.length - position;
            String message = String.format("Invalid row key: Expecting field '%s' of type %s, " + "requiring %d bytes at offset %d, but only %d bytes remain.", name, type.name(), length, position, remaining);
            throw new IllegalArgumentException(message);
        }
        Comparable value = FieldTypes.fromBytes(rowKey, position, length, type);
        position += length;
        keyBuilder.addField(name, value);
    }
    // all declared fields were read; anything left over makes the row key invalid
    int extraBytes = rowKey.length - position;
    if (extraBytes != 0) {
        throw new IllegalArgumentException(String.format("Invalid row key: Read all fields at offset %d but %d extra bytes remain.", position, extraBytes));
    }
    return keyBuilder.build();
}
Also used : PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) FieldType(io.cdap.cdap.api.dataset.lib.Partitioning.FieldType) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 7 with FieldType

use of io.cdap.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by cdapio.

the class PartitionedFileSetArguments method getOutputPartitionKey.

/**
 * Reads the output partition key from the runtime arguments of a partitioned dataset.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param partitioning the declared partitioning for the dataset, needed for proper interpretation of values
 * @return the partition key of the output partition to be written; or null if no partition key was found
 */
@Nullable
public static PartitionKey getOutputPartitionKey(Map<String, String> arguments, Partitioning partitioning) {
    // select only the arguments under the output partition key prefix
    Map<String, String> keyArguments = FileSetProperties.propertiesWithPrefix(arguments, OUTPUT_PARTITION_KEY_PREFIX);
    if (!keyArguments.isEmpty()) {
        // a key was given: build it field by field, in the order declared by the partitioning
        PartitionKey.Builder keyBuilder = PartitionKey.builder();
        for (Map.Entry<String, FieldType> field : partitioning.getFields().entrySet()) {
            String name = field.getKey();
            FieldType type = field.getValue();
            // the argument value is a string; convert it to the type declared for this field
            Comparable converted = convertFieldValue("key", "value", name, type, keyArguments.get(name), false);
            keyBuilder.addField(name, converted);
        }
        return keyBuilder.build();
    }
    // no argument describes an output partition key
    return null;
}
Also used : Map(java.util.Map) FieldType(io.cdap.cdap.api.dataset.lib.Partitioning.FieldType) Nullable(javax.annotation.Nullable)

Example 8 with FieldType

use of io.cdap.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.

the class PartitionedFileSetDataset method generateStopKey.

/**
 * Builds the stop key of a scan from the upper bounds of a partition filter.
 *
 * Fields are visited in the iteration order of the partitioning. The key is made of the encoded
 * upper bounds of the leading fields, separated by a \0 byte. It ends at the first field that has
 * no condition or no upper bound, or right after the first field whose condition is not a single
 * value. If every included condition is a single value, a trailing \1 byte is appended so that
 * the scan is not empty.
 *
 * @param filter the partition filter; may be null
 * @return the stop key, or null if the filter is null or no field contributes an upper bound
 * @throws IllegalArgumentException if an upper bound is incompatible with the field's type
 */
private byte[] generateStopKey(PartitionFilter filter) {
    if (null == filter) {
        return null;
    }
    // validate partition filter, convert values, and compute size of output
    Map<String, FieldType> partitionFields = partitioning.getFields();
    int totalSize = 0;
    boolean allSingleValue = true;
    ArrayList<byte[]> values = Lists.newArrayListWithCapacity(partitionFields.size());
    for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
        String fieldName = entry.getKey();
        FieldType fieldType = entry.getValue();
        PartitionFilter.Condition<? extends Comparable> condition = filter.getCondition(fieldName);
        if (condition == null) {
            // this field is not present; we can't include any more fields in the stop key
            break;
        }
        Comparable upperValue = condition.getUpper();
        if (upperValue == null) {
            // this field has no upper bound; we can't include any more fields in the stop key
            break;
        }
        try {
            fieldType.validate(upperValue);
        } catch (IllegalArgumentException e) {
            // rethrow with context, keeping the original exception as the cause
            throw new IllegalArgumentException(String.format("Invalid partition filter: Upper bound for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
        }
        byte[] bytes = FieldTypes.toBytes(upperValue, fieldType);
        totalSize += bytes.length;
        values.add(bytes);
        if (!condition.isSingleValue()) {
            allSingleValue = false;
            // upper bound for this field, following fields don't matter
            break;
        }
    }
    if (values.isEmpty()) {
        return null;
    }
    // one \0 between each of the fields
    totalSize += values.size() - 1;
    if (allSingleValue) {
        // in this case the start and stop key are equal, we append one \1 to ensure the scan is not empty
        totalSize++;
    }
    // a new byte array is zero-filled, so the separators need no explicit write
    byte[] stopKey = new byte[totalSize];
    int offset = 0;
    for (byte[] bytes : values) {
        System.arraycopy(bytes, 0, stopKey, offset, bytes.length);
        // this leaves a \0 byte after the value
        offset += bytes.length + 1;
        if (allSingleValue && offset == stopKey.length) {
            // see above - we write \1 instead of \0 at the end, to make sure scan is not empty
            stopKey[offset - 1] = 1;
        }
    }
    return stopKey;
}
Also used : FieldType(io.cdap.cdap.api.dataset.lib.Partitioning.FieldType) PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 9 with FieldType

use of io.cdap.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.

the class PartitionedFileSetDataset method generateStartKey.

/**
 * Builds the start key of a scan from the lower bounds of a partition filter.
 *
 * Fields are visited in the iteration order of the partitioning. The key is made of the encoded
 * lower bounds of the leading fields, separated by a \0 byte. It ends at the first field that has
 * no condition or no lower bound.
 *
 * @param filter the partition filter; may be null
 * @return the start key, or null if the filter is null or no field contributes a lower bound
 * @throws IllegalArgumentException if a lower bound is incompatible with the field's type
 */
private byte[] generateStartKey(PartitionFilter filter) {
    if (null == filter) {
        return null;
    }
    // validate partition filter, convert values, and compute size of output
    Map<String, FieldType> partitionFields = partitioning.getFields();
    int totalSize = 0;
    ArrayList<byte[]> values = Lists.newArrayListWithCapacity(partitionFields.size());
    for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
        String fieldName = entry.getKey();
        FieldType fieldType = entry.getValue();
        PartitionFilter.Condition<? extends Comparable> condition = filter.getCondition(fieldName);
        if (condition == null) {
            // this field is not present; we can't include any more fields in the start key
            break;
        }
        Comparable lowerValue = condition.getLower();
        if (lowerValue == null) {
            // this field has no lower bound; we can't include any more fields in the start key
            break;
        }
        try {
            fieldType.validate(lowerValue);
        } catch (IllegalArgumentException e) {
            // rethrow with context, keeping the original exception as the cause
            throw new IllegalArgumentException(String.format("Invalid partition filter: Lower bound for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
        }
        byte[] bytes = FieldTypes.toBytes(lowerValue, fieldType);
        totalSize += bytes.length;
        values.add(bytes);
    }
    if (values.isEmpty()) {
        return null;
    }
    // one \0 between each of the fields
    totalSize += values.size() - 1;
    // a new byte array is zero-filled, so the separators need no explicit write
    byte[] startKey = new byte[totalSize];
    int offset = 0;
    for (byte[] bytes : values) {
        System.arraycopy(bytes, 0, startKey, offset, bytes.length);
        // this leaves a \0 byte after the value
        offset += bytes.length + 1;
    }
    return startKey;
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) FieldType(io.cdap.cdap.api.dataset.lib.Partitioning.FieldType)

Example 10 with FieldType

use of io.cdap.cdap.api.dataset.lib.Partitioning.FieldType in project cdap by caskdata.

the class PartitionedFileSetDataset method validatePartitionKey.

/**
 * Validates the partition key against the partitioning.
 *
 * The key must have exactly the field names of the partitioning, and each field must carry a
 * non-null value that the field's type accepts.
 *
 * @param key the partition key to validate
 * @param partitioning the partitioning that the key must conform to
 * @throws IllegalArgumentException if the field names differ, a value is missing, or a value is
 *         rejected by the field type
 */
private static void validatePartitionKey(PartitionKey key, Partitioning partitioning) {
    if (!partitioning.getFields().keySet().equals(key.getFields().keySet())) {
        throw new IllegalArgumentException(String.format("Partition key is invalid: It contains fields %s, but the partitioning requires %s", key.getFields().keySet(), partitioning.getFields().keySet()));
    }
    for (Map.Entry<String, FieldType> entry : partitioning.getFields().entrySet()) {
        String fieldName = entry.getKey();
        FieldType fieldType = entry.getValue();
        Comparable fieldValue = key.getField(fieldName);
        if (fieldValue == null) {
            throw new IllegalArgumentException(String.format("Incomplete partition key: value for field '%s' is missing", fieldName));
        }
        try {
            fieldType.validate(fieldValue);
        } catch (IllegalArgumentException e) {
            // rethrow with context, keeping the original exception as the cause
            throw new IllegalArgumentException(String.format("Invalid partition key: Value for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
        }
    }
}
Also used : Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) FieldType(io.cdap.cdap.api.dataset.lib.Partitioning.FieldType)

Aggregations

FieldType (io.cdap.cdap.api.dataset.lib.Partitioning.FieldType): 12, Map (java.util.Map): 12, ImmutableMap (com.google.common.collect.ImmutableMap): 10, HashMap (java.util.HashMap): 10, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 4, PartitionFilter (io.cdap.cdap.api.dataset.lib.PartitionFilter): 4, PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey): 2, Nullable (javax.annotation.Nullable): 2