Search in sources :

Example 6 with PartitionFilter

use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.

the class PartitionFilterTest method testIncompatibleMatch.

@Test(expected = IllegalArgumentException.class)
public void testIncompatibleMatch() {
    // the filter constrains "month" with an integer range
    PartitionFilter filter = PartitionFilter.builder()
        .addValueCondition("year", 2012)
        .addRangeCondition("month", 4, 7)
        .addValueCondition("market", "asia")
        .build();
    // the key supplies a string for "month", which is incompatible with the filter's integer range
    PartitionKey mismatchedKey = PartitionKey.builder()
        .addField("month", "january")
        .addField("market", "latin")
        .addField("year", 2012)
        .build();
    // matching is expected to be rejected with an IllegalArgumentException
    filter.match(mismatchedKey);
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) Test(org.junit.Test)

Example 7 with PartitionFilter

use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.

the class PartitionedFileSetArgumentsTest method testSetGetInputPartitionFilter.

@Test
public void testSetGetInputPartitionFilter() throws Exception {
    // case 1: a range condition combined with two value conditions survives the round trip
    PartitionFilter threeConditions = PartitionFilter.builder()
        .addRangeCondition("i", 30, 40)
        .addValueCondition("l", 17L)
        .addValueCondition("s", "x")
        .build();
    Map<String, String> firstArgs = new HashMap<>();
    PartitionedFileSetArguments.setInputPartitionFilter(firstArgs, threeConditions);
    Assert.assertEquals(threeConditions, PartitionedFileSetArguments.getInputPartitionFilter(firstArgs));

    // case 2: a filter that has no condition for field "l"
    PartitionFilter twoConditions = PartitionFilter.builder()
        .addRangeCondition("i", 30, 40)
        .addValueCondition("s", "x")
        .build();
    Map<String, String> secondArgs = new HashMap<>();
    PartitionedFileSetArguments.setInputPartitionFilter(secondArgs, twoConditions);
    Assert.assertEquals(twoConditions, PartitionedFileSetArguments.getInputPartitionFilter(secondArgs));

    // case 3: the predefined ALWAYS_MATCH filter
    PartitionFilter matchAll = PartitionFilter.ALWAYS_MATCH;
    Map<String, String> thirdArgs = new HashMap<>();
    PartitionedFileSetArguments.setInputPartitionFilter(thirdArgs, matchAll);
    Assert.assertEquals(matchAll, PartitionedFileSetArguments.getInputPartitionFilter(thirdArgs));
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 8 with PartitionFilter

use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.

the class TimePartitionedFileSetDataset method partitionFiltersForTimeRange.

// Computes the list of partition filters that together cover the specified time range.
// The result may be a list containing a single null filter (when the range is unbounded in both directions),
// or an empty list (when the range cannot be satisfied).
@VisibleForTesting
static List<PartitionFilter> partitionFiltersForTimeRange(long startTime, long endTime) {
    if (endTime <= startTime) {
        // unsatisfiable range
        return Collections.emptyList();
    }

    // a start time of zero or less means there is no lower bound
    PartitionKey lowerBound = null;
    if (startTime > 0) {
        lowerBound = partitionKeyForTime(startTime);
    }
    // an end time of Long.MAX_VALUE means there is no upper bound
    PartitionKey upperBound = null;
    if (endTime != Long.MAX_VALUE) {
        upperBound = partitionKeyForTime(endTime);
    }

    if (lowerBound == null && upperBound == null) {
        // unbounded on both sides: no filter is needed to select all time
        return Collections.singletonList(null);
    }

    List<PartitionFilter> result = Lists.newArrayList();
    String[] fieldNames = PARTITIONING.getFields().keySet().toArray(new String[PARTITIONING.getFields().size()]);

    if (lowerBound != null && upperBound != null) {
        // bounded on both sides
        return filtersFor(fieldNames, 0, lowerBound, upperBound, result, initialSupplier());
    }

    if (lowerBound == null) {
        // only an upper bound: the upper-bound filters suffice
        addUpperFilters(fieldNames, 0, upperBound, result, initialSupplier());
    } else {
        // only a lower bound: the lower-bound filters suffice
        addLowerFilters(fieldNames, 0, lowerBound, result, initialSupplier());
    }
    return result;
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 9 with PartitionFilter

use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.

the class PartitionedFileSetDataset method generateStartKey.

/**
 * Builds a start key from the lower bounds of the given filter's conditions.
 *
 * <p>The partitioning fields are visited in the iteration order of {@code partitioning.getFields()}.
 * Each field's lower bound is validated against the field type and converted to bytes. The walk stops
 * at the first field that has no condition in the filter or whose condition has no lower bound. The
 * converted values are concatenated with a single {@code \0} byte between consecutive values.
 *
 * @param filter the partition filter; may be null
 * @return the start key, or null if the filter is null or no leading field contributes a lower bound
 * @throws IllegalArgumentException if a lower bound fails validation against its field type; the
 *     validation failure is attached as the cause
 */
private byte[] generateStartKey(PartitionFilter filter) {
    if (null == filter) {
        return null;
    }
    // validate partition filter, convert values, and compute size of output
    Map<String, FieldType> partitionFields = partitioning.getFields();
    int totalSize = 0;
    ArrayList<byte[]> values = Lists.newArrayListWithCapacity(partitionFields.size());
    for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
        String fieldName = entry.getKey();
        FieldType fieldType = entry.getValue();
        PartitionFilter.Condition<? extends Comparable> condition = filter.getCondition(fieldName);
        if (condition == null) {
            // this field is not present; we can't include any more fields in the start key
            break;
        }
        Comparable lowerValue = condition.getLower();
        if (lowerValue == null) {
            // this field has no lower bound; we can't include any more fields in the start key
            break;
        }
        try {
            fieldType.validate(lowerValue);
        } catch (IllegalArgumentException e) {
            // chain the validation failure as the cause so its stack trace is not lost
            throw new IllegalArgumentException(String.format("Invalid partition filter: Lower bound for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
        }
        byte[] bytes = FieldTypes.toBytes(lowerValue, fieldType);
        totalSize += bytes.length;
        values.add(bytes);
    }
    if (values.isEmpty()) {
        return null;
    }
    // one \0 between each of the fields
    totalSize += values.size() - 1;
    // a freshly allocated byte array is zero-filled, so the separators need not be written explicitly
    byte[] startKey = new byte[totalSize];
    int offset = 0;
    for (byte[] bytes : values) {
        System.arraycopy(bytes, 0, startKey, offset, bytes.length);
        // this leaves a \0 byte after the value
        offset += bytes.length + 1;
    }
    return startKey;
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) FieldType(io.cdap.cdap.api.dataset.lib.Partitioning.FieldType)

Example 10 with PartitionFilter

use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.

the class TimePartitionedFileSetTest method testTimePartitionedInputArguments.

@Test
public void testTimePartitionedInputArguments() throws Exception {
    // two partition times one hour apart, each with its own path
    final long time8 = DATE_FORMAT.parse("10/17/2014 8:42 am").getTime();
    final long time9 = DATE_FORMAT.parse("10/17/2014 9:42 am").getTime();
    final String path8 = "8:42";
    final String path9 = "9:42";
    // a filter with a lower bound of 9 and no upper bound on the "hour" field
    final PartitionFilter filter9 = PartitionFilter.builder().addRangeCondition("hour", 9, null).build();

    // register both partitions inside a transaction
    final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
    final TransactionAware txAware = (TransactionAware) tpfs;
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAware).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            tpfs.addPartition(time8, path8);
            tpfs.addPartition(time9, path9);
        }
    });

    // a window of thirty minutes on either side of the first partition time
    final long windowStart = time8 - 30 * MINUTE;
    final long windowEnd = time8 + 30 * MINUTE;

    // a time range alone selects the first partition
    Map<String, String> arguments = Maps.newHashMap();
    TimePartitionedFileSetArguments.setInputStartTime(arguments, windowStart);
    TimePartitionedFileSetArguments.setInputEndTime(arguments, windowEnd);
    testInputConfiguration(arguments, path8);

    // adding a partition filter on top of the time range must not change the outcome
    TimePartitionedFileSetArguments.setInputPartitionFilter(arguments, filter9);
    testInputConfiguration(arguments, path8);

    // the partition filter alone selects the second partition
    arguments.clear();
    TimePartitionedFileSetArguments.setInputPartitionFilter(arguments, filter9);
    testInputConfiguration(arguments, path9);

    // a start time without an end time is expected to fail
    arguments.clear();
    TimePartitionedFileSetArguments.setInputStartTime(arguments, windowEnd);
    testInputConfigurationFailure(arguments, " with only a start time");

    // an end time without a start time is expected to fail
    arguments.clear();
    TimePartitionedFileSetArguments.setInputEndTime(arguments, windowEnd);
    testInputConfigurationFailure(arguments, " with only an end time");
}
Also used : PartitionFilter(io.cdap.cdap.api.dataset.lib.PartitionFilter) TransactionAware(org.apache.tephra.TransactionAware) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) Test(org.junit.Test)

Aggregations

PartitionFilter (io.cdap.cdap.api.dataset.lib.PartitionFilter): 29; Test (org.junit.Test): 20; PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey): 10; TransactionAware (org.apache.tephra.TransactionAware): 10; PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 8; TransactionExecutor (org.apache.tephra.TransactionExecutor): 8; PartitionDetail (io.cdap.cdap.api.dataset.lib.PartitionDetail): 7; Predicate (io.cdap.cdap.api.Predicate): 6; DataSetException (io.cdap.cdap.api.dataset.DataSetException): 6; Partition (io.cdap.cdap.api.dataset.lib.Partition): 6; IOException (java.io.IOException): 6; HashMap (java.util.HashMap): 6; ImmutableMap (com.google.common.collect.ImmutableMap): 4; PartitionNotFoundException (io.cdap.cdap.api.dataset.PartitionNotFoundException): 4; PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException): 4; FieldType (io.cdap.cdap.api.dataset.lib.Partitioning.FieldType): 4; TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet): 4; HashSet (java.util.HashSet): 4; List (java.util.List): 4; Map (java.util.Map): 4