Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.
From the class PartitionFilterTest, method testIncompatibleMatch.
/**
 * Matching a partition key against a filter must fail with an
 * {@link IllegalArgumentException} when the key carries a value of an incompatible type:
 * here the filter has an integer range on "month" while the key supplies a string.
 */
@Test(expected = IllegalArgumentException.class)
public void testIncompatibleMatch() {
  PartitionFilter monthRangeFilter = PartitionFilter.builder()
      .addValueCondition("year", 2012)
      .addRangeCondition("month", 4, 7)
      .addValueCondition("market", "asia")
      .build();

  // "month" is a string in this key, but the filter compares it against integers
  PartitionKey keyWithStringMonth = PartitionKey.builder()
      .addField("month", "january")
      .addField("market", "latin")
      .addField("year", 2012)
      .build();

  monthRangeFilter.match(keyWithStringMonth);
}
Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.
From the class PartitionedFileSetArgumentsTest, method testSetGetInputPartitionFilter.
/**
 * Round-trips several partition filters through an arguments map: each filter is stored with
 * {@code setInputPartitionFilter} and must come back equal from {@code getInputPartitionFilter}.
 */
@Test
public void testSetGetInputPartitionFilter() throws Exception {
  PartitionFilter[] candidates = {
    // a range condition plus two value conditions
    PartitionFilter.builder()
        .addRangeCondition("i", 30, 40)
        .addValueCondition("l", 17L)
        .addValueCondition("s", "x")
        .build(),
    // a range condition plus a single value condition
    PartitionFilter.builder()
        .addRangeCondition("i", 30, 40)
        .addValueCondition("s", "x")
        .build(),
    // the predefined ALWAYS_MATCH filter
    PartitionFilter.ALWAYS_MATCH
  };

  for (PartitionFilter expected : candidates) {
    // each filter is written into its own, initially empty, arguments map
    Map<String, String> args = new HashMap<>();
    PartitionedFileSetArguments.setInputPartitionFilter(args, expected);
    Assert.assertEquals(expected, PartitionedFileSetArguments.getInputPartitionFilter(args));
  }
}
Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.
From the class TimePartitionedFileSetDataset, method partitionFiltersForTimeRange.
/**
 * Builds the list of partition filters that cover the given time range.
 *
 * @param startTime start of the range; a value of zero or less means there is no lower bound
 * @param endTime end of the range; {@code Long.MAX_VALUE} means there is no upper bound
 * @return an empty list if {@code startTime >= endTime}; a list containing a single
 *         {@code null} filter if the range is unbounded in both directions; otherwise the
 *         filters produced for the lower and/or upper bound
 */
@VisibleForTesting
static List<PartitionFilter> partitionFiltersForTimeRange(long startTime, long endTime) {
  if (startTime >= endTime) {
    // unsatisfiable range: nothing can match
    return Collections.emptyList();
  }

  // a null key stands for "unbounded" on that side of the range
  PartitionKey lowerKey = null;
  if (startTime > 0) {
    lowerKey = partitionKeyForTime(startTime);
  }
  PartitionKey upperKey = null;
  if (endTime != Long.MAX_VALUE) {
    upperKey = partitionKeyForTime(endTime);
  }

  if (lowerKey == null && upperKey == null) {
    // no bounds at all -> a single null filter selects all time
    return Collections.singletonList(null);
  }

  List<PartitionFilter> collected = Lists.newArrayList();
  String[] fieldNames =
      PARTITIONING.getFields().keySet().toArray(new String[PARTITIONING.getFields().size()]);

  if (lowerKey == null) {
    // only an upper bound: the upper-bound filters are sufficient
    addUpperFilters(fieldNames, 0, upperKey, collected, initialSupplier());
  } else if (upperKey == null) {
    // only a lower bound: the lower-bound filters are sufficient
    addLowerFilters(fieldNames, 0, lowerKey, collected, initialSupplier());
  } else {
    // bounded on both sides
    return filtersFor(fieldNames, 0, lowerKey, upperKey, collected, initialSupplier());
  }
  return collected;
}
Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.
From the class PartitionedFileSetDataset, method generateStartKey.
/**
 * Builds a start key from the lower bounds of the given partition filter.
 *
 * <p>The partitioning's fields are visited in the iteration order of
 * {@code partitioning.getFields()}. For each field the lower bound of the filter's condition is
 * validated against the field type and converted to bytes. The walk stops at the first field
 * that has no condition or no lower bound. The converted values are joined with a single
 * {@code \0} byte between them.
 *
 * @param filter the partition filter, may be {@code null}
 * @return the start key, or {@code null} if the filter is {@code null} or the first field
 *         has no condition or no lower bound
 * @throws IllegalArgumentException if a lower bound is rejected by the field type's validation;
 *         the validation exception is attached as the cause
 */
private byte[] generateStartKey(PartitionFilter filter) {
  if (null == filter) {
    return null;
  }
  // validate partition filter, convert values, and compute size of output
  Map<String, FieldType> partitionFields = partitioning.getFields();
  int totalSize = 0;
  ArrayList<byte[]> values = Lists.newArrayListWithCapacity(partitionFields.size());
  for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
    String fieldName = entry.getKey();
    FieldType fieldType = entry.getValue();
    PartitionFilter.Condition<? extends Comparable> condition = filter.getCondition(fieldName);
    if (condition == null) {
      // this field is not present; we can't include any more fields in the start key
      break;
    }
    Comparable lowerValue = condition.getLower();
    if (lowerValue == null) {
      // this field has no lower bound; we can't include any more fields in the start key
      break;
    }
    try {
      fieldType.validate(lowerValue);
    } catch (IllegalArgumentException e) {
      // keep the original exception as the cause so its stack trace is not lost
      throw new IllegalArgumentException(String.format("Invalid partition filter: Lower bound for field '%s' is incompatible with the partitioning: %s", fieldName, e.getMessage()), e);
    }
    byte[] bytes = FieldTypes.toBytes(lowerValue, fieldType);
    totalSize += bytes.length;
    values.add(bytes);
  }
  if (values.isEmpty()) {
    return null;
  }
  // one \0 between each of the fields
  totalSize += values.size() - 1;
  // a new byte array is zero-filled, so positions that are skipped below remain \0
  byte[] startKey = new byte[totalSize];
  int offset = 0;
  for (byte[] bytes : values) {
    System.arraycopy(bytes, 0, startKey, offset, bytes.length);
    // this leaves a \0 byte after the value
    offset += bytes.length + 1;
  }
  return startKey;
}
Use of io.cdap.cdap.api.dataset.lib.PartitionFilter in project cdap by cdapio.
From the class TimePartitionedFileSetTest, method testTimePartitionedInputArguments.
/**
 * Exercises the input runtime arguments of a time-partitioned file set. Two partitions are
 * added (8:42 am and 9:42 am), then different combinations of start time, end time and
 * partition filter are handed to the {@code testInputConfiguration} and
 * {@code testInputConfigurationFailure} helpers, which carry the actual expectations.
 */
@Test
public void testTimePartitionedInputArguments() throws Exception {
  final long timeAt8 = DATE_FORMAT.parse("10/17/2014 8:42 am").getTime();
  final long timeAt9 = DATE_FORMAT.parse("10/17/2014 9:42 am").getTime();
  final String pathAt8 = "8:42";
  final String pathAt9 = "9:42";
  final PartitionFilter fromHour9 = PartitionFilter.builder().addRangeCondition("hour", 9, null).build();

  // register both partitions inside a transaction
  final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
  dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) tpfs).execute(
    new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws Exception {
        tpfs.addPartition(timeAt8, pathAt8);
        tpfs.addPartition(timeAt9, pathAt9);
      }
    });

  // a one-hour window centered on the 8:42 partition
  final long halfHourBefore8 = timeAt8 - 30 * MINUTE;
  final long halfHourAfter8 = timeAt8 + 30 * MINUTE;

  // input selected by a time range
  Map<String, String> args = Maps.newHashMap();
  TimePartitionedFileSetArguments.setInputStartTime(args, halfHourBefore8);
  TimePartitionedFileSetArguments.setInputEndTime(args, halfHourAfter8);
  testInputConfiguration(args, pathAt8);

  // adding a partition filter should change nothing while a time range is present
  TimePartitionedFileSetArguments.setInputPartitionFilter(args, fromHour9);
  testInputConfiguration(args, pathAt8);

  // input selected by the partition filter alone
  args.clear();
  TimePartitionedFileSetArguments.setInputPartitionFilter(args, fromHour9);
  testInputConfiguration(args, pathAt9);

  // only a start time
  args.clear();
  TimePartitionedFileSetArguments.setInputStartTime(args, halfHourAfter8);
  testInputConfigurationFailure(args, " with only a start time");

  // only an end time
  args.clear();
  TimePartitionedFileSetArguments.setInputEndTime(args, halfHourAfter8);
  testInputConfigurationFailure(args, " with only an end time");
}
Aggregations