Use of co.cask.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.
In the class TimePartitionedFileSetTest, method testTimePartitionedInputArguments:
@Test
public void testTimePartitionedInputArguments() throws Exception {
  final long time8 = DATE_FORMAT.parse("10/17/2014 8:42 am").getTime();
  final long time9 = DATE_FORMAT.parse("10/17/2014 9:42 am").getTime();
  final String path8 = "8:42";
  final String path9 = "9:42";
  final PartitionFilter filter9 = PartitionFilter.builder().addRangeCondition("hour", 9, null).build();
  // add a few partitions
  {
    final TimePartitionedFileSet dataset = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws Exception {
        dataset.addPartition(time8, path8);
        dataset.addPartition(time9, path9);
      }
    });
  }
  // test specifying a time range for input
  Map<String, String> arguments = Maps.newHashMap();
  TimePartitionedFileSetArguments.setInputStartTime(arguments, time8 - 30 * MINUTE);
  TimePartitionedFileSetArguments.setInputEndTime(arguments, time8 + 30 * MINUTE);
  testInputConfiguration(arguments, path8);
  // add a partition filter; it should have no effect as long as a time range is set
  TimePartitionedFileSetArguments.setInputPartitionFilter(arguments, filter9);
  testInputConfiguration(arguments, path8);
  // test specifying input with a partition filter only
  arguments.clear();
  TimePartitionedFileSetArguments.setInputPartitionFilter(arguments, filter9);
  testInputConfiguration(arguments, path9);
  // test that specifying only a start time or only an end time for input fails
  arguments.clear();
  TimePartitionedFileSetArguments.setInputStartTime(arguments, time8 + 30 * MINUTE);
  testInputConfigurationFailure(arguments, " with only a start time");
  arguments.clear();
  TimePartitionedFileSetArguments.setInputEndTime(arguments, time8 + 30 * MINUTE);
  testInputConfigurationFailure(arguments, " with only an end time");
}
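The failure cases above hinge on both bounds being present. As a quick illustration, a minimal sketch of setting and reading back the time range, assuming TimePartitionedFileSetArguments exposes getInputStartTime/getInputEndTime getters mirroring the setters (an assumption, not shown in this snippet):

Map<String, String> args = Maps.newHashMap();
TimePartitionedFileSetArguments.setInputStartTime(args, time8 - 30 * MINUTE);
TimePartitionedFileSetArguments.setInputEndTime(args, time8 + 30 * MINUTE);
// assumed getters mirroring the setters used in the test
Long start = TimePartitionedFileSetArguments.getInputStartTime(args); // time8 - 30 * MINUTE
Long end = TimePartitionedFileSetArguments.getInputEndTime(args);     // time8 + 30 * MINUTE
// the test above shows that configuration fails when only one bound is set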
Use of co.cask.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.
In the class PartitionFilterTest, method testIncompatibleMatch:
@Test(expected = IllegalArgumentException.class)
public void testIncompatibleMatch() {
  PartitionFilter filter = PartitionFilter.builder()
    .addValueCondition("year", 2012).addRangeCondition("month", 4, 7).addValueCondition("market", "asia").build();
  // field of incompatible type: "month" is given as a String here, but the filter expects an int
  filter.match(PartitionKey.builder()
    .addField("month", "january").addField("market", "latin").addField("year", 2012).build());
}
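For contrast, a key whose field types all agree with the filter never throws; a mismatched value simply fails the match. A minimal sketch with illustrative key values (assumptions, not from the test):

PartitionFilter filter = PartitionFilter.builder()
  .addValueCondition("year", 2012).addRangeCondition("month", 4, 7).addValueCondition("market", "asia").build();
PartitionKey compatible = PartitionKey.builder()
  .addField("year", 2012).addField("month", 5).addField("market", "asia").build();
filter.match(compatible); // true: "month" is an int inside the range [4, 7)
PartitionKey wrongValue = PartitionKey.builder()
  .addField("year", 2012).addField("month", 3).addField("market", "asia").build();
filter.match(wrongValue); // false, not an exception: the type is compatible but the value is out of range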
Use of co.cask.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.
In the class DataCleansingMapReduceTest, method getDataFromFile:
private Set<String> getDataFromFile(Long time, String dsName) throws Exception {
  DataSetManager<PartitionedFileSet> cleanRecords = getDataset(dsName);
  PartitionFilter filter = PartitionFilter.builder().addValueCondition("time", time).build();
  return getDataFromFilter(cleanRecords.get(), filter);
}
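getDataFromFilter is defined elsewhere in the test class and is not part of this snippet. A hypothetical sketch of what such a helper might look like, assuming each matching partition's location is a single readable file (in practice it may be a directory whose files need to be enumerated):

private Set<String> getDataFromFilter(PartitionedFileSet pfs, PartitionFilter filter) throws IOException {
  Set<String> lines = Sets.newHashSet();
  // getPartitions returns the partitions whose keys match the given filter
  for (PartitionDetail partition : pfs.getPartitions(filter)) {
    Location location = partition.getLocation(); // assumed to be a single file here
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(location.getInputStream(), StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        lines.add(line);
      }
    }
  }
  return lines;
}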
Use of co.cask.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.
In the class PartitionedFileSetDataset, method generateStartKey:
private byte[] generateStartKey(PartitionFilter filter) {
  if (null == filter) {
    return null;
  }
  // validate the partition filter, convert values, and compute the size of the output
  Map<String, FieldType> partitionFields = partitioning.getFields();
  int totalSize = 0;
  ArrayList<byte[]> values = Lists.newArrayListWithCapacity(partitionFields.size());
  for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
    String fieldName = entry.getKey();
    FieldType fieldType = entry.getValue();
    PartitionFilter.Condition<? extends Comparable> condition = filter.getCondition(fieldName);
    if (condition == null) {
      // the filter has no condition for this field; we can't include any more fields in the start key
      break;
    }
    Comparable lowerValue = condition.getLower();
    if (lowerValue == null) {
      // this field has no lower bound; we can't include any more fields in the start key
      break;
    }
    try {
      fieldType.validate(lowerValue);
    } catch (IllegalArgumentException e) {
      throw new IllegalArgumentException(String.format(
        "Invalid partition filter: Lower bound for field '%s' is incompatible with the partitioning: %s",
        fieldName, e.getMessage()));
    }
    byte[] bytes = FieldTypes.toBytes(lowerValue, fieldType);
    totalSize += bytes.length;
    values.add(bytes);
  }
  if (values.isEmpty()) {
    return null;
  }
  // one \0 separator between each pair of adjacent fields
  totalSize += values.size() - 1;
  byte[] startKey = new byte[totalSize];
  int offset = 0;
  for (byte[] bytes : values) {
    System.arraycopy(bytes, 0, startKey, offset, bytes.length);
    // this leaves a \0 byte after the value
    offset += bytes.length + 1;
  }
  return startKey;
}
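To make the key layout concrete, an illustrative sketch with assumed values (a partitioning of year: INT and month: INT, neither taken from the source):

PartitionFilter filter = PartitionFilter.builder()
  .addValueCondition("year", 2012)  // lower bound 2012 (single value)
  .addRangeCondition("month", 4, 7) // lower bound 4
  .build();
// resulting start key layout: bytes(2012) | 0x00 | bytes(4)
// a filter on "month" alone would yield a null start key: the first
// partitioning field ("year") has no lower bound, so the loop breaks
// before any value is added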
Use of co.cask.cdap.api.dataset.lib.PartitionFilter in project cdap by caskdata.
In the class PartitionedFileSetDataset, method generateStopKey:
private byte[] generateStopKey(PartitionFilter filter) {
  if (null == filter) {
    return null;
  }
  // validate the partition filter, convert values, and compute the size of the output
  Map<String, FieldType> partitionFields = partitioning.getFields();
  int totalSize = 0;
  boolean allSingleValue = true;
  ArrayList<byte[]> values = Lists.newArrayListWithCapacity(partitionFields.size());
  for (Map.Entry<String, FieldType> entry : partitionFields.entrySet()) {
    String fieldName = entry.getKey();
    FieldType fieldType = entry.getValue();
    PartitionFilter.Condition<? extends Comparable> condition = filter.getCondition(fieldName);
    if (condition == null) {
      // the filter has no condition for this field; we can't include any more fields in the stop key
      break;
    }
    Comparable upperValue = condition.getUpper();
    if (upperValue == null) {
      // this field has no upper bound; we can't include any more fields in the stop key
      break;
    }
    try {
      fieldType.validate(upperValue);
    } catch (IllegalArgumentException e) {
      throw new IllegalArgumentException(String.format(
        "Invalid partition filter: Upper bound for field '%s' is incompatible with the partitioning: %s",
        fieldName, e.getMessage()));
    }
    byte[] bytes = FieldTypes.toBytes(upperValue, fieldType);
    totalSize += bytes.length;
    values.add(bytes);
    if (!condition.isSingleValue()) {
      allSingleValue = false;
      // this is an exclusive upper bound for this field; the following fields don't matter
      break;
    }
  }
  if (values.isEmpty()) {
    return null;
  }
  // one \0 separator between each pair of adjacent fields
  totalSize += values.size() - 1;
  if (allSingleValue) {
    // in this case the start and stop keys would be equal; append one \1 to ensure the scan is not empty
    totalSize++;
  }
  byte[] stopKey = new byte[totalSize];
  int offset = 0;
  for (byte[] bytes : values) {
    System.arraycopy(bytes, 0, stopKey, offset, bytes.length);
    // this leaves a \0 byte after the value
    offset += bytes.length + 1;
    if (allSingleValue && offset == stopKey.length) {
      // see above: write \1 instead of \0 at the end, to make sure the scan is not empty
      stopKey[offset - 1] = 1;
    }
  }
  return stopKey;
}
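Continuing the same illustrative example (assumed partitioning of year: INT and month: INT), the stop key mirrors the start key but uses the upper bounds, and the trailing \1 appears only when every included condition is a single value:

PartitionFilter range = PartitionFilter.builder()
  .addValueCondition("year", 2012).addRangeCondition("month", 4, 7).build();
// stop key: bytes(2012) | 0x00 | bytes(7)
// the "month" range makes allSingleValue false, so the key ends at the
// exclusive upper bound with no trailing \1
PartitionFilter singleValue = PartitionFilter.builder()
  .addValueCondition("year", 2012).addValueCondition("month", 4).build();
// start key: bytes(2012) | 0x00 | bytes(4)
// stop key:  bytes(2012) | 0x00 | bytes(4) | 0x01
// without the trailing \1 the stop key would equal the start key and the
// scan over [startKey, stopKey) would be empty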