use of co.cask.cdap.api.dataset.lib.TimePartitionedFileSet in project cdap by caskdata.
the class TimePartitionedFileSetTest method testTimePartitionedInputArguments.
@Test
public void testTimePartitionedInputArguments() throws Exception {
final long time8 = DATE_FORMAT.parse("10/17/2014 8:42 am").getTime();
final long time9 = DATE_FORMAT.parse("10/17/2014 9:42 am").getTime();
final String path8 = "8:42";
final String path9 = "9:42";
final PartitionFilter filter9 = PartitionFilter.builder().addRangeCondition("hour", 9, null).build();
// add a few partitions
{
final TimePartitionedFileSet dataset = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
final TransactionAware txAwareDataset = (TransactionAware) dataset;
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
dataset.addPartition(time8, path8);
dataset.addPartition(time9, path9);
}
});
}
// test specifying time range for input
Map<String, String> arguments = Maps.newHashMap();
TimePartitionedFileSetArguments.setInputStartTime(arguments, time8 - 30 * MINUTE);
TimePartitionedFileSetArguments.setInputEndTime(arguments, time8 + 30 * MINUTE);
testInputConfiguration(arguments, path8);
// add a partition filter. it should not have an effect as long as there is a time range
TimePartitionedFileSetArguments.setInputPartitionFilter(arguments, filter9);
testInputConfiguration(arguments, path8);
// test specifying input with a partition filter
arguments.clear();
TimePartitionedFileSetArguments.setInputPartitionFilter(arguments, filter9);
testInputConfiguration(arguments, path9);
// test specifying only a start time or only an end time for input, or none
arguments.clear();
TimePartitionedFileSetArguments.setInputStartTime(arguments, time8 + 30 * MINUTE);
testInputConfigurationFailure(arguments, " with only a start time");
arguments.clear();
TimePartitionedFileSetArguments.setInputEndTime(arguments, time8 + 30 * MINUTE);
testInputConfigurationFailure(arguments, " with only an end time");
}
use of co.cask.cdap.api.dataset.lib.TimePartitionedFileSet in project cdap by caskdata.
the class TimePartitionedFileSetTest method validateInputPaths.
/**
* Validates that the output configuration of the tpfs, when instantiated with (time - start * minutes) as
* input start time and (time + end * minutes) as input end time, returns the expected list of paths.
*/
private void validateInputPaths(long time, long start, long end, final String... expected) throws IOException, DatasetManagementException, InterruptedException, TransactionFailureException {
Map<String, String> arguments = Maps.newHashMap();
TimePartitionedFileSetArguments.setInputStartTime(arguments, time + start * MINUTE);
TimePartitionedFileSetArguments.setInputEndTime(arguments, time + end * MINUTE);
final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE, arguments);
TransactionAware txAwareDataset = (TransactionAware) tpfs;
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
Map<String, String> inputConfig = tpfs.getInputFormatConfiguration();
String inputs = inputConfig.get(FileInputFormat.INPUT_DIR);
Assert.assertNotNull(inputs);
if (expected.length == 0) {
Assert.assertTrue(inputs.isEmpty());
return;
}
String[] inputPaths = inputs.split(",");
Assert.assertEquals(expected.length, inputPaths.length);
// order is not guaranteed.
Arrays.sort(expected);
Arrays.sort(inputPaths);
for (int i = 0; i < expected.length; i++) {
// every input path is absolute, whereas expected paths are relative
Assert.assertTrue("path #" + i + " does not match", inputPaths[i].endsWith(expected[i]));
}
}
});
}
use of co.cask.cdap.api.dataset.lib.TimePartitionedFileSet in project cdap by caskdata.
the class TimePartitionedFileSetTest method testPartitionMetadata.
@Test
public void testPartitionMetadata() throws Exception {
final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
TransactionAware txAware = (TransactionAware) tpfs;
dsFrameworkUtil.newInMemoryTransactionExecutor(txAware).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// make sure the dataset has no partitions
validateTimePartitions(tpfs, 0L, MAX, Collections.<Long, String>emptyMap());
Date date = DATE_FORMAT.parse("6/4/12 10:00 am");
long time = date.getTime();
// keep track of all the metadata added
Map<String, String> allMetadata = Maps.newHashMap();
Map<String, String> metadata = ImmutableMap.of("key1", "value1", "key2", "value3", "key100", "value4");
tpfs.addPartition(time, "file", metadata);
allMetadata.putAll(metadata);
TimePartitionDetail partitionByTime = tpfs.getPartitionByTime(time);
Assert.assertNotNull(partitionByTime);
Assert.assertEquals(metadata, partitionByTime.getMetadata().asMap());
tpfs.addMetadata(time, "key3", "value4");
allMetadata.put("key3", "value4");
try {
// attempting to update an existing key throws a DatasetException
tpfs.addMetadata(time, "key3", "value5");
Assert.fail("Expected not to be able to update an existing metadata entry");
} catch (DataSetException expected) {
}
Map<String, String> newMetadata = ImmutableMap.of("key4", "value4", "key5", "value5");
tpfs.addMetadata(time, newMetadata);
allMetadata.putAll(newMetadata);
partitionByTime = tpfs.getPartitionByTime(time);
Assert.assertNotNull(partitionByTime);
Assert.assertEquals(allMetadata, partitionByTime.getMetadata().asMap());
}
});
}
use of co.cask.cdap.api.dataset.lib.TimePartitionedFileSet in project cdap by caskdata.
the class TimePartitionedFileSetTest method testInputConfiguration.
private void testInputConfiguration(Map<String, String> arguments, final String expectedPath) throws Exception {
final TimePartitionedFileSet dataset = dsFrameworkUtil.getInstance(TPFS_INSTANCE, arguments);
TransactionAware txAwareDataset = (TransactionAware) dataset;
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
Map<String, String> inputConf = dataset.getInputFormatConfiguration();
String input = inputConf.get(FileInputFormat.INPUT_DIR);
Assert.assertNotNull(input);
String[] inputs = input.split(",");
Assert.assertEquals(1, inputs.length);
Assert.assertTrue(inputs[0].endsWith(expectedPath));
}
});
}
use of co.cask.cdap.api.dataset.lib.TimePartitionedFileSet in project cdap by caskdata.
the class TimePartitionedFileSetTest method testOutputPartitionPath.
/**
* Tests that the output file path is set correctly, based on the output partition time.
*/
@Test
public void testOutputPartitionPath() throws Exception {
// test specifying output time
Date date = DATE_FORMAT.parse("1/1/15 8:42 pm");
Map<String, String> args = Maps.newHashMap();
TimePartitionedFileSetArguments.setOutputPartitionTime(args, date.getTime());
TimeZone timeZone = Calendar.getInstance().getTimeZone();
TimePartitionedFileSetArguments.setOutputPathFormat(args, "yyyy-MM-dd/HH_mm", timeZone.getID());
TimePartitionedFileSet ds = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
String outputPath = ds.getEmbeddedFileSet().getOutputLocation().toURI().getPath();
Assert.assertTrue(outputPath.endsWith("2015-01-01/20_42"));
Map<String, String> outputConfig = ds.getOutputFormatConfiguration();
Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("2015-01-01/20_42"));
// test specifying output time and partition key -> time should prevail
PartitionKey key = PartitionKey.builder().addIntField("year", 2014).addIntField("month", 1).addIntField("day", 1).addIntField("hour", 20).addIntField("minute", 54).build();
TimePartitionedFileSet ds1 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
TimePartitionedFileSetArguments.setOutputPartitionKey(args, key);
outputConfig = ds1.getOutputFormatConfiguration();
Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("2015-01-01/20_42"));
args.clear();
TimePartitionedFileSetArguments.setOutputPartitionKey(args, key);
TimePartitionedFileSet ds2 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
outputConfig = ds2.getOutputFormatConfiguration();
Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("54"));
args.clear();
TimePartitionedFileSet ds3 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
try {
ds3.getOutputFormatConfiguration();
Assert.fail("getOutputFormatConfiguration should have failed with neither output time nor partition key");
} catch (DataSetException e) {
// expected
}
}
Aggregations