Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class TimePartitionedFileSetDefinition, method updateArgumentsIfNeeded:
/**
 * Derives output-related arguments from the output partition time, if the arguments contain one.
 *
 * <p>When an output partition time is present:
 * <ul>
 *   <li>if no output path is set, one is generated from the time: with the output path format
 *       (and optional time zone) given in the arguments, or otherwise with the default layout
 *       {@code <date>/<hour>-<minute>.<time>};</li>
 *   <li>the time is converted to a partition key, which is added to the arguments.</li>
 * </ul>
 * In all cases the result is passed on to {@code updateArgumentsIfNeeded(Map, Partitioning)} so that
 * it can apply any further updates.
 *
 * <p>This method never writes to the passed-in map: if anything has to be added, it is added to a copy.
 *
 * @param arguments the runtime arguments as given by the caller
 * @return the arguments to use; either the given map or an updated copy of it
 */
protected Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments) {
  Long time = TimePartitionedFileSetArguments.getOutputPartitionTime(arguments);
  if (time != null) {
    // The partition key is always added below, so copy up front. Previously the copy was only made when
    // the output path was missing, and the caller's (possibly shared or immutable) map was modified otherwise.
    arguments = Maps.newHashMap(arguments);
    // set the output path according to partition time
    if (FileSetArguments.getOutputPath(arguments) == null) {
      String outputPathFormat = TimePartitionedFileSetArguments.getOutputPathFormat(arguments);
      String path;
      if (Strings.isNullOrEmpty(outputPathFormat)) {
        // default layout; java.util.Formatter renders a Long with %t in the JVM's default time zone
        path = String.format("%tF/%tH-%tM.%d", time, time, time, time);
      } else {
        SimpleDateFormat format = new SimpleDateFormat(outputPathFormat);
        String timeZoneID = TimePartitionedFileSetArguments.getOutputPathTimeZone(arguments);
        if (!Strings.isNullOrEmpty(timeZoneID)) {
          format.setTimeZone(TimeZone.getTimeZone(timeZoneID));
        }
        path = format.format(new Date(time));
      }
      FileSetArguments.setOutputPath(arguments, path);
    }
    // add the corresponding partition key to the arguments
    PartitionKey outputKey = TimePartitionedFileSetDataset.partitionKeyForTime(time);
    PartitionedFileSetArguments.setOutputPartitionKey(arguments, outputKey);
  }
  // delegate to super class for anything it needs to do
  return updateArgumentsIfNeeded(arguments, TimePartitionedFileSetDataset.PARTITIONING);
}
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class PartitionedFileSetDefinition, method updateArgumentsIfNeeded:
/**
 * Fills in the output location in the arguments if it is missing.
 *
 * <p>If no output path is set and the arguments contain an output partition key, the output path is
 * generated from that key. If there is no partition key but a dynamic partitioner is configured, the
 * base location of the file set is used as the output location. Otherwise the arguments are returned
 * as they are.
 *
 * <p>This method never writes to the passed-in map: if anything has to be added, it is added to a copy.
 *
 * @param arguments the runtime arguments as given by the caller
 * @param partitioning the partitioning used to read the partition key from the arguments
 * @return the arguments to use; either the given map or an updated copy of it
 */
protected static Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments, Partitioning partitioning) {
  if (FileSetArguments.getOutputPath(arguments) != null) {
    // an explicit output path always wins; nothing to do
    return arguments;
  }
  PartitionKey key = PartitionedFileSetArguments.getOutputPartitionKey(arguments, partitioning);
  if (key != null) {
    // copy, to avoid modifying the passed-in map
    arguments = Maps.newHashMap(arguments);
    FileSetArguments.setOutputPath(arguments, PartitionedFileSetDataset.getOutputPath(key, partitioning));
  } else if (PartitionedFileSetArguments.getDynamicPartitioner(arguments) != null) {
    // when using DynamicPartitioner, use the baseLocation of the fileSet as the output location;
    // copy here as well - this branch used to write directly into the caller's map
    arguments = Maps.newHashMap(arguments);
    FileSetArguments.setBaseOutputPath(arguments);
  }
  return arguments;
}
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class ExploreExecutorHttpHandler, method doAddPartition:
/**
 * Handles a request to add a partition of a partitioned file set to Explore.
 *
 * <p>The request body is expected to be a JSON object of string properties. It must contain a
 * {@code "path"} entry and the fields of the partition key. Responses:
 * <ul>
 *   <li>404 if the dataset cannot be loaded;</li>
 *   <li>200 with a no-op query handle if instantiating the dataset failed and
 *       {@code isClassNotFoundException} recognizes the failure;</li>
 *   <li>400 if the dataset is not a {@code PartitionedFileSet}, if the body is empty or has no path,
 *       or if the partition key is missing or invalid;</li>
 *   <li>200 with the query handle returned by the explore table manager on success;</li>
 *   <li>500 for any other failure.</li>
 * </ul>
 *
 * @param request the HTTP request whose content holds the JSON properties
 * @param responder used to send the response
 * @param datasetId the dataset to add the partition to
 */
private void doAddPartition(HttpRequest request, HttpResponder responder, DatasetId datasetId) {
  Dataset dataset;
  try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
    dataset = datasetInstantiator.getDataset(datasetId);
    if (dataset == null) {
      responder.sendString(HttpResponseStatus.NOT_FOUND, "Cannot load dataset " + datasetId);
      return;
    }
  } catch (IOException e) {
    String classNotFoundMessage = isClassNotFoundException(e);
    if (classNotFoundMessage != null) {
      // not treated as an error: answer with a handle that represents a no-op
      JsonObject json = new JsonObject();
      json.addProperty("handle", QueryHandle.NO_OP.getHandle());
      responder.sendJson(HttpResponseStatus.OK, json);
      return;
    }
    LOG.error("Exception instantiating dataset {}.", datasetId, e);
    responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, "Exception instantiating dataset " + datasetId.getDataset());
    return;
  }
  try {
    if (!(dataset instanceof PartitionedFileSet)) {
      responder.sendString(HttpResponseStatus.BAD_REQUEST, "not a partitioned dataset.");
      return;
    }
    Partitioning partitioning = ((PartitionedFileSet) dataset).getPartitioning();
    Map<String, String> properties;
    // JSON exchanged over HTTP is UTF-8; decode it explicitly instead of relying on the platform charset
    try (Reader reader = new InputStreamReader(new ChannelBufferInputStream(request.getContent()),
                                               java.nio.charset.StandardCharsets.UTF_8)) {
      properties = GSON.fromJson(reader, new TypeToken<Map<String, String>>() {
      }.getType());
    }
    // Gson returns null for an empty body; treat that like a missing path instead of failing with an NPE
    String fsPath = properties == null ? null : properties.get("path");
    if (fsPath == null) {
      responder.sendString(HttpResponseStatus.BAD_REQUEST, "path was not specified.");
      return;
    }
    PartitionKey partitionKey;
    try {
      partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(properties, partitioning);
    } catch (Exception e) {
      responder.sendString(HttpResponseStatus.BAD_REQUEST, "invalid partition key: " + e.getMessage());
      return;
    }
    if (partitionKey == null) {
      responder.sendString(HttpResponseStatus.BAD_REQUEST, "no partition key was given.");
      return;
    }
    QueryHandle handle = exploreTableManager.addPartition(datasetId, properties, partitionKey, fsPath);
    JsonObject json = new JsonObject();
    json.addProperty("handle", handle.getHandle());
    responder.sendJson(HttpResponseStatus.OK, json);
  } catch (Throwable e) {
    LOG.error("Got exception:", e);
    responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, e.getMessage());
  }
}
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class PartitionedFileSetTest, method testUpdateMetadata:
/**
 * Verifies that metadata can be added to an existing partition, and that adding metadata fails
 * both for a metadata key that is already present and for a partition that does not exist.
 */
@Test
public void testUpdateMetadata() throws Exception {
  final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
  TransactionExecutor.Subroutine scenario = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // create the partition with some initial metadata
      ImmutableMap<String, String> initialMetadata = ImmutableMap.of("key1", "value1");
      PartitionOutput output = pfs.getPartitionOutput(PARTITION_KEY);
      output.setMetadata(initialMetadata);
      output.addPartition();

      // add a second entry to the partition that now exists
      ImmutableMap<String, String> addedMetadata = ImmutableMap.of("key2", "value2");
      pfs.addMetadata(PARTITION_KEY, addedMetadata);

      // the partition must now carry both the initial and the added entries
      PartitionDetail detail = pfs.getPartition(PARTITION_KEY);
      Assert.assertNotNull(detail);
      HashMap<String, String> expectedMetadata = Maps.newHashMap(initialMetadata);
      expectedMetadata.putAll(addedMetadata);
      Assert.assertEquals(expectedMetadata, detail.getMetadata().asMap());

      // a metadata key that is already present cannot be given a new value
      try {
        pfs.addMetadata(PARTITION_KEY, "key2", "value3");
        Assert.fail("Expected not to be able to update an existing metadata entry");
      } catch (DataSetException expected) {
        // expected: existing metadata entries cannot be overwritten
      }

      // metadata cannot be added to a partition that was never created
      PartitionKey missingKey = PartitionKey.builder()
        .addIntField("i", 42)
        .addLongField("l", 17L)
        .addStringField("s", "nonexistent")
        .build();
      try {
        pfs.addMetadata(missingKey, "key2", "value3");
        Assert.fail("Expected not to be able to add metadata for a nonexistent partition");
      } catch (DataSetException expected) {
        // expected: the partition does not exist
      }
    }
  };
  dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(scenario);
}
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class PartitionedFileSetTest, method testAddRemoveGetPartitions:
/**
 * Adds a 4 x 4 x 4 grid of partitions, validates exact lookups and filtered queries against them,
 * then drops a few partitions one at a time and re-validates all filters after each removal.
 */
@Test
@Category(SlowTests.class)
public void testAddRemoveGetPartitions() throws Exception {
  final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
  final int dim = 4;
  final PartitionKey[][][] keyGrid = new PartitionKey[dim][dim][dim];
  final String[][][] pathGrid = new String[dim][dim][dim];
  final Set<BasicPartition> expectedPartitions = Sets.newHashSet();

  // populate the grid, adding each partition in a transaction of its own
  for (int s = 0; s < dim; s++) {
    for (int i = 0; i < dim; i++) {
      for (int l = 0; l < dim; l++) {
        final PartitionKey newKey = PartitionKey.builder()
          .addField("s", String.format("%c-%d", 'a' + s, s))
          .addField("i", i * 100)
          .addField("l", 15L - 10 * l)
          .build();
        Callable<BasicPartition> addTask = new Callable<BasicPartition>() {
          @Override
          public BasicPartition call() throws Exception {
            PartitionOutput output = pfs.getPartitionOutput(newKey);
            output.addPartition();
            return new BasicPartition((PartitionedFileSetDataset) pfs, output.getRelativePath(), output.getPartitionKey());
          }
        };
        BasicPartition added = dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(addTask);
        keyGrid[s][i][l] = newKey;
        pathGrid[s][i][l] = added.getRelativePath();
        expectedPartitions.add(added);
      }
    }
  }

  // every partition must be retrievable by its exact key, at the path recorded when it was added
  for (int s = 0; s < dim; s++) {
    for (int i = 0; i < dim; i++) {
      for (int l = 0; l < dim; l++) {
        final PartitionKey expectedKey = keyGrid[s][i][l];
        final String expectedPath = pathGrid[s][i][l];
        TransactionExecutor.Subroutine lookup = new TransactionExecutor.Subroutine() {
          @Override
          public void apply() throws Exception {
            PartitionDetail found = pfs.getPartition(expectedKey);
            Assert.assertNotNull(found);
            Assert.assertEquals(expectedPath, found.getRelativePath());
          }
        };
        dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(lookup);
        // Also run the filter that matches exactly this key. The result is not used; the local
        // variable only gives the @SuppressWarnings annotation a declaration to attach to.
        @SuppressWarnings({ "unchecked", "unused" })
        boolean success = testFilter(pfs, expectedPartitions,
                                     PartitionFilter.builder()
                                       .addValueCondition("l", expectedKey.getField("l"))
                                       .addValueCondition("s", expectedKey.getField("s"))
                                       .addValueCondition("i", expectedKey.getField("i"))
                                       .build());
      }
    }
  }

  // querying without any filter must work as well
  testFilter(pfs, expectedPartitions, null);

  // run the full list of generated partition filters
  List<PartitionFilter> filters = generateFilters();
  testAllFilters(pfs, expectedPartitions, filters);

  // drop some partitions one by one, re-running all filters after each removal
  PartitionKey[] removals = { keyGrid[1][2][3], keyGrid[0][1][0], keyGrid[2][3][2], keyGrid[3][1][2] };
  for (final PartitionKey removedKey : removals) {
    // drop the partition in a transaction
    TransactionExecutor.Procedure<PartitionKey> dropTask = new TransactionExecutor.Procedure<PartitionKey>() {
      @Override
      public void apply(PartitionKey target) throws Exception {
        pfs.dropPartition(target);
      }
    };
    dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(dropTask, removedKey);

    // the expected set must no longer contain the dropped partition
    com.google.common.base.Predicate<BasicPartition> hasRemovedKey = new com.google.common.base.Predicate<BasicPartition>() {
      @Override
      public boolean apply(BasicPartition candidate) {
        return removedKey.equals(candidate.getPartitionKey());
      }
    };
    BasicPartition dropped = Iterables.tryFind(expectedPartitions, hasRemovedKey).get();
    expectedPartitions.remove(dropped);
    testAllFilters(pfs, expectedPartitions, filters);
  }
}
Aggregations