Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
The class PartitionedFileSetDataset, method scannerToPartitions.
/**
 * While applying a partition filter and a limit, parse partitions from the rows of a scanner and add them to a list.
 * Note that multiple partitions can have the same transaction write pointer. For each set of partitions with the same
 * write pointer, we either add the entire set or exclude the entire set. The limit is applied after adding each such
 * set of partitions to the list.
 *
 * @param scanner the scanner on the partitions table from which to read partitions
 * @param partitions list to add the qualifying partitions to
 * @param limit limit, which once reached, partitions committed by other transactions will not be added.
 * The limit is checked only after consuming all partitions of a transaction, so
 * the total number of consumed partitions may be greater than this limit.
 * @param predicate predicate to apply before adding to the partitions list
 * @return Transaction ID of the partition that we reached in the scanner, but did not add to the list. This value
 * can be useful in future scans (as a resume marker). Returns {@code null} when the scanner was exhausted.
 */
@Nullable
private Long scannerToPartitions(Scanner scanner, List<PartitionDetail> partitions, int limit, Predicate<PartitionDetail> predicate) {
// txId of the previous row; null until the first row is read
Long prevTxId = null;
Row row;
while ((row = scanner.next()) != null) {
PartitionKey key = parseRowKey(row.getRow(), partitioning);
String relativePath = Bytes.toString(row.get(RELATIVE_PATH));
Long txId = Bytes.toLong(row.get(WRITE_PTR_COL));
// the limit is only checked at a transaction boundary, so that we return either all of the
// partitions written by a transaction or none, since we keep our marker based upon transaction id.
if (prevTxId != null && !prevTxId.equals(txId)) {
if (partitions.size() >= limit) {
// this row (the first of the next transaction) is NOT added; its txId is the resume marker
return txId;
}
}
prevTxId = txId;
BasicPartitionDetail partitionDetail = new BasicPartitionDetail(PartitionedFileSetDataset.this, relativePath, key, metadataFromRow(row));
// filtered-out partitions are skipped but still advance prevTxId, keeping the
// transaction-boundary bookkeeping above correct
if (!predicate.apply(partitionDetail)) {
continue;
}
partitions.add(partitionDetail);
}
// scanner exhausted: no resume marker to report
return null;
}
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
The class TimePartitionedFileSetDefinition, method updateArgumentsIfNeeded.
// if the arguments do not contain an output path, but an output partition time, generate an output path from that;
// also convert the output partition time to a partition key and add it to the arguments;
// also call the super class' method to update arguments if it needs to
// If the arguments carry an output partition time but no explicit output path, derive a path from
// that time (honoring a custom output path format and time zone when provided), and also record the
// corresponding output partition key in the arguments. Always delegates to the superclass logic last.
protected Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments) {
  Long partitionTime = TimePartitionedFileSetArguments.getOutputPartitionTime(arguments);
  if (partitionTime != null) {
    if (FileSetArguments.getOutputPath(arguments) == null) {
      String pathFormat = TimePartitionedFileSetArguments.getOutputPathFormat(arguments);
      String derivedPath;
      if (!Strings.isNullOrEmpty(pathFormat)) {
        // a custom format was supplied; apply it, optionally in the requested time zone
        SimpleDateFormat formatter = new SimpleDateFormat(pathFormat);
        String zoneId = TimePartitionedFileSetArguments.getOutputPathTimeZone(arguments);
        if (!Strings.isNullOrEmpty(zoneId)) {
          formatter.setTimeZone(TimeZone.getTimeZone(zoneId));
        }
        derivedPath = formatter.format(new Date(partitionTime));
      } else {
        // default layout: <yyyy-MM-dd>/<HH>-<mm>.<epoch millis>
        derivedPath = String.format("%tF/%tH-%tM.%d", partitionTime, partitionTime, partitionTime, partitionTime);
      }
      // copy before mutating so the caller-supplied map stays untouched
      arguments = Maps.newHashMap(arguments);
      FileSetArguments.setOutputPath(arguments, derivedPath);
    }
    // also expose the partition key corresponding to the partition time
    PartitionedFileSetArguments.setOutputPartitionKey(
        arguments, TimePartitionedFileSetDataset.partitionKeyForTime(partitionTime));
  }
  // let the superclass apply any further argument updates it needs
  return updateArgumentsIfNeeded(arguments, TimePartitionedFileSetDataset.PARTITIONING);
}
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
The class PartitionedFileSetDefinition, method updateArgumentsIfNeeded.
// if the arguments do not contain an output location, generate one from the partition key (if present)
/**
 * If the arguments do not contain an output location, generate one from the output partition key
 * (if present); otherwise, when a DynamicPartitioner is configured, fall back to the file set's
 * base location.
 *
 * @param arguments the runtime arguments; never mutated — a copy is made before any change
 * @param partitioning the partitioning used to parse and render the output partition key
 * @return the original map when no change was needed, otherwise a modified copy
 */
protected static Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments, Partitioning partitioning) {
  if (FileSetArguments.getOutputPath(arguments) == null) {
    PartitionKey key = PartitionedFileSetArguments.getOutputPartitionKey(arguments, partitioning);
    if (key != null) {
      arguments = Maps.newHashMap(arguments);
      FileSetArguments.setOutputPath(arguments, PartitionedFileSetDataset.getOutputPath(key, partitioning));
    } else if (PartitionedFileSetArguments.getDynamicPartitioner(arguments) != null) {
      // when using DynamicPartitioner, use the baseLocation of the fileSet as the output location.
      // Fix: copy before mutating, consistent with the branch above — previously this wrote
      // directly into the caller-supplied map, which fails on immutable maps and leaks the
      // change back to the caller.
      arguments = Maps.newHashMap(arguments);
      FileSetArguments.setBaseOutputPath(arguments);
    }
  }
  return arguments;
}
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
The class ExploreExecutorHttpHandler, method doAddPartition.
/**
 * Handles a request to register a new partition of a {@link PartitionedFileSet} with Explore.
 * Reads a JSON body of string properties (must contain "path" and the partition key fields),
 * then submits an add-partition query via the explore table manager. All failures are reported
 * through the responder; this method never throws.
 *
 * @param request the HTTP request whose content is a JSON map of properties
 * @param responder used to send the HTTP response (exactly one response is sent on every path)
 * @param datasetId the dataset to add the partition to
 */
private void doAddPartition(HttpRequest request, HttpResponder responder, DatasetId datasetId) {
Dataset dataset;
try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
dataset = datasetInstantiator.getDataset(datasetId);
if (dataset == null) {
responder.sendString(HttpResponseStatus.NOT_FOUND, "Cannot load dataset " + datasetId);
return;
}
} catch (IOException e) {
String classNotFoundMessage = isClassNotFoundException(e);
if (classNotFoundMessage != null) {
// the dataset's classes are not available; respond OK with a no-op handle so the
// caller does not treat a non-explorable dataset as a hard failure
JsonObject json = new JsonObject();
json.addProperty("handle", QueryHandle.NO_OP.getHandle());
responder.sendJson(HttpResponseStatus.OK, json);
return;
}
LOG.error("Exception instantiating dataset {}.", datasetId, e);
responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, "Exception instantiating dataset " + datasetId.getDataset());
return;
}
try {
if (!(dataset instanceof PartitionedFileSet)) {
responder.sendString(HttpResponseStatus.BAD_REQUEST, "not a partitioned dataset.");
return;
}
Partitioning partitioning = ((PartitionedFileSet) dataset).getPartitioning();
// parse the request body as a JSON map of string properties
Reader reader = new InputStreamReader(new ChannelBufferInputStream(request.getContent()));
Map<String, String> properties = GSON.fromJson(reader, new TypeToken<Map<String, String>>() {
}.getType());
String fsPath = properties.get("path");
if (fsPath == null) {
responder.sendString(HttpResponseStatus.BAD_REQUEST, "path was not specified.");
return;
}
// the partition key is encoded in the same properties map, alongside "path"
PartitionKey partitionKey;
try {
partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(properties, partitioning);
} catch (Exception e) {
responder.sendString(HttpResponseStatus.BAD_REQUEST, "invalid partition key: " + e.getMessage());
return;
}
if (partitionKey == null) {
responder.sendString(HttpResponseStatus.BAD_REQUEST, "no partition key was given.");
return;
}
// submit the add-partition operation and hand the query handle back to the caller
QueryHandle handle = exploreTableManager.addPartition(datasetId, properties, partitionKey, fsPath);
JsonObject json = new JsonObject();
json.addProperty("handle", handle.getHandle());
responder.sendJson(HttpResponseStatus.OK, json);
} catch (Throwable e) {
// catch-all boundary: any unexpected failure becomes a 500 rather than an unhandled error
LOG.error("Got exception:", e);
responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, e.getMessage());
}
}
Aggregations