Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class AbstractContext, the method createRuntimeProgramContext:
/**
 * Creates a new instance of {@link RuntimeProgramContext} to be
 * provided to {@link RuntimeProgramContextAware} dataset.
 */
private RuntimeProgramContext createRuntimeProgramContext(final DatasetId datasetId) {
  return new RuntimeProgramContext() {

    @Override
    public void notifyNewPartitions(Collection<? extends PartitionKey> partitionKeys) throws IOException {
      String topic = cConf.get(Constants.Dataset.DATA_EVENT_TOPIC);
      if (Strings.isNullOrEmpty(topic)) {
        // Don't publish if there is no data event topic
        return;
      }
      TopicId dataEventTopic = NamespaceId.SYSTEM.topic(topic);
      MessagePublisher publisher = getMessagingContext().getMessagePublisher();
      byte[] payload = Bytes.toBytes(GSON.toJson(Notification.forPartitions(datasetId, partitionKeys)));

      int failure = 0;
      long startTime = System.currentTimeMillis();
      while (true) {
        try {
          publisher.publish(dataEventTopic.getNamespace(), dataEventTopic.getTopic(), payload);
          return;
        } catch (TopicNotFoundException e) {
          // this shouldn't happen since the TMS creates the data event topic on startup.
          throw new IOException("Unexpected exception due to missing topic '" + dataEventTopic + "'", e);
        } catch (IOException e) {
          long sleepTime = retryStrategy.nextRetry(++failure, startTime);
          if (sleepTime < 0) {
            throw e;
          }
          try {
            TimeUnit.MILLISECONDS.sleep(sleepTime);
          } catch (InterruptedException ex) {
            // If interrupted during sleep, just reset the interrupt flag and return
            Thread.currentThread().interrupt();
            return;
          }
        }
      }
    }

    @Override
    public ProgramRunId getProgramRunId() {
      return programRunId;
    }

    @Nullable
    @Override
    public NamespacedEntityId getComponentId() {
      return AbstractContext.this.getComponentId();
    }
  };
}
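The publish loop only retries transient IOExceptions from the messaging service and gives up once the retry strategy returns a negative sleep time; a missing topic is treated as fatal. As a rough caller-side sketch, a RuntimeProgramContextAware dataset that was handed this context might announce a newly written partition as follows (the runtimeContext field, the "time"/"zip" fields and the values are assumptions for illustration, not taken from the CDAP sources; java.util.Collections is assumed to be imported):
// Hypothetical caller-side sketch: a dataset announcing one new partition through the
// RuntimeProgramContext created above. Field names and the runtimeContext field are assumed.
private RuntimeProgramContext runtimeContext;   // would be supplied via RuntimeProgramContextAware

void announceNewPartition(long time, int zip) throws IOException {
  PartitionKey key = PartitionKey.builder()
    .addLongField("time", time)
    .addIntField("zip", zip)
    .build();
  // Triggers the publish-with-retry loop shown above.
  runtimeContext.notifyNewPartitions(Collections.singletonList(key));
}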
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class DynamicPartitionerWithAvroTest, the method groupByPartitionKey:
private Multimap<PartitionKey, GenericRecord> groupByPartitionKey(List<? extends GenericRecord> records, long now) {
  HashMultimap<PartitionKey, GenericRecord> groupedByPartitionKey = HashMultimap.create();
  for (GenericRecord record : records) {
    PartitionKey key = PartitionKey.builder()
      .addLongField("time", now)
      .addIntField("zip", (int) record.get("zip"))
      .build();
    groupedByPartitionKey.put(key, record);
  }
  return groupedByPartitionKey;
}
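The grouping works because PartitionKey equality is value-based over its fields, so records that share the same "time" and "zip" values collapse under a single multimap key. A minimal sketch of that property (the field values are arbitrary):
// Two keys built from the same field values are equal and hash the same,
// which is what lets HashMultimap group records by partition above.
PartitionKey a = PartitionKey.builder().addLongField("time", 1000L).addIntField("zip", 94105).build();
PartitionKey b = PartitionKey.builder().addLongField("time", 1000L).addIntField("zip", 94105).build();
Assert.assertEquals(a, b);
Assert.assertEquals(a.hashCode(), b.hashCode());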
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class DynamicPartitionerWithAvroTest, the method runDynamicPartitionerMapReduce:
private void runDynamicPartitionerMapReduce(final List<? extends GenericRecord> records,
                                            boolean allowConcurrentWriters,
                                            boolean expectedStatus) throws Exception {
  ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);
  final long now = System.currentTimeMillis();
  final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);

  // write values to the input kvTable
  final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
  Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // the keys are not used; it matters that they're unique though
      for (int i = 0; i < records.size(); i++) {
        kvTable.write(Integer.toString(i), records.get(i).toString());
      }
    }
  });

  String allowConcurrencyKey =
    "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;
  // run the partition writer m/r with this output partition time
  ImmutableMap<String, String> arguments = ImmutableMap.of(OUTPUT_PARTITION_KEY, Long.toString(now),
                                                           allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));
  long startTime = System.currentTimeMillis();
  boolean status = runProgram(app, AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class,
                              new BasicArguments(arguments));
  Assert.assertEquals(expectedStatus, status);

  if (!expectedStatus) {
    // if we expect the program to fail, no need to check the output data for expected results
    return;
  }

  // Verify notifications
  List<Notification> notifications = getDataNotifications(startTime);
  Assert.assertEquals(1, notifications.size());
  Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET),
                      DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));

  // this should have created a partition in the pfs
  final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
  final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();

  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws IOException {
      Map<PartitionKey, PartitionDetail> partitions = new HashMap<>();
      for (PartitionDetail partition : pfs.getPartitions(null)) {
        partitions.put(partition.getPartitionKey(), partition);
        // check that the mapreduce wrote the output partition metadata to all the output partitions
        Assert.assertEquals(AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.METADATA,
                            partition.getMetadata().asMap());
      }
      Assert.assertEquals(3, partitions.size());
      Assert.assertEquals(keyToRecordsMap.keySet(), partitions.keySet());

      // Check relative paths of the partitions. Also check that their location = pfs baseLocation + relativePath
      for (Map.Entry<PartitionKey, PartitionDetail> partitionKeyEntry : partitions.entrySet()) {
        PartitionDetail partitionDetail = partitionKeyEntry.getValue();
        String relativePath = partitionDetail.getRelativePath();
        int zip = (int) partitionKeyEntry.getKey().getField("zip");
        Assert.assertEquals(Long.toString(now) + Path.SEPARATOR + zip, relativePath);
        Assert.assertEquals(pfsBaseLocation.append(relativePath), partitionDetail.getLocation());
      }

      for (Map.Entry<PartitionKey, Collection<GenericRecord>> keyToRecordsEntry : keyToRecordsMap.asMap().entrySet()) {
        Set<GenericRecord> genericRecords = new HashSet<>(keyToRecordsEntry.getValue());
        Assert.assertEquals(genericRecords, readOutput(partitions.get(keyToRecordsEntry.getKey()).getLocation()));
      }
    }
  });
}
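The assertions above encode the expected layout: each dynamic partition lands under a relative path of "<time>/<zip>" below the PartitionedFileSet's base location. A test that cares about a single partition could also look it up directly by key rather than iterating over getPartitions(null); a minimal sketch, assuming it runs inside the same kind of transaction and that 94105 is one of the zip values actually written:
// Hypothetical lookup of one partition by its key (the zip value is assumed for illustration).
PartitionKey key = PartitionKey.builder()
  .addLongField("time", now)
  .addIntField("zip", 94105)
  .build();
PartitionDetail detail = pfs.getPartition(key);
Assert.assertNotNull(detail);
// The partition's location is the file set's base location plus its relative path.
Assert.assertEquals(pfsBaseLocation.append(detail.getRelativePath()), detail.getLocation());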
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class ExploreExecutorHttpHandler, the method doDropPartition:
private void doDropPartition(HttpRequest request, HttpResponder responder, DatasetId datasetId) {
  Dataset dataset;
  try (SystemDatasetInstantiator datasetInstantiator = datasetInstantiatorFactory.create()) {
    dataset = datasetInstantiator.getDataset(datasetId);
    if (dataset == null) {
      responder.sendString(HttpResponseStatus.NOT_FOUND, "Cannot load dataset " + datasetId);
      return;
    }
  } catch (IOException e) {
    String classNotFoundMessage = isClassNotFoundException(e);
    if (classNotFoundMessage != null) {
      JsonObject json = new JsonObject();
      json.addProperty("handle", QueryHandle.NO_OP.getHandle());
      responder.sendJson(HttpResponseStatus.OK, json);
      return;
    }
    LOG.error("Exception instantiating dataset {}.", datasetId, e);
    responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, "Exception instantiating dataset " + datasetId);
    return;
  }

  try {
    if (!(dataset instanceof PartitionedFileSet)) {
      responder.sendString(HttpResponseStatus.BAD_REQUEST, "not a partitioned dataset.");
      return;
    }
    Partitioning partitioning = ((PartitionedFileSet) dataset).getPartitioning();

    Reader reader = new InputStreamReader(new ChannelBufferInputStream(request.getContent()));
    Map<String, String> properties = GSON.fromJson(reader, new TypeToken<Map<String, String>>() { }.getType());

    PartitionKey partitionKey;
    try {
      partitionKey = PartitionedFileSetArguments.getOutputPartitionKey(properties, partitioning);
    } catch (Exception e) {
      responder.sendString(HttpResponseStatus.BAD_REQUEST, "invalid partition key: " + e.getMessage());
      return;
    }
    if (partitionKey == null) {
      responder.sendString(HttpResponseStatus.BAD_REQUEST, "no partition key was given.");
      return;
    }

    QueryHandle handle = exploreTableManager.dropPartition(datasetId, properties, partitionKey);
    JsonObject json = new JsonObject();
    json.addProperty("handle", handle.getHandle());
    responder.sendJson(HttpResponseStatus.OK, json);
  } catch (Throwable e) {
    LOG.error("Got exception:", e);
    responder.sendString(HttpResponseStatus.INTERNAL_SERVER_ERROR, e.getMessage());
  }
}
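For the handler to recover a partition key, the JSON body must carry it in the encoded form that PartitionedFileSetArguments.getOutputPartitionKey reads back. A client-side sketch of building such a request body, assuming the standard setOutputPartitionKey encoding (the field names and values are hypothetical):
// Hypothetical client-side construction of the drop-partition request body.
Map<String, String> properties = new HashMap<>();
PartitionKey keyToDrop = PartitionKey.builder()
  .addLongField("time", 1500000000000L)
  .addIntField("zip", 94105)
  .build();
// Encodes the key into the property map in the form getOutputPartitionKey() expects.
PartitionedFileSetArguments.setOutputPartitionKey(properties, keyToDrop);
String requestBody = new Gson().toJson(properties);   // sent as the HTTP request body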
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class DynamicPartitioningOutputCommitter, the method commitJob:
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration configuration = context.getConfiguration();
  MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
  BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

  String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
  outputDataset = taskContext.getDataset(outputDatasetName);
  Partitioning partitioning = outputDataset.getPartitioning();

  Set<PartitionKey> partitionsToAdd = new HashSet<>();
  relativePaths = new HashSet<>();
  // Go over all files in the temporary directory and keep track of partitions to add for them
  FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
  for (FileStatus committedTaskPath : allCommittedTaskPaths) {
    FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
    RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
    while (fileIter.hasNext()) {
      Path path = fileIter.next().getPath();
      String relativePath = getRelative(committedTaskPath.getPath(), path);

      int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
      if (lastPathSepIdx == -1) {
        // this shouldn't happen because each relative path should consist of at least one partition key and
        // the output file name
        LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path, relativePath);
        continue;
      }
      // relativePath = "../key1/key2/part-m-00000"
      // relativeDir = "../key1/key2"
      // fileName = "part-m-00000"
      String relativeDir = relativePath.substring(0, lastPathSepIdx);
      String fileName = relativePath.substring(lastPathSepIdx + 1);

      Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
      if (fs.exists(finalDir)) {
        throw new FileAlreadyExistsException("Final output path " + finalDir + " already exists");
      }
      PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
      partitionsToAdd.add(partitionKey);
      relativePaths.add(relativeDir);
    }
  }

  // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
  // the original outputDir.
  Path finalOutput = FileOutputFormat.getOutputPath(context);
  FileSystem fs = finalOutput.getFileSystem(configuration);
  for (FileStatus stat : getAllCommittedTaskPaths(context)) {
    mergePaths(fs, stat, finalOutput);
  }

  // compute the metadata to be written to every output partition
  Map<String, String> metadata =
    ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
                                             PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

  // create all the necessary partitions
  for (PartitionKey partitionKey : partitionsToAdd) {
    PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
    partitionOutput.setMetadata(metadata);
    partitionOutput.addPartition();
  }

  // close the TaskContext, which flushes dataset operations
  try {
    taskContext.flushOperations();
  } catch (Exception e) {
    Throwables.propagateIfPossible(e, IOException.class);
    throw new IOException(e);
  }

  // delete the job-specific _temporary folder and create a _done file in the o/p folder
  cleanupJob(context);

  // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
  if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
    for (String relativePath : relativePaths) {
      Path pathToMark = new Path(finalOutput, relativePath);
      Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
      fs.createNewFile(markerPath);
    }
  }
}
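getPartitionKey itself is not shown on this page; conceptually it reverses the path layout by splitting the relative directory on the path separator and parsing each segment with the corresponding field type of the dataset's Partitioning. A rough sketch of such a helper, assuming Partitioning.getFields() returns the fields in declaration order and FieldType.parse converts a path segment to the field's value type (this is an illustration, not the committer's actual implementation):
// Sketch only: rebuild a PartitionKey from a relative directory such as "1500000000000/94105".
private PartitionKey getPartitionKey(Partitioning partitioning, String relativeDir) {
  String[] pathParts = relativeDir.split(Path.SEPARATOR);
  if (pathParts.length != partitioning.getFields().size()) {
    throw new IllegalArgumentException(
      String.format("Relative path '%s' does not have one segment per partitioning field", relativeDir));
  }
  PartitionKey.Builder builder = PartitionKey.builder();
  int i = 0;
  for (Map.Entry<String, Partitioning.FieldType> entry : partitioning.getFields().entrySet()) {
    // Parse each path segment according to the declared field type (STRING, INT or LONG).
    builder.addField(entry.getKey(), entry.getValue().parse(pathParts[i++]));
  }
  return builder.build();
}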