Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.
The class DynamicPartitionerWithAvroTest, method runDynamicPartitionerMR.
private void runDynamicPartitionerMR(final List<? extends GenericRecord> records,
                                     boolean allowConcurrentWriters,
                                     final boolean precreatePartitions,
                                     @Nullable final DynamicPartitioner.PartitionWriteOption partitionWriteOption,
                                     boolean expectedStatus) throws Exception {
  ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);
  final long now = System.currentTimeMillis();
  final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);

  // write values to the input kvTable
  final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
  Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // the keys are not used; it only matters that they are unique
      for (int i = 0; i < records.size(); i++) {
        kvTable.write(Integer.toString(i), records.get(i).toString());
      }
    }
  });

  final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
  if (precreatePartitions) {
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws IOException {
        writeFile(pfs, createKey(now, 95111));
        writeFile(pfs, createKey(now, 98123));
        writeFile(pfs, createKey(now, 84125));
      }
    });
  }

  String allowConcurrencyKey =
    "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;
  // run the partition writer m/r with this output partition time
  Map<String, String> arguments = new HashMap<>();
  arguments.put(OUTPUT_PARTITION_KEY, Long.toString(now));
  arguments.put(allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));
  if (partitionWriteOption != null) {
    arguments.put("partitionWriteOption", partitionWriteOption.name());
  }

  long startTime = System.currentTimeMillis();
  boolean status = runProgram(app, AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class,
                              new BasicArguments(arguments));
  Assert.assertEquals(expectedStatus, status);
  if (!expectedStatus) {
    // if the program is expected to fail, there is no output data to check
    return;
  }

  // verify the data notifications
  List<Notification> notifications = getDataNotifications(startTime);
  Assert.assertEquals(1, notifications.size());
  Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET),
                      DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));

  // this should have created a partition in the pfs
  final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws IOException {
      Map<PartitionKey, PartitionDetail> partitions = new HashMap<>();
      for (PartitionDetail partition : pfs.getPartitions(null)) {
        partitions.put(partition.getPartitionKey(), partition);
        // check that the mapreduce wrote the output partition metadata to all of the output partitions
        Assert.assertEquals(getExpectedMetadata(precreatePartitions, partitionWriteOption),
                            partition.getMetadata().asMap());
        // if files were precreated and the write option is CREATE_OR_APPEND, the empty file should still exist;
        // if the write option overwrites the partition, the file should be gone
        Location preexistingFile = partition.getLocation().append("file");
        if (precreatePartitions && partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND) {
          Assert.assertTrue(preexistingFile.exists());
          try (InputStream inputStream = preexistingFile.getInputStream()) {
            Assert.assertEquals(-1, inputStream.read());
          }
        } else {
          Assert.assertFalse(preexistingFile.exists());
        }
      }
      Assert.assertEquals(3, partitions.size());
      Assert.assertEquals(keyToRecordsMap.keySet(), partitions.keySet());

      // check the relative paths of the partitions, and that each location = pfs base location + relative path
      for (Map.Entry<PartitionKey, PartitionDetail> partitionKeyEntry : partitions.entrySet()) {
        PartitionDetail partitionDetail = partitionKeyEntry.getValue();
        String relativePath = partitionDetail.getRelativePath();
        int zip = (int) partitionKeyEntry.getKey().getField("zip");
        Assert.assertEquals(Long.toString(now) + Path.SEPARATOR + zip, relativePath);
        Assert.assertEquals(pfsBaseLocation.append(relativePath), partitionDetail.getLocation());
      }

      for (Map.Entry<PartitionKey, Collection<GenericRecord>> keyToRecordsEntry : keyToRecordsMap.asMap().entrySet()) {
        Set<GenericRecord> genericRecords = new HashSet<>(keyToRecordsEntry.getValue());
        Assert.assertEquals(genericRecords, readOutput(partitions.get(keyToRecordsEntry.getKey()).getLocation()));
      }
    }
  });
}
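Every KeyValueTable access in this test runs inside a transaction obtained from Transactions.createTransactionExecutor, which is the core idiom worth extracting. Below is a minimal sketch of that write pattern, assuming the same txExecutorFactory and datasetCache fields as the test class; the helper method and the dataset name "exampleInput" are hypothetical.

private void writeValues(final List<String> values) throws Exception {
  // hypothetical dataset name; resolved through the same datasetCache as above
  final KeyValueTable exampleTable = datasetCache.getDataset("exampleInput");
  Transactions.createTransactionExecutor(txExecutorFactory, exampleTable).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // KeyValueTable stores byte[] key/value pairs; the String overloads encode as UTF-8
      for (int i = 0; i < values.size(); i++) {
        exampleTable.write(Integer.toString(i), values.get(i));
      }
    }
  });
}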
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.
The class CoreDatasetsModule, method register.
@Override
public void register(DatasetDefinitionRegistry registry) {
  DatasetDefinition<Table, DatasetAdmin> tableDef = registry.get("table");

  DatasetDefinition<KeyValueTable, DatasetAdmin> kvTableDef = new KeyValueTableDefinition(KeyValueTable.TYPE, tableDef);
  registry.add(kvTableDef);
  registry.add(new KeyValueTableDefinition(KeyValueTable.class.getName(), tableDef));

  DatasetDefinition<ObjectStore, DatasetAdmin> objectStoreDef = new ObjectStoreDefinition(ObjectStore.TYPE, kvTableDef);
  // register the same instance that the indexed object store definitions below delegate to
  registry.add(objectStoreDef);
  registry.add(new ObjectStoreDefinition(ObjectStore.class.getName(), kvTableDef));

  registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.TYPE, tableDef, objectStoreDef));
  registry.add(new IndexedObjectStoreDefinition(IndexedObjectStore.class.getName(), tableDef, objectStoreDef));

  registry.add(new IndexedTableDefinition(IndexedTable.TYPE, tableDef));
  registry.add(new IndexedTableDefinition(IndexedTable.class.getName(), tableDef));

  registry.add(new TimeseriesTableDefinition(TimeseriesTable.TYPE, tableDef));
  registry.add(new TimeseriesTableDefinition(TimeseriesTable.class.getName(), tableDef));

  registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.TYPE, tableDef));
  registry.add(new CounterTimeseriesTableDefinition(CounterTimeseriesTable.class.getName(), tableDef));

  // in-memory table
  registry.add(new InMemoryTableDefinition(InMemoryTable.TYPE));
}
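Registering each definition under both the type constant and the fully qualified class name is what lets application code refer to the dataset type either way. A minimal sketch of an application declaring a KeyValueTable instance that resolves through the KeyValueTableDefinition registered above; the app and dataset names are hypothetical.

import io.cdap.cdap.api.app.AbstractApplication;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;

// hypothetical application consuming the registered KeyValueTable type
public class ExampleApp extends AbstractApplication {
  @Override
  public void configure() {
    setName("ExampleApp");
    // "counters" is created from the KeyValueTable definition registered in the module
    createDataset("counters", KeyValueTable.class);
  }
}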
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.
The class TestFrameworkTestRun, method testAppWithPlugin.
@Test
public void testAppWithPlugin() throws Exception {
  ArtifactId artifactId = NamespaceId.DEFAULT.artifact("app-with-plugin", "1.0.0-SNAPSHOT");
  addAppArtifact(artifactId, AppWithPlugin.class);
  ArtifactId pluginArtifactId = NamespaceId.DEFAULT.artifact("test-plugin", "1.0.0-SNAPSHOT");
  addPluginArtifact(pluginArtifactId, artifactId, ToStringPlugin.class);

  ApplicationId appId = NamespaceId.DEFAULT.app("AppWithPlugin");
  AppRequest createRequest = new AppRequest(new ArtifactSummary(artifactId.getArtifact(), artifactId.getVersion()));
  ApplicationManager appManager = deployApplication(appId, createRequest);

  final WorkerManager workerManager = appManager.getWorkerManager(AppWithPlugin.WORKER);
  workerManager.start();
  workerManager.waitForRun(ProgramRunStatus.COMPLETED, 10, TimeUnit.SECONDS);

  final ServiceManager serviceManager = appManager.getServiceManager(AppWithPlugin.SERVICE);
  serviceManager.start();
  serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
  URL serviceURL = serviceManager.getServiceURL(5, TimeUnit.SECONDS);
  callServiceGet(serviceURL, "dummy");
  serviceManager.stop();
  serviceManager.waitForStopped(10, TimeUnit.SECONDS);

  WorkflowManager workflowManager = appManager.getWorkflowManager(AppWithPlugin.WORKFLOW);
  workflowManager.start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  List<RunRecord> runRecords = workflowManager.getHistory();
  Assert.assertNotEquals(ProgramRunStatus.FAILED, runRecords.get(0).getStatus());

  DataSetManager<KeyValueTable> workflowTableManager = getDataset(AppWithPlugin.WORKFLOW_TABLE);
  String value = Bytes.toString(workflowTableManager.get().read("val"));
  Assert.assertEquals(AppWithPlugin.TEST, value);

  Map<String, String> workflowTags = ImmutableMap.of(
    Constants.Metrics.Tag.NAMESPACE, NamespaceId.DEFAULT.getNamespace(),
    Constants.Metrics.Tag.APP, "AppWithPlugin",
    Constants.Metrics.Tag.WORKFLOW, AppWithPlugin.WORKFLOW,
    Constants.Metrics.Tag.RUN_ID, runRecords.get(0).getPid());
  getMetricsManager().waitForTotalMetricCount(workflowTags, String.format("user.destroy.%s", AppWithPlugin.WORKFLOW),
                                              1, 60, TimeUnit.SECONDS);

  // Testing Spark plugins. First send some data to the fileset for the Spark program to process.
  DataSetManager<FileSet> fileSetManager = getDataset(AppWithPlugin.SPARK_INPUT);
  FileSet fileSet = fileSetManager.get();
  try (PrintStream out = new PrintStream(fileSet.getLocation("input").append("file.txt").getOutputStream(), true, "UTF-8")) {
    for (int i = 0; i < 5; i++) {
      out.println("Message " + i);
    }
  }

  Map<String, String> sparkArgs = new HashMap<>();
  FileSetArguments.setInputPath(sparkArgs, "input");
  SparkManager sparkManager = appManager.getSparkManager(AppWithPlugin.SPARK).start(sparkArgs);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  // Verify the Spark result.
  DataSetManager<Table> dataSetManager = getDataset(AppWithPlugin.SPARK_TABLE);
  Table table = dataSetManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    for (int i = 0; i < 5; i++) {
      Row row = scanner.next();
      Assert.assertNotNull(row);
      String expected = "Message " + i + " " + AppWithPlugin.TEST;
      Assert.assertEquals(expected, Bytes.toString(row.getRow()));
      Assert.assertEquals(expected, Bytes.toString(row.get(expected)));
    }
    // there shouldn't be any more rows in the table
    Assert.assertNull(scanner.next());
  }
}
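The KeyValueTable check in the middle of this test follows the standard test-framework idiom: getDataset returns a DataSetManager, get() hands back a live dataset handle, and flush() makes writes from the test visible to later readers. A minimal sketch of that idiom; the dataset name and keys here are hypothetical.

// hypothetical verification fragment inside a test extending TestBase
DataSetManager<KeyValueTable> manager = getDataset("results");
KeyValueTable results = manager.get();
byte[] raw = results.read("status");   // null if the key was never written
Assert.assertNotNull(raw);
Assert.assertEquals("ok", Bytes.toString(raw));
results.write("checked", "true");      // writes go through the same handle
manager.flush();                       // flush so subsequent readers observe the write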
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.
The class TestFrameworkTestRun, method testByteCodeClassLoader.
@Category(XSlowTests.class)
@Test
public void testByteCodeClassLoader() throws Exception {
  // This test verifies the classloading of bytecode-generated classes
  ApplicationManager appManager = deployApplication(testSpace, ClassLoaderTestApp.class);
  ServiceManager serviceManager = appManager.getServiceManager("RecordHandler").start();
  URL serviceURL = serviceManager.getServiceURL(15, TimeUnit.SECONDS);
  Assert.assertNotNull(serviceURL);

  // increment the record
  URL url = new URL(serviceURL, "increment/public");
  for (int i = 0; i < 10; i++) {
    HttpResponse response = executeHttp(HttpRequest.post(url).build());
    Assert.assertEquals(200, response.getResponseCode());
  }

  // query the record
  url = new URL(serviceURL, "query?type=public");
  HttpRequest request = HttpRequest.get(url).build();
  HttpResponse response = executeHttp(request);
  Assert.assertEquals(200, response.getResponseCode());
  long count = Long.parseLong(response.getResponseBodyAsString());
  serviceManager.stop();

  // verify the record count against the dataset
  DataSetManager<KeyValueTable> recordsManager = getDataset(testSpace.dataset("records"));
  KeyValueTable records = recordsManager.get();
  Assert.assertEquals(count, Bytes.toLong(records.read("PUBLIC")));
}
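The final assertion works only if the counter was stored as an 8-byte long, since Bytes.toLong requires exactly eight bytes. A sketch of the increment side of that contract using KeyValueTable.incrementAndGet; the helper and its key scheme are assumptions for illustration, not the app's actual handler code.

// hypothetical handler-side helper: incrementAndGet atomically adds the delta
// and persists the result as an 8-byte long, matching the Bytes.toLong read above
private long incrementRecord(KeyValueTable records, String type) {
  // key scheme is an assumption: the test increments "public" but reads back "PUBLIC"
  return records.incrementAndGet(Bytes.toBytes(type.toUpperCase()), 1L);
}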
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.
The class TestFrameworkTestRun, method testCustomActionDatasetAccess.
@Category(SlowTests.class)
@Test
public void testCustomActionDatasetAccess() throws Exception {
  addDatasetInstance("keyValueTable", DatasetWithCustomActionApp.CUSTOM_TABLE);
  addDatasetInstance("fileSet", DatasetWithCustomActionApp.CUSTOM_FILESET);
  ApplicationManager appManager = deployApplication(DatasetWithCustomActionApp.class);

  ServiceManager serviceManager = appManager.getServiceManager(DatasetWithCustomActionApp.CUSTOM_SERVICE).start();
  serviceManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);

  WorkflowManager workflowManager = appManager.getWorkflowManager(DatasetWithCustomActionApp.CUSTOM_WORKFLOW).start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);
  appManager.stopAll();

  DataSetManager<KeyValueTable> outTableManager = getDataset(DatasetWithCustomActionApp.CUSTOM_TABLE);
  KeyValueTable outputTable = outTableManager.get();
  Assert.assertEquals("world", Bytes.toString(outputTable.read("hello")));
  Assert.assertEquals("service", Bytes.toString(outputTable.read("hi")));
  Assert.assertEquals("another.world", Bytes.toString(outputTable.read("another.hello")));

  DataSetManager<FileSet> outFileSetManager = getDataset(DatasetWithCustomActionApp.CUSTOM_FILESET);
  FileSet fs = outFileSetManager.get();
  try (InputStream in = fs.getLocation("test").getInputStream()) {
    Assert.assertEquals(42, in.read());
  }
}
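For context, a custom action reaches datasets through its transactional context rather than through injection. A minimal sketch of how an action could produce entries like the "hello" -> "world" pair verified above, assuming the CustomActionContext's Transactional.execute(TxRunnable) idiom; the class and dataset names are hypothetical, not DatasetWithCustomActionApp's actual code.

import io.cdap.cdap.api.customaction.AbstractCustomAction;
import io.cdap.cdap.api.dataset.lib.KeyValueTable;

// hypothetical custom action; the write runs inside the transaction that
// getContext().execute(...) opens around the TxRunnable
public class WriteGreetingAction extends AbstractCustomAction {
  @Override
  public void run() throws Exception {
    getContext().execute(context -> {
      KeyValueTable table = context.getDataset("myCustomTable");
      table.write("hello", "world");
    });
  }
}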