Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
Source: class TestFrameworkTestRun, method testCrossNSMapperDatasetAccess.
@Category(SlowTests.class)
@Test
public void testCrossNSMapperDatasetAccess() throws Exception {
  NamespaceMeta inputNS = new NamespaceMeta.Builder().setName("inputNS").build();
  NamespaceMeta outputNS = new NamespaceMeta.Builder().setName("outputNS").build();
  getNamespaceAdmin().create(inputNS);
  getNamespaceAdmin().create(outputNS);
  addDatasetInstance(inputNS.getNamespaceId().dataset("table1"), "keyValueTable");
  addDatasetInstance(outputNS.getNamespaceId().dataset("table2"), "keyValueTable");
  DataSetManager<KeyValueTable> tableManager = getDataset(inputNS.getNamespaceId().dataset("table1"));
  KeyValueTable inputTable = tableManager.get();
  inputTable.write("hello", "world");
  tableManager.flush();
  ApplicationManager appManager = deployApplication(DatasetCrossNSAccessWithMAPApp.class);
  Map<String, String> argsForMR = ImmutableMap.of(
    DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NS, inputNS.getName(),
    DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NAME, "table1",
    DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NS, outputNS.getName(),
    DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NAME, "table2");
  MapReduceManager mrManager =
    appManager.getMapReduceManager(DatasetCrossNSAccessWithMAPApp.MAPREDUCE_PROGRAM).start(argsForMR);
  mrManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  appManager.stopAll();
  DataSetManager<KeyValueTable> outTableManager = getDataset(outputNS.getNamespaceId().dataset("table2"));
  verifyMapperJobOutput(DatasetCrossNSAccessWithMAPApp.class, outTableManager);
}
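The runtime arguments name the input and output namespaces, so the cross-namespace wiring happens inside DatasetCrossNSAccessWithMAPApp, typically in the MapReduce program's initialize() method. A minimal sketch, assuming the app resolves the namespaces and table names from the arguments shown above (the real app may differ):

// a hedged sketch: Input/Output.ofDataset(...).fromNamespace(...) is CDAP's API for
// reading from and writing to datasets that live in another namespace
Map<String, String> args = getContext().getRuntimeArguments();
getContext().addInput(Input.ofDataset(args.get(DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NAME))
                        .fromNamespace(args.get(DatasetCrossNSAccessWithMAPApp.INPUT_DATASET_NS)));
getContext().addOutput(Output.ofDataset(args.get(DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NAME))
                         .fromNamespace(args.get(DatasetCrossNSAccessWithMAPApp.OUTPUT_DATASET_NS)));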
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
Source: class DataStreamsSparkSinkTest, method testSparkSink.
private void testSparkSink(ApplicationManager appManager, final String output) throws Exception {
  SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
  sparkManager.start(ImmutableMap.of("tablename", output));
  sparkManager.waitForStatus(true, 10, 1);
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      return getDataset(output).get() != null;
    }
  }, 1, TimeUnit.MINUTES);
  final DataSetManager<KeyValueTable> outputManager = getDataset(output);
  final Map<String, String> expectedKeyValues =
    ImmutableMap.of("0", "samuel", "1", "jackson", "2", "dwayne", "3", "johnson");
  Tasks.waitFor(true, new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      outputManager.flush();
      Map<String, String> keyValues =
        co.cask.cdap.etl.mock.spark.streaming.MockSink.getValues(expectedKeyValues.keySet(), outputManager);
      return expectedKeyValues.equals(keyValues);
    }
  }, 1, TimeUnit.MINUTES);
  sparkManager.stop();
  sparkManager.waitForStatus(false, 10, 1);
  sparkManager.waitForRun(ProgramRunStatus.KILLED, 10, TimeUnit.SECONDS);
}
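Both waits poll with pre-Java-8 anonymous Callable classes. On a Java 8 codebase the same checks could be written as lambdas; a behavior-preserving sketch using the same Tasks.waitFor overload as above:

Tasks.waitFor(true, () -> getDataset(output).get() != null, 1, TimeUnit.MINUTES);
Tasks.waitFor(true, () -> {
  // flush the manager so the read below sees the sink's latest writes
  outputManager.flush();
  return expectedKeyValues.equals(
    co.cask.cdap.etl.mock.spark.streaming.MockSink.getValues(expectedKeyValues.keySet(), outputManager));
}, 1, TimeUnit.MINUTES);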
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
Source: class DynamicPartitionerWithAvroTest, method runDynamicPartitionerMR.
private void runDynamicPartitionerMR(final List<? extends GenericRecord> records,
                                     boolean allowConcurrentWriters,
                                     final boolean precreatePartitions,
                                     @Nullable final DynamicPartitioner.PartitionWriteOption partitionWriteOption,
                                     boolean expectedStatus) throws Exception {
  ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);
  final long now = System.currentTimeMillis();
  final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);
  // write values to the input kvTable
  final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
  Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // the keys themselves are not used; they only need to be unique
      for (int i = 0; i < records.size(); i++) {
        kvTable.write(Integer.toString(i), records.get(i).toString());
      }
    }
  });
  final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
  if (precreatePartitions) {
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs)
      .execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws IOException {
          writeFile(pfs, createKey(now, 95111));
          writeFile(pfs, createKey(now, 98123));
          writeFile(pfs, createKey(now, 84125));
        }
      });
  }
  String allowConcurrencyKey =
    "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;
  // run the partition writer m/r with this output partition time
  Map<String, String> arguments = new HashMap<>();
  arguments.put(OUTPUT_PARTITION_KEY, Long.toString(now));
  arguments.put(allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));
  if (partitionWriteOption != null) {
    arguments.put("partitionWriteOption", partitionWriteOption.name());
  }
  long startTime = System.currentTimeMillis();
  boolean status = runProgram(app, AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class,
                              new BasicArguments(arguments));
  Assert.assertEquals(expectedStatus, status);
  if (!expectedStatus) {
    // if we expect the program to fail, no need to check the output data for expected results
    return;
  }
  // Verify notifications
  List<Notification> notifications = getDataNotifications(startTime);
  Assert.assertEquals(1, notifications.size());
  Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET),
                      DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));
  // this should have created a partition in the pfs
  final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws IOException {
        Map<PartitionKey, PartitionDetail> partitions = new HashMap<>();
        for (PartitionDetail partition : pfs.getPartitions(null)) {
          partitions.put(partition.getPartitionKey(), partition);
          // check that the mapreduce wrote the output partition metadata to all the output partitions
          Assert.assertEquals(getExpectedMetadata(precreatePartitions, partitionWriteOption),
                              partition.getMetadata().asMap());
          // if files were precreated, and the option is to append, expect the empty file to exist;
          // if the partition write option is configured to overwrite, then the file is expected to not exist
          Location preexistingFile = partition.getLocation().append("file");
          if (precreatePartitions && partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND) {
            Assert.assertTrue(preexistingFile.exists());
            try (InputStream inputStream = preexistingFile.getInputStream()) {
              Assert.assertEquals(-1, inputStream.read());
            }
          } else {
            Assert.assertFalse(preexistingFile.exists());
          }
        }
        Assert.assertEquals(3, partitions.size());
        Assert.assertEquals(keyToRecordsMap.keySet(), partitions.keySet());
        // Check relative paths of the partitions. Also check that their location = pfs baseLocation + relativePath
        for (Map.Entry<PartitionKey, PartitionDetail> partitionKeyEntry : partitions.entrySet()) {
          PartitionDetail partitionDetail = partitionKeyEntry.getValue();
          String relativePath = partitionDetail.getRelativePath();
          int zip = (int) partitionKeyEntry.getKey().getField("zip");
          Assert.assertEquals(Long.toString(now) + Path.SEPARATOR + zip, relativePath);
          Assert.assertEquals(pfsBaseLocation.append(relativePath), partitionDetail.getLocation());
        }
        for (Map.Entry<PartitionKey, Collection<GenericRecord>> keyToRecordsEntry : keyToRecordsMap.asMap().entrySet()) {
          Set<GenericRecord> genericRecords = new HashSet<>(keyToRecordsEntry.getValue());
          Assert.assertEquals(genericRecords, readOutput(partitions.get(keyToRecordsEntry.getKey()).getLocation()));
        }
      }
    });
}
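createKey, writeFile, groupByPartitionKey, getExpectedMetadata, and readOutput are private helpers of DynamicPartitionerWithAvroTest. As a hypothetical reconstruction, inferred only from the time/zip fields checked above, createKey might look like this:

private PartitionKey createKey(long time, int zip) {
  // two-field key matching the relative path asserted above: <time>/<zip>
  return PartitionKey.builder()
    .addLongField("time", time)
    .addIntField("zip", zip)
    .build();
}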
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
Source: class MapReduceProgramRunnerTest, method testMapreduceWithObjectStore.
@Test
public void testMapreduceWithObjectStore() throws Exception {
  // deploy the app to another namespace and, along the way, test cross-namespace access
  final ApplicationWithPrograms app =
    deployApp(Id.Namespace.fromEntityId(new NamespaceId("someOtherNameSpace")), AppWithMapReduceUsingObjectStore.class);
  final ObjectStore<String> input = datasetCache.getDataset("someOtherNameSpace", "keys");
  // getting a dataset from a non-existing namespace must fail
  try {
    datasetCache.getDataset("nonExistingNameSpace", "keys");
    Assert.fail("getDataset() should throw an exception when accessing a dataset from a non-existing namespace.");
  } catch (DatasetInstantiationException e) {
    // expected
  }
  final String testString = "persisted data";
  // populate some input
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) input)
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        input.write(Bytes.toBytes(testString), testString);
        input.write(Bytes.toBytes("distributed systems"), "distributed systems");
      }
    });
  runProgram(app, AppWithMapReduceUsingObjectStore.ComputeCounts.class, false, true);
  final KeyValueTable output = datasetCache.getDataset("someOtherNameSpace", "count");
  // read the output and verify the result
  Transactions.createTransactionExecutor(txExecutorFactory, output).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      byte[] val = output.read(Bytes.toBytes(testString));
      Assert.assertNotNull(val);
      Assert.assertEquals(Integer.toString(testString.length()), Bytes.toString(val));
      val = output.read(Bytes.toBytes("distributed systems"));
      Assert.assertNotNull(val);
      Assert.assertEquals("19", Bytes.toString(val));
    }
  });
}
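For context, ComputeCounts maps each input string to its character count, which is why "distributed systems" (19 characters) yields "19". A hedged sketch of what such a mapper could look like (the real implementation lives in AppWithMapReduceUsingObjectStore and may differ):

// hypothetical mapper: the key/value types follow what an ObjectStore<String> input provides
public static class CountsMapper extends Mapper<byte[], String, byte[], byte[]> {
  @Override
  protected void map(byte[] key, String value, Context context) throws IOException, InterruptedException {
    // emit <string, length(string)> so the output table maps each input string to its length
    context.write(Bytes.toBytes(value), Bytes.toBytes(Integer.toString(value.length())));
  }
}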
Use of co.cask.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
Source: class MapReduceProgramRunnerTest, method testFailureInInit.
private void testFailureInInit(final String expected, ApplicationWithPrograms app,
                               Class<?> programClass, Map<String, String> args) throws Exception {
  // We want to verify that when a mapreduce fails during initialize(), especially
  // if an input or output format provider fails to produce its configuration, the
  // writes by that initialize() method are rolled back. (Background: prior to
  // CDAP-7476, the input/output format provider was called *after* initialize()
  // returned, and therefore that transaction may have been committed already.)
  // (1) initialize the table with a known value
  datasetCache.newTransactionContext();
  final KeyValueTable kvTable = datasetCache.getDataset("recorder");
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        // seed the table with initialized=false
        kvTable.write("initialized", "false");
      }
    });
  // (2) run the job
  runProgram(app, programClass, args, false);
  // (3) verify the results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        // if the rollback worked, the recorded value still matches the expected one
        Assert.assertEquals(expected, Bytes.toString(kvTable.read("initialized")));
      }
    });
  datasetCache.dismissTransactionContext();
}
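The programs passed in as programClass are expected to write to the recorder table inside initialize() and then fail, so the test can assert the write never became visible. A hedged sketch of that shape (the real programs are defined alongside the test and may differ):

@Override
public void initialize() throws Exception {
  // hypothetical failing initialize(): the write below must be rolled back, so the
  // test still reads the value it seeded before the run
  KeyValueTable recorder = getContext().getDataset("recorder");
  recorder.write("initialized", "true");
  throw new RuntimeException("fail while producing the input/output format configuration");
}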